Spaces:
Paused
Paused
| const express = require("express"); | |
| const { chromium } = require("playwright"); | |
const app = express();
const PORT = process.env.PORT || 7860;

// Shared Playwright browser instance. It stays undefined until the
// asynchronous launch below has completed.
let browser;

// Launch Playwright browser once at startup.
// The launch is awaited inside a try/catch so a failure is logged and the
// process exits, instead of surfacing as an unhandled promise rejection and
// leaving the server running without a usable browser.
(async () => {
  try {
    browser = await chromium.launch({
      headless: true,
      // Chromium command-line switches passed through unchanged.
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage"
      ]
    });
  } catch (err) {
    console.error("Failed to launch Playwright browser:", err);
    process.exit(1);
  }
})();
// Core scraping function

// Number of videos returned when the caller does not pass a limit.
const DEFAULT_VIDEO_LIMIT = 31;

/**
 * Scrape the videos listed on a YouTube channel's "/videos" page.
 *
 * @param {string} channelName - Channel handle, with or without a leading "@".
 * @param {number} [limit=31] - Maximum number of videos to return.
 * @returns {Promise<Array<{title: string, videoId: (string|null), url: string,
 *   thumbnail: (string|null), published: string}>>} One entry per video tile
 *   found on the page, in page order.
 * @throws {Error} If the channel name is empty, the shared browser has not
 *   finished launching, or navigation / selector waiting fails or times out.
 */
async function scrapeChannelVideos(channelName, limit = DEFAULT_VIDEO_LIMIT) {
  // Accept both "MrBeast" and "@MrBeast"; the "@" is re-added below.
  const handle = String(channelName ?? "").trim().replace(/^@/, "");
  if (handle === "") {
    throw new Error("channelName is required");
  }

  // `browser` is assigned asynchronously at startup; fail with a clear
  // message rather than a TypeError if a request arrives before it is ready.
  if (!browser) {
    throw new Error("Browser is not ready yet; try again shortly");
  }

  // Encode the handle so characters such as "/", "?" or "#" cannot change
  // which page is requested.
  const url = `https://www.youtube.com/@${encodeURIComponent(handle)}/videos`;

  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  });

  try {
    const page = await context.newPage();
    await page.goto(url, { waitUntil: "networkidle", timeout: 20000 });
    await page.waitForSelector("a#video-title-link", { timeout: 10000 });

    // The callback runs inside the page, so it must be self-contained;
    // `limit` is handed over as the explicit $$eval argument.
    return await page.$$eval(
      "#dismissible",
      (els, max) =>
        els.slice(0, max).map((el) => {
          const anchor = el.querySelector("a#video-title-link");
          const meta = el.querySelector("#metadata-line span");
          const vidUrl = anchor?.href || "";
          const videoId = vidUrl.match(/v=([^&]+)/)?.[1] || null;
          return {
            title: anchor?.title || anchor?.textContent.trim() || "",
            videoId,
            url: vidUrl,
            thumbnail: videoId
              ? `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`
              : null,
            published: meta?.textContent.trim() || ""
          };
        }),
      limit
    );
  } finally {
    // Always release the browser context, on success and on failure alike.
    await context.close();
  }
}
// Home route: replies with a short welcome payload that points at the
// documentation endpoint and shows a sample request path.
function sendWelcome(_req, res) {
  const welcome = {
    message: "Welcome to the YouTube Video Scraper API",
    docs: "/api",
    example: "/api/video/MrBeast"
  };
  res.json(welcome);
}

app.get("/", sendWelcome);
// API landing/documentation route.
// Lists the available endpoints. The description states the actual result
// size of the scraper (up to 31 videos) rather than the previous "3", which
// did not match what the scraping code returns.
app.get("/api", (req, res) => {
  res.json({
    endpoints: [
      {
        route: "/api/video/:channelName",
        method: "GET",
        description:
          "Scrape the latest videos (up to 31) for a given YouTube channel"
      }
    ]
  });
});
// Dynamic video-scraping endpoint.
// Responds with the scraped videos for the channel named in the URL, or with
// HTTP 500 and the error message when anything in the handler throws.
async function handleChannelVideos(req, res) {
  try {
    const { channelName: channel } = req.params;
    // Properties are evaluated top to bottom, so the timestamp is taken
    // after the scrape has finished.
    const payload = {
      channel,
      videos: await scrapeChannelVideos(channel),
      timestamp: new Date().toISOString()
    };
    res.json(payload);
  } catch (error) {
    const body = { error: error.message };
    res.status(500).json(body);
  }
}

app.get("/api/video/:channelName", handleChannelVideos);
// Start the HTTP server and log the port once it is accepting connections.
function announceListening() {
  const banner = "Server listening on port " + PORT;
  console.log(banner);
}

app.listen(PORT, announceListening);