File size: 2,630 Bytes
d751bce
 
b87c704
 
d751bce
b87c704
d751bce
b87c704
d751bce
 
 
 
 
 
 
 
 
 
 
b87c704
d751bce
 
 
 
 
 
 
 
 
61c2768
d751bce
 
 
b87c704
d751bce
57a02ae
d751bce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61c2768
d751bce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b87c704
61c2768
d751bce
 
 
 
 
 
 
 
 
 
 
 
305000a
d751bce
 
 
 
 
 
 
 
 
98e9a75
 
b87c704
d751bce
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
const express = require("express");
const { chromium } = require("playwright");

// Express application instance on which every route in this file is registered.
const app = express();
// Port to listen on: the PORT environment variable when it is set and non-empty,
// otherwise 7860.
const PORT = process.env.PORT || 7860;

// Shared Playwright browser instance. It stays `undefined` until the
// asynchronous launch below has resolved.
let browser;

// Launch the Playwright browser once at startup so that all requests share it.
// The launch is wrapped in try/catch so that a failure is reported explicitly
// instead of surfacing as an unhandled promise rejection.
(async () => {
  try {
    browser = await chromium.launch({
      headless: true,
      // NOTE(review): sandbox/shm flags like these are typically needed when
      // Chromium runs inside a container — confirm against the deployment target.
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage"
      ]
    });
  } catch (err) {
    // Every scrape depends on the browser, so there is nothing useful the
    // server can do without it: log the cause and stop the process.
    console.error("Failed to launch Playwright browser:", err);
    process.exit(1);
  }
})();

/**
 * Core scraping function: collects the video entries shown on a YouTube
 * channel's "/videos" page.
 *
 * A fresh browser context is created for each call and is always closed
 * before the function settles, whether it succeeds or fails.
 *
 * @param {string} channelName - Channel handle without the leading "@"
 *   (e.g. "MrBeast"). It is URL-encoded before being placed in the path.
 * @param {number} [limit=31] - Maximum number of entries to return.
 * @returns {Promise<Array<{title: string, videoId: (string|null), url: string,
 *   thumbnail: (string|null), published: string}>>} One object per video entry
 *   found on the page, in page order.
 * @throws {Error} If the shared browser has not been launched yet, or if
 *   navigation or waiting for the video list fails or times out.
 */
async function scrapeChannelVideos(channelName, limit = 31) {
  // The browser is launched asynchronously at startup; fail with a clear
  // message rather than a TypeError on `undefined` if a call arrives early.
  if (!browser) {
    throw new Error("Browser is not ready yet; please retry shortly");
  }

  // Encode the handle so characters such as "/", "?" or "#" cannot change the
  // path or query of the URL that is visited.
  const url = `https://www.youtube.com/@${encodeURIComponent(channelName)}/videos`;
  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
  });

  try {
    // Page creation lives inside the try so the context is released even if
    // it fails.
    const page = await context.newPage();

    // NOTE(review): Playwright's documentation discourages "networkidle";
    // it is kept here to preserve the existing loading behaviour.
    await page.goto(url, { waitUntil: "networkidle", timeout: 20000 });
    await page.waitForSelector('a#video-title-link', { timeout: 10000 });

    // This callback runs inside the page, so it cannot close over local
    // variables; the limit is handed over as the evaluation argument.
    return await page.$$eval(
      "#dismissible",
      (els, max) =>
        els.slice(0, max).map((el) => {
          const anchor = el.querySelector('a#video-title-link');
          const meta = el.querySelector("#metadata-line span");
          const vidUrl = anchor?.href || "";
          // The video id is the value of the "v" query parameter.
          const videoId = vidUrl.match(/v=([^&]+)/)?.[1] || null;
          return {
            title: anchor?.title || anchor?.textContent.trim() || "",
            videoId,
            url: vidUrl,
            thumbnail: videoId
              ? `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`
              : null,
            published: meta?.textContent.trim() || ""
          };
        }),
      limit
    );
  } finally {
    // Single cleanup path for both success and failure.
    await context.close();
  }
}

// GET / — greets the caller and points at the docs route and an example call.
app.get("/", (_request, response) => {
  const welcome = {
    message: "Welcome to the YouTube Video Scraper API",
    docs: "/api",
    example: "/api/video/MrBeast"
  };
  response.json(welcome);
});

// GET /api — machine-readable list of the endpoints this service exposes.
// NOTE(review): the description below says "latest 3 videos", while
// scrapeChannelVideos returns up to 31 entries — confirm which is intended.
app.get("/api", (_request, response) => {
  const videoEndpoint = {
    route: "/api/video/:channelName",
    method: "GET",
    description: "Scrape the latest 3 videos for a given YouTube channel"
  };
  response.json({ endpoints: [videoEndpoint] });
});

// GET /api/video/:channelName — scrapes the named channel and responds with
// its videos plus an ISO-8601 timestamp; any failure is reported as HTTP 500
// with the error message in the body.
app.get("/api/video/:channelName", async (request, response) => {
  const { channelName: channel } = request.params;
  try {
    const videos = await scrapeChannelVideos(channel);
    const payload = { channel, videos, timestamp: new Date().toISOString() };
    response.json(payload);
  } catch (err) {
    response.status(500).json({ error: err.message });
  }
});

// Start accepting HTTP connections and log the port once the server is up.
app.listen(PORT, function onListening() {
  console.log(`Server listening on port ${PORT}`);
});