// Page-extraction residue (not part of the program): viewer status "Spaces: Paused",
// "File size: 2,630 Bytes", per-line commit hashes, and a 1-97 line-number gutter.
const express = require("express");
const { chromium } = require("playwright");
// Express application instance; the routes below are registered on it.
const app = express();
// Port to listen on: the PORT environment variable if set, otherwise 7860.
const PORT = process.env.PORT || 7860;
// Shared Playwright browser handle. It is undefined until the async launch
// below resolves; scrapeChannelVideos() creates its contexts from it.
let browser;
// Launch a single Playwright browser at startup and share it across requests.
// Nothing else awaits this promise, so a launch failure is handled here
// explicitly: log the cause and exit instead of leaving an unhandled rejection.
(async () => {
  try {
    browser = await chromium.launch({
      headless: true,
      args: [
        // NOTE(review): sandbox is disabled, presumably for a container runtime — confirm.
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
      ],
    });
  } catch (err) {
    console.error("Failed to launch Playwright browser:", err);
    process.exit(1);
  }
})();
/**
 * Scrape the entries listed on a YouTube channel's "Videos" tab.
 *
 * @param {string} channelName - Channel handle, with or without a leading "@".
 * @param {number} [maxVideos=31] - Maximum number of entries to return.
 * @returns {Promise<Array<{title: string, videoId: (string|null), url: string,
 *   thumbnail: (string|null), published: string}>>} One record per listed video.
 * @throws {Error} If the shared browser has not been launched yet, or if page
 *   navigation / the selector wait fails (for example by timing out).
 */
async function scrapeChannelVideos(channelName, maxVideos = 31) {
  // The browser is assigned asynchronously at startup; fail with a clear
  // message instead of a TypeError on `undefined.newContext`.
  if (!browser) {
    throw new Error("Browser is not ready yet; please retry shortly");
  }

  // Tolerate "@handle" input and encode the handle so characters such as
  // "/", "?" or "#" cannot alter the path of the requested URL.
  const handle = String(channelName).replace(/^@/, "");
  const url = `https://www.youtube.com/@${encodeURIComponent(handle)}/videos`;

  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  });

  try {
    // Created inside the try so the context is still closed if this throws.
    const page = await context.newPage();

    // NOTE(review): Playwright's docs discourage "networkidle"; the selector
    // wait below is the real readiness check — consider "domcontentloaded".
    await page.goto(url, { waitUntil: "networkidle", timeout: 20000 });
    await page.waitForSelector("a#video-title-link", { timeout: 10000 });

    // The callback is evaluated in the page, so it cannot close over local
    // variables; the limit is therefore passed in as the $$eval argument.
    return await page.$$eval(
      "#dismissible",
      (els, limit) =>
        els.slice(0, limit).map((el) => {
          const anchor = el.querySelector("a#video-title-link");
          const meta = el.querySelector("#metadata-line span");
          const vidUrl = anchor?.href || "";
          const videoId = vidUrl.match(/v=([^&]+)/)?.[1] || null;
          return {
            title: anchor?.title || anchor?.textContent.trim() || "",
            videoId,
            url: vidUrl,
            thumbnail: videoId
              ? `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`
              : null,
            published: meta?.textContent.trim() || "",
          };
        }),
      maxVideos
    );
  } finally {
    // Single cleanup point for both the success and the failure path.
    await context.close();
  }
}
// Root route: replies with a welcome payload that points at the docs route
// and shows an example scrape URL.
app.get("/", (_req, res) => {
  const welcome = {
    message: "Welcome to the YouTube Video Scraper API",
    docs: "/api",
    example: "/api/video/MrBeast",
  };
  res.json(welcome);
});
// API landing/documentation route: lists the available endpoints as JSON.
// The description states "up to 31" because the scraper keeps at most the
// first 31 entries found on the channel's videos page.
app.get("/api", (req, res) => {
  res.json({
    endpoints: [
      {
        route: "/api/video/:channelName",
        method: "GET",
        description:
          "Scrape up to 31 of the latest videos for a given YouTube channel",
      },
    ],
  });
});
// Scraping endpoint: runs the scraper for the channel named in the path and
// returns the videos with an ISO timestamp; any failure becomes a 500 whose
// body carries the error message.
app.get("/api/video/:channelName", async (req, res) => {
  const { channelName: channel } = req.params;
  try {
    const videos = await scrapeChannelVideos(channel);
    const timestamp = new Date().toISOString();
    res.json({ channel, videos, timestamp });
  } catch (error) {
    const body = { error: error.message };
    res.status(500).json(body);
  }
});
// Start the HTTP server on PORT and log a line once it is listening.
function announceListening() {
  console.log(`Server listening on port ${PORT}`);
}
app.listen(PORT, announceListening);
// End of file.