import { Router } from 'express';
import { mkdirSync } from 'fs';
import { isAbsolute, join, relative, resolve, sep } from 'path';
import * as cheerio from 'cheerio';
import {
  scrapeForumPage,
  getPageUrl,
  detectMaxPage,
  buildSizeIndex,
  CookieExpiredError,
  fixCookieIp,
  FORUM_UA,
} from './scrapers/forum.js';
import { refreshForumCookies, fsCreateSession, fsDestroySession, fsGet, fsPost } from './flaresolverr.js';
import { parseUserUrl, fetchAllPosts, fetchSearchPosts, downloadFiles } from './scrapers/coomer.js';
import { parseMediaUrl, fetchAllMedia, fetchAllMediaFromHtml, downloadMedia } from './scrapers/medialink.js';
import { parseMegaUrl, listAllFiles, downloadMegaFiles } from './scrapers/mega.js';
import { runYtdlp } from './scrapers/ytdlp.js';
import {
  parseLeakGalleryUrl,
  fetchAllMedia as fetchLeakGalleryMedia,
  downloadMedia as downloadLeakGalleryMedia,
} from './scrapers/leakgallery.js';
import {
  getAutoScrapeJobs,
  addAutoScrapeJob,
  removeAutoScrapeJob,
  getForumSites,
  getForumSiteById,
  createForumSite,
  updateForumSite,
  deleteForumSite,
} from './db.js';

const router = Router();

const MEDIA_PATH = process.env.MEDIA_PATH || './data/media';

// In-memory job registry, keyed by job id.
const jobsMap = new Map();
let jobCounter = 0;

// Retention limits: finished jobs kept in jobsMap, and log lines kept per job.
const MAX_COMPLETED = 50;
const MAX_LOGS = 200;

/**
 * Register a new running job in the in-memory registry.
 *
 * @param {string} type - Job kind label (e.g. 'forum', 'coomer').
 * @param {object} config - Runner configuration; `config.folderName` is copied onto the job.
 * @returns {object} The newly created job record.
 */
function createJob(type, config) {
  const id = `scrape_${Date.now()}_${++jobCounter}`;
  const job = {
    id,
    type,
    config,
    progress: { total: 0, completed: 0, errors: 0 },
    running: true,
    cancelled: false,
    logs: [],
    startedAt: new Date().toISOString(),
    completedAt: null,
    folderName: config.folderName || 'scrape',
  };
  jobsMap.set(id, job);
  return job;
}

/**
 * Append a timestamped line to a job's log, dropping the oldest line once
 * MAX_LOGS is exceeded.
 *
 * @param {object} job - Job record created by createJob.
 * @param {string} msg - Message text.
 */
function addLog(job, msg) {
  const ts = new Date().toLocaleTimeString('en-US', { hour12: false });
  job.logs.push(`[${ts}] ${msg}`);
  if (job.logs.length > MAX_LOGS) job.logs.shift();
}

/**
 * Drop the oldest finished jobs so that at most MAX_COMPLETED remain.
 * Running jobs are never removed.
 */
function pruneCompleted() {
  const completed = [...jobsMap.values()]
    .filter((j) => !j.running)
    .sort((a, b) => new Date(b.completedAt) - new Date(a.completedAt));
  for (const old of completed.slice(MAX_COMPLETED)) {
    jobsMap.delete(old.id);
  }
}

/**
 * Build the JSON view of a job returned by the job endpoints (logs are
 * summarized as a count).
 *
 * NOTE(review): `config` is returned verbatim and can contain forum cookies
 * (runForumScrape stores them in job.config.cookies) — confirm that exposing
 * them to API clients is intended.
 *
 * @param {object} job - Job record.
 * @returns {object} Serializable job summary.
 */
function jobToJson(job) {
  return {
    id: job.id,
    type: job.type,
    config: job.config,
    progress: job.progress,
    running: job.running,
    cancelled: job.cancelled,
    paused: job.paused || false,
    resumeAt: job.resumeAt || null,
    folderName: job.folderName,
    startedAt: job.startedAt,
    completedAt: job.completedAt,
    logCount: job.logs.length,
  };
}

/** Mark a job as finished and trim the finished-job backlog. */
function finishJob(job) {
  job.running = false;
  job.completedAt = new Date().toISOString();
  pruneCompleted();
}

/** Build the (completed, errors, total) callback that mirrors downloader progress onto the job. */
function makeProgressUpdater(job) {
  return (completed, errors, total) => {
    job.progress.completed = completed;
    job.progress.errors = errors;
    job.progress.total = total;
  };
}

/**
 * Create a job and start its runner in the background. A rejection from the
 * runner is logged on the job and the job is finalized, so it can never stay
 * stuck in the "running" state.
 *
 * @param {string} type - Job kind label.
 * @param {object} config - Runner configuration.
 * @param {(job: object) => Promise<void>} runner - One of the run*Scrape functions.
 * @returns {object} The created job.
 */
function launchJob(type, config, runner) {
  const job = createJob(type, config);
  runner(job).catch((err) => {
    addLog(job, `Fatal error: ${err.message}`);
    finishJob(job);
  });
  return job;
}

/** Parse a base-10 integer; NaN and 0 both yield the fallback (same as `parseInt(x) || fallback`). */
function intOr(value, fallback) {
  return Number.parseInt(value, 10) || fallback;
}

/** Parse a base-10 integer with a fallback, then clamp it into [min, max]. */
function clampInt(value, fallback, min, max) {
  return Math.min(Math.max(intOr(value, fallback), min), max);
}

/** Parse a non-negative base-10 integer; unparseable or negative input yields the fallback. */
function toCount(value, fallback) {
  const n = Number.parseInt(value, 10);
  return Number.isNaN(n) || n < 0 ? fallback : n;
}

/**
 * Check that a folder name, once joined onto MEDIA_PATH, stays strictly inside
 * MEDIA_PATH. Rejects non-strings, blank names, and names such as `../x` that
 * would escape the media directory.
 *
 * @param {unknown} folderName - Candidate folder name (may come from a request body).
 * @returns {boolean} True when the name is usable as an output folder.
 */
function isSafeFolderName(folderName) {
  if (typeof folderName !== 'string' || folderName.trim() === '') return false;
  const root = resolve(MEDIA_PATH);
  const target = resolve(join(root, folderName));
  const rel = relative(root, target);
  if (rel === '' || isAbsolute(rel)) return false;
  return rel !== '..' && !rel.startsWith(`..${sep}`);
}

/**
 * Resolve and create the output directory for a job.
 *
 * @param {string} folderName - Folder name relative to MEDIA_PATH.
 * @returns {string} `join(MEDIA_PATH, folderName)`.
 * @throws {Error} When the folder name would escape MEDIA_PATH.
 */
function resolveOutputDir(folderName) {
  if (!isSafeFolderName(folderName)) {
    throw new Error(`Invalid folder name: ${String(folderName)}`);
  }
  const outputDir = join(MEDIA_PATH, folderName);
  mkdirSync(outputDir, { recursive: true });
  return outputDir;
}

/**
 * Validate the url/folderName pair shared by most scrape endpoints.
 *
 * @returns {string|null} An error message for a 400 response, or null when valid.
 */
function validateTarget(url, folderName) {
  if (!url) return 'URL is required';
  if (!folderName) return 'Folder name is required';
  if (!isSafeFolderName(folderName)) return 'Folder name must resolve inside the media directory';
  return null;
}

// --- Forum Scrape ---

/**
 * Scrape a range of forum thread pages into MEDIA_PATH/<folderName>.
 *
 * Reads url, startPage, endPage, delay (seconds between pages), folderName,
 * siteId, lastPageOnly, cookies and userAgent from job.config. Progress, logs
 * and completion state are written back onto the job.
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runForumScrape(job) {
  const { url, delay, folderName, siteId, lastPageOnly } = job.config;
  let { startPage, endPage, cookies } = job.config;
  let userAgent = job.config.userAgent || '';
  const log = (msg) => addLog(job, msg);

  // Load cookies from forum site record if siteId provided and no cookies passed
  if (!cookies && siteId) {
    const site = getForumSiteById(siteId);
    if (site && site.cookies) {
      cookies = site.cookies;
      userAgent = site.user_agent || userAgent;
      job.config.cookies = cookies;
      job.config.userAgent = userAgent;
      addLog(job, `Loaded cookies from forum site: ${site.name}${userAgent ? ` (UA pinned)` : ''}`);
    }
  }

  const outputDir = resolveOutputDir(folderName);
  const downloadedSet = new Set();
  const sizeIndex = buildSizeIndex(outputDir);
  let totalImages = 0;
  let fsSession = null;

  // Everything that can fail after this point runs inside the try so the
  // FlareSolverr session is always released and the job is always finalized.
  try {
    // When a siteId is in play, page HTML must be fetched through FlareSolverr —
    // direct fetch hits DDoS-Guard's browser-fingerprint check and gets 403.
    if (siteId && cookies) {
      try {
        const baseHost = new URL(url).origin;
        addLog(job, `Opening FlareSolverr session for ${baseHost}...`);
        fsSession = await fsCreateSession(baseHost + '/');
        addLog(job, `FlareSolverr session ready (${fsSession.slice(0, 8)}...)`);
      } catch (e) {
        addLog(job, `FlareSolverr session failed (${e.message}) — falling back to direct fetch`);
      }
    }

    // If lastPageOnly, detect the last page and only scrape that
    if (lastPageOnly) {
      addLog(job, 'Detecting last page...');
      const maxPage = await detectMaxPage(url, log, cookies, userAgent, fsSession);
      if (maxPage) {
        startPage = maxPage;
        endPage = maxPage;
        addLog(job, `Last page detected: ${maxPage}`);
      } else {
        addLog(job, 'Could not detect last page — falling back to page range');
      }
    }

    addLog(job, `Starting forum scrape: pages ${startPage}-${endPage}`);
    addLog(job, `Output: ${outputDir}`);
    job.progress.total = endPage - startPage + 1;

    for (let page = startPage; page <= endPage; page++) {
      if (job.cancelled) {
        addLog(job, 'Cancelled by user');
        break;
      }

      const pageUrl = getPageUrl(url, page);
      addLog(job, `--- Page ${page}/${endPage} ---`);

      let count;
      try {
        count = await scrapeForumPage(pageUrl, outputDir, downloadedSet, log, cookies, userAgent, fsSession, sizeIndex);
      } catch (err) {
        // Only expired cookies are recoverable here; anything else aborts the job.
        if (!(err instanceof CookieExpiredError)) throw err;

        if (!siteId) {
          addLog(job, `Cookie expired (HTTP ${err.statusCode}) — no siteId configured for auto-refresh`);
          addLog(job, 'Stopping scrape — refresh cookies manually and try again');
          break;
        }

        addLog(job, `Cookie expired (HTTP ${err.statusCode}) — attempting auto-refresh via FlareSolverr...`);
        try {
          cookies = await refreshForumCookies(siteId);
          const refreshed = getForumSiteById(siteId);
          userAgent = refreshed?.user_agent || userAgent;
          job.config.cookies = cookies;
          job.config.userAgent = userAgent;
          addLog(job, 'Cookies refreshed successfully — retrying page...');
          count = await scrapeForumPage(pageUrl, outputDir, downloadedSet, log, cookies, userAgent, fsSession, sizeIndex);
        } catch (refreshErr) {
          addLog(job, `Cookie refresh failed: ${refreshErr.message}`);
          addLog(job, 'Stopping scrape — fix credentials or refresh cookies manually');
          break;
        }
      }

      totalImages += count;
      job.progress.completed = page - startPage + 1;

      if (page < endPage && !job.cancelled) {
        await new Promise((r) => setTimeout(r, delay * 1000));
      }
    }
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    if (fsSession) {
      // A failed session teardown must not prevent the job from being finalized.
      try {
        await fsDestroySession(fsSession);
        addLog(job, `FlareSolverr session closed`);
      } catch (closeErr) {
        addLog(job, `Failed to close FlareSolverr session: ${closeErr.message}`);
      }
    }
    addLog(job, `Done! ${totalImages} files saved to ${folderName}/`);
    finishJob(job);
  }
}

// --- Coomer Scrape ---

/**
 * Collect file links from a coomer-style user or search URL, then download them.
 *
 * Reads url, pages, workers and folderName from job.config.
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runCoomerScrape(job) {
  const { url, pages, workers, folderName } = job.config;
  const outputDir = resolveOutputDir(folderName);
  const log = (msg) => addLog(job, msg);
  const isCancelled = () => job.cancelled;

  addLog(job, `Starting coomer scrape: ${url}`);
  addLog(job, `Pages: ${pages}, Workers: ${workers}`);

  try {
    // Phase 1: Collect file links
    const parsed = parseUserUrl(url);
    let files;
    if (parsed.mode === 'search') {
      addLog(job, `Site: ${parsed.base}, Search: "${parsed.query}"`);
      addLog(job, `Fetching up to ${pages} pages...`);
      files = await fetchSearchPosts(parsed.base, parsed.query, pages, log, isCancelled);
    } else {
      addLog(job, `Site: ${parsed.base}, Service: ${parsed.service}, User: ${parsed.userId}`);
      addLog(job, `Fetching up to ${pages} pages...`);
      files = await fetchAllPosts(parsed.base, parsed.service, parsed.userId, pages, log, isCancelled);
    }

    if (job.cancelled) {
      addLog(job, 'Cancelled by user');
      return;
    }
    if (files.length === 0) {
      addLog(job, 'No files found');
      return;
    }

    job.progress.total = files.length;
    addLog(job, `Found ${files.length} files. Starting downloads...`);

    // Phase 2: Download
    const result = await downloadFiles(files, outputDir, workers, log, makeProgressUpdater(job), isCancelled);
    addLog(job, `Done! ${result.completed} downloaded, ${result.errors} failed, ${result.skipped} skipped`);
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    finishJob(job);
  }
}

// --- MediaLink Scrape ---

/**
 * Collect media items for a medialink URL (HTML or API mode, as decided by
 * parseMediaUrl), then download them.
 *
 * Reads url, pages, workers, delay (ms) and folderName from job.config.
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runMediaLinkScrape(job) {
  const { url, pages, workers, delay, folderName } = job.config;
  const outputDir = resolveOutputDir(folderName);
  const log = (msg) => addLog(job, msg);
  const isCancelled = () => job.cancelled;

  addLog(job, `Starting medialink scrape: ${url}`);
  addLog(job, `Pages: ${pages}, Workers: ${workers}, Delay: ${delay}ms`);

  try {
    const { base, userId, mode } = parseMediaUrl(url);
    addLog(job, `Site: ${base}, ${mode === 'html' ? 'Slug' : 'User ID'}: ${userId} (${mode} mode)`);

    // Phase 1: Collect all media
    let items;
    if (mode === 'html') {
      addLog(job, `Fetching up to ${pages} pages via HTML scraping...`);
      items = await fetchAllMediaFromHtml(base, userId, pages, delay, log, isCancelled);
    } else {
      addLog(job, `Fetching up to ${pages} pages from API...`);
      items = await fetchAllMedia(base, userId, pages, delay, log, isCancelled);
    }

    if (job.cancelled) {
      addLog(job, 'Cancelled by user');
      return;
    }
    if (items.length === 0) {
      addLog(job, 'No media found');
      return;
    }

    job.progress.total = items.length;
    addLog(job, `Found ${items.length} media items. Downloading...`);

    // Phase 2: Download all media files
    const result = await downloadMedia(
      items,
      outputDir,
      workers,
      log,
      makeProgressUpdater(job),
      isCancelled,
      base + '/',
    );
    addLog(job, `Done! ${result.completed} downloaded, ${result.errors} failed, ${result.skipped} skipped`);
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    finishJob(job);
  }
}

// --- Mega Scrape ---

/**
 * List the files behind a mega.nz URL, then download them. Pause/resume status
 * reported by the downloader is mirrored onto job.paused / job.resumeAt.
 *
 * Reads url, workers and folderName from job.config.
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runMegaScrape(job) {
  const { url, workers, folderName } = job.config;
  const outputDir = resolveOutputDir(folderName);
  const log = (msg) => addLog(job, msg);

  addLog(job, `Starting mega.nz scrape: ${url}`);
  addLog(job, `Workers: ${workers}`);

  try {
    // Called for validation only: it throws on an unusable URL and the result is unused.
    parseMegaUrl(url);

    // Phase 1: List all files
    const { items } = await listAllFiles(url, log);

    if (job.cancelled) {
      addLog(job, 'Cancelled by user');
      return;
    }
    if (items.length === 0) {
      addLog(job, 'No files found in folder');
      return;
    }

    job.progress.total = items.length;
    const totalSizeMb = (items.reduce((s, i) => s + i.size, 0) / (1024 * 1024)).toFixed(0);
    addLog(job, `Found ${items.length} files (${totalSizeMb} MB). Downloading...`);

    // Phase 2: Download
    const result = await downloadMegaFiles(
      items,
      outputDir,
      workers,
      log,
      makeProgressUpdater(job),
      () => job.cancelled,
      (status) => {
        job.paused = status.paused;
        job.resumeAt = status.resumeAt;
      },
    );
    addLog(job, `Done! ${result.completed} downloaded, ${result.errors} failed, ${result.skipped} skipped`);
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    finishJob(job);
  }
}

// --- yt-dlp Scrape ---

/**
 * Run a yt-dlp download described by job.config (passed to runYtdlp as-is).
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runYtdlpScrape(job) {
  const config = job.config;
  addLog(job, `Starting yt-dlp download: ${config.url}`);
  addLog(job, `Quality: ${config.quality || 'best'}, Playlist: ${config.playlist ? 'yes' : 'no'}`);

  try {
    const result = await runYtdlp(
      config,
      (msg) => addLog(job, msg),
      (completed, errors) => {
        job.progress.completed = completed;
        // NOTE(review): errors are accumulated here (+=) while the other runners
        // assign them (=); presumably runYtdlp reports deltas — confirm.
        job.progress.errors += errors;
        if (completed > job.progress.total) job.progress.total = completed;
      },
      () => job.cancelled,
    );

    if (result.cancelled) {
      addLog(job, 'Cancelled by user');
    } else {
      addLog(job, `Done! ${result.files} file${result.files !== 1 ? 's' : ''} downloaded`);
    }
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    finishJob(job);
  }
}

// --- LeakGallery Scrape ---

/**
 * Collect media items for a leakgallery profile URL, then download them.
 *
 * Reads url, pages, workers, delay (ms) and folderName from job.config.
 *
 * @param {object} job - Job record created by createJob.
 * @returns {Promise<void>}
 */
async function runLeakGalleryScrape(job) {
  const { url, pages, workers, delay, folderName } = job.config;
  const outputDir = resolveOutputDir(folderName);
  const log = (msg) => addLog(job, msg);
  const isCancelled = () => job.cancelled;

  addLog(job, `Starting leakgallery scrape: ${url}`);
  addLog(job, `Pages: ${pages}, Workers: ${workers}, Delay: ${delay}ms`);

  try {
    const { username } = parseLeakGalleryUrl(url);
    addLog(job, `Username: ${username}`);

    // Phase 1: Collect all media
    addLog(job, `Fetching up to ${pages} pages from API...`);
    const items = await fetchLeakGalleryMedia(username, pages, delay, log, isCancelled);

    if (job.cancelled) {
      addLog(job, 'Cancelled by user');
      return;
    }
    if (items.length === 0) {
      addLog(job, 'No media found');
      return;
    }

    job.progress.total = items.length;
    addLog(job, `Found ${items.length} media items. Downloading...`);

    // Phase 2: Download all media files
    const result = await downloadLeakGalleryMedia(
      items,
      outputDir,
      workers,
      log,
      makeProgressUpdater(job),
      isCancelled,
    );
    addLog(job, `Done! ${result.completed} downloaded, ${result.errors} failed, ${result.skipped} skipped`);
  } catch (err) {
    addLog(job, `Error: ${err.message}`);
    job.progress.errors++;
  } finally {
    finishJob(job);
  }
}

// --- Endpoints ---

router.post('/api/scrape/forum', (req, res) => {
  const { url, folderName, startPage, endPage, delay, cookies, siteId, lastPageOnly } = req.body;
  const problem = validateTarget(url, folderName);
  if (problem) return res.status(400).json({ error: problem });

  const config = {
    // Thread URLs without an explicit page segment are normalized to page 1.
    url: url.includes('page-') ? url : `${url.replace(/\/$/, '')}/page-1`,
    folderName,
    startPage: intOr(startPage, 1),
    endPage: intOr(endPage, 10),
    delay: Number.parseFloat(delay) || 1.0,
    cookies: cookies || '',
    siteId: siteId ? Number.parseInt(siteId, 10) : null,
    lastPageOnly: !!lastPageOnly,
  };

  const job = launchJob('forum', config, runForumScrape);
  res.json({ jobId: job.id, message: 'Forum scrape started' });
});

router.post('/api/scrape/coomer', (req, res) => {
  const { url, folderName, pages, workers } = req.body;
  const problem = validateTarget(url, folderName);
  if (problem) return res.status(400).json({ error: problem });

  const config = {
    url,
    folderName,
    pages: intOr(pages, 10),
    workers: clampInt(workers, 10, 1, 20),
  };

  const job = launchJob('coomer', config, runCoomerScrape);
  res.json({ jobId: job.id, message: 'Coomer scrape started' });
});

router.post('/api/scrape/medialink', (req, res) => {
  const { url, folderName, pages, workers, delay } = req.body;
  const problem = validateTarget(url, folderName);
  if (problem) return res.status(400).json({ error: problem });

  const config = {
    url,
    folderName,
    pages: intOr(pages, 50),
    workers: clampInt(workers, 3, 1, 10),
    delay: intOr(delay, 500),
  };

  const job = launchJob('medialink', config, runMediaLinkScrape);
  res.json({ jobId: job.id, message: 'MediaLink scrape started' });
});

router.post('/api/scrape/mega', (req, res) => {
  const { url, folderName, workers } = req.body;
  const problem = validateTarget(url, folderName);
  if (problem) return res.status(400).json({ error: problem });

  try {
    parseMegaUrl(url);
  } catch (err) {
    return res.status(400).json({ error: err.message });
  }

  const config = {
    url,
    folderName,
    workers: clampInt(workers, 3, 1, 10),
  };

  const job = launchJob('mega', config, runMegaScrape);
  res.json({ jobId: job.id, message: 'Mega scrape started' });
});

/**
 * Derive a display folder name for a yt-dlp job from its URL: hostname plus
 * path (truncated to 60 chars), or the raw URL prefix when it does not parse.
 */
function deriveYtdlpFolderName(url) {
  try {
    const u = new URL(url);
    const path = u.pathname.replace(/^\//, '').replace(/\/$/, '');
    return path ? `${u.hostname}/${path}`.slice(0, 60) : u.hostname;
  } catch {
    return url.slice(0, 60);
  }
}

router.post('/api/scrape/ytdlp', (req, res) => {
  const {
    url,
    quality,
    customFormat,
    embedMetadata,
    embedThumbnail,
    embedSubs,
    writeSubs,
    subLangs,
    restrictFilenames,
    outputTemplate,
    playlist,
    maxDownloads,
    concurrentFragments,
    rateLimit,
    sponsorBlock,
    cookiesFile,
  } = req.body;
  if (!url) return res.status(400).json({ error: 'URL is required' });

  const config = {
    url,
    quality: quality || 'best',
    customFormat: customFormat || '',
    // The embed/restrict flags default to on: only an explicit `false` disables them.
    embedMetadata: embedMetadata !== false,
    embedThumbnail: embedThumbnail !== false,
    embedSubs: embedSubs !== false,
    writeSubs: writeSubs || false,
    subLangs: subLangs || 'en',
    restrictFilenames: restrictFilenames !== false,
    outputTemplate: outputTemplate || '%(title)s.%(ext)s',
    playlist: playlist || false,
    maxDownloads: intOr(maxDownloads, 0),
    concurrentFragments: clampInt(concurrentFragments, 4, 1, 16),
    rateLimit: rateLimit || '',
    sponsorBlock: sponsorBlock || 'off',
    cookiesFile: cookiesFile || '',
    folderName: deriveYtdlpFolderName(url),
  };

  const job = launchJob('ytdlp', config, runYtdlpScrape);
  res.json({ jobId: job.id, message: 'yt-dlp download started' });
});

router.post('/api/scrape/leakgallery', (req, res) => {
  const { url, folderName, pages, workers, delay } = req.body;
  const problem = validateTarget(url, folderName);
  if (problem) return res.status(400).json({ error: problem });

  try {
    parseLeakGalleryUrl(url);
  } catch (err) {
    return res.status(400).json({ error: err.message });
  }

  const config = {
    url,
    folderName,
    pages: intOr(pages, 100),
    workers: clampInt(workers, 3, 1, 10),
    delay: intOr(delay, 300),
  };

  const job = launchJob('leakgallery', config, runLeakGalleryScrape);
  res.json({ jobId: job.id, message: 'LeakGallery scrape started' });
});

// List all known jobs, newest first.
router.get('/api/scrape/jobs', (_req, res) => {
  const jobs = [...jobsMap.values()].map(jobToJson);
  jobs.sort((a, b) => new Date(b.startedAt) - new Date(a.startedAt));
  res.json(jobs);
});

// Single job, including its full log.
router.get('/api/scrape/jobs/:jobId', (req, res) => {
  const job = jobsMap.get(req.params.jobId);
  if (!job) return res.status(404).json({ error: 'Job not found' });
  res.json({ ...jobToJson(job), logs: job.logs });
});

// Cooperative cancel: sets the flag that the runners poll.
router.post('/api/scrape/jobs/:jobId/cancel', (req, res) => {
  const job = jobsMap.get(req.params.jobId);
  if (!job) return res.status(404).json({ error: 'Job not found' });
  if (!job.running) return res.status(400).json({ error: 'Job is not running' });
  job.cancelled = true;
  addLog(job, 'Cancel requested');
  res.json({ message: 'Cancel requested' });
});

// Remove a job from the registry; a still-running job is flagged as cancelled first.
router.delete('/api/scrape/jobs/:jobId', (req, res) => {
  const job = jobsMap.get(req.params.jobId);
  if (!job) return res.status(404).json({ error: 'Job not found' });
  job.cancelled = true;
  job.running = false;
  jobsMap.delete(req.params.jobId);
  res.json({ message: 'Job removed' });
});

// Auto-detect max page for forum URLs
router.post('/api/scrape/forum/detect-pages', async (req, res) => {
  const { url, cookies } = req.body ?? {};
  if (!url) return res.status(400).json({ error: 'URL is required' });

  const logs = [];
  // Express 4 does not catch rejections from async handlers, so failures are
  // turned into a 500 response here instead of leaving the request hanging.
  try {
    const maxPage = await detectMaxPage(url, (msg) => logs.push(msg), cookies);
    res.json({ maxPage, logs });
  } catch (err) {
    console.error('[scrape/forum/detect-pages]', err);
    res.status(500).json({ error: err.message, logs });
  }
});

// Search a forum site for threads matching a query, return preview images per thread
const SEARCH_SKIP_PATTERNS = ['avatar', 'smilie', 'emoji', 'icon', 'logo', 'button', 'sprite', 'badge', 'rank', 'star'];
const PREVIEW_IMG_EXTS = /\.(jpg|jpeg|png|webp|gif)(\?|$)/i;

/**
 * Extract unique thread links from a search results page.
 *
 * @param {string} html - Search results HTML.
 * @param {string} baseUrl - Site origin without a trailing slash.
 * @returns {{threadUrl: string, title: string}[]} Threads in document order.
 */
function parseSearchThreads(html, baseUrl) {
  // Parse thread results from contentRow-title anchors (XenForo result layout)
  const $ = cheerio.load(html);
  const seen = new Set();
  const threads = [];

  $('h3.contentRow-title a[href*="/threads/"]').each((_, el) => {
    const $a = $(el);
    let href;
    try {
      href = new URL($a.attr('href'), baseUrl).href;
    } catch {
      return; // unparseable link — skip this result
    }

    const m = href.match(/\/threads\/([^\/]+\.\d+)\//);
    if (!m) return;

    // Normalize to the thread root so post/page links collapse to one entry.
    const threadRoot = `${baseUrl}/threads/${m[1]}/`;
    if (seen.has(threadRoot)) return;
    seen.add(threadRoot);

    const title = $a.text().replace(/\s+/g, ' ').trim();
    if (!title || title.length < 3) return;
    threads.push({ threadUrl: threadRoot, title });
  });

  return threads;
}

/**
 * Extract de-duplicated candidate preview image URLs from a thread page.
 *
 * @param {string} html - Thread page HTML.
 * @param {string} pageUrl - URL of the page, used to resolve relative links.
 * @returns {string[]} Unique image URLs in document order.
 */
function extractPreviewImages(html, pageUrl) {
  const $ = cheerio.load(html);
  const urls = [];

  $('.message-body img, .bbWrapper img').each((_, el) => {
    const $img = $(el);
    const src = $img.attr('src') || $img.attr('data-src') || $img.attr('data-url');
    if (!src) return;

    let absSrc;
    try {
      absSrc = new URL(src, pageUrl).href;
    } catch {
      return; // unparseable src — skip this image
    }

    const lower = absSrc.toLowerCase();
    if (SEARCH_SKIP_PATTERNS.some((p) => lower.includes(p))) return;

    // Prefer the wrapping link when it points straight at an image file.
    const $parentA = $img.closest('a');
    if ($parentA.length && $parentA.attr('href')) {
      try {
        const aHref = new URL($parentA.attr('href'), pageUrl).href;
        if (PREVIEW_IMG_EXTS.test(aHref)) {
          urls.push(aHref);
          return;
        }
      } catch {
        // Unparseable link target: deliberately ignored, fall back to the img src below.
      }
    }

    // Strip the first ".th." / ".md." marker from the src URL.
    const upgraded = absSrc.replace('.th.', '.').replace('.md.', '.');
    if (PREVIEW_IMG_EXTS.test(upgraded) || /\/data\/attachments|proxy\.php/.test(upgraded)) {
      urls.push(upgraded);
    }
  });

  return [...new Set(urls)];
}

router.post('/api/scrape/forum/search', async (req, res) => {
  const { query, siteId = 2, maxThreads = 5, previewsPerThread = 4, titleOnly = true } = req.body ?? {};
  if (!query) return res.status(400).json({ error: 'query is required' });

  // Sanitize the counts: non-numeric or negative input falls back to the defaults.
  const threadLimit = toCount(maxThreads, 5);
  const previewLimit = toCount(previewsPerThread, 4);

  let fsSession = null;
  try {
    const site = getForumSiteById(siteId);
    if (!site) return res.status(404).json({ error: `Forum site ${siteId} not found` });
    if (!site.cookies) return res.status(400).json({ error: 'Forum site has no cookies — refresh first' });

    const baseUrl = (site.base_url || 'https://simpcity.cr').replace(/\/$/, '');
    const cookies = site.cookies;

    fsSession = await fsCreateSession(baseUrl + '/');

    // Step 1: GET search form to grab the XenForo CSRF token
    const formRes = await fsGet(fsSession, baseUrl + '/search/', cookies);
    if (formRes.status !== 200) {
      return res.status(formRes.status).json({ error: `Search form fetch failed: HTTP ${formRes.status}` });
    }
    const xfMatch = formRes.html.match(/name="_xfToken"\s+value="([^"]+)"/);
    if (!xfMatch) {
      return res.status(503).json({
        error: 'No _xfToken on search form — cookies likely expired. Refresh via /api/flaresolverr/refresh/' + siteId,
      });
    }
    const xfToken = xfMatch[1];

    // Step 2: POST the search; the response HTML is parsed as the results page
    const postBody = new URLSearchParams({
      keywords: query,
      'c[title_only]': titleOnly ? '1' : '',
      'c[users]': '',
      _xfToken: xfToken,
    });
    const postRes = await fsPost(fsSession, baseUrl + '/search/search', cookies, postBody.toString());

    const threads = parseSearchThreads(postRes.html, baseUrl);
    if (threads.length === 0) {
      return res.json({ query, results: [] });
    }

    // For top N threads, fetch last page and pull preview image URLs
    const results = [];
    for (const t of threads.slice(0, threadLimit)) {
      try {
        const maxPage = await detectMaxPage(t.threadUrl, () => {}, cookies, '', fsSession);
        const lastPageUrl = maxPage && maxPage > 1 ? `${t.threadUrl}page-${maxPage}` : t.threadUrl;
        const lastPageNum = maxPage || 1;

        const pageRes = await fsGet(fsSession, lastPageUrl, cookies);
        if (pageRes.status !== 200) {
          results.push({ ...t, lastPageUrl, lastPageNum, previews: [], error: `HTTP ${pageRes.status}` });
          continue;
        }

        const unique = extractPreviewImages(pageRes.html, lastPageUrl);
        // slice(-0) would return every image, so a limit of 0 is handled explicitly.
        const previews = previewLimit > 0 ? unique.slice(-previewLimit) : [];
        results.push({ ...t, lastPageUrl, lastPageNum, previews });
      } catch (err) {
        // One failing thread must not abort the whole search.
        results.push({ ...t, previews: [], error: err.message });
      }
    }

    res.json({ query, results });
  } catch (err) {
    console.error('[scrape/forum/search]', err);
    res.status(500).json({ error: err.message });
  } finally {
    if (fsSession) await fsDestroySession(fsSession);
  }
});

// --- Forum Sites CRUD ---

router.get('/api/scrape/forum-sites', (_req, res) => {
  res.json(getForumSites());
});

router.post('/api/scrape/forum-sites', (req, res) => {
  const { name, baseUrl, cookies, username, password } = req.body;
  if (!name) return res.status(400).json({ error: 'Name is required' });
  const id = createForumSite(name, baseUrl, cookies, username, password);
  res.json(getForumSiteById(id));
});

router.put('/api/scrape/forum-sites/:id', (req, res) => {
  const id = Number.parseInt(req.params.id, 10);
  const site = getForumSiteById(id);
  if (!site) return res.status(404).json({ error: 'Forum site not found' });

  // Only fields present in the body are updated; request keys map to DB column names.
  const { name, baseUrl, cookies, username, password } = req.body;
  const fields = {};
  if (name !== undefined) fields.name = name;
  if (baseUrl !== undefined) fields.base_url = baseUrl;
  if (cookies !== undefined) fields.cookies = cookies;
  if (username !== undefined) fields.username = username;
  if (password !== undefined) fields.password = password;

  updateForumSite(id, fields);
  res.json(getForumSiteById(id));
});

router.delete('/api/scrape/forum-sites/:id', (req, res) => {
  const id = Number.parseInt(req.params.id, 10);
  deleteForumSite(id);
  res.json({ ok: true });
});

// --- Auto-scrape CRUD ---

router.get('/api/scrape/auto', (_req, res) => {
  res.json(getAutoScrapeJobs());
});

router.post('/api/scrape/auto', (req, res) => {
  const { type, url, folderName, config } = req.body;
  if (!type || !url || !folderName || !config) {
    return res.status(400).json({ error: 'type, url, folderName, and config are required' });
  }
  addAutoScrapeJob(type, url, folderName, config);
  res.json({ ok: true });
});

router.delete('/api/scrape/auto/:id', (req, res) => {
  removeAutoScrapeJob(Number.parseInt(req.params.id, 10));
  res.json({ ok: true });
});

/**
 * Count the jobs that are currently running.
 *
 * @returns {number} Number of running jobs.
 */
export function getActiveScrapeCount() {
  let count = 0;
  for (const job of jobsMap.values()) {
    if (job.running) count++;
  }
  return count;
}

/**
 * List the running jobs in summary form.
 *
 * @returns {{type: string, folderName: string, progress: object}[]} One entry per running job.
 */
export function getActiveScrapesList() {
  const list = [];
  for (const job of jobsMap.values()) {
    if (job.running) {
      list.push({ type: job.type, folderName: job.folderName, progress: job.progress });
    }
  }
  return list;
}

export {
  runForumScrape,
  runCoomerScrape,
  runMediaLinkScrape,
  runMegaScrape,
  runYtdlpScrape,
  runLeakGalleryScrape,
  createJob,
};

export default router;