Add app auth, dashboard, scheduler, video management, and new scrapers
- JWT-based app authentication with user roles, folder/route access control - Dashboard with storage stats, health checks, and recent activity - Auto-download/scrape scheduler (12h interval) with per-user and per-job configs - Video upload, tagging, HLS transcoding, and detail pages - New scrapers: LeakGallery, Mega (megajs), yt-dlp - FlareSolverr integration for Cloudflare-protected sites - Gallery: advanced filtering (date, size, search), sort modes, equal-mix shuffle - Forum sites management with stored cookies/auth - GridWall/GridCell components for responsive media grid - Media API with folder-access permissions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,9 +7,16 @@ const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (
|
||||
/**
 * Parse a coomer-style URL into the pieces needed to query the site.
 *
 * Two URL shapes are recognised:
 *   - search:  <origin>/posts?q=QUERY        -> { base, mode: 'search', query }
 *   - profile: <origin>/SERVICE/user/USER_ID -> { base, mode: 'user', service, userId }
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {{base: string, mode: 'search', query: string}
 *   | {base: string, mode: 'user', service: string, userId: string}}
 *   `base` is `protocol//hostname` of the input (no port, no path).
 * @throws {TypeError} If `url` is not a valid absolute URL (raised by `new URL`).
 * @throws {Error} If the path matches neither recognised shape.
 */
export function parseUserUrl(url) {
  const parsed = new URL(url);
  const base = `${parsed.protocol}//${parsed.hostname}`;

  // Search URL: /posts?q=query (an empty or missing q is not a search).
  const query = parsed.searchParams.get('q');
  if (parsed.pathname === '/posts' && query) {
    return { base, mode: 'search', query };
  }

  // User URL: /SERVICE/user/USER_ID
  const m = parsed.pathname.match(/^\/([^/]+)\/user\/([^/?#]+)/);
  if (!m) {
    throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID or https://coomer.su/posts?q=QUERY`);
  }

  // Always tag the result with its mode so callers can branch on it the same
  // way they do for search results.
  return { base, mode: 'user', service: m[1], userId: m[2] };
}
|
||||
|
||||
async function fetchApi(apiUrl, logFn, retries = 3) {
|
||||
@@ -150,6 +157,45 @@ export async function fetchAllPosts(base, service, userId, maxPages, logFn, chec
|
||||
return allFiles;
|
||||
}
|
||||
|
||||
/**
 * Walk the paginated search endpoint and gather the files of every hit.
 *
 * Requests `${base}/api/v1/posts?q=…&o=…` with the offset advancing by 50 per
 * page. Paging stops when `maxPages` is reached, `checkCancelled()` returns
 * true, a request fails, a page comes back empty, or a page holds fewer than
 * 50 posts.
 *
 * @param {string} base - Site origin, e.g. `https://host`.
 * @param {string} query - Search text; URL-encoded before use.
 * @param {number} maxPages - Upper bound on pages requested.
 * @param {(msg: string) => void} logFn - Progress/error sink.
 * @param {() => boolean} checkCancelled - Polled before each page.
 * @returns {Promise<Array>} Everything `collectFiles` produced, in page order.
 */
export async function fetchSearchPosts(base, query, maxPages, logFn, checkCancelled) {
  const collected = [];
  let pageIndex = 0;

  while (pageIndex < maxPages && !checkCancelled()) {
    const requestUrl = `${base}/api/v1/posts?q=${encodeURIComponent(query)}&o=${pageIndex * 50}`;

    let payload;
    try {
      payload = await fetchApi(requestUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }

    // The search endpoint wraps results as { count, posts: [...] }; fall back
    // to the payload itself when it is already a bare array.
    const posts = payload?.posts || payload;
    if (!Array.isArray(posts) || posts.length === 0) break;

    // Files are served from the "n1." sibling host under /data.
    const { protocol, hostname } = new URL(base);
    const cdnBase = `${protocol}//n1.${hostname}/data`;

    collected.push(...collectFiles(posts, cdnBase));

    if (pageIndex === 0 && payload?.count) {
      logFn(`Search found ${payload.count} total results`);
    }
    logFn(`Page ${pageIndex + 1}: ${posts.length} posts (${collected.length} files total)`);

    // A short page means there is nothing further to request.
    if (posts.length < 50) break;

    pageIndex += 1;
  }

  return collected;
}
|
||||
|
||||
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
|
||||
mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
|
||||
+189
-44
@@ -1,13 +1,43 @@
|
||||
import * as cheerio from 'cheerio';
|
||||
import { createWriteStream, existsSync, mkdirSync, statSync } from 'fs';
|
||||
import { createWriteStream, existsSync, mkdirSync, statSync, writeFileSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { pipeline } from 'stream/promises';
|
||||
import { execFile } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
const SERVER_IP = '47.185.183.191';
|
||||
|
||||
/**
 * Error signalling that a cookie-authenticated request was rejected.
 *
 * Carries the HTTP status that triggered it in `statusCode`, and embeds the
 * same status in the message.
 */
export class CookieExpiredError extends Error {
  /**
   * @param {number} statusCode - HTTP status returned by the server.
   */
  constructor(statusCode) {
    super(`Cookie expired or invalid (HTTP ${statusCode})`);
    Object.assign(this, { name: 'CookieExpiredError', statusCode });
  }
}
|
||||
|
||||
/**
 * Replace the value of the DDoS-Guard `__ddg9_` cookie with the server's IP so
 * that cookies copied from any browser can be replayed from this host.
 *
 * Only the first `__ddg9_=…` pair is rewritten; every other cookie is left
 * untouched. Falsy input (null, undefined, '') is returned unchanged.
 *
 * @param {string|null|undefined} cookies - Raw `Cookie` header value.
 * @param {string} [serverIp=SERVER_IP] - IP to write into `__ddg9_`; defaults
 *   to the module-level SERVER_IP.
 * @returns {string|null|undefined} The cookie string with `__ddg9_` rewritten.
 */
function fixCookieIp(cookies, serverIp = SERVER_IP) {
  if (!cookies) return cookies;
  // A function replacer keeps `$`-sequences in serverIp from being interpreted
  // as replacement patterns.
  return cookies.replace(/__ddg9_=[^;]+/, () => `__ddg9_=${serverIp}`);
}
|
||||
|
||||
// File extensions (lower-case, leading dot) treated as still images.
const IMAGE_EXTS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff']);

// File extensions (lower-case, leading dot) treated as videos.
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v', '.wmv', '.flv', '.ts']);

// Substrings that mark forum chrome rather than post content.
// NOTE(review): presumably matched against lower-cased image URLs before
// downloading — confirm against downloadImage.
// Declared exactly once: a second `const SKIP_PATTERNS` in the same module is
// a redeclaration SyntaxError, so only the superset list is kept.
const SKIP_PATTERNS = ['avatar', 'smilie', 'emoji', 'icon', 'logo', 'button', 'sprite', 'badge', 'rank', 'star', 'dc_thumbnails'];

// External hosts that gallery-dl can resolve. Each pattern is tested against
// a URL's hostname.
const GALLERY_DL_HOSTS = [
  /saint\d*\.\w+/i,
  /cyberdrop\.\w+/i,
  /bunkr+\.\w+/i,
  /pixeldrain\.com/i,
  /gofile\.io/i,
  /turbo\.\w+/i,
];
|
||||
|
||||
function isImageUrl(url) {
|
||||
try {
|
||||
@@ -16,26 +46,44 @@ function isImageUrl(url) {
|
||||
} catch { return false; }
|
||||
}
|
||||
|
||||
/**
 * Tell whether a URL's path ends in one of the known video extensions.
 *
 * The comparison is case-insensitive and looks at the pathname only, so query
 * strings and fragments are ignored.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} true for a video extension; false otherwise, including
 *   when `url` cannot be parsed.
 */
function isVideoUrl(url) {
  try {
    const pathname = new URL(url).pathname.toLowerCase();
    for (const ext of VIDEO_EXTS) {
      if (pathname.endsWith(ext)) return true;
    }
    return false;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Tell whether a URL points at an image or a video.
 *
 * @param {string} url - URL to classify.
 * @returns {boolean} The image check's result when it is truthy, otherwise
 *   the video check's result.
 */
function isMediaUrl(url) {
  const imageHit = isImageUrl(url);
  if (imageHit) return imageHit;
  return isVideoUrl(url);
}
|
||||
|
||||
/**
 * Tell whether a URL's hostname matches one of the GALLERY_DL_HOSTS patterns.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} true when any pattern matches the lower-cased hostname;
 *   false otherwise, including when `url` cannot be parsed.
 */
function isExternalHost(url) {
  try {
    const host = new URL(url).hostname.toLowerCase();
    for (const pattern of GALLERY_DL_HOSTS) {
      if (pattern.test(host)) return true;
    }
    return false;
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Build the URL of a given page of a thread from any page URL of that thread.
 *
 * The first `page-<digits>` token is replaced with `page-<pageNum>`, then
 * everything from the first `#` onward is dropped. A URL without a `page-N`
 * token only loses its fragment.
 *
 * @param {string} baseUrl - URL of some page of the thread.
 * @param {number|string} pageNum - Page number to point at.
 * @returns {string} The rewritten URL, without fragment.
 */
export function getPageUrl(baseUrl, pageNum) {
  const swapped = baseUrl.replace(/page-\d+/, `page-${pageNum}`);
  const hashAt = swapped.indexOf('#');
  return hashAt === -1 ? swapped : swapped.slice(0, hashAt);
}
|
||||
|
||||
export async function detectMaxPage(baseUrl, logFn) {
|
||||
export async function detectMaxPage(baseUrl, logFn, cookies) {
|
||||
try {
|
||||
const resp = await fetch(baseUrl, { headers: { 'User-Agent': UA }, signal: AbortSignal.timeout(15000) });
|
||||
const headers = { 'User-Agent': UA };
|
||||
if (cookies) headers['Cookie'] = fixCookieIp(cookies);
|
||||
const resp = await fetch(baseUrl, { headers, signal: AbortSignal.timeout(15000) });
|
||||
if (!resp.ok) return null;
|
||||
const html = await resp.text();
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
let maxPage = 1;
|
||||
// XenForo-style
|
||||
$('a.pageNav-page, .pageNav a[href*="page-"], .pagination a[href*="page-"]').each((_, el) => {
|
||||
const href = $(el).attr('href') || '';
|
||||
const m = href.match(/page-(\d+)/);
|
||||
if (m) maxPage = Math.max(maxPage, parseInt(m[1], 10));
|
||||
});
|
||||
// Generic pagination text
|
||||
$('a').each((_, el) => {
|
||||
const text = $(el).text().trim();
|
||||
if (/^\d+$/.test(text)) {
|
||||
@@ -58,6 +106,7 @@ export async function detectMaxPage(baseUrl, logFn) {
|
||||
function tryFullSizeUrl(thumbUrl) {
|
||||
const candidates = [];
|
||||
if (thumbUrl.includes('.th.')) candidates.push(thumbUrl.replace('.th.', '.'));
|
||||
if (thumbUrl.includes('.md.')) candidates.push(thumbUrl.replace('.md.', '.'));
|
||||
if (/_thumb\./i.test(thumbUrl)) candidates.push(thumbUrl.replace(/_thumb\./i, '.'));
|
||||
if (thumbUrl.includes('/thumbs/')) {
|
||||
candidates.push(thumbUrl.replace('/thumbs/', '/images/'));
|
||||
@@ -74,7 +123,7 @@ function tryFullSizeUrl(thumbUrl) {
|
||||
return candidates;
|
||||
}
|
||||
|
||||
async function downloadImage(url, outputDir, downloadedSet, logFn) {
|
||||
async function downloadImage(url, outputDir, downloadedSet, logFn, cookies) {
|
||||
if (downloadedSet.has(url)) return false;
|
||||
if (!isImageUrl(url)) return false;
|
||||
const lower = url.toLowerCase();
|
||||
@@ -83,47 +132,34 @@ async function downloadImage(url, outputDir, downloadedSet, logFn) {
|
||||
downloadedSet.add(url);
|
||||
|
||||
let filename;
|
||||
try {
|
||||
filename = basename(new URL(url).pathname);
|
||||
} catch { return false; }
|
||||
try { filename = basename(new URL(url).pathname); } catch { return false; }
|
||||
if (!filename) return false;
|
||||
filename = filename.replace('.th.', '.').replace('.md.', '.');
|
||||
|
||||
filename = filename.replace('.th.', '.');
|
||||
|
||||
let filepath = join(outputDir, filename);
|
||||
const filepath = join(outputDir, filename);
|
||||
if (existsSync(filepath)) {
|
||||
const ext = extname(filename);
|
||||
const name = filename.slice(0, -ext.length);
|
||||
let i = 1;
|
||||
while (existsSync(filepath)) {
|
||||
filepath = join(outputDir, `${name}_${i}${ext}`);
|
||||
i++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const resp = await fetch(url, {
|
||||
headers: { 'User-Agent': UA },
|
||||
signal: AbortSignal.timeout(30000),
|
||||
});
|
||||
const dlHeaders = { 'User-Agent': UA };
|
||||
if (cookies) dlHeaders['Cookie'] = fixCookieIp(cookies);
|
||||
const resp = await fetch(url, { headers: dlHeaders, signal: AbortSignal.timeout(30000) });
|
||||
if (!resp.ok) {
|
||||
logFn(`FAILED (${resp.status}): ${url}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read full body to check size
|
||||
const buf = Buffer.from(await resp.arrayBuffer());
|
||||
if (buf.length < 1000) {
|
||||
downloadedSet.delete(url);
|
||||
return false;
|
||||
}
|
||||
|
||||
const { writeFileSync } = await import('fs');
|
||||
writeFileSync(filepath, buf);
|
||||
|
||||
const savedName = basename(filepath);
|
||||
const folderName = basename(outputDir);
|
||||
try { upsertMediaFile(folderName, savedName, 'image', buf.length, Date.now(), null); } catch { /* ignore */ }
|
||||
try { upsertMediaFile(folderName, savedName, 'image', buf.length, Date.now(), null); } catch {}
|
||||
|
||||
const sizeKb = (buf.length / 1024).toFixed(1);
|
||||
logFn(`Downloaded: ${savedName} (${sizeKb} KB)`);
|
||||
@@ -134,28 +170,101 @@ async function downloadImage(url, outputDir, downloadedSet, logFn) {
|
||||
}
|
||||
}
|
||||
|
||||
export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn) {
|
||||
/**
 * Hand an external-host link (bunkr, saint, cyberdrop, etc.) to the
 * `gallery-dl` command-line tool and register the files it reports.
 *
 * The URL is recorded in `downloadedSet` up front, so each link is attempted
 * at most once even when the attempt fails. Output lines that look like file
 * paths and exist on disk are registered through `upsertMediaFile` and
 * counted. Any failure (spawn error, timeout, non-zero exit) is logged and
 * reported as zero files rather than thrown.
 *
 * @param {string} url - Link to resolve.
 * @param {string} outputDir - Directory passed to gallery-dl via `-d`.
 * @param {Set<string>} downloadedSet - URLs already attempted; mutated.
 * @param {(msg: string) => void} logFn - Progress/error sink.
 * @returns {Promise<number>} Number of files detected in gallery-dl's output.
 */
async function downloadFromExternalHost(url, outputDir, downloadedSet, logFn) {
  if (downloadedSet.has(url)) return 0;
  downloadedSet.add(url);

  logFn(`Resolving via gallery-dl: ${url}`);

  const hasErrorMarker = (text) => text.includes('ERROR') || text.includes('error');

  try {
    const { stdout, stderr } = await execFileAsync(
      'gallery-dl',
      ['-d', outputDir, '--filename', '{filename}.{extension}', '--no-mtime', '-o', 'directory=[]', url],
      {
        timeout: 300000, // 5 min per external link
        maxBuffer: 10 * 1024 * 1024,
      },
    );

    let registered = 0;

    for (const rawLine of `${stdout}\n${stderr}`.split('\n')) {
      if (!rawLine) continue;
      const entry = rawLine.trim();

      // gallery-dl prints the path of each file it handled; anything else is
      // only echoed when it is a Downloading/Skipping status line.
      const looksLikePath = entry.startsWith(outputDir) || entry.startsWith('/');
      if (!looksLikePath) {
        if (entry.includes('Downloading') || entry.includes('Skipping')) {
          logFn(` ${entry}`);
        }
        continue;
      }

      const filePath = entry.replace(/^# /, '');
      if (!existsSync(filePath)) continue;

      const { size } = statSync(filePath);
      const savedName = basename(filePath);
      const folderName = basename(outputDir);
      const type = VIDEO_EXTS.has(extname(savedName).toLowerCase()) ? 'video' : 'image';
      const sizeStr = type === 'video'
        ? `${(size / (1024 * 1024)).toFixed(1)} MB`
        : `${(size / 1024).toFixed(1)} KB`;

      // Indexing is best-effort: a DB failure must not abort the download loop.
      try { upsertMediaFile(folderName, savedName, type, size, Date.now(), null); } catch {}
      logFn(`Downloaded: ${savedName} (${sizeStr}) [${type}]`);
      registered += 1;
    }

    if (registered === 0) {
      // gallery-dl doesn't always output paths clearly; surface any error
      // lines from stderr so the empty result can be diagnosed.
      const errLines = stderr ? stderr.split('\n').filter((l) => l.trim()) : [];
      errLines
        .filter((l) => hasErrorMarker(l))
        .forEach((l) => logFn(` gallery-dl: ${l.trim()}`));
      logFn(` gallery-dl finished but no files detected from output`);
    }

    return registered;
  } catch (err) {
    if (!err.stderr) {
      logFn(`gallery-dl error: ${err.message}`);
      return 0;
    }
    // Prefer the first explicit error line; otherwise show the head of stderr.
    const errMsg = err.stderr.split('\n').find((l) => hasErrorMarker(l)) || err.stderr.slice(0, 200);
    logFn(`gallery-dl error: ${errMsg.trim()}`);
    return 0;
  }
}
|
||||
|
||||
export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn, cookies) {
|
||||
logFn(`Fetching page: ${pageUrl}`);
|
||||
|
||||
let html;
|
||||
try {
|
||||
const resp = await fetch(pageUrl, {
|
||||
headers: { 'User-Agent': UA },
|
||||
signal: AbortSignal.timeout(15000),
|
||||
});
|
||||
const headers = { 'User-Agent': UA };
|
||||
if (cookies) headers['Cookie'] = fixCookieIp(cookies);
|
||||
const resp = await fetch(pageUrl, { headers, signal: AbortSignal.timeout(15000) });
|
||||
if (!resp.ok) {
|
||||
// SimpCity returns 404 for expired sessions, 403 for blocked
|
||||
if (cookies && (resp.status === 404 || resp.status === 403)) {
|
||||
throw new CookieExpiredError(resp.status);
|
||||
}
|
||||
logFn(`Failed to fetch page (${resp.status})`);
|
||||
return 0;
|
||||
}
|
||||
html = await resp.text();
|
||||
} catch (err) {
|
||||
if (err instanceof CookieExpiredError) throw err;
|
||||
logFn(`Failed to fetch page: ${err.message}`);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const $ = cheerio.load(html);
|
||||
|
||||
// Try known content selectors, fall back to whole page
|
||||
const selectors = '.message-body, .post-body, .post_body, .postcontent, .messageContent, .bbWrapper, article, .entry-content, .post_message, .post-content, #posts, .threadBody';
|
||||
let contentAreas = $(selectors).toArray();
|
||||
if (contentAreas.length === 0) {
|
||||
@@ -163,6 +272,7 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
|
||||
}
|
||||
|
||||
const imageUrls = [];
|
||||
const externalUrls = new Set();
|
||||
|
||||
for (const area of contentAreas) {
|
||||
const $area = $(area);
|
||||
@@ -176,7 +286,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
|
||||
let absSrc;
|
||||
try { absSrc = new URL(src, pageUrl).href; } catch { return; }
|
||||
|
||||
// Check parent <a> for direct image link
|
||||
const $parentA = $img.closest('a');
|
||||
if ($parentA.length && $parentA.attr('href')) {
|
||||
try {
|
||||
@@ -188,7 +297,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
|
||||
} catch {}
|
||||
}
|
||||
|
||||
// Try to derive full-size from thumbnail URL
|
||||
const fullCandidates = tryFullSizeUrl(absSrc);
|
||||
if (fullCandidates.length > 0) {
|
||||
imageUrls.push(...fullCandidates);
|
||||
@@ -196,7 +304,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
|
||||
imageUrls.push(absSrc);
|
||||
}
|
||||
|
||||
// Also check data attributes
|
||||
for (const attr of ['data-src', 'data-url', 'data-orig', 'data-original', 'data-full-url', 'data-zoom-src']) {
|
||||
const val = $img.attr(attr);
|
||||
if (val && val !== src) {
|
||||
@@ -205,26 +312,64 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
|
||||
}
|
||||
});
|
||||
|
||||
// Pass 2: <a href> pointing directly to images (no child <img>)
|
||||
// Pass 2: <a href> links — images + external hosts
|
||||
$area.find('a[href]').each((_, el) => {
|
||||
const $a = $(el);
|
||||
if ($a.find('img').length) return;
|
||||
let href;
|
||||
try { href = new URL($a.attr('href'), pageUrl).href; } catch { return; }
|
||||
|
||||
// Skip same-forum links
|
||||
try {
|
||||
const href = new URL($a.attr('href'), pageUrl).href;
|
||||
if (isImageUrl(href)) imageUrls.push(href);
|
||||
if (new URL(href).hostname === new URL(pageUrl).hostname) return;
|
||||
} catch {}
|
||||
|
||||
// Direct image link (without child img — those are handled in Pass 1)
|
||||
if (isImageUrl(href) && $a.find('img').length === 0) {
|
||||
imageUrls.push(href);
|
||||
return;
|
||||
}
|
||||
|
||||
// Direct video link
|
||||
if (isVideoUrl(href)) {
|
||||
externalUrls.add(href);
|
||||
return;
|
||||
}
|
||||
|
||||
// External file host (bunkr, saint, cyberdrop, etc.)
|
||||
if (isExternalHost(href)) {
|
||||
externalUrls.add(href);
|
||||
}
|
||||
});
|
||||
|
||||
// Pass 3: iframe embeds
|
||||
$area.find('iframe[src]').each((_, el) => {
|
||||
const src = $(el).attr('src');
|
||||
if (src) {
|
||||
try {
|
||||
const absUrl = new URL(src, pageUrl).href;
|
||||
if (isExternalHost(absUrl)) externalUrls.add(absUrl);
|
||||
} catch {}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
logFn(`Found ${imageUrls.length} candidate URLs`);
|
||||
logFn(`Found ${imageUrls.length} images, ${externalUrls.size} external links`);
|
||||
|
||||
let count = 0;
|
||||
|
||||
// Download images
|
||||
for (const imgUrl of imageUrls) {
|
||||
if (await downloadImage(imgUrl, outputDir, downloadedSet, logFn)) {
|
||||
if (await downloadImage(imgUrl, outputDir, downloadedSet, logFn, cookies)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
logFn(`${count} images from this page`);
|
||||
// Download from external hosts via gallery-dl
|
||||
for (const extUrl of externalUrls) {
|
||||
const dlCount = await downloadFromExternalHost(extUrl, outputDir, downloadedSet, logFn);
|
||||
count += dlCount;
|
||||
}
|
||||
|
||||
logFn(`${count} files from this page`);
|
||||
return count;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,191 @@
|
||||
import { existsSync, writeFileSync, mkdirSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
const API_BASE = 'https://api.leakgallery.com';
|
||||
const CDN_BASE = 'https://cdn.leakgallery.com';
|
||||
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
||||
|
||||
/**
 * Extract the profile username from a leakgallery.com profile URL.
 *
 * Accepted form: `https://leakgallery.com/{username}` (optional trailing
 * slash), on the bare domain or any subdomain of it.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {{username: string}} The username path segment.
 * @throws {TypeError} If `url` is not a valid absolute URL (raised by `new URL`).
 * @throws {Error} If the host is not leakgallery.com or the path is not a
 *   single username segment.
 */
export function parseLeakGalleryUrl(url) {
  const parsed = new URL(url);
  const host = parsed.hostname;

  // Require the exact domain or a true subdomain. A substring check would
  // also accept look-alikes such as "leakgallery.com.evil.example" or
  // "notleakgallery.com".
  if (host !== 'leakgallery.com' && !host.endsWith('.leakgallery.com')) {
    throw new Error('Not a leakgallery.com URL');
  }

  // URL format: https://leakgallery.com/{username}
  const m = parsed.pathname.match(/^\/([a-zA-Z0-9_.-]+)\/?$/);
  if (!m) throw new Error('Expected URL format: https://leakgallery.com/username');
  return { username: m[1] };
}
|
||||
|
||||
/**
 * Fetch one page of a profile's media listing from the JSON API.
 *
 * Page 1 lives at `/profile/{username}`; later pages at
 * `/profile/{username}/{page}`. Both carry `?type=All&sort=MostRecent`.
 *
 * @param {string} username - Profile name, inserted into the path as-is.
 * @param {number} page - 1-based page number.
 * @param {(msg: string) => void} logFn - Error sink; a 404 is not logged.
 * @returns {Promise<object|null>} Parsed JSON body, or null on any HTTP or
 *   network failure.
 */
async function fetchPage(username, page, logFn) {
  const pageSegment = page <= 1 ? '' : `/${page}`;
  const endpoint = `${API_BASE}/profile/${username}${pageSegment}?type=All&sort=MostRecent`;

  try {
    const response = await fetch(endpoint, {
      headers: {
        'User-Agent': UA,
        'Accept': 'application/json',
        'Origin': 'https://leakgallery.com',
        'Referer': 'https://leakgallery.com/',
      },
      signal: AbortSignal.timeout(15000),
    });

    if (response.ok) return await response.json();

    // A 404 is returned silently; any other status is worth reporting.
    if (response.status !== 404) {
      logFn(`API error (${response.status}): ${endpoint}`);
    }
    return null;
  } catch (err) {
    logFn(`API fetch error: ${err.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Page through a profile and build a download descriptor for every media item.
 *
 * Paging stops at `maxPages`, on cancellation, when a page cannot be fetched,
 * when a page has no `medias`, or when a page contains only already-seen ids.
 * Between pages the function waits `delay` milliseconds.
 *
 * Image paths ending in `_580px.webp` / `_300px.webp` are rewritten to `.jpg`
 * to address the full-size file; video paths are used as given.
 *
 * @param {string} username - Profile to crawl.
 * @param {number} maxPages - Upper bound on pages requested.
 * @param {number} delay - Pause between pages, in milliseconds.
 * @param {(msg: string) => void} logFn - Progress sink.
 * @param {() => boolean} checkCancelled - Polled before each page and delay.
 * @returns {Promise<Array<{id: *, url: string, filename: string, type: 'video'|'image'}>>}
 */
export async function fetchAllMedia(username, maxPages, delay, logFn, checkCancelled) {
  const descriptors = [];
  const knownIds = new Set();

  // Turn one API record into the descriptor consumed by the downloader.
  const toDescriptor = (item) => {
    const isVideo = !!item.is_video;
    // Videos: file_path is already the video file.
    // Images: file_path is relative, e.g.
    // content4/username/watermark_hash__username__id_580px.webp — swap the
    // thumbnail suffix for .jpg to get the full-size file.
    const relativePath = isVideo
      ? item.file_path
      : (item.file_path || item.thumbnail_path || '')
        .replace(/_580px\.webp$/, '.jpg')
        .replace(/_300px\.webp$/, '.jpg');
    return {
      id: item.id,
      url: `${CDN_BASE}/${relativePath}`,
      filename: basename(relativePath),
      type: isVideo ? 'video' : 'image',
    };
  };

  for (let page = 1; page <= maxPages; page++) {
    if (checkCancelled()) break;

    logFn(`Fetching page ${page}...`);
    const data = await fetchPage(username, page, logFn);
    if (!data) {
      logFn(`Page ${page}: no data — stopping`);
      break;
    }

    // Only the first page reports the profile-wide total.
    if (page === 1 && data.mediaCount) {
      logFn(`Profile has ${data.mediaCount} total media items`);
    }

    const { medias } = data;
    if (!Array.isArray(medias) || medias.length === 0) {
      logFn(`Page ${page}: no more items — done`);
      break;
    }

    let fresh = 0;
    for (const item of medias) {
      if (knownIds.has(item.id)) continue;
      knownIds.add(item.id);
      fresh += 1;
      descriptors.push(toDescriptor(item));
    }

    // A page of pure repeats means there is nothing new to find.
    if (fresh === 0) {
      logFn(`Page ${page}: all duplicates — stopping`);
      break;
    }

    logFn(`Page ${page}: ${medias.length} items (${fresh} new, ${descriptors.length} total)`);

    if (page < maxPages && !checkCancelled()) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  return descriptors;
}
|
||||
|
||||
/**
 * Download a URL into memory, treating every failure as "no data".
 *
 * @param {string} url - Resource to download.
 * @returns {Promise<Buffer|null>} The body, or null when the request fails,
 *   the status is not OK, or the body is shorter than 500 bytes.
 */
async function tryFetch(url) {
  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': UA,
        'Referer': 'https://leakgallery.com/',
      },
      signal: AbortSignal.timeout(60000),
    });
    if (response.ok) {
      const body = Buffer.from(await response.arrayBuffer());
      // Bodies under 500 bytes are rejected as not being real media.
      return body.length < 500 ? null : body;
    }
    return null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Download a list of media descriptors into `outputDir` with a pool of workers.
 *
 * Each worker repeatedly claims the next unprocessed item. Files that already
 * exist on disk are counted as skipped. Successful downloads are written,
 * registered through `upsertMediaFile`, and counted as completed; failed
 * downloads are counted as errors. `progressFn(done, errors, total)` is
 * called after every item, with `done = completed + skipped`.
 *
 * @param {Array<{id: *, url: string, filename?: string, type: string}>} items
 * @param {string} outputDir - Target directory; created if missing.
 * @param {number} workers - Maximum concurrent downloads.
 * @param {(msg: string) => void} logFn - Progress/error sink.
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled - Polled before each item is claimed.
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  const total = items.length;
  const tally = { completed: 0, errors: 0, skipped: 0 };
  let cursor = 0;

  const reportProgress = () => progressFn(tally.completed + tally.skipped, tally.errors, items.length);

  const runWorker = async () => {
    for (;;) {
      if (cursor >= items.length) return;
      if (checkCancelled()) return;

      // Claim the slot synchronously so no two workers take the same item.
      const item = items[cursor++];
      const filename = item.filename || `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
      const target = join(outputDir, filename);

      if (existsSync(target)) {
        tally.skipped += 1;
        reportProgress();
        continue;
      }

      const payload = await tryFetch(item.url);
      if (!payload) {
        logFn(`FAILED: ${filename}`);
        tally.errors += 1;
        reportProgress();
        continue;
      }

      writeFileSync(target, payload);
      const folderName = basename(outputDir);
      const fileType = VIDEO_EXTS.has(extname(filename).toLowerCase()) ? 'video' : 'image';
      // Indexing is best-effort: a DB failure must not fail the download.
      try { upsertMediaFile(folderName, filename, fileType, payload.length, Date.now(), null); } catch {}
      tally.completed += 1;
      logFn(`[${tally.completed}/${items.length}] ${filename} (${(payload.length / 1024).toFixed(1)} KB)`);
      reportProgress();
    }
  };

  const pool = [];
  for (let slot = 0; slot < Math.min(workers, items.length); slot++) {
    pool.push(runWorker());
  }
  await Promise.all(pool);

  return { completed: tally.completed, errors: tally.errors, skipped: tally.skipped, total };
}
|
||||
+222
-59
@@ -1,6 +1,7 @@
|
||||
import { existsSync, writeFileSync, mkdirSync } from 'fs';
|
||||
import { existsSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
import { load as cheerioLoad } from 'cheerio';
|
||||
import { upsertMediaFile, removeMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
@@ -9,10 +10,13 @@ const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
||||
/**
 * Parse a fapello URL and decide which scraping strategy applies.
 *
 * Recognised shapes:
 *   - `/model/{id}` or `/media/{id}` (numeric id, fapello.to JSON API)
 *       -> { base, userId: id, mode: 'api' }
 *   - `/{slug}/` (fapello.com profile page, e.g. /josie-hamming-41/)
 *       -> { base, userId: slug, mode: 'html' }
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {{base: string, userId: string, mode: 'api'|'html'}}
 *   `base` is `protocol//hostname` of the input.
 * @throws {TypeError} If `url` is not a valid absolute URL (raised by `new URL`).
 * @throws {Error} If the path matches neither shape.
 */
export function parseMediaUrl(url) {
  const parsed = new URL(url);
  const base = `${parsed.protocol}//${parsed.hostname}`;

  // Support /model/{id} or /media/{id} (fapello.to JSON API)
  const idMatch = parsed.pathname.match(/\/(?:model|media)\/(\d+)/);
  if (idMatch) return { base, userId: idMatch[1], mode: 'api' };

  // Support fapello.com profile slug URLs like /josie-hamming-41/
  const slugMatch = parsed.pathname.match(/^\/([a-zA-Z0-9_-]+)\/?$/);
  if (slugMatch) return { base, userId: slugMatch[1], mode: 'html' };

  throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345 or https://fapello.com/username/`);
}
|
||||
|
||||
// Fetch JSON from the API endpoint
|
||||
@@ -73,6 +77,7 @@ export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkC
|
||||
allItems.push({
|
||||
id: item.id,
|
||||
url: fullUrl,
|
||||
thumbUrl: item.newUrlThumb || null,
|
||||
type: isVideo ? 'video' : 'image',
|
||||
});
|
||||
}
|
||||
@@ -92,13 +97,171 @@ export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkC
|
||||
return allItems;
|
||||
}
|
||||
|
||||
// --- HTML-based scraping (fapello.com profile pages) ---
|
||||
|
||||
/**
 * Extract media descriptors from a profile page or AJAX HTML fragment.
 *
 * Images: every `<img>` whose `src` contains `_300px.` is taken as a grid
 * thumbnail and mapped to its full-size URL by dropping the `_300px` marker.
 * Videos: every `<video>` / `<video><source>` whose `src` contains `.mp4`.
 *
 * All URLs are resolved against `base` with the WHATWG URL resolver, so
 * absolute, protocol-relative (`//cdn…`), root-relative (`/x`) and plain
 * relative (`x/y`) references all yield valid absolute URLs. References that
 * cannot be resolved are skipped.
 *
 * @param {string} html - HTML document or fragment.
 * @param {string} base - Site origin used to resolve relative references.
 * @returns {Array<{url: string, type: 'image'|'video'}>} Images first, then
 *   videos, each in document order.
 */
function parseMediaFromHtml(html, base) {
  const $ = cheerioLoad(html);
  const items = [];

  // Resolve a possibly-relative reference; null when it cannot be parsed.
  const toAbsolute = (ref) => {
    try {
      return new URL(ref, base).href;
    } catch {
      return null;
    }
  };

  const collect = (ref, type) => {
    const url = toAbsolute(ref);
    if (url) items.push({ url, type });
  };

  // Find all image thumbnails in the grid
  $('img[src*="_300px."]').each((_, el) => {
    const thumbUrl = $(el).attr('src');
    if (!thumbUrl) return;
    // Convert thumbnail to full-size: remove _300px
    collect(thumbUrl.replace(/_300px\./, '.'), 'image');
  });

  // Find video elements (source tags with .mp4)
  $('video source[src*=".mp4"], video[src*=".mp4"]').each((_, el) => {
    const src = $(el).attr('src');
    if (!src) return;
    collect(src, 'video');
  });

  return items;
}
|
||||
|
||||
/**
 * Crawl an HTML profile (`{base}/{slug}/`) and its AJAX continuation pages,
 * returning every distinct media item found.
 *
 * Phase 1 loads the profile page, reads the page count from the `data-max`
 * attribute of `#showmore` (capped at `maxPages`) and parses the first batch.
 * Phase 2 requests `{base}/ajax/model/{slug}/page-N/` for N = 2..count,
 * stopping early on cancellation, a 404, an empty body, or a page with no
 * new items. Items are de-duplicated by URL and numbered in discovery order.
 *
 * @param {string} base - Site origin.
 * @param {string} slug - Profile slug.
 * @param {number} maxPages - Upper bound on pages requested.
 * @param {number} delay - Pause between AJAX pages, in milliseconds.
 * @param {(msg: string) => void} logFn - Progress/error sink.
 * @param {() => boolean} checkCancelled - Polled before each page and delay.
 * @returns {Promise<Array<{url: string, type: string, id: number}>>}
 */
export async function fetchAllMediaFromHtml(base, slug, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seenUrls = new Set();
  const profileUrl = `${base}/${slug}/`;
  let lastPage = maxPages;

  // Append the not-yet-seen candidates; returns how many were new.
  const absorb = (candidates) => {
    let added = 0;
    for (const candidate of candidates) {
      if (seenUrls.has(candidate.url)) continue;
      seenUrls.add(candidate.url);
      allItems.push({ ...candidate, id: seenUrls.size });
      added += 1;
    }
    return added;
  };

  // Phase 1: Fetch initial profile page to get data-max
  logFn(`Fetching profile page: ${base}/${slug}/`);
  try {
    const resp = await fetch(profileUrl, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) {
      logFn(`Profile page error (${resp.status})`);
      return allItems;
    }
    const html = await resp.text();

    // Get max pages from data-max attribute; never exceed the caller's limit.
    const dataMax = cheerioLoad(html)('#showmore').attr('data-max');
    if (dataMax) {
      lastPage = Math.min(parseInt(dataMax, 10) || maxPages, maxPages);
      logFn(`Detected ${lastPage} pages`);
    }

    // Parse initial page content
    const firstBatch = parseMediaFromHtml(html, base);
    absorb(firstBatch);
    logFn(`Page 1: ${firstBatch.length} items (${allItems.length} total)`);
  } catch (err) {
    logFn(`Error fetching profile: ${err.message}`);
    return allItems;
  }

  // Phase 2: Paginate through AJAX pages
  for (let page = 2; page <= lastPage; page++) {
    if (checkCancelled()) break;

    try {
      const resp = await fetch(`${base}/ajax/model/${slug}/page-${page}/`, {
        headers: {
          'User-Agent': UA,
          'X-Requested-With': 'XMLHttpRequest',
          'Referer': `${base}/${slug}/`,
        },
        signal: AbortSignal.timeout(15000),
      });

      if (!resp.ok) {
        const pastTheEnd = resp.status === 404;
        logFn(pastTheEnd ? `Page ${page}: 404 — done` : `Page ${page}: error (${resp.status})`);
        if (pastTheEnd) break;
        // Other HTTP errors move straight on to the next page; note that this
        // bypasses the inter-page delay below.
        continue;
      }

      const html = await resp.text();
      if (!html || html.trim() === '') {
        logFn(`Page ${page}: empty — done`);
        break;
      }

      const batch = parseMediaFromHtml(html, base);
      const fresh = absorb(batch);
      if (fresh === 0) {
        logFn(`Page ${page}: all duplicates — stopping`);
        break;
      }

      logFn(`Page ${page}: ${batch.length} items (${fresh} new, ${allItems.length} total)`);
    } catch (err) {
      // A thrown error is logged and the crawl continues with the next page.
      logFn(`Page ${page}: error — ${err.message}`);
    }

    if (page < lastPage && !checkCancelled()) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  return allItems;
}
|
||||
|
||||
/**
 * Derive a local filename for a media item.
 *
 * Uses the last path segment of `url`. When the URL cannot be parsed or has
 * no usable last segment, falls back to `<item.id>.mp4` for videos and
 * `<item.id>.jpg` for everything else.
 *
 * @param {string} url - Media URL.
 * @param {{id: *, type: string}} item - Item supplying the fallback name.
 * @returns {string} Filename without any directory part.
 */
function filenameFromUrl(url, item) {
  let candidate = '';
  try {
    candidate = basename(new URL(url).pathname);
  } catch {
    // Unparseable URL: fall through to the id-based name.
  }
  if (candidate && candidate !== '/') return candidate;

  const fallbackExt = item.type === 'video' ? 'mp4' : 'jpg';
  return `${item.id}.${fallbackExt}`;
}
|
||||
|
||||
/**
 * Insert an `_md` marker before a filename's extension
 * (`photo.jpg` -> `photo_md.jpg`).
 *
 * Names without an extension get the marker appended (`photo` -> `photo_md`).
 *
 * @param {string} filename - Name to derive from.
 * @returns {string} The `_md` variant of the name.
 */
function mdFilename(filename) {
  const ext = extname(filename);
  // With an empty extension, slice(0, -0) is slice(0, 0) and would discard
  // the whole name, so the stem is only sliced when there is an extension.
  const stem = ext ? filename.slice(0, -ext.length) : filename;
  return `${stem}_md${ext}`;
}
|
||||
|
||||
/**
 * Download a URL into memory, treating every failure as "no data".
 *
 * @param {string|null|undefined} url - Resource to download; falsy values
 *   short-circuit to null without a request.
 * @param {string} [referer] - `Referer` header value; `https://fapello.to/`
 *   when omitted or falsy.
 * @returns {Promise<Buffer|null>} The body, or null when there is no URL, the
 *   request fails, the status is not OK, or the body is shorter than 500 bytes.
 */
async function tryFetch(url, referer) {
  if (!url) return null;
  try {
    const response = await fetch(url, {
      headers: { 'User-Agent': UA, 'Referer': referer || 'https://fapello.to/' },
      signal: AbortSignal.timeout(60000),
    });
    if (response.ok) {
      const body = Buffer.from(await response.arrayBuffer());
      // Bodies under 500 bytes are rejected as not being real media.
      return body.length < 500 ? null : body;
    }
    return null;
  } catch {
    return null;
  }
}
|
||||
|
||||
// Download all collected media items with concurrency
|
||||
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
|
||||
// Fallback: if full-res URL fails, download medium (thumbUrl) with _md suffix.
|
||||
// Upgrade: if _md file exists, try full-res again; replace _md on success.
|
||||
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled, referer) {
|
||||
mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
let completed = 0;
|
||||
let errors = 0;
|
||||
let skipped = 0;
|
||||
let upgraded = 0;
|
||||
let index = 0;
|
||||
|
||||
async function processNext() {
|
||||
@@ -108,72 +271,71 @@ export async function downloadMedia(items, outputDir, workers, logFn, progressFn
|
||||
const current = index++;
|
||||
const item = items[current];
|
||||
|
||||
let filename;
|
||||
try {
|
||||
filename = basename(new URL(item.url).pathname);
|
||||
if (!filename || filename === '/') {
|
||||
filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
|
||||
}
|
||||
} catch {
|
||||
filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
|
||||
}
|
||||
const filename = filenameFromUrl(item.url, item);
|
||||
const filepath = join(outputDir, filename);
|
||||
const mdName = mdFilename(filename);
|
||||
const mdPath = join(outputDir, mdName);
|
||||
|
||||
let filepath = join(outputDir, filename);
|
||||
// Full-res already exists — skip
|
||||
if (existsSync(filepath)) {
|
||||
skipped++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const resp = await fetch(item.url, {
|
||||
headers: {
|
||||
'User-Agent': UA,
|
||||
'Referer': 'https://fapello.to/',
|
||||
},
|
||||
signal: AbortSignal.timeout(60000),
|
||||
});
|
||||
if (!resp.ok) {
|
||||
logFn(`FAILED (${resp.status}): ${filename}`);
|
||||
errors++;
|
||||
// Medium version exists — try to upgrade to full-res
|
||||
if (existsSync(mdPath)) {
|
||||
const buf = await tryFetch(item.url, referer);
|
||||
if (buf) {
|
||||
writeFileSync(filepath, buf);
|
||||
try { unlinkSync(mdPath); } catch {}
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(filename).toLowerCase()) ? 'video' : 'image';
|
||||
try { removeMediaFile(folderName, mdName); } catch {}
|
||||
try { upsertMediaFile(folderName, filename, fileType, buf.length, Date.now(), null); } catch {}
|
||||
upgraded++;
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${filename} (upgraded from _md, ${(buf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
const buf = Buffer.from(await resp.arrayBuffer());
|
||||
if (buf.length < 500) {
|
||||
} else {
|
||||
skipped++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Neither exists — try full-res, then fallback to medium
|
||||
const buf = await tryFetch(item.url, referer);
|
||||
if (buf) {
|
||||
writeFileSync(filepath, buf);
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(filename).toLowerCase()) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, filename, fileType, buf.length, Date.now(), null); } catch {}
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${filename} (${(buf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Full-res failed — try medium (thumbUrl)
|
||||
if (item.thumbUrl) {
|
||||
const mdBuf = await tryFetch(item.thumbUrl, referer);
|
||||
if (mdBuf) {
|
||||
writeFileSync(mdPath, mdBuf);
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(mdName).toLowerCase()) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, mdName, fileType, mdBuf.length, Date.now(), null); } catch {}
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${mdName} (medium fallback, ${(mdBuf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle filename collision
|
||||
if (existsSync(filepath)) {
|
||||
const ext = extname(filename);
|
||||
const name = filename.slice(0, -ext.length);
|
||||
let i = 1;
|
||||
while (existsSync(filepath)) {
|
||||
filepath = join(outputDir, `${name}_${i}${ext}`);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
writeFileSync(filepath, buf);
|
||||
const savedName = basename(filepath);
|
||||
const folderName = basename(outputDir);
|
||||
const fileExt = extname(savedName).toLowerCase();
|
||||
const fileType = VIDEO_EXTS.has(fileExt) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch {}
|
||||
|
||||
completed++;
|
||||
const sizeKb = (buf.length / 1024).toFixed(1);
|
||||
logFn(`[${completed}/${items.length}] ${savedName} (${sizeKb} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
} catch (err) {
|
||||
logFn(`FAILED: ${filename} - ${err.message}`);
|
||||
errors++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
|
||||
// Both failed
|
||||
logFn(`FAILED: ${filename} — full-res and medium both unavailable`);
|
||||
errors++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,5 +345,6 @@ export async function downloadMedia(items, outputDir, workers, logFn, progressFn
|
||||
}
|
||||
await Promise.all(workerPromises);
|
||||
|
||||
if (upgraded > 0) logFn(`Upgraded ${upgraded} files from medium to full resolution`);
|
||||
return { completed, errors, skipped, total: items.length };
|
||||
}
|
||||
|
||||
@@ -0,0 +1,219 @@
|
||||
import { File } from 'megajs';
|
||||
import { existsSync, mkdirSync, statSync, unlinkSync } from 'fs';
|
||||
import { createWriteStream } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { pipeline } from 'stream/promises';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
// Lower-case extensions (with leading dot) classified as video files.
const VIDEO_EXTS = new Set([
  '.mp4',
  '.mov',
  '.avi',
  '.webm',
  '.mkv',
  '.m4v',
]);

// Lower-case extensions (with leading dot) classified as image files.
const IMAGE_EXTS = new Set([
  '.jpg',
  '.jpeg',
  '.png',
  '.gif',
  '.webp',
  '.bmp',
  '.tiff',
]);
|
||||
|
||||
/**
 * Validate that `url` points at a shared folder on mega.nz / mega.co.nz.
 *
 * The host must be exactly one of those domains or a subdomain of one; a
 * substring match would also accept hosts such as `mega.nz.evil.example`.
 *
 * @param {string} url - URL supplied by the user.
 * @returns {string} The same `url`, unchanged, when it is valid.
 * @throws {TypeError} When `url` cannot be parsed by `new URL`.
 * @throws {Error} When the host is not a MEGA domain or the path has no `/folder/` segment.
 */
export function parseMegaUrl(url) {
  const { hostname, pathname } = new URL(url);

  const megaDomains = ['mega.nz', 'mega.co.nz'];
  const isMegaHost = megaDomains.some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
  );
  if (!isMegaHost) {
    throw new Error('Not a mega.nz URL');
  }

  if (!pathname.includes('/folder/')) {
    throw new Error('Expected a mega.nz folder URL (e.g. https://mega.nz/folder/ABC#key)');
  }

  return url;
}
|
||||
|
||||
/**
 * Load a shared folder and list every non-directory file beneath it.
 *
 * Each returned item keeps the underlying file handle plus its name, size, a
 * coarse type (`'image'`, `'video'` or `'other'`, decided by extension) and
 * the `/`-joined names of its parent folders below the shared root.
 *
 * @param {string} url - Shared-folder URL handed to `File.fromURL`.
 * @param {(message: string) => void} logFn - Receives progress messages.
 * @returns {Promise<{folderName: string, items: Array<object>}>}
 */
export async function listAllFiles(url, logFn) {
  logFn('Loading shared folder...');
  const folder = File.fromURL(url);
  await folder.loadAttributes();

  const folderName = folder.name || 'mega_folder';
  logFn(`Folder: ${folderName}`);

  // Second argument requests a recursive walk (per the original comment).
  const allFiles = folder.filter(f => !f.directory, true);
  logFn(`Found ${allFiles.length} files across all subfolders`);

  // Classify a file by its lower-cased extension.
  const classify = (name) => {
    const ext = extname(name).toLowerCase();
    if (IMAGE_EXTS.has(ext)) return 'image';
    if (VIDEO_EXTS.has(ext)) return 'video';
    return 'other';
  };

  // Walk up the parent chain until the shared root, outermost folder first.
  const relativeFolder = (file) => {
    const names = [];
    for (let node = file.parent; node && node !== folder; node = node.parent) {
      names.unshift(node.name);
    }
    return names.join('/');
  };

  const items = [];
  for (const file of allFiles) {
    const type = classify(file.name);
    const subfolder = relativeFolder(file);
    items.push({ file, name: file.name, size: file.size, type, subfolder });
  }

  return { folderName, items };
}
|
||||
|
||||
/**
 * Parse the bandwidth-limit wait time out of a download error message.
 *
 * @param {*} errMsg - Error message; non-string values (e.g. the `message` of
 *   a thrown non-Error) are coerced so this never throws inside a catch block.
 * @returns {number} Seconds to wait: the number preceding "seconds until" when
 *   present, 3600 when the message only mentions "bandwidth", otherwise 0
 *   (meaning: not a bandwidth error).
 */
function parseBandwidthWait(errMsg) {
  const text = typeof errMsg === 'string' ? errMsg : String(errMsg ?? '');

  const match = text.match(/(\d+)\s*seconds?\s*until/i);
  if (match) return Number.parseInt(match[1], 10);

  // Bandwidth error without a parseable duration: default to one hour.
  return /bandwidth/i.test(text) ? 3600 : 0;
}
|
||||
|
||||
/**
 * Download all files with bounded concurrency and automatic retry after a
 * bandwidth-limit pause.
 *
 * Behaviour:
 * - All files are written directly into `outputDir` (subfolders are
 *   flattened). Only the final path component of `item.name` is used, so a
 *   name such as `../x` cannot escape `outputDir`.
 * - A file that already exists with a non-zero size is counted as skipped;
 *   a zero-byte leftover is removed and downloaded again.
 * - When a download fails with a bandwidth-limit error, that item is queued
 *   for retry and ALL workers wait on one shared pause before continuing.
 *   Items are never handed out by rewinding the shared cursor, so concurrent
 *   failures cannot drop or duplicate work.
 * - The pause polls `checkCancelled()` about once a second so cancellation
 *   does not have to wait for the full quota window.
 * - An item that keeps hitting the bandwidth limit is retried indefinitely.
 *
 * @param {Array<{file: {download: Function}, name: string, size: number, subfolder?: string}>} items
 * @param {string} outputDir - Destination directory (created if missing).
 * @param {number} workers - Maximum number of concurrent downloads (at least one is used).
 * @param {(message: string) => void} logFn
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled
 * @param {(status: {paused: boolean, resumeAt: number|null}) => void} [statusFn]
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadMegaFiles(items, outputDir, workers, logFn, progressFn, checkCancelled, statusFn) {
  mkdirSync(outputDir, { recursive: true });

  const total = items.length;
  const folderName = basename(outputDir);

  let completed = 0;
  let errors = 0;
  let skipped = 0;
  let nextIndex = 0;
  const retryQueue = []; // indices waiting to be retried after a quota pause
  let quotaPause = null; // promise shared by every worker while paused

  const report = () => progressFn(completed + skipped, errors, total);

  // Hand out the next item index: retries first, then fresh items; -1 when done.
  function takeNext() {
    if (retryQueue.length > 0) return retryQueue.shift();
    if (nextIndex < total) return nextIndex++;
    return -1;
  }

  // True when a non-empty file is already on disk; removes zero-byte leftovers.
  function isAlreadyDownloaded(filepath) {
    if (!existsSync(filepath)) return false;
    try {
      if (statSync(filepath).size > 0) return true;
      unlinkSync(filepath);
    } catch {
      // Could not stat/remove: fall through and attempt the download anyway.
    }
    return false;
  }

  // Sleep until `deadline` (ms epoch), waking early if the job is cancelled.
  async function sleepUntil(deadline) {
    while (!checkCancelled()) {
      const remaining = deadline - Date.now();
      if (remaining <= 0) return;
      await new Promise((resolve) => setTimeout(resolve, Math.min(remaining, 1000)));
    }
  }

  // Start a quota pause, or join the one already in progress.
  function pauseForQuota(waitSecs) {
    if (quotaPause) return quotaPause;

    const resumeAt = Date.now() + waitSecs * 1000;
    const waitMins = Math.ceil(waitSecs / 60);
    logFn(`Bandwidth limit reached — waiting ${waitMins} minutes for quota reset...`);
    if (statusFn) statusFn({ paused: true, resumeAt });

    const pause = sleepUntil(resumeAt).then(() => {
      if (quotaPause === pause) quotaPause = null;
      if (checkCancelled()) return;
      if (statusFn) statusFn({ paused: false, resumeAt: null });
      logFn('Quota reset — resuming downloads...');
    });
    quotaPause = pause;
    return pause;
  }

  // Download a single item and update the counters.
  async function downloadOne(current) {
    const item = items[current];

    // Names come from an untrusted share: keep only the last path component.
    const safeName = basename(String(item.name ?? ''));
    if (!safeName || safeName === '.' || safeName === '..') {
      logFn(`FAILED: ${item.name} — invalid file name`);
      errors++;
      report();
      return;
    }
    const filepath = join(outputDir, safeName);

    if (isAlreadyDownloaded(filepath)) {
      skipped++;
      report();
      return;
    }

    try {
      await pipeline(item.file.download(), createWriteStream(filepath));
    } catch (err) {
      // Clean up partial/empty file on any error.
      try { unlinkSync(filepath); } catch { /* nothing to remove */ }

      const message = err?.message ?? String(err);
      const waitSecs = parseBandwidthWait(message);
      if (waitSecs > 0) {
        retryQueue.push(current);
        await pauseForQuota(waitSecs);
        return;
      }

      logFn(`FAILED: ${item.name} — ${message}`);
      errors++;
      report();
      return;
    }

    // Record the size actually written; fall back to the advertised size.
    let actualSize = item.size;
    try { actualSize = statSync(filepath).size; } catch { /* keep advertised size */ }

    const ext = extname(safeName).toLowerCase();
    const fileType = VIDEO_EXTS.has(ext) ? 'video' : IMAGE_EXTS.has(ext) ? 'image' : 'other';
    // Indexing is best-effort: a DB failure must not fail the download.
    try { upsertMediaFile(folderName, safeName, fileType, actualSize, Date.now(), null); } catch { /* ignore */ }

    completed++;
    const sizeMb = (item.size / (1024 * 1024)).toFixed(1);
    logFn(`[${completed}/${total}] ${item.subfolder ? item.subfolder + '/' : ''}${item.name} (${sizeMb} MB)`);
    report();
  }

  async function worker() {
    for (;;) {
      if (quotaPause) await quotaPause;
      if (checkCancelled()) return;
      const current = takeNext();
      if (current === -1) return;
      await downloadOne(current);
    }
  }

  // Always run at least one worker so a missing/zero `workers` still downloads.
  const workerCount = Math.max(1, Math.min(Number(workers) || 1, total));
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  return { completed, errors, skipped, total };
}
|
||||
@@ -0,0 +1,300 @@
|
||||
import { spawn } from 'child_process';
|
||||
import { basename, extname, join } from 'path';
|
||||
import { existsSync, statSync, readdirSync } from 'fs';
|
||||
import { execFile } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import { insertVideo, getVideoByPath } from '../db.js';
|
||||
|
||||
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);

// Root directory for videos; the VIDEOS_PATH environment variable overrides the default.
const VIDEOS_PATH = process.env.VIDEOS_PATH || '/data/videos';

// Lower-case extensions (with leading dot) treated as video files.
const VIDEO_EXTS = new Set([
  '.mp4',
  '.mov',
  '.avi',
  '.webm',
  '.mkv',
  '.m4v',
  '.wmv',
  '.flv',
  '.ts',
]);

// Quality preset name -> yt-dlp format selector string.
const QUALITY_PRESETS = {
  'best': 'bestvideo+bestaudio/best',
  '2160p': 'bestvideo[height<=2160]+bestaudio/best[height<=2160]',
  '1080p': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
  '720p': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
  '480p': 'bestvideo[height<=480]+bestaudio/best[height<=480]',
  'audio': 'bestaudio/best',
};
|
||||
|
||||
/**
 * Run ffprobe on `filePath` and summarise its container and stream details.
 *
 * Missing or unparseable values are reported as `null`; `has_audio` is 1 when
 * an audio stream is present and 0 otherwise. `fps` is the first video
 * stream's `r_frame_rate` fraction rounded to two decimals.
 *
 * @param {string} filePath - File to inspect.
 * @returns {Promise<{duration: number|null, width: number|null, height: number|null,
 *   fps: number|null, codec: string|null, bitrate: number|null, has_audio: 0|1}>}
 * @throws When ffprobe fails, exceeds the 60 s timeout, or prints invalid JSON.
 */
async function probeVideo(filePath) {
  const ffprobeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration,bit_rate',
    '-show_entries', 'stream=codec_name,width,height,r_frame_rate,codec_type',
    '-of', 'json',
    filePath,
  ];
  const { stdout } = await execFileAsync('ffprobe', ffprobeArgs, { timeout: 60000 });
  const info = JSON.parse(stdout);

  const firstStreamOfType = (kind) => info.streams?.find((s) => s.codec_type === kind);
  const video = firstStreamOfType('video');
  const audio = firstStreamOfType('audio');

  const duration = parseFloat(info.format?.duration || '0');
  const bitrate = parseInt(info.format?.bit_rate || '0', 10);

  // r_frame_rate is a "num/den" fraction; only a positive denominator is usable.
  let fps = null;
  const frameRate = video?.r_frame_rate;
  if (frameRate) {
    const [numText, denText] = frameRate.split('/');
    const divisor = parseInt(denText, 10);
    if (divisor > 0) {
      fps = Math.round((parseInt(numText, 10) / divisor) * 100) / 100;
    }
  }

  return {
    duration: duration || null,
    width: video?.width || null,
    height: video?.height || null,
    fps,
    codec: video?.codec_name || null,
    bitrate: bitrate || null,
    has_audio: audio ? 1 : 0,
  };
}
|
||||
|
||||
/**
 * Extract a single 480px-wide JPEG frame from `filePath` with ffmpeg.
 *
 * The thumbnail is written under `<VIDEOS_PATH>/.thumbnails/` and named
 * `<timestamp>_<video basename with .jpg extension>`. The frame is taken at
 * 1 s when ffprobe reports a duration above 2 s, otherwise at 0 s (also used
 * when the duration probe fails).
 *
 * @param {string} filePath - Source video.
 * @returns {Promise<string>} Path of the generated thumbnail.
 * @throws When the ffmpeg invocation fails or exceeds its 30 s timeout.
 */
async function generateThumbnail(filePath) {
  const jpgName = basename(filePath).replace(/\.[^.]+$/, '.jpg');
  const thumbPath = join(VIDEOS_PATH, '.thumbnails', `${Date.now()}_${jpgName}`);

  // Duration lookup is best-effort; any failure is treated as "unknown" (0).
  const readDuration = async () => {
    try {
      const probeArgs = ['-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', filePath];
      const { stdout } = await execFileAsync('ffprobe', probeArgs, { timeout: 15000 });
      return parseFloat(stdout.trim()) || 0;
    } catch {
      return 0;
    }
  };

  const seekTime = (await readDuration()) > 2 ? '1' : '0';

  const ffmpegArgs = [
    '-ss', seekTime, '-i', filePath,
    '-frames:v', '1', '-vf', 'scale=480:-1', '-q:v', '4', '-y', '-update', '1',
    thumbPath,
  ];
  await execFileAsync('ffmpeg', ffmpegArgs, { timeout: 30000 });

  return thumbPath;
}
|
||||
|
||||
/**
 * Register a downloaded video file in the videos DB table.
 *
 * Steps: skip files already indexed by path, probe the file, try to create a
 * thumbnail, then insert a row with status `'ready'`. Never throws; every
 * failure is reported through `log`.
 *
 * - A probe failure aborts registration for this file.
 * - A thumbnail failure is logged and the row is stored with a `null` thumbnail.
 *
 * @param {string} filePath - Path of the downloaded video.
 * @param {(message: string) => void} log - Receives progress and error messages.
 * @returns {Promise<void>}
 */
async function registerVideo(filePath, log) {
  try {
    if (getVideoByPath(filePath)) {
      log(`Already indexed: ${basename(filePath)}`);
      return;
    }

    const stat = statSync(filePath);
    const filename = basename(filePath);

    let probe;
    try {
      probe = await probeVideo(filePath);
    } catch (err) {
      log(`Probe failed for ${filename}: ${err.message}`);
      return;
    }

    // Thumbnails are optional, but a failure should be visible in the job log.
    let thumbPath = null;
    try {
      thumbPath = await generateThumbnail(filePath);
    } catch (err) {
      log(`Thumbnail failed for ${filename}: ${err?.message ?? err}`);
    }

    // Turn separators into spaces for a readable title. A name made only of
    // separators would otherwise produce an empty title, so fall back to the
    // filename in that case.
    const cleaned = basename(filename, extname(filename))
      .replace(/[_.-]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
    const title = cleaned || filename;

    insertVideo({
      title,
      filename,
      file_path: filePath,
      file_size: stat.size,
      ...probe,
      thumbnail_path: thumbPath,
      status: 'ready',
    });

    log(`Registered in library: ${title}`);
  } catch (err) {
    log(`Failed to register ${basename(filePath)}: ${err.message}`);
  }
}
|
||||
|
||||
/**
 * Build the yt-dlp command-line arguments from a download config.
 *
 * Hardening over a plain option mapping:
 * - `quality` is looked up with an own-property check, so values such as
 *   `'constructor'` fall back to the `best` preset instead of resolving to a
 *   non-string inherited member.
 * - The URL is preceded by `--` so a value starting with `-` is treated as a
 *   URL and never as a yt-dlp option.
 * - Option values are coerced to strings, since `spawn` rejects non-string
 *   arguments (e.g. a numeric `rateLimit`).
 *
 * NOTE(review): `outputTemplate` is joined onto VIDEOS_PATH as-is; a template
 * containing `..` segments can resolve outside that directory — confirm
 * callers validate it.
 *
 * @param {object} config - Download options (`url`, `quality`, `customFormat`, ...).
 * @returns {string[]} Argument vector for `spawn('yt-dlp', args)`.
 */
function buildArgs(config) {
  const {
    url, quality, customFormat, embedMetadata, embedThumbnail, embedSubs,
    writeSubs, subLangs, restrictFilenames, outputTemplate,
    playlist, maxDownloads, concurrentFragments, rateLimit,
    sponsorBlock, cookiesFile,
  } = config;

  const args = [];

  // Format
  if (customFormat) {
    args.push('-f', String(customFormat));
  } else {
    const isKnownPreset = typeof quality === 'string' && Object.hasOwn(QUALITY_PRESETS, quality);
    args.push('-f', isKnownPreset ? QUALITY_PRESETS[quality] : QUALITY_PRESETS.best);
  }

  // Merge to mp4 when possible; the audio preset extracts mp3 instead
  if (quality !== 'audio') {
    args.push('--merge-output-format', 'mp4');
  } else {
    args.push('-x', '--audio-format', 'mp3');
  }

  // Embed options
  if (embedMetadata) args.push('--embed-metadata');
  if (embedThumbnail) args.push('--embed-thumbnail');
  if (embedSubs) args.push('--embed-subs');
  if (writeSubs) args.push('--write-subs');
  if (subLangs) args.push('--sub-langs', String(subLangs));

  // Filename
  if (restrictFilenames) args.push('--restrict-filenames');
  args.push('-o', join(VIDEOS_PATH, outputTemplate || '%(title)s.%(ext)s'));

  // Playlist
  if (playlist) {
    args.push('--yes-playlist');
    if (maxDownloads) args.push('--max-downloads', String(maxDownloads));
  } else {
    args.push('--no-playlist');
  }

  // Performance
  if (concurrentFragments && concurrentFragments > 1) {
    args.push('--concurrent-fragments', String(concurrentFragments));
  }
  if (rateLimit) args.push('--rate-limit', String(rateLimit));

  // SponsorBlock
  if (sponsorBlock === 'remove') args.push('--sponsorblock-remove', 'all');
  else if (sponsorBlock === 'mark') args.push('--sponsorblock-mark', 'all');

  // Cookies
  if (cookiesFile) args.push('--cookies', String(cookiesFile));

  // Progress & output
  args.push('--newline', '--no-colors', '--no-overwrites');
  // Print downloaded file paths
  args.push('--print', 'after_move:filepath');

  // End of options: everything after `--` is a URL.
  args.push('--', url);
  return args;
}
|
||||
|
||||
/**
 * Run a yt-dlp download. Progress and logs are delivered through callbacks.
 *
 * Output from the child process is reassembled into complete lines before it
 * is parsed: a `data` event can end in the middle of a line, and parsing raw
 * chunks would split file paths and messages in two. Bytes are buffered and
 * cut on the newline byte, so multi-byte UTF-8 characters are never split.
 *
 * After the process exits, every reported file with a known video extension
 * is registered in the library via `registerVideo`.
 *
 * @param {object} config - Options passed to `buildArgs`.
 * @param {(message: string) => void} log
 * @param {(files: number, errors: number) => void} onProgress - Called with the
 *   running file count and 0 on each new file, or 1 when an ERROR line is seen.
 * @param {() => boolean} isCancelled - Polled every 500 ms; when true the
 *   process is sent SIGTERM.
 * @returns {Promise<{files: number, errors: number, cancelled?: boolean}>}
 *   Resolves when the process closes; `errors` is 1 for a non-zero exit that
 *   was not a cancellation. Rejects only if the process cannot be spawned.
 */
export function runYtdlp(config, log, onProgress, isCancelled) {
  return new Promise((resolve, reject) => {
    const args = buildArgs(config);
    log(`yt-dlp ${args.join(' ')}`);

    const proc = spawn('yt-dlp', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    const downloadedFiles = [];
    const seenFiles = new Set();
    let fileCount = 0;

    // Accumulates chunks and invokes `onLine` once per complete, non-empty line.
    const createLineSplitter = (onLine) => {
      let pending = Buffer.alloc(0);
      const emit = (bytes) => {
        const line = bytes.toString('utf8');
        if (line) onLine(line);
      };
      return {
        push(chunk) {
          const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk));
          pending = pending.length > 0 ? Buffer.concat([pending, bytes]) : bytes;
          let newlineAt = pending.indexOf(0x0a);
          while (newlineAt !== -1) {
            emit(pending.subarray(0, newlineAt));
            pending = pending.subarray(newlineAt + 1);
            newlineAt = pending.indexOf(0x0a);
          }
        },
        // Deliver a final line that was not newline-terminated.
        flush() {
          if (pending.length === 0) return;
          const rest = pending;
          pending = Buffer.alloc(0);
          emit(rest);
        },
      };
    };

    const handleStdoutLine = (line) => {
      // yt-dlp --print after_move:filepath outputs the final file path on its own line.
      // These lines don't start with [ and are absolute paths.
      if (line.startsWith('/') && existsSync(line.trim())) {
        const filePath = line.trim();
        if (!seenFiles.has(filePath)) {
          seenFiles.add(filePath);
          downloadedFiles.push(filePath);
          fileCount++;
          onProgress(fileCount, 0);
          log(`Downloaded: ${basename(filePath)}`);
        }
        return;
      }

      // Progress lines: [download]  45.2% of 250.00MiB at 5.00MiB/s ETA 00:25
      const progressMatch = line.match(/\[download\]\s+([\d.]+)%\s+of\s+~?([\d.]+\w+)\s+at\s+([\d.]+\w+\/s|Unknown)\s+ETA\s+(\S+)/);
      if (progressMatch) {
        const [, pctText, size, speed, eta] = progressMatch;
        log(`[download] ${parseFloat(pctText).toFixed(1)}% of ${size} at ${speed} ETA ${eta}`);
        return;
      }

      // Destination line: [download] Destination: filename.mp4
      const destMatch = line.match(/\[download\] Destination:\s+(.+)/);
      if (destMatch) {
        log(`Downloading: ${basename(destMatch[1])}`);
        return;
      }

      // Already downloaded
      if (line.includes('has already been downloaded')) {
        log(line.trim());
        onProgress(fileCount, 0);
        return;
      }

      // Log other yt-dlp output
      if (line.trim()) {
        log(line.trim());
      }
    };

    const handleStderrLine = (line) => {
      if (line.includes('WARNING:')) {
        log(`Warning: ${line.replace(/WARNING:\s*/, '')}`);
      } else if (line.includes('ERROR:')) {
        log(`ERROR: ${line.replace(/ERROR:\s*/, '')}`);
        onProgress(fileCount, 1);
      } else if (line.trim()) {
        log(line.trim());
      }
    };

    const stdoutLines = createLineSplitter(handleStdoutLine);
    const stderrLines = createLineSplitter(handleStderrLine);
    proc.stdout.on('data', (chunk) => stdoutLines.push(chunk));
    proc.stderr.on('data', (chunk) => stderrLines.push(chunk));

    // Check for cancellation
    const cancelCheck = setInterval(() => {
      if (isCancelled()) {
        proc.kill('SIGTERM');
        clearInterval(cancelCheck);
      }
    }, 500);

    proc.on('close', async (code) => {
      clearInterval(cancelCheck);

      try {
        // Handle any trailing output that lacked a final newline.
        stdoutLines.flush();
        stderrLines.flush();

        // Register downloaded video files in the library
        for (const filePath of downloadedFiles) {
          if (VIDEO_EXTS.has(extname(filePath).toLowerCase())) {
            await registerVideo(filePath, log);
          }
        }
      } catch (err) {
        // Post-processing must not leave the returned promise pending.
        reject(err);
        return;
      }

      if (code === 0) {
        resolve({ files: downloadedFiles.length, errors: 0 });
      } else if (isCancelled()) {
        resolve({ files: downloadedFiles.length, errors: 0, cancelled: true });
      } else {
        resolve({ files: downloadedFiles.length, errors: 1 });
      }
    });

    proc.on('error', (err) => {
      clearInterval(cancelCheck);
      reject(err);
    });
  });
}
|
||||
Reference in New Issue
Block a user