Add app auth, dashboard, scheduler, video management, and new scrapers

- JWT-based app authentication with user roles, folder/route access control
- Dashboard with storage stats, health checks, and recent activity
- Auto-download/scrape scheduler (12h interval) with per-user and per-job configs
- Video upload, tagging, HLS transcoding, and detail pages
- New scrapers: LeakGallery, Mega (megajs), yt-dlp
- FlareSolverr integration for Cloudflare-protected sites
- Gallery: advanced filtering (date, size, search), sort modes, equal-mix shuffle
- Forum sites management with stored cookies/auth
- GridWall/GridCell components for responsive media grid
- Media API with folder-access permissions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Trey T
2026-04-16 07:48:10 -05:00
parent 4903b84aef
commit 236f36aae6
54 changed files with 9986 additions and 420 deletions
+189 -44
View File
@@ -1,13 +1,43 @@
import * as cheerio from 'cheerio';
import { createWriteStream, existsSync, mkdirSync, statSync } from 'fs';
import { createWriteStream, existsSync, mkdirSync, statSync, writeFileSync } from 'fs';
import { basename, join, extname } from 'path';
import { pipeline } from 'stream/promises';
import { execFile } from 'child_process';
import { promisify } from 'util';
import { upsertMediaFile } from '../db.js';
const execFileAsync = promisify(execFile);
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
const SERVER_IP = '47.185.183.191';
// Error raised when a request made with stored cookies is rejected.
// `statusCode` carries the HTTP status that triggered it so callers can report it.
export class CookieExpiredError extends Error {
  name = 'CookieExpiredError';

  constructor(statusCode) {
    const detail = `Cookie expired or invalid (HTTP ${statusCode})`;
    super(detail);
    this.statusCode = statusCode;
  }
}
// Replace the IP stored in the DDoS-Guard `__ddg9_` cookie with the server's IP so
// that cookies copied from any browser work when replayed from this server.
//
// cookies  - raw Cookie header string ("a=1; b=2"); falsy values are returned untouched
// serverIp - IP written into the cookie; defaults to the module-level SERVER_IP
//
// Returns the cookie string with the first `__ddg9_` cookie rewritten, or the input
// unchanged when no such cookie is present.
function fixCookieIp(cookies, serverIp = SERVER_IP) {
  if (!cookies) return cookies;
  // Anchor on a cookie boundary (start of string or after ';') so that a cookie whose
  // name merely ends in "__ddg9_" is left alone. `[^;]*` also covers an empty value.
  // A replacer function keeps any '$' in serverIp from being read as a replace pattern.
  return cookies.replace(
    /((?:^|;)\s*)__ddg9_=[^;]*/,
    (_match, lead) => `${lead}__ddg9_=${serverIp}`,
  );
}
const IMAGE_EXTS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff']);
const SKIP_PATTERNS = ['avatar', 'smilie', 'emoji', 'icon', 'logo', 'button', 'sprite', 'badge', 'rank', 'star'];
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v', '.wmv', '.flv', '.ts']);
const SKIP_PATTERNS = ['avatar', 'smilie', 'emoji', 'icon', 'logo', 'button', 'sprite', 'badge', 'rank', 'star', 'dc_thumbnails'];
// External hosts that gallery-dl can resolve.
// isExternalHost() tests each pattern against the lowercased hostname of a URL.
// The patterns are unanchored, so a match anywhere inside the hostname counts.
const GALLERY_DL_HOSTS = [
/saint\d*\.\w+/i, // "saint" followed by optional digits, any TLD
/cyberdrop\.\w+/i,
/bunkr+\.\w+/i, // "bunk" plus one or more "r" (bunkr, bunkrr, ...), any TLD
/pixeldrain\.com/i,
/gofile\.io/i,
/turbo\.\w+/i,
];
function isImageUrl(url) {
try {
@@ -16,26 +46,44 @@ function isImageUrl(url) {
} catch { return false; }
}
// Report whether the URL's path (case-insensitive) ends with one of the extensions
// in VIDEO_EXTS. Anything that cannot be parsed as a URL yields false.
function isVideoUrl(url) {
  try {
    const { pathname } = new URL(url);
    const lowered = pathname.toLowerCase();
    for (const ext of VIDEO_EXTS) {
      if (lowered.endsWith(ext)) return true;
    }
    return false;
  } catch {
    return false;
  }
}
// A URL counts as media when isImageUrl accepts it, otherwise when isVideoUrl does.
function isMediaUrl(url) {
  const imageMatch = isImageUrl(url);
  return imageMatch ? imageMatch : isVideoUrl(url);
}
// Report whether the URL's hostname matches any pattern in GALLERY_DL_HOSTS.
// Only the hostname is tested, never the path; unparseable input yields false.
function isExternalHost(url) {
  try {
    const { hostname } = new URL(url);
    const host = hostname.toLowerCase();
    for (const pattern of GALLERY_DL_HOSTS) {
      if (pattern.test(host)) return true;
    }
    return false;
  } catch {
    return false;
  }
}
// Build the URL of page `pageNum` from a thread URL.
// The first `page-<digits>` occurrence in baseUrl is swapped for `page-<pageNum>`,
// then everything from the first '#' onward is dropped. A baseUrl that contains no
// `page-<digits>` comes back unchanged apart from the fragment removal.
export function getPageUrl(baseUrl, pageNum) {
  const withPage = baseUrl.replace(/page-\d+/, `page-${pageNum}`);
  const hashAt = withPage.indexOf('#');
  return hashAt === -1 ? withPage : withPage.slice(0, hashAt);
}
export async function detectMaxPage(baseUrl, logFn) {
export async function detectMaxPage(baseUrl, logFn, cookies) {
try {
const resp = await fetch(baseUrl, { headers: { 'User-Agent': UA }, signal: AbortSignal.timeout(15000) });
const headers = { 'User-Agent': UA };
if (cookies) headers['Cookie'] = fixCookieIp(cookies);
const resp = await fetch(baseUrl, { headers, signal: AbortSignal.timeout(15000) });
if (!resp.ok) return null;
const html = await resp.text();
const $ = cheerio.load(html);
let maxPage = 1;
// XenForo-style
$('a.pageNav-page, .pageNav a[href*="page-"], .pagination a[href*="page-"]').each((_, el) => {
const href = $(el).attr('href') || '';
const m = href.match(/page-(\d+)/);
if (m) maxPage = Math.max(maxPage, parseInt(m[1], 10));
});
// Generic pagination text
$('a').each((_, el) => {
const text = $(el).text().trim();
if (/^\d+$/.test(text)) {
@@ -58,6 +106,7 @@ export async function detectMaxPage(baseUrl, logFn) {
function tryFullSizeUrl(thumbUrl) {
const candidates = [];
if (thumbUrl.includes('.th.')) candidates.push(thumbUrl.replace('.th.', '.'));
if (thumbUrl.includes('.md.')) candidates.push(thumbUrl.replace('.md.', '.'));
if (/_thumb\./i.test(thumbUrl)) candidates.push(thumbUrl.replace(/_thumb\./i, '.'));
if (thumbUrl.includes('/thumbs/')) {
candidates.push(thumbUrl.replace('/thumbs/', '/images/'));
@@ -74,7 +123,7 @@ function tryFullSizeUrl(thumbUrl) {
return candidates;
}
async function downloadImage(url, outputDir, downloadedSet, logFn) {
async function downloadImage(url, outputDir, downloadedSet, logFn, cookies) {
if (downloadedSet.has(url)) return false;
if (!isImageUrl(url)) return false;
const lower = url.toLowerCase();
@@ -83,47 +132,34 @@ async function downloadImage(url, outputDir, downloadedSet, logFn) {
downloadedSet.add(url);
let filename;
try {
filename = basename(new URL(url).pathname);
} catch { return false; }
try { filename = basename(new URL(url).pathname); } catch { return false; }
if (!filename) return false;
filename = filename.replace('.th.', '.').replace('.md.', '.');
filename = filename.replace('.th.', '.');
let filepath = join(outputDir, filename);
const filepath = join(outputDir, filename);
if (existsSync(filepath)) {
const ext = extname(filename);
const name = filename.slice(0, -ext.length);
let i = 1;
while (existsSync(filepath)) {
filepath = join(outputDir, `${name}_${i}${ext}`);
i++;
}
return false;
}
try {
const resp = await fetch(url, {
headers: { 'User-Agent': UA },
signal: AbortSignal.timeout(30000),
});
const dlHeaders = { 'User-Agent': UA };
if (cookies) dlHeaders['Cookie'] = fixCookieIp(cookies);
const resp = await fetch(url, { headers: dlHeaders, signal: AbortSignal.timeout(30000) });
if (!resp.ok) {
logFn(`FAILED (${resp.status}): ${url}`);
return false;
}
// Read full body to check size
const buf = Buffer.from(await resp.arrayBuffer());
if (buf.length < 1000) {
downloadedSet.delete(url);
return false;
}
const { writeFileSync } = await import('fs');
writeFileSync(filepath, buf);
const savedName = basename(filepath);
const folderName = basename(outputDir);
try { upsertMediaFile(folderName, savedName, 'image', buf.length, Date.now(), null); } catch { /* ignore */ }
try { upsertMediaFile(folderName, savedName, 'image', buf.length, Date.now(), null); } catch {}
const sizeKb = (buf.length / 1024).toFixed(1);
logFn(`Downloaded: ${savedName} (${sizeKb} KB)`);
@@ -134,28 +170,101 @@ async function downloadImage(url, outputDir, downloadedSet, logFn) {
}
}
export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn) {
// Use gallery-dl to download from external hosts (bunkr, saint, cyberdrop, etc.)
// and index every file it reports having written.
//
// url           - link handed to gallery-dl; recorded in downloadedSet so it is tried once
// outputDir     - directory passed to gallery-dl via -d (flat layout: directory=[])
// downloadedSet - Set of URLs already attempted; this function adds `url` to it
// logFn         - callback that receives one log message per call
//
// Resolves to the number of reported files that exist on disk. A gallery-dl failure
// (non-zero exit, timeout, missing binary) is logged rather than thrown, and any
// files it had already written before failing are still indexed and counted.
async function downloadFromExternalHost(url, outputDir, downloadedSet, logFn) {
  if (downloadedSet.has(url)) return 0;
  downloadedSet.add(url);

  logFn(`Resolving via gallery-dl: ${url}`);

  const args = [
    '-d', outputDir,
    '--filename', '{filename}.{extension}',
    '--no-mtime',
    '-o', 'directory=[]',
    url,
  ];

  let stdout = '';
  let stderr = '';
  let failure = null;
  try {
    ({ stdout, stderr } = await execFileAsync('gallery-dl', args, {
      timeout: 300000, // 5 min per external link
      maxBuffer: 10 * 1024 * 1024,
    }));
  } catch (err) {
    // A promisified execFile rejection still carries whatever the process printed.
    // Keep it, so files written before the failure are not lost.
    failure = err;
    stdout = typeof err.stdout === 'string' ? err.stdout : '';
    stderr = typeof err.stderr === 'string' ? err.stderr : '';
  }

  const folderName = basename(outputDir);
  let count = 0;

  for (const line of `${stdout}\n${stderr}`.split('\n')) {
    const trimmed = line.trim();
    if (!trimmed) continue;

    // Lines that look like a path are treated as files gallery-dl wrote. Lines with
    // any other prefix (including "# <path>") never reach this branch and are not counted.
    if (trimmed.startsWith(outputDir) || trimmed.startsWith('/')) {
      let stat;
      try {
        stat = existsSync(trimmed) ? statSync(trimmed) : null;
      } catch (statErr) {
        logFn(` Could not stat ${trimmed}: ${statErr.message}`);
        continue;
      }
      if (!stat) continue;

      const savedName = basename(trimmed);
      const ext = extname(savedName).toLowerCase();
      const type = VIDEO_EXTS.has(ext) ? 'video' : 'image';
      const sizeStr = type === 'video'
        ? `${(stat.size / (1024 * 1024)).toFixed(1)} MB`
        : `${(stat.size / 1024).toFixed(1)} KB`;
      // Indexing is best-effort: a DB failure must not abort the download loop.
      try { upsertMediaFile(folderName, savedName, type, stat.size, Date.now(), null); } catch {}
      logFn(`Downloaded: ${savedName} (${sizeStr}) [${type}]`);
      count++;
    } else if (trimmed.includes('Downloading') || trimmed.includes('Skipping')) {
      logFn(` ${trimmed}`);
    }
  }

  if (failure) {
    if (stderr) {
      const errMsg = stderr.split('\n').find(l => l.includes('ERROR') || l.includes('error')) || stderr.slice(0, 200);
      logFn(`gallery-dl error: ${errMsg.trim()}`);
    } else {
      logFn(`gallery-dl error: ${failure.message}`);
    }
    return count;
  }

  if (count === 0) {
    // gallery-dl doesn't always output paths clearly, check stderr for errors
    const errLines = stderr ? stderr.split('\n').filter(l => l.trim()) : [];
    for (const line of errLines) {
      if (line.includes('ERROR') || line.includes('error')) {
        logFn(` gallery-dl: ${line.trim()}`);
      }
    }
    logFn(` gallery-dl finished but no files detected from output`);
  }

  return count;
}
export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn, cookies) {
logFn(`Fetching page: ${pageUrl}`);
let html;
try {
const resp = await fetch(pageUrl, {
headers: { 'User-Agent': UA },
signal: AbortSignal.timeout(15000),
});
const headers = { 'User-Agent': UA };
if (cookies) headers['Cookie'] = fixCookieIp(cookies);
const resp = await fetch(pageUrl, { headers, signal: AbortSignal.timeout(15000) });
if (!resp.ok) {
// SimpCity returns 404 for expired sessions, 403 for blocked
if (cookies && (resp.status === 404 || resp.status === 403)) {
throw new CookieExpiredError(resp.status);
}
logFn(`Failed to fetch page (${resp.status})`);
return 0;
}
html = await resp.text();
} catch (err) {
if (err instanceof CookieExpiredError) throw err;
logFn(`Failed to fetch page: ${err.message}`);
return 0;
}
const $ = cheerio.load(html);
// Try known content selectors, fall back to whole page
const selectors = '.message-body, .post-body, .post_body, .postcontent, .messageContent, .bbWrapper, article, .entry-content, .post_message, .post-content, #posts, .threadBody';
let contentAreas = $(selectors).toArray();
if (contentAreas.length === 0) {
@@ -163,6 +272,7 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
}
const imageUrls = [];
const externalUrls = new Set();
for (const area of contentAreas) {
const $area = $(area);
@@ -176,7 +286,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
let absSrc;
try { absSrc = new URL(src, pageUrl).href; } catch { return; }
// Check parent <a> for direct image link
const $parentA = $img.closest('a');
if ($parentA.length && $parentA.attr('href')) {
try {
@@ -188,7 +297,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
} catch {}
}
// Try to derive full-size from thumbnail URL
const fullCandidates = tryFullSizeUrl(absSrc);
if (fullCandidates.length > 0) {
imageUrls.push(...fullCandidates);
@@ -196,7 +304,6 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
imageUrls.push(absSrc);
}
// Also check data attributes
for (const attr of ['data-src', 'data-url', 'data-orig', 'data-original', 'data-full-url', 'data-zoom-src']) {
const val = $img.attr(attr);
if (val && val !== src) {
@@ -205,26 +312,64 @@ export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn)
}
});
// Pass 2: <a href> pointing directly to images (no child <img>)
// Pass 2: <a href> links — images + external hosts
$area.find('a[href]').each((_, el) => {
const $a = $(el);
if ($a.find('img').length) return;
let href;
try { href = new URL($a.attr('href'), pageUrl).href; } catch { return; }
// Skip same-forum links
try {
const href = new URL($a.attr('href'), pageUrl).href;
if (isImageUrl(href)) imageUrls.push(href);
if (new URL(href).hostname === new URL(pageUrl).hostname) return;
} catch {}
// Direct image link (without child img — those are handled in Pass 1)
if (isImageUrl(href) && $a.find('img').length === 0) {
imageUrls.push(href);
return;
}
// Direct video link
if (isVideoUrl(href)) {
externalUrls.add(href);
return;
}
// External file host (bunkr, saint, cyberdrop, etc.)
if (isExternalHost(href)) {
externalUrls.add(href);
}
});
// Pass 3: iframe embeds
$area.find('iframe[src]').each((_, el) => {
const src = $(el).attr('src');
if (src) {
try {
const absUrl = new URL(src, pageUrl).href;
if (isExternalHost(absUrl)) externalUrls.add(absUrl);
} catch {}
}
});
}
logFn(`Found ${imageUrls.length} candidate URLs`);
logFn(`Found ${imageUrls.length} images, ${externalUrls.size} external links`);
let count = 0;
// Download images
for (const imgUrl of imageUrls) {
if (await downloadImage(imgUrl, outputDir, downloadedSet, logFn)) {
if (await downloadImage(imgUrl, outputDir, downloadedSet, logFn, cookies)) {
count++;
}
}
logFn(`${count} images from this page`);
// Download from external hosts via gallery-dl
for (const extUrl of externalUrls) {
const dlCount = await downloadFromExternalHost(extUrl, outputDir, downloadedSet, logFn);
count += dlCount;
}
logFn(`${count} files from this page`);
return count;
}