Add app auth, dashboard, scheduler, video management, and new scrapers
- JWT-based app authentication with user roles, folder/route access control
- Dashboard with storage stats, health checks, and recent activity
- Auto-download/scrape scheduler (12h interval) with per-user and per-job configs
- Video upload, tagging, HLS transcoding, and detail pages
- New scrapers: LeakGallery, Mega (megajs), yt-dlp
- FlareSolverr integration for Cloudflare-protected sites
- Gallery: advanced filtering (date, size, search), sort modes, equal-mix shuffle
- Forum sites management with stored cookies/auth
- GridWall/GridCell components for responsive media grid
- Media API with folder-access permissions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+222
-59
@@ -1,6 +1,7 @@
|
||||
import { existsSync, writeFileSync, mkdirSync } from 'fs';
|
||||
import { existsSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
import { load as cheerioLoad } from 'cheerio';
|
||||
import { upsertMediaFile, removeMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
@@ -9,10 +10,13 @@ const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
||||
/**
 * Split a profile URL into the pieces the scraper needs.
 *
 * Two path shapes are recognised:
 *   - `/model/{id}` or `/media/{id}` with a numeric id  -> mode `'api'`
 *   - a single path segment such as `/josie-hamming-41/` -> mode `'html'`
 *
 * @param {string} url - Absolute URL of the profile.
 * @returns {{ base: string, userId: string, mode: 'api' | 'html' }}
 *   `base` is `protocol//host`; `userId` is the numeric id or the slug.
 * @throws {TypeError} When `url` is not a valid absolute URL (thrown by `new URL`).
 * @throws {Error} When the path matches neither recognised shape.
 */
export function parseMediaUrl(url) {
  const parsed = new URL(url);
  // `host` (not `hostname`) keeps an explicit non-default port in the base.
  const base = `${parsed.protocol}//${parsed.host}`;

  // Support /model/{id} or /media/{id} (fapello.to JSON API)
  const idMatch = parsed.pathname.match(/\/(?:model|media)\/(\d+)/);
  if (idMatch) return { base, userId: idMatch[1], mode: 'api' };

  // Support fapello.com profile slug URLs like /josie-hamming-41/
  const slugMatch = parsed.pathname.match(/^\/([a-zA-Z0-9_-]+)\/?$/);
  if (slugMatch) return { base, userId: slugMatch[1], mode: 'html' };

  throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345 or https://fapello.com/username/`);
}
|
||||
|
||||
// Fetch JSON from the API endpoint
|
||||
@@ -73,6 +77,7 @@ export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkC
|
||||
allItems.push({
|
||||
id: item.id,
|
||||
url: fullUrl,
|
||||
thumbUrl: item.newUrlThumb || null,
|
||||
type: isVideo ? 'video' : 'image',
|
||||
});
|
||||
}
|
||||
@@ -92,13 +97,171 @@ export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkC
|
||||
return allItems;
|
||||
}
|
||||
|
||||
// --- HTML-based scraping (fapello.com profile pages) ---
|
||||
|
||||
/**
 * Turn a `src` attribute into an absolute URL.
 *
 * Values that already start with `http://` or `https://` are returned
 * untouched. Anything else (root-relative, protocol-relative `//host/...`,
 * or a bare relative path) is resolved against `base` with the URL parser.
 *
 * @param {string} src - Raw attribute value.
 * @param {string} base - Site origin, e.g. `https://example.com`.
 * @returns {string | null} Absolute URL, or null when it cannot be resolved.
 */
function resolveMediaUrl(src, base) {
  if (/^https?:\/\//i.test(src)) return src;
  try {
    return new URL(src, `${base}/`).href;
  } catch {
    // Unresolvable src (or invalid base): the caller skips this element.
    return null;
  }
}

/**
 * Extract media items from a chunk of profile-page HTML.
 *
 * Images are found via `<img>` tags whose `src` contains `_300px.`; the
 * full-size URL is derived by dropping that `_300px` marker. Videos are found
 * via `<video>` / `<video><source>` elements whose `src` contains `.mp4`.
 *
 * @param {string} html - HTML document or fragment.
 * @param {string} base - Site origin used to absolutize relative URLs.
 * @returns {Array<{ url: string, type: 'image' | 'video' }>} Images first, then videos.
 */
function parseMediaFromHtml(html, base) {
  const $ = cheerioLoad(html);
  const items = [];

  // Find all image thumbnails in the grid
  $('img[src*="_300px."]').each((_, el) => {
    const thumbUrl = $(el).attr('src');
    if (!thumbUrl) return;
    // Convert thumbnail to full-size: remove _300px
    const absUrl = resolveMediaUrl(thumbUrl.replace(/_300px\./, '.'), base);
    if (absUrl) items.push({ url: absUrl, type: 'image' });
  });

  // Find video elements (source tags with .mp4)
  $('video source[src*=".mp4"], video[src*=".mp4"]').each((_, el) => {
    const src = $(el).attr('src');
    if (!src) return;
    const absUrl = resolveMediaUrl(src, base);
    if (absUrl) items.push({ url: absUrl, type: 'video' });
  });

  return items;
}
|
||||
|
||||
/**
 * Collect media items for a profile by scraping its HTML pages.
 *
 * Phase 1 fetches `${base}/${slug}/`, reads the page count from the
 * `data-max` attribute of `#showmore` (capped at `maxPages`), and parses the
 * items on that first page. Phase 2 walks `${base}/ajax/model/${slug}/page-N/`
 * for N = 2..totalPages, stopping early on a 404, an empty body, a page that
 * yields no new URLs, or cancellation.
 *
 * Errors are reported through `logFn` and never thrown: a failure in phase 1
 * returns whatever was collected so far (an empty list), a failure on a later
 * page is logged and the next page is tried.
 *
 * @param {string} base - Site origin, e.g. `https://example.com`.
 * @param {string} slug - Profile slug used in the page URLs.
 * @param {number} maxPages - Upper bound on the number of pages to fetch.
 * @param {number} delay - Milliseconds to wait between paginated requests.
 * @param {(msg: string) => void} logFn - Progress/error logger.
 * @param {() => boolean} checkCancelled - Returns true when the job should stop.
 * @returns {Promise<Array<{ url: string, type: string, id: number }>>}
 *   De-duplicated items; `id` is the 1-based order of discovery.
 */
export async function fetchAllMediaFromHtml(base, slug, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seen = new Set();
  let totalPages = maxPages;
  const profileUrl = `${base}/${slug}/`;

  // Append items whose URL has not been seen yet; returns how many were added.
  const addNewItems = (items) => {
    let added = 0;
    for (const item of items) {
      if (seen.has(item.url)) continue;
      seen.add(item.url);
      allItems.push({ ...item, id: seen.size });
      added++;
    }
    return added;
  };

  // Phase 1: Fetch initial profile page to get data-max
  logFn(`Fetching profile page: ${profileUrl}`);
  try {
    const resp = await fetch(profileUrl, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) {
      logFn(`Profile page error (${resp.status})`);
      return allItems;
    }
    const html = await resp.text();
    const $ = cheerioLoad(html);

    // Get max pages from data-max attribute
    const dataMax = $('#showmore').attr('data-max');
    if (dataMax) {
      // An unparsable or zero data-max falls back to maxPages.
      totalPages = Math.min(Number.parseInt(dataMax, 10) || maxPages, maxPages);
      logFn(`Detected ${totalPages} pages`);
    }

    // Parse initial page content
    const initialItems = parseMediaFromHtml(html, base);
    addNewItems(initialItems);
    logFn(`Page 1: ${initialItems.length} items (${allItems.length} total)`);
  } catch (err) {
    logFn(`Error fetching profile: ${err.message}`);
    return allItems;
  }

  // Phase 2: Paginate through AJAX pages
  for (let page = 2; page <= totalPages; page++) {
    if (checkCancelled()) break;

    // Throttle before every paginated request after the first one. Waiting
    // here (instead of at the end of the loop body) guarantees the delay is
    // also honoured after an HTTP error response, which previously skipped it.
    if (page > 2) {
      await new Promise((resolve) => setTimeout(resolve, delay));
      if (checkCancelled()) break;
    }

    const ajaxUrl = `${base}/ajax/model/${slug}/page-${page}/`;
    try {
      const resp = await fetch(ajaxUrl, {
        headers: {
          'User-Agent': UA,
          'X-Requested-With': 'XMLHttpRequest',
          'Referer': profileUrl,
        },
        signal: AbortSignal.timeout(15000),
      });
      if (!resp.ok) {
        if (resp.status === 404) {
          logFn(`Page ${page}: 404 — done`);
          break;
        }
        logFn(`Page ${page}: error (${resp.status})`);
        continue;
      }
      const html = await resp.text();
      if (!html || html.trim().length === 0) {
        logFn(`Page ${page}: empty — done`);
        break;
      }

      const pageItems = parseMediaFromHtml(html, base);
      const newCount = addNewItems(pageItems);

      if (newCount === 0) {
        logFn(`Page ${page}: all duplicates — stopping`);
        break;
      }

      logFn(`Page ${page}: ${pageItems.length} items (${newCount} new, ${allItems.length} total)`);
    } catch (err) {
      // Network/timeout failure on one page: log it and move on to the next.
      logFn(`Page ${page}: error — ${err.message}`);
    }
  }

  return allItems;
}
|
||||
|
||||
/**
 * Derive a local filename for a media item.
 *
 * Uses the last path segment of `url` when there is one; otherwise (empty
 * segment or unparsable URL) falls back to `<item.id>.mp4` for videos and
 * `<item.id>.jpg` for everything else.
 *
 * @param {string} url - Media URL.
 * @param {{ id: *, type: string }} item - Item supplying the fallback name.
 * @returns {string} Filename without any directory part.
 */
function filenameFromUrl(url, item) {
  let candidate = '';
  try {
    candidate = basename(new URL(url).pathname);
  } catch {
    // Unparsable URL: leave candidate empty so the fallback below is used.
  }
  if (candidate && candidate !== '/') return candidate;

  const fallbackExt = item.type === 'video' ? 'mp4' : 'jpg';
  return `${item.id}.${fallbackExt}`;
}
|
||||
|
||||
/**
 * Insert an `_md` suffix before the file extension
 * (`photo.jpg` -> `photo_md.jpg`).
 *
 * A name with no extension simply gets `_md` appended (`photo` -> `photo_md`).
 *
 * @param {string} filename - Filename without directory part.
 * @returns {string} The filename with `_md` placed before its extension.
 */
function mdFilename(filename) {
  const ext = extname(filename);
  // With an empty extension, slice(0, -0) is slice(0, 0) and would yield '',
  // discarding the whole name — so only slice when there is an extension.
  const stem = ext ? filename.slice(0, -ext.length) : filename;
  return `${stem}_md${ext}`;
}
|
||||
|
||||
/**
 * Best-effort download of a URL into memory.
 *
 * Resolves to null — never rejects — when `url` is falsy, the response status
 * is not OK, the body is shorter than 500 bytes, or the request throws
 * (including the 60 s timeout).
 *
 * @param {string} url - URL to download.
 * @param {string} [referer] - Referer header; defaults to `https://fapello.to/`.
 * @returns {Promise<Buffer | null>} The response body, or null on any failure.
 */
async function tryFetch(url, referer) {
  if (!url) return null;

  try {
    const requestOptions = {
      headers: { 'User-Agent': UA, 'Referer': referer || 'https://fapello.to/' },
      signal: AbortSignal.timeout(60000),
    };
    const response = await fetch(url, requestOptions);
    if (!response.ok) return null;

    const payload = Buffer.from(await response.arrayBuffer());
    // Bodies under 500 bytes are rejected rather than saved.
    return payload.length < 500 ? null : payload;
  } catch {
    return null;
  }
}
|
||||
|
||||
// Download all collected media items with concurrency
|
||||
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
|
||||
// Fallback: if full-res URL fails, download medium (thumbUrl) with _md suffix.
|
||||
// Upgrade: if _md file exists, try full-res again; replace _md on success.
|
||||
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled, referer) {
|
||||
mkdirSync(outputDir, { recursive: true });
|
||||
|
||||
let completed = 0;
|
||||
let errors = 0;
|
||||
let skipped = 0;
|
||||
let upgraded = 0;
|
||||
let index = 0;
|
||||
|
||||
async function processNext() {
|
||||
@@ -108,72 +271,71 @@ export async function downloadMedia(items, outputDir, workers, logFn, progressFn
|
||||
const current = index++;
|
||||
const item = items[current];
|
||||
|
||||
let filename;
|
||||
try {
|
||||
filename = basename(new URL(item.url).pathname);
|
||||
if (!filename || filename === '/') {
|
||||
filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
|
||||
}
|
||||
} catch {
|
||||
filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
|
||||
}
|
||||
const filename = filenameFromUrl(item.url, item);
|
||||
const filepath = join(outputDir, filename);
|
||||
const mdName = mdFilename(filename);
|
||||
const mdPath = join(outputDir, mdName);
|
||||
|
||||
let filepath = join(outputDir, filename);
|
||||
// Full-res already exists — skip
|
||||
if (existsSync(filepath)) {
|
||||
skipped++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
const resp = await fetch(item.url, {
|
||||
headers: {
|
||||
'User-Agent': UA,
|
||||
'Referer': 'https://fapello.to/',
|
||||
},
|
||||
signal: AbortSignal.timeout(60000),
|
||||
});
|
||||
if (!resp.ok) {
|
||||
logFn(`FAILED (${resp.status}): ${filename}`);
|
||||
errors++;
|
||||
// Medium version exists — try to upgrade to full-res
|
||||
if (existsSync(mdPath)) {
|
||||
const buf = await tryFetch(item.url, referer);
|
||||
if (buf) {
|
||||
writeFileSync(filepath, buf);
|
||||
try { unlinkSync(mdPath); } catch {}
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(filename).toLowerCase()) ? 'video' : 'image';
|
||||
try { removeMediaFile(folderName, mdName); } catch {}
|
||||
try { upsertMediaFile(folderName, filename, fileType, buf.length, Date.now(), null); } catch {}
|
||||
upgraded++;
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${filename} (upgraded from _md, ${(buf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
const buf = Buffer.from(await resp.arrayBuffer());
|
||||
if (buf.length < 500) {
|
||||
} else {
|
||||
skipped++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Neither exists — try full-res, then fallback to medium
|
||||
const buf = await tryFetch(item.url, referer);
|
||||
if (buf) {
|
||||
writeFileSync(filepath, buf);
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(filename).toLowerCase()) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, filename, fileType, buf.length, Date.now(), null); } catch {}
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${filename} (${(buf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Full-res failed — try medium (thumbUrl)
|
||||
if (item.thumbUrl) {
|
||||
const mdBuf = await tryFetch(item.thumbUrl, referer);
|
||||
if (mdBuf) {
|
||||
writeFileSync(mdPath, mdBuf);
|
||||
const folderName = basename(outputDir);
|
||||
const fileType = VIDEO_EXTS.has(extname(mdName).toLowerCase()) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, mdName, fileType, mdBuf.length, Date.now(), null); } catch {}
|
||||
completed++;
|
||||
logFn(`[${completed}/${items.length}] ${mdName} (medium fallback, ${(mdBuf.length / 1024).toFixed(1)} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle filename collision
|
||||
if (existsSync(filepath)) {
|
||||
const ext = extname(filename);
|
||||
const name = filename.slice(0, -ext.length);
|
||||
let i = 1;
|
||||
while (existsSync(filepath)) {
|
||||
filepath = join(outputDir, `${name}_${i}${ext}`);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
writeFileSync(filepath, buf);
|
||||
const savedName = basename(filepath);
|
||||
const folderName = basename(outputDir);
|
||||
const fileExt = extname(savedName).toLowerCase();
|
||||
const fileType = VIDEO_EXTS.has(fileExt) ? 'video' : 'image';
|
||||
try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch {}
|
||||
|
||||
completed++;
|
||||
const sizeKb = (buf.length / 1024).toFixed(1);
|
||||
logFn(`[${completed}/${items.length}] ${savedName} (${sizeKb} KB)`);
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
} catch (err) {
|
||||
logFn(`FAILED: ${filename} - ${err.message}`);
|
||||
errors++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
|
||||
// Both failed
|
||||
logFn(`FAILED: ${filename} — full-res and medium both unavailable`);
|
||||
errors++;
|
||||
progressFn(completed + skipped, errors, items.length);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -183,5 +345,6 @@ export async function downloadMedia(items, outputDir, workers, logFn, progressFn
|
||||
}
|
||||
await Promise.all(workerPromises);
|
||||
|
||||
if (upgraded > 0) logFn(`Upgraded ${upgraded} files from medium to full resolution`);
|
||||
return { completed, errors, skipped, total: items.length };
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user