236f36aae6
- JWT-based app authentication with user roles, folder/route access control - Dashboard with storage stats, health checks, and recent activity - Auto-download/scrape scheduler (12h interval) with per-user and per-job configs - Video upload, tagging, HLS transcoding, and detail pages - New scrapers: LeakGallery, Mega (megajs), yt-dlp - FlareSolverr integration for Cloudflare-protected sites - Gallery: advanced filtering (date, size, search), sort modes, equal-mix shuffle - Forum sites management with stored cookies/auth - GridWall/GridCell components for responsive media grid - Media API with folder-access permissions Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
351 lines
11 KiB
JavaScript
351 lines
11 KiB
JavaScript
import { existsSync, writeFileSync, mkdirSync, unlinkSync } from 'fs';
|
|
import { basename, join, extname } from 'path';
|
|
import { load as cheerioLoad } from 'cheerio';
|
|
import { upsertMediaFile, removeMediaFile } from '../db.js';
|
|
|
|
// User-Agent header value (desktop Chrome on macOS) sent with the HTTP requests made in this module.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
|
|
// Lower-case file extensions recorded as 'video' when a downloaded file is indexed;
// any other extension is recorded as 'image'.
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
|
|
|
/**
 * Split a profile URL into the pieces the scrapers need.
 *
 * Two URL shapes are recognised:
 *   - a path containing /model/{digits} or /media/{digits}  -> mode 'api'
 *   - a single-segment slug path such as /some-name/         -> mode 'html'
 *
 * @param {string} url - Absolute URL of the profile.
 * @returns {{ base: string, userId: string, mode: 'api' | 'html' }}
 *   `base` is protocol + hostname (no port, no path).
 * @throws {TypeError} If `url` is not a valid absolute URL.
 * @throws {Error} If the path matches neither supported shape.
 */
export function parseMediaUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  const base = `${protocol}//${hostname}`;

  // Numeric id form is checked first, so /model/123 never falls through to the slug rule.
  const numericId = /\/(?:model|media)\/(\d+)/.exec(pathname);
  if (numericId !== null) {
    return { base, userId: numericId[1], mode: 'api' };
  }

  // Slug form: exactly one path segment, optional trailing slash.
  const slug = /^\/([a-zA-Z0-9_-]+)\/?$/.exec(pathname);
  if (slug !== null) {
    return { base, userId: slug[1], mode: 'html' };
  }

  throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345 or https://fapello.com/username/`);
}
|
|
|
|
/**
 * Request one page of the JSON media API: GET {base}/api/media/{userId}/{page}/{order}.
 *
 * The X-Requested-With and Referer headers are sent because the endpoint
 * answers 403 without them.
 *
 * @param {string} base - Site origin, e.g. "https://host".
 * @param {string} userId - Numeric model id.
 * @param {number} page - Page number to request.
 * @param {number} order - Sort-order path segment expected by the API.
 * @param {(msg: string) => void} logFn - Receives error messages.
 * @returns {Promise<any|null>} Parsed JSON body, or null on any failure
 *   (a 404 is returned as null without logging; other failures are logged).
 */
async function fetchApiPage(base, userId, page, order, logFn) {
  const endpoint = `${base}/api/media/${userId}/${page}/${order}`;

  try {
    const requestOptions = {
      headers: {
        'User-Agent': UA,
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': `${base}/model/${userId}`,
      },
      signal: AbortSignal.timeout(15000),
    };

    const response = await fetch(endpoint, requestOptions);
    if (response.ok) {
      return await response.json();
    }

    // 404 is the quiet "no such page" case; every other status is reported.
    if (response.status !== 404) {
      logFn(`API error (${response.status}): ${endpoint}`);
    }
    return null;
  } catch (err) {
    logFn(`API fetch error: ${err.message}`);
    return null;
  }
}
|
|
|
|
/**
 * Collect all media items for a model by paginating through the JSON API.
 *
 * Pagination stops when: the page limit is reached, the job is cancelled,
 * a page cannot be fetched or is empty, a page is not a JSON array, or a
 * page contains only items that were already seen.
 *
 * @param {string} base - Site origin, e.g. "https://host".
 * @param {string} userId - Numeric model id.
 * @param {number} maxPages - Highest page number to request (pages start at 1).
 * @param {number} delay - Milliseconds to wait between page requests.
 * @param {(msg: string) => void} logFn - Progress/diagnostic logger.
 * @param {() => boolean} checkCancelled - Returns true when the job should stop.
 * @returns {Promise<Array<{ id: any, url: string, thumbUrl: string|null, type: 'video'|'image' }>>}
 */
export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seen = new Set();

  for (let page = 1; page <= maxPages; page++) {
    if (checkCancelled()) break;

    logFn(`Fetching page ${page}...`);
    const data = await fetchApiPage(base, userId, page, 1, logFn);

    if (!data || data.length === 0) {
      logFn(`Page ${page}: no more items — done`);
      break;
    }

    // The API may answer with a JSON object (e.g. an error payload) instead of
    // a list; iterating that would throw, so treat it as the end of the data.
    if (!Array.isArray(data)) {
      logFn(`Page ${page}: unexpected response (not a list) — stopping`);
      break;
    }

    let newCount = 0;
    for (const item of data) {
      // Skip malformed entries rather than crashing on `item.id`.
      if (item === null || typeof item !== 'object') continue;
      if (seen.has(item.id)) continue;
      seen.add(item.id);
      newCount++;

      // type "2" = video (newUrl is mp4), type "1" = image (newUrl is full-size jpg)
      const isVideo = item.type === '2' || item.type === 2;
      const fullUrl = item.newUrl;
      if (!fullUrl) continue;

      allItems.push({
        id: item.id,
        url: fullUrl,
        thumbUrl: item.newUrlThumb || null,
        type: isVideo ? 'video' : 'image',
      });
    }

    // A page with nothing new means the API has started repeating itself.
    if (newCount === 0) {
      logFn(`Page ${page}: all duplicates — stopping`);
      break;
    }

    logFn(`Page ${page}: ${data.length} items (${newCount} new, ${allItems.length} total)`);

    // Politeness delay between requests; skipped after the last page.
    if (page < maxPages && !checkCancelled()) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  return allItems;
}
|
|
|
|
// --- HTML-based scraping (fapello.com profile pages) ---
|
|
|
|
/**
 * Extract media items from a profile page (or AJAX fragment).
 *
 * Images are found through their 300px thumbnails; the full-size URL is
 * derived by dropping the "_300px" marker from the thumbnail URL. Videos are
 * taken from <video>/<source> elements whose src contains ".mp4".
 *
 * @param {string} html - Markup to parse.
 * @param {string} base - Site origin used to resolve non-absolute URLs.
 * @returns {Array<{ url: string, type: 'image'|'video' }>} Items in document
 *   order, all images first, then all videos.
 */
function parseMediaFromHtml(html, base) {
  const $ = cheerioLoad(html);
  const items = [];

  // Absolute http(s) URLs are kept verbatim. Everything else (root-relative,
  // protocol-relative "//cdn/...", or plain relative paths) is resolved
  // against `base` with the URL parser instead of string concatenation,
  // which produced broken URLs for the latter two forms.
  const toAbsolute = (ref) => {
    if (/^https?:\/\//i.test(ref)) return ref;
    try {
      return new URL(ref, base).href;
    } catch {
      // Unresolvable reference (or unusable base): nothing can be downloaded.
      return null;
    }
  };

  const collect = (ref, type) => {
    const url = toAbsolute(ref);
    if (url) items.push({ url, type });
  };

  // Find all image thumbnails in the grid
  $('img[src*="_300px."]').each((_, el) => {
    const thumbUrl = $(el).attr('src');
    if (!thumbUrl) return;
    // Convert thumbnail to full-size: remove _300px
    collect(thumbUrl.replace(/_300px\./, '.'), 'image');
  });

  // Find video elements (source tags with .mp4)
  $('video source[src*=".mp4"], video[src*=".mp4"]').each((_, el) => {
    const src = $(el).attr('src');
    if (!src) return;
    collect(src, 'video');
  });

  return items;
}
|
|
|
|
/**
 * Collect media items by scraping an HTML profile page and then its AJAX
 * continuation pages ({base}/ajax/model/{slug}/page-{n}/).
 *
 * Items are de-duplicated by URL and given a sequential `id` in discovery
 * order. Failures never throw: whatever was collected so far is returned.
 *
 * @param {string} base - Site origin, e.g. "https://host".
 * @param {string} slug - Profile slug (single path segment).
 * @param {number} maxPages - Upper bound on the number of pages to read.
 * @param {number} delay - Milliseconds to wait between AJAX page requests.
 * @param {(msg: string) => void} logFn - Progress/diagnostic logger.
 * @param {() => boolean} checkCancelled - Returns true when the job should stop.
 * @returns {Promise<Array<{ url: string, type: string, id: number }>>}
 */
export async function fetchAllMediaFromHtml(base, slug, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seen = new Set();
  let totalPages = maxPages;

  // Append items whose URL has not been seen yet; returns how many were new.
  const addUnseen = (pageItems) => {
    let added = 0;
    for (const item of pageItems) {
      if (seen.has(item.url)) continue;
      seen.add(item.url);
      allItems.push({ ...item, id: seen.size });
      added++;
    }
    return added;
  };

  // Phase 1: Fetch initial profile page to get data-max
  logFn(`Fetching profile page: ${base}/${slug}/`);
  try {
    const resp = await fetch(`${base}/${slug}/`, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) {
      logFn(`Profile page error (${resp.status})`);
      return allItems;
    }
    const html = await resp.text();
    const $ = cheerioLoad(html);

    // Get max pages from data-max attribute; never exceed the caller's limit.
    const dataMax = $('#showmore').attr('data-max');
    if (dataMax) {
      totalPages = Math.min(Number.parseInt(dataMax, 10) || maxPages, maxPages);
      logFn(`Detected ${totalPages} pages`);
    }

    // Parse initial page content
    const initialItems = parseMediaFromHtml(html, base);
    addUnseen(initialItems);
    logFn(`Page 1: ${initialItems.length} items (${allItems.length} total)`);
  } catch (err) {
    logFn(`Error fetching profile: ${err.message}`);
    return allItems;
  }

  // Phase 2: Paginate through AJAX pages
  for (let page = 2; page <= totalPages; page++) {
    if (checkCancelled()) break;

    const ajaxUrl = `${base}/ajax/model/${slug}/page-${page}/`;
    try {
      const resp = await fetch(ajaxUrl, {
        headers: {
          'User-Agent': UA,
          'X-Requested-With': 'XMLHttpRequest',
          'Referer': `${base}/${slug}/`,
        },
        signal: AbortSignal.timeout(15000),
      });

      if (resp.status === 404) {
        logFn(`Page ${page}: 404 — done`);
        break;
      }

      if (!resp.ok) {
        // Skip this page but fall through to the delay below: hitting the
        // next page immediately after an error (e.g. rate limiting) would
        // only make things worse.
        logFn(`Page ${page}: error (${resp.status})`);
      } else {
        const html = await resp.text();
        if (!html || html.trim().length === 0) {
          logFn(`Page ${page}: empty — done`);
          break;
        }

        const pageItems = parseMediaFromHtml(html, base);
        const newCount = addUnseen(pageItems);

        // A page with nothing new means the site has started repeating itself.
        if (newCount === 0) {
          logFn(`Page ${page}: all duplicates — stopping`);
          break;
        }

        logFn(`Page ${page}: ${pageItems.length} items (${newCount} new, ${allItems.length} total)`);
      }
    } catch (err) {
      logFn(`Page ${page}: error — ${err.message}`);
    }

    // Politeness delay between requests; skipped after the last page.
    if (page < totalPages && !checkCancelled()) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  return allItems;
}
|
|
|
|
/**
 * Derive a local filename for a media item.
 *
 * Uses the last path segment of `url` when there is one; otherwise (empty
 * path or unparsable URL) falls back to "{item.id}.mp4" for videos and
 * "{item.id}.jpg" for everything else.
 *
 * @param {string} url - Media URL.
 * @param {{ id: any, type: string }} item - Item used for the fallback name.
 * @returns {string} Filename without any directory part.
 */
function filenameFromUrl(url, item) {
  let fromPath = '';
  try {
    fromPath = basename(new URL(url).pathname);
  } catch {
    // Unparsable URL: the fallback name below is used instead.
  }

  if (fromPath && fromPath !== '/') {
    return fromPath;
  }

  const fallbackExt = item.type === 'video' ? 'mp4' : 'jpg';
  return `${item.id}.${fallbackExt}`;
}
|
|
|
|
/**
 * Build the "medium quality" variant of a filename by inserting "_md" before
 * the extension: "photo.jpg" -> "photo_md.jpg".
 *
 * Names without an extension simply get the suffix appended
 * ("photo" -> "photo_md").
 *
 * @param {string} filename - Plain filename (no directory part).
 * @returns {string} Filename carrying the _md marker.
 */
function mdFilename(filename) {
  const ext = extname(filename);
  // With an empty extension, slice(0, -0) is slice(0, 0) and would drop the
  // whole name, so only strip the extension when there is one.
  const stem = ext ? filename.slice(0, -ext.length) : filename;
  return `${stem}_md${ext}`;
}
|
|
|
|
/**
 * Best-effort download of a URL into memory.
 *
 * @param {string} url - Resource to fetch; a falsy value yields null immediately.
 * @param {string} [referer] - Referer header; defaults to "https://fapello.to/".
 * @returns {Promise<Buffer|null>} The body, or null when the request fails,
 *   the status is not OK, or the body is shorter than 500 bytes.
 */
async function tryFetch(url, referer) {
  if (!url) {
    return null;
  }

  try {
    const requestHeaders = {
      'User-Agent': UA,
      'Referer': referer || 'https://fapello.to/',
    };
    const response = await fetch(url, {
      headers: requestHeaders,
      signal: AbortSignal.timeout(60000),
    });
    if (!response.ok) {
      return null;
    }

    const payload = Buffer.from(await response.arrayBuffer());
    // Bodies under 500 bytes are rejected rather than saved as media.
    return payload.length >= 500 ? payload : null;
  } catch {
    return null;
  }
}
|
|
|
|
/**
 * Download all collected media items into `outputDir` using a small pool of
 * concurrent workers.
 *
 * Per item:
 *   - full-res file already on disk            -> counted as skipped
 *   - only the "_md" (medium) file on disk     -> retry full-res; on success the
 *                                                 _md file is replaced ("upgrade"),
 *                                                 otherwise counted as skipped
 *   - nothing on disk                          -> try full-res, then fall back to
 *                                                 `thumbUrl` saved under the _md name
 *
 * A file that cannot be written is counted as an error for that item only;
 * it does not abort the remaining downloads.
 *
 * @param {Array<{ id: any, url: string, thumbUrl?: string|null, type: string }>} items
 * @param {string} outputDir - Target directory (created if missing). Its last
 *   path segment is used as the folder name for the media index.
 * @param {number} workers - Desired concurrency; values below 1 (or non-numeric)
 *   are treated as 1 so that the batch is never silently skipped.
 * @param {(msg: string) => void} logFn - Progress/diagnostic logger.
 * @param {(done: number, errors: number, total: number) => void} progressFn
 *   Called after every item; `done` is completed + skipped.
 * @param {() => boolean} checkCancelled - Returns true when the job should stop.
 * @param {string} [referer] - Referer header passed to the downloader.
 * @returns {Promise<{ completed: number, errors: number, skipped: number, total: number }>}
 */
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled, referer) {
  mkdirSync(outputDir, { recursive: true });

  const folderName = basename(outputDir);
  const total = items.length;

  let completed = 0;
  let errors = 0;
  let skipped = 0;
  let upgraded = 0;
  let index = 0;

  const reportProgress = () => progressFn(completed + skipped, errors, total);
  const kb = (buf) => (buf.length / 1024).toFixed(1);

  // Write `buf` as `name` inside outputDir and register it in the media index.
  // Returns false (after logging) when the file could not be written.
  const saveFile = (name, buf) => {
    try {
      writeFileSync(join(outputDir, name), buf);
    } catch (err) {
      logFn(`FAILED: ${name} — could not write file: ${err.message}`);
      return false;
    }
    const fileType = VIDEO_EXTS.has(extname(name).toLowerCase()) ? 'video' : 'image';
    try {
      upsertMediaFile(folderName, name, fileType, buf.length, Date.now(), null);
    } catch (err) {
      // Indexing is best-effort: the file is on disk either way.
      logFn(`Warning: could not index ${name}: ${err.message}`);
    }
    return true;
  };

  // Remove a superseded medium file from disk and from the media index.
  const discardMedium = (mdName, mdPath) => {
    try {
      unlinkSync(mdPath);
    } catch {
      // A leftover _md file is harmless; the full-res file takes precedence.
    }
    try {
      removeMediaFile(folderName, mdName);
    } catch (err) {
      logFn(`Warning: could not un-index ${mdName}: ${err.message}`);
    }
  };

  // Worker loop: every worker pulls the next unclaimed item until none remain.
  async function processNext() {
    while (index < total) {
      if (checkCancelled()) return;

      const item = items[index++];
      const filename = filenameFromUrl(item.url, item);
      const mdName = mdFilename(filename);
      const mdPath = join(outputDir, mdName);

      // Full-res already exists — skip
      if (existsSync(join(outputDir, filename))) {
        skipped++;
        reportProgress();
        continue;
      }

      const hasMedium = existsSync(mdPath);
      const buf = await tryFetch(item.url, referer);

      if (buf) {
        if (!saveFile(filename, buf)) {
          errors++;
          reportProgress();
          continue;
        }
        completed++;
        if (hasMedium) {
          discardMedium(mdName, mdPath);
          upgraded++;
          logFn(`[${completed}/${total}] ${filename} (upgraded from _md, ${kb(buf)} KB)`);
        } else {
          logFn(`[${completed}/${total}] ${filename} (${kb(buf)} KB)`);
        }
        reportProgress();
        continue;
      }

      // Full-res still unavailable but the medium copy is on disk — keep it.
      if (hasMedium) {
        skipped++;
        reportProgress();
        continue;
      }

      // Full-res failed — try medium (thumbUrl)
      const mdBuf = item.thumbUrl ? await tryFetch(item.thumbUrl, referer) : null;
      if (mdBuf) {
        if (saveFile(mdName, mdBuf)) {
          completed++;
          logFn(`[${completed}/${total}] ${mdName} (medium fallback, ${kb(mdBuf)} KB)`);
        } else {
          errors++;
        }
        reportProgress();
        continue;
      }

      // Both failed
      logFn(`FAILED: ${filename} — full-res and medium both unavailable`);
      errors++;
      reportProgress();
    }
  }

  // 0, negative, undefined or NaN would start no workers at all; use one instead.
  const requested = Number(workers);
  const poolSize = Math.min(requested >= 1 ? requested : 1, total);

  const workerPromises = [];
  for (let i = 0; i < poolSize; i++) {
    workerPromises.push(processNext());
  }
  await Promise.all(workerPromises);

  if (upgraded > 0) logFn(`Upgraded ${upgraded} files from medium to full resolution`);
  return { completed, errors, skipped, total };
}
|