Files
OFApp/server/scrapers/medialink.js
Trey t 1e5f54f60b Add DRM downloads, scrapers, gallery index, and UI improvements
- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 11:29:11 -06:00

188 lines
5.7 KiB
JavaScript

import { existsSync, writeFileSync, mkdirSync } from 'fs';
import { basename, join, extname } from 'path';
import { upsertMediaFile } from '../db.js';
// User-Agent header value sent with the API requests and media downloads below.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
// Lowercase, dot-prefixed file extensions that make a saved file get indexed as
// 'video'; any other extension is indexed as 'image'.
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
/**
 * Split a model/media page URL into the site base URL and the numeric user id.
 *
 * @param {string} url - Absolute page URL whose path contains `/model/{id}` or `/media/{id}`.
 * @returns {{ base: string, userId: string }} `base` is `protocol//host` (an
 *   explicit non-default port is kept); `userId` is the digit run from the path.
 * @throws {TypeError} If `url` is not a valid absolute URL (raised by `new URL`).
 * @throws {Error} If the path has no `/model/{digits}` or `/media/{digits}` segment.
 */
export function parseMediaUrl(url) {
  const parsed = new URL(url);
  // `host` keeps a non-default port where `hostname` would drop it, so requests
  // built from `base` go to the same server the page URL pointed at.
  const base = `${parsed.protocol}//${parsed.host}`;
  // Support /model/{id} or /media/{id}
  const m = parsed.pathname.match(/\/(?:model|media)\/(\d+)/);
  if (!m) throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345`);
  return { base, userId: m[1] };
}
/**
 * Request one page of media JSON from the site's API.
 *
 * Endpoint: GET {base}/api/media/{userId}/{page}/{order}
 * The X-Requested-With and Referer headers are required to avoid a 403.
 *
 * @param {string} base - Site base URL, e.g. the `base` from parseMediaUrl.
 * @param {string|number} userId - Id placed in the API path and the Referer.
 * @param {number} page - Page number placed in the API path.
 * @param {string|number} order - Order value placed in the API path.
 * @param {(msg: string) => void} logFn - Receives a message on non-404 failures.
 * @returns {Promise<*|null>} The parsed JSON body, or null on any HTTP error,
 *   network error, timeout (15 s) or JSON parse failure.
 */
async function fetchApiPage(base, userId, page, order, logFn) {
  const endpoint = `${base}/api/media/${userId}/${page}/${order}`;

  try {
    const requestOptions = {
      headers: {
        'User-Agent': UA,
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': `${base}/model/${userId}`,
      },
      signal: AbortSignal.timeout(15000),
    };

    const response = await fetch(endpoint, requestOptions);
    if (response.ok) {
      // Awaited here so a malformed body is handled by the catch below.
      return await response.json();
    }

    // A 404 yields null without logging; every other status is reported.
    if (response.status !== 404) {
      logFn(`API error (${response.status}): ${endpoint}`);
    }
    return null;
  } catch (err) {
    logFn(`API fetch error: ${err.message}`);
    return null;
  }
}
/**
 * Collect media items by walking the paginated API, starting at page 1.
 *
 * Paging stops at the first of: `maxPages` reached, `checkCancelled()` truthy,
 * a page that is null/empty, a page whose payload is not an array, or a page
 * that contains no ids that were not already seen.
 *
 * @param {string} base - Site base URL passed through to fetchApiPage.
 * @param {string|number} userId - Id passed through to fetchApiPage.
 * @param {number} maxPages - Highest page number that may be requested.
 * @param {number} delay - Milliseconds to wait between page requests.
 * @param {(msg: string) => void} logFn - Receives progress messages.
 * @param {() => boolean} checkCancelled - Polled before each page and before each delay.
 * @returns {Promise<Array<{id: *, url: string, type: 'video'|'image'}>>}
 *   Items in API order, de-duplicated by id; entries without `newUrl` are omitted.
 */
export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seen = new Set();

  for (let page = 1; page <= maxPages; page++) {
    if (checkCancelled()) break;

    logFn(`Fetching page ${page}...`);
    const data = await fetchApiPage(base, userId, page, 1, logFn);

    if (!data || data.length === 0) {
      logFn(`Page ${page}: no more items — done`);
      break;
    }
    // A JSON object (e.g. an error payload) is not iterable; stop cleanly and
    // keep what was collected instead of throwing from the for...of below.
    if (!Array.isArray(data)) {
      logFn(`Page ${page}: unexpected API response (not a list) — stopping`);
      break;
    }

    let newCount = 0;
    for (const item of data) {
      if (seen.has(item.id)) continue;
      seen.add(item.id);
      // Counted as new even if it turns out to have no URL below.
      newCount++;

      // type "2" = video (newUrl is mp4), type "1" = image (newUrl is full-size jpg)
      const isVideo = item.type === '2' || item.type === 2;
      const fullUrl = item.newUrl;
      if (!fullUrl) continue;

      allItems.push({
        id: item.id,
        url: fullUrl,
        type: isVideo ? 'video' : 'image',
      });
    }

    if (newCount === 0) {
      logFn(`Page ${page}: all duplicates — stopping`);
      break;
    }
    logFn(`Page ${page}: ${data.length} items (${newCount} new, ${allItems.length} total)`);

    // No pause after the final allowed page or once cancellation is requested.
    if (page < maxPages && !checkCancelled()) {
      await new Promise(r => setTimeout(r, delay));
    }
  }

  return allItems;
}
/**
 * Choose the filename a media item is stored under: the last path segment of
 * its URL, or `{id}.mp4` / `{id}.jpg` (by item type) when the URL cannot be
 * parsed or has no usable final segment.
 */
function mediaFilenameFor(item) {
  const fallback = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
  try {
    const fromUrl = basename(new URL(item.url).pathname);
    return !fromUrl || fromUrl === '/' ? fallback : fromUrl;
  } catch {
    return fallback;
  }
}

/**
 * Return a path inside `outputDir` that does not exist yet: `filename` itself
 * if free, otherwise `{stem}_1{ext}`, `{stem}_2{ext}`, ... until one is free.
 */
function unusedFilepath(outputDir, filename) {
  let candidate = join(outputDir, filename);
  if (!existsSync(candidate)) return candidate;

  const ext = extname(filename);
  // With no extension, slice(0, -0) would return '' and lose the whole name.
  const stem = ext ? filename.slice(0, -ext.length) : filename;
  for (let n = 1; existsSync(candidate); n++) {
    candidate = join(outputDir, `${stem}_${n}${ext}`);
  }
  return candidate;
}

/**
 * Download all collected media items into `outputDir` with a pool of
 * concurrent workers that pull from a shared queue position.
 *
 * Per item: an already existing target file is counted as skipped; a non-OK
 * HTTP status or thrown error is counted as an error; a body under 500 bytes
 * is not written and is counted as skipped; otherwise the file is written,
 * registered through upsertMediaFile, and counted as completed.
 *
 * @param {Array<{id: *, url: string, type: string}>} items - Items to download.
 * @param {string} outputDir - Target directory; created recursively if missing.
 *   Its last path segment is used as the folder name in the media index.
 * @param {number} workers - Upper bound on concurrent downloads.
 * @param {(msg: string) => void} logFn - Receives per-file result messages.
 * @param {(done: number, errors: number, total: number) => void} progressFn -
 *   Called after each handled item with (completed + skipped, errors, total).
 * @param {() => boolean} checkCancelled - Polled before each item; a truthy
 *   result ends that worker.
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  let completed = 0;
  let errors = 0;
  let skipped = 0;
  let index = 0;

  const reportProgress = () => progressFn(completed + skipped, errors, items.length);

  async function processNext() {
    while (index < items.length) {
      if (checkCancelled()) return;

      // Claiming the slot is synchronous, so no two workers take the same item.
      const item = items[index++];
      const filename = mediaFilenameFor(item);

      if (existsSync(join(outputDir, filename))) {
        skipped++;
        reportProgress();
        continue;
      }

      try {
        const resp = await fetch(item.url, {
          headers: {
            'User-Agent': UA,
            'Referer': 'https://fapello.to/',
          },
          signal: AbortSignal.timeout(60000),
        });

        if (!resp.ok) {
          logFn(`FAILED (${resp.status}): ${filename}`);
          errors++;
          reportProgress();
          continue;
        }

        const buf = Buffer.from(await resp.arrayBuffer());
        if (buf.length < 500) {
          skipped++;
          reportProgress();
          continue;
        }

        // Re-check at write time: another worker may have saved the same
        // filename while this download was in flight.
        const filepath = unusedFilepath(outputDir, filename);
        writeFileSync(filepath, buf);

        const savedName = basename(filepath);
        const folderName = basename(outputDir);
        const fileExt = extname(savedName).toLowerCase();
        const fileType = VIDEO_EXTS.has(fileExt) ? 'video' : 'image';
        try {
          upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null);
        } catch (err) {
          // Indexing is best-effort: the file is on disk, so report and move on.
          logFn(`Index update failed for ${savedName}: ${err.message}`);
        }

        completed++;
        const sizeKb = (buf.length / 1024).toFixed(1);
        logFn(`[${completed}/${items.length}] ${savedName} (${sizeKb} KB)`);
        reportProgress();
      } catch (err) {
        logFn(`FAILED: ${filename} - ${err.message}`);
        errors++;
        reportProgress();
      }
    }
  }

  const workerPromises = [];
  for (let i = 0; i < Math.min(workers, items.length); i++) {
    workerPromises.push(processNext());
  }
  await Promise.all(workerPromises);

  return { completed, errors, skipped, total: items.length };
}