- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition - Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers - SQLite-backed media index for instant gallery loads with startup scan - Duplicate detection and gallery filtering/sorting - HLS video component, log viewer, and scrape management UI - Dockerfile updated for Python/pywidevine, docker-compose volume for CDM Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
188 lines
5.7 KiB
JavaScript
188 lines
5.7 KiB
JavaScript
import { existsSync, writeFileSync, mkdirSync } from 'fs';
|
|
import { basename, join, extname } from 'path';
|
|
import { upsertMediaFile } from '../db.js';
|
|
|
|
// Browser-style User-Agent header value sent with every API and media request in this file.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
|
|
// Lowercase extensions (with leading dot) that downloadMedia indexes as 'video';
// a saved file with any other extension is indexed as 'image'.
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
|
|
|
/**
 * Split a model/media page URL into the site base and the numeric user id.
 *
 * The path must contain a `/model/{digits}` or `/media/{digits}` segment.
 *
 * @param {string} url - Absolute page URL.
 * @returns {{ base: string, userId: string }} `base` is `protocol//host`
 *   (an explicit port is kept); `userId` is the digit string from the path.
 * @throws {TypeError} If `url` is not a valid absolute URL (raised by `new URL`).
 * @throws {Error} If the path has no `/model/{id}` or `/media/{id}` segment.
 */
export function parseMediaUrl(url) {
  const parsed = new URL(url);

  // `host` (unlike `hostname`) keeps a non-default port, so requests built
  // from `base` reach the same server the page URL points at.
  const base = `${parsed.protocol}//${parsed.host}`;

  const match = parsed.pathname.match(/\/(?:model|media)\/(\d+)/);
  if (!match) throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345`);

  return { base, userId: match[1] };
}
|
|
|
|
/**
 * Fetch one page of media JSON from `GET {base}/api/media/{userId}/{page}/{order}`.
 *
 * The X-Requested-With and Referer headers are sent because the endpoint
 * answers 403 without them.
 *
 * @param {string} base - Site base, e.g. `https://host`.
 * @param {string} userId - Id placed in the API path and the Referer header.
 * @param {number} page - Page number placed in the API path.
 * @param {number|string} order - Order value placed in the API path.
 * @param {(msg: string) => void} logFn - Receives error messages.
 * @returns {Promise<any|null>} Parsed JSON body, or `null` on any failure.
 *   A 404 returns `null` without logging; other HTTP errors and thrown
 *   errors (network, timeout, bad JSON) are logged first.
 */
async function fetchApiPage(base, userId, page, order, logFn) {
  const apiUrl = `${base}/api/media/${userId}/${page}/${order}`;

  try {
    const headers = {
      'User-Agent': UA,
      'Accept': 'application/json, text/javascript, */*; q=0.01',
      'X-Requested-With': 'XMLHttpRequest',
      'Referer': `${base}/model/${userId}`,
    };
    // Give up on a page after 15 seconds.
    const response = await fetch(apiUrl, { headers, signal: AbortSignal.timeout(15000) });

    if (response.ok) {
      return await response.json();
    }

    // 404 is treated as a quiet miss; every other status is reported.
    if (response.status !== 404) {
      logFn(`API error (${response.status}): ${apiUrl}`);
    }
    return null;
  } catch (err) {
    logFn(`API fetch error: ${err.message}`);
    return null;
  }
}
|
|
|
|
/**
 * Collect media items by paging through the API until it runs dry.
 *
 * Paging stops when `maxPages` is reached, `checkCancelled()` returns true,
 * a page is missing/empty, a page is not an array, or a page contains no id
 * that has not already been seen.
 *
 * @param {string} base - Site base passed through to the page fetcher.
 * @param {string} userId - User id passed through to the page fetcher.
 * @param {number} maxPages - Highest page number to request (pages start at 1).
 * @param {number} delay - Milliseconds to wait between page requests.
 * @param {(msg: string) => void} logFn - Receives progress messages.
 * @param {() => boolean} checkCancelled - Polled before each page and before each wait.
 * @returns {Promise<Array<{ id: any, url: string, type: 'video'|'image' }>>}
 *   De-duplicated items that carry a `newUrl`, in the order encountered.
 */
export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkCancelled) {
  const allItems = [];
  const seen = new Set();

  for (let page = 1; page <= maxPages; page++) {
    if (checkCancelled()) break;

    logFn(`Fetching page ${page}...`);
    const data = await fetchApiPage(base, userId, page, 1, logFn);

    if (!data || data.length === 0) {
      logFn(`Page ${page}: no more items — done`);
      break;
    }

    // A truthy non-array payload (e.g. a JSON error object) cannot be
    // iterated; stop cleanly instead of throwing from the for...of below.
    if (!Array.isArray(data)) {
      logFn(`Page ${page}: unexpected response shape — stopping`);
      break;
    }

    let newCount = 0;
    for (const item of data) {
      // Skip null/undefined entries rather than crash on `item.id`.
      if (item == null) continue;
      if (seen.has(item.id)) continue;
      seen.add(item.id);
      newCount++;

      // type "2" = video (newUrl is mp4), type "1" = image (newUrl is full-size jpg)
      const isVideo = item.type === '2' || item.type === 2;
      const fullUrl = item.newUrl;
      if (!fullUrl) continue;

      allItems.push({
        id: item.id,
        url: fullUrl,
        type: isVideo ? 'video' : 'image',
      });
    }

    // A page of only already-seen ids ends the crawl.
    if (newCount === 0) {
      logFn(`Page ${page}: all duplicates — stopping`);
      break;
    }

    logFn(`Page ${page}: ${data.length} items (${newCount} new, ${allItems.length} total)`);

    // No wait after the final page or once cancellation was requested.
    if (page < maxPages && !checkCancelled()) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  return allItems;
}
|
|
|
|
/** Name used when the item's URL yields no usable basename: `{id}.mp4` or `{id}.jpg`. */
function fallbackMediaName(item) {
  return `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
}

/** Local filename for an item: the basename of its URL path, else the fallback name. */
function mediaFilenameFor(item) {
  try {
    const name = basename(new URL(item.url).pathname);
    return !name || name === '/' ? fallbackMediaName(item) : name;
  } catch {
    // Unparseable URL: fall back to an id-based name.
    return fallbackMediaName(item);
  }
}

/** First path in `outputDir` not yet on disk: `filename`, then `stem_1.ext`, `stem_2.ext`, ... */
function nextFreeMediaPath(outputDir, filename) {
  const ext = extname(filename);
  // With no extension, slice(0, -0) would return '' and the file would be
  // saved as "_1"; keep the whole name as the stem instead.
  const stem = ext ? filename.slice(0, -ext.length) : filename;

  let candidate = join(outputDir, filename);
  for (let i = 1; existsSync(candidate); i++) {
    candidate = join(outputDir, `${stem}_${i}${ext}`);
  }
  return candidate;
}

/**
 * Download every item into `outputDir` using a pool of concurrent workers.
 *
 * Per item: skip it when a file with its name already exists; otherwise fetch
 * it, skip bodies under 500 bytes, write the file (picking a `_N` suffixed
 * name if another worker created the same name meanwhile) and record it via
 * `upsertMediaFile`. Failures are logged and counted, never thrown.
 *
 * @param {Array<{ id: any, url: string, type: string }>} items - Items to download.
 * @param {string} outputDir - Target directory; created if missing. Its basename
 *   is the folder name passed to `upsertMediaFile`.
 * @param {number} workers - Desired concurrency. Values that are missing, not
 *   numeric or below 1 are treated as 1.
 * @param {(msg: string) => void} logFn - Receives per-file messages.
 * @param {(done: number, errors: number, total: number) => void} progressFn -
 *   Called after every item with (completed + skipped, errors, total).
 * @param {() => boolean} checkCancelled - Polled before each item is started.
 * @returns {Promise<{ completed: number, errors: number, skipped: number, total: number }>}
 */
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  const total = items.length;
  let completed = 0;
  let errors = 0;
  let skipped = 0;
  let index = 0; // shared cursor: each worker claims the next unclaimed item

  const reportProgress = () => progressFn(completed + skipped, errors, total);

  async function processNext() {
    while (index < total) {
      if (checkCancelled()) return;

      const item = items[index++];
      const filename = mediaFilenameFor(item);

      // Already on disk before any network work: treat as done.
      if (existsSync(join(outputDir, filename))) {
        skipped++;
        reportProgress();
        continue;
      }

      try {
        const resp = await fetch(item.url, {
          headers: {
            'User-Agent': UA,
            'Referer': 'https://fapello.to/',
          },
          signal: AbortSignal.timeout(60000),
        });
        if (!resp.ok) {
          logFn(`FAILED (${resp.status}): ${filename}`);
          errors++;
          reportProgress();
          continue;
        }

        const buf = Buffer.from(await resp.arrayBuffer());
        // Bodies under 500 bytes are not saved and count as skipped.
        if (buf.length < 500) {
          skipped++;
          reportProgress();
          continue;
        }

        // Re-check for a collision: another worker may have written the same
        // name while this download was in flight.
        const filepath = nextFreeMediaPath(outputDir, filename);
        writeFileSync(filepath, buf);

        const savedName = basename(filepath);
        const fileType = VIDEO_EXTS.has(extname(savedName).toLowerCase()) ? 'video' : 'image';
        try {
          upsertMediaFile(basename(outputDir), savedName, fileType, buf.length, Date.now(), null);
        } catch (err) {
          // Indexing is best-effort: the file is saved, so report and carry on.
          logFn(`Index update failed for ${savedName}: ${err.message}`);
        }

        completed++;
        const sizeKb = (buf.length / 1024).toFixed(1);
        logFn(`[${completed}/${total}] ${savedName} (${sizeKb} KB)`);
        reportProgress();
      } catch (err) {
        logFn(`FAILED: ${filename} - ${err.message}`);
        errors++;
        reportProgress();
      }
    }
  }

  // Math.min(undefined | NaN | 0, n) would start no workers at all and
  // silently download nothing, so clamp the request to at least one.
  const requested = Math.ceil(Number(workers));
  const workerCount = Math.min(requested >= 1 ? requested : 1, total);
  await Promise.all(Array.from({ length: workerCount }, () => processNext()));

  return { completed, errors, skipped, total };
}
|