Files
OFApp/server/scrapers/coomer.js
Trey t 1e5f54f60b Add DRM downloads, scrapers, gallery index, and UI improvements
- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 11:29:11 -06:00

202 lines
5.7 KiB
JavaScript

import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { basename, join, extname } from 'path';
import { upsertMediaFile } from '../db.js';
// Browser-like User-Agent sent with every request (presumably to avoid bot filtering — TODO confirm against server behavior).
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
/**
 * Parse a Coomer/Kemono profile URL into its API components.
 *
 * @param {string} url - e.g. "https://coomer.su/onlyfans/user/some_id"
 * @returns {{ base: string, service: string, userId: string }}
 * @throws {Error} when the path is not of the form /SERVICE/user/USER_ID
 */
export function parseUserUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  const match = pathname.match(/^\/([^/]+)\/user\/([^/?#]+)/);
  if (!match) {
    throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID`);
  }
  const [, service, userId] = match;
  return { base: `${protocol}//${hostname}`, service, userId };
}
/**
 * GET a Coomer/Kemono API endpoint as JSON, retrying transient failures.
 *
 * Per-attempt handling (up to `retries` attempts total):
 *  - 2xx        -> parsed JSON body
 *  - 404        -> [] (treated as "nothing there")
 *  - 429        -> log, back off 5s * attempt number, retry
 *  - 5xx        -> back off 2s, retry
 *  - other 4xx  -> log and return null immediately
 *  - thrown (network/timeout) -> back off 2s; rethrown on the final attempt
 * Returns null when all attempts are exhausted without a definitive answer.
 *
 * @param {string} apiUrl
 * @param {(msg: string) => void} logFn
 * @param {number} [retries=3]
 */
async function fetchApi(apiUrl, logFn, retries = 3) {
  for (let attempt = 0; attempt < retries; attempt++) {
    const isLastAttempt = attempt === retries - 1;
    try {
      const resp = await fetch(apiUrl, {
        headers: { 'User-Agent': UA, 'Accept': 'application/json' },
        signal: AbortSignal.timeout(15000),
      });
      if (resp.ok) return await resp.json();
      if (resp.status === 404) return [];
      if (resp.status === 429) {
        // Linear backoff: 5s, then 10s, then 15s...
        const waitSecs = 5 * (attempt + 1);
        logFn(`Rate limited, waiting ${waitSecs}s...`);
        await sleep(waitSecs * 1000);
      } else if (resp.status >= 500) {
        await sleep(2000);
      } else {
        // Non-retryable client error.
        logFn(`API error ${resp.status}: ${apiUrl}`);
        return null;
      }
    } catch (err) {
      if (isLastAttempt) throw err;
      await sleep(2000);
    }
  }
  return null;
}
/** Resolve after the given number of milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
/**
 * Flatten API post objects into a deduplicated list of downloadable files.
 *
 * Each post may carry one primary `file` plus any number of `attachments`;
 * any entry with a `path` becomes `{ url, name }`. URLs are deduplicated
 * across the whole post list. The display name prefers the API-provided
 * `name`, falling back to the path's basename.
 *
 * @param {Array<object>} posts - posts as returned by the API
 * @param {string} cdnBase - CDN prefix, e.g. "https://n1.coomer.su/data"
 * @returns {Array<{url: string, name: string}>}
 */
export function collectFiles(posts, cdnBase) {
  const seenUrls = new Set();
  const results = [];
  for (const post of posts) {
    const candidates = [post.file, ...(post.attachments || [])];
    for (const item of candidates) {
      if (!item || !item.path) continue;
      const url = `${cdnBase}${item.path}`;
      if (seenUrls.has(url)) continue;
      seenUrls.add(url);
      results.push({ url, name: item.name || basename(item.path) });
    }
  }
  return results;
}
/**
 * Download one file into outputDir.
 *
 * Skips the download entirely when the target name already exists. If the
 * name appears between the initial check and the write (e.g. a concurrent
 * worker saved a different file under the same name), the file is saved
 * under a "_N"-suffixed name instead of overwriting.
 *
 * @param {string} url - absolute file URL
 * @param {string} outputDir - destination directory (must exist)
 * @param {string} name - desired file name
 * @param {(msg: string) => void} logFn
 * @returns {Promise<{skipped?: true} | {error?: true} | {filename: string, sizeKb: string}>}
 */
async function downloadFile(url, outputDir, name, logFn) {
  let filepath = join(outputDir, name);
  if (existsSync(filepath)) {
    // File already exists, skip
    return { skipped: true };
  }
  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(60000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${name}`);
      return { error: true };
    }
    const buf = Buffer.from(await resp.arrayBuffer());
    // Handle filename collision (different content)
    if (existsSync(filepath)) {
      const ext = extname(name);
      // BUGFIX: name.slice(0, -ext.length) yields '' for extension-less
      // names (slice(0, -0) === slice(0, 0)), producing files like "_1".
      // Use an explicit end index instead.
      const stem = name.slice(0, name.length - ext.length);
      let i = 1;
      while (existsSync(filepath)) {
        filepath = join(outputDir, `${stem}_${i}${ext}`);
        i++;
      }
    }
    writeFileSync(filepath, buf);
    const savedName = basename(filepath);
    const folderName = basename(outputDir);
    const ext = extname(savedName).toLowerCase();
    // Anything that isn't a known video extension is indexed as an image.
    const fileType = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v'].includes(ext) ? 'video' : 'image';
    // Best-effort media-index update; failure here must not fail the download.
    try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch { /* ignore */ }
    const sizeKb = (buf.length / 1024).toFixed(1);
    return { filename: savedName, sizeKb };
  } catch (err) {
    logFn(`FAILED: ${name} - ${err.message}`);
    return { error: true };
  }
}
/**
 * Page through a creator's posts and collect every downloadable file URL.
 *
 * Stops early on cancellation, API failure, an empty/short page (the API
 * serves fixed pages of 50), or after maxPages pages.
 *
 * @param {string} base - site origin, e.g. "https://coomer.su"
 * @param {string} service - service segment, e.g. "onlyfans"
 * @param {string} userId
 * @param {number} maxPages - hard cap on pages fetched
 * @param {(msg: string) => void} logFn
 * @param {() => boolean} checkCancelled - polled before each page
 * @returns {Promise<Array<{url: string, name: string}>>}
 */
export async function fetchAllPosts(base, service, userId, maxPages, logFn, checkCancelled) {
  const allFiles = [];
  // The CDN base depends only on `base`; compute it once instead of
  // re-parsing the URL on every page iteration.
  const parsed = new URL(base);
  const cdnBase = `${parsed.protocol}//n1.${parsed.hostname}/data`;
  for (let page = 0; page < maxPages; page++) {
    if (checkCancelled()) break;
    const offset = page * 50;
    const apiUrl = `${base}/api/v1/${service}/user/${userId}/posts?o=${offset}`;
    let posts;
    try {
      posts = await fetchApi(apiUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }
    if (!posts || posts.length === 0) break;
    const files = collectFiles(posts, cdnBase);
    allFiles.push(...files);
    logFn(`Page ${page + 1}: ${posts.length} posts (${allFiles.length} files total)`);
    // A short page means we reached the last page.
    if (posts.length < 50) break;
  }
  return allFiles;
}
/**
 * Download a batch of files into outputDir using a fixed-size worker pool.
 *
 * Files whose name already exists in outputDir are counted as skipped and
 * never queued. Workers share a cursor into the pending list and stop when
 * it is exhausted or checkCancelled() turns true.
 *
 * @param {Array<{url: string, name: string}>} files
 * @param {string} outputDir - created if missing
 * @param {number} concurrency - maximum simultaneous downloads
 * @param {(msg: string) => void} logFn
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });
  // Partition into already-present vs pending before spinning up workers.
  let skipped = 0;
  const pending = [];
  for (const file of files) {
    if (existsSync(join(outputDir, file.name))) {
      skipped++;
    } else {
      pending.push(file);
    }
  }
  if (skipped > 0) logFn(`Skipping ${skipped} already downloaded files`);
  logFn(`Downloading ${pending.length} files with ${concurrency} workers...`);
  let completed = 0;
  let errors = 0;
  // NOTE: the original declared an `active` counter that was never used;
  // it has been removed. Workers pull from a shared cursor instead.
  let next = 0;
  const worker = async () => {
    while (next < pending.length) {
      if (checkCancelled()) return;
      const file = pending[next++];
      const result = await downloadFile(file.url, outputDir, file.name, logFn);
      if (result.error) {
        errors++;
      } else if (!result.skipped) {
        completed++;
        logFn(`[${completed}/${pending.length}] ${result.filename} (${result.sizeKb} KB)`);
      }
      progressFn(completed + skipped, errors, files.length);
    }
  };
  const poolSize = Math.min(concurrency, pending.length);
  await Promise.all(Array.from({ length: poolSize }, () => worker()));
  return { completed, errors, skipped, total: files.length };
}