- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition - Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers - SQLite-backed media index for instant gallery loads with startup scan - Duplicate detection and gallery filtering/sorting - HLS video component, log viewer, and scrape management UI - Dockerfile updated for Python/pywidevine, docker-compose volume for CDM Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
202 lines
5.7 KiB
JavaScript
202 lines
5.7 KiB
JavaScript
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
|
import { basename, join, extname } from 'path';
|
|
import { upsertMediaFile } from '../db.js';
|
|
|
|
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
|
|
|
/**
 * Split a coomer/kemono profile URL into its API components.
 *
 * @param {string} url - e.g. https://coomer.su/onlyfans/user/someone
 * @returns {{ base: string, service: string, userId: string }}
 * @throws {Error} when the path does not look like /SERVICE/user/USER_ID
 */
export function parseUserUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  const match = pathname.match(/^\/([^/]+)\/user\/([^/?#]+)/);
  if (!match) throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID`);
  const [, service, userId] = match;
  return { base: `${protocol}//${hostname}`, service, userId };
}
|
|
|
|
/**
 * GET a JSON API endpoint with bounded retries.
 *
 * Handling per attempt: 2xx -> parsed JSON; 404 -> [] (treated as "no
 * posts"); 429 -> linear backoff then retry; 5xx -> 2s pause then retry;
 * any other status -> log and return null. Network/timeout errors retry
 * with a 2s pause, and the last one is rethrown to the caller.
 *
 * @param {string} apiUrl - fully-qualified API URL
 * @param {(msg: string) => void} logFn - progress/error logger
 * @param {number} [retries=3] - maximum number of attempts
 * @returns {Promise<any[] | object | null>}
 */
async function fetchApi(apiUrl, logFn, retries = 3) {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const resp = await fetch(apiUrl, {
        headers: { 'User-Agent': UA, 'Accept': 'application/json' },
        signal: AbortSignal.timeout(15000),
      });

      if (resp.ok) return await resp.json();
      if (resp.status === 404) return [];

      if (resp.status === 429) {
        // Linear backoff: 5s, 10s, 15s, ...
        const wait = 5 * attempt;
        logFn(`Rate limited, waiting ${wait}s...`);
        await sleep(wait * 1000);
        continue;
      }

      if (resp.status >= 500) {
        await sleep(2000);
        continue;
      }

      // Non-retryable client error.
      logFn(`API error ${resp.status}: ${apiUrl}`);
      return null;
    } catch (err) {
      if (attempt === retries) throw err;
      await sleep(2000);
    }
  }
  return null;
}
|
|
|
|
/**
 * Promise-based delay.
 *
 * @param {number} ms - duration in milliseconds
 * @returns {Promise<void>} resolves once the timeout fires
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
|
|
/**
 * Flatten API post objects into a deduplicated list of downloadable files.
 *
 * Each post contributes its main `file` (if it has a path) plus any
 * attachments with a path. URLs are deduplicated across all posts; the
 * display name falls back to the path's basename when missing.
 *
 * @param {Array<object>} posts - raw post objects from the API
 * @param {string} cdnBase - CDN prefix, e.g. https://n1.coomer.su/data
 * @returns {Array<{ url: string, name: string }>}
 */
export function collectFiles(posts, cdnBase) {
  const seenUrls = new Set();
  const result = [];

  for (const post of posts) {
    const candidates = [];
    if (post.file?.path) candidates.push(post.file);
    for (const att of post.attachments || []) {
      if (att.path) candidates.push(att);
    }

    for (const item of candidates) {
      const url = `${cdnBase}${item.path}`;
      if (seenUrls.has(url)) continue;
      seenUrls.add(url);
      result.push({ url, name: item.name || basename(item.path) });
    }
  }

  return result;
}
|
|
|
|
/**
 * Download a single file into outputDir.
 *
 * Skips files that already exist, renames on collision with different
 * content (suffixing `_N`), and records the saved file in the media index.
 *
 * @param {string} url - fully-qualified CDN URL
 * @param {string} outputDir - destination directory (must already exist)
 * @param {string} name - suggested filename (remote-supplied; sanitized here)
 * @param {(msg: string) => void} logFn - progress/error logger
 * @returns {Promise<{skipped: true} | {error: true} | {filename: string, sizeKb: string}>}
 */
async function downloadFile(url, outputDir, name, logFn) {
  // Security: `name` originates from the remote API. Strip any directory
  // components so a crafted name like "../../evil" cannot escape outputDir.
  const safeName = basename(name);
  let filepath = join(outputDir, safeName);
  if (existsSync(filepath)) {
    // File already exists, skip
    return { skipped: true };
  }

  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(60000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${name}`);
      return { error: true };
    }

    const buf = Buffer.from(await resp.arrayBuffer());

    // Handle filename collision (different content): a concurrent worker may
    // have written the same filename while we were fetching.
    if (existsSync(filepath)) {
      const ext = extname(safeName);
      // Bug fix: for extension-less names `slice(0, -0)` is `""`, which
      // collapsed the whole filename to "_1". Keep the full name as the stem.
      const stem = ext ? safeName.slice(0, -ext.length) : safeName;
      let i = 1;
      while (existsSync(filepath)) {
        filepath = join(outputDir, `${stem}_${i}${ext}`);
        i++;
      }
    }

    writeFileSync(filepath, buf);
    const savedName = basename(filepath);
    const folderName = basename(outputDir);
    const ext = extname(savedName).toLowerCase();
    const fileType = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v'].includes(ext) ? 'video' : 'image';
    // Best-effort index update — a DB failure must not fail the download.
    try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch { /* ignore */ }
    const sizeKb = (buf.length / 1024).toFixed(1);
    return { filename: savedName, sizeKb };
  } catch (err) {
    logFn(`FAILED: ${name} - ${err.message}`);
    return { error: true };
  }
}
|
|
|
|
/**
 * Page through a creator's post feed and collect every downloadable file.
 *
 * Stops early on cancellation, API failure, an empty page, or a short page
 * (fewer than the 50-post page size means the feed is exhausted).
 *
 * @param {string} base - site origin, e.g. https://coomer.su
 * @param {string} service - service segment, e.g. "onlyfans"
 * @param {string} userId - creator ID
 * @param {number} maxPages - hard cap on pages fetched
 * @param {(msg: string) => void} logFn - progress logger
 * @param {() => boolean} checkCancelled - polled before each page
 * @returns {Promise<Array<{ url: string, name: string }>>}
 */
export async function fetchAllPosts(base, service, userId, maxPages, logFn, checkCancelled) {
  const allFiles = [];

  // The CDN base depends only on `base`; compute it once instead of
  // re-deriving it on every page iteration (loop-invariant hoist).
  const parsed = new URL(base);
  const cdnBase = `${parsed.protocol}//n1.${parsed.hostname}/data`;

  for (let page = 0; page < maxPages; page++) {
    if (checkCancelled()) break;

    const offset = page * 50;
    const apiUrl = `${base}/api/v1/${service}/user/${userId}/posts?o=${offset}`;

    let posts;
    try {
      posts = await fetchApi(apiUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }

    if (!posts || posts.length === 0) break;

    allFiles.push(...collectFiles(posts, cdnBase));

    logFn(`Page ${page + 1}: ${posts.length} posts (${allFiles.length} files total)`);

    // A short page means we've reached the end of the feed.
    if (posts.length < 50) break;
  }

  return allFiles;
}
|
|
|
|
/**
 * Download a list of files into outputDir with a fixed-size worker pool.
 *
 * Files already on disk are skipped up front; remaining files are pulled
 * from a shared index by `concurrency` worker loops.
 *
 * @param {Array<{ url: string, name: string }>} files - download targets
 * @param {string} outputDir - destination directory (created if missing)
 * @param {number} concurrency - number of parallel workers
 * @param {(msg: string) => void} logFn - progress logger
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled - polled before each download
 * @returns {Promise<{ completed: number, errors: number, skipped: number, total: number }>}
 */
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  // Filter out already existing files so workers only touch new URLs.
  const toDownload = [];
  let skipped = 0;
  for (const f of files) {
    if (existsSync(join(outputDir, f.name))) {
      skipped++;
    } else {
      toDownload.push(f);
    }
  }

  if (skipped > 0) logFn(`Skipping ${skipped} already downloaded files`);
  logFn(`Downloading ${toDownload.length} files with ${concurrency} workers...`);

  let completed = 0;
  let errors = 0;
  let index = 0;

  // Simple semaphore-based concurrency: each worker claims the next
  // unprocessed index until the list is exhausted.
  async function processNext() {
    while (index < toDownload.length) {
      if (checkCancelled()) return;

      const file = toDownload[index++];

      const result = await downloadFile(file.url, outputDir, file.name, logFn);
      if (result.error) {
        errors++;
      } else if (result.skipped) {
        // Bug fix: files skipped mid-run (written by a concurrent worker
        // between the pre-filter and the write) previously counted toward
        // nothing, so progress and the returned counts undercounted.
        skipped++;
      } else {
        completed++;
        logFn(`[${completed}/${toDownload.length}] ${result.filename} (${result.sizeKb} KB)`);
      }
      progressFn(completed + skipped, errors, files.length);
    }
  }

  const workers = [];
  for (let i = 0; i < Math.min(concurrency, toDownload.length); i++) {
    workers.push(processNext());
  }
  await Promise.all(workers);

  return { completed, errors, skipped, total: files.length };
}
|