Add DRM downloads, scrapers, gallery index, and UI improvements
- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
201
server/scrapers/coomer.js
Normal file
201
server/scrapers/coomer.js
Normal file
@@ -0,0 +1,201 @@
|
||||
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
// Browser-like User-Agent header value sent with every API and file request.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
/**
 * Parse a Coomer/Kemono creator-page URL into its API components.
 *
 * @param {string} url - e.g. "https://coomer.su/onlyfans/user/someone"
 * @returns {{ base: string, service: string, userId: string }} site origin,
 *   service segment, and creator id.
 * @throws {Error} when the path does not look like /SERVICE/user/USER_ID.
 */
export function parseUserUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  const match = /^\/([^/]+)\/user\/([^/?#]+)/.exec(pathname);
  if (!match) throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID`);
  const [, service, userId] = match;
  return { base: `${protocol}//${hostname}`, service, userId };
}
|
||||
|
||||
/**
 * GET a JSON API endpoint with retries for rate limits and server errors.
 *
 * @param {string} apiUrl - Full URL to fetch.
 * @param {(msg: string) => void} logFn - Diagnostic logger.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<any>} parsed JSON on success, [] on 404, null on other
 *   HTTP errors or when all retries are exhausted.
 * @throws the last network error if every attempt throws.
 */
async function fetchApi(apiUrl, logFn, retries = 3) {
  for (let attempt = 0; attempt < retries; attempt++) {
    const isLastAttempt = attempt === retries - 1;
    try {
      const resp = await fetch(apiUrl, {
        headers: { 'User-Agent': UA, 'Accept': 'application/json' },
        signal: AbortSignal.timeout(15000),
      });

      if (resp.ok) return await resp.json();
      if (resp.status === 404) return [];

      if (resp.status === 429) {
        // Back off progressively harder on each rate-limited attempt.
        const waitSeconds = 5 * (attempt + 1);
        logFn(`Rate limited, waiting ${waitSeconds}s...`);
        await sleep(waitSeconds * 1000);
      } else if (resp.status >= 500) {
        // Transient server error: brief pause, then retry.
        await sleep(2000);
      } else {
        // Any other status is treated as permanent.
        logFn(`API error ${resp.status}: ${apiUrl}`);
        return null;
      }
    } catch (err) {
      if (isLastAttempt) throw err;
      await sleep(2000);
    }
  }
  return null;
}
|
||||
|
||||
/** Resolve after roughly `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Flatten API post objects into a deduplicated list of downloadable files.
 * Each post contributes its main `file` (if any) followed by its
 * `attachments`; entries without a `path` are ignored and duplicate CDN
 * URLs are emitted only once.
 *
 * @param {Array<object>} posts - Posts as returned by the site API.
 * @param {string} cdnBase - CDN prefix joined with each item's path.
 * @returns {Array<{url: string, name: string}>}
 */
export function collectFiles(posts, cdnBase) {
  const seenUrls = new Set();
  const results = [];

  for (const post of posts) {
    // Main file first, then attachments — preserves the original ordering.
    const candidates = [post.file, ...(post.attachments || [])];
    for (const item of candidates) {
      if (!item?.path) continue;
      const url = `${cdnBase}${item.path}`;
      if (seenUrls.has(url)) continue;
      seenUrls.add(url);
      results.push({ url, name: item.name || basename(item.path) });
    }
  }

  return results;
}
|
||||
|
||||
/**
 * Download a single file into outputDir, skipping names that already exist.
 *
 * @param {string} url - Remote file URL.
 * @param {string} outputDir - Destination directory (assumed to exist).
 * @param {string} name - Preferred filename.
 * @param {(msg: string) => void} logFn - Diagnostic logger.
 * @returns {Promise<{skipped?: boolean, error?: boolean, filename?: string, sizeKb?: string}>}
 *   exactly one of: {skipped}, {error}, or {filename, sizeKb}.
 */
async function downloadFile(url, outputDir, name, logFn) {
  let filepath = join(outputDir, name);
  if (existsSync(filepath)) {
    // Already downloaded; nothing to do.
    return { skipped: true };
  }

  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(60000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${name}`);
      return { error: true };
    }

    const buf = Buffer.from(await resp.arrayBuffer());

    // A concurrent worker may have written the same filename while we were
    // fetching (dedup upstream is by URL, not name); pick a free "_N" suffix.
    if (existsSync(filepath)) {
      const ext = extname(name);
      // BUGFIX: for extension-less names extname() is '' and
      // name.slice(0, -0) would wipe the whole base name.
      const base = ext ? name.slice(0, -ext.length) : name;
      let i = 1;
      while (existsSync(filepath)) {
        filepath = join(outputDir, `${base}_${i}${ext}`);
        i++;
      }
    }

    writeFileSync(filepath, buf);
    const savedName = basename(filepath);
    const folderName = basename(outputDir);
    const ext = extname(savedName).toLowerCase();
    const fileType = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v'].includes(ext) ? 'video' : 'image';
    // Best-effort media-index update; failures are deliberately ignored.
    try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch { /* ignore */ }
    const sizeKb = (buf.length / 1024).toFixed(1);
    return { filename: savedName, sizeKb };
  } catch (err) {
    logFn(`FAILED: ${name} - ${err.message}`);
    return { error: true };
  }
}
|
||||
|
||||
/**
 * Page through a creator's post listing and collect all downloadable files.
 *
 * @param {string} base - Site origin, e.g. "https://coomer.su".
 * @param {string} service - Service segment of the API path.
 * @param {string} userId - Creator id.
 * @param {number} maxPages - Upper bound on pages fetched (50 posts/page).
 * @param {(msg: string) => void} logFn - Progress logger.
 * @param {() => boolean} checkCancelled - Returns true to stop paging early.
 * @returns {Promise<Array<{url: string, name: string}>>}
 */
export async function fetchAllPosts(base, service, userId, maxPages, logFn, checkCancelled) {
  const allFiles = [];

  // CDN base depends only on `base` — hoisted out of the page loop
  // (the original recomputed it on every iteration).
  const parsed = new URL(base);
  const cdnBase = `${parsed.protocol}//n1.${parsed.hostname}/data`;

  for (let page = 0; page < maxPages; page++) {
    if (checkCancelled()) break;

    const offset = page * 50; // API pages in fixed steps of 50 posts
    const apiUrl = `${base}/api/v1/${service}/user/${userId}/posts?o=${offset}`;

    let posts;
    try {
      posts = await fetchApi(apiUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }

    if (!posts || posts.length === 0) break;

    allFiles.push(...collectFiles(posts, cdnBase));
    logFn(`Page ${page + 1}: ${posts.length} posts (${allFiles.length} files total)`);

    // A short page means this was the last one.
    if (posts.length < 50) break;
  }

  return allFiles;
}
|
||||
|
||||
/**
 * Download a list of files into outputDir with a fixed-size worker pool.
 * Files whose names already exist on disk are skipped up front.
 *
 * @param {Array<{url: string, name: string}>} files - Files to fetch.
 * @param {string} outputDir - Destination directory (created if missing).
 * @param {number} concurrency - Number of parallel workers.
 * @param {(msg: string) => void} logFn - Progress logger.
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled - Returns true to abort remaining work.
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  // Partition into already-present names and pending work.
  const pending = files.filter((f) => !existsSync(join(outputDir, f.name)));
  const skipped = files.length - pending.length;

  if (skipped > 0) logFn(`Skipping ${skipped} already downloaded files`);
  logFn(`Downloading ${pending.length} files with ${concurrency} workers...`);

  let completed = 0;
  let errors = 0;
  let nextIndex = 0;

  // Each worker repeatedly claims the next unclaimed index until the queue
  // is drained or a cancellation is observed.
  const worker = async () => {
    for (;;) {
      if (nextIndex >= pending.length) return;
      if (checkCancelled()) return;

      const file = pending[nextIndex++];
      const result = await downloadFile(file.url, outputDir, file.name, logFn);

      if (result.error) {
        errors++;
      } else if (!result.skipped) {
        completed++;
        logFn(`[${completed}/${pending.length}] ${result.filename} (${result.sizeKb} KB)`);
      }
      progressFn(completed + skipped, errors, files.length);
    }
  };

  const poolSize = Math.min(concurrency, pending.length);
  await Promise.all(Array.from({ length: poolSize }, () => worker()));

  return { completed, errors, skipped, total: files.length };
}
|
||||
Reference in New Issue
Block a user