import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { basename, join, extname } from 'path';
import { upsertMediaFile } from '../db.js';

// Browser-like UA: the target mirrors reject requests with no/default User-Agent.
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

// API page size: posts endpoint returns at most this many posts per offset.
const POSTS_PER_PAGE = 50;

// Extensions classified as 'video' for the media DB; everything else is 'image'.
const VIDEO_EXTENSIONS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);

/**
 * Parse a creator-page URL of the form https://HOST/SERVICE/user/USER_ID.
 *
 * @param {string} url - Full creator-page URL.
 * @returns {{ base: string, service: string, userId: string }} Site base URL,
 *   service slug, and user id extracted from the path.
 * @throws {Error} If the path does not match /SERVICE/user/USER_ID.
 */
export function parseUserUrl(url) {
  const parsed = new URL(url);
  const base = `${parsed.protocol}//${parsed.hostname}`;
  const m = parsed.pathname.match(/^\/([^/]+)\/user\/([^/?#]+)/);
  if (!m) throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID`);
  return { base, service: m[1], userId: m[2] };
}

/**
 * GET a JSON API endpoint with retry/backoff.
 *
 * Retry policy: 429 waits 5s * (attempt+1); 5xx waits 2s; network errors wait
 * 2s (rethrown on the last attempt). 404 is treated as "no data" and returns
 * an empty array. Other non-OK statuses are logged and return null.
 *
 * @param {string} apiUrl - Endpoint URL.
 * @param {(msg: string) => void} logFn - Logger callback.
 * @param {number} [retries=3] - Maximum attempts.
 * @returns {Promise<any[]|null>} Parsed JSON body, [] on 404, or null on
 *   unrecoverable API error / exhausted retries.
 */
async function fetchApi(apiUrl, logFn, retries = 3) {
  for (let attempt = 0; attempt < retries; attempt++) {
    try {
      const resp = await fetch(apiUrl, {
        headers: { 'User-Agent': UA, 'Accept': 'application/json' },
        signal: AbortSignal.timeout(15000),
      });
      if (resp.ok) return await resp.json();
      if (resp.status === 404) return [];
      if (resp.status === 429) {
        // Rate limited: back off linearly with the attempt number.
        const wait = 5 * (attempt + 1);
        logFn(`Rate limited, waiting ${wait}s...`);
        await sleep(wait * 1000);
        continue;
      }
      if (resp.status >= 500) {
        // Transient server error: brief fixed backoff, then retry.
        await sleep(2000);
        continue;
      }
      logFn(`API error ${resp.status}: ${apiUrl}`);
      return null;
    } catch (err) {
      // Network/timeout failure: retry unless this was the final attempt.
      if (attempt < retries - 1) {
        await sleep(2000);
      } else {
        throw err;
      }
    }
  }
  return null;
}

/** Promise-based delay of `ms` milliseconds. */
function sleep(ms) {
  return new Promise(r => setTimeout(r, ms));
}

/**
 * Flatten API posts into a deduplicated list of downloadable files.
 *
 * Each post contributes its primary `file` (if it has a path) plus any
 * `attachments` with paths. Duplicate URLs across posts are dropped.
 *
 * @param {Array<{file?: {path?: string, name?: string}, attachments?: Array<{path?: string, name?: string}>}>} posts
 * @param {string} cdnBase - CDN URL prefix joined with each file's path.
 * @returns {Array<{url: string, name: string}>} Unique files to download.
 */
export function collectFiles(posts, cdnBase) {
  const files = [];
  const seen = new Set();
  for (const post of posts) {
    const items = [];
    if (post.file && post.file.path) items.push(post.file);
    if (post.attachments) {
      for (const att of post.attachments) {
        if (att.path) items.push(att);
      }
    }
    for (const f of items) {
      const fileUrl = `${cdnBase}${f.path}`;
      if (seen.has(fileUrl)) continue;
      seen.add(fileUrl);
      const name = f.name || basename(f.path);
      files.push({ url: fileUrl, name });
    }
  }
  return files;
}

/**
 * Pick a non-existing path in `outputDir` for `name`, appending `_1`, `_2`, …
 * before the extension until the path is free.
 *
 * FIX: the previous inline version used `name.slice(0, -ext.length)`, which
 * for extension-less names (`ext === ''`) evaluates `slice(0, -0)` → `''`,
 * so collided files were renamed to bare `_1`, `_2`, losing the name.
 */
function resolveCollisionPath(outputDir, name) {
  const ext = extname(name);
  const stem = name.slice(0, name.length - ext.length);
  let candidate = join(outputDir, name);
  let i = 1;
  while (existsSync(candidate)) {
    candidate = join(outputDir, `${stem}_${i}${ext}`);
    i++;
  }
  return candidate;
}

/**
 * Download one file to `outputDir` and record it in the media DB.
 *
 * Skips if the target path already exists (returns `{skipped: true}`).
 * On a name collision after the fetch (e.g. a concurrent worker wrote the
 * same name), the file is saved under a `_N`-suffixed variant instead.
 * DB registration failures are deliberately swallowed — download success
 * must not depend on bookkeeping.
 *
 * @param {string} url - Source URL.
 * @param {string} outputDir - Destination directory (must exist).
 * @param {string} name - Desired file name.
 * @param {(msg: string) => void} logFn - Logger callback.
 * @returns {Promise<{skipped?: true} | {error?: true} | {filename: string, sizeKb: string}>}
 */
async function downloadFile(url, outputDir, name, logFn) {
  const filepath = join(outputDir, name);
  if (existsSync(filepath)) {
    // File already exists, skip
    return { skipped: true };
  }
  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(60000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${name}`);
      return { error: true };
    }
    const buf = Buffer.from(await resp.arrayBuffer());
    // Handle filename collision (different content written since our check above).
    const savedPath = resolveCollisionPath(outputDir, name);
    writeFileSync(savedPath, buf);
    const savedName = basename(savedPath);
    const folderName = basename(outputDir);
    const ext = extname(savedName).toLowerCase();
    const fileType = VIDEO_EXTENSIONS.has(ext) ? 'video' : 'image';
    try {
      upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null);
    } catch { /* ignore: DB bookkeeping must not fail the download */ }
    const sizeKb = (buf.length / 1024).toFixed(1);
    return { filename: savedName, sizeKb };
  } catch (err) {
    logFn(`FAILED: ${name} - ${err.message}`);
    return { error: true };
  }
}

/**
 * Page through a creator's posts and collect all downloadable files.
 *
 * Stops when: `maxPages` reached, cancellation requested, the API fails or
 * returns nothing, or a short page (< POSTS_PER_PAGE) signals the end.
 *
 * NOTE(review): the CDN host is hard-coded as `n1.<site-host>` — confirm
 * this shard is valid for all mirrors.
 *
 * @param {string} base - Site base URL (protocol + host).
 * @param {string} service - Service slug from the creator URL.
 * @param {string} userId - Creator id.
 * @param {number} maxPages - Upper bound on pages to fetch.
 * @param {(msg: string) => void} logFn - Logger callback.
 * @param {() => boolean} checkCancelled - Cooperative cancellation probe.
 * @returns {Promise<Array<{url: string, name: string}>>}
 */
export async function fetchAllPosts(base, service, userId, maxPages, logFn, checkCancelled) {
  // CDN base is loop-invariant; compute once instead of per page.
  const parsed = new URL(base);
  const cdnBase = `${parsed.protocol}//n1.${parsed.hostname}/data`;

  const allFiles = [];
  for (let page = 0; page < maxPages; page++) {
    if (checkCancelled()) break;
    const offset = page * POSTS_PER_PAGE;
    const apiUrl = `${base}/api/v1/${service}/user/${userId}/posts?o=${offset}`;
    let posts;
    try {
      posts = await fetchApi(apiUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }
    if (!posts || posts.length === 0) break;
    const files = collectFiles(posts, cdnBase);
    allFiles.push(...files);
    logFn(`Page ${page + 1}: ${posts.length} posts (${allFiles.length} files total)`);
    if (posts.length < POSTS_PER_PAGE) break; // short page => no more posts
  }
  return allFiles;
}

/**
 * Download `files` into `outputDir` with a fixed-size worker pool.
 *
 * Files whose target name already exists are pre-filtered and counted as
 * skipped. Each worker pulls the next index from a shared counter until
 * the list is exhausted or cancellation is requested.
 *
 * @param {Array<{url: string, name: string}>} files - Files to download.
 * @param {string} outputDir - Destination directory (created if missing).
 * @param {number} concurrency - Number of parallel workers.
 * @param {(msg: string) => void} logFn - Logger callback.
 * @param {(done: number, errors: number, total: number) => void} progressFn - Progress callback.
 * @param {() => boolean} checkCancelled - Cooperative cancellation probe.
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  // Filter out already existing files
  const toDownload = [];
  let skipped = 0;
  for (const f of files) {
    if (existsSync(join(outputDir, f.name))) {
      skipped++;
    } else {
      toDownload.push(f);
    }
  }
  if (skipped > 0) logFn(`Skipping ${skipped} already downloaded files`);
  logFn(`Downloading ${toDownload.length} files with ${concurrency} workers...`);

  let completed = 0;
  let errors = 0;
  let index = 0;

  // Simple shared-counter worker: each worker claims the next index until done.
  async function processNext() {
    while (index < toDownload.length) {
      if (checkCancelled()) return;
      const current = index++;
      const file = toDownload[current];
      const result = await downloadFile(file.url, outputDir, file.name, logFn);
      if (result.error) {
        errors++;
      } else if (!result.skipped) {
        completed++;
        logFn(`[${completed}/${toDownload.length}] ${result.filename} (${result.sizeKb} KB)`);
      }
      progressFn(completed + skipped, errors, files.length);
    }
  }

  const workers = [];
  for (let i = 0; i < Math.min(concurrency, toDownload.length); i++) {
    workers.push(processNext());
  }
  await Promise.all(workers);
  return { completed, errors, skipped, total: files.length };
}