Add DRM downloads, scrapers, gallery index, and UI improvements
- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
201
server/scrapers/coomer.js
Normal file
201
server/scrapers/coomer.js
Normal file
@@ -0,0 +1,201 @@
|
||||
import { existsSync, mkdirSync, writeFileSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
/**
 * Split a coomer/kemono profile URL into its API components.
 * Example: https://coomer.su/onlyfans/user/someone
 *   -> { base: 'https://coomer.su', service: 'onlyfans', userId: 'someone' }
 * @throws {Error} when the path does not look like /SERVICE/user/USER_ID
 */
export function parseUserUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  const match = /^\/([^/]+)\/user\/([^/?#]+)/.exec(pathname);
  if (!match) throw new Error(`Can't parse URL. Expected: https://coomer.su/SERVICE/user/USER_ID`);
  const [, service, userId] = match;
  return { base: `${protocol}//${hostname}`, service, userId };
}
|
||||
|
||||
/**
 * GET a coomer/kemono API endpoint and parse it as JSON, with retries.
 *
 * Retry policy per attempt:
 *  - 2xx  -> parsed JSON body
 *  - 404  -> [] (treated as "no posts", not an error)
 *  - 429  -> back off 5s, 10s, 15s, ... then retry
 *  - 5xx  -> wait 2s, retry
 *  - other status -> log and return null immediately
 *  - network/timeout error -> wait 2s and retry; the last attempt rethrows
 * Returns null when all retries are exhausted without a usable response.
 */
async function fetchApi(apiUrl, logFn, retries = 3) {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const resp = await fetch(apiUrl, {
        headers: { 'User-Agent': UA, 'Accept': 'application/json' },
        signal: AbortSignal.timeout(15000),
      });

      if (resp.ok) return await resp.json();
      if (resp.status === 404) return [];

      if (resp.status === 429) {
        const wait = 5 * attempt;
        logFn(`Rate limited, waiting ${wait}s...`);
        await sleep(wait * 1000);
      } else if (resp.status >= 500) {
        await sleep(2000);
      } else {
        logFn(`API error ${resp.status}: ${apiUrl}`);
        return null;
      }
    } catch (err) {
      if (attempt === retries) throw err;
      await sleep(2000);
    }
  }
  return null;
}
|
||||
|
||||
/** Resolve after `ms` milliseconds; used to pace retries and backoff. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
||||
|
||||
/**
 * Flatten API post objects into a deduplicated list of downloadable files.
 *
 * Each post may carry a primary `file` plus an `attachments` array; entries
 * without a `path` are ignored. Deduplication is by full CDN URL, so the
 * same path appearing in several posts is only listed once.
 *
 * @param {Array<object>} posts - posts as returned by the kemono/coomer API
 * @param {string} cdnBase - e.g. "https://n1.coomer.su/data"
 * @returns {Array<{url: string, name: string}>}
 */
export function collectFiles(posts, cdnBase) {
  const seenUrls = new Set();
  const results = [];

  for (const post of posts) {
    const candidates = [];
    if (post.file && post.file.path) candidates.push(post.file);
    for (const attachment of post.attachments || []) {
      if (attachment.path) candidates.push(attachment);
    }

    for (const entry of candidates) {
      const url = `${cdnBase}${entry.path}`;
      if (seenUrls.has(url)) continue;
      seenUrls.add(url);
      // Prefer the API-supplied display name; fall back to the path's basename.
      results.push({ url, name: entry.name || basename(entry.path) });
    }
  }

  return results;
}
|
||||
|
||||
/**
 * Download a single file into `outputDir`, skipping names already on disk.
 *
 * @param {string} url - absolute CDN URL
 * @param {string} outputDir - destination directory (must exist)
 * @param {string} name - preferred filename
 * @param {(msg: string) => void} logFn
 * @returns {Promise<{skipped?: boolean, error?: boolean, filename?: string, sizeKb?: string}>}
 *   exactly one of: {skipped}, {error}, or {filename, sizeKb} on success.
 */
async function downloadFile(url, outputDir, name, logFn) {
  let filepath = join(outputDir, name);
  if (existsSync(filepath)) {
    // File already exists, skip
    return { skipped: true };
  }

  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(60000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${name}`);
      return { error: true };
    }

    const buf = Buffer.from(await resp.arrayBuffer());

    // Handle filename collision (a concurrent worker may have written the
    // same name while we were fetching): try base_1.ext, base_2.ext, ...
    if (existsSync(filepath)) {
      const ext = extname(name);
      // BUGFIX: with no extension, name.slice(0, -0) === '' — keep full name.
      const base = ext ? name.slice(0, -ext.length) : name;
      let i = 1;
      while (existsSync(filepath)) {
        filepath = join(outputDir, `${base}_${i}${ext}`);
        i++;
      }
    }

    writeFileSync(filepath, buf);

    // Register in the media index so the gallery sees the new file right
    // away; an index failure must never fail an otherwise good download.
    const savedName = basename(filepath);
    const folderName = basename(outputDir);
    const ext = extname(savedName).toLowerCase();
    const fileType = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v'].includes(ext) ? 'video' : 'image';
    try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch { /* ignore */ }

    const sizeKb = (buf.length / 1024).toFixed(1);
    return { filename: savedName, sizeKb };
  } catch (err) {
    logFn(`FAILED: ${name} - ${err.message}`);
    return { error: true };
  }
}
|
||||
|
||||
/**
 * Page through the coomer/kemono posts API and collect all file references.
 *
 * @param {string} base - site base URL, e.g. "https://coomer.su"
 * @param {string} service - e.g. "onlyfans"
 * @param {string} userId
 * @param {number} maxPages - hard cap on pages fetched
 * @param {(msg: string) => void} logFn
 * @param {() => boolean} checkCancelled - polled before each page
 * @returns {Promise<Array<{url: string, name: string}>>}
 */
export async function fetchAllPosts(base, service, userId, maxPages, logFn, checkCancelled) {
  const allFiles = [];

  // The CDN base depends only on `base` — compute it once, not per page.
  const parsed = new URL(base);
  const cdnBase = `${parsed.protocol}//n1.${parsed.hostname}/data`;

  for (let page = 0; page < maxPages; page++) {
    if (checkCancelled()) break;

    const offset = page * 50; // the API pages in fixed steps of 50 posts
    const apiUrl = `${base}/api/v1/${service}/user/${userId}/posts?o=${offset}`;

    let posts;
    try {
      posts = await fetchApi(apiUrl, logFn);
    } catch (err) {
      logFn(`API failed: ${err.message}`);
      break;
    }

    if (!posts || posts.length === 0) break;

    allFiles.push(...collectFiles(posts, cdnBase));

    logFn(`Page ${page + 1}: ${posts.length} posts (${allFiles.length} files total)`);

    // A short page means this was the last one.
    if (posts.length < 50) break;
  }

  return allFiles;
}
|
||||
|
||||
/**
 * Download `files` into `outputDir` using a fixed-size pool of async workers.
 *
 * @param {Array<{url: string, name: string}>} files
 * @param {string} outputDir - created if missing
 * @param {number} concurrency - number of parallel workers
 * @param {(msg: string) => void} logFn
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled - polled before each download
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadFiles(files, outputDir, concurrency, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  // Pre-filter files already on disk so worker slots aren't wasted.
  const toDownload = [];
  let skipped = 0;
  for (const f of files) {
    if (existsSync(join(outputDir, f.name))) {
      skipped++;
    } else {
      toDownload.push(f);
    }
  }

  if (skipped > 0) logFn(`Skipping ${skipped} already downloaded files`);
  logFn(`Downloading ${toDownload.length} files with ${concurrency} workers...`);

  let completed = 0;
  let errors = 0;
  let index = 0;

  // Each worker claims the next index. `index++` is race-free here because
  // workers only interleave at await points (single-threaded event loop).
  async function processNext() {
    while (index < toDownload.length) {
      if (checkCancelled()) return;

      const current = index++;
      const file = toDownload[current];

      const result = await downloadFile(file.url, outputDir, file.name, logFn);
      if (result.error) {
        errors++;
      } else if (result.skipped) {
        // BUGFIX: a file created after the pre-filter ran was previously
        // counted nowhere, so progress never reached the total.
        skipped++;
      } else {
        completed++;
        logFn(`[${completed}/${toDownload.length}] ${result.filename} (${result.sizeKb} KB)`);
      }
      progressFn(completed + skipped, errors, files.length);
    }
  }

  const workers = [];
  for (let i = 0; i < Math.min(concurrency, toDownload.length); i++) {
    workers.push(processNext());
  }
  await Promise.all(workers);

  return { completed, errors, skipped, total: files.length };
}
|
||||
230
server/scrapers/forum.js
Normal file
230
server/scrapers/forum.js
Normal file
@@ -0,0 +1,230 @@
|
||||
import * as cheerio from 'cheerio';
import { createWriteStream, existsSync, mkdirSync, statSync, writeFileSync } from 'fs';
import { basename, join, extname } from 'path';
import { pipeline } from 'stream/promises';
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
// Extensions treated as downloadable images.
const IMAGE_EXTS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff']);
// URL substrings that indicate forum chrome (avatars, smilies, ...) rather than content.
const SKIP_PATTERNS = ['avatar', 'smilie', 'emoji', 'icon', 'logo', 'button', 'sprite', 'badge', 'rank', 'star'];

/** True when the URL parses and its path ends in a known image extension. */
function isImageUrl(url) {
  let pathname;
  try {
    pathname = new URL(url).pathname.toLowerCase();
  } catch {
    return false;
  }
  for (const ext of IMAGE_EXTS) {
    if (pathname.endsWith(ext)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Build the URL for page `pageNum` of a thread, stripping any #fragment.
 *
 * Replaces an existing `page-N` segment when present. BUGFIX: a URL without
 * one (the canonical first-page URL) was previously returned unchanged for
 * every pageNum, so only page 1 ever got scraped; now `page-N` is appended
 * (XenForo-style) for pageNum > 1.
 */
export function getPageUrl(baseUrl, pageNum) {
  const url = baseUrl.split('#')[0];
  if (/page-\d+/.test(url)) {
    return url.replace(/page-\d+/, `page-${pageNum}`);
  }
  if (pageNum <= 1) return url;
  return `${url.replace(/\/+$/, '')}/page-${pageNum}`;
}
|
||||
|
||||
/**
 * Fetch the first page of a thread and infer the total page count from its
 * pagination links. Returns the count when more than one page is detected,
 * otherwise null (also null on fetch failure).
 */
export async function detectMaxPage(baseUrl, logFn) {
  try {
    const resp = await fetch(baseUrl, { headers: { 'User-Agent': UA }, signal: AbortSignal.timeout(15000) });
    if (!resp.ok) return null;
    const $ = cheerio.load(await resp.text());

    let maxPage = 1;

    // XenForo-style pagers: hrefs containing "page-N".
    $('a.pageNav-page, .pageNav a[href*="page-"], .pagination a[href*="page-"]').each((_, el) => {
      const match = /page-(\d+)/.exec($(el).attr('href') || '');
      if (match) {
        const n = parseInt(match[1], 10);
        if (n > maxPage) maxPage = n;
      }
    });

    // Fallback heuristic: any link whose text is purely numeric, capped at
    // 10000 to filter out post counts and similar noise.
    $('a').each((_, el) => {
      const text = $(el).text().trim();
      if (!/^\d+$/.test(text)) return;
      const n = parseInt(text, 10);
      if (n > maxPage && n < 10000) maxPage = n;
    });

    if (maxPage > 1) {
      logFn(`Detected ${maxPage} pages`);
      return maxPage;
    }
    return null;
  } catch (err) {
    logFn(`Page detection failed: ${err.message}`);
    return null;
  }
}
|
||||
|
||||
/**
 * Given a thumbnail URL, produce candidate URLs that may point at the
 * full-size original, based on common image-host naming conventions
 * (".th." infix, "_thumb" suffix, /thumbs/ directory, "thumb_" prefix,
 * rendition-selecting query strings). Order is the order candidates are tried.
 */
function tryFullSizeUrl(thumbUrl) {
  const candidates = [];

  if (thumbUrl.includes('.th.')) {
    candidates.push(thumbUrl.replace('.th.', '.'));
  }
  if (/_thumb\./i.test(thumbUrl)) {
    candidates.push(thumbUrl.replace(/_thumb\./i, '.'));
  }
  if (thumbUrl.includes('/thumbs/')) {
    candidates.push(
      thumbUrl.replace('/thumbs/', '/images/'),
      thumbUrl.replace('/thumbs/', '/full/'),
    );
  }

  try {
    const parsed = new URL(thumbUrl);
    const file = basename(parsed.pathname);
    if (file.startsWith('thumb_')) {
      candidates.push(thumbUrl.replace(`/${file}`, `/${file.slice(6)}`));
    }
    // A query string often selects the thumbnail rendition; try without it.
    if (parsed.search) candidates.push(thumbUrl.split('?')[0]);
  } catch {
    // Not an absolute URL — keep whatever candidates we already collected.
  }

  return candidates;
}
|
||||
|
||||
/**
 * Download one candidate image URL into `outputDir`.
 *
 * Skips URLs already attempted (tracked in `downloadedSet`, which this
 * function mutates), non-image URLs, and forum chrome (SKIP_PATTERNS).
 * Bodies under 1000 bytes are treated as placeholders and discarded — the
 * URL is removed from `downloadedSet` so a later candidate can retry it.
 *
 * @returns {Promise<boolean>} true when a file was actually saved
 */
async function downloadImage(url, outputDir, downloadedSet, logFn) {
  if (downloadedSet.has(url)) return false;
  if (!isImageUrl(url)) return false;
  const lower = url.toLowerCase();
  if (SKIP_PATTERNS.some(p => lower.includes(p))) return false;

  downloadedSet.add(url);

  let filename;
  try {
    filename = basename(new URL(url).pathname);
  } catch { return false; }
  if (!filename) return false;

  // Normalize ".th." thumbnail infixes out of the saved name.
  filename = filename.replace('.th.', '.');

  // Resolve filename collisions: name_1.ext, name_2.ext, ...
  let filepath = join(outputDir, filename);
  if (existsSync(filepath)) {
    const ext = extname(filename);
    // BUGFIX: with no extension, filename.slice(0, -0) === '' — keep full name.
    const name = ext ? filename.slice(0, -ext.length) : filename;
    let i = 1;
    while (existsSync(filepath)) {
      filepath = join(outputDir, `${name}_${i}${ext}`);
      i++;
    }
  }

  try {
    const resp = await fetch(url, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(30000),
    });
    if (!resp.ok) {
      logFn(`FAILED (${resp.status}): ${url}`);
      return false;
    }

    // Read full body to check size
    const buf = Buffer.from(await resp.arrayBuffer());
    if (buf.length < 1000) {
      downloadedSet.delete(url);
      return false;
    }

    // writeFileSync is now imported statically at the top of the file
    // (previously fetched via a redundant dynamic `await import('fs')`).
    writeFileSync(filepath, buf);

    // Index the saved file for the gallery; index failures are non-fatal.
    const savedName = basename(filepath);
    const folderName = basename(outputDir);
    try { upsertMediaFile(folderName, savedName, 'image', buf.length, Date.now(), null); } catch { /* ignore */ }

    const sizeKb = (buf.length / 1024).toFixed(1);
    logFn(`Downloaded: ${savedName} (${sizeKb} KB)`);
    return true;
  } catch (err) {
    logFn(`FAILED: ${basename(filepath)} - ${err.message}`);
    return false;
  }
}
|
||||
|
||||
/**
 * Scrape one forum page: fetch its HTML, harvest candidate image URLs from
 * post content, and download each through downloadImage (which dedupes via
 * the shared `downloadedSet` and filters non-images / forum chrome).
 *
 * @param {string} pageUrl - absolute page URL
 * @param {string} outputDir - destination directory (assumed to exist — TODO confirm caller creates it)
 * @param {Set<string>} downloadedSet - mutated; URLs attempted across pages
 * @param {(msg: string) => void} logFn
 * @returns {Promise<number>} count of images actually saved from this page
 */
export async function scrapeForumPage(pageUrl, outputDir, downloadedSet, logFn) {
  logFn(`Fetching page: ${pageUrl}`);

  let html;
  try {
    const resp = await fetch(pageUrl, {
      headers: { 'User-Agent': UA },
      signal: AbortSignal.timeout(15000),
    });
    if (!resp.ok) {
      logFn(`Failed to fetch page (${resp.status})`);
      return 0;
    }
    html = await resp.text();
  } catch (err) {
    logFn(`Failed to fetch page: ${err.message}`);
    return 0;
  }

  const $ = cheerio.load(html);

  // Try known content selectors, fall back to whole page
  // (selectors cover XenForo, vBulletin, phpBB and generic blog markup).
  const selectors = '.message-body, .post-body, .post_body, .postcontent, .messageContent, .bbWrapper, article, .entry-content, .post_message, .post-content, #posts, .threadBody';
  let contentAreas = $(selectors).toArray();
  if (contentAreas.length === 0) {
    contentAreas = [$.root().get(0)];
  }

  // Candidate URLs in discovery order; duplicates are fine — downloadImage
  // dedupes against downloadedSet.
  const imageUrls = [];

  for (const area of contentAreas) {
    const $area = $(area);

    // Pass 1: <img> tags
    $area.find('img').each((_, el) => {
      const $img = $(el);
      const src = $img.attr('src') || $img.attr('data-src') || $img.attr('data-url') || '';
      if (!src) return;

      // Resolve relative src against the page URL; skip unparsable ones.
      let absSrc;
      try { absSrc = new URL(src, pageUrl).href; } catch { return; }

      // Check parent <a> for direct image link — a thumbnail wrapped in a
      // link to the full image; prefer the link target and stop here.
      const $parentA = $img.closest('a');
      if ($parentA.length && $parentA.attr('href')) {
        try {
          const aHref = new URL($parentA.attr('href'), pageUrl).href;
          if (isImageUrl(aHref)) {
            imageUrls.push(aHref);
            return;
          }
        } catch {}
      }

      // Try to derive full-size from thumbnail URL
      // NOTE(review): when candidates exist, absSrc itself is NOT queued,
      // so if every candidate 404s the thumbnail is not downloaded either —
      // presumably intentional (avoid saving thumbs); confirm.
      const fullCandidates = tryFullSizeUrl(absSrc);
      if (fullCandidates.length > 0) {
        imageUrls.push(...fullCandidates);
      } else {
        imageUrls.push(absSrc);
      }

      // Also check data attributes
      // (lazy-load / lightbox attributes that often hold the full-size URL).
      for (const attr of ['data-src', 'data-url', 'data-orig', 'data-original', 'data-full-url', 'data-zoom-src']) {
        const val = $img.attr(attr);
        if (val && val !== src) {
          try { imageUrls.push(new URL(val, pageUrl).href); } catch {}
        }
      }
    });

    // Pass 2: <a href> pointing directly to images (no child <img>)
    $area.find('a[href]').each((_, el) => {
      const $a = $(el);
      if ($a.find('img').length) return;
      try {
        const href = new URL($a.attr('href'), pageUrl).href;
        if (isImageUrl(href)) imageUrls.push(href);
      } catch {}
    });
  }

  logFn(`Found ${imageUrls.length} candidate URLs`);

  // Sequential downloads: candidate order matters (full-size guesses are
  // tried before fallbacks, and downloadedSet must be updated in order).
  let count = 0;
  for (const imgUrl of imageUrls) {
    if (await downloadImage(imgUrl, outputDir, downloadedSet, logFn)) {
      count++;
    }
  }

  logFn(`${count} images from this page`);
  return count;
}
|
||||
187
server/scrapers/medialink.js
Normal file
187
server/scrapers/medialink.js
Normal file
@@ -0,0 +1,187 @@
|
||||
import { existsSync, writeFileSync, mkdirSync } from 'fs';
|
||||
import { basename, join, extname } from 'path';
|
||||
import { upsertMediaFile } from '../db.js';
|
||||
|
||||
const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
|
||||
|
||||
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
|
||||
|
||||
/**
 * Extract the site base and numeric model id from a fapello-style URL.
 * Accepts both /model/{id} and /media/{id} paths.
 * @throws {Error} when no numeric id segment is found
 */
export function parseMediaUrl(url) {
  const { protocol, hostname, pathname } = new URL(url);
  // Support /model/{id} or /media/{id}
  const match = /\/(?:model|media)\/(\d+)/.exec(pathname);
  if (!match) throw new Error(`Can't parse URL. Expected: https://fapello.to/model/12345`);
  return { base: `${protocol}//${hostname}`, userId: match[1] };
}
|
||||
|
||||
// Fetch JSON from the API endpoint
|
||||
// API: GET /api/media/{userId}/{page}/{order}
|
||||
// Requires X-Requested-With and Referer headers to avoid 403
|
||||
/**
 * GET /api/media/{userId}/{page}/{order} and parse the JSON response.
 * Returns the parsed array, or null on 404 / error (404 is silent — it just
 * marks the end of pagination; other failures are logged).
 */
async function fetchApiPage(base, userId, page, order, logFn) {
  const apiUrl = `${base}/api/media/${userId}/${page}/${order}`;
  try {
    const resp = await fetch(apiUrl, {
      headers: {
        'User-Agent': UA,
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        // Without X-Requested-With and Referer the endpoint responds 403.
        'X-Requested-With': 'XMLHttpRequest',
        'Referer': `${base}/model/${userId}`,
      },
      signal: AbortSignal.timeout(15000),
    });
    if (resp.ok) return await resp.json();
    if (resp.status !== 404) logFn(`API error (${resp.status}): ${apiUrl}`);
    return null;
  } catch (err) {
    logFn(`API fetch error: ${err.message}`);
    return null;
  }
}
|
||||
|
||||
// Collect all media items by paginating through the API
|
||||
/**
 * Collect all media items for a model by paginating the API until an empty
 * page, a page of only duplicates, cancellation, or the `maxPages` cap.
 *
 * @returns {Promise<Array<{id: *, url: string, type: 'video'|'image'}>>}
 */
export async function fetchAllMedia(base, userId, maxPages, delay, logFn, checkCancelled) {
  const collected = [];
  const seenIds = new Set();

  for (let page = 1; page <= maxPages; page++) {
    if (checkCancelled()) break;

    logFn(`Fetching page ${page}...`);
    const data = await fetchApiPage(base, userId, page, 1, logFn);

    if (!data || data.length === 0) {
      logFn(`Page ${page}: no more items — done`);
      break;
    }

    let newCount = 0;
    for (const item of data) {
      if (seenIds.has(item.id)) continue;
      seenIds.add(item.id);
      newCount++;

      // type "2" = video (newUrl is mp4), type "1" = image (newUrl is full-size jpg)
      if (!item.newUrl) continue;
      collected.push({
        id: item.id,
        url: item.newUrl,
        type: item.type === '2' || item.type === 2 ? 'video' : 'image',
      });
    }

    if (newCount === 0) {
      logFn(`Page ${page}: all duplicates — stopping`);
      break;
    }

    logFn(`Page ${page}: ${data.length} items (${newCount} new, ${collected.length} total)`);

    // Throttle between pages to stay polite to the API.
    if (page < maxPages && !checkCancelled()) {
      await new Promise(r => setTimeout(r, delay));
    }
  }

  return collected;
}
|
||||
|
||||
// Download all collected media items with concurrency
|
||||
/**
 * Download collected media items into `outputDir` using a pool of `workers`
 * concurrent fetchers.
 *
 * @param {Array<{id: *, url: string, type: 'video'|'image'}>} items
 * @param {string} outputDir - created if missing
 * @param {number} workers - number of parallel workers
 * @param {(msg: string) => void} logFn
 * @param {(done: number, errors: number, total: number) => void} progressFn
 * @param {() => boolean} checkCancelled - polled before each download
 * @returns {Promise<{completed: number, errors: number, skipped: number, total: number}>}
 */
export async function downloadMedia(items, outputDir, workers, logFn, progressFn, checkCancelled) {
  mkdirSync(outputDir, { recursive: true });

  let completed = 0;
  let errors = 0;
  let skipped = 0;
  let index = 0;

  // Each worker claims the next index; `index++` is race-free because
  // workers only interleave at await points.
  async function processNext() {
    while (index < items.length) {
      if (checkCancelled()) return;

      const current = index++;
      const item = items[current];

      // Derive a filename from the URL path, falling back to "{id}.{ext}".
      let filename;
      try {
        filename = basename(new URL(item.url).pathname);
        if (!filename || filename === '/') {
          filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
        }
      } catch {
        filename = `${item.id}.${item.type === 'video' ? 'mp4' : 'jpg'}`;
      }

      let filepath = join(outputDir, filename);
      if (existsSync(filepath)) {
        skipped++;
        progressFn(completed + skipped, errors, items.length);
        continue;
      }

      try {
        const resp = await fetch(item.url, {
          headers: {
            'User-Agent': UA,
            'Referer': 'https://fapello.to/',
          },
          signal: AbortSignal.timeout(60000),
        });
        if (!resp.ok) {
          // BUGFIX: previously logged the literal text "$(unknown)".
          logFn(`FAILED (${resp.status}): ${filename}`);
          errors++;
          progressFn(completed + skipped, errors, items.length);
          continue;
        }

        const buf = Buffer.from(await resp.arrayBuffer());
        // Tiny responses are placeholder/error bodies, not real media.
        if (buf.length < 500) {
          skipped++;
          progressFn(completed + skipped, errors, items.length);
          continue;
        }

        // Handle filename collision (a concurrent worker may have saved the
        // same name while we were fetching): name_1.ext, name_2.ext, ...
        if (existsSync(filepath)) {
          const ext = extname(filename);
          // BUGFIX: with no extension, slice(0, -0) === '' — keep full name.
          const name = ext ? filename.slice(0, -ext.length) : filename;
          let i = 1;
          while (existsSync(filepath)) {
            filepath = join(outputDir, `${name}_${i}${ext}`);
            i++;
          }
        }

        writeFileSync(filepath, buf);

        // Register in the media index; index failures are non-fatal.
        const savedName = basename(filepath);
        const folderName = basename(outputDir);
        const fileExt = extname(savedName).toLowerCase();
        const fileType = VIDEO_EXTS.has(fileExt) ? 'video' : 'image';
        try { upsertMediaFile(folderName, savedName, fileType, buf.length, Date.now(), null); } catch {}

        completed++;
        const sizeKb = (buf.length / 1024).toFixed(1);
        logFn(`[${completed}/${items.length}] ${savedName} (${sizeKb} KB)`);
        progressFn(completed + skipped, errors, items.length);
      } catch (err) {
        // BUGFIX: previously logged the literal text "$(unknown)".
        logFn(`FAILED: ${filename} - ${err.message}`);
        errors++;
        progressFn(completed + skipped, errors, items.length);
      }
    }
  }

  const workerPromises = [];
  for (let i = 0; i < Math.min(workers, items.length); i++) {
    workerPromises.push(processNext());
  }
  await Promise.all(workerPromises);

  return { completed, errors, skipped, total: items.length };
}
|
||||
Reference in New Issue
Block a user