/*
 * Commit notes:
 * - DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
 * - Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
 * - SQLite-backed media index for instant gallery loads with startup scan
 * - Duplicate detection and gallery filtering/sorting
 * - HLS video component, log viewer, and scrape management UI
 * - Dockerfile updated for Python/pywidevine, docker-compose volume for CDM
 *
 * Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
 *
 * 508 lines · 16 KiB · JavaScript
 */
import { Router } from 'express';
import { readdirSync, statSync, existsSync, mkdirSync, unlinkSync, createReadStream } from 'fs';
import { join, extname, resolve, sep } from 'path';
import { execFile } from 'child_process';
import { promisify } from 'util';
import { createHash } from 'crypto';
import {
  getPostDateByFilename, getSetting,
  upsertMediaFileBatch, removeMediaFile, removeStaleFiles,
  getMediaFolders, getMediaFiles, getMediaFileCount, getAllIndexedFolders,
} from './db.js';

// Express router that carries every gallery endpoint defined in this module.
const router = Router();

// Promise-returning wrapper around child_process.execFile (used to run ffmpeg).
const execFileAsync = promisify(execFile);

// Root directory of the media library; overridable through the environment.
const MEDIA_PATH = process.env.MEDIA_PATH || './data/media';

// Name of the per-folder subdirectory that holds cached video thumbnails.
const THUMB_DIR = '.thumbs';

// Thumbnail generations currently running, keyed by "folder/filename", so that
// concurrent requests for the same file share a single promise.
const thumbInFlight = new Map();

// Extensions (lower-case, leading dot included) treated as still images.
const IMAGE_EXTS = new Set([
  '.jpg',
  '.jpeg',
  '.png',
  '.gif',
  '.webp',
  '.bmp',
]);

// Extensions (lower-case, leading dot included) treated as videos.
const VIDEO_EXTS = new Set([
  '.mp4',
  '.mov',
  '.avi',
  '.webm',
  '.mkv',
  '.m4v',
]);

/**
 * Classify a file by its extension, ignoring case.
 *
 * @param {string} filename - File name or path; only its extension is inspected.
 * @returns {'image' | 'video' | null} The media kind, or null when the
 *   extension is in neither set.
 */
function getMediaType(filename) {
  const extension = extname(filename).toLowerCase();
  if (IMAGE_EXTS.has(extension)) {
    return 'image';
  }
  return VIDEO_EXTS.has(extension) ? 'video' : null;
}

// --- Background filesystem scanner ---

/**
 * Walk MEDIA_PATH with synchronous fs calls and bring the DB media index in
 * line with what is on disk.
 *
 * Only first-level directories whose names do not start with '.' or '_' are
 * scanned, and inside them only non-hidden files with a recognised image or
 * video extension. Each folder's files are upserted as one batch, rows for
 * files no longer present are removed, and finally rows belonging to folders
 * that were not seen at all are removed. Returns nothing; progress and the
 * summary are written to the console.
 */
export function scanMediaFiles() {
  const startedAt = Date.now();
  console.log('[gallery] Starting media index scan...');

  if (!existsSync(MEDIA_PATH)) {
    console.log('[gallery] Media path does not exist, skipping scan');
    return;
  }

  let topLevel;
  try {
    topLevel = readdirSync(MEDIA_PATH, { withFileTypes: true });
  } catch (err) {
    console.error('[gallery] Failed to read media path:', err.message);
    return;
  }

  const scannedFolders = new Set();
  let totalFiles = 0;

  const mediaDirs = topLevel.filter(
    (dirent) => dirent.isDirectory() && !dirent.name.startsWith('.') && !dirent.name.startsWith('_'),
  );

  for (const { name: folderName } of mediaDirs) {
    // Recorded before the directory is read, so a folder that cannot be listed
    // is still treated as present and its rows are not purged below.
    scannedFolders.add(folderName);
    const dirPath = join(MEDIA_PATH, folderName);

    let names;
    try {
      names = readdirSync(dirPath);
    } catch {
      continue;
    }

    const rows = [];
    const validFilenames = [];

    for (const name of names) {
      const mediaType = name.startsWith('.') ? null : getMediaType(name);
      if (!mediaType) continue;

      // The name counts as "still present" even if the stat below fails.
      validFilenames.push(name);
      try {
        const { size, mtimeMs } = statSync(join(dirPath, name));
        rows.push({
          folder: folderName,
          filename: name,
          type: mediaType,
          size,
          modified: mtimeMs,
          postedAt: getPostDateByFilename(name) || null,
        });
      } catch {
        // Entry could not be inspected: leave it out of this batch.
      }
    }

    if (rows.length > 0) {
      upsertMediaFileBatch(rows);
      totalFiles += rows.length;
    }

    // Remove DB rows for files that no longer exist in this folder
    removeStaleFiles(folderName, validFilenames);
  }

  // Remove DB rows for folders that no longer exist on disk
  for (const indexed of getAllIndexedFolders()) {
    if (scannedFolders.has(indexed)) continue;
    removeStaleFiles(indexed, []);
  }

  const elapsed = ((Date.now() - startedAt) / 1000).toFixed(2);
  const dbCount = getMediaFileCount();
  console.log(`[gallery] Index scan complete: ${totalFiles} files in ${scannedFolders.size} folders (${elapsed}s). DB total: ${dbCount}`);
}

// GET /api/gallery/folders — list all folders with file counts (from DB index)
// Responds with whatever getMediaFolders() returns; any thrown error is
// forwarded to the Express error handler.
router.get('/api/gallery/folders', (_req, res, next) => {
  try {
    res.json(getMediaFolders());
  } catch (err) {
    next(err);
  }
});

// GET /api/gallery/files?folder=&folders=&type=&sort=&offset=&limit= (from DB index)
//
// Query parameters:
//   folder  - single folder name (optional)
//   folders - comma-separated folder names; may also be repeated (optional)
//   type    - passed to getMediaFiles, defaults to 'all'
//   sort    - passed to getMediaFiles, defaults to 'latest'
//   offset  - non-negative integer, defaults to 0
//   limit   - non-negative integer, defaults to 50
//
// Responds with { total, offset, limit, files }. Each file carries a media URL
// and, for videos when HLS is enabled, an hlsUrl.
router.get('/api/gallery/files', (req, res, next) => {
  try {
    const { folder, type, sort, offset, limit } = req.query;

    // Parse a paging value; anything that is not a non-negative integer
    // (missing, garbage, negative) falls back to the default instead of
    // reaching the DB query as NaN or a negative number.
    const toCount = (raw, fallback) => {
      const parsed = Number.parseInt(raw, 10);
      return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
    };

    // `folders` is a string for ?folders=a,b but an array for
    // ?folders=a&folders=b; accept both rather than throwing on `.split`.
    const foldersParam = req.query.folders;
    const foldersArr = foldersParam
      ? (Array.isArray(foldersParam) ? foldersParam : [foldersParam])
        .filter((value) => typeof value === 'string')
        .flatMap((value) => value.split(','))
        .map((f) => f.trim())
        .filter(Boolean)
      : undefined;

    const offsetNum = toCount(offset, 0);
    const limitNum = toCount(limit, 50);
    const hlsEnabled = (getSetting('hls_enabled') || process.env.HLS_ENABLED) === 'true';

    const { total, rows } = getMediaFiles({
      folder: folder || undefined,
      folders: foldersArr,
      type: type || 'all',
      sort: sort || 'latest',
      offset: offsetNum,
      limit: limitNum,
    });

    const files = rows.map((r) => {
      const fileObj = {
        folder: r.folder,
        filename: r.filename,
        type: r.type,
        size: r.size,
        modified: r.modified,
        postedAt: r.posted_at || null,
        url: `/api/gallery/media/${encodeURIComponent(r.folder)}/${encodeURIComponent(r.filename)}`,
      };
      if (hlsEnabled && r.type === 'video') {
        fileObj.hlsUrl = `/api/hls/${encodeURIComponent(r.folder)}/${encodeURIComponent(r.filename)}/master.m3u8`;
      }
      return fileObj;
    });

    res.json({ total, offset: offsetNum, limit: limitNum, files });
  } catch (err) {
    next(err);
  }
});

// POST /api/gallery/rescan — trigger a media index rescan
//
// State reported by the rescan endpoints:
//   running   - true from the moment a rescan is accepted until it finishes
//   lastRun   - ISO timestamp of the most recent completion (null until then)
//   fileCount - DB file count captured after the last successful scan
//   elapsed   - duration of the last scan in seconds (null until one finishes)
let rescanState = { running: false, lastRun: null, fileCount: 0, elapsed: null };

router.post('/api/gallery/rescan', (req, res) => {
  if (rescanState.running) {
    return res.json({ status: 'already_running', ...rescanState });
  }
  rescanState = { running: true, lastRun: null, fileCount: 0, elapsed: null };
  res.json({ status: 'started' });

  // Deferred so the 'started' response is handed off before the scan begins.
  setImmediate(() => {
    const startedAt = Date.now();
    try {
      scanMediaFiles();
      rescanState.fileCount = getMediaFileCount();
    } catch (err) {
      console.error('[gallery] Rescan failed:', err.message);
    } finally {
      rescanState.running = false;
      rescanState.lastRun = new Date().toISOString();
      // Previously never assigned, so status always reported elapsed: null.
      rescanState.elapsed = Number(((Date.now() - startedAt) / 1000).toFixed(2));
    }
  });
});

// GET /api/gallery/rescan/status — report rescan state.
// While a rescan is running the stored fileCount is returned; otherwise the
// count is read live from the DB index.
router.get('/api/gallery/rescan/status', (_req, res) => {
  const fileCount = rescanState.running ? rescanState.fileCount : getMediaFileCount();
  res.json({ ...rescanState, fileCount });
});

// GET /api/gallery/media/:folder/:filename — serve actual file
//
// Responds 400 for paths that contain '..' or resolve outside MEDIA_PATH,
// and 404 when the file cannot be sent.
router.get('/api/gallery/media/:folder/:filename', (req, res) => {
  const { folder, filename } = req.params;

  // Prevent path traversal
  if (folder.includes('..') || filename.includes('..')) {
    return res.status(400).json({ error: 'Invalid path' });
  }

  // Resolve against the process working directory, exactly like the fs calls
  // elsewhere in this module. The previous `{ root: '/' }` made a relative
  // MEDIA_PATH (the default './data/media') resolve against the filesystem
  // root instead, so files that had been indexed could not be served.
  const mediaRoot = resolve(MEDIA_PATH);
  const filePath = resolve(join(MEDIA_PATH, folder, filename));

  // Defence in depth: the resolved path must stay inside the media root.
  const rootPrefix = mediaRoot.endsWith(sep) ? mediaRoot : mediaRoot + sep;
  if (!filePath.startsWith(rootPrefix)) {
    return res.status(400).json({ error: 'Invalid path' });
  }

  res.sendFile(filePath, (err) => {
    if (err && !res.headersSent) {
      res.status(404).json({ error: 'File not found' });
    }
  });
});

// --- Video Thumbnails ---

/**
 * Compute where the cached thumbnail for a media file lives.
 *
 * The thumbnail sits in the THUMB_DIR subdirectory of the file's folder and
 * uses the file's name with its final extension swapped for '.jpg' (a name
 * without an extension is used unchanged).
 *
 * @param {string} folder - Folder name under MEDIA_PATH.
 * @param {string} filename - Media file name inside that folder.
 * @returns {{ thumbDir: string, thumbPath: string }}
 */
function getThumbPath(folder, filename) {
  const thumbDir = join(MEDIA_PATH, folder, THUMB_DIR);
  const thumbPath = join(thumbDir, filename.replace(/\.[^.]+$/, '.jpg'));
  return { thumbDir, thumbPath };
}

/**
 * Return the path of the thumbnail for a video, generating it with ffmpeg
 * (one frame taken at the 1 second mark, scaled to 320px wide) if needed.
 *
 * Concurrent calls for the same folder/filename share one ffmpeg run.
 *
 * @param {string} folder - Folder name under MEDIA_PATH.
 * @param {string} filename - Video file name inside that folder.
 * @returns {Promise<string | null>} The thumbnail path, or null when
 *   generation failed (the error is logged, never thrown).
 */
async function generateThumb(folder, filename) {
  const videoPath = join(MEDIA_PATH, folder, filename);
  const { thumbDir, thumbPath } = getThumbPath(folder, filename);
  const key = `${folder}/${filename}`;

  // Check in-flight work BEFORE the on-disk cache: while ffmpeg is still
  // writing, the output file already exists but is incomplete.
  const pending = thumbInFlight.get(key);
  if (pending) return pending;

  if (existsSync(thumbPath)) return thumbPath;

  const render = async () => {
    try {
      mkdirSync(thumbDir, { recursive: true });
      await execFileAsync('ffmpeg', [
        '-ss', '1',
        '-i', videoPath,
        '-frames:v', '1',
        '-vf', 'scale=320:-1',
        '-q:v', '6',
        '-y',
        thumbPath,
      ], { timeout: 10000 });

      // ffmpeg can exit cleanly without writing a frame (for example when the
      // seek point lies past the end of the clip); report that as a failure.
      if (!existsSync(thumbPath)) {
        throw new Error('ffmpeg produced no output file');
      }
      return thumbPath;
    } catch (err) {
      console.error(`[gallery] thumb failed for ${key}:`, err.message);
      // Drop any partial output so it is not mistaken for a cached thumbnail.
      try {
        if (existsSync(thumbPath)) unlinkSync(thumbPath);
      } catch { /* best effort */ }
      return null;
    }
  };

  // `.finally` callbacks always run asynchronously, so the entry is removed
  // only after it has been registered below — even when render() fails before
  // its first await. (With the cleanup inside the async body, such a failure
  // deleted the key first and then left a settled null promise in the map.)
  const promise = render().finally(() => {
    thumbInFlight.delete(key);
  });

  thumbInFlight.set(key, promise);
  return promise;
}

// GET /api/gallery/thumb/:folder/:filename — serve or generate a video thumbnail
//
// Responses: 400 for paths containing '..'; 404 when neither a cached
// thumbnail nor the source file exists (or a cached thumbnail cannot be sent);
// 500 when generation or sending a freshly generated thumbnail fails.
router.get('/api/gallery/thumb/:folder/:filename', async (req, res) => {
  const { folder, filename } = req.params;
  if (folder.includes('..') || filename.includes('..')) {
    return res.status(400).json({ error: 'Invalid path' });
  }

  const { thumbPath } = getThumbPath(folder, filename);
  const wasCached = existsSync(thumbPath);

  // Do not spawn ffmpeg for a source file that is not there.
  if (!wasCached && !existsSync(join(MEDIA_PATH, folder, filename))) {
    return res.status(404).json({ error: 'Not found' });
  }

  // Always go through generateThumb: it returns an existing thumbnail
  // right away and otherwise generates it, sharing concurrent work.
  let result = null;
  try {
    result = await generateThumb(folder, filename);
  } catch (err) {
    console.error(`[gallery] thumb failed for ${folder}/${filename}:`, err.message);
  }

  if (!result || !existsSync(result)) {
    return res.status(500).json({ error: 'Thumbnail generation failed' });
  }

  // Send an absolute path resolved against the working directory, matching
  // the fs calls above. `{ root: '/' }` resolved a relative MEDIA_PATH against
  // the filesystem root instead.
  res.sendFile(resolve(result), (err) => {
    if (!err || res.headersSent) return;
    if (wasCached) {
      res.status(404).json({ error: 'Not found' });
    } else {
      res.status(500).json({ error: 'Failed to serve thumbnail' });
    }
  });
});

// Bulk thumbnail generation state
//   running - a bulk run is in progress
//   total   - number of videos that were missing a thumbnail when it started
//   done    - thumbnails generated so far
//   errors  - videos for which generation failed
let thumbGenState = { running: false, total: 0, done: 0, errors: 0 };

// POST /api/gallery/generate-thumbs — bulk generate all video thumbnails
//
// Responds immediately; generation continues in the background with at most
// three ffmpeg runs at a time. Poll the status endpoint for progress.
router.post('/api/gallery/generate-thumbs', (req, res) => {
  if (thumbGenState.running) {
    return res.json({ status: 'already_running', ...thumbGenState });
  }

  // Reading the media root can fail (missing directory, permissions). Report
  // it as a JSON error instead of letting the exception escape the handler.
  let dirs;
  try {
    dirs = readdirSync(MEDIA_PATH, { withFileTypes: true })
      .filter((e) => e.isDirectory() && !e.name.startsWith('.') && !e.name.startsWith('_'));
  } catch (err) {
    console.error('[gallery] Failed to read media path:', err.message);
    return res.status(500).json({ error: 'Failed to read media path' });
  }

  // Collect all videos that do not have a cached thumbnail yet
  const videos = [];
  for (const dir of dirs) {
    const dirPath = join(MEDIA_PATH, dir.name);
    let files;
    try {
      files = readdirSync(dirPath);
    } catch {
      continue;
    }
    for (const file of files) {
      if (file.startsWith('.')) continue;
      if (!VIDEO_EXTS.has(extname(file).toLowerCase())) continue;
      const { thumbPath } = getThumbPath(dir.name, file);
      if (!existsSync(thumbPath)) {
        videos.push({ folder: dir.name, filename: file });
      }
    }
  }

  if (videos.length === 0) {
    return res.json({ status: 'done', total: 0, done: 0, errors: 0, message: 'All thumbnails already exist' });
  }

  thumbGenState = { running: true, total: videos.length, done: 0, errors: 0 };
  res.json({ status: 'started', total: videos.length });

  // Run in background with concurrency limit
  (async () => {
    const CONCURRENCY = 3;
    let cursor = 0;

    // Each worker pulls the next unclaimed video until the list is exhausted.
    const worker = async () => {
      while (cursor < videos.length) {
        const { folder, filename } = videos[cursor++];
        try {
          const result = await generateThumb(folder, filename);
          if (result) thumbGenState.done++;
          else thumbGenState.errors++;
        } catch (err) {
          console.error(`[gallery] thumb failed for ${folder}/${filename}:`, err.message);
          thumbGenState.errors++;
        }
      }
    };

    try {
      await Promise.all(Array.from({ length: Math.min(CONCURRENCY, videos.length) }, () => worker()));
    } finally {
      // Always clear the flag, otherwise one unexpected failure would block
      // every later bulk run with 'already_running'.
      thumbGenState.running = false;
    }
  })();
});

// GET /api/gallery/generate-thumbs/status — check bulk generation progress
// Returns the current thumbGenState object as JSON.
router.get('/api/gallery/generate-thumbs/status', (_req, res) => {
  res.json(thumbGenState);
});

// --- Duplicate File Scanning ---

// Progress of the duplicate scan: whether it is running, how many candidate
// files there are, how many have been hashed, and how many groups were found.
let duplicateScanState = { running: false, total: 0, done: 0, groups: 0 };

// Duplicate groups found by the most recent scan (arrays of file descriptors).
let duplicateGroups = [];

/**
 * Compute the MD5 hex digest of the leading bytes of a file.
 *
 * @param {string} filePath - File to read.
 * @param {number} [bytes=65536] - Number of leading bytes to hash; a shorter
 *   file is hashed in full.
 * @returns {Promise<string>} Hex digest; rejects if the file cannot be read.
 */
function hashFilePartial(filePath, bytes = 65536) {
  return new Promise((fulfil, fail) => {
    const digest = createHash('md5');
    createReadStream(filePath, { start: 0, end: bytes - 1 })
      .on('data', (chunk) => {
        digest.update(chunk);
      })
      .on('error', fail)
      .on('end', () => {
        fulfil(digest.digest('hex'));
      });
  });
}

// POST /api/gallery/scan-duplicates — start background duplicate scan
//
// Phase 1 (synchronous, before responding) groups media files by byte size.
// Phase 2 (background) hashes the files of every size group with more than
// one member and records groups whose hashes match.
//
// NOTE(review): hashFilePartial is called with its default length, so two
// files count as duplicates when their size and their leading 64 KiB match;
// the remainder of the files is not compared.
router.post('/api/gallery/scan-duplicates', (req, res) => {
  if (duplicateScanState.running) {
    return res.json({ status: 'already_running', ...duplicateScanState });
  }

  // Reading the media root can fail (missing directory, permissions). Report
  // it as a JSON error instead of letting the exception escape the handler.
  let dirs;
  try {
    dirs = readdirSync(MEDIA_PATH, { withFileTypes: true })
      .filter((e) => e.isDirectory() && !e.name.startsWith('.') && !e.name.startsWith('_'));
  } catch (err) {
    console.error('[gallery] Failed to read media path:', err.message);
    return res.status(500).json({ error: 'Failed to read media path' });
  }

  // Phase 1: group all files by size
  const bySize = new Map();
  for (const dir of dirs) {
    const dirPath = join(MEDIA_PATH, dir.name);
    let files;
    try {
      files = readdirSync(dirPath);
    } catch {
      continue;
    }
    for (const file of files) {
      if (file.startsWith('.')) continue;
      const mediaType = getMediaType(file);
      if (!mediaType) continue;
      const filePath = join(dirPath, file);
      try {
        const stat = statSync(filePath);
        if (!bySize.has(stat.size)) bySize.set(stat.size, []);
        bySize.get(stat.size).push({
          folder: dir.name,
          filename: file,
          type: mediaType,
          size: stat.size,
          modified: stat.mtimeMs,
          filePath,
        });
      } catch {
        continue;
      }
    }
  }

  // Filter to only sizes with multiple files (potential dupes)
  const candidates = [...bySize.values()].filter((files) => files.length > 1);

  const totalFiles = candidates.reduce((sum, g) => sum + g.length, 0);
  duplicateScanState = { running: true, total: totalFiles, done: 0, groups: 0 };
  duplicateGroups = [];
  res.json({ status: 'started', total: totalFiles, sizeGroups: candidates.length });

  // Phase 2: hash candidates in background
  (async () => {
    try {
      for (const sizeGroup of candidates) {
        const byHash = new Map();
        for (const file of sizeGroup) {
          try {
            const hash = await hashFilePartial(file.filePath);
            if (!byHash.has(hash)) byHash.set(hash, []);
            byHash.get(hash).push(file);
          } catch { /* skip unreadable */ }
          duplicateScanState.done++;
        }
        for (const [, files] of byHash) {
          if (files.length > 1) {
            duplicateGroups.push(files.map(({ filePath, ...rest }) => ({
              ...rest,
              path: filePath,
              url: `/api/gallery/media/${encodeURIComponent(rest.folder)}/${encodeURIComponent(rest.filename)}`,
              thumbUrl: rest.type === 'video'
                ? `/api/gallery/thumb/${encodeURIComponent(rest.folder)}/${encodeURIComponent(rest.filename)}`
                : undefined,
            })));
            duplicateScanState.groups = duplicateGroups.length;
          }
        }
      }
    } catch (err) {
      console.error('[gallery] Duplicate scan failed:', err.message);
    } finally {
      // Always clear the flag, otherwise one unexpected failure would block
      // every later scan with 'already_running'.
      duplicateScanState.running = false;
    }
  })();
});

// GET /api/gallery/scan-duplicates/status
// Returns the current duplicateScanState object as JSON.
router.get('/api/gallery/scan-duplicates/status', (_req, res) => {
  res.json(duplicateScanState);
});

// GET /api/gallery/duplicates — return found duplicate groups (paginated)
//
// Query parameters: offset (default 0) and limit (default 20), both
// non-negative integers. Responds with { total, offset, limit, groups }.
router.get('/api/gallery/duplicates', (req, res) => {
  // Missing, non-numeric or negative values fall back to the default. NaN was
  // previously echoed back in the response, and a negative offset made
  // `slice` count from the end of the list.
  const toCount = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };

  const offset = toCount(req.query.offset, 0);
  const limit = toCount(req.query.limit, 20);
  const page = duplicateGroups.slice(offset, offset + limit);
  res.json({ total: duplicateGroups.length, offset, limit, groups: page });
});

// DELETE /api/gallery/media/:folder/:filename — delete a media file
//
// Removes the file from disk and from the DB index, deletes its cached
// thumbnail if one exists, and drops it from the in-memory duplicate groups.
// Responds 400 for paths containing '..', 404 when the file does not exist,
// 500 with the error message when deletion fails, otherwise { ok: true }.
router.delete('/api/gallery/media/:folder/:filename', (req, res) => {
  const { folder, filename } = req.params;

  const hasTraversal = [folder, filename].some((part) => part.includes('..'));
  if (hasTraversal) {
    return res.status(400).json({ error: 'Invalid path' });
  }

  const target = join(MEDIA_PATH, folder, filename);
  if (!existsSync(target)) {
    return res.status(404).json({ error: 'File not found' });
  }

  try {
    unlinkSync(target);
    removeMediaFile(folder, filename);

    // Also delete cached thumbnail if it exists
    const { thumbPath } = getThumbPath(folder, filename);
    if (existsSync(thumbPath)) {
      try {
        unlinkSync(thumbPath);
      } catch { /* ignore */ }
    }

    // Remove from in-memory duplicate groups (first group that lists the file)
    const isTarget = (f) => f.folder === folder && f.filename === filename;
    const owningGroup = duplicateGroups.find((group) => group.some(isTarget));
    if (owningGroup) {
      owningGroup.splice(owningGroup.findIndex(isTarget), 1);
    }

    // Remove empty or single-item groups
    duplicateGroups = duplicateGroups.filter((group) => group.length > 1);
    duplicateScanState.groups = duplicateGroups.length;

    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// POST /api/gallery/duplicates/clean — delete all duplicates, keeping one copy per group
//
// Responds with { ok, deleted, freed, errors }: the number of files removed
// from disk, the sum of their recorded sizes, and the number of files whose
// removal failed.
router.post('/api/gallery/duplicates/clean', (req, res) => {
  let deleted = 0;
  let freed = 0;
  let errors = 0;

  for (const group of duplicateGroups) {
    // Keep the first copy that is still on disk. Blindly keeping group[0]
    // would delete every remaining copy if that file had been removed since
    // the scan.
    const keeperIndex = group.findIndex((file) => existsSync(join(MEDIA_PATH, file.folder, file.filename)));
    if (keeperIndex === -1) continue; // nothing left on disk for this group

    for (const [index, file] of group.entries()) {
      if (index === keeperIndex) continue;

      const filePath = join(MEDIA_PATH, file.folder, file.filename);
      try {
        if (existsSync(filePath)) {
          unlinkSync(filePath);
          freed += file.size;
          deleted++;
        }

        // Keep the DB index in step with the disk, as the single-file DELETE
        // route does; otherwise the removed files stay in the index.
        removeMediaFile(file.folder, file.filename);

        const { thumbPath } = getThumbPath(file.folder, file.filename);
        if (existsSync(thumbPath)) {
          try {
            unlinkSync(thumbPath);
          } catch { /* ignore */ }
        }
      } catch {
        errors++;
      }
    }
  }

  // Clear all groups since each now has at most 1 file
  duplicateGroups = [];
  duplicateScanState.groups = 0;

  res.json({ ok: true, deleted, freed, errors });
});

export default router;