Add DRM downloads, scrapers, gallery index, and UI improvements

- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-16 11:29:11 -06:00
parent c60de19348
commit 1e5f54f60b
28 changed files with 4736 additions and 203 deletions

View File

@@ -1,10 +1,22 @@
import { Router } from 'express';
import { readdirSync, statSync } from 'fs';
import { readdirSync, statSync, existsSync, mkdirSync, unlinkSync, createReadStream } from 'fs';
import { join, extname } from 'path';
import { getPostDateByFilename, getSetting } from './db.js';
import { execFile } from 'child_process';
import { promisify } from 'util';
import { createHash } from 'crypto';
import {
getPostDateByFilename, getSetting,
upsertMediaFileBatch, removeMediaFile, removeStaleFiles,
getMediaFolders, getMediaFiles, getMediaFileCount, getAllIndexedFolders,
} from './db.js';
const execFileAsync = promisify(execFile);
const router = Router();
const MEDIA_PATH = process.env.MEDIA_PATH || './data/media';
const THUMB_DIR = '.thumbs';
// In-flight thumb generation promises (dedup concurrent requests for same file)
const thumbInFlight = new Map();
const IMAGE_EXTS = new Set(['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']);
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v']);
@@ -16,113 +28,166 @@ function getMediaType(filename) {
return null;
}
// GET /api/gallery/folders — list all folders with file counts
router.get('/api/gallery/folders', (req, res, next) => {
try {
const entries = readdirSync(MEDIA_PATH, { withFileTypes: true });
const folders = [];
// --- Background filesystem scanner ---
for (const entry of entries) {
if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name.startsWith('_')) continue;
const folderPath = join(MEDIA_PATH, entry.name);
const files = readdirSync(folderPath).filter((f) => {
return !f.startsWith('.') && getMediaType(f) !== null;
});
if (files.length > 0) {
const images = files.filter((f) => getMediaType(f) === 'image').length;
const videos = files.filter((f) => getMediaType(f) === 'video').length;
folders.push({ name: entry.name, total: files.length, images, videos });
}
/**
 * Synchronise the media index in the database with what is on disk under MEDIA_PATH.
 *
 * Every immediate sub-directory of MEDIA_PATH whose name does not start with
 * '.' or '_' is treated as a gallery folder. For each folder the recognised
 * media files are stat'ed and upserted in one batch, then `removeStaleFiles`
 * is called with the filenames seen so rows for vanished files can be dropped.
 * Folders that are indexed but were not seen on disk are purged the same way
 * with an empty filename list.
 *
 * The function is fully synchronous and best-effort: an unreadable folder or
 * file is skipped rather than aborting the scan.
 *
 * @returns {void}
 */
export function scanMediaFiles() {
  const startTime = Date.now();
  console.log('[gallery] Starting media index scan...');

  if (!existsSync(MEDIA_PATH)) {
    console.log('[gallery] Media path does not exist, skipping scan');
    return;
  }

  let entries;
  try {
    entries = readdirSync(MEDIA_PATH, { withFileTypes: true });
  } catch (err) {
    console.error('[gallery] Failed to read media path:', err.message);
    return;
  }

  const scannedFolders = new Set();
  let totalFiles = 0;

  for (const entry of entries) {
    if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name.startsWith('_')) continue;

    const folderName = entry.name;
    scannedFolders.add(folderName);
    const dirPath = join(MEDIA_PATH, folderName);

    let files;
    try {
      files = readdirSync(dirPath);
    } catch {
      // Unreadable folder: skip it, but keep it in scannedFolders so its
      // existing index rows are not purged below.
      continue;
    }

    const batch = [];
    const validFilenames = [];

    for (const file of files) {
      if (file.startsWith('.')) continue;
      const mediaType = getMediaType(file);
      if (!mediaType) continue;

      // Recorded before stat() so a file that exists but cannot be stat'ed
      // is still reported as present to removeStaleFiles.
      validFilenames.push(file);

      const filePath = join(dirPath, file);
      try {
        const stat = statSync(filePath);
        const postedAt = getPostDateByFilename(file);
        batch.push({
          folder: folderName,
          filename: file,
          type: mediaType,
          size: stat.size,
          modified: stat.mtimeMs,
          postedAt: postedAt || null,
        });
      } catch {
        continue;
      }
    }

    if (batch.length > 0) {
      upsertMediaFileBatch(batch);
      totalFiles += batch.length;
    }

    // Remove DB rows for files that no longer exist in this folder
    removeStaleFiles(folderName, validFilenames);
  }

  // Remove DB rows for folders that no longer exist on disk
  const indexedFolders = getAllIndexedFolders();
  for (const f of indexedFolders) {
    if (!scannedFolders.has(f)) {
      removeStaleFiles(f, []);
    }
  }

  const elapsed = ((Date.now() - startTime) / 1000).toFixed(2);
  const dbCount = getMediaFileCount();
  console.log(`[gallery] Index scan complete: ${totalFiles} files in ${scannedFolders.size} folders (${elapsed}s). DB total: ${dbCount}`);
}
// GET /api/gallery/folders — folder list with file counts, answered from the DB index
router.get('/api/gallery/folders', (req, res, next) => {
  let folderSummaries;
  try {
    folderSummaries = getMediaFolders();
    res.json(folderSummaries);
  } catch (err) {
    // Hand any failure to the Express error middleware.
    next(err);
  }
});
// GET /api/gallery/files?folder=&folders=&type=&sort=&offset=&limit= (from DB index)
//
// Query parameters:
//   folder  — single folder name (optional)
//   folders — comma-separated folder names (optional)
//   type    — passed through to getMediaFiles, defaults to 'all'
//   sort    — passed through to getMediaFiles, defaults to 'latest'
//   offset  — page start, defaults to 0
//   limit   — page size, defaults to 50
//
// Responds with { total, offset, limit, files }. Filtering, sorting and paging
// are delegated to getMediaFiles; this handler only shapes rows into API objects.
router.get('/api/gallery/files', (req, res, next) => {
  try {
    const { folder, type, sort, offset, limit } = req.query;

    // String() also copes with a repeated ?folders=a&folders=b, which Express
    // parses into an array (its string form is already comma-joined).
    const foldersParam = req.query.folders;
    const foldersArr = foldersParam
      ? String(foldersParam).split(',').map((f) => f.trim()).filter(Boolean)
      : undefined;

    // Fall back to the defaults when the value is not a number, so NaN never
    // reaches the query layer or the JSON response.
    const parsedOffset = Number.parseInt(offset || '0', 10);
    const parsedLimit = Number.parseInt(limit || '50', 10);
    const offsetNum = Number.isNaN(parsedOffset) ? 0 : parsedOffset;
    const limitNum = Number.isNaN(parsedLimit) ? 50 : parsedLimit;

    // Resolved once per request rather than once per file.
    const hlsEnabled = (getSetting('hls_enabled') || process.env.HLS_ENABLED) === 'true';

    const { total, rows } = getMediaFiles({
      folder: folder || undefined,
      folders: foldersArr,
      type: type || 'all',
      sort: sort || 'latest',
      offset: offsetNum,
      limit: limitNum,
    });

    const files = rows.map((r) => {
      const fileObj = {
        folder: r.folder,
        filename: r.filename,
        type: r.type,
        size: r.size,
        modified: r.modified,
        postedAt: r.posted_at || null,
        url: `/api/gallery/media/${encodeURIComponent(r.folder)}/${encodeURIComponent(r.filename)}`,
      };
      if (hlsEnabled && r.type === 'video') {
        fileObj.hlsUrl = `/api/hls/${encodeURIComponent(r.folder)}/${encodeURIComponent(r.filename)}/master.m3u8`;
      }
      return fileObj;
    });

    res.json({ total, offset: offsetNum, limit: limitNum, files });
  } catch (err) {
    next(err);
  }
});
// Rescan bookkeeping shared by the two routes below.
//   running   — true while a scan triggered through the API is executing
//   lastRun   — ISO timestamp of when the last triggered scan finished (null until then)
//   fileCount — indexed file count captured after the last successful scan
//   elapsed   — duration of the last triggered scan in seconds (null until one has finished)
let rescanState = { running: false, lastRun: null, fileCount: 0, elapsed: null };

// POST /api/gallery/rescan — trigger a media index rescan
// Responds immediately with { status: 'started' }; the scan itself runs on the
// next event-loop turn. A request made while a scan is running is answered
// with { status: 'already_running', ...state } and does not start another one.
router.post('/api/gallery/rescan', (req, res) => {
  if (rescanState.running) {
    return res.json({ status: 'already_running', ...rescanState });
  }

  rescanState = { running: true, lastRun: null, fileCount: 0, elapsed: null };
  res.json({ status: 'started' });

  setImmediate(() => {
    const startedAt = Date.now();
    try {
      scanMediaFiles();
      rescanState.fileCount = getMediaFileCount();
    } catch (err) {
      console.error('[gallery] Rescan failed:', err.message);
    } finally {
      rescanState.running = false;
      rescanState.lastRun = new Date().toISOString();
      // Previously never populated, so clients always saw `elapsed: null`.
      rescanState.elapsed = Number(((Date.now() - startedAt) / 1000).toFixed(2));
    }
  });
});

// GET /api/gallery/rescan/status — current rescan state
// While idle, fileCount is read live from the index instead of the cached value.
router.get('/api/gallery/rescan/status', (req, res) => {
  res.json({ ...rescanState, fileCount: rescanState.running ? rescanState.fileCount : getMediaFileCount() });
});
// GET /api/gallery/media/:folder/:filename — serve actual file
router.get('/api/gallery/media/:folder/:filename', (req, res) => {
const { folder, filename } = req.params;
@@ -140,4 +205,303 @@ router.get('/api/gallery/media/:folder/:filename', (req, res) => {
});
});
// --- Video Thumbnails ---
/**
 * Work out where the cached thumbnail for a media file lives.
 *
 * The thumbnail sits in the THUMB_DIR sub-directory of the media folder and
 * carries the source filename with its final extension swapped for `.jpg`
 * (a name without an extension is used unchanged).
 *
 * @param {string} folder - Folder name under MEDIA_PATH.
 * @param {string} filename - Source media filename.
 * @returns {{ thumbDir: string, thumbPath: string }} Cache directory and full thumbnail path.
 */
function getThumbPath(folder, filename) {
  const jpgName = filename.replace(/\.[^.]+$/, '.jpg');
  const cacheDir = join(MEDIA_PATH, folder, THUMB_DIR);
  const cachedFile = join(cacheDir, jpgName);
  return { thumbDir: cacheDir, thumbPath: cachedFile };
}
/**
 * Return the path of a video's thumbnail, generating it with ffmpeg if needed.
 *
 * Concurrent calls for the same folder/filename share one ffmpeg run through
 * the `thumbInFlight` map. Failures are logged and reported as `null`; the
 * returned promise never rejects.
 *
 * @param {string} folder - Folder name under MEDIA_PATH.
 * @param {string} filename - Video filename inside that folder.
 * @returns {Promise<string|null>} Thumbnail path, or null when generation failed.
 */
async function generateThumb(folder, filename) {
  const key = `${folder}/${filename}`;

  // Check in-flight work BEFORE the cache: while ffmpeg is still writing, the
  // output file may already exist on disk but be incomplete.
  const pending = thumbInFlight.get(key);
  if (pending) return pending;

  const videoPath = join(MEDIA_PATH, folder, filename);
  const { thumbDir, thumbPath } = getThumbPath(folder, filename);
  if (existsSync(thumbPath)) return thumbPath;

  const run = async () => {
    try {
      if (!existsSync(thumbDir)) mkdirSync(thumbDir, { recursive: true });
      await execFileAsync('ffmpeg', [
        '-ss', '1',
        '-i', videoPath,
        '-frames:v', '1',
        '-vf', 'scale=320:-1',
        '-q:v', '6',
        '-y',
        thumbPath,
      ], { timeout: 10000 });
      // A zero exit status is not proof that a frame was written (e.g. the
      // seek point may lie beyond the end of a very short clip).
      if (!existsSync(thumbPath)) {
        throw new Error('ffmpeg finished without writing a thumbnail');
      }
      return thumbPath;
    } catch (err) {
      console.error(`[gallery] thumb failed for ${key}:`, err.message);
      // Drop any partial output (e.g. after a timeout kill) so it is not
      // mistaken for a valid cached thumbnail on the next request.
      try {
        if (existsSync(thumbPath)) unlinkSync(thumbPath);
      } catch { /* best effort */ }
      return null;
    }
  };

  // The cleanup is attached with .finally() so it always runs after the
  // promise has been registered below. With an inline try/finally, a
  // synchronous failure (mkdirSync throwing) would delete the key before it
  // was set and leave a resolved-null entry in the map forever.
  const promise = run().finally(() => thumbInFlight.delete(key));
  thumbInFlight.set(key, promise);
  return promise;
}
// GET /api/gallery/thumb/:folder/:filename — serve a video thumbnail, creating it on first request
router.get('/api/gallery/thumb/:folder/:filename', async (req, res) => {
  const { folder, filename } = req.params;

  // Reject anything that could climb out of the media directory.
  const looksUnsafe = [folder, filename].some((part) => part.includes('..'));
  if (looksUnsafe) {
    res.status(400).json({ error: 'Invalid path' });
    return;
  }

  // Stream a file; if sending fails before headers went out, answer with the
  // given status and message instead.
  const sendThumb = (filePath, failStatus, failMessage) => {
    res.sendFile(filePath, { root: '/' }, (err) => {
      if (!err || res.headersSent) return;
      res.status(failStatus).json({ error: failMessage });
    });
  };

  // Fast path: thumbnail already cached on disk.
  const cached = getThumbPath(folder, filename).thumbPath;
  if (existsSync(cached)) {
    sendThumb(cached, 404, 'Not found');
    return;
  }

  // Slow path: generate on demand.
  const generated = await generateThumb(folder, filename);
  if (!generated || !existsSync(generated)) {
    res.status(500).json({ error: 'Thumbnail generation failed' });
    return;
  }
  sendThumb(generated, 500, 'Failed to serve thumbnail');
});
// Bulk thumbnail generation state
//   running — a bulk job is in progress
//   total   — number of videos that were missing a thumbnail when the job started
//   done    — thumbnails generated successfully so far
//   errors  — videos for which generation failed so far
let thumbGenState = { running: false, total: 0, done: 0, errors: 0 };

// POST /api/gallery/generate-thumbs — bulk generate all video thumbnails
// Collects every video without a cached thumbnail, answers immediately, then
// generates the thumbnails in the background with a small concurrency limit.
router.post('/api/gallery/generate-thumbs', (req, res) => {
  if (thumbGenState.running) {
    return res.json({ status: 'already_running', ...thumbGenState });
  }

  // Collect all videos that still need a thumbnail
  const videos = [];
  const dirs = readdirSync(MEDIA_PATH, { withFileTypes: true })
    .filter((e) => e.isDirectory() && !e.name.startsWith('.') && !e.name.startsWith('_'));

  for (const dir of dirs) {
    const dirPath = join(MEDIA_PATH, dir.name);
    try {
      const files = readdirSync(dirPath);
      for (const file of files) {
        if (file.startsWith('.')) continue;
        const ext = extname(file).toLowerCase();
        if (VIDEO_EXTS.has(ext)) {
          const { thumbPath } = getThumbPath(dir.name, file);
          if (!existsSync(thumbPath)) {
            videos.push({ folder: dir.name, filename: file });
          }
        }
      }
    } catch {
      // Unreadable folder: skip it, the rest can still be processed.
      continue;
    }
  }

  if (videos.length === 0) {
    return res.json({ status: 'done', total: 0, done: 0, errors: 0, message: 'All thumbnails already exist' });
  }

  thumbGenState = { running: true, total: videos.length, done: 0, errors: 0 };
  res.json({ status: 'started', total: videos.length });

  // Run in background with concurrency limit: each worker pulls the next
  // unclaimed video off the shared cursor until the list is exhausted.
  const CONCURRENCY = 3;
  let cursor = 0;
  const worker = async () => {
    while (cursor < videos.length) {
      const { folder, filename } = videos[cursor++];
      const result = await generateThumb(folder, filename);
      if (result) thumbGenState.done++;
      else thumbGenState.errors++;
    }
  };

  const workerCount = Math.min(CONCURRENCY, videos.length);
  Promise.all(Array.from({ length: workerCount }, () => worker()))
    .catch((err) => {
      console.error('[gallery] Bulk thumbnail generation failed:', err.message);
    })
    .finally(() => {
      // Always clear the flag; otherwise an unexpected failure would leave
      // the endpoint answering 'already_running' until restart.
      thumbGenState.running = false;
    });
});
// GET /api/gallery/generate-thumbs/status — report how far the bulk thumbnail job has got
router.get('/api/gallery/generate-thumbs/status', (req, res) => {
  const progress = thumbGenState;
  res.json(progress);
});
// --- Duplicate File Scanning ---
// Progress of the background duplicate scan:
//   running — a scan is in progress
//   total   — number of files that share their size with at least one other file
//   done    — how many of those candidates have been processed so far
//   groups  — number of duplicate groups found so far (mirrors duplicateGroups.length)
let duplicateScanState = { running: false, total: 0, done: 0, groups: 0 };
// In-memory result of the most recent scan: an array of groups, each group an
// array of file descriptors that matched on size and partial hash.
let duplicateGroups = [];
/**
 * Compute the MD5 hex digest of the leading portion of a file.
 *
 * Only the first `bytes` bytes are read (fewer when the file is shorter), so
 * the digest identifies the file's prefix rather than its whole content.
 *
 * @param {string} filePath - File to read.
 * @param {number} [bytes=65536] - Maximum number of leading bytes to hash.
 * @returns {Promise<string>} Hex-encoded MD5 digest; rejects if the file cannot be read.
 */
async function hashFilePartial(filePath, bytes = 65536) {
  const digest = createHash('md5');
  // `end` is inclusive, hence bytes - 1.
  const head = createReadStream(filePath, { start: 0, end: bytes - 1 });
  for await (const piece of head) {
    digest.update(piece);
  }
  return digest.digest('hex');
}
// POST /api/gallery/scan-duplicates — start background duplicate scan
//
// Phase 1 (synchronous, before responding): stat every media file and bucket
// files by byte size; only sizes shared by two or more files stay candidates.
// Phase 2 (background): hash each candidate with hashFilePartial and group
// files within a size bucket that share a digest.
//
// NOTE: hashFilePartial is called with its default byte count, so a "duplicate"
// here means same size and same leading bytes, not a full-content comparison.
router.post('/api/gallery/scan-duplicates', (req, res) => {
  if (duplicateScanState.running) {
    return res.json({ status: 'already_running', ...duplicateScanState });
  }

  // Phase 1: group all files by size
  const bySize = new Map();
  const dirs = readdirSync(MEDIA_PATH, { withFileTypes: true })
    .filter((e) => e.isDirectory() && !e.name.startsWith('.') && !e.name.startsWith('_'));

  for (const dir of dirs) {
    const dirPath = join(MEDIA_PATH, dir.name);
    let files;
    try { files = readdirSync(dirPath); } catch { continue; }

    for (const file of files) {
      if (file.startsWith('.')) continue;
      const mediaType = getMediaType(file);
      if (!mediaType) continue;

      const filePath = join(dirPath, file);
      try {
        const stat = statSync(filePath);
        const key = stat.size;
        if (!bySize.has(key)) bySize.set(key, []);
        bySize.get(key).push({ folder: dir.name, filename: file, type: mediaType, size: stat.size, modified: stat.mtimeMs, filePath });
      } catch { continue; }
    }
  }

  // Filter to only sizes with multiple files (potential dupes)
  const candidates = [];
  for (const [, files] of bySize) {
    if (files.length > 1) candidates.push(files);
  }

  const totalFiles = candidates.reduce((sum, g) => sum + g.length, 0);
  duplicateScanState = { running: true, total: totalFiles, done: 0, groups: 0 };
  duplicateGroups = [];
  res.json({ status: 'started', total: totalFiles, sizeGroups: candidates.length });

  // Phase 2: hash candidates in background
  (async () => {
    try {
      for (const sizeGroup of candidates) {
        const byHash = new Map();
        for (const file of sizeGroup) {
          try {
            const hash = await hashFilePartial(file.filePath);
            if (!byHash.has(hash)) byHash.set(hash, []);
            byHash.get(hash).push(file);
          } catch { /* skip unreadable */ }
          // Counted even when unreadable so `done` can reach `total`.
          duplicateScanState.done++;
        }

        for (const [, files] of byHash) {
          if (files.length > 1) {
            duplicateGroups.push(files.map(({ filePath, ...rest }) => ({
              ...rest,
              path: filePath,
              url: `/api/gallery/media/${encodeURIComponent(rest.folder)}/${encodeURIComponent(rest.filename)}`,
              thumbUrl: rest.type === 'video'
                ? `/api/gallery/thumb/${encodeURIComponent(rest.folder)}/${encodeURIComponent(rest.filename)}`
                : undefined,
            })));
            duplicateScanState.groups = duplicateGroups.length;
          }
        }
      }
    } finally {
      // Always clear the flag; otherwise an unexpected failure would leave
      // the endpoint answering 'already_running' until restart.
      duplicateScanState.running = false;
    }
  })().catch((err) => {
    console.error('[gallery] Duplicate scan failed:', err.message);
  });
});
// GET /api/gallery/scan-duplicates/status — report progress of the duplicate scan
router.get('/api/gallery/scan-duplicates/status', (req, res) => {
  const progress = duplicateScanState;
  res.json(progress);
});
// GET /api/gallery/duplicates — return found duplicate groups (paginated)
// Query: offset (default 0), limit (default 20). Values that are not
// non-negative integers fall back to the defaults, so the slice below never
// receives NaN or a negative index and the JSON never carries `null` for them.
router.get('/api/gallery/duplicates', (req, res) => {
  const toCount = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  };

  const offset = toCount(req.query.offset, 0);
  const limit = toCount(req.query.limit, 20);
  const page = duplicateGroups.slice(offset, offset + limit);
  res.json({ total: duplicateGroups.length, offset, limit, groups: page });
});
// DELETE /api/gallery/media/:folder/:filename — remove a media file from disk,
// from the DB index, from the thumbnail cache and from the in-memory duplicate list
router.delete('/api/gallery/media/:folder/:filename', (req, res) => {
  const { folder, filename } = req.params;

  const hasTraversal = folder.includes('..') || filename.includes('..');
  if (hasTraversal) {
    return res.status(400).json({ error: 'Invalid path' });
  }

  const target = join(MEDIA_PATH, folder, filename);
  if (!existsSync(target)) {
    return res.status(404).json({ error: 'File not found' });
  }

  try {
    unlinkSync(target);
    removeMediaFile(folder, filename);

    // Cached thumbnail is removed on a best-effort basis.
    const cachedThumb = getThumbPath(folder, filename).thumbPath;
    if (existsSync(cachedThumb)) {
      try { unlinkSync(cachedThumb); } catch { /* ignore */ }
    }

    // Drop the file from the first duplicate group that lists it.
    const isDeleted = (f) => f.folder === folder && f.filename === filename;
    const owningGroup = duplicateGroups.find((g) => g.some(isDeleted));
    if (owningGroup) {
      owningGroup.splice(owningGroup.findIndex(isDeleted), 1);
    }

    // A group with fewer than two members is no longer a duplicate set.
    duplicateGroups = duplicateGroups.filter((g) => g.length > 1);
    duplicateScanState.groups = duplicateGroups.length;

    res.json({ ok: true });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// POST /api/gallery/duplicates/clean — delete all duplicates, keeping one copy per group
//
// For every group the first file is kept and the others are deleted from
// disk, from the DB index and from the thumbnail cache. Responds with
// { ok, deleted, freed, errors } where `freed` is the sum of the recorded
// sizes of the deleted files.
router.post('/api/gallery/duplicates/clean', (req, res) => {
  let deleted = 0;
  let freed = 0;
  let errors = 0;
  const unresolved = [];

  for (const group of duplicateGroups) {
    // Keep the first file, delete the rest
    const [kept, ...toDelete] = group;
    const failed = [];

    for (const file of toDelete) {
      const filePath = join(MEDIA_PATH, file.folder, file.filename);
      try {
        if (existsSync(filePath)) {
          unlinkSync(filePath);
          freed += file.size;
          deleted++;
        }
        // Keep the DB index in step with the disk, as the single-file DELETE
        // route does. Also runs when the file was already gone, so a stale
        // row is cleared either way.
        removeMediaFile(file.folder, file.filename);

        const { thumbPath } = getThumbPath(file.folder, file.filename);
        if (existsSync(thumbPath)) {
          try { unlinkSync(thumbPath); } catch { /* ignore */ }
        }
      } catch (err) {
        errors++;
        failed.push(file);
        console.error(`[gallery] Failed to clean duplicate ${file.folder}/${file.filename}:`, err.message);
      }
    }

    // Files that could not be removed stay listed with their kept copy so
    // the cleanup can be retried instead of being silently forgotten.
    if (failed.length > 0) unresolved.push([kept, ...failed]);
  }

  duplicateGroups = unresolved;
  duplicateScanState.groups = duplicateGroups.length;

  res.json({ ok: true, deleted, freed, errors });
});
export default router;