Files
OFApp/server/scrapers/ytdlp.js
T
Trey T 236f36aae6 Add app auth, dashboard, scheduler, video management, and new scrapers
- JWT-based app authentication with user roles, folder/route access control
- Dashboard with storage stats, health checks, and recent activity
- Auto-download/scrape scheduler (12h interval) with per-user and per-job configs
- Video upload, tagging, HLS transcoding, and detail pages
- New scrapers: LeakGallery, Mega (megajs), yt-dlp
- FlareSolverr integration for Cloudflare-protected sites
- Gallery: advanced filtering (date, size, search), sort modes, equal-mix shuffle
- Forum sites management with stored cookies/auth
- GridWall/GridCell components for responsive media grid
- Media API with folder-access permissions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 07:48:10 -05:00

301 lines
9.1 KiB
JavaScript

import { spawn } from 'child_process';
import { basename, extname, join } from 'path';
import { existsSync, mkdirSync, statSync, readdirSync } from 'fs';
import { execFile } from 'child_process';
import { promisify } from 'util';
import { insertVideo, getVideoByPath } from '../db.js';
// Promise-returning form of child_process.execFile; used below to run ffprobe and ffmpeg.
const execFileAsync = promisify(execFile);
// Root directory for downloaded videos (the yt-dlp output template and the `.thumbnails`
// folder are both joined onto it). Overridable through the VIDEOS_PATH environment variable.
const VIDEOS_PATH = process.env.VIDEOS_PATH || '/data/videos';
// Lower-case file extensions (with leading dot) that count as video files; only downloads
// with one of these extensions are registered in the library after yt-dlp exits.
const VIDEO_EXTS = new Set(['.mp4', '.mov', '.avi', '.webm', '.mkv', '.m4v', '.wmv', '.flv', '.ts']);
// Quality presets mapped to yt-dlp format strings.
// Keys are the values accepted in `config.quality`; the value is passed to yt-dlp's `-f` option.
// The `<height>p` presets cap the video height; `audio` selects an audio-only format.
const QUALITY_PRESETS = {
best: 'bestvideo+bestaudio/best',
'2160p': 'bestvideo[height<=2160]+bestaudio/best[height<=2160]',
'1080p': 'bestvideo[height<=1080]+bestaudio/best[height<=1080]',
'720p': 'bestvideo[height<=720]+bestaudio/best[height<=720]',
'480p': 'bestvideo[height<=480]+bestaudio/best[height<=480]',
audio: 'bestaudio/best',
};
/**
 * Inspect a media file with ffprobe and summarise its container and stream properties.
 *
 * @param {string} filePath - Path of the file handed to ffprobe.
 * @returns {Promise<{duration: ?number, width: ?number, height: ?number, fps: ?number,
 *   codec: ?string, bitrate: ?number, has_audio: number}>} Values that are missing, zero or
 *   unparsable are reported as null; has_audio is 1 when an audio stream exists, else 0.
 * @throws Rejects when ffprobe fails or exceeds the 60 s timeout, or when its output is not JSON.
 */
async function probeVideo(filePath) {
  const ffprobeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration,bit_rate',
    '-show_entries', 'stream=codec_name,width,height,r_frame_rate,codec_type',
    '-of', 'json',
    filePath,
  ];
  const result = await execFileAsync('ffprobe', ffprobeArgs, { timeout: 60000 });
  const report = JSON.parse(result.stdout);

  // Only the first stream of each kind is considered.
  const firstStreamOfType = (kind) => report.streams?.find((stream) => stream.codec_type === kind);
  const video = firstStreamOfType('video');
  const audio = firstStreamOfType('audio');

  const seconds = parseFloat(report.format?.duration || '0');
  const bitsPerSecond = parseInt(report.format?.bit_rate || '0', 10);

  // r_frame_rate is split on "/" into numerator and denominator; the quotient is rounded
  // to two decimals and left null when the denominator is missing or not positive.
  let framesPerSecond = null;
  const rate = video?.r_frame_rate;
  if (rate) {
    const [numerator, denominator] = rate.split('/');
    const divisor = parseInt(denominator, 10);
    if (divisor > 0) {
      framesPerSecond = Math.round((parseInt(numerator, 10) / divisor) * 100) / 100;
    }
  }

  return {
    duration: seconds ? seconds : null,
    width: video?.width || null,
    height: video?.height || null,
    fps: framesPerSecond,
    codec: video?.codec_name || null,
    bitrate: bitsPerSecond ? bitsPerSecond : null,
    has_audio: audio ? 1 : 0,
  };
}
/**
 * Render a single-frame JPEG thumbnail (scaled to 480 px wide) for a video using ffmpeg.
 *
 * The thumbnail is written to `<VIDEOS_PATH>/.thumbnails/<timestamp>_<name>.jpg`; the
 * directory is created on demand. The frame is taken at 1 s when the clip is longer than
 * 2 s, otherwise at the very start.
 *
 * @param {string} filePath - Path of the source video.
 * @returns {Promise<string>} Path of the generated thumbnail.
 * @throws Rejects when the thumbnail directory cannot be created or ffmpeg fails / times out (30 s).
 */
async function generateThumbnail(filePath) {
  const thumbDir = join(VIDEOS_PATH, '.thumbnails');
  // ffmpeg does not create missing output directories, so make sure the target exists.
  mkdirSync(thumbDir, { recursive: true });

  const filename = basename(filePath);
  // Always end in ".jpg" (even for extension-less sources) so ffmpeg can infer the output format.
  const stem = basename(filename, extname(filename));
  const thumbName = `${Date.now()}_${stem}.jpg`;
  const thumbPath = join(thumbDir, thumbName);

  // Best-effort duration lookup: it only decides the seek position, so a failure
  // simply means the thumbnail is taken from the first frame.
  let duration = 0;
  try {
    const { stdout } = await execFileAsync('ffprobe', [
      '-v', 'error', '-show_entries', 'format=duration', '-of', 'csv=p=0', filePath,
    ], { timeout: 15000 });
    duration = parseFloat(stdout.trim()) || 0;
  } catch { /* fall back to duration 0 */ }

  const seekTime = duration > 2 ? '1' : '0';
  await execFileAsync('ffmpeg', [
    '-ss', seekTime, '-i', filePath,
    '-frames:v', '1', '-vf', 'scale=480:-1', '-q:v', '4', '-y', '-update', '1',
    thumbPath,
  ], { timeout: 30000 });
  return thumbPath;
}
/**
 * Register a downloaded video file into the videos DB table.
 *
 * Skips files that are already indexed (by path). A probe failure aborts the registration;
 * a thumbnail failure does not — the video is stored with `thumbnail_path: null`.
 * This function never rejects for registration problems: every failure is reported through `log`.
 *
 * @param {string} filePath - Path of the downloaded file.
 * @param {(message: string) => void} log - Sink for human-readable status messages.
 * @returns {Promise<void>}
 */
async function registerVideo(filePath, log) {
  try {
    const filename = basename(filePath);
    if (getVideoByPath(filePath)) {
      log(`Already indexed: ${filename}`);
      return;
    }
    const stat = statSync(filePath);

    let probe;
    try {
      probe = await probeVideo(filePath);
    } catch (err) {
      log(`Probe failed for ${filename}: ${err.message}`);
      return;
    }

    // Thumbnails are optional, but surface the reason instead of failing silently.
    let thumbPath = null;
    try {
      thumbPath = await generateThumbnail(filePath);
    } catch (err) {
      log(`Thumbnail generation failed for ${filename}: ${err.message}`);
    }

    // Derive a readable title from the file name: separators become single spaces.
    const cleaned = basename(filename, extname(filename))
      .replace(/[_.-]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
    // A name made only of separators would otherwise yield an empty title.
    const title = cleaned || filename;

    insertVideo({
      title,
      filename,
      file_path: filePath,
      file_size: stat.size,
      ...probe,
      thumbnail_path: thumbPath,
      status: 'ready',
    });
    log(`Registered in library: ${title}`);
  } catch (err) {
    log(`Failed to register ${basename(filePath)}: ${err.message}`);
  }
}
/**
 * Build the yt-dlp argument vector from a download configuration.
 *
 * @param {object} config - Download options.
 * @param {string} config.url - URL to download; always emitted last, after a `--` terminator.
 * @param {string} [config.quality] - Key of QUALITY_PRESETS; unknown keys fall back to `best`.
 *   `'audio'` additionally switches to audio extraction (mp3) instead of merging to mp4.
 * @param {string} [config.customFormat] - Raw yt-dlp format string; overrides the preset.
 * @param {boolean} [config.embedMetadata]
 * @param {boolean} [config.embedThumbnail]
 * @param {boolean} [config.embedSubs]
 * @param {boolean} [config.writeSubs]
 * @param {string} [config.subLangs] - Value for `--sub-langs`.
 * @param {boolean} [config.restrictFilenames]
 * @param {string} [config.outputTemplate] - Output template joined onto VIDEOS_PATH;
 *   defaults to `%(title)s.%(ext)s`.
 * @param {boolean} [config.playlist] - Download the whole playlist instead of a single item.
 * @param {number} [config.maxDownloads] - Only applied when `playlist` is set.
 * @param {number} [config.concurrentFragments] - Only applied when greater than 1.
 * @param {string} [config.rateLimit] - Value for `--rate-limit`.
 * @param {'remove'|'mark'} [config.sponsorBlock]
 * @param {string} [config.cookiesFile] - Path passed to `--cookies`.
 * @returns {string[]} Arguments for spawning `yt-dlp`.
 */
function buildArgs(config) {
  const {
    url, quality, customFormat, embedMetadata, embedThumbnail, embedSubs,
    writeSubs, subLangs, restrictFilenames, outputTemplate,
    playlist, maxDownloads, concurrentFragments, rateLimit,
    sponsorBlock, cookiesFile,
  } = config;
  const args = [];

  // Format
  if (customFormat) {
    args.push('-f', customFormat);
  } else {
    // Own-property check: a quality such as "constructor" must not resolve to an
    // inherited Object.prototype member (which would push a non-string argument).
    const isKnownPreset = Object.prototype.hasOwnProperty.call(QUALITY_PRESETS, quality);
    args.push('-f', isKnownPreset ? QUALITY_PRESETS[quality] : QUALITY_PRESETS.best);
  }

  // Merge to mp4 when possible; the audio preset extracts mp3 instead
  if (quality !== 'audio') {
    args.push('--merge-output-format', 'mp4');
  } else {
    args.push('-x', '--audio-format', 'mp3');
  }

  // Embed options
  if (embedMetadata) args.push('--embed-metadata');
  if (embedThumbnail) args.push('--embed-thumbnail');
  if (embedSubs) args.push('--embed-subs');
  if (writeSubs) args.push('--write-subs');
  if (subLangs) args.push('--sub-langs', subLangs);

  // Filename
  if (restrictFilenames) args.push('--restrict-filenames');
  args.push('-o', join(VIDEOS_PATH, outputTemplate || '%(title)s.%(ext)s'));

  // Playlist
  if (playlist) {
    args.push('--yes-playlist');
    if (maxDownloads) args.push('--max-downloads', String(maxDownloads));
  } else {
    args.push('--no-playlist');
  }

  // Performance
  if (concurrentFragments && concurrentFragments > 1) {
    args.push('--concurrent-fragments', String(concurrentFragments));
  }
  if (rateLimit) args.push('--rate-limit', rateLimit);

  // SponsorBlock
  if (sponsorBlock === 'remove') args.push('--sponsorblock-remove', 'all');
  else if (sponsorBlock === 'mark') args.push('--sponsorblock-mark', 'all');

  // Cookies
  if (cookiesFile) args.push('--cookies', cookiesFile);

  // Progress & output
  args.push('--newline', '--no-colors', '--no-overwrites');

  // Print downloaded file paths
  args.push('--print', 'after_move:filepath');

  // "--" ends option parsing, so a URL beginning with "-" can never be read as a yt-dlp option.
  args.push('--', url);
  return args;
}
// Collects stream chunks and hands only complete, non-empty lines to `onLine`.
// A trailing partial line is kept until the next chunk (or `flush`) completes it.
function createLineBuffer(onLine) {
  let pending = '';
  const emit = (line) => {
    const clean = line.replace(/\r$/, '');
    if (clean) onLine(clean);
  };
  return {
    push(chunk) {
      const parts = (pending + chunk).split('\n');
      pending = parts.pop();
      for (const part of parts) emit(part);
    },
    flush() {
      const rest = pending;
      pending = '';
      emit(rest);
    },
  };
}

/**
 * Run a yt-dlp download and register the resulting video files in the library.
 *
 * @param {object} config - Download options; see buildArgs for the recognised fields.
 * @param {(message: string) => void} log - Receives the command line, progress and yt-dlp output.
 * @param {(fileCount: number, errorFlag: number) => void} onProgress - Called with the number
 *   of files finished so far; the second argument is 0 for file events and 1 when yt-dlp
 *   prints an ERROR line.
 * @param {() => boolean} isCancelled - Polled every 500 ms; a truthy result sends SIGTERM to yt-dlp.
 * @returns {Promise<{files: number, errors: number, cancelled?: boolean}>} Resolves once yt-dlp
 *   has exited and all downloaded video files were processed. `errors` is 1 for a non-zero
 *   exit that was not caused by cancellation. Rejects when the process cannot be spawned
 *   or post-processing throws.
 */
export function runYtdlp(config, log, onProgress, isCancelled) {
  return new Promise((resolve, reject) => {
    const args = buildArgs(config);
    log(`yt-dlp ${args.join(' ')}`);
    const proc = spawn('yt-dlp', args, {
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    // Decode inside the stream so multi-byte UTF-8 characters split across chunks stay intact.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    // Insertion-ordered set of final file paths reported by yt-dlp.
    const downloadedFiles = new Set();

    const handleStdoutLine = (line) => {
      const trimmed = line.trim();

      // yt-dlp --print after_move:filepath outputs the final file path on its own line.
      // These lines don't start with [ and are absolute paths.
      if (line.startsWith('/') && existsSync(trimmed)) {
        if (!downloadedFiles.has(trimmed)) {
          downloadedFiles.add(trimmed);
          onProgress(downloadedFiles.size, 0);
          log(`Downloaded: ${basename(trimmed)}`);
        }
        return;
      }

      // Parse progress lines: [download] 45.2% of 250.00MiB at 5.00MiB/s ETA 00:25
      const progressMatch = line.match(/\[download\]\s+([\d.]+)%\s+of\s+~?([\d.]+\w+)\s+at\s+([\d.]+\w+\/s|Unknown)\s+ETA\s+(\S+)/);
      if (progressMatch) {
        const [, pctText, size, speed, eta] = progressMatch;
        log(`[download] ${parseFloat(pctText).toFixed(1)}% of ${size} at ${speed} ETA ${eta}`);
        return;
      }

      // Destination line: [download] Destination: filename.mp4
      const destMatch = line.match(/\[download\] Destination:\s+(.+)/);
      if (destMatch) {
        log(`Downloading: ${basename(destMatch[1])}`);
        return;
      }

      // Already downloaded
      if (line.includes('has already been downloaded')) {
        log(trimmed);
        onProgress(downloadedFiles.size, 0);
        return;
      }

      // Log other yt-dlp output
      if (trimmed) log(trimmed);
    };

    const handleStderrLine = (line) => {
      if (line.includes('WARNING:')) {
        log(`Warning: ${line.replace(/WARNING:\s*/, '')}`);
      } else if (line.includes('ERROR:')) {
        log(`ERROR: ${line.replace(/ERROR:\s*/, '')}`);
        onProgress(downloadedFiles.size, 1);
      } else if (line.trim()) {
        log(line.trim());
      }
    };

    // Chunks do not respect line boundaries; buffer so each handler sees whole lines only.
    const stdoutLines = createLineBuffer(handleStdoutLine);
    const stderrLines = createLineBuffer(handleStderrLine);
    proc.stdout.on('data', (chunk) => stdoutLines.push(chunk));
    proc.stderr.on('data', (chunk) => stderrLines.push(chunk));

    // Check for cancellation
    const cancelCheck = setInterval(() => {
      if (isCancelled()) {
        proc.kill('SIGTERM');
        clearInterval(cancelCheck);
      }
    }, 500);

    proc.on('close', async (code) => {
      clearInterval(cancelCheck);
      try {
        // Process a final line that was not newline-terminated.
        stdoutLines.flush();
        stderrLines.flush();

        // Register downloaded video files in the library
        for (const filePath of downloadedFiles) {
          if (VIDEO_EXTS.has(extname(filePath).toLowerCase())) {
            await registerVideo(filePath, log);
          }
        }

        const files = downloadedFiles.size;
        if (code === 0) {
          resolve({ files, errors: 0 });
        } else if (isCancelled()) {
          resolve({ files, errors: 0, cancelled: true });
        } else {
          resolve({ files, errors: 1 });
        }
      } catch (err) {
        // Without this the promise would stay pending forever if a callback throws here.
        reject(err);
      }
    });

    proc.on('error', (err) => {
      clearInterval(cancelCheck);
      reject(err);
    });
  });
}