/*
 * - DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
 * - Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
 * - SQLite-backed media index for instant gallery loads with startup scan
 * - Duplicate detection and gallery filtering/sorting
 * - HLS video component, log viewer, and scrape management UI
 * - Dockerfile updated for Python/pywidevine, docker-compose volume for CDM
 *
 * Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
 *
 * (374 lines · 13 KiB · JavaScript)
 */
import { execSync, exec as execCb } from 'node:child_process';
import { mkdirSync, createWriteStream, existsSync, rmSync } from 'node:fs';
import { dirname } from 'node:path';
import { pipeline } from 'node:stream/promises';
import { fileURLToPath } from 'node:url';
import { promisify } from 'node:util';

import fetch from 'node-fetch';

import { getAuthConfig } from './db.js';

const execAsync = promisify(execCb);
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
const OF_BASE = 'https://onlyfans.com';
|
|
const WVD_PATH = process.env.WVD_PATH || '/data/cdm/device.wvd';
|
|
const HELPER_PATH = `${__dirname}/pywidevine_helper.py`;
|
|
|
|
export function hasCDM() {
|
|
return existsSync(WVD_PATH);
|
|
}
|
|
|
|
// ==================== MPD Parser ====================
|
|
|
|
function parseMpd(mpdText, baseUrl) {
|
|
const result = { pssh: null, video: null, audio: null };
|
|
|
|
// Extract Widevine PSSH (system ID edef8ba9-79d6-4ace-a3c8-27dcd51d21ed)
|
|
// Must find the ContentProtection block for Widevine, not PlayReady
|
|
const cpRegex = /<ContentProtection[^>]*schemeIdUri="urn:uuid:edef8ba9[^"]*"[^>]*>([\s\S]*?)<\/ContentProtection>/gi;
|
|
let cpMatch;
|
|
while ((cpMatch = cpRegex.exec(mpdText)) !== null) {
|
|
const psshInner = cpMatch[1].match(/cenc:pssh[^>]*>([^<]+)</i);
|
|
if (psshInner) {
|
|
result.pssh = psshInner[1].trim();
|
|
break;
|
|
}
|
|
}
|
|
// Fallback: if Widevine-specific block not found, try any cenc:pssh
|
|
if (!result.pssh) {
|
|
const psshMatch = mpdText.match(/cenc:pssh[^>]*>([^<]+)</i);
|
|
if (psshMatch) result.pssh = psshMatch[1].trim();
|
|
}
|
|
|
|
// Split into AdaptationSets
|
|
const asRegex = /<AdaptationSet([^>]*)>([\s\S]*?)<\/AdaptationSet>/gi;
|
|
let match;
|
|
while ((match = asRegex.exec(mpdText)) !== null) {
|
|
const asAttrs = match[1];
|
|
const asBody = match[2];
|
|
|
|
const mimeMatch = asAttrs.match(/mimeType="([^"]+)"/);
|
|
const mime = mimeMatch ? mimeMatch[1] : '';
|
|
const isVideo = mime.includes('video');
|
|
const isAudio = mime.includes('audio');
|
|
if (!isVideo && !isAudio) continue;
|
|
|
|
// Find all Representations, pick highest bandwidth
|
|
const reps = [];
|
|
const repRegex = /<Representation([^>]*)(?:\/>|>([\s\S]*?)<\/Representation>)/gi;
|
|
let repMatch;
|
|
while ((repMatch = repRegex.exec(asBody)) !== null) {
|
|
const bwMatch = repMatch[1].match(/bandwidth="(\d+)"/);
|
|
const idMatch = repMatch[1].match(/id="([^"]+)"/);
|
|
const bwAttr = repMatch[1].match(/bandwidth="(\d+)"/);
|
|
reps.push({
|
|
id: idMatch ? idMatch[1] : '1',
|
|
bandwidth: bwMatch ? parseInt(bwMatch[1]) : 0,
|
|
body: repMatch[2] || '',
|
|
});
|
|
}
|
|
reps.sort((a, b) => b.bandwidth - a.bandwidth);
|
|
const best = reps[0];
|
|
if (!best) continue;
|
|
|
|
// Try SegmentTemplate from Representation first, then AdaptationSet
|
|
let segInfo = parseSegmentTemplate(best.body, best.id, best.bandwidth, baseUrl);
|
|
if (!segInfo) segInfo = parseSegmentTemplate(asBody, best.id, best.bandwidth, baseUrl);
|
|
|
|
// Try SegmentList as fallback
|
|
if (!segInfo) segInfo = parseSegmentList(best.body || asBody, baseUrl);
|
|
|
|
// Try SegmentBase (on-demand profile) as final fallback
|
|
if (!segInfo) segInfo = parseSegmentBase(best.body || asBody, baseUrl);
|
|
|
|
if (segInfo) {
|
|
if (isVideo) result.video = segInfo;
|
|
else result.audio = segInfo;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
function parseSegmentTemplate(text, repId, bandwidth, baseUrl) {
|
|
const tmplMatch = text.match(/<SegmentTemplate([^>]*)(?:\/>|>([\s\S]*?)<\/SegmentTemplate>)/i);
|
|
if (!tmplMatch) return null;
|
|
|
|
const attrs = tmplMatch[1];
|
|
const body = tmplMatch[2] || '';
|
|
|
|
const initMatch = attrs.match(/initialization="([^"]+)"/);
|
|
const mediaMatch = attrs.match(/media="([^"]+)"/);
|
|
const startNumMatch = attrs.match(/startNumber="(\d+)"/);
|
|
|
|
if (!initMatch || !mediaMatch) return null;
|
|
|
|
const initTmpl = initMatch[1];
|
|
const mediaTmpl = mediaMatch[1];
|
|
const startNumber = startNumMatch ? parseInt(startNumMatch[1]) : 1;
|
|
const usesTime = mediaTmpl.includes('$Time$');
|
|
|
|
const initUrl = resolveUrl(
|
|
replaceTemplateVars(initTmpl, repId, bandwidth),
|
|
baseUrl,
|
|
);
|
|
|
|
const segmentUrls = [];
|
|
const timelineMatch = body.match(/<SegmentTimeline>([\s\S]*?)<\/SegmentTimeline>/i);
|
|
|
|
if (timelineMatch) {
|
|
let currentTime = 0;
|
|
let segNum = startNumber;
|
|
const sElements = [...timelineMatch[1].matchAll(/<S\s+([^/]*?)\/?\s*>/gi)];
|
|
|
|
for (const s of sElements) {
|
|
const tMatch = s[1].match(/t="(\d+)"/);
|
|
const dMatch = s[1].match(/d="(\d+)"/);
|
|
const rMatch = s[1].match(/r="(-?\d+)"/);
|
|
|
|
if (tMatch) currentTime = parseInt(tMatch[1]);
|
|
const duration = dMatch ? parseInt(dMatch[1]) : 0;
|
|
let repeat = rMatch ? parseInt(rMatch[1]) : 0;
|
|
if (repeat < 0) repeat = 9999; // r=-1 means repeat until end; bounded by 404 in download
|
|
|
|
for (let i = 0; i <= repeat; i++) {
|
|
let url;
|
|
if (usesTime) {
|
|
url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
|
|
.replace(/\$Time\$/g, String(currentTime));
|
|
} else {
|
|
url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
|
|
.replace(/\$Number\$/g, String(segNum))
|
|
.replace(/\$Number%(\d+)d\$/g, (_, w) => String(segNum).padStart(parseInt(w), '0'));
|
|
}
|
|
segmentUrls.push(resolveUrl(url, baseUrl));
|
|
currentTime += duration;
|
|
segNum++;
|
|
}
|
|
}
|
|
} else {
|
|
// No timeline — use a large count, download will stop on 404
|
|
const startNum = startNumber;
|
|
for (let i = 0; i < 10000; i++) {
|
|
const url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
|
|
.replace(/\$Number\$/g, String(startNum + i))
|
|
.replace(/\$Number%(\d+)d\$/g, (_, w) => String(startNum + i).padStart(parseInt(w), '0'));
|
|
segmentUrls.push(resolveUrl(url, baseUrl));
|
|
}
|
|
}
|
|
|
|
return { initUrl, segmentUrls };
|
|
}
|
|
|
|
function parseSegmentList(text, baseUrl) {
|
|
const initMatch = text.match(/<Initialization\s+sourceURL="([^"]+)"/i);
|
|
if (!initMatch) return null;
|
|
|
|
const initUrl = resolveUrl(initMatch[1], baseUrl);
|
|
const segmentUrls = [];
|
|
const segRegex = /<SegmentURL\s+media="([^"]+)"/gi;
|
|
let m;
|
|
while ((m = segRegex.exec(text)) !== null) {
|
|
segmentUrls.push(resolveUrl(m[1], baseUrl));
|
|
}
|
|
return { initUrl, segmentUrls };
|
|
}
|
|
|
|
function parseSegmentBase(text, baseUrl) {
|
|
const baseUrlMatch = text.match(/<BaseURL>([^<]+)<\/BaseURL>/i);
|
|
if (!baseUrlMatch) return null;
|
|
|
|
const fileUrl = resolveUrl(baseUrlMatch[1].trim(), baseUrl);
|
|
|
|
// On-demand: single file, no segments. Mark as on-demand so the download
|
|
// pipeline can just fetch the whole file instead of init+segments.
|
|
return { onDemand: true, fileUrl };
|
|
}
|
|
|
|
function replaceTemplateVars(template, repId, bandwidth) {
|
|
return template
|
|
.replace(/\$RepresentationID\$/g, repId)
|
|
.replace(/\$Bandwidth\$/g, String(bandwidth));
|
|
}
|
|
|
|
function resolveUrl(url, baseUrl) {
|
|
if (url.startsWith('http')) return url;
|
|
return baseUrl + url;
|
|
}
|
|
|
|
// ==================== Download Pipeline ====================
|
|
|
|
async function fetchWithCookies(url, cfCookies) {
|
|
const cookieParts = [];
|
|
if (cfCookies.cp) cookieParts.push(`CloudFront-Policy=${cfCookies.cp}`);
|
|
if (cfCookies.cs) cookieParts.push(`CloudFront-Signature=${cfCookies.cs}`);
|
|
if (cfCookies.ck) cookieParts.push(`CloudFront-Key-Pair-Id=${cfCookies.ck}`);
|
|
|
|
const headers = {};
|
|
if (cookieParts.length > 0) headers['Cookie'] = cookieParts.join('; ');
|
|
|
|
const res = await fetch(url, { headers });
|
|
return res;
|
|
}
|
|
|
|
async function downloadWholeFile(url, cfCookies, outputPath) {
|
|
const res = await fetchWithCookies(url, cfCookies);
|
|
if (!res.ok) throw new Error(`Download failed: ${res.status} ${url}`);
|
|
const ws = createWriteStream(outputPath);
|
|
for await (const chunk of res.body) ws.write(chunk);
|
|
ws.end();
|
|
await new Promise((resolve, reject) => {
|
|
ws.on('finish', resolve);
|
|
ws.on('error', reject);
|
|
});
|
|
console.log(`[drm-download] Downloaded whole file → ${outputPath}`);
|
|
}
|
|
|
|
async function downloadSegments(track, cfCookies, outputPath) {
|
|
const ws = createWriteStream(outputPath);
|
|
|
|
// Init segment
|
|
const initRes = await fetchWithCookies(track.initUrl, cfCookies);
|
|
if (!initRes.ok) throw new Error(`Init segment failed: ${initRes.status}`);
|
|
for await (const chunk of initRes.body) ws.write(chunk);
|
|
|
|
// Media segments
|
|
let downloaded = 0;
|
|
for (const segUrl of track.segmentUrls) {
|
|
const segRes = await fetchWithCookies(segUrl, cfCookies);
|
|
if (segRes.status === 404 || segRes.status === 403) break; // end of segments
|
|
if (!segRes.ok) throw new Error(`Segment failed: ${segRes.status} ${segUrl}`);
|
|
for await (const chunk of segRes.body) ws.write(chunk);
|
|
downloaded++;
|
|
}
|
|
|
|
ws.end();
|
|
await new Promise((resolve, reject) => {
|
|
ws.on('finish', resolve);
|
|
ws.on('error', reject);
|
|
});
|
|
|
|
console.log(`[drm-download] Downloaded ${downloaded} segments → ${outputPath}`);
|
|
}
|
|
|
|
export async function downloadDrmMedia({
|
|
mpdUrl,
|
|
cfCookies,
|
|
mediaId,
|
|
entityType,
|
|
entityId,
|
|
outputDir,
|
|
outputFilename,
|
|
}) {
|
|
if (!existsSync(WVD_PATH)) throw new Error('No CDM available — place a .wvd file at ' + WVD_PATH);
|
|
|
|
const authConfig = getAuthConfig();
|
|
if (!authConfig) throw new Error('No auth config');
|
|
|
|
console.log(`[drm-download] Starting DRM download for media ${mediaId}`);
|
|
|
|
// 1. Fetch & parse MPD
|
|
const mpdRes = await fetchWithCookies(mpdUrl, cfCookies);
|
|
if (!mpdRes.ok) throw new Error(`MPD fetch failed: ${mpdRes.status}`);
|
|
const mpdText = await mpdRes.text();
|
|
const mpdBaseUrl = mpdUrl.substring(0, mpdUrl.lastIndexOf('/') + 1);
|
|
const mpd = parseMpd(mpdText, mpdBaseUrl);
|
|
|
|
if (!mpd.pssh) {
|
|
throw new Error('No Widevine PSSH found in MPD');
|
|
}
|
|
if (!mpd.video) {
|
|
throw new Error('No video track found in MPD');
|
|
}
|
|
const videoDesc = mpd.video.onDemand ? 'on-demand' : `${mpd.video.segmentUrls.length} segs`;
|
|
const audioDesc = mpd.audio ? (mpd.audio.onDemand ? 'on-demand' : `${mpd.audio.segmentUrls.length} segs`) : 'none';
|
|
console.log(`[drm-download] MPD parsed: video=${videoDesc}, audio=${audioDesc}`);
|
|
|
|
// 2. Get content key via pywidevine (routed through local proxy)
|
|
const PORT = process.env.PORT || 3001;
|
|
const proxyParams = new URLSearchParams({ mediaId });
|
|
if (entityType) proxyParams.set('entityType', entityType);
|
|
if (entityId) proxyParams.set('entityId', entityId);
|
|
const proxyUrl = `http://localhost:${PORT}/api/drm-license?${proxyParams}`;
|
|
|
|
console.log(`[drm-download] Getting content key via pywidevine (proxy → OF)`);
|
|
let keyResult;
|
|
try {
|
|
const { stdout, stderr } = await execAsync(
|
|
`python3 "${HELPER_PATH}" "${WVD_PATH}" "${mpd.pssh}" "${proxyUrl}"`,
|
|
{ timeout: 60000, maxBuffer: 1024 * 1024 },
|
|
);
|
|
keyResult = JSON.parse(stdout.trim());
|
|
} catch (err) {
|
|
const stderr = err.stderr?.toString() || '';
|
|
const stdout = err.stdout?.toString() || '';
|
|
throw new Error(`pywidevine failed: ${stderr || stdout || err.message}`);
|
|
}
|
|
|
|
if (keyResult.error) throw new Error(`License failed: ${keyResult.error}`);
|
|
if (!keyResult.keys?.length) throw new Error('No content keys returned');
|
|
|
|
const contentKey = keyResult.keys.find(k => k.type === 'CONTENT') || keyResult.keys[0];
|
|
console.log(`[drm-download] Got ${keyResult.keys.length} key(s), KID=${contentKey.kid}`);
|
|
|
|
// 3. Download encrypted segments
|
|
mkdirSync(outputDir, { recursive: true });
|
|
const tmpDir = `${outputDir}/.drm-tmp-${mediaId}`;
|
|
mkdirSync(tmpDir, { recursive: true });
|
|
|
|
try {
|
|
console.log('[drm-download] Downloading video...');
|
|
if (mpd.video.onDemand) {
|
|
await downloadWholeFile(mpd.video.fileUrl, cfCookies, `${tmpDir}/video_enc.mp4`);
|
|
} else {
|
|
await downloadSegments(mpd.video, cfCookies, `${tmpDir}/video_enc.mp4`);
|
|
}
|
|
|
|
let hasAudio = false;
|
|
if (mpd.audio) {
|
|
console.log('[drm-download] Downloading audio...');
|
|
if (mpd.audio.onDemand) {
|
|
await downloadWholeFile(mpd.audio.fileUrl, cfCookies, `${tmpDir}/audio_enc.mp4`);
|
|
} else if (mpd.audio.segmentUrls?.length > 0) {
|
|
await downloadSegments(mpd.audio, cfCookies, `${tmpDir}/audio_enc.mp4`);
|
|
}
|
|
hasAudio = true;
|
|
}
|
|
|
|
// 4. Decrypt with ffmpeg
|
|
const keyHex = contentKey.key;
|
|
console.log('[drm-download] Decrypting...');
|
|
|
|
execSync(
|
|
`ffmpeg -y -loglevel error -decryption_key ${keyHex} -i "${tmpDir}/video_enc.mp4" -c copy "${tmpDir}/video.mp4"`,
|
|
{ stdio: 'pipe', timeout: 300000 },
|
|
);
|
|
|
|
if (hasAudio) {
|
|
execSync(
|
|
`ffmpeg -y -loglevel error -decryption_key ${keyHex} -i "${tmpDir}/audio_enc.mp4" -c copy "${tmpDir}/audio.mp4"`,
|
|
{ stdio: 'pipe', timeout: 300000 },
|
|
);
|
|
}
|
|
|
|
// 5. Mux into final file
|
|
const outputPath = `${outputDir}/${outputFilename}`;
|
|
if (hasAudio) {
|
|
console.log('[drm-download] Muxing audio + video...');
|
|
execSync(
|
|
`ffmpeg -y -loglevel error -i "${tmpDir}/video.mp4" -i "${tmpDir}/audio.mp4" -c copy -movflags +faststart "${outputPath}"`,
|
|
{ stdio: 'pipe', timeout: 300000 },
|
|
);
|
|
} else {
|
|
execSync(
|
|
`ffmpeg -y -loglevel error -i "${tmpDir}/video.mp4" -c copy -movflags +faststart "${outputPath}"`,
|
|
{ stdio: 'pipe', timeout: 300000 },
|
|
);
|
|
}
|
|
|
|
console.log(`[drm-download] Complete: ${outputPath}`);
|
|
return outputPath;
|
|
} finally {
|
|
// Cleanup temp files
|
|
try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
|
|
}
|
|
}
|