Add DRM downloads, scrapers, gallery index, and UI improvements
- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition
- Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers
- SQLite-backed media index for instant gallery loads with startup scan
- Duplicate detection and gallery filtering/sorting
- HLS video component, log viewer, and scrape management UI
- Dockerfile updated for Python/pywidevine, docker-compose volume for CDM

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
373
server/drm-download.js
Normal file
373
server/drm-download.js
Normal file
@@ -0,0 +1,373 @@
|
||||
import { mkdirSync, createWriteStream, existsSync, rmSync } from 'node:fs';
import { execSync, execFileSync, exec as execCb, execFile as execFileCb } from 'node:child_process';
import { promisify } from 'node:util';
import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import fetch from 'node-fetch';
import { getAuthConfig } from './db.js';

const execAsync = promisify(execCb);
const execFileAsync = promisify(execFileCb);
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));

// OnlyFans origin (license requests ultimately target this service).
const OF_BASE = 'https://onlyfans.com';
// Widevine device file (.wvd); NOTE(review): presumably mounted via the
// docker-compose CDM volume mentioned in the commit message — confirm.
const WVD_PATH = process.env.WVD_PATH || '/data/cdm/device.wvd';
// Python helper that performs the pywidevine license exchange (ships next to this module).
const HELPER_PATH = `${__dirname}/pywidevine_helper.py`;
|
||||
|
||||
/**
 * Report whether a Widevine CDM device file is available.
 * @returns {boolean} true when a .wvd file exists at WVD_PATH.
 */
export function hasCDM() {
  const deviceFilePresent = existsSync(WVD_PATH);
  return deviceFilePresent;
}
|
||||
|
||||
// ==================== MPD Parser ====================

/**
 * Parse a DASH MPD manifest into the pieces the download pipeline needs.
 *
 * @param {string} mpdText - raw MPD XML.
 * @param {string} baseUrl - URL prefix (ending in '/') used to resolve relative segment URLs.
 * @returns {{pssh: string|null, video: object|null, audio: object|null}}
 *   pssh is the base64 Widevine init data; video/audio are the segment-info
 *   objects produced by parseSegmentTemplate / parseSegmentList / parseSegmentBase
 *   for the highest-bandwidth Representation of each kind.
 */
function parseMpd(mpdText, baseUrl) {
  const result = { pssh: null, video: null, audio: null };

  // Extract Widevine PSSH (system ID edef8ba9-79d6-4ace-a3c8-27dcd51d21ed).
  // Must match the Widevine ContentProtection block specifically, not PlayReady.
  const cpRegex = /<ContentProtection[^>]*schemeIdUri="urn:uuid:edef8ba9[^"]*"[^>]*>([\s\S]*?)<\/ContentProtection>/gi;
  let cpMatch;
  while ((cpMatch = cpRegex.exec(mpdText)) !== null) {
    const psshInner = cpMatch[1].match(/cenc:pssh[^>]*>([^<]+)</i);
    if (psshInner) {
      result.pssh = psshInner[1].trim();
      break;
    }
  }
  // Fallback: if no Widevine-specific block was found, accept any cenc:pssh box.
  if (!result.pssh) {
    const psshMatch = mpdText.match(/cenc:pssh[^>]*>([^<]+)</i);
    if (psshMatch) result.pssh = psshMatch[1].trim();
  }

  // Walk every AdaptationSet; keep only video/audio ones.
  const asRegex = /<AdaptationSet([^>]*)>([\s\S]*?)<\/AdaptationSet>/gi;
  let match;
  while ((match = asRegex.exec(mpdText)) !== null) {
    const asAttrs = match[1];
    const asBody = match[2];

    const mimeMatch = asAttrs.match(/mimeType="([^"]+)"/);
    const mime = mimeMatch ? mimeMatch[1] : '';
    const isVideo = mime.includes('video');
    const isAudio = mime.includes('audio');
    if (!isVideo && !isAudio) continue;

    // Collect all Representations and pick the highest bandwidth.
    // (Fixed: the original matched the bandwidth attribute twice into an
    // unused duplicate `bwAttr` local.)
    const reps = [];
    const repRegex = /<Representation([^>]*)(?:\/>|>([\s\S]*?)<\/Representation>)/gi;
    let repMatch;
    while ((repMatch = repRegex.exec(asBody)) !== null) {
      const bwMatch = repMatch[1].match(/bandwidth="(\d+)"/);
      const idMatch = repMatch[1].match(/id="([^"]+)"/);
      reps.push({
        id: idMatch ? idMatch[1] : '1',
        bandwidth: bwMatch ? parseInt(bwMatch[1], 10) : 0,
        body: repMatch[2] || '',
      });
    }
    reps.sort((a, b) => b.bandwidth - a.bandwidth);
    const best = reps[0];
    if (!best) continue;

    // Try SegmentTemplate from the Representation first, then the AdaptationSet.
    let segInfo = parseSegmentTemplate(best.body, best.id, best.bandwidth, baseUrl);
    if (!segInfo) segInfo = parseSegmentTemplate(asBody, best.id, best.bandwidth, baseUrl);

    // Try SegmentList as fallback.
    if (!segInfo) segInfo = parseSegmentList(best.body || asBody, baseUrl);

    // Try SegmentBase (on-demand profile) as final fallback.
    if (!segInfo) segInfo = parseSegmentBase(best.body || asBody, baseUrl);

    if (segInfo) {
      if (isVideo) result.video = segInfo;
      else result.audio = segInfo;
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Parse a <SegmentTemplate> element into an init URL plus the full list of
 * media segment URLs.
 *
 * @param {string} text - XML fragment to search (Representation or AdaptationSet body).
 * @param {string} repId - Representation id, substituted for $RepresentationID$.
 * @param {number} bandwidth - Representation bandwidth, substituted for $Bandwidth$.
 * @param {string} baseUrl - prefix for resolving relative URLs.
 * @returns {{initUrl: string, segmentUrls: string[]}|null} null when no usable template.
 */
function parseSegmentTemplate(text, repId, bandwidth, baseUrl) {
  // Matches both self-closing (<SegmentTemplate ... />) and paired forms.
  const tmplMatch = text.match(/<SegmentTemplate([^>]*)(?:\/>|>([\s\S]*?)<\/SegmentTemplate>)/i);
  if (!tmplMatch) return null;

  const attrs = tmplMatch[1];
  const body = tmplMatch[2] || '';

  const initMatch = attrs.match(/initialization="([^"]+)"/);
  const mediaMatch = attrs.match(/media="([^"]+)"/);
  const startNumMatch = attrs.match(/startNumber="(\d+)"/);

  // Both an initialization and a media template are required to build URLs.
  if (!initMatch || !mediaMatch) return null;

  const initTmpl = initMatch[1];
  const mediaTmpl = mediaMatch[1];
  const startNumber = startNumMatch ? parseInt(startNumMatch[1]) : 1;
  // Time-based addressing ($Time$) vs number-based ($Number$) templates.
  const usesTime = mediaTmpl.includes('$Time$');

  const initUrl = resolveUrl(
    replaceTemplateVars(initTmpl, repId, bandwidth),
    baseUrl,
  );

  const segmentUrls = [];
  const timelineMatch = body.match(/<SegmentTimeline>([\s\S]*?)<\/SegmentTimeline>/i);

  if (timelineMatch) {
    // Walk <S> entries: t (explicit start time) resets the clock, d is the
    // duration added per segment, r repeats the entry r extra times.
    let currentTime = 0;
    let segNum = startNumber;
    const sElements = [...timelineMatch[1].matchAll(/<S\s+([^/]*?)\/?\s*>/gi)];

    for (const s of sElements) {
      const tMatch = s[1].match(/t="(\d+)"/);
      const dMatch = s[1].match(/d="(\d+)"/);
      const rMatch = s[1].match(/r="(-?\d+)"/);

      if (tMatch) currentTime = parseInt(tMatch[1]);
      const duration = dMatch ? parseInt(dMatch[1]) : 0;
      let repeat = rMatch ? parseInt(rMatch[1]) : 0;
      if (repeat < 0) repeat = 9999; // r=-1 means repeat until end; bounded by 404 in download

      for (let i = 0; i <= repeat; i++) {
        let url;
        if (usesTime) {
          url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
            .replace(/\$Time\$/g, String(currentTime));
        } else {
          // Handles both plain $Number$ and width-formatted $Number%0Nd$.
          url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
            .replace(/\$Number\$/g, String(segNum))
            .replace(/\$Number%(\d+)d\$/g, (_, w) => String(segNum).padStart(parseInt(w), '0'));
        }
        segmentUrls.push(resolveUrl(url, baseUrl));
        currentTime += duration;
        segNum++;
      }
    }
  } else {
    // No timeline — use a large count, download will stop on 404
    const startNum = startNumber;
    for (let i = 0; i < 10000; i++) {
      const url = replaceTemplateVars(mediaTmpl, repId, bandwidth)
        .replace(/\$Number\$/g, String(startNum + i))
        .replace(/\$Number%(\d+)d\$/g, (_, w) => String(startNum + i).padStart(parseInt(w), '0'));
      segmentUrls.push(resolveUrl(url, baseUrl));
    }
  }

  return { initUrl, segmentUrls };
}
|
||||
|
||||
/**
 * Parse a <SegmentList> element: explicit <Initialization sourceURL> plus one
 * <SegmentURL media> per segment.
 * @returns {{initUrl: string, segmentUrls: string[]}|null} null when no
 *   Initialization element is present.
 */
function parseSegmentList(text, baseUrl) {
  const initMatch = text.match(/<Initialization\s+sourceURL="([^"]+)"/i);
  if (initMatch === null) return null;

  const segmentUrls = [...text.matchAll(/<SegmentURL\s+media="([^"]+)"/gi)]
    .map((m) => resolveUrl(m[1], baseUrl));

  return {
    initUrl: resolveUrl(initMatch[1], baseUrl),
    segmentUrls,
  };
}
|
||||
|
||||
/**
 * Parse an on-demand (SegmentBase) representation, where the whole track
 * lives in a single <BaseURL> file rather than discrete segments.
 * @returns {{onDemand: true, fileUrl: string}|null} null when no <BaseURL>.
 */
function parseSegmentBase(text, baseUrl) {
  const baseMatch = text.match(/<BaseURL>([^<]+)<\/BaseURL>/i);
  if (baseMatch === null) return null;

  // Single file, no segments: flag as on-demand so the download pipeline
  // fetches the whole file instead of init + segments.
  const fileUrl = resolveUrl(baseMatch[1].trim(), baseUrl);
  return { onDemand: true, fileUrl };
}
|
||||
|
||||
/**
 * Substitute the static DASH template placeholders ($RepresentationID$,
 * $Bandwidth$) in a URL template. $Number$/$Time$ are handled by the caller.
 */
function replaceTemplateVars(template, repId, bandwidth) {
  let resolved = template;
  resolved = resolved.replace(/\$RepresentationID\$/g, repId);
  resolved = resolved.replace(/\$Bandwidth\$/g, String(bandwidth));
  return resolved;
}
|
||||
|
||||
/**
 * Resolve a possibly-relative segment URL against the MPD's base URL.
 * Absolute URLs pass through; everything else is resolved with the WHATWG
 * URL parser, which (unlike the previous string concatenation) correctly
 * handles root-relative ("/x") and protocol-relative ("//host/x") references.
 */
function resolveUrl(url, baseUrl) {
  if (url.startsWith('http')) return url;
  try {
    return new URL(url, baseUrl).toString();
  } catch {
    // Malformed base URL — fall back to the naive join.
    return baseUrl + url;
  }
}
|
||||
|
||||
// ==================== Download Pipeline ====================

/**
 * GET a URL, attaching the CloudFront signed-cookie triplet when present.
 * @param {string} url
 * @param {{cp?: string, cs?: string, ck?: string}} cfCookies - CloudFront
 *   Policy / Signature / Key-Pair-Id values; any may be absent.
 * @returns {Promise<Response>} the raw fetch response (status not checked here).
 */
async function fetchWithCookies(url, cfCookies) {
  const cookiePairs = [
    ['CloudFront-Policy', cfCookies.cp],
    ['CloudFront-Signature', cfCookies.cs],
    ['CloudFront-Key-Pair-Id', cfCookies.ck],
  ];
  const cookieHeader = cookiePairs
    .filter(([, value]) => value)
    .map(([name, value]) => `${name}=${value}`)
    .join('; ');

  const headers = {};
  if (cookieHeader) headers['Cookie'] = cookieHeader;

  return fetch(url, { headers });
}
|
||||
|
||||
/**
 * Fetch a single-file (on-demand profile) track straight to disk.
 * Honors write-stream backpressure (waits for 'drain' when the buffer is
 * full) so large files don't pile up in memory, and destroys the stream on
 * failure so the file descriptor is not leaked.
 * @throws {Error} on any non-2xx response or stream error.
 */
async function downloadWholeFile(url, cfCookies, outputPath) {
  const res = await fetchWithCookies(url, cfCookies);
  if (!res.ok) throw new Error(`Download failed: ${res.status} ${url}`);

  const ws = createWriteStream(outputPath);
  try {
    for await (const chunk of res.body) {
      // write() returning false means the internal buffer is full — wait.
      if (!ws.write(chunk)) {
        await new Promise((resolve) => ws.once('drain', resolve));
      }
    }
    ws.end();
    await new Promise((resolve, reject) => {
      ws.on('finish', resolve);
      ws.on('error', reject);
    });
  } catch (err) {
    ws.destroy(); // don't leak the fd on a failed download
    throw err;
  }
  console.log(`[drm-download] Downloaded whole file → ${outputPath}`);
}
|
||||
|
||||
/**
 * Download an init segment plus all media segments, concatenated into one
 * output file. A 404/403 on a media segment is treated as end-of-stream
 * (this bounds templates with unknown segment counts); any other failure
 * throws. Honors write-stream backpressure and closes the stream on error.
 * @param {{initUrl: string, segmentUrls: string[]}} track
 */
async function downloadSegments(track, cfCookies, outputPath) {
  const ws = createWriteStream(outputPath);

  // Stream one response body into ws, pausing on backpressure.
  const writeBody = async (body) => {
    for await (const chunk of body) {
      if (!ws.write(chunk)) {
        await new Promise((resolve) => ws.once('drain', resolve));
      }
    }
  };

  try {
    // Init segment
    const initRes = await fetchWithCookies(track.initUrl, cfCookies);
    if (!initRes.ok) throw new Error(`Init segment failed: ${initRes.status}`);
    await writeBody(initRes.body);

    // Media segments
    let downloaded = 0;
    for (const segUrl of track.segmentUrls) {
      const segRes = await fetchWithCookies(segUrl, cfCookies);
      if (segRes.status === 404 || segRes.status === 403) break; // end of segments
      if (!segRes.ok) throw new Error(`Segment failed: ${segRes.status} ${segUrl}`);
      await writeBody(segRes.body);
      downloaded++;
    }

    ws.end();
    await new Promise((resolve, reject) => {
      ws.on('finish', resolve);
      ws.on('error', reject);
    });

    console.log(`[drm-download] Downloaded ${downloaded} segments → ${outputPath}`);
  } catch (err) {
    ws.destroy(); // don't leak the fd on a failed download
    throw err;
  }
}
|
||||
|
||||
/**
 * Full DRM download pipeline for a Widevine-protected DASH stream:
 *   1. fetch + parse the MPD (PSSH, best video/audio tracks)
 *   2. obtain the content key via the pywidevine helper — the license
 *      request is routed through our local /api/drm-license proxy
 *   3. download the encrypted track(s)
 *   4. decrypt each track with ffmpeg's -decryption_key
 *   5. mux into the final +faststart mp4
 *
 * Security: the PSSH and content key originate from remote data, so the
 * python/ffmpeg subprocesses are invoked with argv arrays (execFileAsync /
 * execFileSync) instead of shell command strings, eliminating shell
 * injection through those values.
 *
 * @returns {Promise<string>} absolute path of the final mp4.
 * @throws {Error} when no CDM/auth is available, or any pipeline step fails.
 *   Temp files are always cleaned up.
 */
export async function downloadDrmMedia({
  mpdUrl,
  cfCookies,
  mediaId,
  entityType,
  entityId,
  outputDir,
  outputFilename,
}) {
  if (!existsSync(WVD_PATH)) throw new Error('No CDM available — place a .wvd file at ' + WVD_PATH);

  const authConfig = getAuthConfig();
  if (!authConfig) throw new Error('No auth config');

  console.log(`[drm-download] Starting DRM download for media ${mediaId}`);

  // 1. Fetch & parse MPD
  const mpdRes = await fetchWithCookies(mpdUrl, cfCookies);
  if (!mpdRes.ok) throw new Error(`MPD fetch failed: ${mpdRes.status}`);
  const mpdText = await mpdRes.text();
  const mpdBaseUrl = mpdUrl.substring(0, mpdUrl.lastIndexOf('/') + 1);
  const mpd = parseMpd(mpdText, mpdBaseUrl);

  if (!mpd.pssh) {
    throw new Error('No Widevine PSSH found in MPD');
  }
  if (!mpd.video) {
    throw new Error('No video track found in MPD');
  }
  const describe = (t) => (t.onDemand ? 'on-demand' : `${t.segmentUrls.length} segs`);
  const videoDesc = describe(mpd.video);
  const audioDesc = mpd.audio ? describe(mpd.audio) : 'none';
  console.log(`[drm-download] MPD parsed: video=${videoDesc}, audio=${audioDesc}`);

  // 2. Get content key via pywidevine (routed through local proxy)
  const PORT = process.env.PORT || 3001;
  const proxyParams = new URLSearchParams({ mediaId });
  if (entityType) proxyParams.set('entityType', entityType);
  if (entityId) proxyParams.set('entityId', entityId);
  const proxyUrl = `http://localhost:${PORT}/api/drm-license?${proxyParams}`;

  console.log(`[drm-download] Getting content key via pywidevine (proxy → OF)`);
  let keyResult;
  try {
    // argv array: mpd.pssh comes from a remote manifest and must never be
    // interpolated into a shell command line.
    const { stdout } = await execFileAsync(
      'python3',
      [HELPER_PATH, WVD_PATH, mpd.pssh, proxyUrl],
      { timeout: 60000, maxBuffer: 1024 * 1024 },
    );
    keyResult = JSON.parse(stdout.trim());
  } catch (err) {
    const stderr = err.stderr?.toString() || '';
    const stdout = err.stdout?.toString() || '';
    throw new Error(`pywidevine failed: ${stderr || stdout || err.message}`);
  }

  if (keyResult.error) throw new Error(`License failed: ${keyResult.error}`);
  if (!keyResult.keys?.length) throw new Error('No content keys returned');

  const contentKey = keyResult.keys.find((k) => k.type === 'CONTENT') || keyResult.keys[0];
  console.log(`[drm-download] Got ${keyResult.keys.length} key(s), KID=${contentKey.kid}`);

  // 3. Download encrypted segments into a per-media temp dir
  mkdirSync(outputDir, { recursive: true });
  const tmpDir = `${outputDir}/.drm-tmp-${mediaId}`;
  mkdirSync(tmpDir, { recursive: true });

  // ffmpeg via argv array: paths/keys are passed verbatim, no quoting needed.
  const runFfmpeg = (args) =>
    execFileSync('ffmpeg', ['-y', '-loglevel', 'error', ...args], { stdio: 'pipe', timeout: 300000 });

  try {
    console.log('[drm-download] Downloading video...');
    if (mpd.video.onDemand) {
      await downloadWholeFile(mpd.video.fileUrl, cfCookies, `${tmpDir}/video_enc.mp4`);
    } else {
      await downloadSegments(mpd.video, cfCookies, `${tmpDir}/video_enc.mp4`);
    }

    // Only mark audio present when something was actually downloaded —
    // previously hasAudio was set even when the audio set had no on-demand
    // file and zero segments, making the decrypt step reference a missing file.
    let hasAudio = false;
    if (mpd.audio) {
      console.log('[drm-download] Downloading audio...');
      if (mpd.audio.onDemand) {
        await downloadWholeFile(mpd.audio.fileUrl, cfCookies, `${tmpDir}/audio_enc.mp4`);
        hasAudio = true;
      } else if (mpd.audio.segmentUrls?.length > 0) {
        await downloadSegments(mpd.audio, cfCookies, `${tmpDir}/audio_enc.mp4`);
        hasAudio = true;
      }
    }

    // 4. Decrypt with ffmpeg
    const keyHex = contentKey.key;
    console.log('[drm-download] Decrypting...');

    runFfmpeg(['-decryption_key', keyHex, '-i', `${tmpDir}/video_enc.mp4`, '-c', 'copy', `${tmpDir}/video.mp4`]);

    if (hasAudio) {
      runFfmpeg(['-decryption_key', keyHex, '-i', `${tmpDir}/audio_enc.mp4`, '-c', 'copy', `${tmpDir}/audio.mp4`]);
    }

    // 5. Mux into final file
    const outputPath = `${outputDir}/${outputFilename}`;
    if (hasAudio) {
      console.log('[drm-download] Muxing audio + video...');
      runFfmpeg([
        '-i', `${tmpDir}/video.mp4`,
        '-i', `${tmpDir}/audio.mp4`,
        '-c', 'copy', '-movflags', '+faststart',
        outputPath,
      ]);
    } else {
      runFfmpeg(['-i', `${tmpDir}/video.mp4`, '-c', 'copy', '-movflags', '+faststart', outputPath]);
    }

    console.log(`[drm-download] Complete: ${outputPath}`);
    return outputPath;
  } finally {
    // Cleanup temp files (best-effort)
    try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
  }
}
|
||||
Reference in New Issue
Block a user