- DRM video download pipeline with pywidevine subprocess for Widevine key acquisition - Scraper system: forum threads, Coomer/Kemono API, and MediaLink (Fapello) scrapers - SQLite-backed media index for instant gallery loads with startup scan - Duplicate detection and gallery filtering/sorting - HLS video component, log viewer, and scrape management UI - Dockerfile updated for Python/pywidevine, docker-compose volume for CDM Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
316 lines
11 KiB
JavaScript
316 lines
11 KiB
JavaScript
import { Router } from 'express';
import fetch from 'node-fetch';
import { mkdirSync, createWriteStream, statSync, renameSync, rmSync } from 'fs';
import { pipeline } from 'stream/promises';
import { extname } from 'path';
import { getAuthConfig, isMediaDownloaded, recordDownload, getDownloadStats, saveCursor, getCursor, clearCursor, upsertMediaFile } from './db.js';
import { createSignedHeaders, getRules } from './signing.js';
import { downloadDrmMedia, hasCDM } from './drm-download.js';
|
|
|
|
// Express router that all download endpoints in this module are registered on.
const router = Router();
|
|
// Origin that every API path passed to fetchOF() is appended to.
const OF_BASE = 'https://onlyfans.com';

// Root directory for downloaded media; overridable through the MEDIA_PATH env var.
const MEDIA_PATH = process.env.MEDIA_PATH || './data/media';

/**
 * Parse a delay in milliseconds from a raw (usually environment) value.
 *
 * A malformed or negative value would otherwise reach setTimeout() as NaN or a
 * negative number and silently disable the throttle, so it is replaced by the
 * fallback instead.
 *
 * @param {string|number|undefined|null} raw - Value to parse (base 10).
 * @param {number} [fallback=1000] - Delay used when `raw` is missing or invalid.
 * @returns {number} A finite, non-negative integer number of milliseconds.
 */
function parseDelayMs(raw, fallback = 1000) {
  const ms = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(ms) && ms >= 0 ? ms : fallback;
}

// Pause (ms) between consecutive listing pages and between media downloads.
const DOWNLOAD_DELAY = parseDelayMs(process.env.DOWNLOAD_DELAY);

// In-memory progress: userId -> { total, completed, errors, running }
const progressMap = new Map();
|
|
|
|
/**
 * Assemble the HTTP headers for an authenticated API request.
 *
 * Static headers come from the stored auth config and the current signing
 * rules; the per-request signed headers are merged last so they win on key
 * collisions. Any header named in the rules' `remove_headers` list is then
 * stripped from the result.
 *
 * @param {object} authConfig - Stored auth values (user_agent, cookie, user_id, x_bc, x_of_rev).
 * @param {object} signedHeaders - Per-request headers produced by the signer.
 * @returns {object} Header name -> value map ready to hand to fetch().
 */
function buildHeaders(authConfig, signedHeaders) {
  const { app_token: appToken, remove_headers: removeHeaders } = getRules();
  const fallbackUserAgent =
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:148.0) Gecko/20100101 Firefox/148.0';

  const headers = {
    'User-Agent': authConfig.user_agent || fallbackUserAgent,
    'Accept': 'application/json, text/plain, */*',
    'Cookie': authConfig.cookie,
    'user-id': authConfig.user_id,
    'x-bc': authConfig.x_bc,
    'x-of-rev': authConfig.x_of_rev,
    'app-token': appToken,
    ...signedHeaders,
  };

  // The rules may list headers that must not be sent at all.
  for (const name of removeHeaders || []) {
    delete headers[name];
  }

  return headers;
}
|
|
|
|
/**
 * Perform a signed GET against the API and return the decoded JSON body.
 *
 * @param {string} ofPath - Path plus query string, appended to OF_BASE and also fed to the signer.
 * @param {object} authConfig - Stored auth values; `user_id` is used for signing.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} When the response status is not 2xx, or the body is not valid JSON.
 */
async function fetchOF(ofPath, authConfig) {
  const signedHeaders = createSignedHeaders(ofPath, authConfig.user_id);
  const headers = buildHeaders(authConfig, signedHeaders);
  const res = await fetch(`${OF_BASE}${ofPath}`, { headers });

  // Without this check an auth/rate-limit error body is parsed like a normal
  // listing; callers then see "no items, no more pages" and treat the run as
  // finished instead of failed.
  if (!res.ok) {
    throw new Error(`OF API request failed (${res.status}) for ${ofPath.split('?')[0]}`);
  }

  return res.json();
}
|
|
|
|
/**
 * Pick the best available direct URL for a media object.
 *
 * Preference order: `source.source`, then `files.full.url`, then
 * `files.preview.url`.
 *
 * @param {object} media - Media object from the API.
 * @returns {string|null} First non-empty URL found, or null when none is present.
 */
function getMediaUrl(media) {
  return (
    media.source?.source ||
    media.files?.full?.url ||
    media.files?.preview?.url ||
    null
  );
}
|
|
|
|
/**
 * Derive a file extension (including the leading dot) from a URL's path.
 *
 * The query string never takes part because only the parsed pathname is
 * inspected.
 *
 * @param {string} url - Absolute URL of the media file.
 * @returns {string} Extension such as '.jpg', or '.bin' when the URL cannot be
 *   parsed or its path has no extension.
 */
function getExtFromUrl(url) {
  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    return '.bin';
  }
  return extname(pathname) || '.bin';
}
|
|
|
|
/**
 * Format a date as a 'YYYY-MM' bucket name using the process's local time zone.
 *
 * @param {string|number|Date} dateStr - Anything the Date constructor accepts.
 * @returns {string} 'YYYY-MM', or 'unknown' when the input is empty or unparseable.
 */
function getYearMonth(dateStr) {
  if (!dateStr) return 'unknown';

  try {
    const parsed = new Date(dateStr);
    if (Number.isNaN(parsed.getTime())) return 'unknown';

    const month = String(parsed.getMonth() + 1).padStart(2, '0');
    return [parsed.getFullYear(), month].join('-');
  } catch {
    return 'unknown';
  }
}
|
|
|
|
/**
 * Resolve after roughly `ms` milliseconds; used to throttle requests.
 *
 * @param {number} ms - Delay handed to setTimeout().
 * @returns {Promise<void>} Promise that settles once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Stream a remote file to disk.
 *
 * The body is written to `<dest>.part` first and renamed to `dest` only after
 * the whole stream has been flushed, so an interrupted transfer never leaves a
 * truncated file under the final name.
 *
 * @param {string} url - Direct URL of the file.
 * @param {string} dest - Final path on disk; its directory must already exist.
 * @returns {Promise<void>}
 * @throws {Error} On a non-2xx response ("Download failed: <status>") or any
 *   stream / filesystem error while writing.
 */
async function downloadFile(url, dest) {
  const res = await fetch(url);
  if (!res.ok) throw new Error(`Download failed: ${res.status}`);

  const partialPath = `${dest}.part`;
  try {
    await pipeline(res.body, createWriteStream(partialPath));
    renameSync(partialPath, dest);
  } catch (err) {
    // Best-effort cleanup; the transfer error is the one worth reporting.
    try {
      rmSync(partialPath, { force: true });
    } catch { /* leave the .part file behind rather than mask `err` */ }
    throw err;
  }
}
|
|
|
|
/**
 * Phase 1 helper: page through a user's /posts/medias listing and flatten it
 * into one entry per media item.
 *
 * @param {string|number} userId - User whose listing is fetched.
 * @param {object} authConfig - Auth values forwarded to fetchOF().
 * @param {number|null} postLimit - Maximum number of listing items to fetch; falsy means no limit.
 * @param {string|null} startCursor - `beforePublishTime` value to resume from, or null.
 * @returns {Promise<{entries: Array<{postId: any, media: object, postDate: any}>,
 *   cursor: string|null, hasMore: boolean, postsFetched: number}>}
 */
async function collectMediaEntries(userId, authConfig, postLimit, startCursor) {
  const PAGE_SIZE = 10;
  const entries = [];
  let cursor = startCursor;
  let hasMore = true;
  let postsFetched = 0;

  while (hasMore) {
    const batchSize = postLimit ? Math.min(PAGE_SIZE, postLimit - postsFetched) : PAGE_SIZE;
    if (batchSize <= 0) break;

    let ofPath = `/api2/v2/users/${userId}/posts/medias?limit=${batchSize}&order=publish_date_desc&skip_users=all&format=infinite&pinned=0`;
    if (cursor) {
      ofPath += `&beforePublishTime=${encodeURIComponent(cursor)}`;
    }

    const data = await fetchOF(ofPath, authConfig);
    const isFlatArray = Array.isArray(data);
    const rawList = isFlatArray ? data : (data.list || []);
    postsFetched += rawList.length;

    // The /posts/medias endpoint returns post objects with nested media[].
    // Flatten into individual media items.
    for (const item of rawList) {
      const postDate = item.postedAt || item.createdAt || item.publishedAt || null;
      if (Array.isArray(item.media) && item.media.length > 0) {
        for (const media of item.media) {
          entries.push({ postId: item.id, media, postDate });
        }
      } else {
        // Fallback: treat the item itself as a media object
        entries.push({ postId: item.postId || item.post_id || item.id, media: item, postDate });
      }
    }

    const previousCursor = cursor;
    if (isFlatArray) {
      hasMore = data.length === batchSize;
      if (rawList.length > 0) {
        // For flat array responses, use the last item's date as cursor
        const last = rawList[rawList.length - 1];
        cursor = last.postedAt || last.createdAt || null;
      }
    } else {
      hasMore = !!data.hasMore;
      cursor = data.tailMarker || null;
    }

    // Stop if we've hit the limit
    if (postLimit && postsFetched >= postLimit) break;

    // "More pages" without a cursor, or with one that did not move, would make
    // the next request return the same page again, forever.
    if (hasMore && (!cursor || cursor === previousCursor)) {
      console.error(`[download] Pagination cursor did not advance for user ${userId}; stopping listing`);
      break;
    }

    if (hasMore) await sleep(DOWNLOAD_DELAY);
  }

  return { entries, cursor, hasMore, postsFetched };
}

/**
 * Phase 2 helper: download (or skip) a single media entry and record it.
 *
 * @param {{postId: any, media: object, postDate: any}} entry - Flattened media entry.
 * @param {string|number} userId - Owner id used for download records.
 * @param {string|null} username - Preferred directory / index name; falls back to userId.
 * @returns {Promise<'skipped'|'downloaded'|'failed'>} 'failed' is returned only for
 *   DRM failures, which are logged here.
 * @throws {Error} Any failure on the non-DRM path; the caller logs and counts it.
 */
async function processMediaEntry({ postId, media, postDate }, userId, username) {
  const mediaId = String(media.id);

  if (isMediaDownloaded(mediaId)) return 'skipped';
  if (media.canView === false) return 'skipped';

  const userDir = `${MEDIA_PATH}/${username || userId}`;
  const indexOwner = username || String(userId);

  // Check for DRM-protected video
  const drm = media.files?.drm;
  if (drm?.manifest?.dash && drm?.signature?.dash) {
    if (!hasCDM()) {
      console.log(`[download] Skipping DRM media ${mediaId} (no WVD file configured)`);
      return 'skipped';
    }
    try {
      const sig = drm.signature.dash;
      const cfCookies = {
        cp: sig['CloudFront-Policy'],
        cs: sig['CloudFront-Signature'],
        ck: sig['CloudFront-Key-Pair-Id'],
      };
      const drmFilename = `${postId}_${mediaId}_video.mp4`;
      // Same directory guarantee the plain-download path gives below.
      mkdirSync(userDir, { recursive: true });
      await downloadDrmMedia({
        mpdUrl: drm.manifest.dash,
        cfCookies,
        mediaId,
        entityType: 'post',
        entityId: String(postId),
        outputDir: userDir,
        outputFilename: drmFilename,
      });
      recordDownload(userId, String(postId), mediaId, 'video', drmFilename, postDate);
      try {
        const st = statSync(`${userDir}/${drmFilename}`);
        upsertMediaFile(indexOwner, drmFilename, 'video', st.size, st.mtimeMs, postDate);
      } catch { /* stat may fail if file was cleaned up */ }
      return 'downloaded';
    } catch (err) {
      console.error(`[download] DRM download failed for media ${mediaId}:`, err.message);
      return 'failed';
    }
  }

  const url = getMediaUrl(media);
  if (!url) {
    console.log(`[download] Skipping media ${mediaId} (no URL)`);
    return 'skipped';
  }

  const mediaType = media.type || 'unknown';
  const filename = `${postId}_${mediaId}_${mediaType}${getExtFromUrl(url)}`;
  mkdirSync(userDir, { recursive: true });
  const dest = `${userDir}/${filename}`;

  await downloadFile(url, dest);
  recordDownload(userId, String(postId), mediaId, mediaType, filename, postDate);
  try {
    const st = statSync(dest);
    // Only photo/image and video/gif types go into the media index.
    const indexType = /^(photo|image)$/i.test(mediaType) ? 'image' : /^(video|gif)$/i.test(mediaType) ? 'video' : null;
    if (indexType) upsertMediaFile(indexOwner, filename, indexType, st.size, st.mtimeMs, postDate);
  } catch { /* ignore */ }
  return 'downloaded';
}

/**
 * Run a complete background download for one user.
 *
 * Phase 1 lists the user's media (optionally resuming from a saved cursor and
 * optionally capped by `postLimit`); phase 2 downloads each item in turn with
 * DOWNLOAD_DELAY between attempts. Progress is published through `progressMap`
 * under String(userId); `completed` counts every processed item, including
 * skipped and failed ones. This function never rejects: fatal errors are logged
 * and counted in `progress.errors`.
 *
 * @param {string|number} userId - User to download.
 * @param {object} authConfig - Auth values for API requests.
 * @param {number|null} postLimit - Max listing items this run; falsy means everything.
 * @param {boolean} resume - Continue from the saved cursor when one exists.
 * @param {string|null} username - Directory / index name; falls back to userId.
 * @returns {Promise<void>}
 */
async function runDownload(userId, authConfig, postLimit, resume, username) {
  const progress = { total: 0, completed: 0, errors: 0, running: true };
  progressMap.set(String(userId), progress);

  try {
    let startCursor = null;
    let priorPostsDownloaded = 0;

    if (resume) {
      const saved = getCursor(String(userId));
      if (saved) {
        startCursor = saved.cursor;
        priorPostsDownloaded = saved.posts_downloaded || 0;
      }
    }

    // Phase 1: Paginate media items directly via /posts/medias
    const { entries, cursor, hasMore, postsFetched } =
      await collectMediaEntries(userId, authConfig, postLimit, startCursor);

    // Save cursor for future "continue" downloads
    if (postLimit && cursor && hasMore) {
      saveCursor(String(userId), cursor, priorPostsDownloaded + postsFetched);
    } else {
      // Downloaded all media or reached the end — clear cursor
      clearCursor(String(userId));
    }

    progress.total = entries.length;

    // Phase 2: Download each media item
    for (const entry of entries) {
      let outcome;
      try {
        outcome = await processMediaEntry(entry, userId, username);
      } catch (err) {
        // `?.` keeps a malformed (null) media entry from throwing inside the
        // handler and aborting the whole run.
        console.error(`[download] Error downloading media ${entry.media?.id}:`, err.message);
        outcome = 'failed';
      }

      if (outcome === 'failed') progress.errors++;
      progress.completed++;

      // Skipped items cost no request, so only throttle after real attempts.
      if (outcome !== 'skipped') await sleep(DOWNLOAD_DELAY);
    }
  } catch (err) {
    console.error(`[download] Fatal error for user ${userId}:`, err.message);
    progress.errors++;
  } finally {
    progress.running = false;
  }
}
|
|
|
|
// POST /api/download/:userId — start background download
//
// Body (all optional): { limit, resume, username }.
// Responds 401 without stored auth, 400 for an unsafe userId/username or a
// negative limit, { status: 'already_running' } when a run for this user is in
// flight, otherwise { status: 'started' } while the download continues in the
// background.
router.post('/api/download/:userId', (req, res, next) => {
  try {
    const authConfig = getAuthConfig();
    if (!authConfig) return res.status(401).json({ error: 'No auth config' });

    const { userId } = req.params;
    // req.body is undefined when no body was sent or no body parser ran.
    const body = req.body ?? {};
    const username = body.username || null;

    // Both values become a directory name under the media root, so anything
    // that could escape it (separators, '.', '..', NUL) is rejected.
    const isSafePathSegment = (value) => {
      const segment = String(value);
      return segment !== '.' && segment !== '..' && !/[\\/\0]/.test(segment);
    };
    if (!isSafePathSegment(userId) || (username !== null && !isSafePathSegment(username))) {
      return res.status(400).json({ error: 'Invalid userId or username' });
    }

    // A non-numeric limit keeps its previous meaning of "no limit"; a negative
    // one used to fetch nothing and wipe the saved cursor, so it is refused.
    const parsedLimit = body.limit ? Number.parseInt(body.limit, 10) : null;
    if (parsedLimit !== null && parsedLimit < 0) {
      return res.status(400).json({ error: 'Invalid limit' });
    }
    const postLimit = Number.isNaN(parsedLimit) ? null : parsedLimit;
    const resume = !!body.resume;

    const existing = progressMap.get(String(userId));
    if (existing?.running) {
      return res.json({ status: 'already_running', userId, progress: existing });
    }

    // Fire and forget: progress is polled through the status endpoint.
    runDownload(userId, authConfig, postLimit, resume, username).catch((err) =>
      console.error(`[download] Unhandled error for user ${userId}:`, err.message)
    );

    res.json({ status: 'started', userId });
  } catch (err) {
    next(err);
  }
});
|
|
|
|
// GET /api/download/:userId/status — progress of the latest run for a user
// ('not_started' when none has been recorded in this process).
router.get('/api/download/:userId/status', (req, res) => {
  const entry = progressMap.get(String(req.params.userId));

  if (entry) {
    const status = entry.running ? 'running' : 'completed';
    res.json({ status, ...entry });
    return;
  }

  res.json({ status: 'not_started' });
});
|
|
|
|
// GET /api/download/:userId/cursor — whether a resume cursor is stored for the
// user and, if so, how many posts earlier runs covered.
router.get('/api/download/:userId/cursor', (req, res) => {
  const saved = getCursor(String(req.params.userId));
  const payload = saved
    ? { hasCursor: true, postsDownloaded: saved.posts_downloaded }
    : { hasCursor: false };

  res.json(payload);
});
|
|
|
|
// GET /api/download/active — list all running downloads
router.get('/api/download/active', (req, res) => {
  const active = [...progressMap.entries()]
    .filter(([, state]) => state.running)
    .map(([userId, state]) => ({ user_id: userId, ...state }));

  res.json(active);
});
|
|
|
|
// GET /api/download/history — download statistics from the database; any
// failure is forwarded to the Express error handler.
router.get('/api/download/history', (req, res, next) => {
  try {
    res.json(getDownloadStats());
  } catch (err) {
    next(err);
  }
});
|
|
|
|
export default router;
|