Initial commit — PlantGuideScraper project
This commit is contained in:
173
backend/app/api/jobs.py
Normal file
173
backend/app/api/jobs.py
Normal file
@@ -0,0 +1,173 @@
|
||||
import json
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Job
|
||||
from app.schemas.job import JobCreate, JobResponse, JobListResponse
|
||||
from app.workers.scrape_tasks import run_scrape_job
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("", response_model=JobListResponse)
|
||||
def list_jobs(
|
||||
status: Optional[str] = None,
|
||||
source: Optional[str] = None,
|
||||
limit: int = Query(50, ge=1, le=200),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""List all jobs."""
|
||||
query = db.query(Job)
|
||||
|
||||
if status:
|
||||
query = query.filter(Job.status == status)
|
||||
|
||||
if source:
|
||||
query = query.filter(Job.source == source)
|
||||
|
||||
total = query.count()
|
||||
jobs = query.order_by(Job.created_at.desc()).limit(limit).all()
|
||||
|
||||
return JobListResponse(
|
||||
items=[JobResponse.model_validate(j) for j in jobs],
|
||||
total=total,
|
||||
)
|
||||
|
||||
|
||||
@router.post("", response_model=JobResponse)
|
||||
def create_job(job: JobCreate, db: Session = Depends(get_db)):
|
||||
"""Create and start a new scrape job."""
|
||||
species_filter = None
|
||||
if job.species_ids:
|
||||
species_filter = json.dumps(job.species_ids)
|
||||
|
||||
db_job = Job(
|
||||
name=job.name,
|
||||
source=job.source,
|
||||
species_filter=species_filter,
|
||||
only_without_images=job.only_without_images,
|
||||
max_images=job.max_images,
|
||||
status="pending",
|
||||
)
|
||||
db.add(db_job)
|
||||
db.commit()
|
||||
db.refresh(db_job)
|
||||
|
||||
# Start the Celery task
|
||||
task = run_scrape_job.delay(db_job.id)
|
||||
db_job.celery_task_id = task.id
|
||||
db.commit()
|
||||
|
||||
return JobResponse.model_validate(db_job)
|
||||
|
||||
|
||||
@router.get("/{job_id}", response_model=JobResponse)
|
||||
def get_job(job_id: int, db: Session = Depends(get_db)):
|
||||
"""Get job status."""
|
||||
job = db.query(Job).filter(Job.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
return JobResponse.model_validate(job)
|
||||
|
||||
|
||||
@router.get("/{job_id}/progress")
|
||||
def get_job_progress(job_id: int, db: Session = Depends(get_db)):
|
||||
"""Get real-time job progress from Celery."""
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
job = db.query(Job).filter(Job.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if not job.celery_task_id:
|
||||
return {
|
||||
"status": job.status,
|
||||
"progress_current": job.progress_current,
|
||||
"progress_total": job.progress_total,
|
||||
}
|
||||
|
||||
# Get Celery task state
|
||||
result = celery_app.AsyncResult(job.celery_task_id)
|
||||
|
||||
if result.state == "PROGRESS":
|
||||
meta = result.info
|
||||
return {
|
||||
"status": "running",
|
||||
"progress_current": meta.get("current", 0),
|
||||
"progress_total": meta.get("total", 0),
|
||||
"current_species": meta.get("species", ""),
|
||||
}
|
||||
|
||||
return {
|
||||
"status": job.status,
|
||||
"progress_current": job.progress_current,
|
||||
"progress_total": job.progress_total,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/{job_id}/pause")
|
||||
def pause_job(job_id: int, db: Session = Depends(get_db)):
|
||||
"""Pause a running job."""
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
job = db.query(Job).filter(Job.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if job.status != "running":
|
||||
raise HTTPException(status_code=400, detail="Job is not running")
|
||||
|
||||
# Revoke Celery task
|
||||
if job.celery_task_id:
|
||||
celery_app.control.revoke(job.celery_task_id, terminate=True)
|
||||
|
||||
job.status = "paused"
|
||||
db.commit()
|
||||
|
||||
return {"status": "paused"}
|
||||
|
||||
|
||||
@router.post("/{job_id}/resume")
|
||||
def resume_job(job_id: int, db: Session = Depends(get_db)):
|
||||
"""Resume a paused job."""
|
||||
job = db.query(Job).filter(Job.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if job.status != "paused":
|
||||
raise HTTPException(status_code=400, detail="Job is not paused")
|
||||
|
||||
# Start new Celery task
|
||||
task = run_scrape_job.delay(job.id)
|
||||
job.celery_task_id = task.id
|
||||
job.status = "pending"
|
||||
db.commit()
|
||||
|
||||
return {"status": "resumed"}
|
||||
|
||||
|
||||
@router.post("/{job_id}/cancel")
|
||||
def cancel_job(job_id: int, db: Session = Depends(get_db)):
|
||||
"""Cancel a job."""
|
||||
from app.workers.celery_app import celery_app
|
||||
|
||||
job = db.query(Job).filter(Job.id == job_id).first()
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if job.status in ["completed", "failed"]:
|
||||
raise HTTPException(status_code=400, detail="Job already finished")
|
||||
|
||||
# Revoke Celery task
|
||||
if job.celery_task_id:
|
||||
celery_app.control.revoke(job.celery_task_id, terminate=True)
|
||||
|
||||
job.status = "failed"
|
||||
job.error_message = "Cancelled by user"
|
||||
db.commit()
|
||||
|
||||
return {"status": "cancelled"}
|
||||
Reference in New Issue
Block a user