442 lines
14 KiB
Python
442 lines
14 KiB
Python
import os
|
|
import shutil
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
from fastapi.responses import FileResponse
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy import func
|
|
from PIL import Image as PILImage
|
|
|
|
from app.database import get_db
|
|
from app.models import Image, Species
|
|
from app.schemas.image import ImageResponse, ImageListResponse
|
|
from app.config import get_settings
|
|
|
|
# Router on which every image endpoint in this module is registered.
router = APIRouter()
# Application settings, resolved once at import time; the import endpoints
# read `imports_path` and `images_path` from it.
settings = get_settings()
|
|
|
|
|
|
@router.get("", response_model=ImageListResponse)
def list_images(
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    species_id: Optional[int] = None,
    source: Optional[str] = None,
    license: Optional[str] = None,
    status: Optional[str] = None,
    min_quality: Optional[float] = None,
    search: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """List images with pagination and optional filters.

    All supplied filters are combined with AND. A filter whose value is
    falsy (``None``, an empty string or ``0``) is not applied. ``search``
    is a case-insensitive substring match against the scientific or common
    name of the image's species.

    Results are ordered newest first by ``created_at``; ties are broken by
    ``id`` so that consecutive pages never repeat or skip rows that share a
    timestamp.

    Returns:
        ImageListResponse with the requested page of items, the total
        number of matching images and the resulting page count.
    """
    from sqlalchemy.orm import joinedload

    # Collect the column filters once so the listing query and the count
    # query below are guaranteed to apply exactly the same conditions.
    criteria = []
    if species_id:
        criteria.append(Image.species_id == species_id)
    if source:
        criteria.append(Image.source == source)
    if license:
        criteria.append(Image.license == license)
    if status:
        criteria.append(Image.status == status)
    if min_quality:
        criteria.append(Image.quality_score >= min_quality)

    # Eager-load the species so building `species_name` below does not
    # trigger one extra query per image.
    query = db.query(Image).options(joinedload(Image.species))
    if criteria:
        query = query.filter(*criteria)

    if search:
        search_term = f"%{search}%"
        query = query.join(Species).filter(
            (Species.scientific_name.ilike(search_term)) |
            (Species.common_name.ilike(search_term))
        )
        # The species join is required to evaluate the search condition.
        total = query.count()
    else:
        # No search: count directly on the images table, without the join.
        count_query = db.query(func.count(Image.id))
        if criteria:
            count_query = count_query.filter(*criteria)
        total = count_query.scalar()

    # Ceiling division: a partially filled last page still counts as a page.
    pages = (total + page_size - 1) // page_size

    images = (
        query
        # `id` as secondary key makes OFFSET/LIMIT paging deterministic when
        # several rows share the same `created_at`.
        .order_by(Image.created_at.desc(), Image.id.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
        .all()
    )

    items = [
        ImageResponse(
            id=img.id,
            species_id=img.species_id,
            species_name=img.species.scientific_name if img.species else None,
            source=img.source,
            source_id=img.source_id,
            url=img.url,
            local_path=img.local_path,
            license=img.license,
            attribution=img.attribution,
            width=img.width,
            height=img.height,
            quality_score=img.quality_score,
            status=img.status,
            created_at=img.created_at,
        )
        for img in images
    ]

    return ImageListResponse(
        items=items,
        total=total,
        page=page,
        page_size=page_size,
        pages=pages,
    )
|
|
|
|
|
|
@router.get("/sources")
def list_sources(db: Session = Depends(get_db)):
    """Return the distinct values found in the ``source`` column of images."""
    rows = db.query(Image.source).distinct().all()
    # Each result row holds exactly one column; unwrap it to the bare value.
    return [source for (source,) in rows]
|
|
|
|
|
|
@router.get("/licenses")
def list_licenses(db: Session = Depends(get_db)):
    """Return the distinct values found in the ``license`` column of images."""
    rows = db.query(Image.license).distinct().all()
    # Each result row holds exactly one column; unwrap it to the bare value.
    return [row[0] for row in rows]
|
|
|
|
|
|
@router.post("/process-pending")
def process_pending_images(
    source: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Queue a background task that processes images in ``pending`` status.

    Args:
        source: When given, only images from this source are counted, and
            the same value is forwarded to the background task.

    Returns:
        A dict with ``pending_count`` (pending images counted before the
        task was queued) and ``task_id`` (id of the queued task).
    """
    from app.workers.quality_tasks import batch_process_pending_images

    conditions = [Image.status == "pending"]
    if source:
        conditions.append(Image.source == source)
    pending_count = db.query(func.count(Image.id)).filter(*conditions).scalar()

    # The count above is a snapshot taken before the task is dispatched.
    task = batch_process_pending_images.delay(source=source)

    return {"pending_count": pending_count, "task_id": task.id}
|
|
|
|
|
|
@router.get("/process-pending/status/{task_id}")
def process_pending_status(task_id: str):
    """Report the state of a batch processing task.

    The response always contains ``task_id`` and ``state``. While the task
    is in ``PROGRESS``, or once it reached ``SUCCESS``, and it exposes a
    dict payload, ``queued`` and ``total`` are copied from that payload
    (defaulting to 0 when a key is absent).
    """
    from app.workers.celery_app import celery_app

    result = celery_app.AsyncResult(task_id)
    state = result.state  # PENDING, STARTED, PROGRESS, SUCCESS, FAILURE

    # Pick the payload that belongs to the current state, if any.
    if state == "PROGRESS":
        payload = result.info
    elif state == "SUCCESS":
        payload = result.result
    else:
        payload = None

    response = {"task_id": task_id, "state": state}
    if isinstance(payload, dict):
        for key in ("queued", "total"):
            response[key] = payload.get(key, 0)

    return response
|
|
|
|
|
|
@router.get("/{image_id}", response_model=ImageResponse)
def get_image(image_id: int, db: Session = Depends(get_db)):
    """Return a single image record.

    Raises:
        HTTPException: 404 when no image with ``image_id`` exists.
    """
    record = db.query(Image).filter(Image.id == image_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="Image not found")

    # Columns that are copied onto the response under the same name.
    mirrored = (
        "id",
        "species_id",
        "source",
        "source_id",
        "url",
        "local_path",
        "license",
        "attribution",
        "width",
        "height",
        "quality_score",
        "status",
        "created_at",
    )
    fields = {name: getattr(record, name) for name in mirrored}

    # The species name comes from the related row, which may be absent.
    species = record.species
    fields["species_name"] = species.scientific_name if species else None

    return ImageResponse(**fields)
|
|
|
|
|
|
@router.get("/{image_id}/file")
def get_image_file(image_id: int, db: Session = Depends(get_db)):
    """Serve the image file stored on disk for an image record.

    The content type is guessed from the stored path, falling back to
    ``image/jpeg`` when it cannot be determined.

    Raises:
        HTTPException: 404 when the image does not exist, has no
            ``local_path``, or the path does not point to an existing file.
    """
    import mimetypes

    image = db.query(Image).filter(Image.id == image_id).first()
    if not image:
        raise HTTPException(status_code=404, detail="Image not found")

    # A record may outlive its file; report that as "not available" rather
    # than letting the response fail while streaming.
    if not image.local_path or not os.path.isfile(image.local_path):
        raise HTTPException(status_code=404, detail="Image file not available")

    # Imported files keep their .jpg/.png extension, so derive the type
    # from the path instead of assuming JPEG for everything.
    media_type = mimetypes.guess_type(image.local_path)[0] or "image/jpeg"

    return FileResponse(image.local_path, media_type=media_type)
|
|
|
|
|
|
@router.delete("/{image_id}")
def delete_image(image_id: int, db: Session = Depends(get_db)):
    """Delete an image record and, if present, its file on disk.

    The database row is removed and committed first; the file is only
    removed afterwards, so a failed commit never leaves a record pointing
    at a file that has already been deleted.

    Raises:
        HTTPException: 404 when no image with ``image_id`` exists.
    """
    image = db.query(Image).filter(Image.id == image_id).first()
    if not image:
        raise HTTPException(status_code=404, detail="Image not found")

    # Remember the path before the instance is deleted.
    file_path = image.local_path

    db.delete(image)
    db.commit()

    if file_path:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing left to clean up.
            pass

    return {"status": "deleted"}
|
|
|
|
|
|
@router.post("/bulk-delete")
def bulk_delete_images(
    image_ids: List[int],
    db: Session = Depends(get_db),
):
    """Delete several image records and their files on disk.

    Ids that do not match an existing image are ignored. All rows are
    deleted in one commit; files are removed only after that commit
    succeeded, so a failed commit never leaves records pointing at files
    that have already been deleted.

    Returns:
        A dict with ``deleted``: the number of image records removed.
    """
    if not image_ids:
        # Nothing requested; skip the query entirely.
        return {"deleted": 0}

    images = db.query(Image).filter(Image.id.in_(image_ids)).all()

    # Collect the paths up front, before the instances are deleted.
    file_paths = [image.local_path for image in images if image.local_path]

    for image in images:
        db.delete(image)
    db.commit()

    for file_path in file_paths:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing left to clean up.
            pass

    return {"deleted": len(images)}
|
|
|
|
|
|
@router.get("/import/scan")
def scan_imports(db: Session = Depends(get_db)):
    """Scan the imports folder and summarise its contents.

    Expected structure: imports/{source}/{species_name}/*.jpg

    Only ``*.jpg``, ``*.jpeg`` and ``*.png`` files are counted. A species
    folder is "matched" when its name (with underscores or with spaces,
    compared in lower case) equals the scientific name of a known species.

    Returns:
        A dict with ``available``, per-source counters in ``sources``,
        ``total_images``, ``matched_species`` and the list of
        ``unmatched_species`` folder names (underscores shown as spaces).
        When the imports folder does not exist, ``available`` is False and
        a ``message`` explains why.
    """
    imports_path = Path(settings.imports_path)

    if not imports_path.exists():
        return {
            "available": False,
            "message": f"Imports folder not found: {imports_path}",
            "sources": [],
            "total_images": 0,
            "matched_species": 0,
            "unmatched_species": [],
        }

    # Lower-cased scientific names, in both "genus species" and
    # "genus_species" spelling, for matching folder names.
    known_names = set()
    for species in db.query(Species).all():
        known_names.add(species.scientific_name.lower())
        known_names.add(species.scientific_name.replace(" ", "_").lower())

    results = {
        "available": True,
        "sources": [],
        "total_images": 0,
        "matched_species": 0,
        "unmatched_species": [],
    }
    patterns = ("*.jpg", "*.jpeg", "*.png")
    reported_unmatched = set()

    for source_dir in imports_path.iterdir():
        if not source_dir.is_dir():
            continue

        source_info = {
            "name": source_dir.name,
            "species_count": 0,
            "image_count": 0,
        }

        for species_dir in source_dir.iterdir():
            if not species_dir.is_dir():
                continue

            found = sum(len(list(species_dir.glob(pattern))) for pattern in patterns)
            if found == 0:
                # Folders without images do not show up in the report at all.
                continue

            source_info["image_count"] += found
            results["total_images"] += found

            folder_name = species_dir.name
            display_name = folder_name.replace("_", " ")
            is_known = (
                display_name.lower() in known_names
                or folder_name.lower() in known_names
            )

            if is_known:
                source_info["species_count"] += 1
                results["matched_species"] += 1
            elif display_name not in reported_unmatched:
                # List each unmatched name once, even if several sources have it.
                reported_unmatched.add(display_name)
                results["unmatched_species"].append(display_name)

        if source_info["image_count"] > 0:
            results["sources"].append(source_info)

    return results
|
|
|
|
|
|
@router.post("/import/run")
def run_import(
    move_files: bool = Query(False, description="Move files instead of copy"),
    db: Session = Depends(get_db),
):
    """Import images from the imports folder.

    Expected structure: imports/{source}/{species_name}/*.jpg
    Images are copied/moved to: images/{species_name}/{source}_{filename}

    Folders whose name does not match a known species are ignored. A file
    is skipped when an image with the same species, source and file stem
    (stored as ``source_id``) already exists. Failures on individual files
    are collected instead of aborting the import.

    Args:
        move_files: Move the files into the images folder instead of
            copying them.

    Returns:
        A dict with the number of ``imported`` and ``skipped`` files and
        up to 20 ``errors`` messages.

    Raises:
        HTTPException: 400 when the imports folder does not exist.
    """
    imports_path = Path(settings.imports_path)
    images_path = Path(settings.images_path)

    if not imports_path.exists():
        raise HTTPException(status_code=400, detail="Imports folder not found")

    # Index species by lower-cased scientific name, in both "genus species"
    # and "genus_species" spelling, for matching folder names.
    species_map = {}
    for species in db.query(Species).all():
        species_map[species.scientific_name.lower()] = species
        species_map[species.scientific_name.replace(" ", "_").lower()] = species

    imported = 0
    skipped = 0
    errors = []

    for source_dir in imports_path.iterdir():
        if not source_dir.is_dir():
            continue

        source_name = source_dir.name

        for species_dir in source_dir.iterdir():
            if not species_dir.is_dir():
                continue

            species_key = species_dir.name.replace("_", " ").lower()
            species = species_map.get(species_key) or species_map.get(species_dir.name.lower())
            if not species:
                continue

            image_files = (
                list(species_dir.glob("*.jpg"))
                + list(species_dir.glob("*.jpeg"))
                + list(species_dir.glob("*.png"))
            )
            if not image_files:
                # Nothing to import: do not create an empty target folder.
                continue

            target_dir = images_path / species.scientific_name.replace(" ", "_")
            target_dir.mkdir(parents=True, exist_ok=True)

            # Fetch the already imported ids for this species/source once,
            # instead of issuing one lookup query per file.
            known_source_ids = {
                row[0]
                for row in db.query(Image.source_id).filter(
                    Image.species_id == species.id,
                    Image.source == source_name,
                )
            }

            for img_file in image_files:
                source_id = img_file.stem

                if source_id in known_source_ids:
                    skipped += 1
                    continue

                try:
                    # Normalise the extension and add a random suffix so the
                    # target name cannot collide with an existing file.
                    ext = img_file.suffix.lower()
                    if ext == ".jpeg":
                        ext = ".jpg"
                    new_filename = f"{source_name}_{img_file.stem}_{uuid.uuid4().hex[:8]}{ext}"
                    target_path = target_dir / new_filename

                    # Dimensions are best effort; an unreadable image is
                    # still imported, just without width/height.
                    try:
                        with PILImage.open(img_file) as pil_img:
                            width, height = pil_img.size
                    except Exception:
                        width, height = None, None

                    if move_files:
                        shutil.move(str(img_file), str(target_path))
                    else:
                        shutil.copy2(str(img_file), str(target_path))

                    db.add(
                        Image(
                            species_id=species.id,
                            source=source_name,
                            source_id=source_id,
                            url=f"file://{img_file}",
                            local_path=str(target_path),
                            license="unknown",
                            width=width,
                            height=height,
                            status="downloaded",
                        )
                    )
                except Exception as e:
                    errors.append(f"{img_file}: {str(e)}")
                else:
                    # Files sharing a stem (e.g. a.jpg and a.png) count as
                    # the same image; later ones in this folder are skipped.
                    known_source_ids.add(source_id)
                    imported += 1

            # Commit after each species to avoid large transactions
            db.commit()

    return {
        "imported": imported,
        "skipped": skipped,
        "errors": errors[:20],
    }
|