Initial commit — PlantGuideScraper project
This commit is contained in:
441
backend/app/api/images.py
Normal file
441
backend/app/api/images.py
Normal file
@@ -0,0 +1,441 @@
|
||||
import mimetypes
import os
import shutil
import uuid
from pathlib import Path
from typing import List, Optional

from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse
from PIL import Image as PILImage
from sqlalchemy import func
from sqlalchemy.orm import Session

from app.config import get_settings
from app.database import get_db
from app.models import Image, Species
from app.schemas.image import ImageResponse, ImageListResponse
|
||||
|
||||
# Router for all /images endpoints; mounted by the application elsewhere.
router = APIRouter()
# Application settings (provides imports_path / images_path used below).
settings = get_settings()
|
||||
|
||||
|
||||
@router.get("", response_model=ImageListResponse)
def list_images(
    page: int = Query(1, ge=1),
    page_size: int = Query(50, ge=1, le=200),
    species_id: Optional[int] = None,
    source: Optional[str] = None,
    license: Optional[str] = None,
    status: Optional[str] = None,
    min_quality: Optional[float] = None,
    search: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """List images with pagination and optional filters.

    Filters combine with AND. ``search`` does a case-insensitive substring
    match against the species' scientific or common name.

    Returns an ``ImageListResponse`` with the page of items plus paging
    metadata (``total``, ``page``, ``page_size``, ``pages``).
    """
    from sqlalchemy.orm import joinedload

    def _filtered(q):
        """Apply the shared column filters to either the row or count query."""
        # Numeric filters use `is not None` so 0 / 0.0 are still honored
        # (truthiness tests would silently drop them).
        if species_id is not None:
            q = q.filter(Image.species_id == species_id)
        if source:
            q = q.filter(Image.source == source)
        if license:
            q = q.filter(Image.license == license)
        if status:
            q = q.filter(Image.status == status)
        if min_quality is not None:
            q = q.filter(Image.quality_score >= min_quality)
        return q

    # joinedload fetches each image's species in the same query (avoids N+1).
    query = _filtered(db.query(Image).options(joinedload(Image.species)))

    if search:
        pattern = f"%{search}%"
        query = query.join(Species).filter(
            (Species.scientific_name.ilike(pattern))
            | (Species.common_name.ilike(pattern))
        )
        total = query.count()
    else:
        # Plain COUNT(*) without the eager-load join is cheaper than
        # query.count() on the full row query.
        total = _filtered(db.query(func.count(Image.id))).scalar()

    pages = (total + page_size - 1) // page_size  # ceiling division

    images = (
        query.order_by(Image.created_at.desc())
        .offset((page - 1) * page_size)
        .limit(page_size)
        .all()
    )

    items = [
        ImageResponse(
            id=img.id,
            species_id=img.species_id,
            species_name=img.species.scientific_name if img.species else None,
            source=img.source,
            source_id=img.source_id,
            url=img.url,
            local_path=img.local_path,
            license=img.license,
            attribution=img.attribution,
            width=img.width,
            height=img.height,
            quality_score=img.quality_score,
            status=img.status,
            created_at=img.created_at,
        )
        for img in images
    ]

    return ImageListResponse(
        items=items,
        total=total,
        page=page,
        page_size=page_size,
        pages=pages,
    )
|
||||
|
||||
|
||||
@router.get("/sources")
def list_sources(db: Session = Depends(get_db)):
    """Return every distinct image source present in the database."""
    rows = db.query(Image.source).distinct().all()
    # Each row is a one-element tuple; unwrap to a flat list of source names.
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
@router.get("/licenses")
def list_licenses(db: Session = Depends(get_db)):
    """Return every distinct license string present in the database."""
    rows = db.query(Image.license).distinct().all()
    # Unwrap the single-column result tuples into a flat list.
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
@router.post("/process-pending")
def process_pending_images(
    source: Optional[str] = None,
    db: Session = Depends(get_db),
):
    """Queue every pending image for download and processing.

    Counts the matching rows first so the response can report how many
    images the Celery task is about to handle.
    """
    from app.workers.quality_tasks import batch_process_pending_images

    count_query = db.query(func.count(Image.id)).filter(Image.status == "pending")
    if source:
        count_query = count_query.filter(Image.source == source)
    pending_count = count_query.scalar()

    # Fire-and-forget: the task id lets the client poll for progress.
    task = batch_process_pending_images.delay(source=source)

    return {
        "pending_count": pending_count,
        "task_id": task.id,
    }
|
||||
|
||||
|
||||
@router.get("/process-pending/status/{task_id}")
def process_pending_status(task_id: str):
    """Report the state of a batch processing task, with progress counters.

    Possible states: PENDING, STARTED, PROGRESS, SUCCESS, FAILURE.
    """
    from app.workers.celery_app import celery_app

    result = celery_app.AsyncResult(task_id)
    response = {"task_id": task_id, "state": result.state}

    # While running, counters live in .info; once finished, in .result.
    if result.state == "PROGRESS":
        payload = result.info
    elif result.state == "SUCCESS":
        payload = result.result
    else:
        payload = None

    if isinstance(payload, dict):
        response["queued"] = payload.get("queued", 0)
        response["total"] = payload.get("total", 0)

    return response
|
||||
|
||||
|
||||
@router.get("/{image_id}", response_model=ImageResponse)
def get_image(image_id: int, db: Session = Depends(get_db)):
    """Fetch a single image record by primary key; 404 if it does not exist."""
    img = db.query(Image).filter(Image.id == image_id).first()
    if img is None:
        raise HTTPException(status_code=404, detail="Image not found")

    species_name = img.species.scientific_name if img.species else None
    return ImageResponse(
        id=img.id,
        species_id=img.species_id,
        species_name=species_name,
        source=img.source,
        source_id=img.source_id,
        url=img.url,
        local_path=img.local_path,
        license=img.license,
        attribution=img.attribution,
        width=img.width,
        height=img.height,
        quality_score=img.quality_score,
        status=img.status,
        created_at=img.created_at,
    )
|
||||
|
||||
|
||||
@router.get("/{image_id}/file")
def get_image_file(image_id: int, db: Session = Depends(get_db)):
    """Serve the locally stored file for an image.

    Raises 404 when the record does not exist, has no local path, or the
    path no longer exists on disk.
    """
    image = db.query(Image).filter(Image.id == image_id).first()
    if not image:
        raise HTTPException(status_code=404, detail="Image not found")

    if not image.local_path:
        raise HTTPException(status_code=404, detail="Image file not available")

    # Guard against stale DB records pointing at deleted files; without this
    # check FileResponse would fail with a 500 when the path is gone.
    if not os.path.exists(image.local_path):
        raise HTTPException(status_code=404, detail="Image file not available")

    # The import endpoint accepts .png as well as .jpg/.jpeg, so derive the
    # media type from the extension instead of hard-coding image/jpeg.
    media_type, _ = mimetypes.guess_type(image.local_path)
    return FileResponse(image.local_path, media_type=media_type or "image/jpeg")
|
||||
|
||||
|
||||
@router.delete("/{image_id}")
def delete_image(image_id: int, db: Session = Depends(get_db)):
    """Delete an image record and, when present, its file on disk.

    Raises 404 when no image with ``image_id`` exists.
    """
    image = db.query(Image).filter(Image.id == image_id).first()
    if not image:
        raise HTTPException(status_code=404, detail="Image not found")

    # Remove the stored file first; `os` is imported at module level, so the
    # original's redundant function-local import is dropped.
    if image.local_path and os.path.exists(image.local_path):
        os.remove(image.local_path)

    db.delete(image)
    db.commit()

    return {"status": "deleted"}
|
||||
|
||||
|
||||
@router.post("/bulk-delete")
def bulk_delete_images(
    image_ids: List[int],
    db: Session = Depends(get_db),
):
    """Delete multiple images by ID, including any local files.

    IDs that match no record are silently ignored, so the returned
    ``deleted`` count may be smaller than ``len(image_ids)``.
    """
    # `os` is imported at module level; the redundant local import is dropped.
    images = db.query(Image).filter(Image.id.in_(image_ids)).all()

    deleted = 0
    for image in images:
        if image.local_path and os.path.exists(image.local_path):
            os.remove(image.local_path)
        db.delete(image)
        deleted += 1

    # Single commit keeps the bulk delete atomic.
    db.commit()

    return {"deleted": deleted}
|
||||
|
||||
|
||||
@router.get("/import/scan")
def scan_imports(db: Session = Depends(get_db)):
    """Scan the imports folder and report what could be imported.

    Expected structure: imports/{source}/{species_name}/*.jpg
    """
    imports_path = Path(settings.imports_path)

    if not imports_path.exists():
        return {
            "available": False,
            "message": f"Imports folder not found: {imports_path}",
            "sources": [],
            "total_images": 0,
            "matched_species": 0,
            "unmatched_species": [],
        }

    report = {
        "available": True,
        "sources": [],
        "total_images": 0,
        "matched_species": 0,
        "unmatched_species": [],
    }

    # Index species under both "genus species" and "genus_species" spellings
    # so either folder-naming convention resolves.
    species_map = {}
    for sp in db.query(Species).all():
        lowered = sp.scientific_name.lower()
        species_map[lowered] = sp
        species_map[lowered.replace(" ", "_")] = sp

    seen_unmatched = set()

    # Walk imports/{source}/{species}/ and tally images per source folder.
    for source_dir in imports_path.iterdir():
        if not source_dir.is_dir():
            continue

        entry = {
            "name": source_dir.name,
            "species_count": 0,
            "image_count": 0,
        }

        for species_dir in source_dir.iterdir():
            if not species_dir.is_dir():
                continue

            display_name = species_dir.name.replace("_", " ")

            file_count = sum(
                len(list(species_dir.glob(pattern)))
                for pattern in ("*.jpg", "*.jpeg", "*.png")
            )
            if file_count == 0:
                continue

            entry["image_count"] += file_count
            report["total_images"] += file_count

            matched = (
                display_name.lower() in species_map
                or species_dir.name.lower() in species_map
            )
            if matched:
                entry["species_count"] += 1
                report["matched_species"] += 1
            elif display_name not in seen_unmatched:
                # Report each unmatched name only once across all sources.
                seen_unmatched.add(display_name)
                report["unmatched_species"].append(display_name)

        if entry["image_count"] > 0:
            report["sources"].append(entry)

    return report
|
||||
|
||||
|
||||
@router.post("/import/run")
def run_import(
    move_files: bool = Query(False, description="Move files instead of copy"),
    db: Session = Depends(get_db),
):
    """Import images from the imports folder into managed storage.

    Expected structure: imports/{source}/{species_name}/*.jpg
    Files land in: images/{Genus_species}/{source}_{stem}_{uuid8}{ext}

    Returns counts of imported and skipped files plus (at most 20) error
    messages. Raises 400 when the imports folder does not exist.
    """
    imports_path = Path(settings.imports_path)
    images_path = Path(settings.images_path)

    if not imports_path.exists():
        raise HTTPException(status_code=400, detail="Imports folder not found")

    # Index species under both "genus species" and "genus_species" spellings
    # so folder names in either form resolve to the same record.
    species_map = {}
    for species in db.query(Species).all():
        species_map[species.scientific_name.lower()] = species
        species_map[species.scientific_name.replace(" ", "_").lower()] = species

    imported = 0
    skipped = 0
    errors = []

    # Scan source folders
    for source_dir in imports_path.iterdir():
        if not source_dir.is_dir():
            continue

        source_name = source_dir.name

        # Scan species folders within source
        for species_dir in source_dir.iterdir():
            if not species_dir.is_dir():
                continue

            species_name = species_dir.name.replace("_", " ")
            species_key = species_name.lower()

            # Find matching species; folders that match nothing are skipped
            # silently (the /import/scan endpoint reports them as unmatched).
            species = species_map.get(species_key) or species_map.get(species_dir.name.lower())
            if not species:
                continue

            # Create target directory: images/{Genus_species}/
            target_dir = images_path / species.scientific_name.replace(" ", "_")
            target_dir.mkdir(parents=True, exist_ok=True)

            # Process images
            image_files = list(species_dir.glob("*.jpg")) + \
                list(species_dir.glob("*.jpeg")) + \
                list(species_dir.glob("*.png"))

            for img_file in image_files:
                try:
                    # Generate unique filename: {source}_{stem}_{8-hex-uuid}{ext};
                    # .jpeg is normalized to .jpg.
                    ext = img_file.suffix.lower()
                    if ext == ".jpeg":
                        ext = ".jpg"
                    new_filename = f"{source_name}_{img_file.stem}_{uuid.uuid4().hex[:8]}{ext}"
                    target_path = target_dir / new_filename

                    # Check if already imported — dedup key is (species, source,
                    # original file stem), not the freshly generated name.
                    existing = db.query(Image).filter(
                        Image.species_id == species.id,
                        Image.source == source_name,
                        Image.source_id == img_file.stem,
                    ).first()

                    if existing:
                        skipped += 1
                        continue

                    # Get image dimensions; unreadable files are still imported
                    # with width/height left unset.
                    try:
                        with PILImage.open(img_file) as pil_img:
                            width, height = pil_img.size
                    except Exception:
                        width, height = None, None

                    # Copy or move file into managed storage.
                    if move_files:
                        shutil.move(str(img_file), str(target_path))
                    else:
                        shutil.copy2(str(img_file), str(target_path))

                    # Create database record; url keeps the original location,
                    # local_path points at the managed copy.
                    image = Image(
                        species_id=species.id,
                        source=source_name,
                        source_id=img_file.stem,
                        url=f"file://{img_file}",
                        local_path=str(target_path),
                        license="unknown",
                        width=width,
                        height=height,
                        status="downloaded",
                    )
                    db.add(image)
                    imported += 1

                except Exception as e:
                    # Record the failure and keep importing the remaining files.
                    errors.append(f"{img_file}: {str(e)}")

            # Commit after each species to avoid large transactions
            db.commit()

    return {
        "imported": imported,
        "skipped": skipped,
        # Cap the error list so a pathological run can't bloat the response.
        "errors": errors[:20],
    }
|
||||
Reference in New Issue
Block a user