Initial commit — PlantGuideScraper project
This commit is contained in:
8
backend/app/models/__init__.py
Normal file
8
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from app.models.species import Species
|
||||
from app.models.image import Image
|
||||
from app.models.job import Job
|
||||
from app.models.api_key import ApiKey
|
||||
from app.models.export import Export
|
||||
from app.models.cached_stats import CachedStats
|
||||
|
||||
# Public names re-exported by the models package.
__all__ = [
    "Species",
    "Image",
    "Job",
    "ApiKey",
    "Export",
    "CachedStats",
]
|
||||
18
backend/app/models/api_key.py
Normal file
18
backend/app/models/api_key.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from sqlalchemy import Column, Integer, String, Float, Boolean
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class ApiKey(Base):
    """Model for the ``api_keys`` table: credentials and settings per source.

    ``source`` is unique, so each source name has at most one row.
    """

    __tablename__ = "api_keys"

    id = Column(Integer, primary_key=True, index=True)

    # Source name: 'flickr', 'inaturalist', 'wikimedia', 'trefle'.
    source = Column(String, nullable=False, unique=True)

    # Also used as the Client ID for OAuth sources.
    api_key = Column(String, nullable=False)
    # Also used as the Client Secret for OAuth sources.
    api_secret = Column(String, nullable=True)
    # For OAuth sources like Wikimedia.
    access_token = Column(String, nullable=True)

    rate_limit_per_sec = Column(Float, default=1.0)
    enabled = Column(Boolean, default=True)

    def __repr__(self):
        """Return a short debug string; credential fields are not included."""
        return "<ApiKey(id={}, source='{}', enabled={})>".format(
            self.id, self.source, self.enabled
        )
|
||||
14
backend/app/models/cached_stats.py
Normal file
14
backend/app/models/cached_stats.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from datetime import datetime
|
||||
from sqlalchemy import Column, Integer, String, Text, DateTime
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
def _utcnow():
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since
    Python 3.12. The tzinfo is stripped so the produced values remain
    naive UTC, the same shape ``datetime.utcnow()`` returned.
    """
    # Function-scope import: the module itself only imports ``datetime``.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


class CachedStats(Base):
    """Stores pre-calculated statistics updated by Celery beat."""

    __tablename__ = "cached_stats"

    id = Column(Integer, primary_key=True, index=True)
    # Unique lookup name of the cached statistic (at most 50 characters).
    key = Column(String(50), unique=True, nullable=False, index=True)
    # JSON-encoded stats.
    value = Column(Text, nullable=False)
    # Set on insert and refreshed on every update, as naive UTC.
    updated_at = Column(DateTime, default=_utcnow, onupdate=_utcnow)
|
||||
24
backend/app/models/export.py
Normal file
24
backend/app/models/export.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from sqlalchemy import Column, Integer, String, Float, DateTime, Text, func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Export(Base):
    """Model for the ``exports`` table: one row per export.

    Holds the filter and split settings, the progress ``status``, and the
    resulting file details or error message.
    """

    __tablename__ = "exports"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)

    # JSON: min_images, licenses, min_quality, species_ids.
    filter_criteria = Column(Text, nullable=True)
    train_split = Column(Float, default=0.8)

    # One of: pending, generating, completed, failed.
    status = Column(String, default="pending")

    file_path = Column(String, nullable=True)
    file_size = Column(Integer, nullable=True)
    species_count = Column(Integer, nullable=True)
    image_count = Column(Integer, nullable=True)

    celery_task_id = Column(String, nullable=True)

    # created_at is filled in by the database (server-side default).
    created_at = Column(DateTime, server_default=func.now())
    completed_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)

    def __repr__(self):
        """Return a short debug string with id, name and status."""
        return "<Export(id={}, name='{}', status='{}')>".format(
            self.id, self.name, self.status
        )
|
||||
36
backend/app/models/image.py
Normal file
36
backend/app/models/image.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from sqlalchemy import Column, Integer, String, Float, DateTime, ForeignKey, func, UniqueConstraint, Index
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Image(Base):
    """Model for the ``images`` table: one image record tied to a species.

    Each row references ``species.id`` and records the image's source,
    URL, license, optional local path and dimensions, and a ``status``.
    """

    __tablename__ = "images"

    id = Column(Integer, primary_key=True, index=True)
    species_id = Column(
        Integer,
        ForeignKey("species.id"),
        nullable=False,
        index=True,
    )

    source = Column(String, nullable=False, index=True)
    source_id = Column(String, nullable=True)
    url = Column(String, nullable=False)
    local_path = Column(String, nullable=True)

    license = Column(String, nullable=False, index=True)
    attribution = Column(String, nullable=True)

    width = Column(Integer, nullable=True)
    height = Column(Integer, nullable=True)
    phash = Column(String, nullable=True, index=True)
    quality_score = Column(Float, nullable=True)

    # One of: pending, downloaded, rejected, deleted.
    status = Column(String, default="pending", index=True)
    created_at = Column(DateTime, server_default=func.now())

    # Table-level constraint plus composite indexes for common query patterns.
    # NOTE(review): source_id is nullable; how NULL values interact with the
    # unique constraint depends on the database backend - confirm.
    __table_args__ = (
        UniqueConstraint("source", "source_id", name="uq_source_source_id"),
        # For counting images per species by status.
        Index("ix_images_species_status", "species_id", "status"),
        # For listing images by status.
        Index("ix_images_status_created", "status", "created_at"),
    )

    # Relationships
    species = relationship("Species", back_populates="images")

    def __repr__(self):
        """Return a short debug string with id, source and status."""
        return "<Image(id={}, source='{}', status='{}')>".format(
            self.id, self.source, self.status
        )
|
||||
27
backend/app/models/job.py
Normal file
27
backend/app/models/job.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Text, Boolean, func
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Job(Base):
    """Model for the ``jobs`` table: one row per scrape job.

    Holds the job's source, species selection options, progress counters,
    timestamps, and any error message.
    """

    __tablename__ = "jobs"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, nullable=False)
    source = Column(String, nullable=False)

    # JSON array of species IDs, or NULL for all.
    species_filter = Column(Text, nullable=True)
    # If True, only scrape species with 0 images.
    only_without_images = Column(Boolean, default=False)
    # If set, only scrape species with fewer than N images.
    max_images = Column(Integer, nullable=True)

    # One of: pending, running, paused, completed, failed.
    status = Column(String, default="pending", index=True)

    # Progress counters, all starting at 0.
    progress_current = Column(Integer, default=0)
    progress_total = Column(Integer, default=0)
    images_downloaded = Column(Integer, default=0)
    images_rejected = Column(Integer, default=0)

    celery_task_id = Column(String, nullable=True)

    started_at = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)
    error_message = Column(Text, nullable=True)
    # created_at is filled in by the database (server-side default).
    created_at = Column(DateTime, server_default=func.now())

    def __repr__(self):
        """Return a short debug string with id, name and status."""
        return "<Job(id={}, name='{}', status='{}')>".format(
            self.id, self.name, self.status
        )
|
||||
21
backend/app/models/species.py
Normal file
21
backend/app/models/species.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, func
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
class Species(Base):
    """Model for the ``species`` table: one row per species.

    ``scientific_name`` is unique and required; the other name and
    taxonomy columns are optional.
    """

    __tablename__ = "species"

    id = Column(Integer, primary_key=True, index=True)
    scientific_name = Column(
        String,
        nullable=False,
        unique=True,
        index=True,
    )
    common_name = Column(String, nullable=True)
    genus = Column(String, nullable=True, index=True)
    family = Column(String, nullable=True)
    # created_at is filled in by the database (server-side default).
    created_at = Column(DateTime, server_default=func.now())

    # Relationships
    # Configured with cascade "all, delete-orphan" toward the Image model.
    images = relationship(
        "Image",
        back_populates="species",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug string with id and scientific name."""
        return "<Species(id={}, scientific_name='{}')>".format(
            self.id, self.scientific_name
        )
|
||||
Reference in New Issue
Block a user