Initial commit — PlantGuideScraper project

This commit is contained in:
Trey T
2026-04-12 09:54:27 -05:00
commit 6926f502c5
87 changed files with 29120 additions and 0 deletions

54
backend/alembic/env.py Normal file
View File

@@ -0,0 +1,54 @@
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
# Import models for autogenerate
from app.database import Base
from app.models import Species, Image, Job, ApiKey, Export
# Alembic Config object; gives access to the values of the .ini file in use.
config = context.config

# Metadata of the application's models, used as the target for autogenerate.
target_metadata = Base.metadata

# Configure Python logging from the .ini file, when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone (no
    connection is passed in), with bound parameters rendered as literals.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An engine is built from the ``sqlalchemy.``-prefixed options of the
    active .ini section, and the migrations run over a connection from it.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
# Entry point: pick the runner that matches how Alembic was invoked.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
## Body is filled from the template's `upgrades` value; "pass" when it is empty.
def upgrade() -> None:
${upgrades if upgrades else "pass"}
## Body is filled from the template's `downgrades` value; "pass" when it is empty.
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,112 @@
"""Initial migration
Revision ID: 001
Revises:
Create Date: 2024-01-01
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '001'
# None: this revision revises nothing (see the empty "Revises:" header above).
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the initial schema: species, api_keys, images, jobs, exports.

    ``species`` is created before ``images`` because ``images.species_id``
    has a foreign key to ``species.id``.

    NOTE(review): the ``default=...`` arguments below are SQLAlchemy
    Python-side defaults; they are not rendered into ``CREATE TABLE``.
    Confirm that the ORM models supply these defaults, or switch to
    ``server_default`` in a follow-up revision.
    """
    # Species table
    op.create_table(
        'species',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('scientific_name', sa.String(), nullable=False, unique=True),
        sa.Column('common_name', sa.String(), nullable=True),
        sa.Column('genus', sa.String(), nullable=True),
        sa.Column('family', sa.String(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
    )
    op.create_index('ix_species_scientific_name', 'species', ['scientific_name'])
    op.create_index('ix_species_genus', 'species', ['genus'])

    # API Keys table
    op.create_table(
        'api_keys',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('source', sa.String(), nullable=False, unique=True),
        sa.Column('api_key', sa.String(), nullable=False),
        sa.Column('api_secret', sa.String(), nullable=True),
        sa.Column('rate_limit_per_sec', sa.Float(), default=1.0),
        sa.Column('enabled', sa.Boolean(), default=True),
    )

    # Images table
    op.create_table(
        'images',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('species_id', sa.Integer(), sa.ForeignKey('species.id'), nullable=False),
        sa.Column('source', sa.String(), nullable=False),
        sa.Column('source_id', sa.String(), nullable=True),
        sa.Column('url', sa.String(), nullable=False),
        sa.Column('local_path', sa.String(), nullable=True),
        sa.Column('license', sa.String(), nullable=False),
        sa.Column('attribution', sa.String(), nullable=True),
        sa.Column('width', sa.Integer(), nullable=True),
        sa.Column('height', sa.Integer(), nullable=True),
        sa.Column('phash', sa.String(), nullable=True),
        sa.Column('quality_score', sa.Float(), nullable=True),
        sa.Column('status', sa.String(), default='pending'),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        # Declared inline instead of via op.create_unique_constraint():
        # adding a constraint to an existing table needs ALTER TABLE, which
        # SQLite does not support (Alembic raises NotImplementedError there).
        # Inline declaration yields the same named constraint on any backend.
        sa.UniqueConstraint('source', 'source_id', name='uq_source_source_id'),
    )
    op.create_index('ix_images_species_id', 'images', ['species_id'])
    op.create_index('ix_images_source', 'images', ['source'])
    op.create_index('ix_images_status', 'images', ['status'])
    op.create_index('ix_images_phash', 'images', ['phash'])

    # Jobs table
    op.create_table(
        'jobs',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('source', sa.String(), nullable=False),
        sa.Column('species_filter', sa.Text(), nullable=True),
        sa.Column('status', sa.String(), default='pending'),
        sa.Column('progress_current', sa.Integer(), default=0),
        sa.Column('progress_total', sa.Integer(), default=0),
        sa.Column('images_downloaded', sa.Integer(), default=0),
        sa.Column('images_rejected', sa.Integer(), default=0),
        sa.Column('celery_task_id', sa.String(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=True),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
    )
    op.create_index('ix_jobs_status', 'jobs', ['status'])

    # Exports table
    op.create_table(
        'exports',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('filter_criteria', sa.Text(), nullable=True),
        sa.Column('train_split', sa.Float(), default=0.8),
        sa.Column('status', sa.String(), default='pending'),
        sa.Column('file_path', sa.String(), nullable=True),
        sa.Column('file_size', sa.Integer(), nullable=True),
        sa.Column('species_count', sa.Integer(), nullable=True),
        sa.Column('image_count', sa.Integer(), nullable=True),
        sa.Column('celery_task_id', sa.String(), nullable=True),
        sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.Column('error_message', sa.Text(), nullable=True),
    )
def downgrade() -> None:
    """Drop every table created by ``upgrade``."""
    # Order matters: 'images' has a foreign key to 'species', so it is
    # dropped before the table it references.
    for table_name in ('exports', 'jobs', 'images', 'api_keys', 'species'):
        op.drop_table(table_name)

View File

@@ -0,0 +1,53 @@
"""Add cached_stats table and license index
Revision ID: 002
Revises: 001
Create Date: 2025-01-25
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '002'
# Parent revision: this migration applies on top of '001'.
down_revision: Union[str, None] = '001'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create ``cached_stats``, index ``images.license``, add ``jobs.only_without_images``.

    The license index and the ``only_without_images`` column may already
    exist, so each is only created when the database does not have it yet.
    Existence is checked through the inspector instead of swallowing every
    exception: a blanket ``except Exception: pass`` also hides real failures,
    and on backends with transactional DDL a failed statement can leave the
    surrounding transaction unusable.
    """
    # Local import keeps this fix self-contained within the function.
    from alembic import context

    # Cached stats table for pre-calculated dashboard statistics
    op.create_table(
        'cached_stats',
        sa.Column('id', sa.Integer(), primary_key=True),
        sa.Column('key', sa.String(50), nullable=False, unique=True),
        sa.Column('value', sa.Text(), nullable=False),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.func.now()),
    )
    op.create_index('ix_cached_stats_key', 'cached_stats', ['key'])

    if context.is_offline_mode():
        # No live database to inspect when only SQL is being generated;
        # emit both statements unconditionally.
        existing_indexes = set()
        existing_columns = set()
    else:
        inspector = sa.inspect(op.get_bind())
        existing_indexes = {ix['name'] for ix in inspector.get_indexes('images')}
        existing_columns = {col['name'] for col in inspector.get_columns('jobs')}

    # Add license index to images table (if not exists)
    if 'ix_images_license' not in existing_indexes:
        op.create_index('ix_images_license', 'images', ['license'])

    # Add only_without_images column to jobs if it doesn't exist
    if 'only_without_images' not in existing_columns:
        op.add_column(
            'jobs',
            sa.Column('only_without_images', sa.Boolean(), default=False),
        )
def downgrade() -> None:
    """Drop the license index, ``jobs.only_without_images`` and ``cached_stats``.

    The index and the column are only dropped when they are present, which
    keeps the best-effort intent of the previous ``try/except`` blocks
    without hiding unrelated errors.
    """
    # Local import keeps this fix self-contained within the function.
    from alembic import context

    if context.is_offline_mode():
        # No live database to inspect when only SQL is being generated;
        # emit both statements unconditionally.
        has_license_index = True
        has_flag_column = True
    else:
        inspector = sa.inspect(op.get_bind())
        has_license_index = any(
            ix['name'] == 'ix_images_license'
            for ix in inspector.get_indexes('images')
        )
        has_flag_column = any(
            col['name'] == 'only_without_images'
            for col in inspector.get_columns('jobs')
        )

    if has_license_index:
        op.drop_index('ix_images_license', table_name='images')

    if has_flag_column:
        op.drop_column('jobs', 'only_without_images')

    op.drop_table('cached_stats')

View File

@@ -0,0 +1,31 @@
"""Add max_images column to jobs table
Revision ID: 003
Revises: 002
Create Date: 2025-01-25
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = '003'
# Parent revision: this migration applies on top of '002'.
down_revision: Union[str, None] = '002'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable ``max_images`` column to ``jobs`` when it is missing.

    The column may already exist, so its presence is checked through the
    inspector rather than by swallowing every exception, which would also
    hide genuine failures of the ALTER statement.
    """
    # Local import keeps this fix self-contained within the function.
    from alembic import context

    if context.is_offline_mode():
        # No live database to inspect when only SQL is being generated;
        # emit the statement unconditionally.
        column_exists = False
    else:
        inspector = sa.inspect(op.get_bind())
        column_exists = any(
            col['name'] == 'max_images'
            for col in inspector.get_columns('jobs')
        )

    # Add max_images column to jobs table
    if not column_exists:
        op.add_column('jobs', sa.Column('max_images', sa.Integer(), nullable=True))
def downgrade() -> None:
    """Drop ``jobs.max_images`` when the column is present.

    The existence check keeps the best-effort intent of the previous
    ``try/except`` block without hiding unrelated errors.
    """
    # Local import keeps this fix self-contained within the function.
    from alembic import context

    if context.is_offline_mode():
        # No live database to inspect when only SQL is being generated;
        # emit the statement unconditionally.
        column_exists = True
    else:
        inspector = sa.inspect(op.get_bind())
        column_exists = any(
            col['name'] == 'max_images'
            for col in inspector.get_columns('jobs')
        )

    if column_exists:
        op.drop_column('jobs', 'max_images')