workout generator audit: rules engine, structure rules, split patterns, injury UX, metadata cleanup
- Add rules_engine.py with quantitative rules for all 8 workout types - Add quality gate retry loop in generate_single_workout() - Expand calibrate_structure_rules to all 120 combinations (8 types × 5 goals × 3 sections) - Wire WeeklySplitPattern DB records into _pick_weekly_split() - Enforce movement patterns from WorkoutStructureRule in exercise selection - Add straight-set strength support (single main lift, 4-6 rounds) - Add modality consistency check for duration-dominant workout types - Add InjuryStep component to onboarding and preferences - Add sibling exercise exclusion in regenerate and preview_day endpoints - Display generator warnings on dashboard - Expand fix_rep_durations, fix_exercise_flags, fix_movement_pattern_typo - Add audit_exercise_data and check_rules_drift management commands - Add Next.js frontend with dashboard, onboarding, preferences, history pages - Add generator app with ML-powered workout generation pipeline - 96 new tests across 7 test modules Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
463
generator/management/commands/fix_rep_durations.py
Normal file
463
generator/management/commands/fix_rep_durations.py
Normal file
@@ -0,0 +1,463 @@
|
||||
"""
|
||||
Fixes estimated_rep_duration on all Exercise records using three sources:
|
||||
|
||||
1. **Exact match** from JSON workout files (AI/all_workouts_data/ and AI/cho/workouts/)
|
||||
Each set has `estimated_duration` (total seconds) and `reps`.
|
||||
We compute per_rep = estimated_duration / reps, averaged across all
|
||||
appearances of each exercise.
|
||||
|
||||
2. **Fuzzy match** from the same JSON data for exercises whose DB name
|
||||
doesn't match exactly. Uses name normalization (strip parentheticals,
|
||||
punctuation, plurals) + difflib with a 0.85 cutoff, rejecting matches
|
||||
where the equipment type differs (e.g. barbell vs dumbbell).
|
||||
|
||||
3. **Movement-pattern lookup** for exercises not found by either method.
|
||||
Uses the exercise's `movement_patterns` field against PATTERN_DURATIONS.
|
||||
|
||||
4. **Category-based defaults** for exercises that don't match any pattern.
|
||||
Falls back to DEFAULT_DURATION (3.0s).
|
||||
|
||||
Duration-only exercises (is_duration=True AND is_reps=False) are skipped
|
||||
since they use the `duration` field instead.
|
||||
|
||||
Usage:
|
||||
python manage.py fix_rep_durations
|
||||
python manage.py fix_rep_durations --dry-run
|
||||
"""
|
||||
|
||||
import difflib
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import statistics
|
||||
from collections import defaultdict
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.management.base import BaseCommand
|
||||
|
||||
from exercise.models import Exercise
|
||||
|
||||
|
||||
# Movement-pattern lookup table: maps movement pattern keywords to per-rep durations.
# Keys are matched as substrings against Exercise.movement_patterns (see
# Command._get_pattern_duration); dict insertion order decides ties, so the
# first key found in the field wins. Values are seconds per rep.
PATTERN_DURATIONS = {
    # Compound movements -- controlled tempo
    'compound_push': 3.0,
    'compound_pull': 3.0,
    'squat': 3.0,
    'hinge': 3.0,
    'lunge': 3.0,
    # Isolation movements -- slightly quicker reps
    'isolation_push': 2.5,
    'isolation_pull': 2.5,
    'isolation': 2.5,
    # Explosive / ballistic -- fast reps
    'olympic': 2.0,
    'explosive': 2.0,
    'plyometric': 2.0,
    # Carries and core
    'carry': 1.0,
    'core': 2.5,
}
|
||||
|
||||
# Category defaults keyed by substring match on movement_patterns.
# Order matters: first match wins. More specific patterns go first.
# This is the last lookup Command._get_category_default tries before
# falling back to DEFAULT_DURATION; a list of (keyword, seconds_per_rep)
# tuples rather than a dict so the priority ordering is explicit.
CATEGORY_DEFAULTS = [
    # Explosive / ballistic -- fast reps
    ('plyometric', 1.5),
    ('combat', 1.0),
    ('cardio/locomotion', 1.0),

    # Compound lower -- heavy, slower
    ('lower pull - hip hinge', 5.0),
    ('lower push - squat', 4.5),
    ('lower push - lunge', 4.0),
    ('lower pull', 4.5),
    ('lower push', 4.0),

    # Compound upper
    ('upper push - horizontal', 3.5),
    ('upper push - vertical', 3.5),
    ('upper pull - vertical', 4.0),
    ('upper pull - horizonal', 3.5),  # note: typo is in DB
    ('upper pull - horizontal', 3.5),  # also match corrected version
    ('upper push', 3.5),
    ('upper pull', 3.5),

    # Isolation / machine
    ('machine', 2.5),
    ('arms', 2.5),

    # Core
    ('core - anti-extension', 3.5),
    ('core - carry', 3.0),
    ('core', 3.0),

    # Mobility / yoga -- slow, controlled
    ('yoga', 5.0),
    ('mobility - static', 5.0),
    ('mobility - dynamic', 4.0),
    ('mobility', 4.0),

    # Olympic lifts -- explosive, technical
    ('olympic', 4.0),

    # Isolation
    ('isolation', 2.5),

    # Carry / farmer walk
    ('carry', 3.0),

    # Agility
    ('agility', 1.5),

    # Stretch / activation
    ('stretch', 5.0),
    ('activation', 3.0),
    ('warm up', 3.0),
    ('warmup', 3.0),
]
|
||||
|
||||
# Fallback if nothing matches (seconds per rep)
DEFAULT_DURATION = 3.0

# For backwards compat, also expose as DEFAULT_PER_REP
DEFAULT_PER_REP = DEFAULT_DURATION
|
||||
|
||||
# Equipment words -- if these differ between DB and JSON name, reject the match
|
||||
EQUIPMENT_WORDS = {
|
||||
'barbell', 'dumbbell', 'kettlebell', 'cable', 'band', 'machine',
|
||||
'smith', 'trx', 'ez-bar', 'ez bar', 'landmine', 'medicine ball',
|
||||
'resistance band', 'bodyweight',
|
||||
}
|
||||
|
||||
|
||||
def _normalize_name(name):
|
||||
"""Normalize an exercise name for fuzzy comparison."""
|
||||
n = name.lower().strip()
|
||||
# Remove parenthetical content: "Squat (Back)" -> "Squat"
|
||||
n = re.sub(r'\([^)]*\)', '', n)
|
||||
# Remove common suffixes/noise
|
||||
n = re.sub(r'\b(each side|per side|each leg|per leg|each arm|per arm)\b', '', n)
|
||||
# Remove direction words (forward/backward variants are same exercise)
|
||||
n = re.sub(r'\b(forward|backward|forwards|backwards)\b', '', n)
|
||||
# Normalize punctuation and whitespace
|
||||
n = re.sub(r'[^\w\s]', ' ', n)
|
||||
n = re.sub(r'\s+', ' ', n).strip()
|
||||
# De-pluralize each word (handles "lunges"->"lunge", "curls"->"curl")
|
||||
words = []
|
||||
for w in n.split():
|
||||
if w.endswith('s') and not w.endswith('ss') and len(w) > 2:
|
||||
w = w[:-1]
|
||||
words.append(w)
|
||||
return ' '.join(words)
|
||||
|
||||
|
||||
def _extract_equipment(name):
|
||||
"""Extract the equipment word from an exercise name, if any."""
|
||||
name_lower = name.lower()
|
||||
for eq in EQUIPMENT_WORDS:
|
||||
if eq in name_lower:
|
||||
return eq
|
||||
return None
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Fix ``Exercise.estimated_rep_duration`` for every exercise.

    Resolution order per exercise:
      1. exact name match against per-rep timings parsed from JSON workouts,
      2. fuzzy name match against the same JSON data,
      3. ``PATTERN_DURATIONS`` substring lookup on ``movement_patterns``,
      4. ``CATEGORY_DEFAULTS`` keyword lookup, falling back to
         ``DEFAULT_DURATION``.

    Duration-only exercises (is_duration=True, is_reps=False) get
    ``estimated_rep_duration`` cleared instead, since they use the
    ``duration`` field.
    """

    help = 'Fix estimated_rep_duration using JSON workout data + pattern/category defaults'

    def add_arguments(self, parser):
        # --dry-run reports planned changes without touching the DB.
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what would change without writing to DB',
        )

    def handle(self, *args, **options):
        dry_run = options['dry_run']

        # -- Step 1: Parse JSON files for real per-rep timing --
        json_durations = self._parse_json_files()
        self.stdout.write(
            f'Parsed JSON: {len(json_durations)} exercises with real timing data'
        )

        # -- Step 1b: Build fuzzy lookup from normalized JSON names --
        fuzzy_index = self._build_fuzzy_index(json_durations)

        # -- Step 2: Update exercises --
        exercises = Exercise.objects.all()
        from_json_exact = 0
        from_json_fuzzy = 0
        from_pattern = 0
        from_category = 0
        skipped_duration_only = 0
        set_null = 0
        unchanged = 0
        fuzzy_matches = []  # (db_name, matched_json_name, value) for review

        for ex in exercises:
            # Duration-only exercises (is_duration=True AND is_reps=False)
            # use the `duration` field; per-rep timing must be null.
            if ex.is_duration and not ex.is_reps:
                if ex.estimated_rep_duration is not None:
                    if not dry_run:
                        ex.estimated_rep_duration = None
                        ex.save(update_fields=['estimated_rep_duration'])
                    set_null += 1
                else:
                    skipped_duration_only += 1
                continue

            # Edge case: neither reps- nor duration-based -- nothing to fix.
            if not ex.is_reps and not ex.is_duration:
                unchanged += 1
                continue

            # Resolution order: exact JSON -> fuzzy JSON -> pattern -> category.
            name_lower = ex.name.lower().strip()
            if name_lower in json_durations:
                new_val = json_durations[name_lower]
                source = 'json-exact'
                from_json_exact += 1
            else:
                fuzzy_result = self._fuzzy_match(ex.name, json_durations, fuzzy_index)
                if fuzzy_result is not None:
                    new_val, matched_name = fuzzy_result
                    source = 'json-fuzzy'
                    from_json_fuzzy += 1
                    fuzzy_matches.append((ex.name, matched_name, new_val))
                else:
                    pattern_val = self._get_pattern_duration(ex)
                    if pattern_val is not None:
                        new_val = pattern_val
                        source = 'pattern'
                        from_pattern += 1
                    else:
                        new_val = self._get_category_default(ex)
                        source = 'category'
                        from_category += 1

            old_val = ex.estimated_rep_duration

            if dry_run:
                if old_val != new_val:
                    # Format the old value with an explicit `is None` check:
                    # the previous truthiness test (`... if old_val else ...`)
                    # misreported a legitimate stored 0.0 as "None".
                    old_str = 'None' if old_val is None else f'{old_val:.2f}s'
                    self.stdout.write(
                        f' [{source}] {ex.name}: {old_str} -> {new_val:.2f}s'
                    )
            elif old_val != new_val:
                # Only hit the DB when the value actually changes; the
                # summary below counts sources, not writes, so output is
                # unaffected.
                ex.estimated_rep_duration = new_val
                ex.save(update_fields=['estimated_rep_duration'])

        self.stdout.write(self.style.SUCCESS(
            f'\n{"[DRY RUN] " if dry_run else ""}'
            f'Updated {from_json_exact + from_json_fuzzy + from_pattern + from_category + set_null} exercises: '
            f'{from_json_exact} from JSON (exact), {from_json_fuzzy} from JSON (fuzzy), '
            f'{from_pattern} from pattern lookup, {from_category} from category defaults, '
            f'{set_null} set to null (duration-only), '
            f'{skipped_duration_only} already null (duration-only), '
            f'{unchanged} unchanged'
        ))

        # Show fuzzy matches for review
        if fuzzy_matches:
            self.stdout.write(f'\nFuzzy matches ({len(fuzzy_matches)}):')
            for db_name, json_name, val in sorted(fuzzy_matches):
                self.stdout.write(f' {db_name:50s} -> {json_name} ({val:.2f}s)')

        # -- Step 3: Show summary stats --
        reps_exercises = Exercise.objects.filter(is_reps=True)
        total_reps = reps_exercises.count()
        with_duration = reps_exercises.exclude(estimated_rep_duration__isnull=True).count()
        without_duration = reps_exercises.filter(estimated_rep_duration__isnull=True).count()

        coverage_pct = (with_duration / total_reps * 100) if total_reps > 0 else 0
        self.stdout.write(
            f'\nCoverage: {with_duration}/{total_reps} rep-based exercises '
            f'have estimated_rep_duration ({coverage_pct:.1f}%)'
        )
        if without_duration > 0:
            self.stdout.write(
                f' {without_duration} exercises still missing estimated_rep_duration'
            )

        # Distribution stats only make sense after a real write.
        if not dry_run:
            durations = list(
                reps_exercises
                .exclude(estimated_rep_duration__isnull=True)
                .values_list('estimated_rep_duration', flat=True)
            )
            if durations:
                self.stdout.write(
                    f'\nNew stats for rep-based exercises ({len(durations)}):'
                    f'\n Min: {min(durations):.2f}s'
                    f'\n Max: {max(durations):.2f}s'
                    f'\n Mean: {statistics.mean(durations):.2f}s'
                    f'\n Median: {statistics.median(durations):.2f}s'
                )

    def _build_fuzzy_index(self, json_durations):
        """
        Build a dict of {normalized_name: original_name} for fuzzy matching.

        Keeps the first occurrence when two JSON names normalize to the
        same string.
        """
        index = {}
        for original_name in json_durations:
            norm = _normalize_name(original_name)
            if norm not in index:
                index[norm] = original_name
        return index

    def _fuzzy_match(self, db_name, json_durations, fuzzy_index):
        """
        Try to fuzzy-match a DB exercise name to a JSON exercise name.

        Strategy:
        1. Exact match on normalized names
        2. Containment match: all words of the shorter name appear in the longer
        3. High-cutoff difflib (0.88) with word overlap >= 75%

        Equipment must match in all cases.

        Returns (duration_value, matched_json_name) or None.
        """
        db_norm = _normalize_name(db_name)
        db_equipment = _extract_equipment(db_name)
        db_words = set(db_norm.split())

        # First try: exact match on normalized names
        if db_norm in fuzzy_index:
            original = fuzzy_index[db_norm]
            json_equipment = _extract_equipment(original)
            if db_equipment and json_equipment and db_equipment != json_equipment:
                return None
            return json_durations[original], original

        # Second try: containment match -- shorter name's words are a
        # subset of the longer name's words (e.g. "barbell good morning"
        # is contained in "barbell russian good morning")
        for json_norm, original in fuzzy_index.items():
            json_words = set(json_norm.split())
            shorter, longer = (
                (db_words, json_words) if len(db_words) <= len(json_words)
                else (json_words, db_words)
            )
            # All words of the shorter must appear in the longer
            if shorter.issubset(longer) and len(shorter) >= 2:
                # But names shouldn't differ by too many words (max 2 extra)
                if len(longer) - len(shorter) > 2:
                    continue
                json_equipment = _extract_equipment(original)
                if db_equipment and json_equipment and db_equipment != json_equipment:
                    continue
                # One side names equipment and the other doesn't: reject.
                if (db_equipment is None) != (json_equipment is None):
                    continue
                return json_durations[original], original

        # Third try: high-cutoff difflib with strict word overlap
        normalized_json_names = list(fuzzy_index.keys())
        matches = difflib.get_close_matches(
            db_norm, normalized_json_names, n=3, cutoff=0.88,
        )

        for match_norm in matches:
            original = fuzzy_index[match_norm]
            json_equipment = _extract_equipment(original)
            if db_equipment and json_equipment and db_equipment != json_equipment:
                continue
            if (db_equipment is None) != (json_equipment is None):
                continue
            # Require >= 75% word overlap
            match_words = set(match_norm.split())
            overlap = len(db_words & match_words)
            total = max(len(db_words), len(match_words))
            if total > 0 and overlap / total < 0.75:
                continue
            return json_durations[original], original

        return None

    def _parse_json_files(self):
        """
        Parse all workout JSON files and compute average per-rep duration
        for each exercise. Returns {lowercase_name: avg_seconds_per_rep}.
        """
        base = settings.BASE_DIR
        patterns = [
            os.path.join(base, 'AI', 'all_workouts_data', '*.json'),
            os.path.join(base, 'AI', 'cho', 'workouts', '*.json'),
        ]
        files = []
        for pat in patterns:
            files.extend(sorted(glob.glob(pat)))

        exercise_samples = defaultdict(list)

        for fpath in files:
            # Explicit encoding: JSON files are UTF-8 regardless of the
            # platform's locale default.
            with open(fpath, encoding='utf-8') as f:
                try:
                    data = json.load(f)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # Malformed/mis-encoded file: skip, best-effort parse.
                    continue

            # A file may hold either one workout dict or a list of them.
            workouts = [data] if isinstance(data, dict) else data

            for workout in workouts:
                if not isinstance(workout, dict):
                    continue
                for section in workout.get('sections', []):
                    for s in section.get('sets', []):
                        if not isinstance(s, dict):
                            continue
                        ex = s.get('exercise', {})
                        if not isinstance(ex, dict):
                            continue
                        name = ex.get('name', '').strip()
                        if not name:
                            continue

                        reps = s.get('reps', 0) or 0
                        est_dur = s.get('estimated_duration', 0) or 0
                        set_type = s.get('type', '')

                        if set_type == 'reps' and reps > 0 and est_dur > 0:
                            per_rep = est_dur / reps
                            # Sanity: ignore outliers (< 0.5s or > 20s per rep)
                            if 0.5 <= per_rep <= 20.0:
                                exercise_samples[name.lower()].append(per_rep)

        # Average across all samples per exercise
        result = {}
        for name, samples in exercise_samples.items():
            result[name] = round(statistics.mean(samples), 2)

        return result

    def _get_pattern_duration(self, exercise):
        """
        Return a per-rep duration based on the PATTERN_DURATIONS lookup table.
        Checks the exercise's movement_patterns field for matching patterns.
        Returns the first match, or None if no match.
        """
        patterns_str = (exercise.movement_patterns or '').lower()
        if not patterns_str:
            return None

        for pattern_key, duration in PATTERN_DURATIONS.items():
            if pattern_key in patterns_str:
                return duration

        return None

    def _get_category_default(self, exercise):
        """
        Return a per-rep duration based on the exercise's movement_patterns
        using the more detailed CATEGORY_DEFAULTS table.
        Always returns a value (DEFAULT_DURATION if nothing matches).
        """
        patterns = (exercise.movement_patterns or '').lower()

        for keyword, duration in CATEGORY_DEFAULTS:
            if keyword in patterns:
                return duration

        return DEFAULT_DURATION
|
||||
Reference in New Issue
Block a user