Files
WerkoutAPI/generator/services/workout_analyzer.py
Trey t c80c66c2e5 Codebase hardening: 102 fixes across 35+ files
Deep audit identified 106 findings; 102 fixed, 4 deferred. Covers 8 areas:

- Settings & deploy: env-gated DEBUG/SECRET_KEY, HTTPS headers, gunicorn, celery worker
- Auth (registered_user): password write_only, request.data fixes, transaction safety, proper HTTP status codes
- Workout app: IDOR protection, get_object_or_404, prefetch_related N+1 fixes, transaction.atomic
- Video/scripts: path traversal sanitization, HLS trigger guard, auth on cache wipe
- Models (exercise/equipment/muscle/superset): null-safe __str__, stable IDs, prefetch support
- Generator views: helper for registered_user lookup, logger.exception, bulk_update, transaction wrapping
- Generator core (rules/selector/generator): push-pull ratio, type affinity normalization, modality checks, side-pair exact match, word-boundary regex, equipment cache clearing
- Generator services (plan_builder/analyzer/normalizer): transaction.atomic, muscle cache, bulk_update, glutes classification fix

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 22:29:14 -06:00

1372 lines
56 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Core ML analysis engine for the workout generator.
Analyzes all workouts stored in the Django DB and populates the ML pattern
models:
- WorkoutType
- MuscleGroupSplit
- WeeklySplitPattern
- WorkoutStructureRule
- MovementPatternOrder
Usage (via management command):
python manage.py analyze_workouts
Usage (programmatic):
from generator.services.workout_analyzer import WorkoutAnalyzer
analyzer = WorkoutAnalyzer()
analyzer.analyze()
"""
from __future__ import annotations
import logging
from collections import Counter, defaultdict
from datetime import timedelta
from typing import Dict, List, Optional, Set, Tuple
import numpy as np
from django.db import transaction
from django.db.models import Count, Prefetch, Q
from exercise.models import Exercise
from generator.models import (
MuscleGroupSplit,
MovementPatternOrder,
WeeklySplitPattern,
WorkoutStructureRule,
WorkoutType,
)
from generator.services.muscle_normalizer import (
classify_split_type,
get_broad_split_category,
get_movement_patterns_for_exercise,
get_muscles_for_exercise,
normalize_muscle_name,
)
from muscle.models import ExerciseMuscle
from superset.models import Superset, SupersetExercise
from workout.models import Workout
logger = logging.getLogger(__name__)
# ============================================================
# Default workout type definitions
# ============================================================
# Each dict seeds one WorkoutType row. ``name`` is the lookup key; every
# other key is handed to ``get_or_create`` as ``defaults`` by
# ``WorkoutAnalyzer._step1_populate_workout_types``. The ``name`` values
# are also the strings returned by ``WorkoutAnalyzer._infer_workout_type``.
#
# Key legend (as far as the inline comments establish it):
#   typical_rest_between_sets  rest in seconds
#   rep_range_min / _max       reps per set
#   round_range_min / _max     rounds (sets) per superset
#   duration_bias              share of duration-based (vs rep-based) work,
#                              0.0 = all reps, 1.0 = all timed holds
#   superset_size_min / _max   exercises per superset
#                              (presumably -- confirm against the model)
DEFAULT_WORKOUT_TYPES: list[dict] = [
    {
        'name': 'functional_strength_training',
        'display_name': 'Functional Strength',
        'description': (
            'Compound and functional movement-based strength work. '
            'Moderate rep ranges with an emphasis on movement quality.'
        ),
        'typical_rest_between_sets': 90,  # compounds need 2-3 min, 90s blended avg
        'typical_intensity': 'high',  # RPE 7-9 targets
        'rep_range_min': 6,  # 6-12 rep range for functional strength
        'rep_range_max': 12,
        'round_range_min': 3,
        'round_range_max': 5,  # research says 3-5 sets
        'duration_bias': 0.15,  # mostly rep-based, ~15% duration
        'superset_size_min': 2,
        'superset_size_max': 4,
    },
    {
        'name': 'traditional_strength_training',
        'display_name': 'Traditional Strength',
        'description': (
            'Classic strength training with heavier loads and lower rep ranges. '
            'Longer rest periods between sets.'
        ),
        'typical_rest_between_sets': 150,  # main lifts need 3-5 min, 150s blended avg
        'typical_intensity': 'high',
        'rep_range_min': 3,  # heavy singles/triples are fundamental
        'rep_range_max': 8,
        'round_range_min': 4,  # research says 4-6 sets for main lifts
        'round_range_max': 6,
        'duration_bias': 0.0,  # zero duration exercises in working sets
        'superset_size_min': 1,
        'superset_size_max': 3,
    },
    {
        'name': 'high_intensity_interval_training',
        'display_name': 'HIIT',
        'description': (
            'Short, intense intervals alternating with brief rest. '
            'Duration-biased with higher rep counts.'
        ),
        'typical_rest_between_sets': 30,
        'typical_intensity': 'high',
        'rep_range_min': 10,
        'rep_range_max': 20,
        'round_range_min': 3,
        'round_range_max': 5,
        'duration_bias': 0.7,
        'superset_size_min': 3,
        'superset_size_max': 6,
    },
    {
        'name': 'cross_training',
        'display_name': 'Cross Training',
        'description': (
            'Mixed modality training combining strength, cardio, and '
            'functional movements in varied formats.'
        ),
        'typical_rest_between_sets': 60,  # need more rest for strength blocks
        'typical_intensity': 'high',
        'rep_range_min': 6,  # strength portions use 6-8 rep range
        'rep_range_max': 15,
        'round_range_min': 3,
        'round_range_max': 5,
        'duration_bias': 0.5,
        'superset_size_min': 3,
        'superset_size_max': 5,
    },
    {
        'name': 'core_training',
        'display_name': 'Core Training',
        'description': (
            'Focused core and abdominal work with anti-rotation, '
            'anti-extension, and rotational patterns.'
        ),
        # NOTE(review): the earlier comment here said loaded carries/ab wheel
        # need 60-90s, which is more than the stored 45 -- confirm the value.
        'typical_rest_between_sets': 45,
        'typical_intensity': 'medium',
        'rep_range_min': 10,
        'rep_range_max': 20,
        'round_range_min': 2,
        'round_range_max': 4,
        'duration_bias': 0.6,
        'superset_size_min': 3,
        'superset_size_max': 5,
    },
    {
        'name': 'flexibility',
        'display_name': 'Flexibility',
        'description': (
            'Mobility, stretching, and yoga-style sessions focused on '
            'range of motion and recovery.'
        ),
        'typical_rest_between_sets': 15,
        'typical_intensity': 'low',
        'rep_range_min': 1,
        'rep_range_max': 5,
        'round_range_min': 1,
        'round_range_max': 2,
        'duration_bias': 1.0,  # 100% duration-based (holds)
        'superset_size_min': 3,
        'superset_size_max': 6,
    },
    {
        'name': 'cardio',
        'display_name': 'Cardio',
        'description': (
            'Running, walking, rowing, or other locomotion-based '
            'cardiovascular training.'
        ),
        'typical_rest_between_sets': 30,
        'typical_intensity': 'medium',
        'rep_range_min': 1,
        'rep_range_max': 1,
        'round_range_min': 1,
        'round_range_max': 3,
        'duration_bias': 1.0,
        'superset_size_min': 1,
        'superset_size_max': 3,
    },
    {
        'name': 'hypertrophy',
        'display_name': 'Hypertrophy',
        'description': (
            'Moderate-to-high volume work targeting muscle growth. '
            'Controlled tempos and moderate rest.'
        ),
        'typical_rest_between_sets': 90,  # compounds need 2-3 min
        'typical_intensity': 'high',  # RPE 7-9 targets
        'rep_range_min': 6,  # heavy compounds at 6-8 for mechanical tension
        'rep_range_max': 15,
        'round_range_min': 3,
        'round_range_max': 4,
        'duration_bias': 0.1,  # almost entirely rep-based
        'superset_size_min': 2,
        'superset_size_max': 4,
    },
]
class WorkoutAnalyzer:
    """
    Analyzes every Workout in the DB (through Superset -> SupersetExercise ->
    Exercise -> ExerciseMuscle) and populates the generator ML-pattern models.

    Stages run by ``analyze()``, in order, inside a single transaction:
        0. Clear previously generated pattern records (WorkoutType rows are kept)
        1. Populate WorkoutType defaults
        2. Extract per-workout data (muscles, patterns, stats) into caches
        3. Extract muscle-group pairings     -> MuscleGroupSplit
        4. Extract weekly split patterns     -> WeeklySplitPattern
        5. Extract workout structure rules   -> WorkoutStructureRule
        6. Extract movement-pattern ordering -> MovementPatternOrder
        7. Ensure full rule coverage (``_step7_ensure_full_rule_coverage``)
    """
def __init__(self):
# Caches populated during analysis
self._workout_muscles: Dict[int, Set[str]] = {}
self._workout_split_types: Dict[int, str] = {}
self._workout_data: Dict[int, dict] = {}
self._workout_type_map: Dict[str, WorkoutType] = {}
# ------------------------------------------------------------------
# Public entry point
# ------------------------------------------------------------------
def analyze(self) -> None:
    """Run every analysis stage, in order, inside one DB transaction."""
    rule = '=' * 64
    print('\n' + rule)
    print(' Workout Analyzer - ML Pattern Extraction')
    print(rule)

    # Atomic so that a failure in any stage rolls back the cleanup as well,
    # instead of leaving the pattern tables half-populated.
    with transaction.atomic():
        pipeline = (
            self._clear_existing_patterns,
            self._step1_populate_workout_types,
            self._step2_extract_workout_data,
            self._step3_extract_muscle_group_splits,
            self._step4_extract_weekly_split_patterns,
            self._step5_extract_workout_structure_rules,
            self._step6_extract_movement_pattern_ordering,
            self._step7_ensure_full_rule_coverage,
        )
        for stage in pipeline:
            stage()

    print('\n' + rule)
    print(' Analysis complete.')
    print(rule + '\n')
# ------------------------------------------------------------------
# Housekeeping
# ------------------------------------------------------------------
def _clear_existing_patterns(self) -> None:
    """
    Wipe the derived pattern tables so a re-run starts clean.

    WorkoutType rows are not deleted; they are only counted for the report.
    """
    existing_types = WorkoutType.objects.count()

    wiped_models = (
        ('MuscleGroupSplit', MuscleGroupSplit),
        ('WeeklySplitPattern', WeeklySplitPattern),
        ('WorkoutStructureRule', WorkoutStructureRule),
        ('MovementPatternOrder', MovementPatternOrder),
    )
    # ``delete()`` returns (total_deleted, per_model_dict); keep the total.
    removed = [
        (label, model.objects.all().delete()[0])
        for label, model in wiped_models
    ]

    print('\n[Cleanup] Cleared existing pattern records:')
    print(f' WorkoutType: {existing_types} existing (will upsert)')
    for label, how_many in removed:
        print(f' {label}: {how_many} deleted')
# ------------------------------------------------------------------
# Step 1: Populate WorkoutType defaults
# ------------------------------------------------------------------
def _step1_populate_workout_types(self) -> None:
    """
    Make sure every entry of DEFAULT_WORKOUT_TYPES has a WorkoutType row.

    Rows are matched by ``name``. A missing row is created from the default
    values; an existing row is left exactly as stored. Either way the row is
    cached in ``self._workout_type_map`` under its name.
    """
    print('\n[Step 1] Populating WorkoutType defaults ...')
    newly_created = 0
    already_present = 0
    for spec in DEFAULT_WORKOUT_TYPES:
        field_defaults = dict(spec)
        type_name = field_defaults.pop('name')
        row, was_created = WorkoutType.objects.get_or_create(
            name=type_name,
            defaults=field_defaults,
        )
        self._workout_type_map[type_name] = row
        if was_created:
            newly_created += 1
        else:
            already_present += 1
    print(f' Created: {newly_created}, Preserved: {already_present}')
    print(f' Total WorkoutTypes: {WorkoutType.objects.count()}')
# ------------------------------------------------------------------
# Step 2: Extract per-workout data (muscles, patterns, stats)
# ------------------------------------------------------------------
def _step2_extract_workout_data(self) -> None:
    """
    Walk every workout and cache what later steps need.

    For each workout that has at least one superset this records the set of
    muscles touched, the "focus" muscles, the movement patterns, both split
    classifications and a per-superset breakdown. Results land in
    ``self._workout_data``, ``self._workout_muscles`` and
    ``self._workout_split_types``, all keyed by workout id.
    """
    print('\n[Step 2] Extracting per-workout data ...')

    # Prefetch supersets and their exercises (both ordered by ``order``) so
    # the nested loops below do not issue a query per workout.
    exercise_rows = SupersetExercise.objects.order_by('order').select_related('exercise')
    superset_rows = Superset.objects.order_by('order').prefetch_related(
        Prefetch('superset_exercises', queryset=exercise_rows)
    )
    workouts = Workout.objects.prefetch_related(
        Prefetch('superset_workout', queryset=superset_rows)
    ).all()

    total = workouts.count()
    skipped = 0
    for workout in workouts:
        supersets = list(workout.superset_workout.all())
        if not supersets:
            skipped += 1
            continue

        workout_muscles: Set[str] = set()
        workout_patterns: List[str] = []
        per_superset: List[dict] = []
        # How many exercises reference each muscle (drives focus extraction).
        muscle_hits: Counter = Counter()

        for ss in supersets:
            members = list(ss.superset_exercises.all())
            ss_muscles: Set[str] = set()
            ss_patterns: List[str] = []
            for se in members:
                exercise = se.exercise
                hit_muscles = get_muscles_for_exercise(exercise)
                ss_muscles.update(hit_muscles)
                for muscle in hit_muscles:
                    muscle_hits[muscle] += 1
                ss_patterns.extend(get_movement_patterns_for_exercise(exercise))

            workout_muscles |= ss_muscles
            workout_patterns += ss_patterns
            per_superset.append({
                'order': ss.order,
                'rounds': ss.rounds,
                'exercise_count': len(members),
                'muscles': ss_muscles,
                'patterns': ss_patterns,
                # Only positive, non-null prescriptions are kept.
                'reps_list': [se.reps for se in members if se.reps and se.reps > 0],
                'duration_list': [
                    se.duration for se in members if se.duration and se.duration > 0
                ],
                'name': ss.name or '',
            })

        focus_muscles = self._extract_focus_muscles(muscle_hits)
        focus_split_type = classify_split_type(focus_muscles)
        split_type = classify_split_type(workout_muscles)

        self._workout_muscles[workout.id] = workout_muscles
        self._workout_split_types[workout.id] = split_type
        self._workout_data[workout.id] = {
            'workout': workout,
            'muscles': workout_muscles,
            'focus_muscles': focus_muscles,
            'patterns': workout_patterns,
            'split_type': split_type,
            'focus_split_type': focus_split_type,
            'superset_details': per_superset,
            'superset_count': len(per_superset),
            'created_at': workout.created_at,
            'user_id': workout.registered_user_id,
        }

    analyzed = len(self._workout_data)
    print(f' Total workouts in DB: {total}')
    print(f' Analyzed (with supersets): {analyzed}')
    print(f' Skipped (no supersets): {skipped}')

    print(' Split type distribution (all muscles):')
    by_all_muscles = Counter(self._workout_split_types.values())
    for st, cnt in by_all_muscles.most_common():
        print(f' {st}: {cnt}')

    print(' Split type distribution (focus muscles):')
    by_focus = Counter(
        entry['focus_split_type'] for entry in self._workout_data.values()
    )
    for st, cnt in by_focus.most_common():
        print(f' {st}: {cnt}')
@staticmethod
def _extract_focus_muscles(muscle_counter: Counter) -> Set[str]:
"""
Extract the focus muscles from a per-workout muscle-exercise counter.
Selects the top muscles (by exercise count) that collectively cover
>= 60% of total exercise-muscle references, capped at 5 muscles.
This identifies what a workout *focused on* rather than every muscle
it happened to touch.
"""
if not muscle_counter:
return set()
total_refs = sum(muscle_counter.values())
if total_refs == 0:
return set()
focus: Set[str] = set()
cumulative = 0
threshold = total_refs * 0.60
for muscle, count in muscle_counter.most_common():
if len(focus) >= 5:
break
focus.add(muscle)
cumulative += count
if cumulative >= threshold:
break
return focus
# ------------------------------------------------------------------
# Step 3: Extract muscle group splits -> MuscleGroupSplit
# ------------------------------------------------------------------
def _step3_extract_muscle_group_splits(self) -> None:
    """
    Derive MuscleGroupSplit records from the focus muscles of each workout.

    Workouts are grouped by their exact focus-muscle set, the sets are
    bucketed by split type, and within a bucket sets are merged into a
    cluster when they share >= 60% of the smaller set's muscles with the
    cluster's seed. Clusters seen fewer than twice are dropped. Split types
    that end up with no record are supplemented with defaults; if nothing at
    all is produced, the full default list is seeded instead.
    """
    print('\n[Step 3] Extracting muscle group splits ...')
    if not self._workout_data:
        print(' No workout data to analyze.')
        self._create_default_muscle_group_splits()
        return

    # Group workouts by their focus muscle set (frozen so it can be a key).
    workouts_by_focus: dict[frozenset, list] = defaultdict(list)
    for wdata in self._workout_data.values():
        fset = frozenset(wdata['focus_muscles'])
        if fset:
            workouts_by_focus[fset].append(wdata)

    def exercise_totals(fset: frozenset) -> list:
        # Exercises per workout = sum of exercises over its supersets. The
        # previous code used ``superset_count`` here, which stored the number
        # of supersets in a field named ``typical_exercise_count``.
        return [
            sum(ss['exercise_count'] for ss in wdata['superset_details'])
            for wdata in workouts_by_focus[fset]
        ]

    # Bucket the distinct focus sets by split type, remembering how many
    # workouts produced each set.
    split_type_groups: dict[str, list] = defaultdict(list)
    for fset, members in workouts_by_focus.items():
        split_type_groups[classify_split_type(fset)].append((fset, len(members)))

    created = 0
    for split_type, items in split_type_groups.items():
        # Most frequent sets first, so they become the cluster seeds.
        items.sort(key=lambda item: item[1], reverse=True)
        clusters: list[dict] = []
        for fset, freq in items:
            for cluster in clusters:
                # Compare against the seed only (never a growing union) so a
                # cluster cannot bloat and start rejecting genuine members.
                seed = cluster['seed_muscles']
                smaller = min(len(fset), len(seed))
                if smaller > 0 and len(fset & seed) / smaller >= 0.6:
                    cluster['frequency'] += freq
                    cluster['exercise_counts'].extend(exercise_totals(fset))
                    break
            else:
                # No existing cluster was close enough: start a new one.
                clusters.append({
                    'seed_muscles': set(fset),
                    'frequency': freq,
                    'exercise_counts': exercise_totals(fset),
                })

        for cluster in clusters:
            if cluster['frequency'] < 2:
                continue  # one-off combinations are noise
            # The record carries the seed muscles, which stay focused.
            muscle_list = sorted(cluster['seed_muscles'])
            ex_counts = cluster['exercise_counts']
            typical_ex_count = int(np.median(ex_counts)) if ex_counts else 6
            MuscleGroupSplit.objects.create(
                muscle_names=muscle_list,
                frequency=cluster['frequency'],
                label=self._generate_split_label(split_type, muscle_list),
                typical_exercise_count=typical_ex_count,
                split_type=split_type,
            )
            created += 1

    # Supplement split types the data never produced, to reduce data bias.
    if created > 0:
        existing_split_types = set(
            MuscleGroupSplit.objects.values_list('split_type', flat=True)
        )
        all_split_types = {'push', 'pull', 'legs', 'upper', 'lower', 'full_body', 'core'}
        missing = all_split_types - existing_split_types
        if missing:
            print(f' Supplementing {len(missing)} missing split types: {missing}')
            self._supplement_missing_splits(missing)
            created += len(missing)

    # If the data yielded nothing usable, fall back to the defaults.
    if created == 0:
        self._create_default_muscle_group_splits()
    else:
        print(f' Created {created} MuscleGroupSplit records.')
def _generate_split_label(self, split_type: str, muscles: List[str]) -> str:
"""Generate a human-readable label for a muscle group split."""
label_map = {
'push': 'Push',
'pull': 'Pull',
'legs': 'Legs',
'upper': 'Upper Body',
'lower': 'Lower Body',
'full_body': 'Full Body',
'core': 'Core',
'cardio': 'Cardio',
}
base = label_map.get(split_type, split_type.replace('_', ' ').title())
# Add the top 3 muscles as context
top_muscles = muscles[:3]
if top_muscles:
detail = ', '.join(m.title() for m in top_muscles)
return f'{base} ({detail})'
return base
def _supplement_missing_splits(self, missing_types: set) -> None:
    """
    Create a stock MuscleGroupSplit for each split type the data lacked.

    Split types without a stock definition below are silently skipped.
    """
    # Give supplemented rows the lowest frequency observed in real data
    # (never below 2) so they still have a chance in weighted random
    # selection; a frequency of 0 would never be picked.
    lowest_observed = (
        MuscleGroupSplit.objects
        .filter(frequency__gt=0)
        .order_by('frequency')
        .values_list('frequency', flat=True)
        .first()
    )
    baseline_freq = max(2, lowest_observed or 2)

    # split_type -> (muscle_names, label)
    stock_splits = {
        'push': (['chest', 'deltoids', 'front deltoids', 'triceps'], 'Push (Chest, Shoulders, Triceps)'),
        'pull': (['upper back', 'lats', 'biceps', 'rear deltoids'], 'Pull (Back, Biceps)'),
        'legs': (['quads', 'hamstrings', 'glutes', 'calves'], 'Legs (Quads, Hamstrings, Glutes)'),
        'upper': (['chest', 'deltoids', 'upper back', 'lats', 'triceps', 'biceps'], 'Upper Body'),
        'lower': (['quads', 'hamstrings', 'glutes', 'calves', 'lower back'], 'Lower Body'),
        'full_body': (['chest', 'deltoids', 'upper back', 'lats', 'quads', 'hamstrings', 'glutes', 'core'], 'Full Body'),
        'core': (['abs', 'obliques', 'core', 'hip flexors'], 'Core'),
    }
    for split_type in missing_types:
        stock = stock_splits.get(split_type)
        if stock is None:
            continue
        muscle_names, label = stock
        MuscleGroupSplit.objects.create(
            muscle_names=muscle_names,
            frequency=baseline_freq,
            label=label,
            typical_exercise_count=6,
            split_type=split_type,
        )
def _create_default_muscle_group_splits(self) -> None:
    """Seed one stock MuscleGroupSplit per split type (seven rows)."""
    print(' Creating default muscle group splits ...')
    # Frequency 2 rather than 0 so defaults take part in weighted selection.
    baseline_freq = 2

    push = ['chest', 'deltoids', 'front deltoids', 'triceps']
    legs = ['quads', 'hamstrings', 'glutes', 'calves']
    # (split_type, label, typical_exercise_count, muscle_names)
    stock_rows = [
        ('push', 'Push (Chest, Shoulders, Triceps)', 6, push),
        ('pull', 'Pull (Back, Biceps)', 6,
         ['upper back', 'lats', 'biceps', 'rear deltoids', 'middle back']),
        ('legs', 'Legs (Quads, Hamstrings, Glutes)', 6, legs),
        ('upper', 'Upper Body', 8,
         push + ['upper back', 'lats', 'biceps', 'rear deltoids']),
        ('lower', 'Lower Body', 7, legs + ['lower back', 'hip flexors']),
        ('core', 'Core', 6, ['abs', 'obliques', 'core', 'hip flexors']),
        ('full_body', 'Full Body', 8,
         ['chest', 'deltoids', 'upper back', 'lats', 'quads',
          'hamstrings', 'glutes', 'abs', 'core']),
    ]
    defaults = [
        {
            'muscle_names': list(muscle_names),
            'label': label,
            'split_type': split_type,
            'typical_exercise_count': typical_count,
            'frequency': baseline_freq,
        }
        for split_type, label, typical_count, muscle_names in stock_rows
    ]
    for row in defaults:
        MuscleGroupSplit.objects.create(**row)
    print(f' Created {len(defaults)} default MuscleGroupSplit records.')
# ------------------------------------------------------------------
# Step 4: Extract weekly split patterns -> WeeklySplitPattern
# ------------------------------------------------------------------
def _step4_extract_weekly_split_patterns(self) -> None:
    """
    Derive WeeklySplitPattern records from each user's weekly sequences.

    Workouts are bucketed by (user, ISO year, ISO week) and ordered by
    creation time; the sequence of broad split categories in a bucket is one
    observed pattern. For every workouts-per-week count the ten most common
    patterns are stored. Falls back to the default patterns when there is no
    usable data.
    """
    print('\n[Step 4] Extracting weekly split patterns ...')
    if not self._workout_data:
        print(' No workout data to analyze.')
        self._create_default_weekly_patterns()
        return

    # (user_id, iso_year, iso_week) -> that week's workouts
    weekly_buckets: dict[tuple, list] = defaultdict(list)
    for wdata in self._workout_data.values():
        stamp = wdata['created_at']
        if stamp is None:
            continue
        iso_year, iso_week, _ = stamp.isocalendar()
        weekly_buckets[(wdata['user_id'], iso_year, iso_week)].append(wdata)

    # workouts-per-week -> Counter of split sequences
    days_per_week_patterns: dict[int, Counter] = defaultdict(Counter)
    for sessions in weekly_buckets.values():
        sessions.sort(key=lambda w: w['created_at'])
        sequence = tuple(
            get_broad_split_category(w['focus_split_type']) for w in sessions
        )
        if 1 <= len(sequence) <= 7:
            days_per_week_patterns[len(sequence)][sequence] += 1

    if not days_per_week_patterns:
        print(' Not enough weekly data to extract patterns.')
        self._create_default_weekly_patterns()
        return

    # First MuscleGroupSplit id encountered for each split type.
    split_type_to_mgs: dict[str, int] = {}
    for mgs in MuscleGroupSplit.objects.all():
        split_type_to_mgs.setdefault(mgs.split_type, mgs.id)

    known_split_types = (
        'push', 'pull', 'legs', 'upper', 'lower', 'full_body', 'core', 'cardio',
    )

    def resolve_split_id(category):
        # Direct hit first; otherwise the first known split type that maps
        # to this broad category and has a record. May return None.
        direct = split_type_to_mgs.get(category)
        if direct is not None:
            return direct
        for st in known_split_types:
            if get_broad_split_category(st) == category and st in split_type_to_mgs:
                return split_type_to_mgs[st]
        return None

    created = 0
    for days_count, counter in sorted(days_per_week_patterns.items()):
        # Rest days depend only on the day count: spread the remaining days
        # of a 7-day week evenly and clamp to valid indices 0..6.
        rest_positions = []
        if days_count < 7:
            total_rest = 7 - days_count
            spacing = 7.0 / total_rest
            rest_positions = [
                min(max(int(round(slot * spacing)), 0), 6)
                for slot in range(total_rest)
            ]

        for pattern_tuple, freq in counter.most_common(10):
            WeeklySplitPattern.objects.create(
                days_per_week=days_count,
                pattern=[resolve_split_id(category) for category in pattern_tuple],
                pattern_labels=list(pattern_tuple),
                frequency=freq,
                rest_day_positions=list(rest_positions),
            )
            created += 1

    if created == 0:
        self._create_default_weekly_patterns()
    else:
        print(f' Created {created} WeeklySplitPattern records.')

    # Summary: the single most common pattern per day count.
    for dpw, counter in sorted(days_per_week_patterns.items()):
        top = counter.most_common(1)[0] if counter else None
        if top:
            print(f' {dpw}-day weeks: {sum(counter.values())} occurrences, '
                  f'top pattern: {list(top[0])} (x{top[1]})')
def _create_default_weekly_patterns(self) -> None:
    """
    Seed five stock WeeklySplitPattern records (3- to 6-day weeks).

    Each label is resolved to the id of the first MuscleGroupSplit with that
    split type, or failing that, the first whose broad category equals the
    label; unresolved labels are stored as ``None``.
    """
    print(' Creating default weekly split patterns ...')

    split_type_to_mgs: dict[str, int] = {}
    for mgs in MuscleGroupSplit.objects.all():
        split_type_to_mgs.setdefault(mgs.split_type, mgs.id)

    # (days_per_week, pattern_labels, rest_day_positions)
    stock_patterns = [
        # 3-day full body
        (3, ['full_body', 'full_body', 'full_body'], [1, 3, 5, 6]),
        # 4-day upper/lower
        (4, ['upper', 'lower', 'upper', 'lower'], [2, 4, 6]),
        # 4-day push/pull/lower + full body
        (4, ['push', 'pull', 'lower', 'full_body'], [2, 4, 6]),
        # 5-day push/pull/lower/upper/lower
        (5, ['push', 'pull', 'lower', 'upper', 'lower'], [2, 6]),
        # 6-day push/pull/lower x2
        (6, ['push', 'pull', 'lower', 'push', 'pull', 'lower'], [6]),
    ]
    defaults = [
        {'days_per_week': days, 'pattern_labels': labels, 'rest_day_positions': rest}
        for days, labels, rest in stock_patterns
    ]

    def resolve_split_id(label):
        direct = split_type_to_mgs.get(label)
        if direct is not None:
            return direct
        return next(
            (
                sid
                for st, sid in split_type_to_mgs.items()
                if get_broad_split_category(st) == label
            ),
            None,
        )

    for entry in defaults:
        # NOTE(review): frequency is 0 here, whereas the default
        # MuscleGroupSplit rows use 2 to stay eligible for weighted
        # selection -- confirm 0 is intended for weekly patterns.
        WeeklySplitPattern.objects.create(
            days_per_week=entry['days_per_week'],
            pattern=[resolve_split_id(label) for label in entry['pattern_labels']],
            pattern_labels=entry['pattern_labels'],
            frequency=0,
            rest_day_positions=entry['rest_day_positions'],
        )
    print(f' Created {len(defaults)} default WeeklySplitPattern records.')
# ------------------------------------------------------------------
# Step 5: Extract workout structure rules -> WorkoutStructureRule
# ------------------------------------------------------------------
def _step5_extract_workout_structure_rules(self) -> None:
    """
    Derive WorkoutStructureRule records from per-superset statistics.

    Every superset is assigned to a section: in workouts with three or more
    supersets the first is ``warm_up``, the last is ``cool_down`` and the
    rest are ``working``; shorter workouts are all ``working``. Statistics
    are pooled per (inferred workout type, section), summarised with the
    median (rounds, exercises per superset) and the 25th/75th percentiles
    (reps, durations), clamped to sanity bounds, and then written once per
    goal type with goal-specific adjustments applied.
    """
    print('\n[Step 5] Extracting workout structure rules ...')
    if not self._workout_data:
        print(' No workout data to analyze.')
        self._create_default_structure_rules()
        return

    # (inferred_workout_type, section_type) -> pooled samples
    section_stats: dict[tuple[str, str], dict] = defaultdict(lambda: {
        'rounds': [],
        'exercises_per_superset': [],
        'reps': [],
        'durations': [],
        'patterns': [],
    })
    for wdata in self._workout_data.values():
        details = wdata['superset_details']
        if not details:
            continue
        inferred_type = self._infer_workout_type(wdata)
        last_idx = len(details) - 1
        # Fewer than three supersets leave no room for warm-up + work +
        # cool-down, so such workouts count as "working" throughout.
        has_bookends = len(details) >= 3
        for idx, ss in enumerate(details):
            if has_bookends and idx == 0:
                section = 'warm_up'
            elif has_bookends and idx == last_idx:
                section = 'cool_down'
            else:
                section = 'working'
            stats = section_stats[(inferred_type, section)]
            stats['rounds'].append(ss['rounds'])
            stats['exercises_per_superset'].append(ss['exercise_count'])
            stats['reps'].extend(ss['reps_list'])
            stats['durations'].extend(ss['duration_list'])
            stats['patterns'].extend(ss['patterns'])

    if not section_stats:
        self._create_default_structure_rules()
        return

    def interquartile(samples: list, fallback: int) -> tuple:
        # 25th/75th percentile as ints; a lone sample (or the fallback when
        # there are none) is used for both ends.
        arr = np.array(samples) if samples else np.array([fallback])
        if len(arr) > 1:
            return int(np.percentile(arr, 25)), int(np.percentile(arr, 75))
        return int(arr[0]), int(arr[0])

    all_goals = ['strength', 'hypertrophy', 'endurance', 'weight_loss', 'general_fitness']
    created = 0
    for (wt_name, section_type), stats in section_stats.items():
        wt_obj = self._workout_type_map.get(wt_name)

        typical_rounds = int(np.median(np.array(stats['rounds'] or [3])))
        typical_eps = int(np.median(np.array(stats['exercises_per_superset'] or [3])))
        rep_min, rep_max = interquartile(stats['reps'], 10)
        dur_min, dur_max = interquartile(stats['durations'], 30)

        # Sanity bounds against extreme values from biased data. Each max is
        # floored at its min, so min <= max holds without a later swap.
        typical_rounds = max(1, min(8, typical_rounds))
        rep_min = max(1, min(50, rep_min))
        rep_max = max(rep_min, min(50, rep_max))
        dur_min = max(5, min(180, dur_min))
        dur_max = max(dur_min, min(180, dur_max))

        # Eight most frequent movement patterns seen in this section.
        top_patterns = [p for p, _ in Counter(stats['patterns']).most_common(8)]

        base_params = {
            'rep_min': rep_min,
            'rep_max': rep_max,
            'rounds': typical_rounds,
            'dur_min': dur_min,
            'dur_max': dur_max,
        }
        # One rule per goal, each adjusted from the same baseline.
        for goal in all_goals:
            adjusted = self._apply_goal_adjustments(base_params, goal)
            WorkoutStructureRule.objects.create(
                workout_type=wt_obj,
                section_type=section_type,
                movement_patterns=top_patterns,
                typical_rounds=adjusted['rounds'],
                typical_exercises_per_superset=max(1, typical_eps),
                typical_rep_range_min=adjusted['rep_min'],
                typical_rep_range_max=adjusted['rep_max'],
                typical_duration_range_min=adjusted['dur_min'],
                typical_duration_range_max=adjusted['dur_max'],
                goal_type=goal,
            )
            created += 1

    print(f' Created {created} WorkoutStructureRule records '
          f'({created // len(all_goals)} base x {len(all_goals)} goals).')
    for (wt_name, section_type), stats in section_stats.items():
        n = len(stats['rounds'])
        print(f' {wt_name} / {section_type}: {n} superset samples -> {len(all_goals)} goal variants')
def _infer_workout_type(self, wdata: dict) -> str:
"""
Infer the workout type name from a workout's movement patterns and structure.
"""
patterns = wdata.get('patterns', [])
if not patterns:
return 'functional_strength_training'
pattern_counter = Counter(patterns)
total = sum(pattern_counter.values())
# Count categories
yoga_count = sum(v for k, v in pattern_counter.items() if 'yoga' in k)
mobility_count = sum(v for k, v in pattern_counter.items() if 'mobility' in k or 'flexibility' in k)
massage_count = sum(v for k, v in pattern_counter.items() if 'massage' in k)
core_count = sum(v for k, v in pattern_counter.items() if 'core' in k)
cardio_count = sum(v for k, v in pattern_counter.items() if 'cardio' in k or 'locomotion' in k)
plyometric_count = sum(v for k, v in pattern_counter.items() if 'plyometric' in k)
machine_count = sum(v for k, v in pattern_counter.items() if 'machine' in k)
combat_count = sum(v for k, v in pattern_counter.items() if 'combat' in k)
upper_count = sum(v for k, v in pattern_counter.items() if 'upper' in k)
lower_count = sum(v for k, v in pattern_counter.items() if 'lower' in k)
flexibility_total = yoga_count + mobility_count + massage_count
# Flexibility / Yoga
if flexibility_total > total * 0.5:
return 'flexibility'
# Core-focused
if core_count > total * 0.5:
return 'core_training'
# Cardio / locomotion
if cardio_count > total * 0.4:
return 'cardio'
# HIIT: high plyometric and mixed patterns
if plyometric_count + cardio_count + combat_count > total * 0.3:
return 'high_intensity_interval_training'
# Cross training: good mix of everything
unique_categories = sum(1 for x in [
upper_count, lower_count, core_count, plyometric_count, cardio_count
] if x > 0)
if unique_categories >= 4:
return 'cross_training'
# Machine-heavy -> traditional strength
if machine_count > total * 0.3:
return 'traditional_strength_training'
# Hypertrophy: heavy on upper/lower push/pull, not much cardio/plyo
if (upper_count + lower_count > total * 0.7 and
cardio_count + plyometric_count < total * 0.15 and
machine_count + upper_count > total * 0.4):
return 'hypertrophy'
# Default: functional strength
return 'functional_strength_training'
def _infer_goal_type(self, workout_type_name: str) -> str:
"""Map a workout type name to a goal type (used as the 'baseline' goal)."""
mapping = {
'functional_strength_training': 'general_fitness',
'traditional_strength_training': 'strength',
'high_intensity_interval_training': 'weight_loss',
'cross_training': 'general_fitness',
'core_training': 'general_fitness',
'flexibility': 'general_fitness',
'cardio': 'endurance',
'hypertrophy': 'hypertrophy',
}
return mapping.get(workout_type_name, 'general_fitness')
# Per-goal tweaks applied to a baseline rule to derive its goal variants.
# The *_mult values scale the rep range bounds; the *_adj values are added
# to the round count and to the duration range bounds.
GOAL_ADJUSTMENTS = {
    goal: {
        'rep_min_mult': rep_min_mult,
        'rep_max_mult': rep_max_mult,
        'rounds_adj': rounds_adj,
        'dur_min_adj': dur_min_adj,
        'dur_max_adj': dur_max_adj,
    }
    for goal, rep_min_mult, rep_max_mult, rounds_adj, dur_min_adj, dur_max_adj in (
        # goal, rep_min x, rep_max x, rounds +, dur_min +, dur_max +
        ('strength', 0.6, 0.7, 1, 0, 0),
        ('hypertrophy', 0.9, 1.1, 0, 0, 0),
        ('endurance', 1.3, 1.5, -1, 10, 15),
        ('weight_loss', 1.2, 1.3, 0, 5, 10),
        ('general_fitness', 1.0, 1.0, 0, 0, 0),
    )
}
@classmethod
def _apply_goal_adjustments(cls, base_params: dict, goal: str) -> dict:
"""
Apply goal-specific adjustments to a baseline set of structure rule params.
Returns a new dict with adjusted values.
"""
adj = cls.GOAL_ADJUSTMENTS.get(goal, cls.GOAL_ADJUSTMENTS['general_fitness'])
rep_min = max(1, int(base_params['rep_min'] * adj['rep_min_mult']))
rep_max = max(rep_min, int(base_params['rep_max'] * adj['rep_max_mult']))
rounds = max(1, base_params['rounds'] + adj['rounds_adj'])
dur_min = max(5, base_params['dur_min'] + adj['dur_min_adj'])
dur_max = max(dur_min, base_params['dur_max'] + adj['dur_max_adj'])
return {
'rep_min': rep_min,
'rep_max': rep_max,
'rounds': rounds,
'dur_min': dur_min,
'dur_max': dur_max,
}
def _create_default_structure_rules(self) -> None:
    """
    Seed default WorkoutStructureRule rows for every known workout type.

    One rule is created per workout type in ``self._workout_type_map`` for
    each of the warm_up / working / cool_down sections and each goal, with
    the section's baseline numbers run through ``_apply_goal_adjustments``.
    """
    print(' Creating default workout structure rules ...')

    all_goals = ['strength', 'hypertrophy', 'endurance', 'weight_loss', 'general_fitness']

    # (section_type, exercises per superset, movement patterns, baseline params)
    section_templates = [
        (
            'warm_up',
            3,
            ['mobility', 'mobility - dynamic', 'core', 'core - anti-extension'],
            {'rep_min': 8, 'rep_max': 12, 'rounds': 2, 'dur_min': 20, 'dur_max': 30},
        ),
        (
            'working',
            3,
            ['upper push', 'upper pull', 'lower push', 'lower pull', 'core'],
            {'rep_min': 8, 'rep_max': 12, 'rounds': 3, 'dur_min': 30, 'dur_max': 45},
        ),
        (
            'cool_down',
            4,
            ['yoga', 'mobility', 'mobility - static', 'massage'],
            {'rep_min': 5, 'rep_max': 10, 'rounds': 1, 'dur_min': 30, 'dur_max': 60},
        ),
    ]

    created = 0
    for wt_obj in self._workout_type_map.values():
        for section_type, per_superset, patterns, base in section_templates:
            for goal in all_goals:
                adjusted = self._apply_goal_adjustments(base, goal)
                rule_fields = {
                    'workout_type': wt_obj,
                    'section_type': section_type,
                    'goal_type': goal,
                    'movement_patterns': patterns,
                    'typical_exercises_per_superset': per_superset,
                    'typical_rounds': adjusted['rounds'],
                    'typical_rep_range_min': adjusted['rep_min'],
                    'typical_rep_range_max': adjusted['rep_max'],
                    'typical_duration_range_min': adjusted['dur_min'],
                    'typical_duration_range_max': adjusted['dur_max'],
                }
                WorkoutStructureRule.objects.create(**rule_fields)
                created += 1

    print(f' Created {created} default WorkoutStructureRule records '
          f'({len(self._workout_type_map)} types x 3 sections x {len(all_goals)} goals).')
# ------------------------------------------------------------------
# Step 6: Extract movement pattern ordering -> MovementPatternOrder
# ------------------------------------------------------------------
def _step6_extract_movement_pattern_ordering(self) -> None:
    """
    Count where each movement pattern occurs and store MovementPatternOrder rows.

    Every superset of every workout in ``self._workout_data`` is bucketed
    into a position (early / middle / late) and a section (warm_up /
    working / cool_down) from its index alone. Each non-empty pattern in the
    superset then increments a (pattern, position, section) counter, and one
    MovementPatternOrder is created per counter entry. When there is no
    workout data, or no pattern was counted, the hard-coded defaults are
    seeded instead.
    """
    print('\n[Step 6] Extracting movement pattern ordering ...')

    if not self._workout_data:
        print(' No workout data to analyze.')
        self._create_default_movement_pattern_orders()
        return

    # Key: (pattern, position, section_type) -> number of occurrences.
    pattern_position_counter: Counter = Counter()
    for wdata in self._workout_data.values():
        details = wdata['superset_details']
        num_ss = len(details)
        # A workout without supersets simply contributes nothing.
        for idx, ss in enumerate(details):
            position = self._classify_superset_position(idx, num_ss)
            section = self._classify_superset_section(idx, num_ss)
            for pattern in ss['patterns']:
                if pattern:
                    pattern_position_counter[(pattern, position, section)] += 1

    if not pattern_position_counter:
        self._create_default_movement_pattern_orders()
        return

    # Every counter entry was incremented at least once, so no frequency
    # filter is needed here.
    created = 0
    for (pattern, position, section), freq in pattern_position_counter.items():
        MovementPatternOrder.objects.create(
            movement_pattern=pattern,
            position=position,
            frequency=freq,
            section_type=section,
        )
        created += 1

    print(f' Created {created} MovementPatternOrder records.')

    # Summarise the three most frequent patterns per position, summed
    # across sections.
    for pos in ('early', 'middle', 'late'):
        pos_counter = Counter()
        for (patt, position, _section), freq in pattern_position_counter.items():
            if position == pos:
                pos_counter[patt] += freq
        top_str = ', '.join(f'{p} ({c})' for p, c in pos_counter.most_common(3))
        print(f' {pos}: {top_str}')

@staticmethod
def _classify_superset_position(idx: int, num_ss: int) -> str:
    """Bucket superset ``idx`` of ``num_ss`` into 'early', 'middle' or 'late'."""
    if num_ss == 1:
        return 'middle'
    if num_ss == 2:
        return 'early' if idx == 0 else 'late'
    # Three or more supersets: split by relative index in [0, 1].
    relative_pos = idx / (num_ss - 1)
    if relative_pos <= 0.33:
        return 'early'
    if relative_pos >= 0.67:
        return 'late'
    return 'middle'

@staticmethod
def _classify_superset_section(idx: int, num_ss: int) -> str:
    """Label superset ``idx`` of ``num_ss`` as 'warm_up', 'working' or 'cool_down'."""
    # Only workouts with at least three supersets are treated as having a
    # warm-up (first superset) and a cool-down (last superset).
    if num_ss >= 3:
        if idx == 0:
            return 'warm_up'
        if idx == num_ss - 1:
            return 'cool_down'
    return 'working'
def _create_default_movement_pattern_orders(self) -> None:
    """
    Seed a fixed set of MovementPatternOrder rows.

    The seed data is grouped by (section, position); each member is a
    (movement pattern, frequency) pair. Rows are created in the order the
    groups and their members are listed.
    """
    print(' Creating default movement pattern orders ...')

    grouped_defaults = (
        # Warm-up (early) patterns
        ('warm_up', 'early', (
            ('mobility', 10),
            ('mobility - dynamic', 10),
            ('core - anti-extension', 8),
            ('core', 7),
            ('balance', 5),
        )),
        # Working (early) patterns -- compound movements first
        ('working', 'early', (
            ('lower push - squat', 15),
            ('lower pull - hip hinge', 12),
            ('upper push - vertical', 10),
            ('upper pull - vertical', 8),
        )),
        # Working (middle) patterns
        ('working', 'middle', (
            ('upper push - horizontal', 12),
            # NOTE(review): 'horizonal' is spelled this way in the seed
            # data; confirm whether it has to match stored movement
            # pattern names before correcting it.
            ('upper pull - horizonal', 12),
            ('lower push - lunge', 10),
            ('lower push', 8),
            ('lower pull', 8),
            ('upper push', 8),
            ('upper pull', 8),
            ('core - anti-rotation', 6),
            ('core - anti-lateral flexion', 6),
        )),
        # Working (late) patterns -- isolation and accessories
        ('working', 'late', (
            ('arms', 10),
            ('core - carry', 8),
            ('core - rotational', 7),
        )),
        # Plyometric work is seeded as an early working pattern, although
        # it is listed after the late group.
        ('working', 'early', (
            ('plyometric', 7),
        )),
        # Cool-down (late) patterns
        ('cool_down', 'late', (
            ('yoga', 15),
            ('mobility - static', 12),
            ('massage', 10),
        )),
    )

    rows = [
        (pattern, position, section, freq)
        for section, position, members in grouped_defaults
        for pattern, freq in members
    ]

    for pattern, position, section, freq in rows:
        MovementPatternOrder.objects.create(
            movement_pattern=pattern,
            position=position,
            frequency=freq,
            section_type=section,
        )

    print(f' Created {len(rows)} default MovementPatternOrder records.')
# ------------------------------------------------------------------
# Step 7: Ensure full WorkoutStructureRule coverage
# ------------------------------------------------------------------
def _step7_ensure_full_rule_coverage(self) -> None:
    """
    Fill gaps so each WorkoutType x section x goal has a WorkoutStructureRule.

    Existing (workout_type_id, section_type, goal_type) triples are loaded
    once; for every combination not among them a rule is created from the
    per-section baseline below, adjusted for the goal via
    ``_apply_goal_adjustments``. Finally the number of created rules and the
    total rule count versus the expected count are printed.
    """
    print('\n[Step 7] Ensuring full rule coverage ...')

    all_goals = ['strength', 'hypertrophy', 'endurance', 'weight_loss', 'general_fitness']
    all_sections = ['warm_up', 'working', 'cool_down']
    workout_types = list(WorkoutType.objects.all())

    # Baseline per section, used only for combinations with no rule yet.
    # NOTE(review): these baselines differ from the ones in
    # _create_default_structure_rules (e.g. warm_up rounds and exercises
    # per superset, cool_down rep and duration ranges) -- confirm whether
    # that is intentional.
    section_defaults = {
        'warm_up': {
            'typical_rounds': 1,
            'typical_exercises_per_superset': 5,
            'typical_rep_range_min': 8,
            'typical_rep_range_max': 12,
            'typical_duration_range_min': 20,
            'typical_duration_range_max': 35,
            'movement_patterns': ['mobility', 'mobility - dynamic', 'core'],
        },
        'working': {
            'typical_rounds': 3,
            'typical_exercises_per_superset': 3,
            'typical_rep_range_min': 8,
            'typical_rep_range_max': 12,
            'typical_duration_range_min': 30,
            'typical_duration_range_max': 45,
            'movement_patterns': ['upper push', 'upper pull', 'lower push', 'lower pull', 'core'],
        },
        'cool_down': {
            'typical_rounds': 1,
            'typical_exercises_per_superset': 4,
            'typical_rep_range_min': 8,
            'typical_rep_range_max': 10,
            'typical_duration_range_min': 25,
            'typical_duration_range_max': 40,
            'movement_patterns': ['yoga', 'mobility', 'mobility - static'],
        },
    }

    # One query up front instead of an exists() check per combination.
    rule_keys = WorkoutStructureRule.objects.values_list(
        'workout_type_id', 'section_type', 'goal_type'
    )
    existing_rules = set(rule_keys)

    created = 0
    for wt in workout_types:
        for section in all_sections:
            template = section_defaults[section]
            for goal in all_goals:
                if (wt.pk, section, goal) in existing_rules:
                    continue

                # Goal adjustments touch only reps, rounds and duration.
                adjusted = self._apply_goal_adjustments(
                    {
                        'rep_min': template['typical_rep_range_min'],
                        'rep_max': template['typical_rep_range_max'],
                        'rounds': template['typical_rounds'],
                        'dur_min': template['typical_duration_range_min'],
                        'dur_max': template['typical_duration_range_max'],
                    },
                    goal,
                )
                WorkoutStructureRule.objects.create(
                    workout_type=wt,
                    section_type=section,
                    goal_type=goal,
                    movement_patterns=template['movement_patterns'],
                    typical_rounds=adjusted['rounds'],
                    typical_exercises_per_superset=template['typical_exercises_per_superset'],
                    typical_rep_range_min=adjusted['rep_min'],
                    typical_rep_range_max=adjusted['rep_max'],
                    typical_duration_range_min=adjusted['dur_min'],
                    typical_duration_range_max=adjusted['dur_max'],
                )
                created += 1

    expected = len(workout_types) * len(all_sections) * len(all_goals)
    actual = WorkoutStructureRule.objects.count()
    print(f' Gap-filled {created} missing rules.')
    print(f' Total rules: {actual} (expected {expected} for {len(workout_types)} types)')