""" Comprehensive audit of exercise data quality. Checks for: 1. Null estimated_rep_duration on rep-based exercises 2. is_weight false positives (bodyweight exercises marked as weighted) 3. Exercises with no muscle assignments 4. "horizonal" typo in movement_patterns 5. Null metadata fields summary (difficulty_level, exercise_tier, etc.) Exits with code 1 if any CRITICAL issues are found. Usage: python manage.py audit_exercise_data """ import re import sys from django.core.management.base import BaseCommand from exercise.models import Exercise from muscle.models import ExerciseMuscle # Same bodyweight patterns as fix_exercise_flags for consistency BODYWEIGHT_PATTERNS = [ r'\bwall sit\b', r'\bplank\b', r'\bmountain climber\b', r'\bburpee\b', r'\bpush ?up\b', r'\bpushup\b', r'\bpull ?up\b', r'\bpullup\b', r'\bchin ?up\b', r'\bchinup\b', r'\bdips?\b', r'\bpike\b', r'\bhandstand\b', r'\bl sit\b', r'\bv sit\b', r'\bhollow\b', r'\bsuperman\b', r'\bbird dog\b', r'\bdead bug\b', r'\bbear crawl\b', r'\bcrab walk\b', r'\binchworm\b', r'\bjumping jack\b', r'\bhigh knee\b', r'\bbutt kick\b', r'\bskater\b', r'\blunge jump\b', r'\bjump lunge\b', r'\bsquat jump\b', r'\bjump squat\b', r'\bbox jump\b', r'\btuck jump\b', r'\bbroad jump\b', r'\bsprinter\b', r'\bagility ladder\b', r'\bbody ?weight\b', r'\bbodyweight\b', r'\bcalisthenics?\b', r'\bflutter kick\b', r'\bleg raise\b', r'\bsit ?up\b', r'\bcrunch\b', r'\bstretch\b', r'\byoga\b', r'\bfoam roll\b', r'\bjump rope\b', r'\bspider crawl\b', ] class Command(BaseCommand): help = 'Audit exercise data quality -- exits 1 if critical issues found' def handle(self, *args, **options): issues = [] # 1. Null estimated_rep_duration (excluding duration-only exercises) null_duration = Exercise.objects.filter( estimated_rep_duration__isnull=True, is_reps=True, ).exclude( is_duration=True, is_reps=False ).count() if null_duration > 0: issues.append( f"CRITICAL: {null_duration} rep-based exercises have null estimated_rep_duration" ) else: self.stdout.write(self.style.SUCCESS( 'OK: All rep-based exercises have estimated_rep_duration' )) # 2. is_weight false positives -- bodyweight exercises marked as weighted weight_false_positives = 0 weighted_exercises = Exercise.objects.filter(is_weight=True) for ex in weighted_exercises: if not ex.name: continue name_lower = ex.name.lower() if any(re.search(pat, name_lower) for pat in BODYWEIGHT_PATTERNS): weight_false_positives += 1 if weight_false_positives > 0: issues.append( f"WARNING: {weight_false_positives} bodyweight exercises still have is_weight=True" ) else: self.stdout.write(self.style.SUCCESS( 'OK: No bodyweight exercises incorrectly marked as weighted' )) # 3. Exercises with no muscles exercises_with_muscles = set( ExerciseMuscle.objects.values_list('exercise_id', flat=True).distinct() ) exercises_no_muscles = Exercise.objects.exclude( pk__in=exercises_with_muscles ).count() if exercises_no_muscles > 0: issues.append( f"CRITICAL: {exercises_no_muscles} exercises have no muscle assignments" ) else: self.stdout.write(self.style.SUCCESS( 'OK: All exercises have muscle assignments' )) # 4. "horizonal" typo typo_count = Exercise.objects.filter( movement_patterns__icontains='horizonal' ).count() if typo_count > 0: issues.append( f'WARNING: {typo_count} exercises have "horizonal" typo in movement_patterns' ) else: self.stdout.write(self.style.SUCCESS( 'OK: No "horizonal" typos in movement_patterns' )) # 5. Null metadata fields summary total = Exercise.objects.count() if total > 0: # Base field always present metadata_fields = { 'movement_patterns': Exercise.objects.filter( movement_patterns__isnull=True ).count() + Exercise.objects.filter(movement_patterns='').count(), } # Optional fields that may not exist in all environments optional_fields = ['difficulty_level', 'exercise_tier'] for field_name in optional_fields: if hasattr(Exercise, field_name): try: null_count = Exercise.objects.filter( **{f'{field_name}__isnull': True} ).count() + Exercise.objects.filter( **{field_name: ''} ).count() metadata_fields[field_name] = null_count except Exception: pass # Field doesn't exist in DB schema yet self.stdout.write(f'\nMetadata coverage ({total} total exercises):') for field, null_count in metadata_fields.items(): filled = total - null_count pct = (filled / total) * 100 self.stdout.write(f' {field}: {filled}/{total} ({pct:.1f}%)') if null_count > total * 0.5: # More than 50% missing issues.append( f"WARNING: {field} is missing on {null_count}/{total} exercises ({100-pct:.1f}%)" ) # Report self.stdout.write('') # blank line if not issues: self.stdout.write(self.style.SUCCESS('All exercise data checks passed!')) else: for issue in issues: if issue.startswith('CRITICAL'): self.stdout.write(self.style.ERROR(issue)) else: self.stdout.write(self.style.WARNING(issue)) critical = [i for i in issues if i.startswith('CRITICAL')] if critical: self.stdout.write(self.style.ERROR( f'\n{len(critical)} critical issue(s) found. Run fix commands to resolve.' )) sys.exit(1) else: self.stdout.write(self.style.WARNING( f'\n{len(issues)} non-critical warning(s) found.' ))