WerkoutAPI/generator/services/muscle_normalizer.py

"""
Muscle name normalization and split classification.

The DB contains ~38 muscle entries with casing duplicates (e.g. "Quads" vs "quads",
"Abs" vs "abs", "Core" vs "core"). This module provides a single source of truth
for mapping raw muscle names to canonical lowercase names, organizing them into
split categories, and classifying a set of muscles into a split type.
"""

from __future__ import annotations

from typing import Set, List, Optional

# ---------------------------------------------------------------------------
# Raw name -> canonical name
# Each row pairs one canonical name with every lowercase spelling that should
# resolve to it. The rows are flattened, in the order written, into the
# lookup dict (lowercase alias -> canonical name) defined right after.
# ---------------------------------------------------------------------------
_CANONICAL_ALIASES: tuple[tuple[str, tuple[str, ...]], ...] = (
    # legs
    ('quads', ('quads', 'quadriceps', 'quad')),
    ('hamstrings', ('hamstrings', 'hamstring', 'hams')),
    ('glutes', ('glutes', 'glute', 'gluteus', 'gluteus maximus')),
    ('calves', ('calves', 'calf', 'gastrocnemius', 'soleus')),
    # chest
    ('chest', ('chest', 'pecs', 'pectorals')),
    # deltoids / shoulders (general first, then the individual heads)
    ('deltoids', ('deltoids', 'deltoid', 'shoulders', 'shoulder')),
    ('front deltoids', ('front deltoids', 'front deltoid', 'front delts')),
    ('rear deltoids', ('rear deltoids', 'rear deltoid', 'rear delts')),
    (
        'side deltoids',
        (
            'side deltoids',
            'side deltoid',
            'side delts',
            'lateral deltoids',
            'medial deltoids',
        ),
    ),
    # upper arms
    ('triceps', ('triceps', 'tricep')),
    ('biceps', ('biceps', 'bicep')),
    # back
    ('upper back', ('upper back', 'rhomboids')),
    ('lats', ('lats', 'latissimus dorsi', 'lat')),
    ('middle back', ('middle back', 'mid back')),
    ('lower back', ('lower back', 'erector spinae', 'spinal erectors')),
    ('traps', ('traps', 'trapezius')),
    # trunk
    ('abs', ('abs', 'abdominals', 'rectus abdominis')),
    (
        'obliques',
        ('obliques', 'oblique', 'external obliques', 'internal obliques'),
    ),
    ('core', ('core',)),
    ('intercostals', ('intercostals',)),
    # hips
    ('hip flexors', ('hip flexor', 'hip flexors', 'iliopsoas', 'psoas')),
    ('hip abductors', ('hip abductors', 'hip abductor')),
    ('hip adductors', ('hip adductors', 'hip adductor', 'adductors', 'groin')),
    # shoulder stabilizers
    ('rotator cuff', ('rotator cuff',)),
    # lower arms / arms (general)
    ('forearms', ('forearms', 'forearm', 'wrist flexors', 'wrist extensors')),
    ('arms', ('arms',)),
    # miscellaneous
    ('feet', ('feet', 'foot')),
    ('it band', ('it band', 'iliotibial band')),
)

# Keys are lowercased for lookup; values are the canonical form we store.
MUSCLE_NORMALIZATION_MAP: dict[str, str] = {
    alias: canonical
    for canonical, aliases in _CANONICAL_ALIASES
    for alias in aliases
}

# ---------------------------------------------------------------------------
# Functional split categories and the canonical muscles assigned to each.
# These lists drive the classification of a workout's primary split type.
# 'hip flexors' is listed under both 'lower_pull' and 'core'. Canonical
# names that appear in no list (e.g. 'arms', 'feet', 'it band') belong to no
# category.
# ---------------------------------------------------------------------------
MUSCLE_GROUP_CATEGORIES: dict[str, list[str]] = dict(
    upper_push=[
        'chest',
        'front deltoids',
        'deltoids',
        'triceps',
        'side deltoids',
    ],
    upper_pull=[
        'upper back',
        'lats',
        'biceps',
        'rear deltoids',
        'middle back',
        'traps',
        'forearms',
        'rotator cuff',
    ],
    lower_push=[
        'quads',
        'calves',
        'hip abductors',
        'hip adductors',
    ],
    lower_pull=[
        'hamstrings',
        'glutes',
        'lower back',
        'hip flexors',
    ],
    core=[
        'abs',
        'obliques',
        'core',
        'intercostals',
        'hip flexors',
    ],
)

# Inverted index of MUSCLE_GROUP_CATEGORIES: canonical muscle name -> every
# category that lists it, in category declaration order. A muscle listed
# under several categories gets one entry per category.
_MUSCLE_TO_CATEGORIES: dict[str, list[str]] = {}
for _cat, _muscles in MUSCLE_GROUP_CATEGORIES.items():
    for _m in _muscles:
        if _m not in _MUSCLE_TO_CATEGORIES:
            _MUSCLE_TO_CATEGORIES[_m] = []
        _MUSCLE_TO_CATEGORIES[_m].append(_cat)

# Coarser grouping used when classifying entire workouts: each functional
# category collapses to 'upper', 'lower' or 'core'.
SPLIT_CATEGORY_MAP: dict[str, str] = {
    **dict.fromkeys(('upper_push', 'upper_pull'), 'upper'),
    **dict.fromkeys(('lower_push', 'lower_pull'), 'lower'),
    'core': 'core',
}


def normalize_muscle_name(name: Optional[str]) -> Optional[str]:
    """
    Map a raw muscle name string to its canonical lowercase form.

    The name is cleaned before lookup: leading/trailing whitespace is
    removed, internal whitespace runs (double spaces, tabs, newlines) are
    collapsed to a single space, and the result is lowercased. The cleaned
    key is then looked up in ``MUSCLE_NORMALIZATION_MAP``.

    Returns:
        - ``None`` if ``name`` is None, empty, or whitespace-only.
        - The canonical name when the cleaned key has an entry in the map.
        - The cleaned key itself when it has no entry. Unknown muscles are
          deliberately passed through rather than silently dropped, so the
          caller can decide what to do with them.
    """
    if not name:
        return None
    # split() with no separator discards leading/trailing whitespace and
    # treats any whitespace run as one delimiter, so "Front  Delts" and
    # " front\tdelts " both become "front delts".
    key = ' '.join(name.split()).lower()
    if not key:
        return None
    return MUSCLE_NORMALIZATION_MAP.get(key, key)


# Process-wide memo: exercise primary key -> normalized muscle names.
# Entries persist until clear_muscle_cache() is called.
_muscle_cache: dict[int, Set[str]] = {}


def get_muscles_for_exercise(exercise) -> Set[str]:
    """
    Return the set of normalized muscle names for a given Exercise instance.

    Primary source is the ExerciseMuscle join table, queried for rows whose
    ``exercise`` is the given instance. If that yields no muscles, falls back
    to the comma-separated ``Exercise.muscle_groups`` field.

    Results are cached per exercise ID to avoid repeated DB queries. Each
    call returns a fresh copy of the cached set, so callers may mutate the
    result without affecting later lookups. Exercises whose ``id`` is None
    are never cached, because they would all share the same ``None`` key.
    """
    exercise_id = exercise.id

    if exercise_id is not None:
        cached = _muscle_cache.get(exercise_id)
        # An empty set is a valid cached answer, so test against None
        # rather than truthiness.
        if cached is not None:
            return set(cached)

    # Imported at call time rather than at module load (presumably to avoid
    # an import cycle / app-loading order issue -- confirm before moving).
    from muscle.models import ExerciseMuscle

    muscles: Set[str] = set()

    # Primary source: ExerciseMuscle join table
    em_qs = ExerciseMuscle.objects.filter(exercise=exercise).select_related('muscle')
    for em in em_qs:
        if em.muscle and em.muscle.name:
            normalized = normalize_muscle_name(em.muscle.name)
            if normalized:
                muscles.add(normalized)

    # Fallback: comma-separated muscle_groups CharField on Exercise
    if not muscles and exercise.muscle_groups:
        for raw in exercise.muscle_groups.split(','):
            normalized = normalize_muscle_name(raw)
            if normalized:
                muscles.add(normalized)

    if exercise_id is not None:
        _muscle_cache[exercise_id] = muscles
    # Return a copy so the cached set is never aliased by a caller.
    return set(muscles)


def clear_muscle_cache() -> None:
    """
    Clear the muscle cache (useful for testing or re-analysis).

    Empties the module-level ``_muscle_cache`` dict in place, so the next
    ``get_muscles_for_exercise`` call for any exercise re-reads its muscles
    instead of returning a memoized set.
    """
    # In-place clear (rather than rebinding the name) keeps every existing
    # reference to the dict pointing at the same, now-empty, object.
    _muscle_cache.clear()


def get_movement_patterns_for_exercise(exercise) -> List[str]:
    """
    Split ``exercise.movement_patterns`` (a comma-separated string) into a
    list of pattern names.

    Each piece is stripped of surrounding whitespace and lowercased; pieces
    that end up empty are skipped. Order and duplicates are preserved. A
    falsy ``movement_patterns`` value (None or empty string) yields ``[]``.
    """
    raw_patterns = exercise.movement_patterns
    if not raw_patterns:
        return []
    cleaned_pieces = (piece.strip().lower() for piece in raw_patterns.split(','))
    return [piece for piece in cleaned_pieces if piece]


def classify_split_type(muscle_names: set[str] | list[str]) -> str:
    """
    Given a set/list of canonical muscle names from a workout, return the
    best-fit split_type string.

    Returns one of: 'push', 'pull', 'legs', 'upper', 'lower', 'full_body',
    'core'.

    Note: This function intentionally does not return 'cardio' because split
    classification is muscle-based and cardio is not a muscle group. Cardio
    workout detection happens via ``WorkoutAnalyzer._infer_workout_type()``
    which examines movement patterns (cardio/locomotion) rather than muscles.
    """
    if not muscle_names:
        return 'full_body'

    muscle_set = set(muscle_names) if not isinstance(muscle_names, set) else muscle_names

    # Count how many muscles fall into each category
    category_scores: dict[str, int] = {
        'upper_push': 0,
        'upper_pull': 0,
        'lower_push': 0,
        'lower_pull': 0,
        'core': 0,
    }
    for m in muscle_set:
        cats = _MUSCLE_TO_CATEGORIES.get(m, [])
        for cat in cats:
            category_scores[cat] += 1

    total = sum(category_scores.values())
    if total == 0:
        return 'full_body'

    upper_push = category_scores['upper_push']
    upper_pull = category_scores['upper_pull']
    lower_push = category_scores['lower_push']
    lower_pull = category_scores['lower_pull']
    core_score = category_scores['core']

    upper_total = upper_push + upper_pull
    lower_total = lower_push + lower_pull

    # -- Core dominant --
    if core_score > 0 and core_score >= total * 0.6:
        return 'core'

    # -- Full body: both upper and lower have meaningful representation --
    if upper_total > 0 and lower_total > 0:
        upper_ratio = upper_total / total
        lower_ratio = lower_total / total
        # If neither upper nor lower dominates heavily, it's full body
        if 0.2 <= upper_ratio <= 0.8 and 0.2 <= lower_ratio <= 0.8:
            return 'full_body'

    # -- Upper dominant --
    if upper_total > lower_total and upper_total >= total * 0.5:
        if upper_push > 0 and upper_pull == 0:
            return 'push'
        if upper_pull > 0 and upper_push == 0:
            return 'pull'
        if upper_push > upper_pull * 2:
            return 'push'
        if upper_pull > upper_push * 2:
            return 'pull'
        return 'upper'

    # -- Lower dominant --
    if lower_total > upper_total and lower_total >= total * 0.5:
        if lower_push > 0 and lower_pull == 0:
            return 'legs'
        if lower_pull > 0 and lower_push == 0:
            return 'legs'
        return 'lower'

    # -- Push dominant (upper push + lower push) --
    push_total = upper_push + lower_push
    pull_total = upper_pull + lower_pull
    if push_total > pull_total * 2:
        return 'push'
    if pull_total > push_total * 2:
        return 'pull'

    return 'full_body'


def get_broad_split_category(split_type: str) -> str:
    """
    Collapse a split type into the coarser label used for weekly-pattern
    analysis.

    'legs' is folded into 'lower'; 'push', 'pull', 'upper', 'lower',
    'full_body', 'core' and 'cardio' map to themselves; any other value
    falls back to 'full_body'.

    Returns one of: 'upper', 'lower', 'push', 'pull', 'core', 'full_body',
    'cardio'.
    """
    unchanged = ('push', 'pull', 'upper', 'lower', 'full_body', 'core', 'cardio')
    broad_by_split = {label: label for label in unchanged}
    broad_by_split['legs'] = 'lower'
    try:
        return broad_by_split[split_type]
    except KeyError:
        # Unknown split types are treated as full-body sessions.
        return 'full_body'