Exercise selector: add similarity dedup, side-pair integrity, and modality guardrails

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Trey t
2026-02-24 11:00:35 -06:00
parent 909c75d8ee
commit 63b57a83ab
2 changed files with 521 additions and 23 deletions

View File

@@ -1,5 +1,6 @@
import random
import logging
import re
from collections import Counter
from django.db.models import Q, Count
@@ -24,7 +25,8 @@ logger = logging.getLogger(__name__)
MOVEMENT_FAMILY_KEYWORDS = [
# Olympic — specific before general
('clean and jerk', 'clean_and_jerk'), ('hang clean', 'clean'),
('clean pull', 'clean'), ('power clean', 'clean'), ('clean', 'clean'),
('clean pull', 'clean'), ('high pull', 'clean'),
('power clean', 'clean'), ('clean', 'clean'),
('snatch', 'snatch'),
# Vertical pull
('chin-up', 'chin_up'), ('chin up', 'chin_up'),
@@ -75,6 +77,9 @@ for _group, _members in FAMILY_GROUPS.items():
for _member in _members:
_FAMILY_TO_GROUP[_member] = _group
_LEFT_SIDE_VALUES = {'left', 'left_arm', 'left_leg', 'left_side'}
_RIGHT_SIDE_VALUES = {'right', 'right_arm', 'right_leg', 'right_side'}
def extract_movement_families(exercise_name):
"""Extract movement family tags from an exercise name.
@@ -133,6 +138,14 @@ class ExerciseSelector:
WARMUP_EXCLUDED_PATTERNS = [
'upper push', 'upper pull', 'olympic', 'combat', 'arms',
]
# Similarity thresholds to prevent near-duplicate selections.
SIMILARITY_HARD_THRESHOLD = 0.80
SIMILARITY_SOFT_THRESHOLD = 0.65
# Recovery/stretch movements should not appear in working sets.
WORKING_EXCLUDED_PATTERNS = [
'mobility - static', 'static stretch', 'cool down', 'cooldown',
'yoga', 'breathing', 'massage',
]
def __init__(self, user_preference, recently_used_ids=None, hard_exclude_ids=None):
self.user_preference = user_preference
@@ -142,6 +155,9 @@ class ExerciseSelector:
self.hard_exclude_ids = hard_exclude_ids or set() # Phase 6: hard exclude recent exercises
self.used_movement_patterns = Counter() # Phase 11: track patterns for variety
self.used_movement_families = Counter() # Movement family dedup across workout
self.used_working_similarity_profiles = []
self.last_working_similarity_profiles = []
self._exercise_profile_cache = {}
self.warnings = [] # Phase 13: generation warnings
self.progression_boost_ids = set() # IDs of exercises that are progressions of recently done ones
@@ -155,6 +171,9 @@ class ExerciseSelector:
self.used_exercise_names = set()
self.used_movement_patterns = Counter()
self.used_movement_families = Counter()
self.used_working_similarity_profiles = []
self.last_working_similarity_profiles = []
self._exercise_profile_cache = {}
self.warnings = []
def select_exercises(
@@ -195,6 +214,14 @@ class ExerciseSelector:
is_duration_based=is_duration_based,
fitness_level=fitness_level,
)
# Working supersets should not contain stretch/recovery exercises.
excluded_q = Q(name__icontains='stretch')
for pat in self.WORKING_EXCLUDED_PATTERNS:
excluded_q |= Q(movement_patterns__icontains=pat)
qs = qs.exclude(excluded_q)
# Guard against low-quality rows causing misclassification/selection drift.
qs = qs.exclude(Q(movement_patterns__isnull=True) | Q(movement_patterns=''))
qs = qs.exclude(Q(muscle_groups__isnull=True) | Q(muscle_groups=''))
# For advanced/elite, boost compound movements
if fitness_level and fitness_level >= 3 and not movement_pattern_preference:
@@ -225,7 +252,13 @@ class ExerciseSelector:
preferred_qs = qs.filter(pk__in=combined_preferred_ids)
other_qs = qs.exclude(pk__in=combined_preferred_ids)
selected = self._weighted_pick(preferred_qs, other_qs, count, superset_position=superset_position)
selected = self._weighted_pick(
preferred_qs,
other_qs,
count,
superset_position=superset_position,
similarity_scope='working',
)
# Sort selected exercises by tier: primary first, then secondary, then accessory
TIER_ORDER = {'primary': 0, 'secondary': 1, 'accessory': 2, None: 2}
@@ -292,12 +325,20 @@ class ExerciseSelector:
is_duration_based=is_duration_based,
fitness_level=fitness_level,
)
fallback_qs = fallback_qs.exclude(excluded_q)
fallback_qs = fallback_qs.exclude(Q(movement_patterns__isnull=True) | Q(movement_patterns=''))
fallback_qs = fallback_qs.exclude(Q(muscle_groups__isnull=True) | Q(muscle_groups=''))
still_needed = count - len(selected)
already_ids = {e.pk for e in selected}
fallback_qs = fallback_qs.exclude(pk__in=already_ids)
extras = self._weighted_pick(fallback_qs, Exercise.objects.none(), still_needed)
mg_label = ', '.join(muscle_groups[:3]) if muscle_groups else 'target muscles'
extras = self._weighted_pick(
fallback_qs,
Exercise.objects.none(),
still_needed,
similarity_scope='working',
)
if extras:
mg_label = ', '.join(muscle_groups[:3]) if muscle_groups else 'target muscles'
self.warnings.append(
f'Used bodyweight fallback for {mg_label} '
f'({len(extras)} exercises) due to limited equipment matches.'
@@ -312,6 +353,13 @@ class ExerciseSelector:
# Handle side-specific pairing: if an exercise has a side value,
# look for the matching opposite-side exercise so they appear together.
selected = self._pair_sided_exercises(selected, qs)
selected = self._ensure_side_pair_integrity(
selected,
qs,
count=count,
similarity_scope='working',
superset_position=superset_position,
)
# Mark everything we just selected as used and track patterns
for ex in selected:
@@ -322,6 +370,7 @@ class ExerciseSelector:
for pat in [p.strip().lower() for p in patterns.split(',') if p.strip()]:
self.used_movement_patterns[pat] += 1
self._track_families(selected)
self._track_similarity_profiles(selected, scope='working')
return self._trim_preserving_pairs(selected, count)
@@ -333,6 +382,8 @@ class ExerciseSelector:
is_duration_based=True,
fitness_level=fitness_level,
)
# Avoid duplicate-looking left/right variants in recovery sections.
qs = qs.filter(Q(side__isnull=True) | Q(side=''))
# Prefer exercises whose movement_patterns overlap with warmup keywords
warmup_q = Q()
@@ -369,6 +420,7 @@ class ExerciseSelector:
is_duration_based=True,
fitness_level=fitness_level,
).exclude(pk__in={e.pk for e in selected})
wide_qs = wide_qs.filter(Q(side__isnull=True) | Q(side=''))
# Apply same warmup safety exclusions
wide_qs = wide_qs.exclude(is_weight=True)
wide_qs = wide_qs.exclude(is_compound=True)
@@ -387,6 +439,7 @@ class ExerciseSelector:
self.used_exercise_names.add((ex.name or '').lower().strip())
self._track_families(selected)
selected = self._ensure_side_pair_integrity(selected, qs, count=count)
return self._trim_preserving_pairs(selected, count)
def select_cooldown_exercises(self, target_muscles, count=4):
@@ -403,6 +456,8 @@ class ExerciseSelector:
is_duration_based=True,
fitness_level=fitness_level,
)
# Avoid duplicate-looking left/right variants in recovery sections.
qs = qs.filter(Q(side__isnull=True) | Q(side=''))
cooldown_q = Q()
for kw in self.COOLDOWN_PATTERNS:
@@ -434,6 +489,7 @@ class ExerciseSelector:
is_duration_based=True,
fitness_level=fitness_level,
).exclude(pk__in={e.pk for e in selected})
wide_qs = wide_qs.filter(Q(side__isnull=True) | Q(side=''))
# Apply same exclusions
wide_qs = wide_qs.exclude(exclude_q)
# R11: also apply weight filter on wide fallback
@@ -452,6 +508,7 @@ class ExerciseSelector:
self.used_exercise_names.add((ex.name or '').lower().strip())
self._track_families(selected)
selected = self._ensure_side_pair_integrity(selected, qs, count=count)
return self._trim_preserving_pairs(selected, count)
# ------------------------------------------------------------------
@@ -470,6 +527,14 @@ class ExerciseSelector:
for fam in extract_movement_families(ex.name):
self.used_movement_families[fam] += 1
def _track_similarity_profiles(self, exercises, scope='working'):
    """Remember the similarity profiles of just-picked working exercises.

    Only the 'working' scope participates in near-duplicate blocking;
    warmup/cooldown scopes are ignored. Updates both the per-workout
    history and the "last superset" snapshot.
    """
    if scope != 'working':
        return
    batch = list(map(self._build_similarity_profile, exercises))
    self.last_working_similarity_profiles = batch
    self.used_working_similarity_profiles.extend(batch)
def _get_filtered_queryset(self, muscle_groups=None, is_duration_based=None, fitness_level=None):
"""
Build a base Exercise queryset filtered by:
@@ -587,8 +652,8 @@ class ExerciseSelector:
if is_duration_based is True:
qs = qs.filter(is_duration=True)
elif is_duration_based is False:
# Prefer rep-based but don't hard-exclude; handled by caller
pass
# Rep-based supersets must use rep-capable exercises only.
qs = qs.filter(is_reps=True)
# ---- Fitness-level filtering ----
if fitness_level is not None and fitness_level <= 1:
@@ -664,6 +729,8 @@ class ExerciseSelector:
if is_duration_based is True:
qs = qs.filter(is_duration=True)
elif is_duration_based is False:
qs = qs.filter(is_reps=True)
# ---- Safety: Fitness-level filtering (same as _get_filtered_queryset) ----
if fitness_level is not None and fitness_level <= 1:
@@ -821,7 +888,14 @@ class ExerciseSelector:
return qs
def _weighted_pick(self, preferred_qs, other_qs, count, superset_position=None):
def _weighted_pick(
self,
preferred_qs,
other_qs,
count,
superset_position=None,
similarity_scope=None,
):
"""
Pick up to *count* exercises using weighted random selection.
@@ -898,6 +972,7 @@ class ExerciseSelector:
selected_family_groups = set() # group names used in this superset
selected_families = set() # exact families used in this superset
selected_family_counts = Counter() # exact family counts in this superset
selected_profiles = []
# Shuffle to break any ordering bias
random.shuffle(pool)
@@ -940,9 +1015,17 @@ class ExerciseSelector:
attempts += 1
continue
if similarity_scope == 'working':
candidate_profile = self._build_similarity_profile(candidate)
if self._is_similarity_blocked(candidate_profile, selected_profiles):
attempts += 1
continue
selected.append(candidate)
selected_ids.add(candidate.pk)
selected_names.add(candidate_name)
if similarity_scope == 'working':
selected_profiles.append(candidate_profile)
# Track family groups for intra-superset blocking
for fam in candidate_families:
selected_families.add(fam)
@@ -954,6 +1037,102 @@ class ExerciseSelector:
return selected
@staticmethod
def _tokenize_text(value):
"""Tokenize free text into normalized, low-noise tokens."""
if not value:
return set()
tokens = set(re.findall(r"[a-z0-9]+", value.lower()))
stop_words = {
'and', 'or', 'the', 'with', 'to', 'a', 'an', 'of',
'single', 'arm', 'double', 'alternating',
'barbell', 'dumbbell', 'kettlebell', 'machine', 'cable',
'bodyweight',
}
return {tok for tok in tokens if tok not in stop_words and len(tok) > 1}
@staticmethod
def _tokenize_csv(value):
"""Tokenize comma-separated categorical fields."""
if not value:
return set()
return {part.strip().lower() for part in value.split(',') if part and part.strip()}
def _build_similarity_profile(self, ex):
    """Build (and memoize per exercise pk) the token profile for scoring.

    The profile tokenizes the categorical CSV fields plus the cleaned
    exercise name; it is cached on the selector instance because the
    same exercise is often scored many times per generation.
    """
    key = ex.pk
    if key in self._exercise_profile_cache:
        return self._exercise_profile_cache[key]
    profile = {
        'id': key,
        'movement': self._tokenize_csv(getattr(ex, 'movement_patterns', '') or ''),
        'muscles': self._tokenize_csv(getattr(ex, 'muscle_groups', '') or ''),
        'equipment': self._tokenize_csv(getattr(ex, 'equipment_required', '') or ''),
        'name_tokens': self._tokenize_text(getattr(ex, 'name', '') or ''),
    }
    self._exercise_profile_cache[key] = profile
    return profile
@staticmethod
def _jaccard_similarity(left, right):
"""Jaccard similarity between token sets."""
if not left and not right:
return 0.0
union = left | right
if not union:
return 0.0
return len(left & right) / len(union)
def _exercise_similarity_score(self, candidate_profile, existing_profile):
    """Blend per-field Jaccard similarities into one score in [0, 1].

    Movement patterns and muscle groups dominate; equipment and name
    tokens act as tie-breakers.
    """
    field_weights = (
        ('movement', 0.45),
        ('muscles', 0.35),
        ('equipment', 0.10),
        ('name_tokens', 0.10),
    )
    score = 0.0
    for field, weight in field_weights:
        score += weight * self._jaccard_similarity(
            candidate_profile[field], existing_profile[field]
        )
    return score
def _is_similarity_blocked(self, candidate_profile, selected_profiles):
    """Return True when the candidate is too similar to earlier picks.

    Checks, in order: the whole-workout history and the current superset
    at the hard threshold, then the previous superset and the current
    superset again at the tighter soft threshold.
    """
    score = self._exercise_similarity_score
    checks = (
        (self.used_working_similarity_profiles, self.SIMILARITY_HARD_THRESHOLD),
        (selected_profiles, self.SIMILARITY_HARD_THRESHOLD),
        (self.last_working_similarity_profiles, self.SIMILARITY_SOFT_THRESHOLD),
        (selected_profiles, self.SIMILARITY_SOFT_THRESHOLD),
    )
    for profiles, threshold in checks:
        for existing_profile in profiles:
            if score(candidate_profile, existing_profile) >= threshold:
                return True
    return False
def _pair_sided_exercises(self, selected, base_qs):
"""
For exercises with a ``side`` value (e.g. 'Left', 'Right'), try
@@ -965,20 +1144,13 @@ class ExerciseSelector:
paired = list(selected)
paired_ids = {e.pk for e in paired}
side_map = {
'left': 'right',
'right': 'left',
'Left': 'Right',
'Right': 'Left',
}
exercises_to_add = []
for ex in list(paired):
if ex.side and ex.side.strip():
side_lower = ex.side.strip().lower()
opposite = side_map.get(side_lower)
if not opposite:
side_norm = self._normalize_side_value(ex.side)
opposite_norm = self._opposite_side(side_norm)
if not opposite_norm:
continue
# Find the matching partner by name similarity and opposite side
@@ -992,8 +1164,8 @@ class ExerciseSelector:
Exercise.objects
.filter(
name__icontains=base_name,
side__iexact=opposite,
)
.filter(self._side_values_q(opposite_norm))
.exclude(pk__in=self.used_exercise_ids)
.exclude(pk__in=paired_ids)
.first()
@@ -1040,14 +1212,13 @@ class ExerciseSelector:
# Identify paired indices
paired_indices = set()
for i, ex in enumerate(selected):
if ex.side and ex.side.strip():
if self._normalize_side_value(getattr(ex, 'side', '')):
# Find its partner in the list
side_lower = ex.side.strip().lower()
base_name = ex.name
for side_word in ['Left', 'Right', 'left', 'right']:
base_name = base_name.replace(side_word, '').strip()
for j, other in enumerate(selected):
if i != j and other.side and other.side.strip():
if i != j and self._normalize_side_value(getattr(other, 'side', '')):
other_base = other.name
for side_word in ['Left', 'Right', 'left', 'right']:
other_base = other_base.replace(side_word, '').strip()
@@ -1072,12 +1243,12 @@ class ExerciseSelector:
# Build paired set for result indices
result_paired = set()
for i, ex in enumerate(result):
if ex.side and ex.side.strip():
if self._normalize_side_value(getattr(ex, 'side', '')):
base_name = ex.name
for side_word in ['Left', 'Right', 'left', 'right']:
base_name = base_name.replace(side_word, '').strip()
for j, other in enumerate(result):
if i != j and other.side and other.side.strip():
if i != j and self._normalize_side_value(getattr(other, 'side', '')):
other_base = other.name
for side_word in ['Left', 'Right', 'left', 'right']:
other_base = other_base.replace(side_word, '').strip()
@@ -1094,6 +1265,207 @@ class ExerciseSelector:
return result
def _strip_side_tokens(self, name):
"""Normalize a name by removing left/right tokens."""
base = name or ''
for side_word in [
'Left', 'Right', 'left', 'right',
'left arm', 'right arm', 'left leg', 'right leg',
'left side', 'right side',
]:
base = base.replace(side_word, '').strip()
return base.lower()
@staticmethod
def _normalize_side_value(side):
    """Canonicalize a raw DB side value to 'left', 'right', or None."""
    token = (side or '').strip().lower()
    for canonical, aliases in (
        ('left', _LEFT_SIDE_VALUES),
        ('right', _RIGHT_SIDE_VALUES),
    ):
        if token in aliases:
            return canonical
    return None
@staticmethod
def _opposite_side(side_norm):
"""Return opposite canonical side for left/right."""
if side_norm == 'left':
return 'right'
if side_norm == 'right':
return 'left'
return None
@staticmethod
def _side_values_q(side_norm):
    """Build an OR filter matching every stored DB alias of a canonical side.

    Any value other than 'left' falls through to the right-side aliases,
    mirroring how callers only pass canonical sides.
    """
    aliases = _LEFT_SIDE_VALUES if side_norm == 'left' else _RIGHT_SIDE_VALUES
    side_q = Q()
    for alias in aliases:
        side_q = side_q | Q(side__iexact=alias)
    return side_q
def _drop_unpaired_sided_exercises(self, selected):
    """Remove sided exercises whose opposite-side partner is absent.

    Groups sided picks by their side-stripped base name; only groups
    containing at least one left AND one right survive. Appends a
    warning when anything is dropped.
    """
    # Bucket sided exercise pks by their base (side-stripped) name.
    groups = {}
    for ex in selected:
        side = self._normalize_side_value(getattr(ex, 'side', ''))
        if side is None:
            continue
        base = self._strip_side_tokens(getattr(ex, 'name', ''))
        bucket = groups.setdefault(base, {'left': [], 'right': []})
        bucket[side].append(ex.pk)
    # An id survives only when its group has both sides represented.
    keep_ids = set()
    for bucket in groups.values():
        if bucket['left'] and bucket['right']:
            keep_ids.update(bucket['left'])
            keep_ids.update(bucket['right'])
    kept = []
    dropped = 0
    for ex in selected:
        side = self._normalize_side_value(getattr(ex, 'side', ''))
        if side is not None and ex.pk not in keep_ids:
            dropped += 1
            continue
        kept.append(ex)
    if dropped:
        self.warnings.append(
            f'Removed {dropped} unpaired side-specific exercises '
            f'to enforce left/right pairing.'
        )
    return kept
def _find_missing_side_partner(self, ex, base_qs, existing_ids):
    """
    Try hard to find the opposite-side partner for a sided exercise.

    Search order:
        1) base_qs with strict name-base match
        2) global Exercise table with strict name-base match
        3) base_qs with relaxed icontains name-base match
        4) global Exercise table with relaxed icontains name-base match

    Returns the partner Exercise or None.

    Fix: the strict-path ``return candidates[0] if candidates else None``
    executed unconditionally, making the relaxed ``icontains`` return
    unreachable — relaxed passes returned the first opposite-side row
    with no name matching at all. The strict return now lives inside
    the ``if strict:`` branch.
    """
    side_norm = self._normalize_side_value(getattr(ex, 'side', ''))
    opposite_norm = self._opposite_side(side_norm)
    if not opposite_norm:
        return None
    base_name = self._strip_side_tokens(getattr(ex, 'name', ''))
    if not base_name:
        return None

    def _pick_from_queryset(qs, strict=True):
        # Restrict to rows stored under any alias of the opposite side.
        candidates = qs.filter(self._side_values_q(opposite_norm))
        if strict:
            # Strict pass: exact match on the side-stripped base name.
            matches = [
                c for c in candidates
                if self._strip_side_tokens(getattr(c, 'name', '')) == base_name
            ]
            return matches[0] if matches else None
        # Relaxed pass: a substring match on the base name is enough.
        return candidates.filter(name__icontains=base_name).first()

    common_exclusions = Q(pk__in=existing_ids)
    # Prefer unused exercise ids, but do not hard-fail pairing if only a
    # previously-used counterpart exists.
    preferred_exclusions = common_exclusions | Q(pk__in=self.used_exercise_ids)

    partner = _pick_from_queryset(base_qs.exclude(preferred_exclusions), strict=True)
    if partner:
        return partner
    partner = _pick_from_queryset(Exercise.objects.exclude(preferred_exclusions), strict=True)
    if partner:
        return partner
    # Relaxed passes still avoid duplicates within the current selection.
    partner = _pick_from_queryset(base_qs.exclude(common_exclusions), strict=False)
    if partner:
        return partner
    return _pick_from_queryset(Exercise.objects.exclude(common_exclusions), strict=False)
def _ensure_side_pair_integrity(
    self,
    selected,
    base_qs,
    count,
    similarity_scope=None,
    superset_position=None,
):
    """
    Enforce strict left/right pairing:
    - First attempt to add missing opposite-side partners
    - Remove orphan left/right exercises only as a last resort
    - Backfill with non-sided exercises when possible

    Parameters:
        selected: list of Exercise instances already chosen.
        base_qs: queryset to search for partners and non-sided fillers.
        count: target number of exercises for this section.
        similarity_scope: forwarded to _weighted_pick so backfill picks
            respect the working-set similarity guardrails.
        superset_position: forwarded to _weighted_pick for its
            position-aware weighting.

    Returns a new list; may exceed *count* transiently, final trimming
    is the caller's responsibility (see _trim_preserving_pairs).
    """
    balanced = list(selected)
    existing_ids = {ex.pk for ex in balanced}
    added_partners = 0
    # Pass 1: for every sided exercise, check whether both sides of its
    # base movement are present; if not, try to fetch the missing partner.
    # Iterate a snapshot because we append to `balanced` inside the loop;
    # the any() checks scan the live list so freshly added partners count.
    for ex in list(balanced):
        side_val = self._normalize_side_value(getattr(ex, 'side', ''))
        if side_val not in ('left', 'right'):
            continue
        key = self._strip_side_tokens(getattr(ex, 'name', ''))
        has_left = any(
            self._normalize_side_value(getattr(other, 'side', '')) == 'left'
            and self._strip_side_tokens(getattr(other, 'name', '')) == key
            for other in balanced
        )
        has_right = any(
            self._normalize_side_value(getattr(other, 'side', '')) == 'right'
            and self._strip_side_tokens(getattr(other, 'name', '')) == key
            for other in balanced
        )
        if has_left and has_right:
            continue
        partner = self._find_missing_side_partner(ex, base_qs, existing_ids)
        if partner and partner.pk not in existing_ids:
            balanced.append(partner)
            existing_ids.add(partner.pk)
            added_partners += 1
    if added_partners:
        # Keep sided pairs by preferentially removing non-sided fillers.
        # Scan from the end so the most recently added fillers go first;
        # stop if only sided exercises remain (never break a pair here).
        while len(balanced) > count:
            remove_idx = None
            for idx in range(len(balanced) - 1, -1, -1):
                if self._normalize_side_value(getattr(balanced[idx], 'side', '')) not in ('left', 'right'):
                    remove_idx = idx
                    break
            if remove_idx is None:
                break
            balanced.pop(remove_idx)
        self.warnings.append(
            f'Added {added_partners} missing opposite-side exercise partners.'
        )
    # Pass 2 (last resort): drop any sided exercise still missing its
    # opposite side after the partner search above.
    balanced = self._drop_unpaired_sided_exercises(balanced)
    # Pass 3: backfill with non-sided exercises if the drops left us short.
    if len(balanced) < count:
        deficit = count - len(balanced)
        existing_ids = {ex.pk for ex in balanced}
        filler_qs = (
            base_qs.exclude(pk__in=existing_ids)
            .filter(Q(side__isnull=True) | Q(side=''))
        )
        extras = self._weighted_pick(
            filler_qs,
            Exercise.objects.none(),
            deficit,
            superset_position=superset_position,
            similarity_scope=similarity_scope,
        )
        balanced.extend(extras)
    return balanced
def balance_stretch_positions(self, selected, muscle_groups=None, fitness_level=None):
"""
Improve stretch position variety for hypertrophy workouts.