Skip to content

Commit 7e2ebf9

Browse files
committed
fix edge cases
1 parent de03c2c commit 7e2ebf9

File tree

3 files changed

+84
-84
lines changed

3 files changed

+84
-84
lines changed

optillm/plugins/spl/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@
2020
# How often to perform maintenance operations (merge, prune)
2121
MAINTENANCE_INTERVAL = 40
2222

23-
# Similarity thresholds
23+
# Strategy selection thresholds
2424
STRATEGY_CREATION_THRESHOLD = 0.7 # Higher threshold to avoid creating similar strategies
2525
STRATEGY_MERGING_THRESHOLD = 0.6 # Lower threshold to merge more similar strategies
26+
MIN_SUCCESS_RATE_FOR_INFERENCE = 0.4 # Minimum success rate for a strategy to be used during inference
2627

2728
# Limits for strategy management
2829
MAX_STRATEGIES_PER_TYPE = 10 # Maximum strategies to store in DB per problem type

optillm/plugins/spl/evaluation.py

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
)
1515
from optillm.plugins.spl.config import (
1616
DEFAULT_MAX_TOKENS,
17-
MAX_STRATEGIES_FOR_INFERENCE
17+
MAX_STRATEGIES_FOR_INFERENCE,
18+
MIN_SUCCESS_RATE_FOR_INFERENCE
1819
)
1920

2021
# Setup logging
@@ -25,23 +26,40 @@ def select_relevant_strategies(query: str, problem_type: str, db: Any, max_strat
2526
Select the most relevant strategies for a given problem to be used during inference.
2627
This controls how many strategies are included in the system prompt augmentation.
2728
29+
Only selects strategies of the matching problem type whose success rate is >= MIN_SUCCESS_RATE_FOR_INFERENCE, or that have fewer than 5 total attempts.
30+
2831
Args:
2932
query: The problem/query text
3033
problem_type: The type of problem
3134
db: Strategy database
3235
max_strategies: Maximum number of strategies to return
3336
3437
Returns:
35-
List[Strategy]: The selected strategies
38+
List[Strategy]: The selected strategies (may be empty if none meet criteria)
3639
"""
3740
# First, get strategies specifically for this problem type
3841
type_specific = db.get_strategies_for_problem(problem_type)
42+
logger.info(f"Found {len(type_specific)} strategies for problem type '{problem_type}'")
43+
44+
# Filter strategies by minimum success rate (strategies with fewer than 5 attempts are kept regardless)
45+
qualified_strategies = []
46+
for strategy in type_specific:
47+
if strategy.success_rate >= MIN_SUCCESS_RATE_FOR_INFERENCE or strategy.total_attempts < 5:
48+
qualified_strategies.append(strategy)
49+
else:
50+
logger.info(f"Strategy {strategy.strategy_id} skipped - success rate {strategy.success_rate:.2f} < minimum {MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}")
51+
52+
if not qualified_strategies:
53+
logger.info(f"No strategies meet the minimum success rate threshold ({MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}) for problem type '{problem_type}'")
54+
return []
3955

40-
# If we have more type-specific strategies than needed, sort and select the best ones
41-
if len(type_specific) > max_strategies:
56+
logger.info(f"Found {len(qualified_strategies)} strategies that meet minimum success rate requirement")
57+
58+
# If we have more qualified strategies than needed, sort and select the best ones
59+
if len(qualified_strategies) > max_strategies:
4260
# Score each strategy based on success rate and recency
4361
scored_strategies = []
44-
for strategy in type_specific:
62+
for strategy in qualified_strategies:
4563
recency_score = 0
4664
if strategy.last_used:
4765
# Calculate days since last use
@@ -55,40 +73,19 @@ def select_relevant_strategies(query: str, problem_type: str, db: Any, max_strat
5573

5674
# Sort by score (descending) and take top strategies
5775
scored_strategies.sort(key=lambda x: x[1], reverse=True)
58-
return [s[0] for s in scored_strategies[:max_strategies]]
59-
60-
# If we don't have enough type-specific strategies, get similar strategies from other types
61-
if len(type_specific) < max_strategies:
62-
# Calculate how many more strategies we need
63-
needed = max_strategies - len(type_specific)
64-
65-
# Get similar strategies from other problem types
66-
type_specific_ids = {s.strategy_id for s in type_specific}
67-
similar_strategies = []
68-
69-
for s, score in db.get_similar_strategies(query, n=max_strategies*2): # Get more than needed to filter
70-
# Only include strategies from other problem types and not already selected
71-
if s.strategy_id not in type_specific_ids and s.problem_type != problem_type:
72-
similar_strategies.append(s)
73-
if len(similar_strategies) >= needed:
74-
break
75-
76-
# Combine type-specific strategies with similar strategies
77-
combined = type_specific + similar_strategies[:needed] # Only add as many as needed
76+
selected = [s[0] for s in scored_strategies[:max_strategies]]
7877

7978
# Log which strategies we're using
80-
for i, strategy in enumerate(combined, 1):
81-
problem_type_str = "(same type)" if strategy.problem_type == problem_type else f"(from {strategy.problem_type})"
82-
logger.info(f"Selected strategy {i}/{max_strategies} for inference: {strategy.strategy_id} {problem_type_str} (success rate: {strategy.success_rate:.2f})")
83-
84-
return combined
85-
86-
# If we have exactly the right number, just return them
87-
# Log which strategies we're using
88-
for i, strategy in enumerate(type_specific, 1):
89-
logger.info(f"Selected strategy {i}/{max_strategies} for inference: {strategy.strategy_id} (same type) (success rate: {strategy.success_rate:.2f})")
79+
for i, strategy in enumerate(selected, 1):
80+
logger.info(f"Selected strategy {i}/{max_strategies} for inference: {strategy.strategy_id} (success rate: {strategy.success_rate:.2f})")
9081

91-
return type_specific[:max_strategies]
82+
return selected
83+
84+
# If the number of qualified strategies does not exceed the maximum, use all of them
85+
for i, strategy in enumerate(qualified_strategies, 1):
86+
logger.info(f"Selected strategy {i}/{len(qualified_strategies)} for inference: {strategy.strategy_id} (success rate: {strategy.success_rate:.2f})")
87+
88+
return qualified_strategies
9289

9390
def evaluate_strategy_effectiveness(response: str, thinking: Optional[str], selected_strategies: List[Strategy], client, model: str) -> Dict[str, bool]:
9491
"""

optillm/plugins/spl/main.py

Lines changed: 49 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
MAINTENANCE_INTERVAL,
2727
STRATEGY_MERGING_THRESHOLD,
2828
MAX_STRATEGIES_PER_TYPE,
29-
MAX_STRATEGIES_FOR_INFERENCE
29+
MAX_STRATEGIES_FOR_INFERENCE,
30+
MIN_SUCCESS_RATE_FOR_INFERENCE
3031
)
3132

3233
# Setup logging
@@ -124,15 +125,22 @@ def run_spl(system_prompt: str, initial_query: str, client, model: str, request_
124125
for i, strategy in enumerate(selected_strategies, 1):
125126
logger.info(f"Selected strategy {i}/{MAX_STRATEGIES_FOR_INFERENCE} for inference: {strategy.strategy_id} (success rate: {strategy.success_rate:.2f})")
126127

127-
# 7. Handle strategies for the problem type
128+
# 7. Handle situation when no strategies are selected
128129
if not selected_strategies:
129-
logger.info(f"No existing strategies found for problem type: {problem_type}")
130-
logger.info(f"Running without strategy augmentation - using base system prompt only")
130+
if not existing_strategies:
131+
# No strategies exist for this problem type
132+
logger.info(f"No strategies exist for problem type '{problem_type}'. Enable learning mode with 'spl_learning=True' to create strategies.")
133+
else:
134+
# Strategies exist but don't meet the minimum success rate
135+
logger.info(f"Strategies exist for problem type '{problem_type}' but none meet the minimum success rate threshold of {MIN_SUCCESS_RATE_FOR_INFERENCE:.2f}.")
136+
logger.info(f"Enable learning mode with 'spl_learning=True' to improve strategies.")
131137

132-
# Just use the original system prompt with no augmentation
138+
# Use the original system prompt without augmentation
139+
logger.info("Running without strategy augmentation - using base system prompt only.")
133140
augmented_prompt = system_prompt
134141
else:
135-
# 8. Augment the system prompt with the selected strategies
142+
# Normal case - strategies were selected
143+
# Augment the system prompt with the selected strategies
136144
augmented_prompt = augment_system_prompt(system_prompt, selected_strategies)
137145
logger.info(f"Augmented system prompt with {len(selected_strategies)} strategies (inference limit: {MAX_STRATEGIES_FOR_INFERENCE})")
138146

@@ -148,19 +156,12 @@ def run_spl(system_prompt: str, initial_query: str, client, model: str, request_
148156
request_params['max_tokens'] = DEFAULT_MAX_TOKENS
149157
elif request_params['max_tokens'] < DEFAULT_MAX_TOKENS:
150158
request_params['max_tokens'] = DEFAULT_MAX_TOKENS
151-
152-
# Log a suggestion if no strategies found in inference mode
153-
if not learning_mode and not existing_strategies:
154-
logger.info(f"No strategies exist for problem type '{problem_type}'. To learn strategies for this specific problem type, enable learning mode by setting 'spl_learning=True' in the request config.")
155-
156-
# Use unmodified query - no need to add fallback message to the actual query
157-
initial_query_with_suggestion = initial_query
158159

159160
response = client.chat.completions.create(
160161
model=model,
161162
messages=[
162163
{"role": "system", "content": augmented_prompt},
163-
{"role": "user", "content": initial_query_with_suggestion}
164+
{"role": "user", "content": initial_query}
164165
],
165166
**request_params
166167
)
@@ -177,39 +178,40 @@ def run_spl(system_prompt: str, initial_query: str, client, model: str, request_
177178
logger.debug(f"Main response - final answer after removing thinking: '{final_response}'")
178179

179180
# Only perform learning operations if in learning mode and we have strategies
180-
if learning_mode and selected_strategies:
181-
# 10. Evaluate the effectiveness of the strategies
182-
strategy_effectiveness = evaluate_strategy_effectiveness(
183-
final_response,
184-
thinking,
185-
selected_strategies,
186-
client,
187-
model
188-
)
189-
190-
# 11. Update strategy metrics based on effectiveness
191-
for strategy_id, effective in strategy_effectiveness.items():
192-
# Skip temporary fallback strategies
193-
if strategy_id != "fallback_temporary":
194-
db.update_strategy_performance(strategy_id, effective)
195-
logger.info(f"Strategy {strategy_id} effectiveness: {effective}")
196-
197-
# If the strategy was effective and thinking was used, add the thinking as a reasoning example
198-
if effective and thinking and strategy_id != "fallback_temporary":
199-
db.add_reasoning_example(strategy_id, thinking)
200-
logger.info(f"Added reasoning example to strategy {strategy_id}")
201-
202-
# 12. Periodically refine strategies (after every 10 uses)
203-
for strategy in selected_strategies:
204-
# Skip temporary fallback strategies
205-
if (strategy.strategy_id != "fallback_temporary" and
206-
strategy.total_attempts % 10 == 0 and
207-
strategy.total_attempts > 0):
208-
logger.info(f"Refining strategy {strategy.strategy_id} after {strategy.total_attempts} attempts")
209-
refined_strategy = refine_strategy(strategy, initial_query, final_response, thinking, client, model)
210-
db.refine_strategy(strategy.strategy_id, refined_strategy.strategy_text)
211-
elif learning_mode:
212-
logger.info("No strategies to evaluate")
181+
if learning_mode:
182+
if selected_strategies:
183+
# 10. Evaluate the effectiveness of the strategies
184+
strategy_effectiveness = evaluate_strategy_effectiveness(
185+
final_response,
186+
thinking,
187+
selected_strategies,
188+
client,
189+
model
190+
)
191+
192+
# 11. Update strategy metrics based on effectiveness
193+
for strategy_id, effective in strategy_effectiveness.items():
194+
# Skip temporary fallback strategies
195+
if strategy_id != "fallback_temporary":
196+
db.update_strategy_performance(strategy_id, effective)
197+
logger.info(f"Strategy {strategy_id} effectiveness: {effective}")
198+
199+
# If the strategy was effective and thinking was used, add the thinking as a reasoning example
200+
if effective and thinking and strategy_id != "fallback_temporary":
201+
db.add_reasoning_example(strategy_id, thinking)
202+
logger.info(f"Added reasoning example to strategy {strategy_id}")
203+
204+
# 12. Periodically refine strategies (after every 10 uses)
205+
for strategy in selected_strategies:
206+
# Skip temporary fallback strategies
207+
if (strategy.strategy_id != "fallback_temporary" and
208+
strategy.total_attempts % 10 == 0 and
209+
strategy.total_attempts > 0):
210+
logger.info(f"Refining strategy {strategy.strategy_id} after {strategy.total_attempts} attempts")
211+
refined_strategy = refine_strategy(strategy, initial_query, final_response, thinking, client, model)
212+
db.refine_strategy(strategy.strategy_id, refined_strategy.strategy_text)
213+
else:
214+
logger.info("No strategies to evaluate or refine - consider adding strategies for this problem type")
213215
else:
214216
logger.info("Strategy evaluation and refinement skipped (not in learning mode)")
215217

0 commit comments

Comments
 (0)