|
9 | 9 | This creates a challenging landscape for iterative refinement but |
10 | 10 | allows evolutionary crossover to combine good "building blocks" |
11 | 11 | from different individuals. |
| 12 | +
|
| 13 | +Set RICH_FEEDBACK=1 to enable rich feedback mode, which tells you |
| 14 | +exactly which modules are correct/incorrect. This demonstrates that |
| 15 | +iterative refinement works well when feedback is attributable. |
12 | 16 | """ |
13 | 17 |
|
| 18 | +import os |
14 | 19 | import sys |
15 | 20 | import time |
16 | 21 | import traceback |
17 | 22 | import importlib.util |
18 | 23 |
|
| 24 | +# Rich feedback mode - enabled only when the RICH_FEEDBACK environment variable is exactly "1"; artifacts then reveal which modules are correct/incorrect |
| 25 | +RICH_FEEDBACK = os.environ.get("RICH_FEEDBACK", "0") == "1" |
| 26 | + |
19 | 27 | # The correct solution (hidden from the optimizer) |
20 | 28 | # This represents the "optimal" pipeline configuration discovered through |
21 | 29 | # extensive testing/domain expertise |
@@ -141,14 +149,34 @@ def score_config(config: dict) -> tuple: |
141 | 149 |
|
142 | 150 | def build_artifacts(config: dict, correct_count: int, module_results: dict, eval_time: float) -> dict: |
143 | 151 | """ |
144 | | - Build artifacts that provide useful feedback without revealing |
145 | | - exactly which modules are correct. |
| 152 | + Build artifacts that provide useful feedback. |
| 153 | +
|
| 154 | + In normal mode: Only reveals how many modules are correct, not which ones. |
| 155 | + In rich feedback mode (RICH_FEEDBACK=1): Reveals exactly which modules are correct/incorrect. |
146 | 156 | """ |
147 | 157 | artifacts = {} |
148 | 158 |
|
149 | 159 | # Configuration summary |
150 | 160 | artifacts["configuration"] = str(config) |
151 | 161 |
|
| 162 | + # Rich feedback mode - reveals which modules are correct/incorrect |
| 163 | + if RICH_FEEDBACK: |
| 164 | + correct_modules = [m for m, is_correct in module_results.items() if is_correct] |
| 165 | + incorrect_modules = [m for m, is_correct in module_results.items() if not is_correct] |
| 166 | + |
| 167 | + artifacts["module_feedback"] = { |
| 168 | + "correct": correct_modules, |
| 169 | + "incorrect": incorrect_modules, |
| 170 | + } |
| 171 | + |
| 172 | + if incorrect_modules: |
| 173 | + hints = [] |
| 174 | + for module in incorrect_modules: |
| 175 | + hints.append(f"'{module}' is WRONG - try a different option from {VALID_OPTIONS[module]}") |
| 176 | + artifacts["actionable_hints"] = hints |
| 177 | + else: |
| 178 | + artifacts["actionable_hints"] = ["All modules are correct!"] |
| 179 | + |
152 | 180 | # Score feedback - tells you how many are correct, but not which ones |
153 | 181 | if correct_count == NUM_MODULES: |
154 | 182 | artifacts["status"] = "PERFECT! All modules correctly configured!" |
|
0 commit comments