|
16 | 16 | "|---|---|\n", |
17 | 17 | "| 1 — Configuration | All tunable parameters in one place |\n", |
18 | 18 | "| 2 — Environment | Check API keys, install path, imports |\n", |
19 | | - "| 2.5 — Opik health check | Verify Opik stack is reachable and API-responsive before running |\n", |
| 19 | + "| 2.5 — Langfuse health check | Verify Langfuse credentials are configured before running |\n", |
20 | 20 | "| 3 — Load dataset | Pull samples from HuggingFace |\n", |
21 | 21 | "| 4 — Instantiate agents | Build Planner, OCR, Vision, Verifier |\n", |
22 | 22 | "| 5 — Run pipeline | Generate MEPs (Plan → OCR → Vision → Verify) |\n", |
|
119 | 119 | " val = os.environ.get(var, \"\")\n", |
120 | 120 | " needed = needed_for in CONFIG\n", |
121 | 121 | " if val and not val.startswith(\"your_\"):\n", |
122 | | - " print(f\" ok {var} ({val[:12]}...)\")\n", |
| 122 | + " print(f\" ok {var} ({val[:3]}...)\")\n", |
123 | 123 | " elif needed:\n", |
124 | 124 | " print(f\" MISSING {var} <- required for {CONFIG}\")\n", |
125 | 125 | " missing.append(var)\n", |
|
141 | 141 | "from agentic_chartqapro_eval.eval.eval_outputs import evaluate_mep # noqa: E402\n", |
142 | 142 | "from agentic_chartqapro_eval.eval.eval_traces import evaluate_trace # noqa: E402\n", |
143 | 143 | "from agentic_chartqapro_eval.eval.summarize import summarize, write_csv # noqa: E402\n", |
| 144 | + "from agentic_chartqapro_eval.langfuse_integration.client import get_client # noqa: E402\n", |
144 | 145 | "from agentic_chartqapro_eval.mep.writer import iter_meps # noqa: E402\n", |
145 | | - "from agentic_chartqapro_eval.opik_integration.client import get_client # noqa: E402\n", |
146 | 146 | "from agentic_chartqapro_eval.runner.run_generate_meps import ( # noqa: E402\n", |
147 | 147 | " BACKEND_CONFIGS,\n", |
148 | 148 | " process_sample,\n", |
|
159 | 159 | "id": "cell-opik-hdr", |
160 | 160 | "metadata": {}, |
161 | 161 | "source": [ |
162 | | - "## 2.5 — Opik Health Check\n", |
| 162 | + "## 2.5 — Langfuse Health Check\n", |
163 | 163 | "\n", |
164 | | - "Verifies that the self-hosted Opik stack is **fully operational** before the pipeline runs.\n", |
165 | | - "Three checks are run in sequence:\n", |
| 164 | + "Verifies that Langfuse credentials are configured and that the client initialises before the pipeline runs.\n", |
166 | 165 | "\n", |
167 | 166 | "| Check | What it tests |\n", |
168 | 167 | "|---|---|\n", |
169 | | - "| HTTP reachable | TCP connection to `OPIK_URL_OVERRIDE` succeeds within 5 s |\n", |
170 | | - "| Client init | `opik.Opik()` initialises without error |\n", |
171 | | - "| API read test | A lightweight `search_traces` call returns a valid response |\n", |
| 168 | + "| Env vars present | `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` are set in the environment (e.g. via `.env`) |\n", |
| 169 | + "| Client init | `Langfuse()` initialises without error |\n", |
172 | 170 | "\n", |
173 | | - "If `OPIK_URL_OVERRIDE` is not set the cell prints a skip notice and continues — Opik is optional.\n", |
174 | | - "If any check fails the pipeline still runs; only tracing is affected." |
| 171 | + "If the keys are absent, the cell prints a skip notice and continues — Langfuse is optional.\n", |
| 172 | + "The pipeline produces identical MEPs with or without it; tracing is purely additive." |
175 | 173 | ] |
176 | 174 | }, |
177 | 175 | { |
|
181 | 179 | "metadata": {}, |
182 | 180 | "outputs": [], |
183 | 181 | "source": [ |
184 | | - "import urllib.error\n", |
185 | | - "import urllib.request\n", |
186 | | - "\n", |
187 | | - "# Force re-initialisation so re-running this cell after starting Docker works correctly\n", |
188 | | - "from agentic_chartqapro_eval.opik_integration.client import reset_client\n", |
| 182 | + "from agentic_chartqapro_eval.langfuse_integration.client import reset_client\n", |
189 | 183 | "\n", |
190 | 184 | "\n", |
| 185 | + "# Force re-initialisation so re-running this cell picks up any .env changes\n", |
191 | 186 | "reset_client()\n", |
192 | 187 | "\n", |
193 | | - "OPIK_URL = os.environ.get(\"OPIK_URL_OVERRIDE\", \"\")\n", |
| 188 | + "lf_public = os.environ.get(\"LANGFUSE_PUBLIC_KEY\", \"\")\n", |
| 189 | + "lf_secret = os.environ.get(\"LANGFUSE_SECRET_KEY\", \"\")\n", |
194 | 190 | "\n", |
195 | | - "if not OPIK_URL:\n", |
196 | | - " print(\"[skip] OPIK_URL_OVERRIDE is not set.\")\n", |
197 | | - " print(\" Opik tracing is disabled. Pipeline will run fine without it.\")\n", |
| 191 | + "if not lf_public or not lf_secret:\n", |
| 192 | + " print(\"[skip] LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY are not set.\")\n", |
| 193 | + " print(\" Langfuse tracing is disabled. Pipeline will run fine without it.\")\n", |
198 | 194 | " print()\n", |
199 | | - " print(\"To enable Opik tracing:\")\n", |
200 | | - " print(\" 1. Start the Docker stack:\")\n", |
201 | | - " print(\" cd /path/to/opik/deployment/docker-compose\")\n", |
202 | | - " print(\" docker compose --profile opik up -d\")\n", |
203 | | - " print(\" 2. Add to .env: OPIK_URL_OVERRIDE=http://localhost:5173/api\")\n", |
204 | | - " print(\" 3. Re-run this cell.\")\n", |
| 195 | + " print(\"To enable Langfuse tracing, add to .env:\")\n", |
| 196 | + " print(\" LANGFUSE_PUBLIC_KEY=pk-lf-...\")\n", |
| 197 | + " print(\" LANGFUSE_SECRET_KEY=sk-lf-...\")\n", |
| 198 | + " print(\" # LANGFUSE_HOST=https://cloud.langfuse.com (default; change for self-hosted)\")\n", |
205 | 199 | "else:\n", |
206 | 200 | " results = {}\n", |
207 | 201 | "\n", |
208 | | - " # -- Check 1: HTTP reachability (any response = server is up) --\n", |
209 | | - " try:\n", |
210 | | - " with urllib.request.urlopen(OPIK_URL, timeout=5) as r:\n", |
211 | | - " results[\"http\"] = (\"ok\", f\"HTTP {r.status}\")\n", |
212 | | - " except urllib.error.HTTPError as e:\n", |
213 | | - " # HTTPError means server responded -- it is up, just returned a non-200\n", |
214 | | - " results[\"http\"] = (\"ok\", f\"HTTP {e.code} (server responded)\")\n", |
215 | | - " except Exception as e:\n", |
216 | | - " results[\"http\"] = (\"fail\", str(e))\n", |
| 202 | + " # -- Check 1: Env vars present --\n", |
| 203 | + " results[\"env\"] = (\"ok\", f\"pk={lf_public[:3]}...\")\n", |
217 | 204 | "\n", |
218 | | - " # -- Check 2: Opik Python client initialises --\n", |
219 | | - " _opik_hc = None\n", |
| 205 | + " # -- Check 2: Client initialises --\n", |
220 | 206 | " try:\n", |
221 | | - " from agentic_chartqapro_eval.opik_integration.client import get_client\n", |
222 | | - "\n", |
223 | | - " _opik_hc = get_client()\n", |
224 | | - " if _opik_hc is not None:\n", |
225 | | - " results[\"client\"] = (\"ok\", \"opik.Opik() ready\")\n", |
| 207 | + " _lf_hc = get_client()\n", |
| 208 | + " if _lf_hc is not None:\n", |
| 209 | + " results[\"client\"] = (\"ok\", \"Langfuse() ready\")\n", |
226 | 210 | " else:\n", |
227 | 211 | " results[\"client\"] = (\"fail\", \"get_client() returned None\")\n", |
228 | 212 | " except Exception as e:\n", |
229 | 213 | " results[\"client\"] = (\"fail\", str(e))\n", |
230 | 214 | "\n", |
231 | | - " # -- Check 3: API actually responds to a lightweight read --\n", |
232 | | - " if results.get(\"client\", (\"\",))[0] == \"ok\" and _opik_hc is not None:\n", |
233 | | - " try:\n", |
234 | | - " traces = _opik_hc.search_traces(max_results=1)\n", |
235 | | - " results[\"api\"] = (\"ok\", f\"search_traces returned {len(traces)} result(s)\")\n", |
236 | | - " except Exception as e:\n", |
237 | | - " err_str = str(e)\n", |
238 | | - " hint = \"\"\n", |
239 | | - " if \"readonly\" in err_str.lower() or \"500\" in err_str:\n", |
240 | | - " hint = \" [ClickHouse replica may be read-only -- run SYSTEM RESTORE REPLICA]\"\n", |
241 | | - " results[\"api\"] = (\"fail\", err_str[:120] + hint)\n", |
242 | | - " else:\n", |
243 | | - " results[\"api\"] = (\"skip\", \"client unavailable\")\n", |
244 | | - "\n", |
245 | 215 | " # -- Report --\n", |
246 | | - " print(f\"Opik URL : {OPIK_URL}\")\n", |
247 | | - " print()\n", |
248 | 216 | " labels = [\n", |
249 | | - " (\"http\", \"HTTP reachable \"),\n", |
250 | | - " (\"client\", \"Client init \"),\n", |
251 | | - " (\"api\", \"API read test \"),\n", |
| 217 | + " (\"env\", \"Env vars present\"),\n", |
| 218 | + " (\"client\", \"Client init \"),\n", |
252 | 219 | " ]\n", |
253 | 220 | " all_ok = True\n", |
254 | 221 | " for key, label in labels:\n", |
255 | 222 | " status, detail = results.get(key, (\"skip\", \"\"))\n", |
256 | | - " if status == \"ok\":\n", |
257 | | - " marker = \"✓ OK \"\n", |
258 | | - " elif status == \"skip\":\n", |
259 | | - " marker = \"⊘ skip\"\n", |
260 | | - " else:\n", |
261 | | - " marker = \"✗ FAIL\"\n", |
| 223 | + " marker = \"✓ OK \" if status == \"ok\" else (\"⊘ skip\" if status == \"skip\" else \"✗ FAIL\")\n", |
| 224 | + " if status not in (\"ok\", \"skip\"):\n", |
262 | 225 | " all_ok = False\n", |
263 | 226 | " print(f\" {marker} {label} {detail}\")\n", |
264 | 227 | "\n", |
265 | 228 | " print()\n", |
266 | 229 | " if all_ok:\n", |
267 | | - " dashboard_url = OPIK_URL.rstrip(\"/\").removesuffix(\"/api\")\n", |
268 | | - " print(\"✓ Opik is fully operational.\")\n", |
269 | | - " print(f\"Dashboard : {dashboard_url}\")\n", |
| 230 | + " lf_host = os.environ.get(\"LANGFUSE_HOST\") or os.environ.get(\"LANGFUSE_BASE_URL\") or \"https://cloud.langfuse.com\"\n", |
| 231 | + " print(\"✓ Langfuse is configured.\")\n", |
| 232 | + " print(f\"Host : {lf_host}\")\n", |
270 | 233 | " print(\"Traces and scores will be recorded automatically during the pipeline run.\")\n", |
271 | 234 | " else:\n", |
272 | | - " print(\"⚠ WARNING: One or more Opik checks failed.\")\n", |
273 | | - " print(\"The pipeline will still run; Opik tracing may not work correctly.\")\n", |
274 | | - " if results.get(\"http\", (\"\",))[0] == \"fail\":\n", |
275 | | - " print()\n", |
276 | | - " print(\" Docker stack appears to be down. To start it:\")\n", |
277 | | - " print(\" cd /path/to/opik/deployment/docker-compose\")\n", |
278 | | - " print(\" docker compose --profile opik up -d\")\n", |
279 | | - " if results.get(\"api\", (\"\",))[0] == \"fail\":\n", |
280 | | - " print()\n", |
281 | | - " print(\" API is reachable but not responding correctly.\")\n", |
282 | | - " print(\" Check ClickHouse replica state:\")\n", |
283 | | - " print(\" docker exec opik-clickhouse-1 clickhouse-client --query \\\\\")\n", |
284 | | - " print(\" \\\"SELECT database,table,is_readonly FROM system.replicas WHERE database='opik'\\\"\")" |
| 235 | + " print(\"⚠ WARNING: Langfuse client failed to initialise.\")\n", |
| 236 | + " print(\"The pipeline will still run; tracing will be skipped.\")" |
285 | 237 | ] |
286 | 238 | }, |
287 | 239 | { |
|
376 | 328 | "else:\n", |
377 | 329 | " print(\"OcrReaderTool : disabled (USE_OCR=False)\")\n", |
378 | 330 | "\n", |
379 | | - "# Opik observability (no-op if OPIK_URL_OVERRIDE not set)\n", |
380 | | - "opik_client = get_client()\n", |
381 | | - "opik_status = \"enabled\" if opik_client else \"not configured\"\n", |
382 | | - "print(f\"Opik : {opik_status}\")" |
| 331 | + "# Langfuse observability (no-op if keys not set)\n", |
| 332 | + "lf_client = get_client()\n", |
| 333 | + "lf_status = \"enabled\" if lf_client else \"not configured\"\n", |
| 334 | + "print(f\"Langfuse : {lf_status}\")" |
383 | 335 | ] |
384 | 336 | }, |
385 | 337 | { |
|
421 | 373 | " config,\n", |
422 | 374 | " RUN_ID,\n", |
423 | 375 | " OUT_DIR,\n", |
424 | | - " opik_client=opik_client,\n", |
| 376 | + " lf_client=lf_client,\n", |
425 | 377 | " verifier_agent=verifier,\n", |
426 | 378 | " ocr_tool=ocr,\n", |
427 | 379 | " )\n", |
|
459 | 411 | "## 6 — Inspect First MEP\n", |
460 | 412 | "\n", |
461 | 413 | "MEPs are self-contained JSON files. Every field you see here is what the agent actually\n", |
462 | | - "produced — no post-processing. The `opik_trace_id` links this MEP back to the live trace\n", |
463 | | - "in the Opik dashboard if Opik is configured." |
| 414 | + "produced — no post-processing. The `lf_trace_id` links this MEP back to the live trace\n", |
| 415 | + "in the Langfuse dashboard if Langfuse is configured." |
464 | 416 | ] |
465 | 417 | }, |
466 | 418 | { |
|
501 | 453 | " print(\"Timestamps (ms):\")\n", |
502 | 454 | " for k in [\"planner_ms\", \"ocr_ms\", \"vision_ms\", \"verifier_ms\"]:\n", |
503 | 455 | " print(f\" {k:<16} {ts.get(k, 0):.0f}\")\n", |
504 | | - " if mep.get(\"opik_trace_id\"):\n", |
505 | | - " print(f\"Opik trace ID: {mep['opik_trace_id']}\")\n", |
| 456 | + " if mep.get(\"lf_trace_id\"):\n", |
| 457 | + " print(f\"Langfuse trace ID: {mep['lf_trace_id']}\")\n", |
506 | 458 | " print(\"=\" * 64)\n", |
507 | 459 | "\n", |
508 | 460 | " img_path = s.get(\"image_ref\", {}).get(\"path\", \"\")\n", |
|
609 | 561 | " config,\n", |
610 | 562 | " RUN_ID_NO_OCR,\n", |
611 | 563 | " OUT_DIR_NO_OCR,\n", |
612 | | - " opik_client=opik_client,\n", |
| 564 | + " lf_client=lf_client,\n", |
613 | 565 | " verifier_agent=verifier,\n", |
614 | 566 | " ocr_tool=None, # <-- OCR disabled\n", |
615 | 567 | " )\n", |
|
0 commit comments