diff --git a/AGENTS.md b/AGENTS.md index cf60def..4078e17 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,15 @@ Current implementations include BitVector, RmM Tree, and LOUDS Tree. Planned add ## Skills -./.kilo/skills/ contains several project-specific skills, use them when appropriate +Shared C++ agent skills live in `agentic/cpp/skills`. Pixie-specific examples +for those skills live in `agentic/local/cpp/skills`. +Shared C++ agent commands live in `agentic/cpp/commands`. Pixie-specific +commands or command notes live in `agentic/local/cpp/commands`. + +When a task matches a skill, read: + +1. `agentic/cpp/skills/<skill-name>/SKILL.md` +2. `agentic/local/cpp/skills/<skill-name>/EXAMPLES.md`, if present ## Architecture diff --git a/agentic/cpp/README.md b/agentic/cpp/README.md new file mode 100644 index 0000000..68a2262 --- /dev/null +++ b/agentic/cpp/README.md @@ -0,0 +1,15 @@ +# Shared C++ Agent Skills + +This subtree contains reusable C++ agent skills and related commands. + +Keep this tree generic: + +- Do not add project-specific benchmark names, CMake options, or paths. +- Keep reusable scripts beside the skills that use them. +- Put project-specific examples in the consuming repository under + `agentic/local/cpp/skills/<skill-name>/EXAMPLES.md`. + +When using a skill in a project, read: + +1. `agentic/cpp/skills/<skill-name>/SKILL.md` +2. `agentic/local/cpp/skills/<skill-name>/EXAMPLES.md`, if present diff --git a/agentic/cpp/commands/README.md b/agentic/cpp/commands/README.md new file mode 100644 index 0000000..2a24b20 --- /dev/null +++ b/agentic/cpp/commands/README.md @@ -0,0 +1,6 @@ +# Shared C++ Agent Commands + +Reusable command definitions for C++ projects belong here. + +Keep project-specific commands in the consuming repository under +`agentic/local/cpp/commands`. 
diff --git a/agentic/cpp/commands/benchmarks-affected.md b/agentic/cpp/commands/benchmarks-affected.md new file mode 100644 index 0000000..158cb9e --- /dev/null +++ b/agentic/cpp/commands/benchmarks-affected.md @@ -0,0 +1,34 @@ +--- +description: Scan current branch and report impacted benchmark targets/functions. +--- + +# Benchmarks Affected + +Identify which benchmark binaries and benchmark functions are affected by changes on the current branch. + +Use the `benchmarks-affected` skill as the single source of truth for workflow details and guardrails. +Do not duplicate or override the skill instructions in this command. + +## Inputs + +- Optional `--baseline <ref>` (default: `main`) +- Optional `--compile-commands <path>` +- Optional `--no-include-working-tree` +- Optional `--format <text|json>` (default: `text`) + +## Workflow + +1. Execute the `benchmarks-affected` skill workflow. +2. Pass through command inputs to the analyzer invocation defined by the skill. +3. Report results with these sections: + - Changed files + - Affected benchmark targets + - Affected benchmark functions + - Suggested `--benchmark_filter` regex + - Any warnings/failures + +## Output rules + +1. If `affected_benchmarks` is non-empty, prioritize those names. +2. If `affected_benchmarks` is empty but benchmark targets are affected, mark result as partial and include target-level impact. +3. Do not run full benchmark suites in this command; this command is for impact discovery only. diff --git a/agentic/cpp/commands/perf-review.md b/agentic/cpp/commands/perf-review.md new file mode 100644 index 0000000..8ef1865 --- /dev/null +++ b/agentic/cpp/commands/perf-review.md @@ -0,0 +1,149 @@ +--- +description: Benchmark-driven PR performance review versus target branch +--- + +# Perf Review Workflow + +You are performing a performance review for the current PR branch. + +Non-negotiable requirements: +1. Benchmark timing plus profiling data is the highest-priority judgment tool. +2. 
Compare source branch versus target branch and report relevant benchmark metric changes. +3. Provide analysis and a final verdict: does the PR improve performance or not. + +## Inputs + +- Optional argument `--target <branch>`: target branch override. +- Optional argument `--filter <regex>`: benchmark filter regex. +- Optional argument `--no-counters`: disable hardware-counter collection. + +If arguments are omitted: +- Default target branch to PR base branch from `gh pr view --json baseRefName` when available. +- Otherwise fall back to `main` as the target branch. + +Filter handling: +- If `--filter` is provided, pass it through. +- Else use the filter produced by `benchmarks-affected` through `benchmarks-compare-revisions`. +- If no filter can be derived, run conservative full-binary compare for impacted binaries. + +## Step 1 - Resolve branches and hashes + +1. Resolve contender from current checkout (`HEAD`) and compute short hash. +2. Resolve baseline branch using precedence: `--target` -> PR base from `gh pr view --json baseRefName` -> `main`. +3. Resolve baseline short hash. +4. Print branch/hash mapping before benchmark execution. + +## Step 2 - Run timing and hardware-counter comparison via skill (single source of truth) + +Use `benchmarks-compare-revisions` as the single source of truth for revision builds, benchmark scope, compare.py flow, retry policy, and guardrails. + +Pass-through inputs: +- Baseline ref/hash from Step 1. +- Contender ref/hash from Step 1. +- Optional `--filter` override. +- Counter mode: default on (`COLLECT_COUNTERS=1`) on Linux, disabled when `--no-counters` is provided. + +Consume outputs from `benchmarks-compare-revisions`: +- Baseline and contender benchmark JSON artifacts. +- compare.py output per binary. +- Effective filter used. +- Scope metadata from `benchmarks-affected` (`affected_benchmark_targets`, `affected_benchmarks`) when available. +- `counters_available` status and, when unavailable, explicit reason. 
+- Baseline and contender counter JSON artifacts (when available). +- Derived counter metrics per benchmark (IPC, cache miss rate, branch mispredict rate). +- Counter anomaly list and ready-to-embed counter summary table. + +Execution guardrails: +- Run benchmarks sequentially. +- No background jobs (`nohup`, `&`). +- Use Release timing builds only. +- If timing comparison fails, return blocked verdict with exact failure points. + +## Step 3 - Consume delegated hardware-counter outputs + +Hardware-counter collection is delegated to `benchmarks-compare-revisions`. + +Pass-through inputs: +- `COLLECT_COUNTERS=1` by default on Linux (unless `--no-counters` is provided). +- Same baseline/contender refs and effective filter used in Step 2. + +Consume outputs: +- Counter preflight result. +- Counter JSON artifacts for both revisions. +- Derived metrics (IPC, cache miss rate, branch mispredict rate). +- Anomaly list and counter summary table for report embedding. + +If counters are unavailable (`counters_available=false`), continue with timing-only review and explicitly mark profiling as unavailable in the report. + +## Step 4 - Analyze timing and counter data + +Timing classification per benchmark entry: +- Improvement: time delta < -5% +- Regression: time delta > +5% +- Neutral: between -5% and +5% + +Aggregate per binary: +- Number of improvements/regressions/neutral +- Net average percentage change +- Largest regression and largest improvement + +Counter correlation: +- Use skill-provided hardware counter summary and anomaly list to explain major timing changes. +- Do not recompute derived counter metrics in this command. + +Judgment priority: +- Base verdict primarily on benchmark timing comparison. +- Use counter data as explanatory evidence and confidence signal. + +Noise-control expectations: +- Include at least one control benchmark family expected to be unaffected by the code change. 
+- Treat isolated swings without pattern as noise unless reproduced across related sizes/fill ratios. + +## Step 5 - Produce final markdown report + +Return a structured markdown report with this shape: + +```markdown +## Performance Review: vs + +### Configuration +- Baseline: () +- Contender: () +- Platform: +- Benchmarks run: +- Filter: +- Hardware counters: available / unavailable + +### Timing Summary +| Binary | Improvements | Regressions | Neutral | Net Change | +|---|---:|---:|---:|---:| +| ... | N | N | N | +/-X% | + +### Detailed Timing Results + + +### Hardware Counter Profile (if available) +| Benchmark | IPC (base->new) | Cache Miss Rate (base->new) | Branch Mispredict (base->new) | +|---|---:|---:|---:| +| ... | X.XX -> Y.YY | A.A% -> B.B% | C.C% -> D.D% | + +### Key Findings +- +- + +### Verdict +**[IMPROVES PERFORMANCE | REGRESSES PERFORMANCE | NO SIGNIFICANT CHANGE]** + +<1-2 sentence justification grounded in benchmark metrics, with profiling context if available> +``` + +Verdict rules: +- `IMPROVES PERFORMANCE`: improvements outnumber regressions, no severe regression (>10%), and net average change is favorable. +- `REGRESSES PERFORMANCE`: any severe regression (>10%) or regressions dominate with net unfavorable average. +- `NO SIGNIFICANT CHANGE`: mostly neutral changes or mixed results that approximately cancel out. + +## Failure Handling + +- If required builds fail or timing comparison cannot run, output a blocked review with exact failure points and no misleading verdict. +- If only profiling fails (`counters_available=false` from delegated skill output), continue with timing-based verdict and explicitly list profiling limitation. +- If JSON output is invalid/truncated, discard it and rerun that benchmark command once with tighter filter and explicit output redirection. 
diff --git a/agentic/cpp/commands/ping.md b/agentic/cpp/commands/ping.md new file mode 100644 index 0000000..b5edaf7 --- /dev/null +++ b/agentic/cpp/commands/ping.md @@ -0,0 +1,7 @@ +--- +description: Test command that replies with pong +--- + +Respond with exactly `pong`. +Do not add any other words. +Do not add quotes or punctuation. diff --git a/agentic/cpp/skills/benchmarks-affected/SKILL.md b/agentic/cpp/skills/benchmarks-affected/SKILL.md new file mode 100644 index 0000000..2072dcb --- /dev/null +++ b/agentic/cpp/skills/benchmarks-affected/SKILL.md @@ -0,0 +1,81 @@ +--- +name: benchmarks-affected +description: Analyze current branch versus a baseline and extract affected benchmark targets and benchmark functions using compile_commands and clang AST. +--- + +# Benchmarks Affected Skill + +Use this skill to identify exactly which benchmark binaries and benchmark functions are affected by code changes on the current branch. + +It implements a two-stage workflow: + +1. `compile_commands.json` analysis to determine affected compile targets. +2. Clang AST analysis to determine affected benchmark functions. + +## Goal + +Given `HEAD` and a baseline branch (default `main`), produce: + +- Changed files. +- Affected targets (with emphasis on benchmark targets). +- Exact benchmark functions impacted by the changes. +- A ready-to-use Google Benchmark filter regex. + +## Prerequisites + +1. Build tree with benchmarks enabled and compile database exported. Use the +repository's normal benchmark-enabling CMake options: + +```bash +BUILD_SUFFIX=local +cmake -B build/benchmarks-all_${BUILD_SUFFIX} \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +cmake --build build/benchmarks-all_${BUILD_SUFFIX} --config Release -j +``` + +2. `clang++` must be available on `PATH` (used for AST dump). + +For repository-specific invocations, check +`agentic/local/cpp/skills/benchmarks-affected/EXAMPLES.md` when present. 
+ +## Run + +```bash +python3 agentic/cpp/skills/benchmarks-affected/analyze_benchmarks_affected.py \ + --baseline main \ + --compile-commands build/benchmarks-all_local/compile_commands.json \ + --format json +``` + +If `--compile-commands` is omitted, the script auto-selects the most recently modified `build/**/compile_commands.json`. +Working tree changes are included by default. Use `--no-include-working-tree` to restrict analysis to `<baseline>...HEAD` only. + +## Output + +The analyzer reports: + +- `affected_targets`: impacted CMake targets inferred from compile dependency analysis. +- `affected_benchmark_targets`: subset of benchmark binaries impacted. +- `affected_benchmarks`: precise benchmark function names from AST-level call analysis. +- `suggested_filter_regex`: regex to pass as `--benchmark_filter`. + +## How to Use Findings + +1. Build only impacted benchmark binaries where feasible. +2. Run benchmark binaries with the suggested filter: + +```bash +FILTER='^(BM_Foo|BM_Bar)(/|$)' +BENCH_CPU=${BENCH_CPU:-0} +taskset -c "${BENCH_CPU}" build/benchmarks-all_local/benchmarks --benchmark_filter="${FILTER}" +``` + +3. If impact mapping is broad/uncertain, run full binary for selected benchmark target(s). + +## Guardrails + +1. Keep baseline comparison at merge-base style diff: `<baseline>...HEAD`. +2. Use Release binaries for timing runs. +3. If AST parse fails for a TU, still trust compile target impact and mark benchmark-function scope as partial. +4. If benchmark infra (`CMakeLists.txt`, benchmark source layout) changed, fall back to conservative benchmark selection. 
def is_project_source(source: Path, repo_root: Path) -> bool:
    """Return True when ``source`` is a first-party file inside ``repo_root``.

    Third-party dependencies and generated build files are excluded:

    * anything outside ``repo_root``;
    * anything below the top-level ``build/`` directory;
    * anything below a directory named exactly ``_deps``.

    Args:
        source: Path of the file to classify; expected to be expressed the
            same way as ``repo_root`` (both resolved or both unresolved).
        repo_root: Root directory of the repository.

    Returns:
        ``True`` for project sources, ``False`` for everything excluded above.
    """
    try:
        rel = source.relative_to(repo_root)
    except ValueError:
        # Not located under the repository at all.
        return False

    # Only directory components matter; the file name itself is ignored.
    parent_dirs = rel.parts[:-1]
    if parent_dirs and parent_dirs[0] == "build":
        return False

    # Compare whole directory names: a substring test on the path text would
    # also reject first-party directories such as ``test_deps/``.
    return "_deps" not in parent_dirs
def run_command(
    args: list[str],
    cwd: Path,
    check: bool = True,
    timeout: float | None = 60.0,
) -> subprocess.CompletedProcess[str]:
    """Run ``args`` as a subprocess in ``cwd`` and capture its text output.

    Args:
        args: Program and arguments, passed as a list (no shell involved).
        cwd: Working directory for the child process.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is
            raised; ``None`` waits indefinitely.

    Returns:
        The completed process, with ``stdout`` and ``stderr`` decoded as text.
    """
    run_options: dict[str, Any] = {
        "cwd": str(cwd),
        "text": True,
        "capture_output": True,
        "check": check,
        "timeout": timeout,
    }
    return subprocess.run(args, **run_options)
def git_repo_root() -> Path:
    """Return the resolved top-level directory of the enclosing git work tree.

    Runs ``git rev-parse --show-toplevel`` from the current working directory.
    Because ``run_command`` is called with its default ``check=True``, a
    failing git invocation surfaces as ``subprocess.CalledProcessError``.
    """
    toplevel_output = run_command(
        ["git", "rev-parse", "--show-toplevel"],
        cwd=Path.cwd(),
    ).stdout
    return Path(toplevel_output.strip()).resolve()
def infer_output_path(arguments: list[str], directory: Path) -> Path | None:
    """Extract the object-file output path from a compile command line.

    Recognised spellings are ``-o <path>``, ``-o<path>`` and the MSVC-style
    ``/Fo<path>``. When an output is given more than once, the last
    occurrence wins.

    Two parsing hazards are handled explicitly:

    * the value that follows a bare ``-o`` is consumed and never inspected as
      a flag itself, so an output such as ``/Fonts/a.o`` is kept intact;
    * a ``-o`` output takes precedence over ``/Fo``, so a positional argument
      that merely starts with ``/Fo`` (for example a source file under
      ``/Fonts/``) cannot displace an explicit ``-o``.

    Args:
        arguments: Tokenised compile command.
        directory: Working directory of the command; relative outputs are
            resolved against it.

    Returns:
        The resolved output path, or ``None`` when no output was specified.
    """
    dash_o_token: str | None = None
    msvc_token: str | None = None

    remaining = iter(arguments)
    for arg in remaining:
        if arg == "-o":
            # Pull the value off the iterator so it is not parsed as a flag.
            value = next(remaining, None)
            if value is not None:
                dash_o_token = value
        elif arg.startswith("-o") and len(arg) > 2:
            dash_o_token = arg[2:]
        elif arg.startswith("/Fo") and len(arg) > 3:
            msvc_token = arg[3:]

    output_token = dash_o_token if dash_o_token is not None else msvc_token
    if output_token is None:
        return None

    out_path = Path(output_token)
    if out_path.is_absolute():
        return out_path.resolve()
    return (directory / out_path).resolve()
# Hunk header with both line counts captured: group 1 is the old-side count,
# group 2 the new-side start line, group 3 the new-side count. A missing count
# means 1, as in the unified diff format.
_HUNK_RANGES_RE = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")


def parse_changed_lines_from_diff_text(
    diff_text: str,
    repo_root: Path,
) -> dict[Path, set[int]]:
    """Map each file touched by a unified diff to its changed new-side lines.

    The hunk header's line counts decide which lines are hunk content, so an
    added line whose text happens to start with ``++ `` is counted as an
    addition instead of being mistaken for a ``+++ `` file header.

    Deleted lines have no new-side line number. Each one is attributed to the
    new-side line adjacent to the deletion point, so a definition that only
    lost lines is still reported as changed.

    Args:
        diff_text: Output of ``git diff`` for any ``--unified`` value.
        repo_root: Repository root that diff paths are resolved against.

    Returns:
        Mapping from resolved file path to a set of 1-based line numbers.
        Deleted files (``+++ /dev/null``) produce no entry.
    """
    changed_lines: dict[Path, set[int]] = defaultdict(set)

    current_file: Path | None = None
    old_remaining = 0  # old-side lines still expected in the current hunk
    new_remaining = 0  # new-side lines still expected in the current hunk
    new_line = 0

    # Split on "\n" only: str.splitlines() also breaks on form feeds and other
    # separators that may legitimately occur inside a source line.
    for raw in diff_text.split("\n"):
        raw_line = raw.rstrip("\r")

        if old_remaining > 0 or new_remaining > 0:
            marker = raw_line[:1]
            if marker == "+":
                if current_file is not None:
                    changed_lines[current_file].add(new_line)
                new_line += 1
                new_remaining -= 1
                continue
            if marker == "-":
                if current_file is not None:
                    # Anchor the deletion to a neighbouring new-side line.
                    changed_lines[current_file].add(max(new_line, 1))
                old_remaining -= 1
                continue
            if marker in ("", " "):
                # Context line; some tools strip the single leading space
                # from empty context lines.
                new_line += 1
                old_remaining -= 1
                new_remaining -= 1
                continue
            if marker == "\\":
                # "\ No newline at end of file" belongs to neither side.
                continue
            # Not valid hunk content: the counts were wrong or the diff is
            # truncated. Leave the hunk and treat the line as a header.
            old_remaining = 0
            new_remaining = 0

        if raw_line.startswith("+++ "):
            file_token = raw_line[4:].strip()
            if file_token == "/dev/null":
                current_file = None
            else:
                if file_token.startswith("b/"):
                    file_token = file_token[2:]
                current_file = (repo_root / file_token).resolve()
            continue

        hunk_match = _HUNK_RANGES_RE.match(raw_line)
        if hunk_match:
            old_count, new_start, new_count = hunk_match.groups()
            old_remaining = 1 if old_count is None else int(old_count)
            new_remaining = 1 if new_count is None else int(new_count)
            new_line = int(new_start)

    return changed_lines
def extract_changed_symbol_names_from_file(
    file_path: Path,
    changed_lines: set[int],
) -> set[str]:
    """Return names of function-like definitions that overlap changed lines.

    This is a textual heuristic, not a parser: a definition starts on a line
    matched by ``CPP_FUNCTION_START_RE`` and extends until the braces opened
    on that line are balanced again. Braces inside strings or comments are
    counted too.

    Args:
        file_path: File to scan, read as UTF-8 with undecodable bytes
            replaced.
        changed_lines: 1-based line numbers considered changed.

    Returns:
        Names of definitions whose line span contains a changed line. The
        result is empty when ``changed_lines`` is empty or ``file_path`` is
        not a readable regular file. Directories appear in diffs when a
        submodule pointer changes, so they are skipped rather than raising.
    """
    if not changed_lines or not file_path.is_file():
        return set()

    try:
        text = file_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # The file vanished or is unreadable: best effort, report nothing.
        return set()

    lines = text.splitlines()
    symbols: set[str] = set()

    line_index = 1
    max_line = len(lines)
    while line_index <= max_line:
        line = lines[line_index - 1]
        match = CPP_FUNCTION_START_RE.match(line)
        if not match:
            line_index += 1
            continue

        start_line = line_index
        end_line = start_line
        brace_depth = line.count("{") - line.count("}")

        # Extend the span until the opening brace is balanced or EOF.
        while brace_depth > 0 and end_line < max_line:
            end_line += 1
            body_line = lines[end_line - 1]
            brace_depth += body_line.count("{") - body_line.count("}")

        if any(start_line <= line_no <= end_line for line_no in changed_lines):
            symbols.add(match.group(1))

        # Nested matches inside the body are skipped on purpose.
        line_index = end_line + 1

    return symbols
def parse_makefile_dependencies(stdout_text: str) -> list[str]:
    """Parse the prerequisites of a Make rule as printed by ``-M``/``-MM``.

    The text is treated as Make depfile syntax, not shell syntax:

    * everything up to the first ``:`` is the target and is discarded;
    * prerequisites are separated by whitespace or by a backslash-newline
      line continuation;
    * ``\\`` followed by a space, tab or ``#`` stands for that literal
      character, and ``$$`` stands for ``$``;
    * every other character, including quotes and other backslashes, is kept
      verbatim. Shell-style tokenisation would fail on a path containing a
      quote character.

    Args:
        stdout_text: Dependency rule text, possibly spanning several lines.

    Returns:
        Prerequisite paths in order of appearance; empty when the text has no
        ``:`` or nothing follows it.
    """
    _target, colon, payload = stdout_text.partition(":")
    if not colon:
        return []

    dependencies: list[str] = []
    current: list[str] = []

    def flush() -> None:
        # Finish the token being assembled, if any.
        if current:
            dependencies.append("".join(current))
            current.clear()

    index = 0
    length = len(payload)
    while index < length:
        char = payload[index]
        following = payload[index + 1] if index + 1 < length else ""

        if char == "\\" and following == "\n":
            # A line continuation acts as a separator.
            flush()
            index += 2
        elif char == "\\" and following in (" ", "\t", "#"):
            current.append(following)
            index += 2
        elif char == "$" and following == "$":
            current.append("$")
            index += 2
        elif char.isspace():
            flush()
            index += 1
        else:
            current.append(char)
            index += 1

    flush()
    return dependencies
def dedupe_entries_by_target_source(
    entries: list[CompileCommandEntry],
) -> list[CompileCommandEntry]:
    """Keep the first entry for each distinct ``(target, source)`` pair.

    The relative order of the surviving entries is the order in which their
    keys first appear in ``entries``.
    """
    first_by_key: dict[tuple[str | None, Path], CompileCommandEntry] = {}
    for candidate in entries:
        # setdefault keeps the earliest entry; dicts preserve insertion order.
        first_by_key.setdefault((candidate.target, candidate.source), candidate)
    return list(first_by_key.values())
def iter_ast_nodes(node: Any):
    """Yield every dict node of a clang JSON AST in depth-first pre-order.

    A dict is yielded first, followed by the nodes found in its ``"inner"``
    list, in order. A list is never yielded itself; its items are visited in
    order. Values that are neither dict nor list are ignored, as is an
    ``"inner"`` value that is not a list.

    The walk uses an explicit stack instead of recursion, so the nesting
    depth of the AST is not limited by the interpreter's recursion limit and
    each node is produced in constant time regardless of depth.

    Args:
        node: Root of the tree: a dict, a list of nodes, or anything else
            (which yields nothing).

    Yields:
        Each dict node, parent before children, siblings left to right.
    """
    pending: list[Any] = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            yield current
            inner = current.get("inner", [])
            if isinstance(inner, list):
                # Push reversed so the first child is popped first.
                pending.extend(reversed(inner))
        elif isinstance(current, list):
            pending.extend(reversed(current))
def benchmark_names_from_source(source: Path) -> set[str]:
    """Collect benchmark names registered textually in ``source``.

    Two forms are recognised: the identifier inside ``BENCHMARK(...)`` and
    the first string literal passed to ``register_op("...")``. The file is
    read as UTF-8 with undecodable bytes replaced. A path that does not exist
    yields an empty set.
    """
    if not source.exists():
        return set()

    contents = source.read_text(encoding="utf-8", errors="replace")
    name_patterns = (
        r"BENCHMARK\(\s*([A-Za-z_][A-Za-z0-9_]*)\s*\)",
        r"register_op\(\s*\"([^\"]+)\"",
    )
    return {
        found.group(1)
        for pattern in name_patterns
        for found in re.finditer(pattern, contents)
    }
str(exc) + return result + + if proc.returncode != 0: + stderr = proc.stderr.strip() + result.ast_error = ( + stderr if stderr else f"AST command failed ({proc.returncode})" + ) + return result + + try: + ast_root = json.loads(proc.stdout) + except json.JSONDecodeError as exc: + result.ast_error = f"Invalid AST JSON: {exc}" + return result + + function_callees: dict[str, set[str]] = defaultdict(set) + direct_impacted_functions: set[str] = set() + dynamic_benchmarks_by_function: dict[str, set[str]] = defaultdict(set) + + for node in iter_ast_nodes(ast_root): + if not isinstance(node, dict): + continue + + if node.get("kind") not in {"FunctionDecl", "CXXMethodDecl"}: + continue + + function_name = node.get("name") + if not isinstance(function_name, str) or not function_name: + continue + + if function_name.startswith("BM_"): + result.benchmark_names.add(function_name) + + function_callees.setdefault(function_name, set()) + + function_loc = file_from_loc(node.get("loc"), entry.directory) + is_directly_impacted = function_loc in changed_files + if not is_directly_impacted: + is_directly_impacted = node_references_changed_symbol( + node, changed_symbol_names + ) + + for subnode in iter_ast_nodes(node): + if not isinstance(subnode, dict): + continue + + sub_kind = subnode.get("kind") + if sub_kind in {"CallExpr", "CXXMemberCallExpr", "CXXOperatorCallExpr"}: + callee = call_expr_callee_name(subnode) + if callee: + function_callees[function_name].add(callee) + + if callee == "register_op": + literal_values = string_literals_in_node(subnode) + if literal_values: + dynamic_benchmarks_by_function[function_name].add( + literal_values[0] + ) + + if not is_directly_impacted: + ref_file = referenced_decl_file(subnode, entry.directory) + if ref_file in changed_files: + is_directly_impacted = True + + if is_directly_impacted: + direct_impacted_functions.add(function_name) + + # Reverse call-graph propagation: if a function is directly impacted, + # every caller in this TU is 
impacted as well (fixed-point DFS/BFS). + callers_of: dict[str, set[str]] = defaultdict(set) + for caller, callees in function_callees.items(): + for callee in callees: + callers_of[callee].add(caller) + + impacted_functions = set(direct_impacted_functions) + stack = list(direct_impacted_functions) + while stack: + callee_name = stack.pop() + for caller_name in callers_of.get(callee_name, set()): + if caller_name in impacted_functions: + continue + impacted_functions.add(caller_name) + stack.append(caller_name) + + for function_name in impacted_functions: + if function_name.startswith("BM_"): + result.affected_names.add(function_name) + + for function_name, names in dynamic_benchmarks_by_function.items(): + result.benchmark_names.update(names) + if function_name in impacted_functions: + result.affected_names.update(names) + + return result + + +def regex_for_benchmarks(names: set[str]) -> str | None: + if not names: + return None + ordered = sorted(names) + body = "|".join(re.escape(name) for name in ordered) + return rf"^({body})(/|$)" + + +def relpath_or_abs(path: Path, root: Path) -> str: + try: + return path.relative_to(root).as_posix() + except ValueError: + return path.as_posix() + + +def main() -> int: + cli = parse_args() + + try: + repo_root = git_repo_root() + changed_files = git_changed_files(repo_root, cli.baseline, cli.head) + if cli.include_working_tree: + changed_files.update(git_working_tree_changed_files(repo_root)) + changed_line_map = git_changed_line_map( + repo_root, + cli.baseline, + cli.head, + cli.include_working_tree, + ) + changed_symbol_names = collect_changed_symbol_names(changed_line_map) + compile_commands_path = resolve_compile_commands( + repo_root, cli.compile_commands + ) + entries = load_compile_commands(compile_commands_path, repo_root) + except FileNotFoundError as exc: + print(f"error: {exc}", file=sys.stderr) + return 2 + except subprocess.CalledProcessError as exc: + stderr = (exc.stderr or "").strip() + if stderr: + 
print(f"error: {stderr}", file=sys.stderr) + else: + print(f"error: command failed: {' '.join(exc.cmd)}", file=sys.stderr) + return 2 + + target_to_entries: dict[str, list[CompileCommandEntry]] = defaultdict(list) + source_to_entries: dict[Path, list[CompileCommandEntry]] = defaultdict(list) + for entry in entries: + source_to_entries[entry.source].append(entry) + if entry.target: + target_to_entries[entry.target].append(entry) + + benchmark_targets = identify_benchmark_targets(entries, repo_root) + all_targets = {entry.target for entry in entries if entry.target} + benchmark_entries = dedupe_entries_by_target_source( + [entry for entry in entries if entry.target in benchmark_targets] + ) + + infra_change = is_build_infra_change(repo_root, changed_files) + relevant_changed_files = { + path + for path in changed_files + if is_project_source(path, repo_root) + or path.name in BUILD_INFRA_FILES + or relpath_or_abs(path, repo_root).startswith("cmake/") + } + has_header_changes = any( + path.suffix.lower() in HEADER_EXTENSIONS for path in relevant_changed_files + ) + benchmark_source_extensions = {".c", ".cc", ".cpp", ".cxx"} + only_benchmark_source_changes = bool(relevant_changed_files) and all( + is_benchmark_source(path, repo_root) + and path.suffix.lower() in benchmark_source_extensions + for path in relevant_changed_files + ) + + directly_affected_targets: set[str] = set() + for changed_path in changed_files: + for entry in source_to_entries.get(changed_path, []): + if entry.target: + directly_affected_targets.add(entry.target) + + dependency_scan_entries: list[CompileCommandEntry] = [] + if not infra_change and not only_benchmark_source_changes: + if has_header_changes: + dependency_scan_entries = dedupe_entries_by_target_source(entries) + else: + dependency_scan_entries = benchmark_entries + + if dependency_scan_entries: + with concurrent.futures.ThreadPoolExecutor( + max_workers=min(8, (os.cpu_count() or 4)) + ) as pool: + list(pool.map(compute_tu_dependencies, 
dependency_scan_entries)) + + affected_targets: set[str] = set(directly_affected_targets) + for entry in dependency_scan_entries: + has_changed_dependency = any(dep in changed_files for dep in entry.dependencies) + if has_changed_dependency and entry.target: + affected_targets.add(entry.target) + + if infra_change: + affected_targets.update(all_targets) + + dependency_impacted_benchmark_targets = affected_targets.intersection( + benchmark_targets + ) + impacted_benchmark_entries = [ + entry + for entry in benchmark_entries + if entry.target in dependency_impacted_benchmark_targets + ] + + ast_errors: dict[str, str] = {} + benchmark_target_to_names: dict[str, set[str]] = defaultdict(set) + benchmark_target_to_affected: dict[str, set[str]] = defaultdict(set) + warnings: list[str] = [] + ast_fallback_used = False + ast_entries_scanned = 0 + + if impacted_benchmark_entries: + try: + clangxx = discover_clangxx(cli.clangxx) + except FileNotFoundError as exc: + clangxx = "" + warnings.append(str(exc)) + + if not clangxx: + ast_fallback_used = True + for entry in impacted_benchmark_entries: + target_name = entry.target or "" + fallback_names = benchmark_names_from_source(entry.source) + benchmark_target_to_names[target_name].update(fallback_names) + benchmark_target_to_affected[target_name].update(fallback_names) + else: + max_ast_workers = min(2, (os.cpu_count() or 2)) + with concurrent.futures.ThreadPoolExecutor( + max_workers=max_ast_workers + ) as pool: + futures = { + pool.submit( + ast_analyze_entry, + entry, + changed_files, + changed_symbol_names, + clangxx, + ): entry + for entry in impacted_benchmark_entries + } + ast_entries_scanned = len(futures) + for future in concurrent.futures.as_completed(futures): + entry = futures[future] + target_name = entry.target or "" + source_path = entry.source + source_is_changed = source_path in changed_files + + try: + ast_result = future.result(timeout=120) + except Exception as exc: + ast_result = AstImpactResult( + 
ast_error=f"AST worker failed: {exc}" + ) + + if ast_result.ast_error: + ast_errors[relpath_or_abs(source_path, repo_root)] = ( + ast_result.ast_error + ) + + benchmark_names = ast_result.benchmark_names + if not benchmark_names: + benchmark_names = benchmark_names_from_source(source_path) + benchmark_target_to_names[target_name].update(benchmark_names) + + if ast_result.affected_names: + benchmark_target_to_affected[target_name].update( + ast_result.affected_names + ) + elif source_is_changed or ast_result.ast_error: + benchmark_target_to_affected[target_name].update( + benchmark_names + ) + if benchmark_names: + ast_fallback_used = True + + if infra_change and benchmark_targets: + for target_name in sorted(benchmark_targets): + for entry in target_to_entries.get(target_name, []): + names = benchmark_names_from_source(entry.source) + benchmark_target_to_names[target_name].update(names) + benchmark_target_to_affected[target_name].update(names) + + if infra_change: + affected_benchmark_targets = sorted(benchmark_targets) + else: + affected_benchmark_targets = sorted( + target for target, names in benchmark_target_to_affected.items() if names + ) + + all_affected_benchmarks: set[str] = set() + for names in benchmark_target_to_affected.values(): + all_affected_benchmarks.update(names) + + dep_scan_failures = { + relpath_or_abs(entry.source, repo_root): entry.dep_error + for entry in dependency_scan_entries + if entry.dep_error + } + + scope_mode = "normal" + if infra_change: + scope_mode = "infra_fallback" + elif ast_fallback_used: + scope_mode = "ast_fallback" + + report: dict[str, Any] = { + "baseline": cli.baseline, + "head": cli.head, + "include_working_tree": cli.include_working_tree, + "changed_symbols": sorted(changed_symbol_names), + "compile_commands": relpath_or_abs(compile_commands_path, repo_root), + "changed_files": sorted( + relpath_or_abs(path, repo_root) for path in changed_files + ), + "affected_targets": sorted(affected_targets), + 
"affected_benchmark_targets": affected_benchmark_targets, + "affected_benchmarks": { + target: sorted(names) + for target, names in sorted(benchmark_target_to_affected.items()) + if names + }, + "suggested_filter_regex": regex_for_benchmarks(all_affected_benchmarks), + "dependency_entries_scanned": len(dependency_scan_entries), + "benchmark_entries_scanned": len(benchmark_entries), + "ast_entries_scanned": ast_entries_scanned, + "scope_mode": scope_mode, + "dependency_scan_failures": dep_scan_failures, + "ast_failures": ast_errors, + "warnings": warnings, + } + + if cli.format == "json": + json.dump(report, sys.stdout, indent=2) + sys.stdout.write("\n") + return 0 + + print(f"Baseline: {cli.baseline}") + print(f"Head: {cli.head}") + print(f"Compile commands: {report['compile_commands']}") + print(f"Scope mode: {report['scope_mode']}") + print( + "Scan counts: " + f"dependency={report['dependency_entries_scanned']}, " + f"benchmark={report['benchmark_entries_scanned']}, " + f"ast={report['ast_entries_scanned']}" + ) + print("") + + print(f"Changed files ({len(report['changed_files'])}):") + for item in report["changed_files"]: + print(f"- {item}") + if not report["changed_files"]: + print("- none") + + print("") + print(f"Affected targets ({len(report['affected_targets'])}):") + for item in report["affected_targets"]: + print(f"- {item}") + if not report["affected_targets"]: + print("- none") + + print("") + print(f"Affected benchmark targets ({len(report['affected_benchmark_targets'])}):") + for item in report["affected_benchmark_targets"]: + print(f"- {item}") + if not report["affected_benchmark_targets"]: + print("- none") + + print("") + print("Affected benchmark functions:") + if report["affected_benchmarks"]: + for target, names in report["affected_benchmarks"].items(): + print(f"- {target}:") + for name in names: + print(f" - {name}") + else: + print("- none") + + print("") + print("Suggested --benchmark_filter regex:") + 
print(report["suggested_filter_regex"] or "none") + + if dep_scan_failures: + print("") + print("Dependency scan failures:") + for source, error in dep_scan_failures.items(): + print(f"- {source}: {error}") + + if ast_errors: + print("") + print("AST failures:") + for source, error in ast_errors.items(): + print(f"- {source}: {error}") + + if warnings: + print("") + print("Warnings:") + for warning in warnings: + print(f"- {warning}") + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/agentic/cpp/skills/benchmarks-compare-revisions/SKILL.md b/agentic/cpp/skills/benchmarks-compare-revisions/SKILL.md new file mode 100644 index 0000000..caa9542 --- /dev/null +++ b/agentic/cpp/skills/benchmarks-compare-revisions/SKILL.md @@ -0,0 +1,225 @@ +--- +name: benchmarks-compare-revisions +description: Compare benchmark performance between two git revisions via Google Benchmark compare.py, with optional hardware-counter comparison from diagnostic libpfm builds. +--- + +# Benchmarks Compare Revisions Skill + +Use this skill to compare performance between two git revisions. + +This workflow now depends on: + +1. `../benchmarks-affected/SKILL.md` to determine affected benchmark targets/functions and produce a benchmark filter. +2. `../benchmarks/SKILL.md` for build/run operational details. + +## Goal + +Build two separate benchmark binaries using short commit hashes as build suffixes, compare timing results with Google Benchmark compare.py, and optionally compare hardware counters across the same revisions. + +## Step 0 — Choose revisions, hashes, and options + +Pick a baseline and a contender revision. Use short commit hashes to suffix build directories so builds do not collide. + +Optional behavior flags: + +- `COLLECT_COUNTERS=1` to enable hardware-counter collection and analysis in addition to timing comparison. +- `COLLECT_COUNTERS=0` to run timing-only comparison. 
+ +Counter collection is Linux-only and requires: + +- diagnostic builds with `BENCHMARK_ENABLE_LIBPFM=ON` +- perf permissions on the host (for access to performance counters) + +Example: +```bash +BASELINE=abc1234 +CONTENDER=def5678 +``` + +## Step 1 — Compute affected benchmark scope first + +Run `benchmarks-affected` from the contender checkout to derive the compare scope. + +Do not duplicate `benchmarks-affected` internals here (compile database selection, AST analysis, or fallback heuristics). Follow that skill directly and consume only its outputs. + +Inputs to pass through: + +- `--baseline ${BASELINE}` +- optional compile-commands path if auto-detection is not desired +- optional output format (`json` recommended for parsing) + +Consume these outputs from `benchmarks-affected`: + +- `suggested_filter_regex` -> set `FILTER` +- `affected_benchmark_targets` -> optionally constrain which benchmark binary/binaries to run +- `affected_benchmarks` -> function-level scope for validation/reporting + +If `FILTER` is empty, fall back to full benchmark binary compare (conservative mode). + +## Step 2 — Build both revisions + +Use the existing benchmarks skill build steps, but set the build suffix to include the short hash for each revision. + +Always build Release timing binaries. + +If `COLLECT_COUNTERS=1`, also build diagnostic binaries (RelWithDebInfo + libpfm) for both revisions. 
+ +```bash +# Baseline +BUILD_SUFFIX=bench_${BASELINE} +git checkout ${BASELINE} +# Follow ../benchmarks/SKILL.md timing build instructions with this suffix +# If COLLECT_COUNTERS=1, also follow the diagnostic build instructions with this suffix + +# Contender +BUILD_SUFFIX=bench_${CONTENDER} +git checkout ${CONTENDER} +# Follow ../benchmarks/SKILL.md timing build instructions with this suffix +# If COLLECT_COUNTERS=1, also follow the diagnostic build instructions with this suffix +``` + +Expected build trees: + +- Timing: `build/benchmarks-all_bench_` +- Counters (optional): `build/benchmarks-diagnostic_bench_` + +## Step 3 — Compare using compare.py + +Use Google Benchmark compare tooling with a JSON-first flow to avoid long-running binary-vs-binary retries. + +Locate compare.py from the Google Benchmark dependency (installed under the build tree): +```bash +COMPARE_PY=build/benchmarks-all_bench_${BASELINE}/_deps/googlebenchmark-src/tools/compare.py +``` + +Verify Python deps once (compare.py imports numpy/scipy): +```bash +python3 -c "import numpy, scipy" +``` + +Generate baseline/contender JSON sequentially with explicit file outputs: +```bash +BENCH_CPU=${BENCH_CPU:-0} +BENCH_RUN="taskset -c ${BENCH_CPU}" +BASE_JSON=/tmp/bench_${BASELINE}.json +CONT_JSON=/tmp/bench_${CONTENDER}.json + +${BENCH_RUN} build/benchmarks-all_bench_${BASELINE}/benchmarks \ + --benchmark_report_aggregates_only=true \ + --benchmark_display_aggregates_only=true \ + --benchmark_format=json \ + --benchmark_out=${BASE_JSON} > /tmp/bench_${BASELINE}.log 2>&1 + +${BENCH_RUN} build/benchmarks-all_bench_${CONTENDER}/benchmarks \ + --benchmark_report_aggregates_only=true \ + --benchmark_display_aggregates_only=true \ + --benchmark_format=json \ + --benchmark_out=${CONT_JSON} > /tmp/bench_${CONTENDER}.log 2>&1 +``` + +Validate JSON before comparing: +```bash +python3 -m json.tool ${BASE_JSON} > /dev/null +python3 -m json.tool ${CONT_JSON} > /dev/null +``` + +Run the comparison: +```bash +python3 
${COMPARE_PY} -a benchmarks ${BASE_JSON} ${CONT_JSON} +``` + +Use the affected filter from Step 1 when generating JSON files: +```bash +if [ -n "${FILTER}" ]; then + FILTER_ARG="--benchmark_filter=${FILTER}" +else + FILTER_ARG="" +fi + +${BENCH_RUN} build/benchmarks-all_bench_${BASELINE}/benchmarks ${FILTER_ARG} --benchmark_report_aggregates_only=true --benchmark_display_aggregates_only=true ... +${BENCH_RUN} build/benchmarks-all_bench_${CONTENDER}/benchmarks ${FILTER_ARG} --benchmark_report_aggregates_only=true --benchmark_display_aggregates_only=true ... +``` + +## Step 3b — Compare hardware counters (optional, Linux only) + +Run this step only when `COLLECT_COUNTERS=1`. + +1. Preflight first with one tiny counter-enabled benchmark run from a diagnostic binary. If output includes warnings such as `Failed to get a file descriptor for performance counter`, mark counters unavailable and skip counter collection. +2. Run baseline and contender diagnostic binaries sequentially with explicit JSON outputs and the same filter scope: + +```bash +BASE_COUNTERS_JSON=/tmp/bench_counters_${BASELINE}.json +CONT_COUNTERS_JSON=/tmp/bench_counters_${CONTENDER}.json + +${BENCH_RUN} build/benchmarks-diagnostic_bench_${BASELINE}/benchmarks \ + ${FILTER_ARG} \ + --benchmark_counters_tabular=true \ + --benchmark_format=json \ + --benchmark_out=${BASE_COUNTERS_JSON} > /tmp/bench_counters_${BASELINE}.log 2>&1 + +${BENCH_RUN} build/benchmarks-diagnostic_bench_${CONTENDER}/benchmarks \ + ${FILTER_ARG} \ + --benchmark_counters_tabular=true \ + --benchmark_format=json \ + --benchmark_out=${CONT_COUNTERS_JSON} > /tmp/bench_counters_${CONTENDER}.log 2>&1 +``` + +3. Validate JSON files before consuming: + +```bash +python3 -m json.tool ${BASE_COUNTERS_JSON} > /dev/null +python3 -m json.tool ${CONT_COUNTERS_JSON} > /dev/null +``` + +4. 
Collect and compare these counter families when present: + +- `instructions`, `cycles` +- `cache-misses`, `cache-references` +- `branch-misses`, `branches` +- `L1-dcache-load-misses` + +5. Compute derived metrics when denominators are non-zero: + +- IPC = `instructions / cycles` +- Cache miss rate = `cache-misses / cache-references` +- Branch mispredict rate = `branch-misses / branches` + +6. Pair baseline and contender rows by benchmark name, compute deltas, and flag anomalies where timing direction conflicts with key counter direction. + +7. Emit a canonical summary table for downstream consumers: + +```markdown +| Benchmark | IPC (base -> new) | Cache Miss Rate (base -> new) | Branch Mispredict (base -> new) | Anomaly? | +|---|---:|---:|---:|---| +``` + +## Retry and Timeout Policy + +1. Run benchmarks sequentially; do not background with `nohup`/`&`. +2. If a run times out, narrow filter and retry once. +3. Maximum retries per benchmark group: 1. +4. If still failing, emit blocked/partial findings instead of repeated attempts. + +Apply this policy to both timing and counter runs. + +## Step 4 — Record findings + +Capture and return: + +- compare.py output (terminal transcript or redirected file) +- effective filter used +- timing JSON artifacts for baseline and contender +- `counters_available` (`true`/`false`) +- if `counters_available=false`, a reason string (unsupported OS, missing libpfm, perf permission denied, preflight failure) +- if counters are available: counter JSON artifacts, derived metrics table, and anomaly list + +## Best Practices / Guardrails + +1. **Release only**: never compare Debug binaries. +2. **Short hash suffixes**: keep build dirs isolated per revision (example: `bench_`). +3. **Same host, same conditions**: do not compare across different machines or power profiles. +4. **Filter from analysis**: use `benchmarks-affected` output instead of hand-crafted filters whenever possible. +5. 
**Pin process and frequency**: use `taskset -c ${BENCH_CPU:-0}` for all benchmark executions and follow benchmark skill guidance on CPU governor. +6. **Counter collection is optional and Linux-only**: when unavailable, return timing-only outputs with `counters_available=false`. +7. **Always preflight counters**: do not run full counter collection if preflight fails. +8. **Keep build types separated**: timing uses `benchmarks-all_*` Release builds; counters use `benchmarks-diagnostic_*` RelWithDebInfo builds; never Debug. diff --git a/agentic/cpp/skills/benchmarks/SKILL.md b/agentic/cpp/skills/benchmarks/SKILL.md new file mode 100644 index 0000000..92af231 --- /dev/null +++ b/agentic/cpp/skills/benchmarks/SKILL.md @@ -0,0 +1,209 @@ +--- +name: benchmarks +description: Run Google Benchmark binaries, including filtering, hardware counters, and perf profiling. +--- + +# Benchmarks Skill + +You now have expertise in running and interpreting Google Benchmark suites. +Follow these workflows: + +## Build Directory Convention + +Use a short commit hash suffix for committed revisions: + +```bash +BUILD_SUFFIX=$(git rev-parse --short HEAD) +``` + +If the worktree has uncommitted changes, append a descriptive suffix so results +cannot be confused with a clean HEAD build: + +```bash +BUILD_SUFFIX=$(git rev-parse --short HEAD)-dirty +``` + +If not a git repository, use + +```bash +BUILD_SUFFIX=agent +``` + +## CRITICAL: Never Run Benchmarks from a Debug Build + +> **Always pass `--config Release` (or `--config RelWithDebInfo`) to `cmake --build`.** +> Multi-config generators (MSVC, Xcode) default to `Debug` if no `--config` is given. +> Google Benchmark will print `***WARNING*** Library was built as DEBUG` and timings will +> be 3-10x slower and meaningless. Always verify the binary path contains `Release/` or +> `RelWithDebInfo/`, never `Debug/`. + +## Step 1 — Build + +If the benchmarks affected by the changes are easy to identify, build only the related targets.
+ +**Pure timing (benchmarks, Release):** +```bash +cmake -B build/benchmarks_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Release +cmake --build build/benchmarks_${BUILD_SUFFIX} --config Release -j +``` + +**Hardware counters / verbose report (benchmarks-diagnostic, RelWithDebInfo, Linux only):** +```bash +cmake -B build/benchmarks-diagnostic_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBENCHMARK_ENABLE_LIBPFM=ON +cmake --build build/benchmarks-diagnostic_${BUILD_SUFFIX} --config RelWithDebInfo -j +``` + +For repository-specific benchmark examples, check +`agentic/local/cpp/skills/benchmarks/EXAMPLES.md` when present. + +## Step 2 — Run + +Prefer running benchmarks with a filter that selects only the benchmarks expected to be affected. + +Unless the user explicitly asks otherwise, pin benchmark execution to one CPU with +`taskset` to reduce scheduler noise. Use CPU 0 by default, or override with +`BENCH_CPU=` when a better isolated/performance core is known: + +```bash +BENCH_CPU=${BENCH_CPU:-0} +BENCH_RUN="taskset -c ${BENCH_CPU}" +``` + +If `taskset` is unavailable or fails on the host, report that benchmark results +are unpinned and noisier. + +Execution guardrails: +- Run benchmark commands sequentially in CI. +- Avoid background jobs (`nohup`, `&`) for benchmark collection. +- Always write machine-readable results with `--benchmark_out` when data is later parsed. + +### Available benchmark binaries + +Discover benchmark binary names from the repository's build system. Common +locations include `build/**/` for single-config generators and +`build/**/Release/` for multi-config generators. Repository-specific +binary lists belong in the repo-local benchmark examples overlay.
+ +Binary paths vary by generator type: + +| Generator | Path pattern | +|-----------|-------------| +| MSVC / Xcode (multi-config) | `build/_/Release/` | +| Ninja / Make (single-config) | `build/_/` | + +### Run all benchmarks in a binary + +```bash +# Multi-config (MSVC/Xcode) +${BENCH_RUN} build/benchmarks_${BUILD_SUFFIX}/Release/benchmarks + +# Single-config (Ninja/Make) +${BENCH_RUN} build/benchmarks_${BUILD_SUFFIX}/benchmarks +``` + +### Filter benchmarks with a regex (FILTER parameter) + +```bash +FILTER="BM_Foo" # change to match benchmark names in the target binary + +# Multi-config +${BENCH_RUN} build/benchmarks_${BUILD_SUFFIX}/Release/benchmarks --benchmark_filter="${FILTER}" + +# Single-config +${BENCH_RUN} build/benchmarks_${BUILD_SUFFIX}/benchmarks --benchmark_filter="${FILTER}" +``` + +Examples: +```bash +# Only one benchmark family +... --benchmark_filter="BM_Foo" + +# Only one layout/parameter family +... --benchmark_filter="BM_Foo.*Variant" + +# List all available benchmark names without running +... --benchmark_list_tests=true +``` + +### Run with hardware counters (benchmarks-diagnostic build, Linux only) + +The `--benchmark_perf_counters` flag requests hardware counter collection via libpfm. Counter names are platform-specific but common ones include `CYCLES`, `INSTRUCTIONS`, `CACHE-MISSES`, `CACHE-REFERENCES`, `BRANCH-MISSES`, `BRANCH-INSTRUCTIONS`. 
+ +```bash +${BENCH_RUN} build/benchmarks-diagnostic_${BUILD_SUFFIX}/RelWithDebInfo/benchmarks \ + --benchmark_filter="${FILTER}" \ + --benchmark_perf_counters=CYCLES,INSTRUCTIONS,CACHE-MISSES \ + --benchmark_counters_tabular=true +``` + +### Save results to file + +```bash +${BENCH_RUN} build/benchmarks_${BUILD_SUFFIX}/Release/benchmarks \ + --benchmark_filter="${FILTER}" \ + --benchmark_report_aggregates_only=true \ + --benchmark_display_aggregates_only=true \ + --benchmark_format=json \ + --benchmark_out=results.json +``` + +Validate output before consuming: +```bash +python3 -m json.tool results.json > /dev/null +``` + +## Step 3 — Profile with perf (Linux only) + +Use when hardware counters alone are not enough and you need a full call-graph profile for post-processing. + +**Record:** +```bash +perf record -g -F 999 \ + -- ${BENCH_RUN} build/benchmarks-diagnostic_${BUILD_SUFFIX}/benchmarks \ + --benchmark_filter="${FILTER}" \ + --benchmark_min_time=5s +``` + +**Quick report (terminal):** +```bash +perf report --stdio +``` + +**Flame graph (requires FlameGraph scripts):** +```bash +perf script | stackcollapse-perf.pl | flamegraph.pl > flamegraph.html +``` + +**Export for external tools (Hotspot, Firefox Profiler):** +```bash +perf script -F +pid > perf.data.txt +# or open with `hotspot perf.data` +``` + +## Useful Benchmark Flags + +| Flag | Purpose | +|------|---------| +| `--benchmark_filter=` | Run only matching benchmarks | +| `--benchmark_list_tests=true` | List names without running | +| `--benchmark_repetitions=` | Repeat each benchmark n times | +| `--benchmark_min_time=` | Minimum run time per benchmark | +| `--benchmark_format=json` | Machine-readable output | +| `--benchmark_out=` | Save output to file | +| `--benchmark_perf_counters=CYCLES,INSTRUCTIONS,...` | Collect hardware perf counters (requires libpfm build) | +| `--benchmark_counters_tabular=true` | Align user/perf counter columns into a table | +| `--benchmark_time_unit=ms` | Change display 
unit (ns/us/ms/s) | + +## Best Practices + +1. **Never run from a Debug binary**: always use `--config Release` at build time; check path contains `Release/` +2. **Use benchmarks for clean timing**: Release optimizations, no debug info, no libpfm overhead +3. **Use benchmarks-diagnostic for hardware counters**: RelWithDebInfo + libpfm; Linux only +4. **Use perf for deep profiling**: when counters point to a hotspot but don't explain it +5. **Pin benchmark process** with `taskset -c ${BENCH_CPU:-0}` unless unavailable +6. **Pin CPU frequency** before timing runs: `sudo cpupower frequency-set -g performance` +7. **Filter to reduce noise**: narrow the filter regex to the benchmark under investigation +8. **Save JSON output** when comparing before/after changes: use `--benchmark_out` and diff the files +9. **Fail fast on environment issues**: precheck Python deps used by compare tooling (`numpy`, `scipy`) +10. **Use explicit retry limits**: on timeout, narrow scope and retry once; avoid repeated full-suite attempts +11. **Preflight perf counters**: run a tiny counter-enabled benchmark first; if counters unavailable, skip counter workflow diff --git a/agentic/cpp/skills/cmake/SKILL.md b/agentic/cpp/skills/cmake/SKILL.md new file mode 100644 index 0000000..a659e1a --- /dev/null +++ b/agentic/cpp/skills/cmake/SKILL.md @@ -0,0 +1,96 @@ +--- +name: cmake +description: Compile and build CMake projects, including configuring build types, options, and running test binaries. +--- + +# CMake Build Skill + +You now have expertise in building and configuring CMake projects. 
Follow these workflows: + +## Build Directory Convention + +Use a short commit hash suffix for committed revisions: + +```bash +BUILD_SUFFIX=$(git rev-parse --short HEAD) +``` + +If the worktree has uncommitted changes, append a descriptive suffix so generated +artifacts cannot be confused with a clean HEAD build: + +```bash +BUILD_SUFFIX=$(git rev-parse --short HEAD)-dirty +``` + +If not a git repository, use + +```bash +BUILD_SUFFIX=agent +``` + +Build directories follow the pattern `build/_`. + +## Using Presets (Preferred When Available) + +> **Important**: `cmake --preset` sets cache variables and generator but its `binaryDir` cannot be +> overridden from the command line. To use a preset's settings with a custom build dir, pass the +> relevant `-D` flags explicitly together with `-B`. Use `--preset` only to discover what flags a +> preset applies. + +**List available presets:** +```bash +cmake --list-presets +``` + +**Replicate a preset's settings with a custom suffix build dir:** + +Release: +```bash +cmake -B build/release_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Release +cmake --build build/release_${BUILD_SUFFIX} -j +``` + +Debug: +```bash +cmake -B build/debug_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Debug +cmake --build build/debug_${BUILD_SUFFIX} -j +``` + +AddressSanitizer: +```bash +cmake -B build/asan_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Debug -DENABLE_ADDRESS_SANITIZER=ON +cmake --build build/asan_${BUILD_SUFFIX} -j +``` + +Coverage: +```bash +cmake -B build/coverage_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=ON +cmake --build build/coverage_${BUILD_SUFFIX} -j +``` + +Benchmarks: +```bash +cmake -B build/benchmarks_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Release -DBUILD_BENCHMARKS=ON +cmake --build build/benchmarks_${BUILD_SUFFIX} -j +``` + +## Additional Feature Options + +Feature flags are project-specific. Inspect `CMakeLists.txt`, +`CMakePresets.json`, or `cmake -LAH ` before toggling options. 
For +repository-specific examples, check +`agentic/local/cpp/skills/cmake/EXAMPLES.md` when present. + +**Example feature toggle:** +```bash +cmake -B build/release_${BUILD_SUFFIX} -DCMAKE_BUILD_TYPE=Release -DENABLE_FEATURE=ON +cmake --build build/release_${BUILD_SUFFIX} -j +``` + +## Best Practices + +1. **Use out-of-source builds**: Keep build artifacts in `build/_` directories +2. **Presets fix binaryDir**: `--preset` cannot be combined with `-B` to change the build dir; replicate `-D` flags manually with `-B` instead +3. **Reconfigure when options change**: Rerun the `cmake -B ...` step when toggling options +4. **Clean build directory when needed**: Delete the entire build folder for a fresh configuration +5. **Match build type to task**: Release for performance work, Debug/ASan for correctness diff --git a/agentic/cpp/skills/paper-search/SKILL.md b/agentic/cpp/skills/paper-search/SKILL.md new file mode 100644 index 0000000..27f108c --- /dev/null +++ b/agentic/cpp/skills/paper-search/SKILL.md @@ -0,0 +1,106 @@ +--- +name: paper-search +description: "Search for academic papers across Semantic Scholar, arXiv, and CrossRef APIs. Returns unified results with title, authors, year, abstract, DOI, venue, and citation counts. Integrates with Zotero MCP tools for adding found papers to a Zotero library and generating BibTeX entries. Use when the user asks to find papers, search for related work, look up a DOI, or discover references on a topic." +--- + +# Paper Search + +Search external academic APIs for papers. Provides a unified interface across Semantic Scholar, arXiv, and CrossRef with optional Zotero integration. + +## Workflow + +### 1. Search for Papers + +Run the search script from the skill's `scripts/` directory: + +```bash +python3 scripts/search_papers.py --query "topic" --source semantic_scholar --limit 10 --format compact +``` + +Available sources: +- `semantic_scholar` — Default. Best for comprehensive search with citation counts. 
+- `arxiv` — Best for preprints and recent unpublished work. +- `crossref` — Best for published works and DOI-based metadata. +- `all` — Query all three sources (slower, results combined). + +Output formats: +- `json` — Full JSON output (default). Good for programmatic use. +- `compact` — Human-readable summary with title, authors, year, venue, citations, and truncated abstract. + +### 2. DOI Lookup + +Look up a specific paper by DOI: + +```bash +python3 scripts/search_papers.py --doi "10.1145/1234567.1234568" --format compact +``` + +### 3. Download PDFs + +Download open-access PDFs directly from search results: + +```bash +python3 scripts/search_papers.py --query "wavelet tree" --source arxiv --limit 3 --download ~/papers +``` + +- arXiv papers always have PDFs available. +- Semantic Scholar provides `openAccessPdf` URLs when available. +- CrossRef may provide PDF links via publisher APIs. + +The `--download` flag adds a `downloaded_path` field to each result in JSON output. + +### 4. Add to Zotero + +**Option A: Via DOI/URL (metadata only)** + +After finding relevant papers, add them to Zotero using the Zotero MCP tools: + +- `zotero_add_by_doi` — Preferred when DOI is available. Fetches full metadata from CrossRef. +- `zotero_add_by_url` — Use for arXiv papers or when only a URL is available. + +**Option B: Via downloaded PDF (metadata + attachment)** + +Download the PDF first, then add to Zotero with the PDF file: + +```bash +# Step 1: Download PDFs and get paths in JSON +python3 scripts/search_papers.py --doi "10.1007/978-3-540-73420-8_13" --download ~/papers --format json + +# Step 2: Use zotero_zotero_add_from_file with the downloaded_path +``` + +The agent should call `zotero_zotero_add_from_file` with the `downloaded_path` from the JSON output. This attaches the PDF to the Zotero item and attempts DOI-based metadata extraction. 
+ +**Option C: Download + Zotero in one step** + +Use `--zotero` to download PDFs with paths formatted for easy Zotero import: + +```bash +python3 scripts/search_papers.py -q "succinct data structures" -s arxiv -n 3 --zotero --download ~/papers +``` + +After adding papers, update the semantic search database: + +``` +zotero_update_search_database +``` + +### 5. Generate BibTeX + +For papers already in Zotero, use `zotero_get_item_metadata` with `format: "bibtex"` to get BibTeX entries. Alternatively, use `zotero_fetch` for full metadata. + +For papers NOT in Zotero, BibTeX can be constructed from the search results' JSON fields (`authors`, `year`, `title`, `venue`, `doi`). + +## Guidance + +- Start with `semantic_scholar` for general queries — it has the broadest coverage and citation data. +- Use `arxiv` when looking for very recent work or preprints in CS/ML/physics. +- Use `crossref` for DOI lookups or when Semantic Scholar returns no results. +- When using `--source all`, results may contain duplicates (same paper from different sources). Deduplicate by DOI or title similarity. +- Citation counts are approximate and may differ across sources. +- arXiv results return the arXiv ID (e.g., `2301.12345`) which can be used with `zotero_add_by_url` via `https://arxiv.org/abs/2301.12345`. + +## API Quirks + +- **arXiv `atom:id` is NOT a DOI** — it contains an arXiv URL like `http://arxiv.org/abs/2301.12345`. Store the extracted ID in `arxiv_id` only; set `doi` to `None` for arXiv results. Writing the arXiv URL into `doi` produces invalid DOI metadata downstream (e.g., Zotero import). +- **CrossRef `select` must include `link`** — the `link` field is needed for `pdf_url` extraction. If omitted from `select`, the API won't return link metadata and `pdf_url` will silently be empty for all CrossRef results. 
diff --git a/agentic/cpp/skills/paper-search/references/api_reference.md b/agentic/cpp/skills/paper-search/references/api_reference.md new file mode 100644 index 0000000..dcb5aa5 --- /dev/null +++ b/agentic/cpp/skills/paper-search/references/api_reference.md @@ -0,0 +1,32 @@ +# External Paper Search APIs + +## Semantic Scholar + +- **Base URL**: `https://api.semanticscholar.org/graph/v1/` +- **Rate limit**: 1 req/sec without API key, 10 req/sec with key +- **No auth required** for basic usage +- **Fields**: title, authors, year, abstract, externalIds (DOI, ArXiv), venue, citationCount +- **Best for**: Comprehensive academic search with citation counts + +## arXiv + +- **Base URL**: `http://export.arxiv.org/api/query` +- **Rate limit**: Be nice, ~3 sec between requests +- **No auth required** +- **Returns**: XML (Atom feed) +- **Best for**: Preprints, recent work not yet published + +## CrossRef + +- **Base URL**: `https://api.crossref.org/` +- **Rate limit**: 50 req/sec with polite pool (include `mailto` header) +- **No auth required** +- **Best for**: DOI lookup, published works, metadata enrichment + +## Zotero Integration + +After finding papers via external search, use Zotero MCP tools: + +1. `zotero_add_by_doi` — Add paper by DOI (fetches metadata from CrossRef) +2. `zotero_add_by_url` — Add paper by URL (arXiv, DOI URLs) +3. `zotero_update_search_database` — Update semantic search index after adding diff --git a/agentic/cpp/skills/paper-search/scripts/search_papers.py b/agentic/cpp/skills/paper-search/scripts/search_papers.py new file mode 100755 index 0000000..c65351d --- /dev/null +++ b/agentic/cpp/skills/paper-search/scripts/search_papers.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +"""Search external APIs for academic papers. + +Sources: Semantic Scholar, arXiv, CrossRef. +Outputs unified JSON to stdout. 
+ +Usage: + python3 search_papers.py --query "wavelet tree succinct" --source semantic_scholar --limit 10 + python3 search_papers.py --query "succinct data structures" --source arxiv --limit 5 + python3 search_papers.py --doi "10.1145/123" --source crossref + python3 search_papers.py --query "rank select" --source all --limit 5 + python3 search_papers.py --query "wavelet tree" --source arxiv --limit 1 --download ~/papers + python3 search_papers.py --doi "10.1007/978-3-540-73420-8_13" --download ~/papers --zotero +""" + +import argparse +import json +import os +import re +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +from pathlib import Path +from typing import Any + + +def _get(url: str, headers: dict[str, str] | None = None, timeout: int = 30, + retries: int = 2) -> dict: + for attempt in range(retries + 1): + req = urllib.request.Request(url, headers=headers or {}) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.loads(resp.read().decode()) + except urllib.error.HTTPError as e: + if e.code == 429 and attempt < retries: + wait = 2 ** attempt + print(f"Rate limited, retrying in {wait}s...", file=sys.stderr) + time.sleep(wait) + continue + print(f"HTTP {e.code}: {e.reason} for {url}", file=sys.stderr) + return {} + except urllib.error.URLError as e: + print(f"URL error: {e.reason} for {url}", file=sys.stderr) + return {} + return {} + + +def search_semantic_scholar(query: str, limit: int = 10) -> list[dict[str, Any]]: + """Search Semantic Scholar API.""" + params = urllib.parse.urlencode({ + "query": query, + "limit": limit, + "fields": "title,authors,year,abstract,externalIds,venue,publicationDate,citationCount,url,openAccessPdf", + }) + url = f"https://api.semanticscholar.org/graph/v1/paper/search?{params}" + data = _get(url, headers={"Accept": "application/json"}) + results = [] + for paper in data.get("data", []): + ext_ids = paper.get("externalIds") or {} + pdf_info = 
def search_arxiv(query: str, limit: int = 10) -> list[dict[str, Any]]:
    """Search the arXiv API and return results in the unified paper format.

    Query construction depends on the number of whitespace-separated words:
    one word is searched as-is, two words as an exact phrase, and three or
    more as "exact phrase OR all words present".

    Args:
        query: Free-text search query.
        limit: Maximum number of results requested from arXiv.

    Returns:
        A list of paper dicts with the keys ``source``, ``title``,
        ``authors``, ``year``, ``abstract``, ``doi``, ``arxiv_id``,
        ``venue``, ``citation_count``, ``url`` and ``pdf_url``. ``doi`` and
        ``citation_count`` are always ``None`` for this source. An empty
        list is returned for an empty query or when the request or the XML
        parsing fails (the error is reported on stderr).
    """
    import xml.etree.ElementTree as ET

    words = query.split()
    if not words:
        # An empty query would otherwise build the malformed expression
        # `(all:"") OR ()`; there is nothing to search for.
        return []

    phrase = " ".join(words)
    if len(words) == 1:
        search_term = f"all:{phrase}"
    elif len(words) == 2:
        # Phrase search for 2-word queries
        search_term = f'all:"{phrase}"'
    else:
        # Exact phrase matches OR papers that contain every individual term.
        and_terms = " AND ".join(f"all:{w}" for w in words)
        search_term = f'(all:"{phrase}") OR ({and_terms})'

    params = urllib.parse.urlencode({
        "search_query": search_term,
        "start": 0,
        "max_results": limit,
    })
    url = f"http://export.arxiv.org/api/query?{params}"
    req = urllib.request.Request(url)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            xml_data = resp.read().decode()
    except (urllib.error.URLError, TimeoutError) as e:
        # HTTPError is a URLError subclass; TimeoutError covers a stalled read.
        print(f"arXiv API error: {e}", file=sys.stderr)
        return []

    try:
        root = ET.fromstring(xml_data)
    except ET.ParseError as e:
        print(f"arXiv API error: malformed XML response ({e})", file=sys.stderr)
        return []

    def squash(text: str) -> str:
        # Feed text is hard-wrapped with newline + indentation; collapse
        # every whitespace run so titles do not contain double spaces.
        return " ".join(text.split())

    ns = {"atom": "http://www.w3.org/2005/Atom"}
    results = []
    for entry in root.findall("atom:entry", ns):
        published = entry.findtext("atom:published", "", ns)
        year_text = published[:4]
        arxiv_id = ""
        for link in entry.findall("atom:link", ns):
            href = link.get("href", "")
            if "arxiv.org/abs/" in href:
                arxiv_id = href.split("/abs/")[-1]
                break
        results.append({
            "source": "arxiv",
            "title": squash(entry.findtext("atom:title", "", ns)),
            "authors": [a.findtext("atom:name", "", ns)
                        for a in entry.findall("atom:author", ns)],
            "year": int(year_text) if year_text.isdigit() else None,
            "abstract": squash(entry.findtext("atom:summary", "", ns)),
            # The Atom id is an arXiv URL, not a DOI, so doi stays None.
            "doi": None,
            "arxiv_id": arxiv_id,
            "venue": "arXiv",
            "citation_count": None,
            "url": f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "",
            "pdf_url": f"https://arxiv.org/pdf/{arxiv_id}" if arxiv_id else None,
        })
    return results
def _sanitize_filename(title: str) -> str:
    """Build a filesystem-friendly base name (no extension) from a paper title.

    The title is lower-cased, every character that is not a word character,
    whitespace or a hyphen is dropped, whitespace runs become a single
    underscore, and the result is capped at 80 characters.

    Args:
        title: Paper title as returned by a search source; may be empty.

    Returns:
        The cleaned name, or ``"paper"`` when nothing usable is left, so the
        caller never ends up writing a hidden file named ``.pdf``.
    """
    cleaned = re.sub(r"[^\w\s-]", "", title.lower())
    cleaned = re.sub(r"\s+", "_", cleaned.strip())
    # Truncation can cut right after a separator; do not leave it dangling.
    cleaned = cleaned[:80].rstrip("_-")
    return cleaned or "paper"
def main():
    """Command-line entry point: find papers, optionally download PDFs, print.

    Either ``--query`` or ``--doi`` must be given. With ``--doi`` a single
    lookup is performed and ``--source`` is ignored; otherwise the selected
    source (or every registered source for ``all``) is queried. When a
    download directory is set, each result gains a ``downloaded_path`` key
    (``None`` if the result had no ``pdf_url``). Results are
    printed to stdout as JSON or as a compact human-readable listing.

    Raises:
        SystemExit: With status 2 (via ``argparse``) when the arguments are
            invalid, i.e. neither ``--query`` nor ``--doi`` was supplied or
            ``--limit`` is not positive for a query search.
    """
    parser = argparse.ArgumentParser(description="Search for academic papers")
    parser.add_argument("--query", "-q", help="Search query")
    parser.add_argument("--doi", help="Look up a specific DOI")
    parser.add_argument("--source", "-s", default="semantic_scholar",
                        choices=["semantic_scholar", "arxiv", "crossref", "all"],
                        help="API source (default: semantic_scholar)")
    parser.add_argument("--limit", "-n", type=int, default=10,
                        help="Max results per source (default: 10)")
    parser.add_argument("--format", "-f", default="json",
                        choices=["json", "compact"],
                        help="Output format (default: json)")
    parser.add_argument("--download", "-d", metavar="DIR",
                        help="Download PDFs to DIR (requires pdf_url in results)")
    parser.add_argument("--zotero", "-z", action="store_true",
                        help="Download PDFs and output paths for Zotero import (implies --download)")
    args = parser.parse_args()

    if not args.query and not args.doi:
        parser.error("Either --query or --doi is required")
    if not args.doi and args.limit < 1:
        # The limit is only forwarded to the search APIs, not to DOI lookup.
        parser.error("--limit must be a positive integer")

    if args.zotero and not args.download:
        args.download = "."
    if args.download:
        # A quoted "~/papers" reaches us unexpanded by the shell.
        args.download = os.path.expanduser(args.download)

    results = []
    if args.doi:
        paper = lookup_doi(args.doi)
        if paper:
            results.append(paper)
    elif args.source == "all":
        for index, (name, func) in enumerate(SOURCES.items()):
            if index:
                # Pause between sources only; no need to wait after the last.
                time.sleep(1)
            try:
                results.extend(func(args.query, args.limit))
            except Exception as e:
                # One failing source must not discard the others' results.
                print(f"Error searching {name}: {e}", file=sys.stderr)
    else:
        results = SOURCES[args.source](args.query, args.limit)

    if args.download:
        for r in results:
            pdf_url = r.get("pdf_url")
            r["downloaded_path"] = (
                download_pdf(pdf_url, args.download, r) if pdf_url else None
            )

    if args.format == "json":
        print(json.dumps(results, indent=2))
        return

    for i, r in enumerate(results, 1):
        authors = ", ".join(r["authors"][:3])
        if len(r["authors"]) > 3:
            authors += " et al."
        # Every source sets "year", possibly to None, so a .get() default
        # would never apply; fall back explicitly.
        year = r.get("year") or "?"
        doi_str = f" DOI: {r['doi']}" if r.get("doi") else ""
        arxiv_str = f" arXiv: {r['arxiv_id']}" if r.get("arxiv_id") else ""
        # Compare against None so a genuine count of 0 is still shown.
        cite_str = (f" Citations: {r['citation_count']}"
                    if r.get("citation_count") is not None else "")
        pdf_str = f" PDF: {r['pdf_url']}" if r.get("pdf_url") else " PDF: N/A"
        dl_str = ""
        if r.get("downloaded_path"):
            dl_str = f" Downloaded: {r['downloaded_path']}"
        print(f"[{i}] {r['title']}")
        print(f" {authors} ({year}) — {r.get('venue', '')}")
        print(f" {r.get('url', '')}{doi_str}{arxiv_str}{cite_str}")
        print(f" {pdf_str}{dl_str}")
        if r.get("abstract"):
            abstract = r["abstract"][:200]
            if len(r["abstract"]) > 200:
                abstract += "..."
            print(f" {abstract}")
        print()
Follow these workflows: + +## Reading PDFs + +**Option 1: Quick text extraction (preferred)** +```bash +# Using pdftotext (poppler-utils) +pdftotext input.pdf - # Output to stdout +pdftotext input.pdf output.txt # Output to file + +# If pdftotext not available, try: +python3 -c " +import fitz # PyMuPDF +doc = fitz.open('input.pdf') +for page in doc: + print(page.get_text()) +" +``` + +**Option 2: Page-by-page with metadata** +```python +import fitz # pip install pymupdf + +doc = fitz.open("input.pdf") +print(f"Pages: {len(doc)}") +print(f"Metadata: {doc.metadata}") + +for i, page in enumerate(doc): + text = page.get_text() + print(f"--- Page {i+1} ---") + print(text) +``` + +## Creating PDFs + +**Option 1: From Markdown (recommended)** +```bash +# Using pandoc +pandoc input.md -o output.pdf + +# With custom styling +pandoc input.md -o output.pdf --pdf-engine=xelatex -V geometry:margin=1in +``` + +**Option 2: Programmatically** +```python +from reportlab.lib.pagesizes import letter +from reportlab.pdfgen import canvas + +c = canvas.Canvas("output.pdf", pagesize=letter) +c.drawString(100, 750, "Hello, PDF!") +c.save() +``` + +**Option 3: From HTML** +```bash +# Using wkhtmltopdf +wkhtmltopdf input.html output.pdf + +# Or with Python +python3 -c " +import pdfkit +pdfkit.from_file('input.html', 'output.pdf') +" +``` + +## Merging PDFs + +```python +import fitz + +result = fitz.open() +for pdf_path in ["file1.pdf", "file2.pdf", "file3.pdf"]: + doc = fitz.open(pdf_path) + result.insert_pdf(doc) +result.save("merged.pdf") +``` + +## Splitting PDFs + +```python +import fitz + +doc = fitz.open("input.pdf") +for i in range(len(doc)): + single = fitz.open() + single.insert_pdf(doc, from_page=i, to_page=i) + single.save(f"page_{i+1}.pdf") +``` + +## Key Libraries + +| Task | Library | Install | +|------|---------|---------| +| Read/Write/Merge | PyMuPDF | `pip install pymupdf` | +| Create from scratch | ReportLab | `pip install reportlab` | +| HTML to PDF | pdfkit | `pip 
install pdfkit` + wkhtmltopdf | +| Text extraction | pdftotext | `brew install poppler` / `apt install poppler-utils` | + +## Best Practices + +1. **Always check if tools are installed** before using them +2. **Handle encoding issues** - PDFs may contain various character encodings +3. **Large PDFs**: Process page by page to avoid memory issues +4. **OCR for scanned PDFs**: Use `pytesseract` if text extraction returns empty diff --git a/agentic/cpp/skills/setup-cpp-repo/SKILL.md b/agentic/cpp/skills/setup-cpp-repo/SKILL.md new file mode 100644 index 0000000..83201a4 --- /dev/null +++ b/agentic/cpp/skills/setup-cpp-repo/SKILL.md @@ -0,0 +1,136 @@ +--- +name: setup-cpp-repo +description: Scaffold a new C++20 repository with CMake, Google Test, Google Benchmark, CI workflows, Doxygen docs, and Chromium code style. Use when the user asks to create a new C++ project, set up a C++ library, or initialize a C++ repository with modern tooling. +--- + +# setup-cpp-repo + +## Overview + +This skill generates a complete modern C++20 project scaffold. 
The generated +repository is header-only by default and includes: + +- CMake build system with presets +- Google Test for unit testing +- Google Benchmark for performance benchmarks +- Doxygen documentation with doxygen-awesome-css theme +- GitHub Actions CI workflows (ASan, lint, coverage, docs) +- Chromium C++ code style via `.clang-format` +- `AGENTS.md` for AI coding assistant guidelines + +## When to Use This Skill + +Use this skill when: +- The user wants to create a new C++ library or project from scratch +- The user asks for a "C++ project template" or "C++ repo setup" +- The user needs CMake + Google Test + benchmark scaffolding +- The user wants header-only library conventions with optional SIMD-oriented + build flags and Doxygen docs + +Do **not** use this skill when: +- Working with an existing codebase (use the `cmake` skill instead) +- The project is not C++ (use a different skill) +- The user only wants a single file or snippet + +## Workflow + +### Step 1: Gather Parameters + +Ask the user for (or infer from context): +- **Project name** (required): Hyphenated lowercase identifier, e.g., `my-lib` +- **Namespace** (optional): C++ namespace. Defaults to project name with hyphens removed, e.g., `mylib` +- **Output directory** (optional): Where to create the project. Defaults to current directory. + +### Step 2: Run the Generator + +Execute the generation script: + +```bash +python3 agentic/cpp/skills/setup-cpp-repo/scripts/init_cpp_project.py \ + --name \ + [--namespace ] \ + [--output-dir ] +``` + +For concrete examples, check +`agentic/local/cpp/skills/setup-cpp-repo/EXAMPLES.md` when present. 
+ +### Step 3: Verify the Scaffold + +After generation, the project structure should look like: + +``` +/ +├── CMakeLists.txt +├── CMakePresets.json +├── .clang-format +├── .gitignore +├── README.md +├── AGENTS.md +├── include/ +│ └── / +│ └── .hpp +├── src/ +│ ├── tests/ +│ │ └── unittests.cpp +│ ├── benchmarks/ +│ │ └── benchmarks.cpp +│ └── docs/ +│ ├── Doxyfile.in +│ └── images/ +├── scripts/ +│ └── coverage_report.sh +└── .github/ + └── workflows/ + ├── build-test.yml + ├── linter.yml + ├── coverage.yml + └── doxygen.yml +``` + +### Step 4: Initial Build and Test + +Change into the project directory and run an initial build to verify everything works: + +```bash +cd +cmake --preset release +cmake --build --preset release -j +./build/release/unittests +``` + +If the build and tests pass, the scaffold is ready. + +### Step 5: Hand Off to cmake Skill + +After project creation, use the **`cmake` skill** (`../cmake/SKILL.md`) for all subsequent build operations. The `cmake` skill documents: +- Build directory conventions with git short-hash suffixes +- How to replicate preset settings with custom build directories +- AddressSanitizer, coverage, and benchmark workflows +- Best practices for out-of-source builds + +## Customization Guide + +### Adding More Test Executables + +Edit `CMakeLists.txt` and add new `add_executable` blocks under the `if(_TESTS)` section, following the pattern of the existing `unittests` target. + +Update `scripts/coverage_report.sh` to run any new test binaries. + +Update `.github/workflows/build-test.yml` to execute new test binaries in CI. + +### Adding More Benchmark Executables + +Edit `CMakeLists.txt` and add new `add_executable` blocks under the `if(_BENCHMARKS)` section, following the pattern of the existing `benchmarks` target. + +### Adding Third-Party Dependencies + +For header-only libraries, prefer `FetchContent` in `CMakeLists.txt`. For compiled libraries, consider vendoring or using a package manager (Conan, vcpkg). 
+ +### Modifying Doxygen Configuration + +Edit `src/docs/Doxyfile.in`. The generated version is intentionally minimal (only non-default settings). Add or override settings as needed. Run `doxygen -g` to see all available options. + +## Reference + +See `references/project_structure.md` for a detailed breakdown of every generated file and its purpose. diff --git a/agentic/cpp/skills/setup-cpp-repo/references/project_structure.md b/agentic/cpp/skills/setup-cpp-repo/references/project_structure.md new file mode 100644 index 0000000..6bf3236 --- /dev/null +++ b/agentic/cpp/skills/setup-cpp-repo/references/project_structure.md @@ -0,0 +1,117 @@ +# Generated Project Structure Reference + +This document describes every file and directory generated by `init_cpp_project.py` and its purpose. + +## Root Files + +### `CMakeLists.txt` +Main CMake configuration. Defines: +- C++20 standard requirements +- `MARCH` cache variable (defaults to `native`) +- Optional SIMD fallback flag when the generated project enables SIMD-specific + code paths +- `ENABLE_ADDRESS_SANITIZER` option for ASan builds +- `_COVERAGE` option for gcov instrumentation +- Build options: `_TESTS`, `_BENCHMARKS`, `_DIAGNOSTICS`, `_DOCS` +- FetchContent dependencies: Google Test, Google Benchmark, spdlog (diagnostics only), Doxygen theme +- Test executable: `unittests` +- Benchmark executable: `benchmarks` +- Custom target: `docs` (when Doxygen is enabled) + +### `CMakePresets.json` +CMake presets (version 4) with a hidden `base` preset. Defines presets for: +- `debug` — Debug build +- `release` — Release build +- `benchmarks` — Release with benchmarks enabled +- `benchmarks-diagnostic` — RelWithDebInfo with diagnostics and libpfm +- `docs` — Documentation build +- `coverage` — Debug with coverage instrumentation +- `asan` — Debug with AddressSanitizer + +### `.clang-format` +Chromium-based C++ formatting configuration. 
Simplified from the full Chromium style by removing Windows-specific include priorities and IPC macro block definitions. Key settings: +- `BasedOnStyle: Chromium` +- `Standard: Cpp11` +- `InsertBraces: true` +- `InsertNewlineAtEOF: true` +- `IncludeBlocks: Regroup` with generic priority categories + +### `.gitignore` +Standard C++ project ignores: +- `build/`, `.vscode/`, `Testing/` +- `plans/*`, `venv/`, `docs/*` +- `CMakeUserPresets.json` +- `_deps/`, gcov outputs (`*.gcda`, `*.gcno`, `*.gcov`) + +### `README.md` +Minimal project README used as the Doxygen main page. + +### `AGENTS.md` +Project documentation for AI coding assistants. Contains: +- Project overview and architecture conventions +- Technology stack (C++20, CMake, Google Test, Google Benchmark) +- Build commands with all CMake options +- Testing patterns and style guidelines +- Common tasks for AI agents (adding components, modifying SIMD code, adding tests) +- Performance philosophy + +## Directories + +### `include//` +Header-only library API. Contains a placeholder header (`.hpp`) with: +- Doxygen file documentation +- Example function in the project's namespace +- `#pragma once` guard + +### `src/tests/` +Unit test scaffold. Contains `unittests.cpp` with: +- Google Test includes +- Basic assertion test against the placeholder header +- `gtest_main` supplies the test runner entry point + +### `src/benchmarks/` +Benchmark scaffold. Contains `benchmarks.cpp` with: +- Google Benchmark includes +- Example benchmark using `benchmark::DoNotOptimize` +- `BENCHMARK_MAIN()` macro + +### `src/docs/` +Doxygen configuration. Contains: +- `Doxyfile.in` — Trimmed Doxygen config (~300 lines vs. 1100+ in full). Only non-default settings are specified. 
Key templated values: + - `PROJECT_NAME` + - `INPUT` (points to `include/` and `README.md`) + - `STRIP_FROM_PATH` (strips source dir from file paths) + - `IMAGE_PATH` + - `HTML_EXTRA_STYLESHEET` (doxygen-awesome-css) + - `USE_MDFILE_AS_MAINPAGE` +- `images/` — Empty directory for documentation images + +### `scripts/` +Utility scripts. Contains: +- `coverage_report.sh` — Runs the `coverage` CMake preset, executes tests, and generates gcov reports. Excludes `_deps/`, `third_party/`, and `src/benchmarks/` from coverage. + +### `.github/workflows/` +CI/CD workflows: + +#### `build-test.yml` +Builds the project with AddressSanitizer and runs unit tests on `ubuntu-latest`. Triggered on pushes and PRs to `main`. + +#### `linter.yml` +Runs `clang-format --dry-run --Werror` on all C/C++ files. Triggered on pushes to `main` and all PRs. + +#### `coverage.yml` +Runs the coverage script and uploads results to Codecov. Also uploads coverage artifacts. Triggered on pushes and PRs to `main`. + +#### `doxygen.yml` +Installs Doxygen, builds documentation with the `docs` preset, and deploys HTML output to GitHub Pages. Triggered on pushes to `main` and manual dispatch. + +## Template Substitution + +All generated files use these placeholders, replaced by the script: + +| Placeholder | Example input | Example output | +|-------------|---------------|----------------| +| `{{PROJECT_NAME}}` | `my-lib` | `my-lib` | +| `{{NAMESPACE}}` | `mylib` | `mylib` | +| `{{PROJECT_NAME_UPPER}}` | `MY_LIB` | `MY_LIB` | +| `{{HEADER_NAME}}` | `my_lib.hpp` | `my_lib.hpp` | diff --git a/agentic/cpp/skills/setup-cpp-repo/scripts/init_cpp_project.py b/agentic/cpp/skills/setup-cpp-repo/scripts/init_cpp_project.py new file mode 100755 index 0000000..d76624f --- /dev/null +++ b/agentic/cpp/skills/setup-cpp-repo/scripts/init_cpp_project.py @@ -0,0 +1,1053 @@ +#!/usr/bin/env python3 +""" +init_cpp_project.py - Scaffold a new C++20 repository following modern C++ conventions. 
def to_upper(name: str) -> str:
    """Return *name* upper-cased, with each hyphen turned into an underscore."""
    return "_".join(name.split("-")).upper()


def to_snake(name: str) -> str:
    """Return *name* with each hyphen turned into an underscore; case is kept."""
    return "_".join(name.split("-"))
+# Build options +# --------------------------------------------------------------------------- +option({{PROJECT_NAME_UPPER}}_TESTS "Build unit tests" ON) +option({{PROJECT_NAME_UPPER}}_BENCHMARKS "Build benchmarks" OFF) +option({{PROJECT_NAME_UPPER}}_DIAGNOSTICS "Include diagnostic logs" OFF) +option({{PROJECT_NAME_UPPER}}_DOCS "Build Doxygen documentation" OFF) + +if({{PROJECT_NAME_UPPER}}_DIAGNOSTICS) + add_compile_definitions({{PROJECT_NAME_UPPER}}_DIAGNOSTICS) + set({{PROJECT_NAME_UPPER}}_DIAGNOSTICS_LIBS spdlog::spdlog_header_only) +endif() + +# --------------------------------------------------------------------------- +# Dependencies (fetched only when needed) +# --------------------------------------------------------------------------- +include(FetchContent) + +if({{PROJECT_NAME_UPPER}}_DIAGNOSTICS) + set(SPDLOG_BUILD_SHARED OFF CACHE BOOL "" FORCE) + set(SPDLOG_BUILD_EXAMPLE OFF CACHE BOOL "" FORCE) + set(SPDLOG_BUILD_TESTING OFF CACHE BOOL "" FORCE) + set(SPDLOG_INSTALL OFF CACHE BOOL "" FORCE) + FetchContent_Declare( + spdlog + GIT_REPOSITORY https://github.com/gabime/spdlog.git + GIT_TAG v1.14.1 + ) + FetchContent_MakeAvailable(spdlog) +endif() + +if({{PROJECT_NAME_UPPER}}_BENCHMARKS) + FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.9.4 + ) + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable Google Benchmark tests") + FetchContent_MakeAvailable(googlebenchmark) +endif() + +if({{PROJECT_NAME_UPPER}}_TESTS) + if(NOT TARGET gtest_main) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.17.0 + ) + FetchContent_MakeAvailable(googletest) + endif() + include(GoogleTest) +endif() + +# --------------------------------------------------------------------------- +# Unit tests +# --------------------------------------------------------------------------- +if({{PROJECT_NAME_UPPER}}_TESTS) + enable_testing() + + add_executable(unittests + 
src/tests/unittests.cpp) + target_include_directories(unittests + PUBLIC include) + target_link_libraries(unittests + gtest_main + ${{{PROJECT_NAME_UPPER}}_DIAGNOSTICS_LIBS}) + gtest_discover_tests(unittests) +endif() + +# --------------------------------------------------------------------------- +# Benchmarks +# --------------------------------------------------------------------------- +if({{PROJECT_NAME_UPPER}}_BENCHMARKS) + add_executable(benchmarks + src/benchmarks/benchmarks.cpp) + target_include_directories(benchmarks + PUBLIC include) + target_link_libraries(benchmarks + benchmark + benchmark_main + ${{{PROJECT_NAME_UPPER}}_DIAGNOSTICS_LIBS}) +endif() + +# --------------------------------------------------------------------------- +# Documentation (Doxygen) +# --------------------------------------------------------------------------- +if({{PROJECT_NAME_UPPER}}_DOCS) + find_package(Doxygen REQUIRED) + + FetchContent_Declare( + doxygen-awesome-css + URL https://github.com/jothepro/doxygen-awesome-css/archive/refs/heads/main.zip + ) + FetchContent_MakeAvailable(doxygen-awesome-css) + + FetchContent_GetProperties(doxygen-awesome-css SOURCE_DIR AWESOME_CSS_DIR) + + set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/src/docs/Doxyfile.in) + set(DOXYFILE_OUT ${CMAKE_CURRENT_BINARY_DIR}/docs/Doxyfile) + configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY) + + add_custom_target(docs + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Generating API documentation with Doxygen" + VERBATIM) +endif() +""" + +CMAKE_PRESETS_JSON = """{ + "version": 4, + "cmakeMinimumRequired": { + "major": 3, + "minor": 18, + "patch": 0 + }, + "configurePresets": [ + { + "name": "base", + "hidden": true, + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" + } + }, + { + "name": "debug", + "displayName": "Debug", + "inherits": "base", + "binaryDir": "${sourceDir}/build/debug", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug" + } 
+ }, + { + "name": "release", + "displayName": "Release", + "inherits": "base", + "binaryDir": "${sourceDir}/build/release", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release" + } + }, + { + "name": "benchmarks", + "displayName": "Benchmarks", + "inherits": "base", + "binaryDir": "${sourceDir}/build/benchmarks", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "{{PROJECT_NAME_UPPER}}_BENCHMARKS": "ON" + } + }, + { + "name": "benchmarks-diagnostic", + "displayName": "Benchmarks diagnostic build", + "inherits": "base", + "binaryDir": "${sourceDir}/build/release-with-deb", + "cacheVariables": { + "BENCHMARK_ENABLE_LIBPFM": "ON", + "CMAKE_BUILD_TYPE": "RelWithDebInfo", + "{{PROJECT_NAME_UPPER}}_DIAGNOSTICS": "ON", + "{{PROJECT_NAME_UPPER}}_BENCHMARKS": "ON" + } + }, + { + "name": "docs", + "displayName": "Docs", + "inherits": "base", + "binaryDir": "${sourceDir}/build/docs", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "{{PROJECT_NAME_UPPER}}_DOCS": "ON" + } + }, + { + "name": "coverage", + "displayName": "Coverage", + "inherits": "base", + "binaryDir": "${sourceDir}/build/coverage", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "{{PROJECT_NAME_UPPER}}_BENCHMARKS": "OFF", + "{{PROJECT_NAME_UPPER}}_COVERAGE": "ON" + } + }, + { + "name": "asan", + "displayName": "AddressSanitizer", + "inherits": "base", + "binaryDir": "${sourceDir}/build/asan", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "{{PROJECT_NAME_UPPER}}_BENCHMARKS": "OFF", + "ENABLE_ADDRESS_SANITIZER": "ON" + } + } + ], + "buildPresets": [ + { + "name": "debug", + "displayName": "Build Debug", + "configurePreset": "debug" + }, + { + "name": "release", + "displayName": "Build Release", + "configurePreset": "release" + }, + { + "name": "benchmarks", + "displayName": "Build Benchmarks", + "configurePreset": "benchmarks" + }, + { + "name": "benchmarks-diagnostic", + "displayName": "Benchmarks diagnostic", + "configurePreset": "benchmarks-diagnostic" + }, + { + "name": 
"docs", + "displayName": "Build Docs", + "configurePreset": "docs", + "targets": [ + "docs" + ] + }, + { + "name": "coverage", + "displayName": "Build Coverage", + "configurePreset": "coverage" + }, + { + "name": "asan", + "displayName": "Build AddressSanitizer", + "configurePreset": "asan" + } + ] +} +""" + +CLANG_FORMAT = """# Defines the Chromium style for automatic reformatting. +# http://clang.llvm.org/docs/ClangFormatStyleOptions.html +BasedOnStyle: Chromium +# This defaults to 'Auto'. Explicitly set it for a while, so that +# 'vector >' in existing files gets formatted to +# 'vector>'. ('Auto' means that clang-format will only use +# 'int>>' if the file already contains at least one such instance.) +Standard: Cpp11 + +# TODO(crbug.com/1392808): Remove when InsertBraces has been upstreamed into +# the Chromium style (is implied by BasedOnStyle: Chromium). +InsertBraces: true +InsertNewlineAtEOF: true + +# Sort #includes by following +# https://google.github.io/styleguide/cppguide.html#Names_and_Order_of_Includes +IncludeBlocks: Regroup +IncludeCategories: + # C system headers. + - Regex: '^<.*\\.h>' + Priority: 1 + # C++ standard library headers. + - Regex: '^<.*>' + Priority: 2 + # Project headers (quoted includes). + - Regex: '^".*"' + Priority: 3 + # Other libraries. + - Regex: '.*' + Priority: 4 +""" + +GITIGNORE = """build/ +.vscode/ +Testing/ +plans/* +venv/ +docs/* +src/docs/presentations/* +CMakeUserPresets.json +_deps/ +*.gcda +*.gcno +*.gcov +""" + +README_MD = """# {{PROJECT_NAME}} + +{{PROJECT_NAME}} is a C++20 header-only library. + +## Build + +```bash +cmake --preset release +cmake --build --preset release -j +./build/release/unittests +``` +""" + +AGENTS_MD = """# AGENTS.md - AI Coding Assistant Guidelines for {{PROJECT_NAME}} + +## Project Overview + +{{PROJECT_NAME}} is a **C++20 header-only library**. It provides [TODO: brief description]. 
+ +## Skills + +Shared C++ agent skills live in `agentic/cpp/skills` when this repository +vendors the shared skills subtree. Project-specific examples live in +`agentic/local/cpp/skills`. + +## Architecture + +### Project Layout Conventions + +- **`include/`**: Header-only library API (all implementations here, no `.cpp` files) +- **`src/tests/`**: Unit tests (Google Test) +- **`src/benchmarks/`**: Performance benchmarks (Google Benchmark) +- **`src/docs/`**: Doxygen configuration + +### Key Design Decisions + +1. **Header-only library**: All code in `include/`; no compiled library. +2. **Non-owning spans**: Use `std::span` for external data where appropriate. +3. **SIMD conditional compilation**: Use `#ifdef {{PROJECT_NAME_UPPER}}_AVX512_SUPPORT` / `{{PROJECT_NAME_UPPER}}_AVX2_SUPPORT` with scalar fallbacks. +4. **Target domain**: Optimized for practical data sizes. +5. **Platform**: Linux/Unix is the primary target platform. + +### Why Header-Only? + +- **SIMD flexibility**: Users compile with their target `-march` flags. +- **Better inlining**: Compiler sees full implementation. +- **No ABI issues**: Works across compilers and standard library versions. +- **Easy integration**: Users just `#include` headers. +- **Template-friendly**: No explicit instantiation needed. + +## Technology Stack + +- **Language**: C++20 (required features: `std::span`, `std::popcount`, `<bit>`) +- **Build**: CMake >= 3.18 +- **Testing**: Google Test v1.17.0 +- **Benchmarking**: Google Benchmark v1.9.4 +- **SIMD**: AVX-512 (primary), AVX2 (fallback), scalar fallbacks +- **Style**: Chromium C++ style (`.clang-format`) + +### Dependencies + +The library itself is header-only and has **no runtime dependencies**. 
Build-time dependencies are managed via CMake FetchContent and controlled by options: + +| Option | Default | What it enables | +|--------|---------|-----------------| +| `{{PROJECT_NAME_UPPER}}_TESTS` | `ON` | Unit tests (fetches Google Test) | +| `{{PROJECT_NAME_UPPER}}_BENCHMARKS` | `OFF` | Benchmarks (fetches Google Benchmark) | + +## Build Commands + +```bash +# Standard build (Release) +cmake -B build/release -DCMAKE_BUILD_TYPE=Release +cmake --build build/release -j + +# Debug build +cmake -B build/debug -DCMAKE_BUILD_TYPE=Debug +cmake --build build/debug -j + +# Without wide SIMD +cmake -B build/release -D{{PROJECT_NAME_UPPER}}_DISABLE_WIDE_SIMD=ON +cmake --build build/release -j + +# With AddressSanitizer +cmake -B build/asan -DENABLE_ADDRESS_SANITIZER=ON +cmake --build build/asan -j + +# Custom march flag +cmake -B build/release -DMARCH=icelake-client +cmake --build build/release -j + +# Tests only (no benchmarks) +cmake -B build/release -D{{PROJECT_NAME_UPPER}}_BENCHMARKS=OFF +cmake --build build/release -j +``` + +## Testing + +### Running Tests + +```bash +./build/release/unittests +``` + +### Testing Patterns + +- **Differential testing**: Compare against naive reference implementations. +- **Randomized testing**: Random inputs with configurable seed. +- **Exhaustive short inputs**: Test all patterns for small sizes. + +## Code Style Guidelines + +1. **Formatting**: Run `clang-format` before committing (Chromium style) +2. **Namespace**: All library code in `{{NAMESPACE}}` namespace +3. **Documentation**: Use Doxygen-style comments for public API +4. **Constants**: Use `constexpr` for compile-time values +5. 
**Alignment**: Be aware of data alignment; prefer 64-byte aligned array allocations where performance matters + +## CI/CD Workflows + +- **build-test.yml**: Builds and runs tests with AddressSanitizer +- **linter.yml**: Clang-format checks on all C/C++ files +- **coverage.yml**: Coverage reporting with codecov upload +- **doxygen.yml**: Documentation generation and GitHub Pages deployment + +## Common Tasks for AI Agents + +### Adding a New Component + +1. Create header in `include/{{NAMESPACE}}/` with Doxygen documentation +2. Add unit tests in `src/tests/<component>_tests.cpp` +3. Add benchmarks in `src/benchmarks/<component>_benchmarks.cpp` +4. Update `CMakeLists.txt` with new executables +5. Run `clang-format` on new files + +### Modifying SIMD Code + +1. Provide implementations for: + - Wide SIMD (`#ifdef {{PROJECT_NAME_UPPER}}_AVX512_SUPPORT`) + - AVX2 (`#ifdef {{PROJECT_NAME_UPPER}}_AVX2_SUPPORT`) + - Scalar fallback +2. Test with `-D{{PROJECT_NAME_UPPER}}_DISABLE_WIDE_SIMD=ON` to verify fallback works +3. Benchmark to ensure performance is maintained + +### Adding Tests + +1. Use Google Test framework +2. Include naive reference implementation for differential testing +3. Add edge cases: empty input, single element, boundary conditions +4. 
Use random testing with configurable seed for reproducibility + +## Performance Philosophy + +- **Goal**: Best practical performance (not just asymptotic complexity) +- **Approach**: Benchmark-driven optimization using Google Benchmark +- **SIMD**: Leverage vectorized operations where beneficial +- **Cache efficiency**: Align data structures to cache line boundaries (64 bytes) +""" + +DOXYFILE_IN = """# Doxyfile + +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = "{{PROJECT_NAME}}" +PROJECT_NUMBER = +PROJECT_BRIEF = +PROJECT_LOGO = +PROJECT_ICON = +OUTPUT_DIRECTORY = docs +CREATE_SUBDIRS = NO +CREATE_SUBDIRS_LEVEL = 8 +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = @CMAKE_CURRENT_SOURCE_DIR@ +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +JAVADOC_BANNER = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +PYTHON_DOCSTRING = YES +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +OPTIMIZE_OUTPUT_SLICE = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +MARKDOWN_STRICT = YES +TOC_INCLUDE_HEADINGS = 6 +MARKDOWN_ID_STYLE = DOXYGEN +AUTOLINK_SUPPORT = YES +AUTOLINK_IGNORE_WORDS = +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 +NUM_PROC_THREADS = 1 +TIMESTAMP = NO +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_PRIV_VIRTUAL = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES 
+EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +RESOLVE_UNNAMED_PARAMS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_UNDOC_NAMESPACES = YES +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = SYSTEM +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO +SHOW_HEADERFILE = YES +SHOW_INCLUDE_FILES = YES +SHOW_GROUPED_MEMB_INC = NO +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +STRICT_PROTO_MATCHING = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +CITE_BIB_FILES = +EXTERNAL_TOOL_PATH = +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_IF_INCOMPLETE_DOC = YES +WARN_NO_PARAMDOC = NO +WARN_IF_UNDOC_ENUM_VAL = NO +WARN_LAYOUT_FILE = YES +WARN_AS_ERROR = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LINE_FORMAT = "at line $line of file $file" +WARN_LOGFILE = +INPUT = @CMAKE_CURRENT_SOURCE_DIR@/include \ + @CMAKE_CURRENT_SOURCE_DIR@/README.md +INPUT_ENCODING = UTF-8 +INPUT_FILE_ENCODING = +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.h \ + *.hh \ + *.hxx \ + *.hpp +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = @CMAKE_CURRENT_SOURCE_DIR@/src/docs/images +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = @CMAKE_CURRENT_SOURCE_DIR@/README.md +IMPLICIT_DIR_DOCS = YES +FORTRAN_COMMENT_AFTER = 72 +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES 
+USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_ADD_INC_PATHS = YES +CLANG_OPTIONS = +CLANG_DATABASE_PATH = +ALPHABETICAL_INDEX = YES +IGNORE_PREFIX = +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = @AWESOME_CSS_DIR@/doxygen-awesome.css +HTML_EXTRA_FILES = +HTML_COLORSTYLE = AUTO_LIGHT +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_DYNAMIC_MENUS = YES +HTML_DYNAMIC_SECTIONS = NO +HTML_CODE_FOLDING = YES +HTML_COPY_CLIPBOARD = YES +HTML_PROJECT_COOKIE = +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_FEEDURL = +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +SITEMAP_URL = +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = YES +PAGE_OUTLINE_PANEL = YES +FULL_SIDEBAR = NO +ENUM_VALUES_PER_LINE = 4 +SHOW_ENUM_VALUES = NO +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +OBFUSCATE_EMAILS = YES +HTML_FORMULA_FORMAT = png +FORMULA_FONTSIZE = 10 +FORMULA_MACROFILE = +USE_MATHJAX = NO +MATHJAX_VERSION = MathJax_2 +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = +GENERATE_LATEX = NO +""" + +COVERAGE_REPORT_SH = """#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +BUILD_DIR="${ROOT_DIR}/build/coverage" + +cmake --preset coverage +cmake --build --preset coverage + +"${BUILD_DIR}/unittests" + +cd "${BUILD_DIR}" +find . -name "*.gcda" > gcov_files.txt +while read -r f; do + case "${f}" in + *"/_deps/"*|*"/third_party/"*|*"/src/benchmarks/"*) + ;; + *) + gcov -pb "${f}" >> coverage.txt + ;; + esac +done < gcov_files.txt +echo "gcov report written to ${BUILD_DIR}/coverage.txt" +""" + +BUILD_TEST_YML = """name: Tests (ASan) + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Create Build Directory + run: mkdir build + + - name: Configure CMake + working-directory: ./build + run: cmake -D{{PROJECT_NAME_UPPER}}_DISABLE_WIDE_SIMD=ON -DENABLE_ADDRESS_SANITIZER=ON -D{{PROJECT_NAME_UPPER}}_BENCHMARKS=OFF .. + + - name: Build Project + working-directory: ./build + run: make -j + + - name: Run Unittests + working-directory: ./build + run: ./unittests +""" + +LINTER_YML = """name: Clang Format Lint + +on: + pull_request: + push: + branches: [main] + +jobs: + clang-format: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install clang-format + run: sudo apt-get update && sudo apt-get install -y clang-format + + - name: Run clang-format check + run: | + mapfile -t FILES < <(find include src -type f \\( -name '*.cpp' -o -name '*.hpp' -o -name '*.cc' -o -name '*.c' -o -name '*.h' \\)) + clang-format --version + if [ ${#FILES[@]} -eq 0 ]; then + echo "No C/C++ files found." 
+ exit 0 + fi + + clang-format --dry-run --Werror "${FILES[@]}" +""" + +COVERAGE_YML = """name: coverage + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + coverage: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Create Build Directory + run: mkdir build + + - name: Run coverage + run: ./scripts/coverage_report.sh + + - name: Upload to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: build/coverage/coverage.txt + flags: gcov + fail_ci_if_error: false + + - name: Upload coverage artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-gcov + path: | + build/coverage/coverage.txt + build/coverage/*.gcov +""" + +DOXYGEN_YML = """# Simple workflow for deploying static content to GitHub Pages +name: Deploy static content to Pages + +on: + # Runs on pushes targeting the default branch + push: + branches: ["main"] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + # Single deploy job since we're just deploying + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install Doxygen v1.13.2 + run: | + transformed_version=$(echo "1.13.2" | tr '.' 
'_') + wget https://github.com/doxygen/doxygen/releases/download/Release_${transformed_version}/doxygen-1.13.2.linux.bin.tar.gz + tar -xzf doxygen-1.13.2.linux.bin.tar.gz + sudo mv doxygen-1.13.2/bin/doxygen /usr/local/bin/doxygen + shell: bash + - name: Cmake configure + run: cmake -S ${{github.workspace}} -B ${{github.workspace}}/build -D{{PROJECT_NAME_UPPER}}_DOCS=ON -D{{PROJECT_NAME_UPPER}}_TESTS=OFF -D{{PROJECT_NAME_UPPER}}_BENCHMARKS=OFF + - name: Build docs + run: cmake --build ${{github.workspace}}/build --target docs + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + # Upload entire repository + path: ${{github.workspace}}/build/docs/html + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 +""" + +HEADER_HPP = """#pragma once + +/** + * @file {{HEADER_NAME}} + * @brief Main header for the {{PROJECT_NAME}} library + */ + +namespace {{NAMESPACE}} { + +/** + * @brief Example function. + * + * TODO: Replace with actual library functionality. 
# Template for src/tests/unittests.cpp (Google Test smoke test).
UNITTESTS_CPP = """#include <gtest/gtest.h>

#include "{{NAMESPACE}}/{{HEADER_NAME}}"

TEST(ExampleTest, BasicAssertion) {
  EXPECT_EQ({{NAMESPACE}}::example(), 42);
}
"""

# Template for src/benchmarks/benchmarks.cpp (Google Benchmark smoke test).
BENCHMARKS_CPP = """#include <benchmark/benchmark.h>

#include "{{NAMESPACE}}/{{HEADER_NAME}}"

static void BM_Example(benchmark::State& state) {
  for (auto _ : state) {
    benchmark::DoNotOptimize({{NAMESPACE}}::example());
  }
}

BENCHMARK(BM_Example);

BENCHMARK_MAIN();
"""


# ---------------------------------------------------------------------------
# Generation logic
# ---------------------------------------------------------------------------

def _is_cpp_identifier(name: str) -> bool:
    """Return True if *name* is a plain ASCII identifier ([A-Za-z_][A-Za-z0-9_]*)."""
    return name.isascii() and name.isidentifier()


def generate(args: argparse.Namespace) -> None:
    """Write a new project skeleton to ``<args.output_dir>/<args.name>``.

    Args:
        args: Parsed command-line arguments. Reads ``name`` (project name),
            ``namespace`` (optional C++ namespace; defaults to ``name`` with
            hyphens removed) and ``output_dir`` (parent directory of the new
            project).

    Raises:
        SystemExit: With a non-zero status and a message on stderr when the
            namespace is not a usable C++ identifier or when the target
            directory already exists. Nothing is written in either case.
    """
    project_name = args.name
    namespace = args.namespace or project_name.replace("-", "")

    # The namespace is emitted verbatim into `namespace X {` and used as the
    # include sub-directory, so reject anything that cannot compile before
    # touching the filesystem.
    if not _is_cpp_identifier(namespace):
        sys.exit(
            f"Error: '{namespace}' is not a valid C++ identifier; "
            "pass a valid name via --namespace"
        )

    output_dir = Path(args.output_dir).resolve() / project_name
    if output_dir.exists():
        # sys.exit(str) prints the message to stderr and exits with status 1.
        sys.exit(f"Error: output directory already exists: {output_dir}")

    # to_upper / to_snake are helpers defined earlier in this file.
    project_name_upper = to_upper(project_name)
    header_name = f"{to_snake(project_name)}.hpp"

    substitutions = {
        "{{PROJECT_NAME}}": project_name,
        "{{NAMESPACE}}": namespace,
        "{{PROJECT_NAME_UPPER}}": project_name_upper,
        "{{HEADER_NAME}}": header_name,
    }

    def sub(text: str) -> str:
        """Replace every ``{{PLACEHOLDER}}`` token in *text* with its value."""
        for key, value in substitutions.items():
            text = text.replace(key, value)
        return text

    # Create directories. parents=True also creates output_dir, src/ and
    # src/docs/ on the way down.
    directories = (
        output_dir / "include" / namespace,
        output_dir / "src" / "tests",
        output_dir / "src" / "benchmarks",
        output_dir / "src" / "docs" / "images",
        output_dir / "scripts",
        output_dir / ".github" / "workflows",
    )
    for directory in directories:
        directory.mkdir(parents=True)

    workflows_dir = output_dir / ".github" / "workflows"
    coverage_script = output_dir / "scripts" / "coverage_report.sh"

    # Write files
    files = {
        output_dir / "CMakeLists.txt": sub(CMAKE_LISTS_TXT),
        output_dir / "CMakePresets.json": sub(CMAKE_PRESETS_JSON),
        output_dir / ".clang-format": sub(CLANG_FORMAT),
        output_dir / ".gitignore": sub(GITIGNORE),
        output_dir / "README.md": sub(README_MD),
        output_dir / "AGENTS.md": sub(AGENTS_MD),
        output_dir / "src" / "docs" / "Doxyfile.in": sub(DOXYFILE_IN),
        coverage_script: sub(COVERAGE_REPORT_SH),
        workflows_dir / "build-test.yml": sub(BUILD_TEST_YML),
        workflows_dir / "linter.yml": sub(LINTER_YML),
        workflows_dir / "coverage.yml": sub(COVERAGE_YML),
        workflows_dir / "doxygen.yml": sub(DOXYGEN_YML),
        output_dir / "include" / namespace / header_name: sub(HEADER_HPP),
        output_dir / "src" / "tests" / "unittests.cpp": sub(UNITTESTS_CPP),
        output_dir / "src" / "benchmarks" / "benchmarks.cpp": sub(BENCHMARKS_CPP),
    }

    for path, content in files.items():
        # Explicit UTF-8 and LF endings: the locale default encoding or CRLF
        # translation would break the bash shebang line and the YAML files.
        with path.open("w", encoding="utf-8", newline="\n") as handle:
            handle.write(content)
        print(f"Created: {path.relative_to(output_dir.parent)}")

    # Make coverage script executable
    coverage_script.chmod(0o755)

    print(f"\nProject '{project_name}' generated successfully at {output_dir}")


def main() -> None:
    """Parse command-line arguments and generate the project skeleton."""
    parser = argparse.ArgumentParser(
        description="Scaffold a new C++20 repository following modern C++ conventions."
    )
    parser.add_argument("--name", required=True, help="Project name (e.g., my-lib)")
    parser.add_argument(
        "--namespace",
        help="C++ namespace (defaults to project name with hyphens removed)",
    )
    parser.add_argument(
        "--output-dir",
        default=".",
        help="Output directory (default: current directory)",
    )
    args = parser.parse_args()
    generate(args)


if __name__ == "__main__":
    main()