fix: improve CI benchmark table in PR (#1663)

knyk-dev · web-flow · commit f1f03cceae31 · 2026-01-08T12:29:00.000+08:00
diff --git a/.github/scripts/benchmark_formatter.py b/.github/scripts/benchmark_formatter.py
@@ -0,0 +1,248 @@
+import pathlib, re, sys
+
+try:
+    p = pathlib.Path("comparison.md")
+    if not p.exists():
+        print("comparison.md not found, skipping post-processing.")
+        sys.exit(0)
+
+    lines = p.read_text(encoding="utf-8").splitlines()
+    processed_lines = []
+    in_code = False
+    def strip_worker_suffix(text: str) -> str:
+        return re.sub(r'(\S+?)-\d+(\s|$)', r'\1\2', text)
+
+    def get_icon(diff_val: float) -> str:
+        if diff_val > 10:
+            return "🐌"
+        if diff_val < -10:
+            return "🚀"
+        return "➡️"
+
+    def clean_superscripts(text: str) -> str:
+        return re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]', '', text)
+
+    def parse_val(token: str):
+        if '%' in token or '=' in token:
+            return None
+        token = clean_superscripts(token)
+        token = token.split('±')[0].strip()
+        token = token.split('(')[0].strip()
+        if not token:
+            return None
+
+        m = re.match(r'^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$', token)
+        if not m:
+            return None
+        try:
+            val = float(m.group(1))
+        except ValueError:
+            return None
+        suffix = (m.group(2) or "").replace("µ", "u")
+        multipliers = {
+            "n": 1e-9,
+            "ns": 1e-9,
+            "u": 1e-6,
+            "us": 1e-6,
+            "m": 1e-3,
+            "ms": 1e-3,
+            "s": 1.0,
+            "k": 1e3,
+            "K": 1e3,
+            "M": 1e6,
+            "G": 1e9,
+            "Ki": 1024.0,
+            "Mi": 1024.0**2,
+            "Gi": 1024.0**3,
+            "Ti": 1024.0**4,
+            "B": 1.0,
+            "B/op": 1.0,
+            "C": 1.0,  # tolerate degree/unit markers that don't affect ratio
+        }
+        return val * multipliers.get(suffix, 1.0)
+
+    def extract_two_numbers(tokens):
+        found = []
+        for t in tokens[1:]:  # skip name
+            if t in {"±", "∞", "~", "│", "│"}:
+                continue
+            if '%' in t or '=' in t:
+                continue
+            val = parse_val(t)
+            if val is not None:
+                found.append(val)
+                if len(found) == 2:
+                    break
+        return found
+
+    # Pass 0:
+    # Scan the data lines inside code fences (headers, footnotes and metadata
+    # are skipped) and record the widest one so the Diff column starts past it.
+    max_content_width = 0
+    
+    for line in lines:
+        if line.strip() == "```":
+            in_code = not in_code
+            continue
+        if not in_code:
+            continue
+            
+        # Skip footnotes/meta for width calculation
+        if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
+            continue
+        if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
+            continue
+        # Header lines are handled separately in Pass 1
+        if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
+            continue
+            
+        # It's likely a data line
+        # Check if it has an existing percentage we might move/align
+        curr_line = strip_worker_suffix(line).rstrip()
+        pct_match = re.search(r'([+-]?\d+\.\d+)%', curr_line)
+        if pct_match:
+            # If we are going to realign this, we count width up to the percentage
+            w = len(curr_line[:pct_match.start()].rstrip())
+        else:
+            w = len(curr_line)
+        
+        if w > max_content_width:
+            max_content_width = w
+
+    # Calculate global alignment target for Diff column
+    # Ensure target column is beyond the longest line with some padding
+    diff_col_start = max_content_width + 4
+    
+    # Calculate right boundary (pipe) position
+    # Reserve 14 columns for the Diff cell (e.g. "+100.00% 🚀") and its padding
+    right_boundary = diff_col_start + 14
+
+    # Reset code fence tracking state for Pass 1
+    in_code = False
+    for line in lines:
+
+        if line.strip() == "```":
+            in_code = not in_code
+            processed_lines.append(line)
+            continue
+
+        if not in_code:
+            processed_lines.append(line)
+            continue
+
+        # footnotes keep untouched
+        if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
+            processed_lines.append(line)
+            continue
+
+        # header lines: ensure last column labeled Diff and force alignment
+        if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
+            # Strip trailing pipe and whitespace
+            stripped_header = line.rstrip().rstrip('│').rstrip()
+            
+            # If "vs base" is present, ensure we don't duplicate "Diff" if it's already there
+            # But we want to enforce OUR alignment, so we might strip existing Diff
+            stripped_header = re.sub(r'\s+Diff\s*$', '', stripped_header, flags=re.IGNORECASE)
+            stripped_header = re.sub(r'\s+Delta\b', '', stripped_header, flags=re.IGNORECASE)
+
+            # Pad to diff_col_start
+            if len(stripped_header) < diff_col_start:
+                new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
+            else:
+                new_header = stripped_header + "  "
+
+            # Add Diff column header if it's the second header row (vs base)
+            if 'vs base' in line:
+                new_header += "Diff"
+            
+            # Add closing pipe at the right boundary
+            current_len = len(new_header)
+            if current_len < right_boundary:
+                new_header += " " * (right_boundary - current_len)
+            
+            new_header += "│"
+            processed_lines.append(new_header)
+            continue
+
+        # non-data meta lines; same prefixes as Pass 0, incl. 'cpu:' (a CPU model like "8370C CPU @ 2.80GHz" parses as two values)
+        if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
+            processed_lines.append(line)
+            continue
+
+        line = strip_worker_suffix(line)
+        tokens = line.split()
+        if not tokens:
+            processed_lines.append(line)
+            continue
+
+        numbers = extract_two_numbers(tokens)
+        pct_match = re.search(r'([+-]?\d+\.\d+)%', line)
+
+        # Helper to align and append
+        def append_aligned(left_part, content):
+            if len(left_part) < diff_col_start:
+                aligned = left_part + " " * (diff_col_start - len(left_part))
+            else:
+                aligned = left_part + "  "
+            
+            # NOTE: content is appended as-is and is never clipped to the
+            # header's closing pipe at right_boundary. It starts at
+            # diff_col_start when left_part is shorter than that, so anything
+            # longer than right_boundary - diff_col_start characters extends
+            # past the pipe.
+            return f"{aligned}{content}"
+
+        # Special handling for geomean when values missing or zero
+        is_geomean = tokens[0] == "geomean"
+        if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
+            leading = re.match(r'^\s*', line).group(0)
+            left = f"{leading}geomean"
+            processed_lines.append(append_aligned(left, "n/a (has zero)"))
+            continue
+
+        # when both values are zero, force diff = 0 and align
+        if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
+            diff_val = 0.0
+            icon = get_icon(diff_val)
+            left = line.rstrip()
+            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
+            continue
+
+        # recompute diff when we have two numeric values
+        if len(numbers) == 2 and numbers[0] != 0:
+            diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
+            icon = get_icon(diff_val)
+
+            left = line
+            if pct_match:
+                left = line[:pct_match.start()].rstrip()
+            else:
+                left = line.rstrip()
+
+            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
+            continue
+
+        # fallback: align existing percentage to Diff column and (re)append icon
+        if pct_match:
+            try:
+                pct_val = float(pct_match.group(1))
+                icon = get_icon(pct_val)
+
+                left = line[:pct_match.start()].rstrip()
+                suffix = line[pct_match.end():]
+                # Remove any existing icon after the percentage to avoid duplicates
+                suffix = re.sub(r'\s*(🐌|🚀|➡️)', '', suffix)
+
+                processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
+            except ValueError:
+                processed_lines.append(line)
+            continue
+
+        # If we cannot parse numbers or percentages, keep the original (only worker suffix stripped)
+        processed_lines.append(line)
+
+    p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")
+
+except Exception as e:
+    print(f"Error post-processing comparison.md: {e}")
+    sys.exit(1)
diff --git a/.github/scripts/download_artifact.js b/.github/scripts/download_artifact.js
@@ -0,0 +1,47 @@
+/**
+ * Download the `benchmark-results` artifact of the triggering workflow run
+ * and save it as `benchmark-results.zip` under `GITHUB_WORKSPACE`.
+ *
+ * Any failure (artifact missing, API error, write error) is reported through
+ * `core.setFailed` rather than thrown.
+ *
+ * @param {object} args - Objects supplied by actions/github-script.
+ * @param {object} args.github - Octokit client used for the Actions API calls.
+ * @param {object} args.context - Must carry `payload.workflow_run.id` and `repo`.
+ * @param {object} args.core - Actions toolkit core (used for `setFailed`).
+ */
+module.exports = async ({github, context, core}) => {
+  try {
+    // Walk every page: the list endpoint is paginated, and a single-page
+    // lookup would miss the artifact on runs that upload many of them.
+    const artifacts = await github.paginate(github.rest.actions.listWorkflowRunArtifacts, {
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      run_id: context.payload.workflow_run.id,
+      per_page: 100,
+    });
+
+    const matchArtifact = artifacts.find((artifact) => {
+      return artifact.name === "benchmark-results";
+    });
+
+    if (!matchArtifact) {
+      core.setFailed("No artifact named 'benchmark-results' found.");
+      return;
+    }
+
+    const download = await github.rest.actions.downloadArtifact({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      artifact_id: matchArtifact.id,
+      archive_format: 'zip',
+    });
+
+    const fs = require('fs');
+    const path = require('path');
+    const workspace = process.env.GITHUB_WORKSPACE;
+    fs.writeFileSync(path.join(workspace, 'benchmark-results.zip'), Buffer.from(download.data));
+  } catch (error) {
+    core.setFailed(`Failed to download artifact: ${error.message}`);
+  }
+};
diff --git a/.github/scripts/post_comment.js b/.github/scripts/post_comment.js
@@ -0,0 +1,79 @@
+/**
+ * Post or refresh the benchmark comparison comment on a pull request.
+ *
+ * Reads the PR number from `pr_number.txt` and the comment body from
+ * `comparison.md` (both relative to the current working directory), then
+ * updates the existing bot comment containing "Benchmark Comparison" or
+ * creates a new comment when none is found. Missing or invalid input files
+ * fail the step via `core.setFailed`.
+ *
+ * @param {object} args - Objects supplied by actions/github-script.
+ * @param {object} args.github - Octokit client used for the Issues API calls.
+ * @param {object} args.context - Workflow context (provides `repo`).
+ * @param {object} args.core - Actions toolkit core (used for `setFailed`).
+ */
+module.exports = async ({github, context, core}) => {
+  const fs = require('fs');
+
+  // Validate pr_number.txt
+  if (!fs.existsSync('pr_number.txt')) {
+    core.setFailed("Required artifact file 'pr_number.txt' was not found in the workspace.");
+    return;
+  }
+  const prNumberContent = fs.readFileSync('pr_number.txt', 'utf8').trim();
+  // The file comes out of an uploaded artifact, so accept plain digits only;
+  // parseInt alone would silently turn "12abc" into 12.
+  const issue_number = /^\d+$/.test(prNumberContent) ? parseInt(prNumberContent, 10) : NaN;
+  if (!Number.isSafeInteger(issue_number) || issue_number <= 0) {
+    core.setFailed('Invalid PR number in pr_number.txt: "' + prNumberContent + '"');
+    return;
+  }
+
+  // Validate comparison.md
+  if (!fs.existsSync('comparison.md')) {
+    core.setFailed("Required artifact file 'comparison.md' was not found in the workspace.");
+    return;
+  }
+  let comparison;
+  try {
+    comparison = fs.readFileSync('comparison.md', 'utf8');
+  } catch (error) {
+    core.setFailed("Failed to read 'comparison.md': " + error.message);
+    return;
+  }
+
+  // Find existing comment. listComments is paginated, so walk every page;
+  // reading only the first one posts a duplicate comment on busy PRs.
+  const comments = await github.paginate(github.rest.issues.listComments, {
+    owner: context.repo.owner,
+    repo: context.repo.repo,
+    issue_number: issue_number,
+    per_page: 100,
+  });
+
+  // The API schema allows `user` to be null, so guard before reading `type`.
+  const botComment = comments.find(comment =>
+    comment.user && comment.user.type === 'Bot' &&
+    typeof comment.body === 'string' &&
+    comment.body.includes('Benchmark Comparison')
+  );
+
+  const footer = '<sub>🤖 This comment will be automatically updated with the latest benchmark results.</sub>';
+  const commentBody = `${comparison}\n\n${footer}`;
+
+  if (botComment) {
+    await github.rest.issues.updateComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      comment_id: botComment.id,
+      body: commentBody
+    });
+  } else {
+    await github.rest.issues.createComment({
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      issue_number: issue_number,
+      body: commentBody
+    });
+  }
+};
diff --git a/.github/workflows/comment.yml b/.github/workflows/comment.yml
@@ -0,0 +1,41 @@
+name: Post Benchmark Comment
+
+on:
+  workflow_run:
+    workflows: ["Performance Comparison for Pull Requests"]
+    types:
+      - completed
+
+permissions:
+  # Declaring any permission sets every unlisted one to "none", so spell out
+  # what the steps below need: artifact download, checkout, PR comment.
+  actions: read
+  contents: read
+  pull-requests: write
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    if: >
+      github.event.workflow_run.event == 'pull_request' &&
+      github.event.workflow_run.conclusion == 'success'
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: 'Download artifact'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const script = require('./.github/scripts/download_artifact.js')
+            await script({github, context, core})
+
+      - name: 'Unzip artifact'
+        run: unzip benchmark-results.zip
+
+      - name: 'Post comment'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const script = require('./.github/scripts/post_comment.js')
+            await script({github, context, core})
diff --git a/.github/workflows/performance-pr.yml b/.github/workflows/performance-pr.yml