Skip to content

Commit 7c96171

Browse files
committed
ci: enrichment workflow
1 parent 700d53b commit 7c96171

File tree

4 files changed

+957
-0
lines changed

4 files changed

+957
-0
lines changed

.github/workflows/canonical-source-update.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,16 @@ jobs:
283283
core.notice(`Created PR #${newPr.number} for canonical source merge`);
284284
}
285285
286+
# Best-effort hand-off: when changes were detected and this is not a dry run,
# dispatch the TBA enrichment workflow in "full" mode. A failed dispatch only
# raises a warning annotation; it does not fail this step.
- name: Trigger TBA enrichment
  if: >-
    steps.check_changes.outputs.changed == 'true' &&
    inputs.dry_run != true
  env:
    GH_TOKEN: ${{ github.token }}
  run: |
    echo "::notice title=Triggering TBA Enrichment::Starting TBA conference enrichment workflow"
    if ! gh workflow run enrich-tba-conferences.yml -f enrichment_level=full; then
      echo "::warning title=Trigger Failed::Could not trigger TBA enrichment workflow"
    fi
295+
286296
- name: Summary
287297
if: always()
288298
env:
@@ -298,6 +308,7 @@ jobs:
298308
echo "- Status: Changes detected (dry run - not committed)" >> $GITHUB_STEP_SUMMARY
299309
else
300310
echo "- Status: ✅ Changes committed and PR updated" >> $GITHUB_STEP_SUMMARY
311+
echo "- TBA Enrichment: Triggered" >> $GITHUB_STEP_SUMMARY
301312
fi
302313
else
303314
echo "- Status: ✓ No new data from canonical sources" >> $GITHUB_STEP_SUMMARY
.github/workflows/enrich-tba-conferences.yml

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
name: Enrich TBA Conferences

on:
  # On-demand runs: started by hand, or dispatched by the
  # canonical-source-update workflow.
  workflow_dispatch:
    inputs:
      enrichment_level:
        description: 'Enrichment level'
        type: choice
        options:
          - full
          - quick
        default: full
      dry_run:
        description: 'Dry run (no commit/PR)'
        required: false
        type: boolean
        default: false

  # Weekly TBA check; the job pins scheduled runs to "quick" mode.
  schedule:
    - cron: '0 6 * * 0'  # Sundays, 06:00 UTC

# One run at a time, and never cancel a run that is already in progress, so
# two jobs cannot rewrite the accumulator branch simultaneously.
concurrency:
  group: enrich-tba-conferences
  cancel-in-progress: false

permissions:
  contents: write
  pull-requests: write

env:
  # Branch that accumulates automated conference updates.
  UPDATE_BRANCH: auto/conference-updates

jobs:
  enrich:
    runs-on: ubuntu-latest
    timeout-minutes: 20

    steps:
40+
# Resolve the effective enrichment level and publish it as the `level`
# output: scheduled runs always use "quick"; every other event uses the
# dispatch input, falling back to "full" when it is empty.
- name: Determine enrichment level
  id: config
  env:
    EVENT_NAME: ${{ github.event_name }}
    INPUT_LEVEL: ${{ inputs.enrichment_level }}
  run: |
    case "$EVENT_NAME" in
      schedule)
        echo "level=quick" >> "$GITHUB_OUTPUT"
        echo "::notice title=Scheduled Run::Using quick enrichment mode"
        ;;
      *)
        level="${INPUT_LEVEL:-full}"
        echo "level=${level}" >> "$GITHUB_OUTPUT"
        echo "::notice title=Manual Run::Using ${level} enrichment mode"
        ;;
    esac
53+
54+
# Clone with complete history (fetch-depth: 0) and keep the workflow token in
# the local git configuration for the git commands in later steps.
- name: Checkout repository
  uses: actions/checkout@v6
  with:
    token: ${{ github.token }}
    persist-credentials: true
    fetch-depth: 0
60+
61+
# Attribute commits created by this job to the GitHub Actions bot account.
- name: Configure git identity
  run: |
    git config user.email "github-actions[bot]@users.noreply.github.com"
    git config user.name "github-actions[bot]"
65+
66+
# Switch to the shared accumulator branch and bring it up to date with main
# before any data is modified. If the remote does not have the branch yet, it
# is created from the current checkout.
- name: Setup accumulator branch
  env:
    # Handed over through the environment rather than being expanded into the
    # script text by the expression engine.
    GH_TOKEN: ${{ github.token }}
  run: |
    git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"

    # --exit-code makes ls-remote return non-zero when no ref matches, and the
    # fully qualified ref name avoids false positives from other branches
    # whose names merely contain or end with the accumulator branch name.
    if git ls-remote --exit-code --heads origin "refs/heads/${UPDATE_BRANCH}" > /dev/null; then
      echo "::notice title=Branch::Checking out existing accumulator branch"
      git fetch origin "$UPDATE_BRANCH"
      git checkout "$UPDATE_BRANCH"

      # Sync with main: rebase first, fall back to a merge, and fail the job
      # if neither applies cleanly.
      if ! git rebase origin/main; then
        echo "::warning title=Rebase Failed::Attempting merge instead"
        git rebase --abort 2>/dev/null || true
        if ! git merge origin/main --no-edit; then
          git merge --abort 2>/dev/null || true
          echo "::error title=Branch Sync Failed::Could not rebase or merge with main"
          exit 1
        fi
      fi
    else
      echo "::notice title=Branch::Creating new accumulator branch"
      git checkout -b "$UPDATE_BRANCH"
    fi
89+
90+
# Keep a pre-enrichment copy of the conference data; the "Check for changes"
# step compares the working file against this copy.
- name: Snapshot data files
  run: cp _data/conferences.yml /tmp/conferences_before.yml
93+
94+
- name: Setup Pixi
  uses: prefix-dev/setup-pixi@v0.9.3

# Installation stays best-effort (a failure does not fail the job), but it is
# no longer silent: the package index is refreshed first, and a failed install
# raises a warning annotation instead of being discarded by `|| true`.
- name: Install lynx for text extraction
  run: |
    if ! { sudo apt-get update -qq && sudo apt-get install -qq -y lynx; } > /dev/null 2>&1; then
      echo "::warning title=lynx Install Failed::Could not install lynx for text extraction"
    fi
99+
100+
# Count the entries in _data/conferences.yml whose `cfp` value is one of the
# placeholder words, publish the number as the `tba_count` output, and record
# it in the job summary.
- name: Find TBA conferences
  id: find-tba
  run: |
    # The inline script prints a single integer on stdout.
    TBA_COUNT=$(python3 << 'PYCOUNT'
    import sys

    import yaml

    # Make the project's helper package importable.
    sys.path.insert(0, 'utils')
    from tidy_conf.utils import Loader

    # NOTE(review): Loader is project-defined; confirm it is a safe loader.
    tba_words = ["tba", "tbd", "cancelled", "none", "na", "n/a", "nan", "n.a."]

    with open("_data/conferences.yml") as f:
        conferences = yaml.load(f, Loader=Loader) or []

    count = 0
    for conf in conferences:
        cfp_value = str(conf.get("cfp", "")).lower().strip()
        if cfp_value in tba_words:
            count += 1
    print(count)
    PYCOUNT
    )

    echo "tba_count=$TBA_COUNT" >> "$GITHUB_OUTPUT"

    case "$TBA_COUNT" in
      0) echo "::notice title=No TBA::No conferences with TBA CFP found" ;;
      *) echo "::notice title=TBA Found::Found $TBA_COUNT conferences with TBA CFP" ;;
    esac

    {
      echo "## TBA Conferences"
      echo "Found **$TBA_COUNT** conferences with TBA CFP deadlines"
    } >> "$GITHUB_STEP_SUMMARY"
129+
130+
# Run utils/enrich_tba.py at the resolved level (adding --dry-run when
# requested) and record the outcome in the `success` output. A failing script
# raises an error annotation but does not fail this step.
- name: Enrich TBA conferences
  id: enrich
  if: steps.find-tba.outputs.tba_count != '0'
  env:
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    ENRICHMENT_LEVEL: ${{ steps.config.outputs.level }}
    DRY_RUN: ${{ inputs.dry_run }}
  run: |
    echo "::group::Running enrichment"

    # Build the command as an array so every argument stays a single word;
    # executing a flat string relies on unquoted word splitting and globbing.
    cmd=(pixi run python utils/enrich_tba.py --level "$ENRICHMENT_LEVEL")
    if [ "$DRY_RUN" = "true" ]; then
      cmd+=(--dry-run)
    fi

    # Run enrichment and report the result without aborting the job.
    if "${cmd[@]}"; then
      echo "success=true" >> "$GITHUB_OUTPUT"
    else
      echo "success=false" >> "$GITHUB_OUTPUT"
      echo "::error title=Enrichment Failed::TBA enrichment script failed"
    fi

    echo "::endgroup::"
155+
156+
# Compare the data file with the pre-enrichment snapshot, publish the result
# as the `changed` output, and add either a truncated unified diff or a
# "no changes" note to the job summary.
- name: Check for changes
  id: check_changes
  run: |
    if diff -q _data/conferences.yml /tmp/conferences_before.yml > /dev/null 2>&1; then
      echo "changed=false" >> "$GITHUB_OUTPUT"
      echo "::notice title=No Changes::No updates from TBA enrichment"
      {
        echo "## No Changes"
        echo "TBA enrichment did not find any new CFP data to update."
      } >> "$GITHUB_STEP_SUMMARY"
    else
      echo "changed=true" >> "$GITHUB_OUTPUT"
      echo "::notice title=Changes Detected::Conference data was updated"

      # Summary of changes, capped at the first 100 diff lines.
      {
        echo "## Changes"
        echo "\`\`\`diff"
        diff -u /tmp/conferences_before.yml _data/conferences.yml | head -100 || true
        echo "\`\`\`"
      } >> "$GITHUB_STEP_SUMMARY"
    fi
174+
175+
# Run the project's `sort` task on the updated data; skipped on dry runs and
# when nothing changed.
- name: Validate and sort
  if: >-
    steps.check_changes.outputs.changed == 'true' &&
    inputs.dry_run != true
  run: pixi run sort
178+
179+
# Commit the updated data files to the accumulator branch, push it, and
# expose the new commit's SHA and subject line as step outputs.
- name: Commit and push
  if: steps.check_changes.outputs.changed == 'true' && inputs.dry_run != true
  id: commit
  env:
    ENRICHMENT_LEVEL: ${{ steps.config.outputs.level }}
    # Handed over through the environment rather than being expanded into the
    # script text by the expression engine.
    GH_TOKEN: ${{ github.token }}
  run: |
    git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"

    # Stage data files. conferences.yml is mandatory; the other two are
    # staged independently so that one missing file no longer prevents the
    # other from being staged.
    git add -- _data/conferences.yml
    for optional_file in _data/archive.yml _data/legacy.yml; do
      git add -- "$optional_file" 2>/dev/null || true
    done

    # Descriptive commit message; level and timestamp go into the body.
    COMMIT_MSG="chore: enrich TBA conferences with CFP data"

    git commit -m "$COMMIT_MSG" -m "Enrichment level: $ENRICHMENT_LEVEL" -m "Triggered: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
    git push origin "$UPDATE_BRANCH" --force-with-lease

    echo "commit_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
    echo "commit_msg=$COMMIT_MSG" >> "$GITHUB_OUTPUT"
198+
199+
# Record this enrichment commit in the pull request that tracks the
# accumulator branch: append a table row to the open PR if there is one,
# otherwise open a new PR against main and label it.
- name: Create or update PR
  if: steps.check_changes.outputs.changed == 'true' && inputs.dry_run != true
  uses: actions/github-script@v8
  env:
    UPDATE_BRANCH: ${{ env.UPDATE_BRANCH }}
    COMMIT_SHA: ${{ steps.commit.outputs.commit_sha }}
    ENRICHMENT_LEVEL: ${{ steps.config.outputs.level }}
  with:
    script: |
      const END_MARKER = '<!-- END_UPDATES -->';
      const TABLE_HEADER = '| Conference | Commit | Date |\n|------------|--------|------|';

      const branch = process.env.UPDATE_BRANCH;
      const fullSha = process.env.COMMIT_SHA;
      const sha = fullSha.substring(0, 7);
      const level = process.env.ENRICHMENT_LEVEL;
      const date = new Date().toISOString().split('T')[0];
      const { owner, repo } = context.repo;

      // Check for an existing open PR from the update branch.
      const { data: prs } = await github.rest.pulls.list({
        owner,
        repo,
        head: `${owner}:${branch}`,
        state: 'open'
      });

      // Table row: source | short SHA linking to the commit | date.
      const entry = `| TBA Enrichment (${level}) | [${sha}](${context.serverUrl}/${owner}/${repo}/commit/${fullSha}) | ${date} |`;

      if (prs.length > 0) {
        // Update the existing PR by appending to its table.
        const pr = prs[0];
        const current = pr.body || '';

        let body;
        if (current.includes(END_MARKER)) {
          // Insert the row just before the marker. A replacer function keeps
          // any `$` sequences in the row from being treated as patterns.
          body = current.replace(END_MARKER, () => `${entry}\n${END_MARKER}`);
        } else {
          // No marker in the body (for example after a manual edit): append
          // a fresh table so the row is not silently dropped.
          body = `${current.trimEnd()}\n\n${TABLE_HEADER}\n${entry}\n${END_MARKER}`.trimStart();
        }

        await github.rest.pulls.update({
          owner,
          repo,
          pull_number: pr.number,
          body
        });

        console.log(`Updated PR #${pr.number}`);
        core.notice(`Updated PR #${pr.number} with TBA enrichment`);
      } else {
        // Create a new PR whose body starts the updates table.
        const { data: newPr } = await github.rest.pulls.create({
          owner,
          repo,
          title: 'Conference updates',
          head: branch,
          base: 'main',
          body: `## Automated Conference Updates\n\n${TABLE_HEADER}\n${entry}\n${END_MARKER}`
        });

        // Add labels.
        await github.rest.issues.addLabels({
          owner,
          repo,
          issue_number: newPr.number,
          labels: ['automated', 'conference-update']
        });

        console.log(`Created PR #${newPr.number}`);
        core.notice(`Created PR #${newPr.number} for TBA enrichment`);
      }
263+
264+
# Always-run recap appended to the job summary. It distinguishes "the TBA
# scan produced no count" from "no new data", and surfaces a failed
# enrichment script, which the enrich step itself only reports as an
# annotation.
- name: Summary
  if: always()
  env:
    TBA_COUNT: ${{ steps.find-tba.outputs.tba_count }}
    CHANGED: ${{ steps.check_changes.outputs.changed }}
    DRY_RUN: ${{ inputs.dry_run }}
    LEVEL: ${{ steps.config.outputs.level }}
    ENRICH_SUCCESS: ${{ steps.enrich.outputs.success }}
  run: |
    {
      echo ""
      echo "## Result"

      if [ -z "$TBA_COUNT" ]; then
        # The find-tba step was skipped or failed before writing its output.
        echo "- Status: TBA scan did not complete"
      elif [ "$TBA_COUNT" = "0" ]; then
        echo "- Status: No TBA conferences to process"
      elif [ "$CHANGED" = "true" ]; then
        if [ "$DRY_RUN" = "true" ]; then
          echo "- Status: Changes detected (dry run - not committed)"
        else
          echo "- Status: Changes committed and PR updated"
        fi
      else
        echo "- Status: No new CFP data found"
      fi

      # The enrich step writes success=false instead of failing the job.
      if [ "$ENRICH_SUCCESS" = "false" ]; then
        echo "- Enrichment script: failed (see the job log)"
      fi

      echo "- Enrichment level: ${LEVEL:-unknown}"
      echo "- TBA conferences scanned: ${TBA_COUNT:-unknown}"
    } >> "$GITHUB_STEP_SUMMARY"

ruff.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ ignore = [
5656

5757
]
5858

59+
[lint.per-file-ignores]
# utils/enrich_tba.py runs lynx through subprocess for text extraction, using
# validated URL input, so S603 is ignored for that file only.
"utils/enrich_tba.py" = ["S603"]
5962

6063
[lint.pydocstyle]
6164
convention = "numpy"

0 commit comments

Comments
 (0)