|
| 1 | +/** |
| 2 | + * Translate Chinese digest files to Vietnamese using LLM. |
| 3 | + * |
| 4 | + * Self-contained — does not import any upstream source files so that |
| 5 | + * upstream changes never break this script. |
| 6 | + * |
| 7 | + * Usage: |
| 8 | + * pnpm translate # translate today's digests |
| 9 | + * pnpm translate 2026-03-24 # translate a specific date |
| 10 | + * |
| 11 | + * Env vars: |
| 12 | + * LLM_PROVIDER - not read by this script; the OpenAI client is always used |
| 13 | + * OPENAI_API_KEY - API key (required) |
| 14 | + * OPENAI_BASE_URL - endpoint override (optional) |
| 15 | + * OPENAI_MODEL - model name (optional, default: gpt-4o) |
| 16 | + */ |
| 17 | + |
| 18 | +import fs from "node:fs"; |
| 19 | +import path from "node:path"; |
| 20 | +import OpenAI from "openai"; |
| 21 | + |
/** Root directory that holds one sub-directory per date. */
const DIGESTS_DIR = "digests";
/** Completion-token cap passed to the chat completion request. */
const MAX_TOKENS = 16_384;
/** Initial number of slots in the request semaphore. */
const LLM_CONCURRENCY = 3;
/** How many times a rate-limited (429) request is retried. */
const MAX_RETRIES = 3;
/** Base delay for the exponential backoff between retries, in milliseconds. */
const RETRY_BASE_MS = 5 * 1000;

/** Report basenames; each is looked up as `<name>.md` inside the date directory. */
const ZH_REPORTS = ["ai-cli", "ai-agents", "ai-web", "ai-trending", "ai-hn"];
/** Rollup report basenames (weekly / monthly by name). */
const ROLLUP_REPORTS = ["ai-weekly", "ai-monthly"];
/** Every report the script attempts to translate, in processing order. */
const ALL_REPORTS = ZH_REPORTS.concat(ROLLUP_REPORTS);
| 31 | + |
// Model name comes from OPENAI_MODEL, falling back to gpt-4o when unset.
const model = process.env["OPENAI_MODEL"] ?? "gpt-4o";

// Shared OpenAI client; the key and the optional endpoint override are read
// from the environment once, at module load.
const client = new OpenAI({
  baseURL: process.env["OPENAI_BASE_URL"],
  apiKey: process.env["OPENAI_API_KEY"],
});
| 37 | + |
// Semaphore state shared by acquire() and release().
// `queue` holds the resolvers of callers waiting for a slot, oldest first.
const queue: Array<() => void> = [];
// `slots` is the number of request slots currently free.
let slots = LLM_CONCURRENCY;
| 40 | + |
/**
 * Take one slot from the semaphore.
 *
 * @returns A promise that is already resolved when a slot is free, or one that
 *   resolves once release() hands a slot to this waiter.
 */
function acquire(): Promise<void> {
  if (slots <= 0) {
    // No free slot: park the resolver until release() wakes it.
    return new Promise<void>((wake) => {
      queue.push(wake);
    });
  }
  slots -= 1;
  return Promise.resolve();
}
| 48 | + |
/**
 * Give a slot back to the semaphore.
 *
 * If a caller is waiting, the slot is handed to it directly (the free-slot
 * counter stays unchanged); otherwise the counter is incremented.
 */
function release(): void {
  const waiter = queue.shift();
  if (waiter === undefined) {
    slots += 1;
    return;
  }
  waiter();
}
| 54 | + |
/**
 * Send `prompt` as a single user message and return the reply text.
 *
 * The request runs under the module semaphore (acquire/release). The slot is
 * held only while the request is in flight and is always given back before
 * any backoff sleep, so waiting callers can proceed during the delay.
 *
 * Rate-limit failures (status 429, or an error whose text contains "429") are
 * retried up to MAX_RETRIES times, waiting RETRY_BASE_MS * 2^attempt between
 * tries. Every other failure is rethrown immediately.
 *
 * @param prompt - Full prompt text, sent as the only message.
 * @returns The content of the first choice in the response.
 * @throws If the reply is empty, if the reply was cut off by the token limit
 *   (finish_reason "length"), if the request fails with a non-429 error, or if
 *   the 429 retries are exhausted.
 */
async function callLlm(prompt: string): Promise<string> {
  for (let attempt = 0; ; attempt++) {
    let failure: unknown;

    await acquire();
    try {
      const res = await client.chat.completions.create({
        model,
        max_completion_tokens: MAX_TOKENS,
        messages: [{ role: "user", content: prompt }],
      });
      const choice = res.choices[0];
      const text = choice?.message?.content;
      if (!text) throw new Error("Empty LLM response");
      // A reply that hit the token cap is incomplete. Returning it would let
      // the caller persist a partial document as if it were the full result.
      if (choice?.finish_reason === "length") {
        throw new Error("LLM response truncated (finish_reason=length)");
      }
      return text;
    } catch (err) {
      failure = err;
    } finally {
      // Exactly one release per acquire, on success and on failure alike.
      release();
    }

    // Only reached when the attempt failed; the slot is already free here.
    const is429 =
      (failure as { status?: number })?.status === 429 || String(failure).includes("429");
    if (attempt >= MAX_RETRIES || !is429) throw failure;

    const wait = RETRY_BASE_MS * 2 ** attempt;
    console.error(`[translate] 429 — retry ${attempt + 1}/${MAX_RETRIES} in ${wait / 1000}s`);
    await new Promise((r) => setTimeout(r, wait));
  }
}
| 84 | + |
/**
 * Build the translation prompt for one Markdown document.
 *
 * The prompt consists of a fixed instruction header (role, translation rules),
 * a `---` separator, and then the document itself, unmodified.
 *
 * @param markdown - Source Markdown to be translated.
 * @returns The complete prompt text.
 */
function buildPrompt(markdown: string): string {
  const header = [
    "You are a professional translator. Translate the following Markdown document from Chinese to Vietnamese.",
    "",
    "Rules:",
    "- Preserve ALL Markdown formatting exactly (headings, tables, links, bold, italic, code blocks, blockquotes, lists).",
    "- Preserve ALL URLs, GitHub links, issue/PR numbers (e.g., #12345) unchanged.",
    "- Preserve ALL proper nouns (tool names, company names, project names) in their original form.",
    "- Preserve ALL code snippets unchanged.",
    "- Translate naturally and fluently into Vietnamese — not word-by-word.",
    "- Do NOT add any commentary, explanation, or notes.",
    "- Output ONLY the translated Markdown document.",
  ];
  // Blank line, separator, blank line, then the document body.
  return [...header, "", "---", "", markdown].join("\n");
}
| 101 | + |
/**
 * Current calendar date at UTC+8, formatted as `YYYY-MM-DD`.
 *
 * The current instant is shifted forward by eight hours and the date part of
 * its ISO-8601 (UTC) rendering is returned.
 */
function todayCST(): string {
  const offsetMs = 8 * 60 * 60 * 1000;
  const shifted = new Date(Date.now() + offsetMs);
  return shifted.toISOString().substring(0, 10);
}
| 107 | + |
/**
 * Translate one report inside a date directory and write the result next to it.
 *
 * Reads `<report>.md`, sends it through the LLM, and writes `<report>-vi.md`.
 * Nothing is done (and `false` is returned) when the source file is missing,
 * when the Vietnamese file already exists, or when the source is blank.
 *
 * @param datePath - Directory containing the report files.
 * @param report - Report basename without extension.
 * @returns `true` if a translation was written, `false` if the report was skipped.
 * @throws Whatever the LLM call or the file operations throw.
 */
async function translateFile(datePath: string, report: string): Promise<boolean> {
  const sourcePath = path.join(datePath, `${report}.md`);
  const targetPath = path.join(datePath, `${report}-vi.md`);

  // Log the reason and report "nothing translated".
  const skip = (message: string): boolean => {
    console.log(message);
    return false;
  };

  if (!fs.existsSync(sourcePath)) {
    return skip(`[translate] Skip ${report} — source not found`);
  }
  if (fs.existsSync(targetPath)) {
    return skip(`[translate] Skip ${report} — Vietnamese version exists`);
  }

  const original = fs.readFileSync(sourcePath, "utf-8");
  if (original.trim().length === 0) {
    return skip(`[translate] Skip ${report} — source is empty`);
  }

  console.log(`[translate] Translating ${report}...`);
  const translated = await callLlm(buildPrompt(original));

  fs.mkdirSync(path.dirname(targetPath), { recursive: true });
  fs.writeFileSync(targetPath, translated, "utf-8");
  console.log(`[translate] Done ${report}-vi.md`);
  return true;
}
| 136 | + |
/**
 * Script entry point: translate every known report for one date.
 *
 * The date comes from the first CLI argument, or defaults to today's date as
 * computed by todayCST(). All reports are started together; a failure in one
 * does not stop the others. The process exits with code 1 when the date
 * directory does not exist or when at least one report failed.
 */
async function main(): Promise<void> {
  const date = process.argv[2] ?? todayCST();
  const dir = path.join(DIGESTS_DIR, date);

  if (!fs.existsSync(dir)) {
    console.error(`[translate] Directory not found: ${dir}`);
    process.exit(1);
  }

  console.log(`[translate] Processing date: ${date}`);
  console.log(`[translate] Using model: ${model}`);

  const jobs = ALL_REPORTS.map((report) => translateFile(dir, report));
  const outcomes = await Promise.allSettled(jobs);

  let translated = 0;
  let failed = 0;
  outcomes.forEach((outcome, index) => {
    if (outcome.status === "fulfilled") {
      // Fulfilled with `false` means the report was skipped, not translated.
      if (outcome.value) translated += 1;
      return;
    }
    console.error(`[translate] Failed ${ALL_REPORTS[index]}: ${outcome.reason}`);
    failed += 1;
  });

  console.log(`[translate] Complete: ${translated} translated, ${failed} failed`);
  if (failed > 0) process.exit(1);
}
| 165 | + |
// Last-resort handler: log anything main() did not handle and exit non-zero.
function exitOnFatal(err: unknown): never {
  console.error(err);
  process.exit(1);
}

main().catch(exitOnFatal);
0 commit comments