Skip to content

Commit 1b7a8bd

Browse files
Fix OG image issues: CJK line breaking and generic title enrichment (#2879)
Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: felixkrrr <felixkrrr@users.noreply.github.com>
1 parent ea4ddb6 commit 1b7a8bd

2 files changed

Lines changed: 175 additions & 19 deletions

File tree

app/api/og/route.tsx

Lines changed: 124 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,31 @@ function wrapWords(text: string, maxChars: number): string[] {
2121
return lines.length ? lines : [""];
2222
}
2323

24+
/**
 * Break `text` into layout tokens that can be placed on a line independently.
 *
 * - Whitespace never appears in a token; it only terminates the word being built.
 * - Every character for which `isCjkOrFullWidth` is true becomes its own
 *   one-character token, so a line may break before or after it.
 * - Any other run of characters is accumulated into a single "word" token.
 *
 * The whitespace test runs before the CJK test, so a character that matches
 * `\s` is always treated as a separator.
 *
 * @param text Raw title text.
 * @returns Tokens in source order; empty array when `text` has no non-space characters.
 */
function tokenize(text: string): string[] {
  const whitespace = /\s/;
  const out: string[] = [];
  let pending = "";

  // Emit the word collected so far (if any) and start a fresh one.
  const flush = (): void => {
    if (pending) {
      out.push(pending);
      pending = "";
    }
  };

  // `for..of` walks code points, so astral characters stay intact.
  for (const ch of text) {
    if (whitespace.test(ch)) {
      flush();
    } else if (isCjkOrFullWidth(ch.codePointAt(0) ?? 0)) {
      flush();
      out.push(ch);
    } else {
      pending += ch;
    }
  }
  flush();
  return out;
}
48+
2449
/** ~average char width for F37 (panel / two-line checks). */
2550
const ANALOG_CHAR_EM = 0.48;
2651
/**
@@ -50,8 +75,49 @@ const TITLE_LONG_TITLE_FONT_SIZES = [120, 112, ...TITLE_FONT_SIZES];
5075
const DESC_FONT_SIZES = [26, 24, 22, 20, 18, 16, 14, 13, 12];
5176
const TITLE_MAX_REFINE_FS = 120;
5277

78+
/**
 * Count `line` in "narrow character" units for width estimation.
 *
 * A character for which `isCjkOrFullWidth` is true contributes `1 / em`
 * units; every other character contributes exactly 1. Multiplying the result
 * by `fontSize * em` (as `approxLineWidthPx` does) therefore budgets one full
 * em for each wide character and `em` of an em for each narrow one.
 *
 * @param line Text to measure (iterated by code point).
 * @param em   Average narrow-character width as a fraction of the font size.
 * @returns The sum of per-character units; 0 for an empty string.
 */
function effectiveCharCount(line: string, em: number): number {
  const wideUnits = 1.0 / em;
  let total = 0;
  for (const ch of line) {
    total += isCjkOrFullWidth(ch.codePointAt(0) ?? 0) ? wideUnits : 1;
  }
  return total;
}
95+
96+
/**
 * Whether code point `cp` should be treated as a full-width (roughly 1em)
 * character that may be wrapped on its own.
 *
 * Ranges covered:
 * - U+2E80–U+9FFF: CJK radicals through CJK unified ideographs. This single
 *   span already contains CJK symbols and punctuation (U+3000–303F),
 *   Hiragana (U+3040–309F), Katakana (U+30A0–30FF) and Katakana phonetic
 *   extensions (U+31F0–31FF), so those need no separate checks.
 * - U+AC00–U+D7AF: Hangul syllables block.
 * - U+F900–U+FAFF: CJK compatibility ideographs.
 * - U+FE30–U+FE4F: CJK compatibility forms.
 * - U+FF01–U+FF60: fullwidth ASCII variants (the halfwidth forms that start
 *   at U+FF61 are deliberately excluded).
 * - U+FFE0–U+FFE6: fullwidth signs.
 * - U+20000–U+3FFFD: supplementary and tertiary ideographic planes, i.e. CJK
 *   unified ideograph extensions B–H and the compatibility supplement.
 *
 * @param cp Unicode code point (not a UTF-16 code unit).
 */
function isCjkOrFullWidth(cp: number): boolean {
  return (
    (cp >= 0x2e80 && cp <= 0x9fff) || // CJK radicals … unified ideographs (incl. kana, CJK punctuation)
    (cp >= 0xac00 && cp <= 0xd7af) || // Hangul syllables
    (cp >= 0xf900 && cp <= 0xfaff) || // CJK compatibility ideographs
    (cp >= 0xfe30 && cp <= 0xfe4f) || // CJK compatibility forms
    (cp >= 0xff01 && cp <= 0xff60) || // fullwidth ASCII variants
    (cp >= 0xffe0 && cp <= 0xffe6) || // fullwidth signs
    (cp >= 0x20000 && cp <= 0x3fffd) // planes 2–3: CJK unified ext B–H, compat supplement
  );
}
111+
112+
/**
 * Report whether `text` contains at least one character that
 * `isCjkOrFullWidth` classifies as full-width.
 *
 * @param text String to scan, code point by code point.
 * @returns `true` as soon as one such character exists; `false` for an empty string.
 */
function hasCjk(text: string): boolean {
  // Spreading a string yields whole code points, matching `for..of` iteration.
  return [...text].some((ch) => isCjkOrFullWidth(ch.codePointAt(0) ?? 0));
}
118+
53119
function approxLineWidthPx(line: string, fontSize: number, em: number): number {
54-
return line.length * fontSize * em;
120+
return effectiveCharCount(line, em) * fontSize * em;
55121
}
56122

57123
/** Two lines using an approximate pixel budget (never wider than the panel). */
@@ -61,18 +127,25 @@ function splitTwoLinesByWidth(
61127
innerW: number
62128
): string[] | null {
63129
const budget = innerW;
64-
const words = title.trim().split(/\s+/).filter(Boolean);
130+
const cjk = hasCjk(title);
131+
const words = cjk
132+
? tokenize(title)
133+
: title.trim().split(/\s+/).filter(Boolean);
65134
if (words.length <= 1) return null;
135+
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
66136
let best: string[] | null = null;
67137
let bestImbalance = Infinity;
68138
for (let cut = 1; cut < words.length; cut++) {
69-
const l1 = words.slice(0, cut).join(" ");
70-
const l2 = words.slice(cut).join(" ");
139+
const l1 = join(words.slice(0, cut));
140+
const l2 = join(words.slice(cut));
71141
if (
72142
approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) <= budget &&
73143
approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM) <= budget
74144
) {
75-
const imbalance = Math.abs(l1.length - l2.length);
145+
const imbalance = Math.abs(
146+
approxLineWidthPx(l1, fontSize, ANALOG_CHAR_EM) -
147+
approxLineWidthPx(l2, fontSize, ANALOG_CHAR_EM)
148+
);
76149
if (imbalance < bestImbalance) {
77150
bestImbalance = imbalance;
78151
best = [l1, l2];
@@ -83,13 +156,17 @@ function splitTwoLinesByWidth(
83156
}
84157

85158
function splitTwoLines(title: string, maxCharsPerLine: number): string[] | null {
86-
const words = title.trim().split(/\s+/).filter(Boolean);
159+
const cjk = hasCjk(title);
160+
const words = cjk
161+
? tokenize(title)
162+
: title.trim().split(/\s+/).filter(Boolean);
87163
if (words.length <= 1) return null;
164+
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
88165
let best: string[] | null = null;
89166
let bestImbalance = Infinity;
90167
for (let cut = 1; cut < words.length; cut++) {
91-
const l1 = words.slice(0, cut).join(" ");
92-
const l2 = words.slice(cut).join(" ");
168+
const l1 = join(words.slice(0, cut));
169+
const l2 = join(words.slice(cut));
93170
if (l1.length <= maxCharsPerLine && l2.length <= maxCharsPerLine) {
94171
const imbalance = Math.abs(l1.length - l2.length);
95172
if (imbalance < bestImbalance) {
@@ -124,26 +201,49 @@ function titleTextBudgetWidthPx(innerW: number): number {
124201
return Math.max(40, innerW - TITLE_SPAN_H_PADDING_X) * TITLE_TEXT_LINE_FRAC;
125202
}
126203

204+
/**
 * Reassemble tokens into display text.
 *
 * Two neighbouring tokens are concatenated directly only when BOTH contain a
 * CJK/full-width character (per `hasCjk`); in every other pairing a single
 * space is inserted between them.
 *
 * @param tokens Tokens in display order.
 * @returns The joined string, or `""` when `tokens` is empty.
 */
function joinTokens(tokens: string[]): string {
  let text = "";
  let previousIsCjk = false;
  tokens.forEach((token, index) => {
    const currentIsCjk = hasCjk(token);
    if (index === 0) {
      text = token;
    } else {
      const glue = previousIsCjk && currentIsCjk ? "" : " ";
      text += glue + token;
    }
    previousIsCjk = currentIsCjk;
  });
  return text;
}
222+
127223
/**
128224
* Pack words into rows: each row is the longest prefix that still fits the text budget.
129225
* This matches one yellow row = one visual line (no `wrapWords` char cap that then soft-wraps in Satori).
226+
* Uses tokenize() for CJK-aware splitting so characters can wrap mid-"word".
130227
*/
131228
function greedyWordsToTitleRows(
132229
title: string,
133230
fontSize: number,
134231
innerW: number
135232
): string[] {
136-
const words = title.trim().split(/\s+/).filter(Boolean);
137-
if (words.length === 0) {
233+
const tokens = hasCjk(title)
234+
? tokenize(title)
235+
: title.trim().split(/\s+/).filter(Boolean);
236+
if (tokens.length === 0) {
138237
return [""];
139238
}
239+
const join = hasCjk(title) ? joinTokens : (t: string[]) => t.join(" ");
140240
const budget = titleTextBudgetWidthPx(innerW);
141241
const rows: string[] = [];
142242
let start = 0;
143-
while (start < words.length) {
243+
while (start < tokens.length) {
144244
let end = start;
145-
for (let j = start + 1; j <= words.length; j++) {
146-
const candidate = words.slice(start, j).join(" ");
245+
for (let j = start + 1; j <= tokens.length; j++) {
246+
const candidate = join(tokens.slice(start, j));
147247
if (
148248
approxLineWidthPx(candidate, fontSize, TITLE_LONG_LINE_EM) *
149249
TITLE_RENDER_SAFETY <=
@@ -155,10 +255,10 @@ function greedyWordsToTitleRows(
155255
}
156256
}
157257
if (end === start) {
158-
rows.push(words[start]);
258+
rows.push(tokens[start]);
159259
start += 1;
160260
} else {
161-
rows.push(words.slice(start, end).join(" "));
261+
rows.push(join(tokens.slice(start, end)));
162262
start = end;
163263
}
164264
}
@@ -237,7 +337,11 @@ function splitTitleIntoBalancedLines(
237337
innerW: number,
238338
targetLines: number
239339
): string[] | null {
240-
const words = title.trim().split(/\s+/).filter(Boolean);
340+
const cjk = hasCjk(title);
341+
const words = cjk
342+
? tokenize(title)
343+
: title.trim().split(/\s+/).filter(Boolean);
344+
const join = cjk ? joinTokens : (t: string[]) => t.join(" ");
241345
const n = words.length;
242346
if (targetLines < 1 || targetLines > n) return null;
243347
const budget = titleTextBudgetWidthPx(innerW);
@@ -248,7 +352,7 @@ function splitTitleIntoBalancedLines(
248352
for (let i = 0; i < n; i++) {
249353
let line = "";
250354
for (let j = i; j < n; j++) {
251-
line = line ? `${line} ${words[j]}` : words[j];
355+
line = join(words.slice(i, j + 1));
252356
const w =
253357
approxLineWidthPx(line, fontSize, TITLE_LONG_LINE_EM) *
254358
TITLE_RENDER_SAFETY;
@@ -302,7 +406,7 @@ function splitTitleIntoBalancedLines(
302406
for (let k = targetLines; k >= 1; k--) {
303407
const start = prev[k][end];
304408
if (start < 0) return null;
305-
out.push(words.slice(start, end).join(" "));
409+
out.push(join(words.slice(start, end)));
306410
end = start;
307411
}
308412
out.reverse();
@@ -385,6 +489,7 @@ function fitTitleLayoutLongAtLineCount(
385489
/**
 * Classify a title as "long".
 *
 * After trimming, a title is long when any of the following holds:
 * - it has more than 105 UTF-16 code units;
 * - it contains CJK/full-width characters and its effective character count
 *   (measured with `ANALOG_CHAR_EM`) exceeds 105;
 * - it has more than 14 whitespace-separated words.
 */
function isLongTitle(title: string): boolean {
  const trimmed = title.trim();
  const wordCount = (): number => trimmed.split(/\s+/).filter(Boolean).length;
  return (
    trimmed.length > 105 ||
    (hasCjk(trimmed) && effectiveCharCount(trimmed, ANALOG_CHAR_EM) > 105) ||
    wordCount() > 14
  );
}
@@ -393,6 +498,7 @@ function isLongTitle(title: string): boolean {
393498
/**
 * Classify a title as "short".
 *
 * A title is short only when, after trimming, it is non-empty, contains no
 * CJK/full-width characters, has at most 3 whitespace-separated words and is
 * at most 36 UTF-16 code units long.
 */
function isShortTitle(title: string): boolean {
  const trimmed = title.trim();
  if (trimmed === "" || hasCjk(trimmed)) {
    return false;
  }
  const wordCount = trimmed.split(/\s+/).filter(Boolean).length;
  return wordCount <= 3 && trimmed.length <= 36;
}

lib/mdx-page.ts

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,55 @@ export async function loadPage(
4040
}
4141
/* eslint-enable @typescript-eslint/no-explicit-any */
4242

43+
/**
 * Titles that are too generic to stand alone in an OG image (no parent context).
 * When one of these is the page title and there is a slug parent segment or
 * section title available, we enrich it automatically for the OG card.
 * Entries are lower-case; lookups must lower-case and trim the title first.
 */
const GENERIC_TITLES = new Set([
  "overview",
  "get started",
  "concepts",
  "core concepts",
  "data model",
  "troubleshooting and faq",
  "troubleshooting & faq",
  "mcp server",
]);

/** Slug words (lower-case) whose display form is not plain capitalisation. */
const SLUG_WORD_OVERRIDES: Record<string, string> = {
  api: "API",
  sdk: "SDK",
  faq: "FAQ",
  llm: "LLM",
  mcp: "MCP",
  ui: "UI",
};

/**
 * Turn one hyphen-separated slug segment into a display title, e.g.
 * `"api-reference"` -> `"API Reference"`.
 *
 * Each word is replaced by its `SLUG_WORD_OVERRIDES` entry when one exists,
 * otherwise its first character is upper-cased. Empty words produced by
 * leading, trailing or doubled hyphens are dropped so the result never
 * contains stray spaces.
 *
 * @param segment A single path segment of the page slug.
 * @returns Space-separated title; `""` when the segment has no words.
 */
function slugSegmentToTitle(segment: string): string {
  return segment
    .split("-")
    .filter(Boolean)
    .map((word) => {
      const key = word.toLowerCase();
      // Own-property check: a bare index lookup would also find inherited
      // members such as `constructor` and return a function instead of text.
      if (Object.prototype.hasOwnProperty.call(SLUG_WORD_OVERRIDES, key)) {
        return SLUG_WORD_OVERRIDES[key];
      }
      return word.charAt(0).toUpperCase() + word.slice(1);
    })
    .join(" ");
}

/**
 * Add parent context to a generic page title for use on the OG card.
 *
 * Titles not listed in `GENERIC_TITLES` (compared trimmed and lower-cased)
 * are returned unchanged. For generic titles the context is chosen as:
 * - the parent slug segment (second to last), when the slug has 2+ segments;
 * - the literal `"Langfuse"`, when the slug is empty;
 * - `sectionTitle`, when the slug has exactly one segment.
 *
 * If the chosen context is blank the original title is returned as-is, so the
 * result never starts or ends with a dangling space.
 *
 * @param title        Page title.
 * @param slug         Path segments of the page.
 * @param sectionTitle Title of the section the page belongs to.
 * @returns `"Get Started with <context>"` for "get started", otherwise
 *          `"<context> <title>"`, or `title` itself when nothing was enriched.
 */
function enrichOgTitle(title: string, slug: string[], sectionTitle: string): string {
  const trimmedTitle = title.trim();
  const lower = trimmedTitle.toLowerCase();
  if (!GENERIC_TITLES.has(lower)) return title;

  let context: string;
  if (slug.length >= 2) {
    context = slugSegmentToTitle(slug[slug.length - 2]);
  } else if (slug.length === 0) {
    context = "Langfuse";
  } else {
    context = sectionTitle;
  }

  context = context.trim();
  // Nothing meaningful to prepend: keep the title rather than emit " Overview".
  if (!context) return title;

  if (lower === "get started") return `Get Started with ${context}`;
  return `${context} ${trimmedTitle}`;
}
91+
4392
/**
4493
* Builds Next.js Metadata for a section page.
4594
*
@@ -61,8 +110,9 @@ export function buildSectionMetadata(
61110
const canonicalUrl =
62111
pageData.canonical ?? opts?.canonicalFallback ?? buildPageUrl(pagePath);
63112
const seoTitle = pageData.seoTitle || page.data.title;
113+
const ogTitle = pageData.seoTitle ? seoTitle : enrichOgTitle(seoTitle, slug, sectionTitle);
64114
const ogImage = buildOgImageUrl({
65-
title: seoTitle,
115+
title: ogTitle,
66116
description: page.data.description,
67117
section: sectionTitle,
68118
staticOgImage: pageData.ogImage,

0 commit comments

Comments
 (0)