|
| 1 | +const { dcApiEndpoint } = require("../../../environment"); |
| 2 | +const { getWorkFileSets } = require("../../opensearch"); |
| 3 | +const { |
| 4 | + getTranscriptionContent, |
| 5 | + normalizeLanguages, |
| 6 | +} = require("./presentation-api/items"); |
| 7 | + |
/**
 * Translates a [start, end) range measured in `content.toLowerCase()` back
 * into offsets of the original `content` string.
 *
 * Walks the string one code point at a time and accumulates how many code
 * units each character occupies before and after lowercasing.
 *
 * @param {string} content - Original (non-lowercased) text.
 * @param {number} loweredStart - Range start in the lowercased text.
 * @param {number} loweredEnd - Range end (exclusive) in the lowercased text.
 * @returns {[number, number]} Start and end offsets in `content`.
 */
function mapLoweredRangeToOriginal(content, loweredStart, loweredEnd) {
  let loweredPos = 0;
  let originalPos = 0;
  let start = null;
  for (const ch of content) {
    const loweredWidth = ch.toLowerCase().length;
    // The range starts inside (or at the beginning of) this character.
    if (start === null && loweredPos + loweredWidth > loweredStart) {
      start = originalPos;
    }
    if (loweredPos >= loweredEnd) break;
    loweredPos += loweredWidth;
    originalPos += ch.length;
  }
  return [start ?? originalPos, originalPos];
}

/**
 * Returns an excerpt of `content` surrounding the first case-insensitive
 * occurrence of `q`.
 *
 * @param {string} content - Text to search.
 * @param {string} q - Search term; both sides are lowercased before comparing.
 * @param {number} [contextChars=100] - Number of UTF-16 code units of context
 *   kept on each side of the match.
 * @returns {string|null} The trimmed excerpt, prefixed and/or suffixed with
 *   "..." on each side where text was cut off; null when `q` does not occur
 *   or when either argument is not a string.
 */
function extractSnippet(content, q, contextChars = 100) {
  // Non-string input cannot contain a match; report "no match" rather than throw.
  if (typeof content !== "string" || typeof q !== "string") return null;

  const loweredContent = content.toLowerCase();
  const loweredQuery = q.toLowerCase();
  const idx = loweredContent.indexOf(loweredQuery);
  if (idx === -1) return null;

  let matchStart = idx;
  let matchEnd = idx + loweredQuery.length;
  // Lowercasing can change the string length (e.g. U+0130 becomes two code
  // units), in which case offsets found in the lowercased copy no longer line
  // up with the original text and must be mapped back before slicing.
  if (loweredContent.length !== content.length) {
    [matchStart, matchEnd] = mapLoweredRangeToOriginal(
      content,
      matchStart,
      matchEnd,
    );
  }

  const start = Math.max(0, matchStart - contextChars);
  const end = Math.min(content.length, matchEnd + contextChars);
  let snippet = content.slice(start, end).trim();
  if (start > 0) snippet = "..." + snippet;
  if (end < content.length) snippet = snippet + "...";
  return snippet;
}
| 18 | + |
/**
 * Builds the plain-text `TextualBody` object for one search hit.
 *
 * The `language` property depends on what `normalizeLanguages` returns for
 * `annotation.language` (presumably an array of language codes — confirm
 * against its definition): one entry is emitted as a single value, several
 * entries as the whole list, and none omits the property.
 *
 * @param {object} annotation - Annotation whose `language` field is read.
 * @param {string} snippet - Excerpt to expose as the body value.
 * @returns {object} Body with `type`, `value`, `format` and, when available,
 *   `language`.
 */
function buildSearchAnnotationBody(annotation, snippet) {
  const langs = normalizeLanguages(annotation.language);

  const languageProps =
    langs.length === 1
      ? { language: langs[0] }
      : langs.length > 1
        ? { language: langs }
        : {};

  return {
    type: "TextualBody",
    value: snippet,
    format: "text/plain",
    ...languageProps,
  };
}
| 33 | + |
/**
 * Produces a search response (an `AnnotationPage`) for the transcription
 * annotations of a work that contain the query text.
 *
 * @param {string} workId - Identifier of the work to search within.
 * @param {string} q - Query text; forwarded to the file set lookup and used
 *   to cut a snippet out of each transcription.
 * @param {object} [opts] - Visibility flags forwarded to `getWorkFileSets`.
 * @param {boolean} [opts.allowPrivate=false]
 * @param {boolean} [opts.allowUnpublished=false]
 * @returns {Promise<{statusCode: number, headers: object, body: string}>}
 *   Always a 200 response; `items` is empty when the lookup does not return
 *   status 200 or nothing matches.
 */
async function transform(workId, q, opts = {}) {
  const { allowPrivate = false, allowUnpublished = false } = opts;

  const manifestId = `${dcApiEndpoint()}/works/${workId}?as=iiif`;
  const searchId = `${dcApiEndpoint()}/works/${workId}/search?as=iiif&q=${encodeURIComponent(q)}`;

  const response = await getWorkFileSets(workId, {
    allowPrivate,
    allowUnpublished,
    annotationsQuery: q,
    role: "Access",
    source: ["id", "annotations", "group_with"],
    sortBy: "rank",
  });

  // Any non-200 lookup result is treated as "no file sets".
  let fileSets = [];
  if (response.statusCode === 200) {
    const { hits } = JSON.parse(response.body);
    fileSets = hits.hits.map(({ _source }) => _source);
  }

  // Same grouping as manifest.js: a file set without `group_with` forms a
  // group keyed by its own id.
  const groups = {};
  for (const fileSet of fileSets) {
    const key = fileSet.group_with || fileSet.id;
    if (!groups[key]) groups[key] = [];
    groups[key].push(fileSet);
  }

  const items = [];

  // The position of a group in the entries list is used as its canvas index.
  for (const [position, [groupKey, members]] of Object.entries(
    groups,
  ).entries()) {
    const canvasId = `${manifestId}/canvas/${position}`;

    // As in manifest.js, the primary file set is the member whose id equals
    // the group key; fall back to the first member otherwise.
    const primary =
      members.find((member) => member.id === groupKey) || members[0];
    if (!primary?.annotations) continue;

    const transcriptions = primary.annotations.filter(
      (annotation) => annotation.type === "transcription",
    );

    for (const annotation of transcriptions) {
      const text = getTranscriptionContent(annotation);
      const snippet = extractSnippet(text, q);
      if (!snippet) continue;

      items.push({
        id: `${canvasId}/annotation/${annotation.id}`,
        type: "Annotation",
        motivation: "supplementing",
        body: buildSearchAnnotationBody(annotation, snippet),
        target: canvasId,
      });
    }
  }

  const page = {
    "@context": "http://iiif.io/api/search/2/context.json",
    id: searchId,
    type: "AnnotationPage",
    items,
  };

  return {
    statusCode: 200,
    headers: { "content-type": "application/json" },
    body: JSON.stringify(page),
  };
}
| 100 | + |
| 101 | +module.exports = { transform }; |
0 commit comments