Skip to content

Commit a6ecb1f

Browse files
committed
Add IIIF content search route
1 parent 21643a4 commit a6ecb1f

8 files changed

Lines changed: 355 additions & 0 deletions

File tree

api/src/api/opensearch.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ async function getWorkFileSets(workId, opts = {}) {
2727
const {
2828
allowPrivate = false,
2929
allowUnpublished = false,
30+
annotationsQuery = null,
3031
role = null,
3132
source = null,
3233
sortBy = null,
@@ -52,6 +53,9 @@ async function getWorkFileSets(workId, opts = {}) {
5253
if (role) {
5354
mustClauses.push({ term: { role: role } });
5455
}
56+
if (annotationsQuery) {
57+
mustClauses.push({ match: { "annotations.content": annotationsQuery } });
58+
}
5559

5660
const searchBody = {
5761
size: 10000,

api/src/api/response/iiif/manifest.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,12 @@ async function transform(response, options = {}) {
368368
}
369369
}
370370

371+
jsonManifest.service = [
372+
{
373+
id: `${dcApiEndpoint()}/works/${source.id}/search?as=iiif`,
374+
type: "SearchService2",
375+
},
376+
];
371377
jsonManifest.provider = [provider];
372378
jsonManifest.logo = [nulLogo];
373379
const navPlace = buildNavPlace(source);

api/src/api/response/iiif/presentation-api/items.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,10 @@ module.exports = {
157157
buildImageService,
158158
buildSupplementingAnnotation,
159159
buildTranscriptionAnnotation,
160+
getTranscriptionContent,
160161
isAltFormat,
161162
isAudioVideo,
162163
isImage,
163164
isPDF,
165+
normalizeLanguages,
164166
};
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
const { dcApiEndpoint } = require("../../../environment");
2+
const { getWorkFileSets } = require("../../opensearch");
3+
const {
4+
getTranscriptionContent,
5+
normalizeLanguages,
6+
} = require("./presentation-api/items");
7+
8+
/**
 * Extract a window of text around the first case-insensitive occurrence of
 * `q` inside `content`.
 *
 * @param {string} content - Text to search in.
 * @param {string} q - Phrase to look for; matched literally, ignoring case.
 * @param {number} [contextChars=100] - Maximum number of characters kept on
 *   each side of the match.
 * @returns {string|null} The trimmed window, prefixed and/or suffixed with
 *   "..." where it was cut short of the start/end of `content`; `null` when
 *   `q` does not occur or when either argument is not a string.
 */
function extractSnippet(content, q, contextChars = 100) {
  // An annotation without usable text simply has no snippet; returning null
  // (instead of throwing on `.toLowerCase()`) lets callers skip it.
  if (typeof content !== "string" || typeof q !== "string") return null;

  const idx = content.toLowerCase().indexOf(q.toLowerCase());
  if (idx === -1) return null;

  const start = Math.max(0, idx - contextChars);
  const end = Math.min(content.length, idx + q.length + contextChars);
  const snippet = content.slice(start, end).trim();

  // Ellipses mark the sides on which the window does not reach the text boundary.
  const prefix = start > 0 ? "..." : "";
  const suffix = end < content.length ? "..." : "";
  return `${prefix}${snippet}${suffix}`;
}
18+
19+
/**
 * Build the `TextualBody` of a search-result annotation.
 *
 * @param {object} annotation - Source annotation; only its `language`
 *   property is read, and it is passed through `normalizeLanguages`.
 * @param {string} snippet - Text placed in the body's `value`.
 * @returns {object} A plain-text `TextualBody`. `language` is a single value
 *   when exactly one language is present, the whole list when there are
 *   several, and omitted when there are none.
 */
function buildSearchAnnotationBody(annotation, snippet) {
  // normalizeLanguages is used as returning an array-like list of languages.
  const langs = normalizeLanguages(annotation.language);
  const textualBody = {
    type: "TextualBody",
    value: snippet,
    format: "text/plain",
  };

  if (langs.length > 0) {
    textualBody.language = langs.length === 1 ? langs[0] : langs;
  }

  return textualBody;
}
33+
34+
/**
 * Build a IIIF Content Search 2.0 `AnnotationPage` listing the transcription
 * annotations of a work whose text contains `q`.
 *
 * Two file-set queries run in parallel:
 *   1. every "Access" file set of the work (ids and grouping only), used to
 *      number the canvases;
 *   2. the "Access" file sets whose annotations match `q`, used to build the
 *      result annotations.
 *
 * The canvas index must come from the complete list: numbering the filtered
 * hits instead would shift every index as soon as an earlier file set does
 * not match, so `target` would point at the wrong canvas.
 * NOTE(review): this assumes manifest.js numbers canvases by group position
 * among the rank-sorted Access file sets (as the grouping comments carried
 * over from the original implementation state) — confirm against manifest.js.
 *
 * @param {string} workId - Id of the work being searched.
 * @param {string} q - Search phrase.
 * @param {object} [opts]
 * @param {boolean} [opts.allowPrivate=false] - Forwarded to getWorkFileSets.
 * @param {boolean} [opts.allowUnpublished=false] - Forwarded to getWorkFileSets.
 * @returns {Promise<{statusCode: number, headers: object, body: string}>}
 *   Always a 200 response; a non-200 answer from either file-set query is
 *   treated as "no file sets" and therefore yields an empty `items` list.
 */
async function transform(workId, q, opts = {}) {
  const { allowPrivate = false, allowUnpublished = false } = opts;

  const manifestId = `${dcApiEndpoint()}/works/${workId}?as=iiif`;
  const searchId = `${dcApiEndpoint()}/works/${workId}/search?as=iiif&q=${encodeURIComponent(q)}`;

  const baseQuery = {
    allowPrivate,
    allowUnpublished,
    role: "Access",
    sortBy: "rank",
  };

  const [allResponse, matchResponse] = await Promise.all([
    getWorkFileSets(workId, { ...baseQuery, source: ["id", "group_with"] }),
    getWorkFileSets(workId, {
      ...baseQuery,
      annotationsQuery: q,
      source: ["id", "annotations", "group_with"],
    }),
  ]);

  const parseFileSets = (response) =>
    response.statusCode === 200
      ? JSON.parse(response.body).hits.hits.map((hit) => hit._source)
      : [];

  // Ungrouped file sets use their own id as the group key.
  const groupKeyOf = (fileSet) => fileSet.group_with || fileSet.id;

  // Canvas index = position at which each group first appears in the full list.
  const canvasIndexByGroup = new Map();
  for (const fileSet of parseFileSets(allResponse)) {
    const key = groupKeyOf(fileSet);
    if (!canvasIndexByGroup.has(key)) {
      canvasIndexByGroup.set(key, canvasIndexByGroup.size);
    }
  }

  // Matching file sets, grouped the same way (Map keeps first-seen order).
  const matchedGroups = new Map();
  for (const fileSet of parseFileSets(matchResponse)) {
    const key = groupKeyOf(fileSet);
    if (!matchedGroups.has(key)) matchedGroups.set(key, []);
    matchedGroups.get(key).push(fileSet);
  }

  const items = [];

  for (const [groupKey, groupFileSets] of matchedGroups) {
    const canvasIndex = canvasIndexByGroup.get(groupKey);
    // A hit missing from the full list has no known canvas position; emitting
    // a guessed target would be worse than omitting the hit.
    if (canvasIndex === undefined) continue;

    const canvasId = `${manifestId}/canvas/${canvasIndex}`;

    // Primary file set is the one whose id matches the group key; fall back
    // to the first matching member when the primary itself did not match.
    const primary =
      groupFileSets.find((fileSet) => fileSet.id === groupKey) ||
      groupFileSets[0];
    if (!primary?.annotations) continue;

    for (const ann of primary.annotations) {
      if (ann.type !== "transcription") continue;

      const content = getTranscriptionContent(ann);
      // Skip annotations without text rather than failing the whole search.
      if (typeof content !== "string") continue;

      const snippet = extractSnippet(content, q);
      if (!snippet) continue;

      items.push({
        id: `${canvasId}/annotation/${ann.id}`,
        type: "Annotation",
        motivation: "supplementing",
        body: buildSearchAnnotationBody(ann, snippet),
        target: canvasId,
      });
    }
  }

  return {
    statusCode: 200,
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      "@context": "http://iiif.io/api/search/2/context.json",
      id: searchId,
      type: "AnnotationPage",
      items,
    }),
  };
}
100+
101+
module.exports = { transform };
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
const { getWork } = require("../api/opensearch");
2+
const iiifSearchResponse = require("../api/response/iiif/search");
3+
const { wrap } = require("./middleware");
4+
5+
exports.handler = wrap(async (event) => {
6+
const id = event.pathParameters.id;
7+
const { as, q } = event.queryStringParameters;
8+
9+
const allowPrivate =
10+
event.userToken.isSuperUser() ||
11+
event.userToken.isReadingRoom() ||
12+
event.userToken.hasEntitlement(id);
13+
const allowUnpublished =
14+
event.userToken.isSuperUser() || event.userToken.hasEntitlement(id);
15+
16+
if (as !== "iiif" || !q?.trim()) {
17+
return {
18+
statusCode: 400,
19+
body: JSON.stringify({
20+
message: "Request must include ?as=iiif&q={query}",
21+
}),
22+
};
23+
}
24+
25+
const workResponse = await getWork(id, { allowPrivate, allowUnpublished });
26+
if (workResponse.statusCode !== 200) return workResponse;
27+
28+
return iiifSearchResponse.transform(id, q, { allowPrivate, allowUnpublished });
29+
});

api/template.yaml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,30 @@ Resources:
584584
ApiId: !Ref dcApi
585585
Path: /works/{id}/thumbnail
586586
Method: HEAD
587+
# Lambda for the per-work search route: handlers/get-work-search.handler is
# bound to GET and HEAD /works/{id}/search on dcApi, and the resource is only
# created when the DeployAPI condition holds.
getWorkSearchFunction:
588+
Type: AWS::Serverless::Function
589+
Condition: DeployAPI
590+
Properties:
591+
Handler: handlers/get-work-search.handler
592+
Description: IIIF Search 2.0 for a Work's transcription annotations.
593+
#* Layers:
594+
#* - !Ref apiDependencies
595+
Policies:
596+
- !Ref SecretsPolicy
597+
- !Ref readIndexPolicy
598+
Events:
599+
WorkApiGet:
600+
Type: HttpApi
601+
Properties:
602+
ApiId: !Ref dcApi
603+
Path: /works/{id}/search
604+
Method: GET
605+
WorkApiHead:
606+
Type: HttpApi
607+
Properties:
608+
ApiId: !Ref dcApi
609+
Path: /works/{id}/search
610+
Method: HEAD
587611
getSimilarFunction:
588612
Type: AWS::Serverless::Function
589613
Condition: DeployAPI

api/test/integration/get-work-by-id.test.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,10 @@ describe("Retrieve work by id", () => {
9191
"http://iiif.io/api/presentation/3/context.json"
9292
);
9393
expect(resultBody.label.none[0]).to.eq("Canary Record TEST 1");
94+
expect(resultBody.service).to.deep.include({
95+
id: `${process.env.DC_API_ENDPOINT}/works/1234/search?as=iiif`,
96+
type: "SearchService2",
97+
});
9498
});
9599

96100
it("will retrieve a private, unpublished work document with an entitlement", async () => {

0 commit comments

Comments
 (0)