From f86140a1b6e464dd9bed24018d3e7a6581364d45 Mon Sep 17 00:00:00 2001 From: Liam Lloyd-Tucker Date: Fri, 12 Jun 2026 17:28:34 -0700 Subject: [PATCH] Allow single IPTC keyword in metadata Currently, we expect the IPTC:Keywords field in metadata files to be an object containing potentially multiple keywords. However, in some files it is simply a single keyword expressed as a string. This commit updates the metadata attacher lambda to handle that case. --- .../src/fixtures/single_keyword_mets.xml | 25 +++++++++ packages/metadata_attacher/src/index.test.ts | 51 +++++++++++++++++++ packages/metadata_attacher/src/index.ts | 7 ++- packages/metadata_attacher/src/models.ts | 1 + packages/metadata_attacher/src/validators.ts | 15 ++++-- 5 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 packages/metadata_attacher/src/fixtures/single_keyword_mets.xml diff --git a/packages/metadata_attacher/src/fixtures/single_keyword_mets.xml b/packages/metadata_attacher/src/fixtures/single_keyword_mets.xml new file mode 100644 index 00000000..b5670a16 --- /dev/null +++ b/packages/metadata_attacher/src/fixtures/single_keyword_mets.xml @@ -0,0 +1,25 @@ + + + + + + + + objects/710a1def-caf8-48f2-8eee-0848b4cfda10 + + + + + 12.40 + image/jpeg + nature + + + + + + + + + + diff --git a/packages/metadata_attacher/src/index.test.ts b/packages/metadata_attacher/src/index.test.ts index f601bf99..fb33e76e 100644 --- a/packages/metadata_attacher/src/index.test.ts +++ b/packages/metadata_attacher/src/index.test.ts @@ -675,6 +675,57 @@ describe("handler", () => { ); }); + test("should extract a single IPTC keyword provided as a string", async () => { + const metsContent = await loadMetsFile("single_keyword_mets.xml"); + mockS3Send.mockResolvedValue({ + Body: { + transformToString: jest.fn().mockResolvedValue(metsContent), + }, + }); + + const event = { + Records: [ + { + messageId: "1", + receiptHandle: "1", + body: JSON.stringify({ + Message: JSON.stringify({ + Records: [ + { + s3: { + 
bucket: { + name: "test-bucket", + }, + object: { + key: "access_copies/53f9/8c3d/a29e/4fbf/8a4a/4fd9/991e/313d/1_upload-4a64ba7c-ceac-4547-ac13-c487b2711d5a/METS.4a64ba7c-ceac-4547-ac13-c487b2711d5a.xml", + }, + }, + }, + ], + }), + }), + attributes: { + ApproximateReceiveCount: "1", + SentTimestamp: "1", + SenderId: "1", + ApproximateFirstReceiveTimestamp: "1", + }, + messageAttributes: {}, + md5OfBody: "1", + eventSource: "1", + eventSourceARN: "1", + awsRegion: "1", + }, + ], + }; + + await handler(event, mock(), jest.fn()); + + const recordMetadata = await getRecordMetadata("1"); + expect(recordMetadata).toBeDefined(); + expect(recordMetadata?.tags).toEqual(["nature"]); + }); + test("should handle database error gracefully", async () => { const metsContent = await loadMetsFile("sample_mets.xml"); mockS3Send.mockResolvedValue({ diff --git a/packages/metadata_attacher/src/index.ts b/packages/metadata_attacher/src/index.ts index b7ff6b3f..eb92854a 100644 --- a/packages/metadata_attacher/src/index.ts +++ b/packages/metadata_attacher/src/index.ts @@ -127,10 +127,13 @@ const getPhotoMetadata = ( rdfMetadata["ExifIFD:DateTimeOriginal"], rdfMetadata["ExifIFD:OffsetTimeOriginal"], ); + const { "IPTC:Keywords": iptcKeywords } = rdfMetadata; const tags = - rdfMetadata["IPTC:Keywords"] === undefined + iptcKeywords === undefined ? undefined - : rdfMetadata["IPTC:Keywords"]["rdf:Bag"]["rdf:li"]; + : typeof iptcKeywords === "string" + ? 
[iptcKeywords] + : iptcKeywords["rdf:Bag"]["rdf:li"]; return { creationTime: diff --git a/packages/metadata_attacher/src/models.ts b/packages/metadata_attacher/src/models.ts index 516f7717..543e93af 100644 --- a/packages/metadata_attacher/src/models.ts +++ b/packages/metadata_attacher/src/models.ts @@ -33,6 +33,7 @@ export interface RdfMetadata { "IPTC:ObjectName": string | undefined; "IPTC:Caption-Abstract": string | undefined; "IPTC:Keywords": + | string | { "rdf:Bag": { "rdf:li": string[]; diff --git a/packages/metadata_attacher/src/validators.ts b/packages/metadata_attacher/src/validators.ts index 01aaec1e..0e8b838c 100644 --- a/packages/metadata_attacher/src/validators.ts +++ b/packages/metadata_attacher/src/validators.ts @@ -6,11 +6,16 @@ const rdfMetadataSchema = Joi.object({ "File:MIMEType": Joi.string().required(), "IPTC:ObjectName": Joi.string().optional().empty(""), "IPTC:Caption-Abstract": Joi.string().optional().empty(""), - "IPTC:Keywords": Joi.object({ - "rdf:Bag": Joi.object({ - "rdf:li": Joi.array().items(Joi.string()).required(), - }).required(), - }).optional(), + "IPTC:Keywords": Joi.alternatives() + .try( + Joi.string(), + Joi.object({ + "rdf:Bag": Joi.object({ + "rdf:li": Joi.array().items(Joi.string()).required(), + }).required(), + }), + ) + .optional(), "ExifIFD:Title": Joi.string().optional().empty(""), "ExifIFD:UserComment": Joi.string().optional().empty(""), "ExifIFD:Comments": Joi.string().optional().empty(""),