From 25cb74084cc3bef5d9ffd38e6c76e2fd053557d9 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Wed, 25 Jun 2025 15:58:53 -0400 Subject: [PATCH 1/3] backwards updates from fulltext components modified: adsingestschema/Document.json modified: adsingestschema/RecordData.json --- adsingestschema/Document.json | 13 +++++++++++-- adsingestschema/RecordData.json | 25 ++++++++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/adsingestschema/Document.json b/adsingestschema/Document.json index 9bfc229..65d900f 100644 --- a/adsingestschema/Document.json +++ b/adsingestschema/Document.json @@ -1,5 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://raw.githubusercontent.com/adsabs/ingest_data_model/refs/heads/main/adsingestschema/Document.json", "title": "Document", "description": "Data schema for importing metadata into ADS pipeline", "type": "object", @@ -55,10 +56,18 @@ } }, "title": { - "$ref": "./Title.json" + "type": "array", + "description": "Array of Title.json objects for multilingual support", + "items": { + "$ref": "./Title.json" + } }, "subtitle": { - "$ref": "./Title.json" + "type": "array", + "description": "Array of Title.json objects for multilingual support", + "items": { + "$ref": "./Title.json" + } }, "abstract": { "type": "object", diff --git a/adsingestschema/RecordData.json b/adsingestschema/RecordData.json index c0807cc..bec46a1 100644 --- a/adsingestschema/RecordData.json +++ b/adsingestschema/RecordData.json @@ -7,8 +7,8 @@ "description": "Timestamp for when the metadata was harvested (e.g. 
file created timestamp)", "type": "string" }, - "parsedTime": { - "description": "Timestamp for when parsing commenced.", + "dataType": { + "description": "Type of data in this record (TODO: document allowed values)", "type": "string" }, "loadType": { @@ -41,18 +41,33 @@ "description": "If loadtype is fromFile, path to file; if fromURL, it's a URL", "type": "string" }, + "masterRecord": { + "description": "Canonical record identifier with which this object is associated", + "type": "string" + }, + "parsedTime": { + "description": "Timestamp for when parsing commenced.", + "type": "string" + }, "recordOrigin": { "description": "Provider of record (e.g. Publisher, STI, arXiv)", "type": "string" + }, + "UUID": { + "description": "Internal identifier UUID for this object", + "type": "string" } }, "required": [ "createdTime", - "parsedTime", - "loadType", + "dataType", "loadFormat", "loadLocation", - "recordOrigin" + "loadType", + "masterRecord", + "parsedTime", + "recordOrigin", + "UUID" ], "additionalProperties": false } From e166df1585b2ea50606f41e9cd85e5d9c23ebd96 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Mon, 11 Aug 2025 08:55:12 -0400 Subject: [PATCH 2/3] updates validation examples to match schema updates --- test_data/gcnc.json | 19 ++++++++++--------- test_data/gcnc_fails.json | 19 ++++++++++--------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/test_data/gcnc.json b/test_data/gcnc.json index 3b97a8d..00cfbd2 100644 --- a/test_data/gcnc.json +++ b/test_data/gcnc.json @@ -1,18 +1,23 @@ { "recordData": { "createdTime": "2021-08-30Z12:00:00", - "parsedTime": "2021-10-02Z18:50:00", + "dataType": "", "loadType": "fromFile", "loadFormat": "Text", "loadLocation": "./real_data/14500.gcn3", - "recordOrigin": "Publisher" + "masterRecord": "", + "parsedTime": "2021-10-02Z18:50:00", + "recordOrigin": "Publisher", + "UUID": "SciX:0123-abcd-6789" }, "pubDate": { "electrDate": "2013-04-29" }, - "title": { - "textEnglish": "GRB 100728A: GROND host detection and X-shooter redshift" - }, + 
"title": [ + { + "textEnglish": "GRB 100728A: GROND host detection and X-shooter redshift" + } + ], "authors": [ { "name": { @@ -61,10 +66,6 @@ "abstract": { "textEnglish": "TITLE: GCN CIRCULAR\nNUMBER: 14500\nSUBJECT: GRB 100728A: GROND host detection and X-shooter redshift\nDATE: 13/04/29 11:32:33 GMT\nFROM: Thomas Kruehler at Dark Cosmology Center \n\nT. Kruehler (DARK), J. Greiner (MPE) and D. A. Kann (TLS) report:\n\nWe performed deep observations of the field of GRB 100728A\nsimultaneously in g'r'i'z'JHK with GROND (Greiner et al. 2008,\nPASP 120, 405). GRB 100728A triggered Swift (Cannizzo et al.,\nGCN 11004), GBM (von Kienlin, GCN 11006), Konus-Wind (Golenetskii\net al., GCN 11021) and Suzaku WAM (Tsai et al., GCN 11037).\n\nOur late observations were performed on 2010-11-10, 105 days\nafter the trigger, and consisted of dithered images with a total of\n4427 s exposure in g'r'i'z' and 3600 s in JHK under clear sky\nconditions and a seeing of 0.8\".\n\nInside the enhanced X-ray error-circle\n(http://www.swift.ac.uk/xrt_positions), we detect an extended\nobject with an r'-band AB magnitude of r' = 25.0 +/- 0.2.\n\nIts barycentric coordinates are:\n\nRA (J2000) = 05:55:01.98\nDec. (J2000) = -15:15:19.7\n\nwith uncertainties of 0.3\" in each coordinate.\nRegistering the early afterglow images (Olivares et al., GCN 11020)\nagainst the late host frame, we derive an offset between galaxy\ncenter and afterglow of approximately 0.4\".\n\nA spectrum of this galaxy was taken on 2012-11-15 with the ESO VLT\nUT2 equipped with the X-shooter spectrograph, covering the\nwavelength range 3000-25000 AA. The seeing was 1.2\" and the\ntotal exposure of our spectrum was 2400 s. 
In the VIS and NIR arm,\nwe detect emission lines, which we interpret as being due to\n[O II](3729) and Halpha at a common redshift of z = 1.567.\n\nThe bright X-ray emission (Evans et al., GCN 11014), coupled with the\nfaint and red optical/NIR afterglow (Olivares et al., GCN 11020) is\ncharacteristic of dust-extinguished events. Fitting the available\nafterglow data with synchrotron models, the visual extinction along\nthe GRB sightline is constrained to 1.5 mag < A_V < 3 mag at z = 1.567.\n\n" }, - "fulltext": { - "language": "en", - "body": "TITLE: GCN CIRCULAR\nNUMBER: 14500\nSUBJECT: GRB 100728A: GROND host detection and X-shooter redshift\nDATE: 13/04/29 11:32:33 GMT\nFROM: Thomas Kruehler at Dark Cosmology Center \n\nT. Kruehler (DARK), J. Greiner (MPE) and D. A. Kann (TLS) report:\n\nWe performed deep observations of the field of GRB 100728A\nsimultaneously in g'r'i'z'JHK with GROND (Greiner et al. 2008,\nPASP 120, 405). GRB 100728A triggered Swift (Cannizzo et al.,\nGCN 11004), GBM (von Kienlin, GCN 11006), Konus-Wind (Golenetskii\net al., GCN 11021) and Suzaku WAM (Tsai et al., GCN 11037).\n\nOur late observations were performed on 2010-11-10, 105 days\nafter the trigger, and consisted of dithered images with a total of\n4427 s exposure in g'r'i'z' and 3600 s in JHK under clear sky\nconditions and a seeing of 0.8\".\n\nInside the enhanced X-ray error-circle\n(http://www.swift.ac.uk/xrt_positions), we detect an extended\nobject with an r'-band AB magnitude of r' = 25.0 +/- 0.2.\n\nIts barycentric coordinates are:\n\nRA (J2000) = 05:55:01.98\nDec. (J2000) = -15:15:19.7\n\nwith uncertainties of 0.3\" in each coordinate.\nRegistering the early afterglow images (Olivares et al., GCN 11020)\nagainst the late host frame, we derive an offset between galaxy\ncenter and afterglow of approximately 0.4\".\n\nA spectrum of this galaxy was taken on 2012-11-15 with the ESO VLT\nUT2 equipped with the X-shooter spectrograph, covering the\nwavelength range 3000-25000 AA. 
The seeing was 1.2\" and the\ntotal exposure of our spectrum was 2400 s. In the VIS and NIR arm,\nwe detect emission lines, which we interpret as being due to\n[O II](3729) and Halpha at a common redshift of z = 1.567.\n\nThe bright X-ray emission (Evans et al., GCN 11014), coupled with the\nfaint and red optical/NIR afterglow (Olivares et al., GCN 11020) is\ncharacteristic of dust-extinguished events. Fitting the available\nafterglow data with synchrotron models, the visual extinction along\nthe GRB sightline is constrained to 1.5 mag < A_V < 3 mag at z = 1.567.\n\n" - }, "astronomicalObjects": [ "GRB 100728A" ], diff --git a/test_data/gcnc_fails.json b/test_data/gcnc_fails.json index 65820aa..aff8d23 100644 --- a/test_data/gcnc_fails.json +++ b/test_data/gcnc_fails.json @@ -1,18 +1,23 @@ { "recordData": { "createdTime": "2021-08-30Z12:00:00", - "parsedTime": "2021-10-02Z18:50:00", + "dataType": "", "loadType": "fromFile", "loadFormat": "Text", "loadLocation": "./real_data/14500.gcn3", - "recordOrigin": "Publisher" + "masterRecord": "", + "parsedTime": "2021-10-02Z18:50:00", + "recordOrigin": "Publisher", + "UUID": "SciX:0123-abcd-6789" }, "pubDate": { "electrDate": "2013-04-29" }, - "title": { - "textEnglish": "GRB 100728A: GROND host detection and X-shooter redshift" - }, + "title": [ + { + "textEnglish": "GRB 100728A: GROND host detection and X-shooter redshift" + } + ], "authors": [ { "name": { @@ -61,10 +66,6 @@ "abstract": { "textEnglish": "TITLE: GCN CIRCULAR\nNUMBER: 14500\nSUBJECT: GRB 100728A: GROND host detection and X-shooter redshift\nDATE: 13/04/29 11:32:33 GMT\nFROM: Thomas Kruehler at Dark Cosmology Center \n\nT. Kruehler (DARK), J. Greiner (MPE) and D. A. Kann (TLS) report:\n\nWe performed deep observations of the field of GRB 100728A\nsimultaneously in g'r'i'z'JHK with GROND (Greiner et al. 2008,\nPASP 120, 405). 
GRB 100728A triggered Swift (Cannizzo et al.,\nGCN 11004), GBM (von Kienlin, GCN 11006), Konus-Wind (Golenetskii\net al., GCN 11021) and Suzaku WAM (Tsai et al., GCN 11037).\n\nOur late observations were performed on 2010-11-10, 105 days\nafter the trigger, and consisted of dithered images with a total of\n4427 s exposure in g'r'i'z' and 3600 s in JHK under clear sky\nconditions and a seeing of 0.8\".\n\nInside the enhanced X-ray error-circle\n(http://www.swift.ac.uk/xrt_positions), we detect an extended\nobject with an r'-band AB magnitude of r' = 25.0 +/- 0.2.\n\nIts barycentric coordinates are:\n\nRA (J2000) = 05:55:01.98\nDec. (J2000) = -15:15:19.7\n\nwith uncertainties of 0.3\" in each coordinate.\nRegistering the early afterglow images (Olivares et al., GCN 11020)\nagainst the late host frame, we derive an offset between galaxy\ncenter and afterglow of approximately 0.4\".\n\nA spectrum of this galaxy was taken on 2012-11-15 with the ESO VLT\nUT2 equipped with the X-shooter spectrograph, covering the\nwavelength range 3000-25000 AA. The seeing was 1.2\" and the\ntotal exposure of our spectrum was 2400 s. In the VIS and NIR arm,\nwe detect emission lines, which we interpret as being due to\n[O II](3729) and Halpha at a common redshift of z = 1.567.\n\nThe bright X-ray emission (Evans et al., GCN 11014), coupled with the\nfaint and red optical/NIR afterglow (Olivares et al., GCN 11020) is\ncharacteristic of dust-extinguished events. Fitting the available\nafterglow data with synchrotron models, the visual extinction along\nthe GRB sightline is constrained to 1.5 mag < A_V < 3 mag at z = 1.567.\n\n" }, - "fulltext": { - "language": "en", - "body": "TITLE: GCN CIRCULAR\nNUMBER: 14500\nSUBJECT: GRB 100728A: GROND host detection and X-shooter redshift\nDATE: 13/04/29 11:32:33 GMT\nFROM: Thomas Kruehler at Dark Cosmology Center \n\nT. Kruehler (DARK), J. Greiner (MPE) and D. A. 
Kann (TLS) report:\n\nWe performed deep observations of the field of GRB 100728A\nsimultaneously in g'r'i'z'JHK with GROND (Greiner et al. 2008,\nPASP 120, 405). GRB 100728A triggered Swift (Cannizzo et al.,\nGCN 11004), GBM (von Kienlin, GCN 11006), Konus-Wind (Golenetskii\net al., GCN 11021) and Suzaku WAM (Tsai et al., GCN 11037).\n\nOur late observations were performed on 2010-11-10, 105 days\nafter the trigger, and consisted of dithered images with a total of\n4427 s exposure in g'r'i'z' and 3600 s in JHK under clear sky\nconditions and a seeing of 0.8\".\n\nInside the enhanced X-ray error-circle\n(http://www.swift.ac.uk/xrt_positions), we detect an extended\nobject with an r'-band AB magnitude of r' = 25.0 +/- 0.2.\n\nIts barycentric coordinates are:\n\nRA (J2000) = 05:55:01.98\nDec. (J2000) = -15:15:19.7\n\nwith uncertainties of 0.3\" in each coordinate.\nRegistering the early afterglow images (Olivares et al., GCN 11020)\nagainst the late host frame, we derive an offset between galaxy\ncenter and afterglow of approximately 0.4\".\n\nA spectrum of this galaxy was taken on 2012-11-15 with the ESO VLT\nUT2 equipped with the X-shooter spectrograph, covering the\nwavelength range 3000-25000 AA. The seeing was 1.2\" and the\ntotal exposure of our spectrum was 2400 s. In the VIS and NIR arm,\nwe detect emission lines, which we interpret as being due to\n[O II](3729) and Halpha at a common redshift of z = 1.567.\n\nThe bright X-ray emission (Evans et al., GCN 11014), coupled with the\nfaint and red optical/NIR afterglow (Olivares et al., GCN 11020) is\ncharacteristic of dust-extinguished events. 
Fitting the available\nafterglow data with synchrotron models, the visual extinction along\nthe GRB sightline is constrained to 1.5 mag < A_V < 3 mag at z = 1.567.\n\n" - }, "astronomicalObjects": [ "GRB 100728A" ], From 93920cc8985c9cbdfd6963e8cb2b35574726c409 Mon Sep 17 00:00:00 2001 From: Matthew Templeton Date: Wed, 10 Sep 2025 14:26:19 -0400 Subject: [PATCH 3/3] Update schema location modified: adsingestschema/Document.json --- adsingestschema/Document.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adsingestschema/Document.json b/adsingestschema/Document.json index 65d900f..efefd13 100644 --- a/adsingestschema/Document.json +++ b/adsingestschema/Document.json @@ -1,6 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://raw.githubusercontent.com/adsabs/ingest_data_model/refs/heads/main/adsingestschema/Document.json", + "$id": "https://raw.githubusercontent.com/seasidesparrow/ingest_data_model/refs/tags/v1.0.13/adsingestschema/Document.json", "title": "Document", "description": "Data schema for importing metadata into ADS pipeline", "type": "object",