From 56a7bab19d98ff0568d27ceb53cf61da0c9a07b2 Mon Sep 17 00:00:00 2001 From: Filipe Lopes Date: Fri, 29 Aug 2025 06:57:42 -0300 Subject: [PATCH] docs: update match.md add info about __index required field in response, batches and search_normalized_score --- docs/source/oclapi/apireference/match.md | 210 +++++++++++++---------- 1 file changed, 119 insertions(+), 91 deletions(-) diff --git a/docs/source/oclapi/apireference/match.md b/docs/source/oclapi/apireference/match.md index 3376a90..20e4e0f 100644 --- a/docs/source/oclapi/apireference/match.md +++ b/docs/source/oclapi/apireference/match.md @@ -6,7 +6,14 @@ The `$match` endpoint allows you to find similar or matching concepts across dif `$match` API must accept POST (GET is not supported). +Note on row indexing: For every request, the API automatically adds a zero-based `__index` field to each `row` in the response. This acts as a local identifier to correlate each response item with the corresponding input item. The `__index` field is always returned by the API; the first row has `__index` 0. + +### Batching & Concurrency + +The API processes rows in batches. The client interface allows you to set the batch size, which is useful for requests with longer computation times—a smaller batch size helps avoid timeouts. By default, the UI sends two batch requests concurrently. For example, with 1000 rows and a batch size of 50, two batches (each with 50 rows) are sent concurrently; as soon as one finishes, the next batch is sent, so that two requests stay in flight until fewer than two batches remain. 
+ ### $match Algorithm Fields + - `id` - Exact match on `concept.id` in the target repository - `name` - Keyword or semantic search on concept primary display name - `synonyms` - Keyword or semantic search on all concept names and synonyms @@ -24,90 +31,97 @@ The `$match` endpoint allows you to find similar or matching concepts across dif - `mapping:list` - Matches concepts in the target repo that share a mapping, where the input is a list of mappings for the row. ## Request + +### Request URL parameters + ``` POST /concepts/$match/ ``` -| Parameter | Type | Required | Default | Description | -|-----------|------|----------|---------|-------------| -| `verbose` | Boolean | No | `false` | More details in results (concept details) | -| `limit` | Integer | No | `1` | Number of results to be returned or page size | -| `offset` | Integer | No | `0` | Number of results to skip | -| `page` | Integer | No | `1` | Page number for paginated results | -| `includeRetired` | Boolean | No | `false` | Match against retired concepts as well | -| `bestMatch` | Boolean | No | `false` | Forces a minimum search score threshold to be applied | -| `semantic` | Boolean | No | `false` | Use LM algo for matching | -| `numCandidates` | Integer | No | `5000` | Only needed when semantic=true. Range: 1 to 5000. For more information: https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy | -| `kNearest` | Integer | No | `5` | Only needed when semantic=true. 
Range: 1 to 10 | +| Parameter | Type | Required | Default | Description | +| ---------------- | ------- | -------- | ------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `verbose` | Boolean | No | `false` | More details in results (concept details) | +| `limit` | Integer | No | `1` | Number of results to be returned or page size | +| `offset` | Integer | No | `0` | Number of results to skip | +| `page` | Integer | No | `1` | Page number for paginated results | +| `includeRetired` | Boolean | No | `false` | Match against retired concepts as well | +| `bestMatch` | Boolean | No | `false` | Forces a minimum search score threshold to be applied | +| `semantic` | Boolean | No | `false` | Use LM algo for matching | +| `numCandidates` | Integer | No | `5000` | Only needed when semantic=true. Range: 1 to 5000. For more information: https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html#tune-approximate-knn-for-speed-accuracy | +| `kNearest` | Integer | No | `5` | Only needed when semantic=true. Range: 1 to 10 | ### Request Body Schema -| **Code (Name)** | **Card.** | **Type** | **Definition (Description)** | -| ------------------------------ | --------- | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `target_repo_url` | 1..1 | string | Repository URL to match against. Uses `$resolve` to identify the specific repo version. **Example:** `/orgs/CIEL/sources/CIEL/` | -| `rows` | 1..\* | list | List of concept-like key-value pairs; each row may have different fields. Only fields that are recognized by the matching algorithm are used, all other rows are ignored. 
**Example:** `[{"s_n":"1","name":"malaria"},{"s_n":"2","name":"blood type"}]` | -| `rows.id` | 0..1 | string | Exact match against a concept ID. *(May be removed in future versions.)* **Example Input Data:** `12`, `57`, `A01.1` | -| `rows.name` | 0..1 | string | Semantic or fuzzy search on primary display name. **Example Input Data:** `Anemia due to blood loss` | -| `rows.synonyms` | 0..1 | string | Semantic or fuzzy search across all names/synonyms. **Example Input Data:** `"Anemia, blood loss", "Anémie secondaire à une hémorragie"` | -| `rows.description` | 0..1 | string | Text search on concept descriptions. **Example Input Data:** `"Anemia due to bleeding or a hemorrhagic process"` | -| `rows.concept_class` | 0..1 | string | Match on concept class (e.g., diagnosis, symptom). **Example Input Data:** `Diagnosis`, `Symptom` | -| `rows.datatype` | 0..1 | string | Match on datatype (e.g., numeric, coded). **Example Input Data:** `Numeric`, `Coded` | -| `rows.mapping_code` | 0..1 | string | Exact match on a concept ID or mapping in the target repo version. **Example Input Data:** `D50.0`, `Z87.5`, `X59.9` | -| `rows.mapping_list` | 0..1 | string | Exact match on comma‑separated mapping list. *(In development.)* **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | -| `rows.same_as_map_codes` | 0..1 | string | Search only “same as” mappings. *(Deprecated.)* **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | -| `rows.other_map_codes` | 0..1 | string | Search all non‑“same as” mappings. *(Deprecated.)* **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | -| `map_config` | 0..\* | list | Optional list configuring mapping logic per row. **Example from Request Body:** see below. | -| `map_config.type` | 1..1 | code | Type of mapping: `mapping-code` or `mapping-list`. **Example:** `mapping-code`, `mapping-list` | -| `map_config.input_column` | 1..1 | string | Name of the row‑field to use. 
**Example:** `loinc-example`, `icd10-example`, `list example` | -| `map_config.target_source_url` | 0..1 | string | Target repo URL for `mapping-code` entries (required if type is `mapping-code`). **Example:** `/orgs/CIEL/sources/CIEL/` | -| `map_config.separator` | 0..1 | string | Separator between source name and code in `mapping-list`. **Example:** `:` | -| `map_config.delimiter` | 0..1 | string | Delimiter for multiple mappings in `mapping-list`. **Example:** `,` | -| `map_config.target_urls` | 0..1 | map | URL map of source mnemonics to repositories. Required for `mapping-list`. **Example:** `{"ICD10": "/orgs/WHO/sources/ICD-10-WHO/", "CIEL": "/orgs/CIEL/sources/CIEL/", "LOINC": "/orgs/Regenstrief/sources/LOINC/"}` | - - -## Response Format - -| **Code (Name)** | **Card.** | **Type** | **Definition (Description)** | -| ------------------------------ | --------- | -------------------- | ------- | -| _\_ | 1 | list | A list of response objects | -| row | 1 | map | The original row submitted, with no alteration | -| results | 1..* | list | Ordered list of concept candidates | -| results.url | 1 | string | | -| results.display_name | 1 | string | | -| results.id | 1 | string | | -| results.retired | 0..1 | bool | | -| results.concept_class | 0..1 | string | | -| results.datatype | 0..1 | string | | -| results.property | 0..* | list | | -| results.property.code | 1 | string | The key of the property (e.g. concept_class) | -| results.property.valueCode | 0..1 | string | | -| results.property.valueCoding | 0..1 | ... 
| | -| results.property.valueString | 0..1 | string | | -| results.property.valueInteger | 0..1 | int | | -| results.property.valueBoolean | 0..1 | bool | | -| results.property.valueDateTime | 0..1 | DateTime | | -| results.property.valueDecimal | 0..1 | decimal | | -| results.extras | 0..1 | map | | -| results.search_meta.search_score | 1 | decimal | | -| results.search_meta.search_highlight | 0..1 | map | | -| | | | | -| results.search_meta.match_type | 0..1 | string | | -| results.source | 0..1 | | | -| results.owner | 0..1 | | | -| results.owner_type | 0..1 | | | -| results.owner_url | 0..1 | | | -| results.mappings | 0..1 | | | -| results.names | 0..1 | | | +| **Code (Name)** | **Card.** | **Type** | **Definition (Description)** | +| ------------------------------ | --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `target_repo_url` | 1..1 | string | Repository URL to match against. Uses `$resolve` to identify the specific repo version. **Example:** `/orgs/CIEL/sources/CIEL/` | +| `rows` | 1..\* | list | List of concept-like key-value pairs; each row may have different fields. Only fields recognized by the matching algorithm are used; all other fields are ignored. **Example:** `[{"name":"malaria"},{"name":"blood type"}]` | +| `rows.id` | 0..1 | string | Exact match against a concept ID. _(May be removed in future versions.)_ **Example Input Data:** `12`, `57`, `A01.1` | +| `rows.name` | 0..1 | string | Semantic or fuzzy search on primary display name. **Example Input Data:** `Anemia due to blood loss` | +| `rows.synonyms` | 0..1 | string | Semantic or fuzzy search across all names/synonyms. **Example Input Data:** `"Anemia, blood loss", "Anémie secondaire à une hémorragie"` | +| `rows.description` | 0..1 | string | Text search on concept descriptions. 
**Example Input Data:** `"Anemia due to bleeding or a hemorrhagic process"` | +| `rows.concept_class` | 0..1 | string | Match on concept class (e.g., diagnosis, symptom). **Example Input Data:** `Diagnosis`, `Symptom` | +| `rows.datatype` | 0..1 | string | Match on datatype (e.g., numeric, coded). **Example Input Data:** `Numeric`, `Coded` | +| `rows.mapping_code` | 0..1 | string | Exact match on a concept ID or mapping in the target repo version. **Example Input Data:** `D50.0`, `Z87.5`, `X59.9` | +| `rows.mapping_list` | 0..1 | string | Exact match on comma‑separated mapping list. _(In development.)_ **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | +| `rows.same_as_map_codes` | 0..1 | string | Search only “same as” mappings. _(Deprecated.)_ **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | +| `rows.other_map_codes` | 0..1 | string | Search all non‑“same as” mappings. _(Deprecated.)_ **Example Input Data:** `CIEL:1858, ICD10:DC14.Z, LOINC:5792-7` | +| `map_config` | 0..\* | list | Optional list configuring mapping logic per row. **Example from Request Body:** see below. | +| `map_config.type` | 1..1 | code | Type of mapping: `mapping-code` or `mapping-list`. **Example:** `mapping-code`, `mapping-list` | +| `map_config.input_column` | 1..1 | string | Name of the row‑field to use. **Example:** `loinc-example`, `icd10-example`, `list example` | +| `map_config.target_source_url` | 0..1 | string | Target repo URL for `mapping-code` entries (required if type is `mapping-code`). **Example:** `/orgs/CIEL/sources/CIEL/` | +| `map_config.separator` | 0..1 | string | Separator between source name and code in `mapping-list`. **Example:** `:` | +| `map_config.delimiter` | 0..1 | string | Delimiter for multiple mappings in `mapping-list`. **Example:** `,` | +| `map_config.target_urls` | 0..1 | map | URL map of source mnemonics to repositories. Required for `mapping-list`. 
**Example:** `{"ICD10": "/orgs/WHO/sources/ICD-10-WHO/", "CIEL": "/orgs/CIEL/sources/CIEL/", "LOINC": "/orgs/Regenstrief/sources/LOINC/"}` | + +## Response + +### Response Body Schema + +| **Code (Name)** | **Card.** | **Type** | **Definition (Description)** | +| ------------------------------------------- | --------- | -------- | ------------------------------------------------------------------------------------ | +| _\_ | 1 | list | A list of response objects | +| row | 1 | map | The original row submitted, plus an auto-added zero-based `__index` local identifier | +| results | 1..\* | list | Ordered list of concept candidates | +| results.url | 1 | string | | +| results.display_name | 1 | string | | +| results.id | 1 | string | | +| results.retired | 0..1 | bool | | +| results.concept_class | 0..1 | string | | +| results.datatype | 0..1 | string | | +| results.property | 0..\* | list | | +| results.property.code | 1 | string | The key of the property (e.g. concept_class) | +| results.property.valueCode | 0..1 | string | | +| results.property.valueCoding | 0..1 | ... 
| | +| results.property.valueString | 0..1 | string | | +| results.property.valueInteger | 0..1 | int | | +| results.property.valueBoolean | 0..1 | bool | | +| results.property.valueDateTime | 0..1 | DateTime | | +| results.property.valueDecimal | 0..1 | decimal | | +| results.extras | 0..1 | map | | +| results.search_meta.search_score | 1 | decimal | | +| results.search_meta.search_normalized_score | 0..1 | decimal | Normalized score in the range 0–1 | +| results.search_meta.search_highlight | 0..1 | map | | +| | | | | +| results.search_meta.match_type | 0..1 | string | | +| results.source | 0..1 | | | +| results.owner | 0..1 | | | +| results.owner_type | 0..1 | | | +| results.owner_url | 0..1 | | | +| results.mappings | 0..1 | | | +| results.names | 0..1 | | | + +### Response Example -#### Response ```json [ { - "row": {"local_id":"1396", "name":"malaria"}, + "row": {"__index": 0, "name":"malaria"}, "results": [ { "search_meta": { "search_score": 2.0546277, + "search_normalized_score": 0.92, "match_type": "very_high", "search_highlight": {} }, @@ -129,6 +143,7 @@ POST /concepts/$match/ { "search_meta": { "search_score": 2.0455465, + "search_normalized_score": 0.91, "match_type": "very_high", "search_confidence": null, "search_highlight": {} @@ -148,29 +163,32 @@ POST /concepts/$match/ ] ``` -### Example Request 1: Simple Request +## Examples + +### Example 1: Minimal Request + ``` POST https://api.openconceptlab.org/concepts/$match/?includeSearchMeta=true&semantic=true&bestMatch=true&limit=1 ``` + ```json { - "rows":[ - {"local_id":"1396", "name":"malaria"}, - {"local_id":"2", "name":"a1c"} - ], - "target_repo_url": "/orgs/CIEL/sources/CIEL/" + "rows": [{ "name": "malaria" }, { "name": "a1c" }], + "target_repo_url": "/orgs/CIEL/sources/CIEL/" } ``` #### Response + ```json [ { - "row": {"local_id":"1396", "name":"malaria"}, + "row": {"__index": 0, "name":"malaria"}, "results": [ { "search_meta": { "search_score": 2.0546277, # required + "search_normalized_score": 
0.92, # optional (0–1) "match_type": "very_high", # optional "search_confidence": null, # optional "search_highlight": {} # optional @@ -188,6 +206,7 @@ POST https://api.openconceptlab.org/concepts/$match/?includeSearchMeta=true&sema { "search_meta": { "search_score": 2.0455465, + "search_normalized_score": 0.91, "match_type": "very_high", "search_confidence": null, "search_highlight": {} @@ -207,23 +226,32 @@ POST https://api.openconceptlab.org/concepts/$match/?includeSearchMeta=true&sema ] ``` - -### Example Request 2 +### Example 2 ```json { - "rows":[ - {"local_id":"1396", "name":"Mother's HIV Status", "loinc_code": "75179-2"}, - {"local_id":"2", "name":"Weeks of gestation", "loinc_code": "11884-4"} - ], - "target_repo_url": "/orgs/Regenstrief/sources/LOINC/2.71.21AA/", - "map_config": [ - {"type": "mapping-code", "input_column": "loinc_code", "target_source_url": "/orgs/CIEL/sources/CIEL/"}, - {"type": "mapping-list", "input_column": "maps", "separator": ":", "delimiter": ",", "target_urls": { - "ICD10": "/orgs/WHO/sources/ICD-10-WHO/", - "CIEL": "/orgs/CIEL/sources/CIEL/", - "LOINC": "/orgs/Regenstrief/sources/LOINC/" - }} - ] + "rows": [ + { "name": "Mother's HIV Status", "loinc_code": "75179-2" }, + { "name": "Weeks of gestation", "loinc_code": "11884-4" } + ], + "target_repo_url": "/orgs/Regenstrief/sources/LOINC/2.71.21AA/", + "map_config": [ + { + "type": "mapping-code", + "input_column": "loinc_code", + "target_source_url": "/orgs/CIEL/sources/CIEL/" + }, + { + "type": "mapping-list", + "input_column": "maps", + "separator": ":", + "delimiter": ",", + "target_urls": { + "ICD10": "/orgs/WHO/sources/ICD-10-WHO/", + "CIEL": "/orgs/CIEL/sources/CIEL/", + "LOINC": "/orgs/Regenstrief/sources/LOINC/" + } + } + ] } ```