Skip to content

Commit 8417a5b

Browse files
frano-mNoopDogclaude
authored
feat: map diagnosis facet term values to display value (#4722) (#4723)
* feat: map diagnosis facet term values to display value (#4722) * feat: add HP and OMIM diagnosis term ID to name mapping #4722 Script fetches term IDs from the AnVIL Azul API and resolves names from authoritative sources (hp.obo for HP, phenotype.hpoa for OMIM). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: replace placeholder diagnosis mapping with full lookup data #4722 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: rename export to DIAGNOSIS_DISPLAY_VALUE to match existing import #4722 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * feat: mapped diagnosis column values (#4722) * fix: trim value before mapping (#4722) * fix: strip obsolete prefix from HP term display names (#4722) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: trim displayed diagnosis ID and document mapping script sources (#4722) - Use value.trim() in mapDiagnosisValue display string for consistency with lookup - Expand lookup-diagnosis-terms.ts header with source rationale and re-run guidance Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Fran McDade <18710366+frano-m@users.noreply.github.com> Co-authored-by: Dave Rogers <dave@clevercanary.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 91219c3 commit 8417a5b

5 files changed

Lines changed: 824 additions & 7 deletions

File tree

app/viewModelBuilders/azul/anvil-cmg/common/viewModelBuilders.tsx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ import {
4747
DATASET_RESPONSE,
4848
} from "../../../../../site-config/anvil-cmg/category";
4949
import { ROUTES } from "../../../../../site-config/anvil-cmg/dev/export/routes";
50+
import { mapDiagnosisValue } from "../../../../../site-config/anvil-cmg/dev/index/common/utils";
5051
import {
5152
AggregatedBioSampleResponse,
5253
AggregatedDatasetResponse,
@@ -723,7 +724,7 @@ export const buildDiagnoses = (
723724
): React.ComponentProps<typeof C.NTagCell> => {
724725
return {
725726
label: getPluralizedMetadataLabel(METADATA_KEY.DIAGNOSIS),
726-
values: getAggregatedDiagnoses(response),
727+
values: getAggregatedDiagnoses(response).map(mapDiagnosisValue),
727728
};
728729
};
729730

scripts/lookup-diagnosis-terms.ts

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#!/usr/bin/env npx ts-node
2+
/**
3+
* Fetch HP and OMIM term names from authoritative sources and generate a TypeScript constant.
4+
*
5+
* Sources
6+
* -------
7+
* - HP terms: hp.obo from obophenotype/human-phenotype-ontology (GitHub).
8+
* This is the canonical release artifact for the Human Phenotype Ontology —
9+
* the same file used by Monarch, OMIM, ClinVar, and other biomedical databases.
10+
* alt_id entries are resolved so that retired/merged term IDs still get a name.
11+
* Terms marked "obsolete" in the OBO name field have that prefix stripped.
12+
*
13+
* - OMIM terms: phenotype.hpoa from the HPO project's latest release.
14+
* This file maps OMIM disease IDs (e.g. OMIM:143100) to human-readable disease
15+
* names. It is maintained by the HPO team and sourced from OMIM with permission,
16+
* making it the standard cross-reference between OMIM IDs and display names.
17+
*
18+
* - Term IDs: fetched live from the AnVIL Azul API (termFacets for
19+
* diagnoses.disease and diagnoses.phenotype). Only IDs actually present in the
20+
* AnVIL catalog are included in the output — the lookup table stays minimal.
21+
*
22+
* When to re-run
23+
* --------------
24+
* Re-run whenever:
25+
* - New AnVIL datasets are ingested that introduce diagnosis codes not yet in
26+
* the mapping (the UI will fall back to showing the raw ID for unknown terms).
27+
* - The HPO releases a new version with updated or renamed terms.
28+
*
29+
* Usage
30+
* -----
31+
* npx ts-node scripts/lookup-diagnosis-terms.ts > site-config/anvil-cmg/dev/index/common/diagnosis.ts
32+
*/
33+
34+
/** AnVIL Azul datasets index request (one hit, empty filter object) whose response carries the term facets. */
const AZUL_URL =
  "https://service.explore.anvilproject.org/index/datasets?size=1&filters=%7B%7D";

/** Location of hp.obo in the human-phenotype-ontology GitHub repository. */
const HP_OBO_URL =
  "https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/hp.obo";

/** Location of phenotype.hpoa in the latest human-phenotype-ontology GitHub release. */
const HPOA_URL =
  "https://github.com/obophenotype/human-phenotype-ontology/releases/latest/download/phenotype.hpoa";
40+
41+
/**
 * Requests a URL and parses the response body as JSON.
 * @param url - URL to request.
 * @returns The parsed JSON body (untyped).
 * @throws Error when the response status is not OK.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- API response shape is dynamic
async function fetchJson(url: string): Promise<any> {
  const response = await fetch(url);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`Failed to fetch ${url}: ${response.status}`);
}
47+
48+
/**
 * Requests a URL, following redirects, and returns the response body as text.
 * @param url - URL to request.
 * @returns The response body.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url: string): Promise<string> {
  const response = await fetch(url, { redirect: "follow" });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`Failed to fetch ${url}: ${response.status}`);
}
53+
54+
/**
 * Reads the "diagnoses.disease" and "diagnoses.phenotype" term facets from the
 * Azul response and collects the HP and OMIM IDs found in their terms.
 * A facet that is absent from the response contributes no IDs.
 * @returns The distinct HP IDs and OMIM IDs found.
 */
async function getTermIdsFromAzul(): Promise<{
  hpIds: Set<string>;
  omimIds: Set<string>;
}> {
  console.error("Fetching term IDs from AnVIL Azul API...");
  const response = await fetchJson(AZUL_URL);
  const termFacets = response.termFacets ?? {};

  const hpIds = new Set<string>();
  const omimIds = new Set<string>();

  const facetKeys = ["diagnoses.disease", "diagnoses.phenotype"];
  for (const facetKey of facetKeys) {
    const facetTerms = termFacets[facetKey]?.terms ?? [];
    for (const facetTerm of facetTerms) {
      const rawTerm: string | undefined = facetTerm.term;
      // Entries without a term value carry no ID.
      if (!rawTerm) continue;
      // A single entry may hold several IDs separated by semicolons.
      const candidateIds = rawTerm.split(";").map((piece) => piece.trim());
      candidateIds.forEach((candidateId) => {
        if (candidateId.startsWith("HP:")) {
          hpIds.add(candidateId);
          return;
        }
        if (candidateId.startsWith("OMIM:")) {
          omimIds.add(candidateId);
        }
      });
    }
  }

  console.error(` Found ${hpIds.size} HP terms, ${omimIds.size} OMIM terms`);
  return { hpIds, omimIds };
}
82+
83+
/**
 * Downloads hp.obo and resolves a name for each requested HP term ID.
 *
 * Only `[Term]` stanzas are read. A stanza's name is recorded under its primary
 * ID and under each of its `alt_id` values, restricted to IDs present in
 * `hpIds`. A leading "obsolete " prefix is stripped from names.
 *
 * @param hpIds - HP term IDs to resolve.
 * @returns Map of resolved HP term ID to name; unresolved IDs are logged to stderr.
 * @throws Error when the download fails.
 */
async function buildHpMap(hpIds: Set<string>): Promise<Map<string, string>> {
  console.error("Downloading hp.obo...");
  const obo = await fetchText(HP_OBO_URL);

  const hpNames = new Map<string, string>();
  let inTermStanza = false;
  let currentId: string | null = null;
  let currentName: string | null = null;
  let altIds: string[] = [];

  // Records the stanza collected so far under every requested ID it answers to.
  const saveCurrent = (): void => {
    if (!currentId || !currentName) return;
    for (const id of [currentId, ...altIds]) {
      if (hpIds.has(id)) hpNames.set(id, currentName);
    }
  };

  for (const line of obo.split("\n")) {
    const trimmed = line.trim();
    if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
      // Every stanza header closes the previous stanza. Resetting only on
      // "[Term]" would let a "name:" tag in a following non-Term stanza
      // (e.g. "[Typedef]") overwrite the name of the preceding HP term.
      saveCurrent();
      currentId = null;
      currentName = null;
      altIds = [];
      inTermStanza = trimmed === "[Term]";
    } else if (!inTermStanza) {
      // File header tags and non-Term stanzas hold no HP term names.
      continue;
    } else if (trimmed.startsWith("id: HP:")) {
      currentId = trimmed.slice(4);
    } else if (trimmed.startsWith("name: ") && currentId) {
      currentName = trimmed.slice(6).replace(/^obsolete\s+/, "");
    } else if (trimmed.startsWith("alt_id: HP:")) {
      altIds.push(trimmed.slice(8));
    }
  }
  // The final stanza has no header after it, so flush it explicitly.
  saveCurrent();

  console.error(` Resolved ${hpNames.size}/${hpIds.size} HP terms`);
  const missing = [...hpIds].filter((id) => !hpNames.has(id));
  if (missing.length)
    console.error(` Missing HP terms: ${missing.sort().join(", ")}`);
  return hpNames;
}
124+
125+
/**
 * Downloads phenotype.hpoa and resolves a disease name for each requested OMIM ID.
 *
 * Rows are tab-separated; the first column is read as the database ID and the
 * second as the disease name. Lines starting with "#" or "database_id" are
 * skipped. The first row seen for an ID wins.
 *
 * @param omimIds - OMIM IDs to resolve.
 * @returns Map of resolved OMIM ID to disease name; unresolved IDs are logged to stderr.
 * @throws Error when the download fails.
 */
async function buildOmimMap(
  omimIds: Set<string>
): Promise<Map<string, string>> {
  console.error("Downloading phenotype.hpoa...");
  const hpoa = await fetchText(HPOA_URL);

  const omimNames = new Map<string, string>();
  hpoa.split("\n").forEach((row) => {
    const isCommentOrHeader =
      row.startsWith("#") || row.startsWith("database_id");
    if (isCommentOrHeader) return;

    const columns = row.split("\t");
    if (columns.length < 2) return;

    const databaseId = columns[0].trim();
    const name = columns[1].trim();
    // Keep only requested IDs, and only the first name encountered for each.
    if (!omimIds.has(databaseId) || omimNames.has(databaseId)) return;
    omimNames.set(databaseId, name);
  });

  console.error(` Resolved ${omimNames.size}/${omimIds.size} OMIM terms`);
  const unresolved = [...omimIds].filter((id) => !omimNames.has(id));
  if (unresolved.length)
    console.error(` Missing OMIM terms: ${unresolved.sort().join(", ")}`);
  return omimNames;
}
149+
150+
/**
 * Renders the term mapping as the source text of a module exporting
 * `DIAGNOSIS_DISPLAY_VALUE`, with one entry per term, sorted by term ID.
 *
 * Keys and values are emitted with `JSON.stringify`, so quotes, backslashes,
 * line breaks and other control characters are always escaped and each entry
 * stays a valid single-line string literal.
 *
 * @param mapping - Term ID to name.
 * @returns The generated source text, ending with a newline.
 */
function generateJs(mapping: Map<string, string>): string {
  const lines: string[] = [
    "/**",
    " * Mapping of HP (Human Phenotype Ontology) and OMIM term IDs to their names.",
    " * Auto-generated by scripts/lookup-diagnosis-terms.ts from authoritative sources:",
    " * - HP terms: hp.obo from obophenotype/human-phenotype-ontology",
    " * - OMIM terms: phenotype.hpoa from HPO project",
    " * - Term IDs: AnVIL Azul API (explore.anvilproject.org)",
    " */",
    "export const DIAGNOSIS_DISPLAY_VALUE: Record<string, string> = {",
  ];

  // Code-unit ordering, matching the default string sort; Map keys are unique.
  const sortedEntries = [...mapping.entries()].sort(([a], [b]) =>
    a < b ? -1 : a > b ? 1 : 0
  );
  for (const [termId, name] of sortedEntries) {
    lines.push(` ${JSON.stringify(termId)}: ${JSON.stringify(name)},`);
  }

  lines.push("};");
  lines.push("");
  return lines.join("\n");
}
180+
181+
/**
 * Script entry point: fetches the term IDs, resolves HP and OMIM names
 * concurrently, writes the generated module to stdout and a summary to stderr.
 */
async function main(): Promise<void> {
  const termIds = await getTermIdsFromAzul();
  const hpLookup = buildHpMap(termIds.hpIds);
  const omimLookup = buildOmimMap(termIds.omimIds);
  const [hpMap, omimMap] = await Promise.all([hpLookup, omimLookup]);

  // HP entries first, then OMIM entries.
  const combined = new Map<string, string>(hpMap);
  for (const [termId, name] of omimMap) {
    combined.set(termId, name);
  }

  // Generated source goes to stdout; progress and summary go to stderr.
  process.stdout.write(generateJs(combined));

  console.error(
    `\nGenerated mapping for ${combined.size} terms (${hpMap.size} HP + ${omimMap.size} OMIM)`
  );
}
196+
197+
// Run the script; on failure, print the error to stderr and exit with status 1.
main().catch((error) => {
  console.error(error);
  process.exit(1);
});

site-config/anvil-cmg/dev/config.ts

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
import { APIEndpoints } from "@databiosphere/findable-ui/lib/apis/azul/common/entities";
2+
import { FILTER_SORT } from "@databiosphere/findable-ui/lib/common/filters/sort/config/types";
23
import { SystemStatusBindResponseFn } from "@databiosphere/findable-ui/lib/config/entities";
34
import { CATALOG_DEFAULT } from "../../../app/apis/azul/anvil-cmg/common/constants";
45
import * as C from "../../../app/components/index";
56
import { mapSelectCategoryValue } from "../../../app/config/utils";
7+
import { buildDataDictionary } from "../../../app/viewModelBuilders/azul/anvil-cmg/common/dataDictionaryMapper/dataDictionaryMapper";
8+
import { TABLE_OPTIONS } from "../../../app/viewModelBuilders/azul/anvil-cmg/common/dataDictionaryMapper/tableOptions";
69
import { bindSystemStatusResponse } from "../../../app/viewModelBuilders/azul/common/systemStatusMapper/systemStatusMapper";
710
import { FLATTEN, GIT_HUB_REPO_URL } from "../../common/constants";
811
import { SiteConfig } from "../../common/entities";
912
import { ANVIL_CMG_CATEGORY_KEY, ANVIL_CMG_CATEGORY_LABEL } from "../category";
1013
import { announcements } from "./announcements/announcements";
1114
import { authenticationConfig } from "./authentication/authentication";
15+
import dataDictionary from "./dataDictionary/data-dictionary.json";
1216
import { exportConfig } from "./export/export";
1317
import { activitiesEntityConfig } from "./index/activitiesEntityConfig";
1418
import { biosamplesEntityConfig } from "./index/biosamplesEntityConfig";
15-
import { mapAccessibleValue } from "./index/common/utils";
19+
import { mapAccessibleValue, mapDiagnosisValue } from "./index/common/utils";
1620
import { datasetsEntityConfig } from "./index/datasetsEntityConfig";
1721
import { donorsEntityConfig } from "./index/donorsEntityConfig";
1822
import { filesEntityConfig } from "./index/filesEntityConfig";
19-
import { floating } from "./layout/floating";
20-
import dataDictionary from "./dataDictionary/data-dictionary.json";
21-
import { TABLE_OPTIONS } from "../../../app/viewModelBuilders/azul/anvil-cmg/common/dataDictionaryMapper/tableOptions";
22-
import { buildDataDictionary } from "../../../app/viewModelBuilders/azul/anvil-cmg/common/dataDictionaryMapper/dataDictionaryMapper";
2323
import { buildSummaries } from "./index/summaryViewModelBuilder";
24-
import { FILTER_SORT } from "@databiosphere/findable-ui/lib/common/filters/sort/config/types";
24+
import { floating } from "./layout/floating";
2525

2626
// Template constants
2727
const APP_TITLE = "AnVIL Data Explorer";
@@ -78,6 +78,7 @@ export function makeConfig(
7878
{
7979
key: ANVIL_CMG_CATEGORY_KEY.DIAGNOSE_DISEASE,
8080
label: ANVIL_CMG_CATEGORY_LABEL.DIAGNOSE_DISEASE,
81+
mapSelectCategoryValue: mapSelectCategoryValue(mapDiagnosisValue),
8182
},
8283
{
8384
key: ANVIL_CMG_CATEGORY_KEY.DONOR_ORGANISM_TYPE,

0 commit comments

Comments
 (0)