From 3534e80daa7d31def21cd686bb9739573cff952b Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 15:44:04 +0100 Subject: [PATCH 1/8] use exact SOC metadata lookup when building hierarchy nodes - stop using parent-fallback metadata for titles, descriptions, qualifications and tasks --- .../hierarchy/soc_hierarchy.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/occupational_classification/hierarchy/soc_hierarchy.py b/src/occupational_classification/hierarchy/soc_hierarchy.py index bf71ac7..5be8a5e 100644 --- a/src/occupational_classification/hierarchy/soc_hierarchy.py +++ b/src/occupational_classification/hierarchy/soc_hierarchy.py @@ -243,8 +243,11 @@ def _define_codes_and_nodes(soc_df: pd.DataFrame): code_node_dict = {} for code in soc_df["code"]: - group_description = soc_meta.get_meta_by_code(code)["group_description"] - group_title = soc_meta.get_meta_by_code(code)["group_title"] + meta = soc_meta.get_meta_by_code_exact(code) + group_description = ( + meta.get("group_description", "") if "error" not in meta else "" + ) + group_title = meta.get("group_title", "") if "error" not in meta else "" soc_node = SocNode( code, group_title=group_title, group_description=group_description ) @@ -270,11 +273,13 @@ def _populate_tasks_and_quals(nodes: list): for node in nodes: code = node.soc_code if SocCode(code).code_length() == _SOC_CODE_LENGTH: - qual = soc_meta.get_meta_by_code(code)["entry_routes_and_quals"] - node.qualifications = qual - - tasks_list = soc_meta.get_meta_by_code(code)["tasks"] - node.tasks = tasks_list + meta = soc_meta.get_meta_by_code_exact(code) + if "error" in meta: + node.qualifications = "" + node.tasks = [] + else: + node.qualifications = meta.get("entry_routes_and_quals", "") + node.tasks = meta.get("tasks", []) def _populate_job_titles(nodes: list, soc_index: pd.DataFrame): From 7555094b92e623900ddaf8a89e95f32b569c7f98 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 
15:45:51 +0100 Subject: [PATCH 2/8] use exact SOC metadata lookup in SOCLookup --- .../lookup/soc_lookup.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/occupational_classification/lookup/soc_lookup.py b/src/occupational_classification/lookup/soc_lookup.py index 5d986b8..0ad0a6c 100644 --- a/src/occupational_classification/lookup/soc_lookup.py +++ b/src/occupational_classification/lookup/soc_lookup.py @@ -121,8 +121,9 @@ def lookup( # pylint: disable=too-many-locals matching_code_sub_major_group = matching_code[:2] matching_code_major_group = matching_code[:1] if self.meta is not None: - matching_code_meta = self.meta.get_meta_by_code(matching_code) - matching_code_meta = self._normalise_meta(matching_code_meta) + matching_code_meta = self._normalise_meta( + self.meta.get_meta_by_code_exact(matching_code) + ) if matching_code_minor_group is not None: minor_group_meta = self._normalise_meta( self.meta.get_meta_by_code_exact(matching_code_minor_group) @@ -132,7 +133,7 @@ def lookup( # pylint: disable=too-many-locals self.meta.get_meta_by_code_exact(matching_code_sub_major_group) ) major_group_meta = self._normalise_meta( - self.meta.get_meta_by_code(matching_code_major_group) + self.meta.get_meta_by_code_exact(matching_code_major_group) ) if not matching_code: @@ -156,7 +157,9 @@ def lookup( # pylint: disable=too-many-locals major_groups = [ { "code": major_group_code, - "meta": self.meta.get_meta_by_code(major_group_code), + "meta": self._normalise_meta( + self.meta.get_meta_by_code_exact(major_group_code) + ), } for major_group_code in major_group_codes ] @@ -199,7 +202,9 @@ def lookup_code_major_group( matching_code_major_group: Optional[str] = code[:1] if code else None major_group_meta: Optional[dict[str, Any]] = None if self.meta is not None and matching_code_major_group is not None: - major_group_meta = self.meta.get_meta_by_code(matching_code_major_group) + major_group_meta = self._normalise_meta( + 
self.meta.get_meta_by_code_exact(matching_code_major_group) + ) return { "code_major_group": matching_code_major_group, "code_major_group_meta": major_group_meta, From 395e97e3f86b84d9781a554c1dc4df3bff4318a9 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 15:46:51 +0100 Subject: [PATCH 3/8] remove silent parent fallback from SocMeta get_meta_by_code - require an exact SOC code match and return an error when metadata is missing --- .../meta/soc_meta.py | 33 ++++--------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/src/occupational_classification/meta/soc_meta.py b/src/occupational_classification/meta/soc_meta.py index b401c23..5992eda 100644 --- a/src/occupational_classification/meta/soc_meta.py +++ b/src/occupational_classification/meta/soc_meta.py @@ -4171,35 +4171,10 @@ def __init__(self): self.soc_meta = SOCmeta def get_meta_by_code(self, code: str) -> dict: - """Retrieve metadata for a given SOC code with parent fallback.""" - entry = self.soc_meta.get(code) - if entry is not None: - return { - "code": code, - "group_title": entry.get("group_title", ""), - "group_description": entry.get("group_description", ""), - "entry_routes_and_quals": entry.get("entry_routes_and_quals", ""), - "tasks": entry.get("tasks", []), - } - lookup = code[:-1] - while lookup: - entry = self.soc_meta.get(lookup) - if entry is not None: - return { - "code": lookup, - "group_title": entry.get("group_title", ""), - "group_description": entry.get("group_description", ""), - "entry_routes_and_quals": entry.get("entry_routes_and_quals", ""), - "tasks": entry.get("tasks", []), - } - lookup = lookup[:-1] - return {"error": f"No metadata found for SOC code {code}"} - - def get_meta_by_code_exact(self, code: str) -> dict: - """Retrieve metadata only for an exact SOC code match.""" + """Retrieve metadata for an exact SOC code match only.""" entry = self.soc_meta.get(code) if entry is None: - return {"error": f"No exact metadata found for SOC code 
{code}"} + return {"error": f"No metadata found for SOC code {code}"} return { "code": code, "group_title": entry.get("group_title", ""), @@ -4207,3 +4182,7 @@ def get_meta_by_code_exact(self, code: str) -> dict: "entry_routes_and_quals": entry.get("entry_routes_and_quals", ""), "tasks": entry.get("tasks", []), } + + def get_meta_by_code_exact(self, code: str) -> dict: + """Retrieve metadata only for an exact SOC code match.""" + return self.get_meta_by_code(code) From d0ce0de2bce694e0db1d41ace8f966f969ec4452 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 15:54:00 +0100 Subject: [PATCH 4/8] extend SOCLookup tests for exact unit-level metadata - assert code_meta matches the matched unit code and is null when SOC metadata is missing --- tests/test_lookup.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_lookup.py b/tests/test_lookup.py index 4a1bcec..c39d214 100644 --- a/tests/test_lookup.py +++ b/tests/test_lookup.py @@ -180,6 +180,26 @@ def test_soc_lookup_default_path_uses_example_csv(): result = lookup.lookup("chief executives and senior officials") assert result["code"] == "1111" assert result["code_major_group"] == "1" + assert result["code_meta"] is not None + assert result["code_meta"]["code"] == "1111" + + +def test_lookup_returns_null_code_meta_when_metadata_missing(tmp_path): + """Lookup returns null code_meta when the matched code has no SOC metadata.""" + data = pd.DataFrame( + { + "description": ["unknown occupation"], + "label": ["9999"], + } + ) + file_path = tmp_path / "mock_soc_data_missing_meta.csv" + data.to_csv(file_path, index=False) + + lookup = SOCLookup(data_path=str(file_path)) + result = lookup.lookup("unknown occupation") + + assert result["code"] == "9999" + assert result["code_meta"] is None def test_soc_lookup_rejects_non_csv_path(tmp_path): From 512b2a9af2164e05e99d9914e71ee6cd1fecb03e Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 15:54:53 +0100 Subject: [PATCH 
5/8] assert example SOC lookup returns unit-level code_meta for 1111 --- tests/test_soc_lookup_example_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_soc_lookup_example_data.py b/tests/test_soc_lookup_example_data.py index 7ed76e0..616984c 100644 --- a/tests/test_soc_lookup_example_data.py +++ b/tests/test_soc_lookup_example_data.py @@ -29,6 +29,7 @@ def test_soc_lookup_example_exact_match(): assert result["description"] == "chief executives and senior officials" # With always-on SocMeta, metadata should be present for the example code assert result["code_meta"] is not None + assert result["code_meta"]["code"] == "1111" assert result["code_major_group"] == "1" assert result["code_major_group_meta"] is not None From f10b60e48df3319bbbd751d74fa61dcf25d5fd55 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 16:02:44 +0100 Subject: [PATCH 6/8] add SocMeta tests for exact metadata lookup without parent fallback --- tests/test_soc_meta.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/test_soc_meta.py diff --git a/tests/test_soc_meta.py b/tests/test_soc_meta.py new file mode 100644 index 0000000..fa4395f --- /dev/null +++ b/tests/test_soc_meta.py @@ -0,0 +1,23 @@ +"""Tests for SocMeta exact-match behaviour.""" + +from occupational_classification.meta.soc_meta import SocMeta + + +def test_get_meta_by_code_returns_exact_unit_metadata(): + """get_meta_by_code returns unit-level metadata for a known SOC code.""" + meta = SocMeta().get_meta_by_code("1111") + assert "error" not in meta + assert meta["code"] == "1111" + assert meta["group_title"] == "Chief executives and senior officials" + + +def test_get_meta_by_code_does_not_fallback_to_parent_group(): + """get_meta_by_code returns an error for unknown codes instead of parent fallback.""" + meta = SocMeta().get_meta_by_code("9999") + assert meta == {"error": "No metadata found for SOC code 9999"} + + +def 
test_get_meta_by_code_exact_matches_get_meta_by_code(): + """get_meta_by_code_exact delegates to get_meta_by_code with the same result.""" + soc_meta = SocMeta() + assert soc_meta.get_meta_by_code_exact("2112") == soc_meta.get_meta_by_code("2112") From ef155f207d319a7bb8dd1245b979799b1c82d354 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 16:03:09 +0100 Subject: [PATCH 7/8] update changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e962451..84e0408 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +--- +## [0.1.6] - 2026-03-20 + +### Added +- Full in-code `SOCmeta` map from SOC 2020 Volume 1 (~550 entries). +- Layered SOC lookup metadata fields (minor, sub-major, and major group). + +### Changed +- `SocMeta.get_meta_by_code()` now requires an exact code match; silent parent-group fallback removed. +- `SOCLookup` and `soc_hierarchy` use exact metadata lookup throughout. + --- ## [0.1.3] - 2025-07-08 From 62871f81d09010deb15331cfba7782d9b78e5234 Mon Sep 17 00:00:00 2001 From: dstewartons Date: Mon, 22 Jun 2026 16:03:40 +0100 Subject: [PATCH 8/8] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b223eb7..10868e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "soc-classification-library" -version = "0.1.5" +version = "0.1.6" description = "Standard Occupational Classification library" authors = ["Steve Gibbard "] license = "MIT"