diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f0ec9a6..63600fd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,31 +1,63 @@
-name: test
+name: ci
 
-on:
-  push:
-    branches:
-      - '**'
+on: [push, pull_request]
+
+
+env:
+  X_PYTHON_VERSION: "3.11"
 
 jobs:
   build:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os:
-          - ubuntu-latest
-          - windows-latest
+    # This job checks that the build succeeds
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ env.X_PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.X_PYTHON_VERSION }}
+      - name: Build the package
+        run: pip install .
+
+  test:
+    # This job runs the unittests
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout code
        uses: actions/checkout@v4
-      - name: Set up Python
+      - name: Set up Python ${{ env.X_PYTHON_VERSION }}
        uses: actions/setup-python@v4
        with:
-          python-version: "3.10"
-          architecture: x64
+          python-version: ${{ env.X_PYTHON_VERSION }}
      - name: Install Python dependencies
-        run: pip install -r requirements.txt
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]
      - name: Run Python Tests
        run: python -m unittest discover
+
+  static-analysis:
+    # This job runs static code analysis, namely pycodestyle and mypy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ env.X_PYTHON_VERSION }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.X_PYTHON_VERSION }}
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]
+      - name: Check typing with MyPy
+        run: |
+          mypy semantic_matcher test
+      - name: Check code style with PyCodestyle
+        run: |
+          pycodestyle --count --max-line-length 120 semantic_matcher test
diff --git a/config.ini.default b/config.ini.default
index 10c47b2..e75e636 100644
--- a/config.ini.default
+++ b/config.ini.default
@@ -2,7 +2,7 @@
 endpoint=http://127.0.0.1
 LISTEN_ADDRESS=127.0.0.1
 port=8000
-equivalence_table_file=./resources/equivalence_table.json
+match_graph_file=./resources/example_graph.json
 
 [RESOLVER]
 endpoint=http://semantic_id_resolver
diff --git a/pyproject.toml b/pyproject.toml
index a179c6c..a56c928 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,16 @@ dependencies = [
     "pydantic>=1.10",
     "uvicorn>=0.21.1",
     "requests>=2.31.0",
+    "networkx>=3.4.2",
+]
+
+[project.optional-dependencies]
+dev = [
+    "mypy",
+    "pycodestyle",
+    "coverage",
+    "types-networkx",  # Type hints for networkx
+    "types-requests",  # Type hints for requests
 ]
 
 [tool.setuptools]
diff --git a/resources/equivalence_table.json b/resources/equivalence_table.json
deleted file mode 100644
index fe95165..0000000
--- a/resources/equivalence_table.json
+++ /dev/null
@@ -1,32 +0,0 @@
-{
-    "matches": {
-        "s-heppner.com/semanticID/one": [
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/one",
-                "match_semantic_id": "s-heppner.com/semanticID/1",
-                "score": 1.0,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            },
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/one",
-                "match_semantic_id": "s-heppner.com/semanticID/two",
-                "score": 0.8,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            }
-        ],
-        "s-heppner.com/semanticID/two": [
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/two",
-                "match_semantic_id": "s-heppner.com/semanticID/2",
-                "score": 1.0,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            }
-        ]
-    }
-}
\ No newline at end of file
diff --git a/resources/example_graph.json b/resources/example_graph.json
new file mode 100644
index 0000000..c24de3c
--- /dev/null
+++ b/resources/example_graph.json
@@ -0,0 +1,26 @@
+[
+    {
+        "base_semantic_id": "A",
+        "match_semantic_id": "B",
+        "score": 0.8,
+        "path": []
+    },
+    {
+        "base_semantic_id": "B",
+        "match_semantic_id": "C",
+        "score": 0.7,
+        "path": []
+    },
+    {
+        "base_semantic_id": "B",
+        "match_semantic_id": "D",
+        "score": 0.6,
+        "path": []
+    },
+    {
+        "base_semantic_id": "C",
+        "match_semantic_id": "D",
+        "score": 0.9,
+        "path": []
+    }
+]
\ No newline at end of file
diff --git a/semantic_matcher/algorithm.py b/semantic_matcher/algorithm.py
new file mode 100644
index 0000000..a64060e
--- /dev/null
+++ b/semantic_matcher/algorithm.py
@@ -0,0 +1,147 @@
+import json
+from typing import List, Tuple
+import heapq
+
+import networkx as nx
+from pydantic import BaseModel
+
+
+class SemanticMatchGraph(nx.DiGraph):
+    """
+    A weighted, directed graph of semantic matches:
+    `base_semantic_id` ---`score`---> `match_semantic_id`
+    """
+    def __init__(self):
+        super().__init__()
+
+    def add_semantic_match(self,
+                           base_semantic_id: str,
+                           match_semantic_id: str,
+                           score: float):
+        self.add_edge(
+            u_of_edge=base_semantic_id,
+            v_of_edge=match_semantic_id,
+            weight=score,
+        )
+
+    def get_all_matches(self) -> List["SemanticMatch"]:
+        matches: List["SemanticMatch"] = []
+
+        # Iterate over all edges in the graph
+        for base, match, data in self.edges(data=True):
+            score = data.get("weight", 0.0)  # Get weight, default to 0.0 if missing
+            matches.append(SemanticMatch(
+                base_semantic_id=base,
+                match_semantic_id=match,
+                score=score,
+                path=[]  # Direct match, no intermediate nodes
+            ))
+
+        return matches
+
+    def to_file(self, filename: str):
+        with open(filename, "w") as file:
+            matches = [match.model_dump() for match in self.get_all_matches()]
+            json.dump(matches, file, indent=4)
+
+    @classmethod
+    def from_file(cls, filename: str) -> "SemanticMatchGraph":
+        with open(filename, "r") as file:
+            matches_data = json.load(file)
+        graph = SemanticMatchGraph()
+        for match_data in matches_data:
+            graph.add_semantic_match(
+                base_semantic_id=match_data["base_semantic_id"],
+                match_semantic_id=match_data["match_semantic_id"],
+                score=match_data["score"]
+            )
+        return graph
+
+
+class SemanticMatch(BaseModel):
+    """
+    A semantic match, mapping two semantic IDs with a matching score
+    """
+    base_semantic_id: str
+    match_semantic_id: str
+    score: float
+    path: List[str]  # The path of `semantic_id`s that the algorithm took
+
+    def __str__(self) -> str:
+        return f"{' -> '.join(self.path + [self.match_semantic_id])} = {self.score}"
+
+    def __hash__(self):
+        return hash((
+            self.base_semantic_id,
+            self.match_semantic_id,
+            self.score,
+            tuple(self.path),
+        ))
+
+
+def find_semantic_matches(
+        graph: SemanticMatchGraph,
+        semantic_id: str,
+        min_score: float = 0.5
+) -> List[SemanticMatch]:
+    """
+    Find semantic matches for a given node with a minimum score threshold.
+
+    Args:
+        graph (SemanticMatchGraph): The directed graph with weighted edges.
+        semantic_id (str): The starting semantic_id.
+        min_score (float): The minimum similarity score to consider.
+            This threshold is necessary to ensure that the search terminates, even on sufficiently large graphs,
+            since scores shrink multiplicatively along a path and eventually fall below it.
+
+    Returns:
+        List[SemanticMatch]:
+            A list of SemanticMatch objects, sorted by their score with the highest score first.
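+
+    Example (illustrative sketch; the scores are chosen to be exactly representable as floats,
+    so the doctest output is deterministic):
+        >>> g = SemanticMatchGraph()
+        >>> g.add_semantic_match("A", "B", 0.5)
+        >>> g.add_semantic_match("B", "C", 0.5)
+        >>> [str(m) for m in find_semantic_matches(g, "A", min_score=0.25)]
+        ['A -> B = 0.5', 'A -> B -> C = 0.25']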
+    """
+    if semantic_id not in graph:
+        return []
+
+    # We need to make sure that all possible paths starting from the given semantic_id are explored.
+    # To achieve this, we use a priority queue: it ensures that elements with the highest priority are
+    # processed first, regardless of when they were added. While we could use a simple FIFO list of matches
+    # to explore, this way we end up with an already sorted result, with the highest match at the beginning
+    # of the list. As implementation of this abstract data structure, we choose a "max-heap". However, the
+    # Python standard library only offers an efficient "min-heap" (`heapq`), so we negate the score values.
+    # We initialize the priority queue:
+    pq: List[Tuple[float, str, List[str]]] = [(-1.0, semantic_id, [])]  # (neg_score, node, path)
+    # The queue is structured as follows:
+    # - `neg_score`: The negative score of the match
+    # - `node`: The `match_semantic_id` of the match
+    # - `path`: The path between the `semantic_id` and the `match_semantic_id`
+
+    # Prepare the result list
+    results: List[SemanticMatch] = []
+
+    # Run the priority queue until all possible paths have been explored.
+    # This means in each iteration:
+    # - We pop the top element of the queue, as it is the next highest semantic match we want to explore
+    # - If the match has a score higher than or equal to the given `min_score`, we add it to the results
+    # - We add all connected `semantic_id`s to the priority queue to be treated next
+    # - We go to the next element of the queue
+    while pq:
+        # Get the highest-score match from the queue
+        neg_score, node, path = heapq.heappop(pq)
+        score = -neg_score  # Convert back to positive
+
+        # Store result if above threshold (except the start node)
+        if node != semantic_id and score >= min_score:
+            results.append(SemanticMatch(
+                base_semantic_id=semantic_id,
+                match_semantic_id=node,
+                score=score,
+                path=path
+            ))
+
+        # Traverse to the neighboring and therefore connected `semantic_id`s
+        for neighbor, edge_data in graph[node].items():
+            edge_weight = edge_data["weight"]
+            assert isinstance(edge_weight, float)
+            new_score: float = score * edge_weight  # Multiplicative propagation
+
+            # Prevent loops by ensuring we do not revisit the start node
+            if neighbor == semantic_id:
+                continue
+
+            # We add the newly found `semantic_id`s to the queue, to be explored next in order of their score
+            if new_score >= min_score:
+                heapq.heappush(pq, (-new_score, neighbor, path + [node]))  # Push updated path
+
+    return results
diff --git a/semantic_matcher/examples/simple_example_equivalence_table.py b/semantic_matcher/examples/simple_example_equivalence_table.py
deleted file mode 100644
index f2ce282..0000000
--- a/semantic_matcher/examples/simple_example_equivalence_table.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from semantic_matcher.model import SemanticMatch, EquivalenceTable
-
-
-def return_simple_example_equivalence_table() -> EquivalenceTable:
-    """
-    Returns a simple equivalence table with three semantic matches
-    """
-    table = EquivalenceTable(matches={})
-    table.add_semantic_match(
-        SemanticMatch(
-            base_semantic_id="s-heppner.com/semanticID/one",
-            match_semantic_id="s-heppner.com/semanticID/1",
-            score=1.,
-            meta_information={"matchSource": "Defined by Sebastian Heppner"}
-        )
-    )
-    table.add_semantic_match(
-        SemanticMatch(
-            base_semantic_id="s-heppner.com/semanticID/two",
-            match_semantic_id="s-heppner.com/semanticID/2",
-            score=1.,
-            meta_information={"matchSource": "Defined by Sebastian Heppner"}
-        )
-    )
-    table.add_semantic_match(
-        SemanticMatch(
-            base_semantic_id="s-heppner.com/semanticID/one",
-            match_semantic_id="s-heppner.com/semanticID/two",
-            score=0.8,
-            meta_information={"matchSource": "Defined by Sebastian Heppner"}
-        )
-    )
-    return table
-
-
-if __name__ == '__main__':
-    e = return_simple_example_equivalence_table()
-    e.to_file("example_equivalence_table.json")
diff --git a/semantic_matcher/model.py b/semantic_matcher/model.py
deleted file mode 100644
index c95701a..0000000
--- a/semantic_matcher/model.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from typing import Dict, List
-
-from pydantic import BaseModel
-
-
-class SemanticMatch(BaseModel):
-    """
-    A semantic match, mapping two semanticIDs with a matching score. Can be imagined as a weighted graph with
-    `base_semantic_id` ---`score`---> `match_semantic_id`
-
-    Todo: Think about static and TTL, but that is optimization
-    Todo: Maybe we want to have the matching method as debug information
-    """
-    base_semantic_id: str
-    match_semantic_id: str
-    score: float
-    meta_information: Dict
-
-
-class EquivalenceTable(BaseModel):
-    matches: Dict[str, List[SemanticMatch]]
-
-    def add_semantic_match(self, match: SemanticMatch) -> None:
-        if self.matches.get(match.base_semantic_id) is not None:
-            if match not in self.matches[match.base_semantic_id]:
-                self.matches[match.base_semantic_id].append(match)
-        else:
-            self.matches[match.base_semantic_id] = [match]
-
-    def remove_semantic_match(self, match: SemanticMatch) -> None:
-        if self.matches.get(match.base_semantic_id) is not None:
-            self.matches.get(match.base_semantic_id).remove(match)
-            if len(self.matches.get(match.base_semantic_id)) == 0:
-                self.matches.pop(match.base_semantic_id)
-
-    def remove_all_semantic_matches(self):
-        self.matches.clear()
-
-    def get_local_matches(self, semantic_id: str, score_limit: float) -> List[SemanticMatch]:
-        equivalence_table_result = self.matches.get(semantic_id)
-        if equivalence_table_result is None:
-            return []
-        matching_result = []
-        for match in equivalence_table_result:
-            if match.score > score_limit:
-                matching_result.append(match)
-                rec_result = self.get_local_matches(match.match_semantic_id, score_limit/match.score)
-                for rec_match in rec_result:
-                    rec_match.base_semantic_id = match.base_semantic_id
-                    rec_match.score *= match.score
-                    if "path" not in rec_match.meta_information:
-                        rec_match.meta_information["path"] = []
-                    rec_match.meta_information["path"].insert(0, match.match_semantic_id)
-                if rec_result is not None:
-                    matching_result += rec_result
-        return matching_result
-
-    def get_all_matches(self) -> List[SemanticMatch]:
-        return self.matches
-
-    def to_file(self, filename: str) -> None:
-        with open(filename, "w") as file:
-            file.write(self.model_dump_json(indent=4))
-
-    @classmethod
-    def from_file(cls, filename: str) -> "EquivalenceTable":
-        with open(filename, "r") as file:
-            return EquivalenceTable.model_validate_json(file.read())
-
diff --git a/semantic_matcher/examples/__init__.py b/semantic_matcher/py.typed
similarity index 100%
rename from semantic_matcher/examples/__init__.py
rename to semantic_matcher/py.typed
diff --git a/semantic_matcher/service.py b/semantic_matcher/service.py
index 0b752fe..4635ce3 100644
--- a/semantic_matcher/service.py
+++ b/semantic_matcher/service.py
@@ -1,9 +1,25 @@
-from typing import List
+from typing import Optional, List, Set
 
+from pydantic import BaseModel
 import requests
-from fastapi import APIRouter
+from fastapi import APIRouter, Response
 
-from semantic_matcher import model, service_model
+from semantic_matcher import algorithm
+
+
+class MatchRequest(BaseModel):
+    """
+    Request body for :func:`service.SemanticMatchingService.get_matches`
+
+    :ivar semantic_id: The semantic ID that we want to find matches for
+    :ivar score_limit: The minimum semantic similarity score to look for. Matches are returned if their score
+        is larger than or equal (>=) to this value.
+    :ivar local_only: If `True`, only check at the local service and do not request other services
+    :ivar already_checked_locations: Optional set of already checked semantic matching services to avoid looping
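+
+    A minimal example request body (illustrative values only):
+
+    .. code-block:: json
+
+        {
+            "semantic_id": "https://example.com/semantic_id/one",
+            "score_limit": 0.5,
+            "local_only": true
+        }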
+    """
+    semantic_id: str
+    score_limit: float
+    local_only: bool = True
+    already_checked_locations: Optional[Set[str]] = None
 
 
 class SemanticMatchingService:
@@ -27,7 +43,7 @@ class SemanticMatchingService:
     def __init__(
             self,
             endpoint: str,
-            equivalences: model.EquivalenceTable
+            graph: algorithm.SemanticMatchGraph
     ):
         """
         Initializer of :class:`~.SemanticMatchingService`
@@ -46,6 +62,7 @@ def __init__(
         self.router.add_api_route(
             "/get_matches",
             self.get_matches,
+            response_model=List[algorithm.SemanticMatch],
             methods=["GET"]
         )
         self.router.add_api_route(
@@ -53,82 +70,93 @@ def __init__(
             self.post_matches,
             methods=["POST"]
         )
-        self.router.add_api_route(
-            "/clear",
-            self.remove_all_matches,
-            methods=["POST"]
-        )
         self.endpoint: str = endpoint
-        self.equivalence_table: model.EquivalenceTable = equivalences
+        self.graph: algorithm.SemanticMatchGraph = graph
 
     def get_all_matches(self):
         """
         Returns all matches stored in the equivalence table-
         """
-        matches = self.equivalence_table.get_all_matches()
+        matches = self.graph.get_all_matches()
         return matches
 
-    def remove_all_matches(self):
-        self.equivalence_table.remove_all_semantic_matches()
-
     def get_matches(
             self,
-            request_body: service_model.MatchRequest
-    ) -> service_model.MatchesList:
+            request_body: MatchRequest
+    ) -> List[algorithm.SemanticMatch]:
         """
         A query to match two SubmodelElements semantically. Returns a matching score
         """
         # Try first local matching
-        matches: List[model.SemanticMatch] = self.equivalence_table.get_local_matches(
+        matches: List[algorithm.SemanticMatch] = algorithm.find_semantic_matches(
+            graph=self.graph,
             semantic_id=request_body.semantic_id,
-            score_limit=request_body.score_limit
+            min_score=request_body.score_limit
         )
         # If the request asks us to only locally look, we're done already
         if request_body.local_only:
-            return service_model.MatchesList(matches=matches)
+            return matches
         # Now look for remote matches:
-        additional_remote_matches: List[model.SemanticMatch] = []
+        additional_remote_matches: List[algorithm.SemanticMatch] = []
         for match in matches:
+            # If the `match_semantic_id` has the same namespace as the `base_semantic_id`, there is no point in
+            # looking any further, since the semantic_id Resolver would return this Semantic Matching Service.
             if match.base_semantic_id.split("/")[0] == match.match_semantic_id.split("/")[0]:
-                #match_id is local
-                continue
+                continue  # Todo: We definitely need to check for namespace, this just takes "https:"
+
+            # We need to make sure we do not go to the same Semantic Matching Service twice.
+            # For that, we update the already_checked_locations with the current endpoint:
+            already_checked_locations: Set[str] = {self.endpoint}
+            if request_body.already_checked_locations:
+                already_checked_locations.update(request_body.already_checked_locations)
+
             remote_matching_service = self._get_matcher_from_semantic_id(match.match_semantic_id)
-            if remote_matching_service is None:
+            # If we could not find the remote_matching_service, or we have already checked it, we continue
+            if remote_matching_service is None or remote_matching_service in already_checked_locations:
                 continue
-            remote_matching_request = service_model.MatchRequest(
+            # Todo: There is an edge case where this would not find all matches:
+            #       Imagine we have a situation where A -> B -> C -> D, but A and C are on SMS1 and B and C are
+            #       on SMS2. This would not find the match C, since we already checked SMS1.
+            #       I guess this is fine for the moment though.
+
+            # This makes it possible to create the match request:
+            remote_matching_request = MatchRequest(
                 semantic_id=match.match_semantic_id,
-                # This is a simple "Ungleichung"
-                # Unified score is multiplied: score(A->B) * score(B->C)
-                # This score should be larger or equal than the requested score_limit:
-                # score(A->B) * score(B->C) >= score_limit
-                # score(A->B) is well known, as it is the `match.score`
-                # => score(B->C) >= (score_limit/score(A->B))
+                # This is a simple inequality:
+                #   The unified score is multiplied: score(A->B) * score(B->C)
+                #   This score should be larger than or equal to the requested score_limit:
+                #       score(A->B) * score(B->C) >= score_limit
+                #   score(A->B) is well known, as it is the `match.score`
+                #       => score(B->C) >= (score_limit/score(A->B))
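+                # Worked example (illustrative numbers): with score_limit=0.6 and score(A->B)=0.8,
+                # the remote service only needs to return matches with score(B->C) >= 0.6 / 0.8 = 0.75.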
                 score_limit=float(request_body.score_limit/match.score),
                 # If we already request a remote score, it does not make sense to choose `local_only`
                 local_only=False,
-                name=request_body.name,
-                definition=request_body.definition
+                already_checked_locations=already_checked_locations
             )
             url = f"{remote_matching_service}/get_matches"
             new_matches_response = requests.get(url, json=remote_matching_request.model_dump_json())
-            match_response = service_model.MatchesList.model_validate_json(new_matches_response.text)
-            additional_remote_matches.extend(match_response.matches)
+            response_matches = [algorithm.SemanticMatch(**match) for match in new_matches_response.json()]
+            additional_remote_matches.extend(response_matches)
         # Finally, put all matches together and return
         matches.extend(additional_remote_matches)
-        res = service_model.MatchesList(matches=matches)
-        return res
+        return matches
 
     def post_matches(
             self,
-            request_body: service_model.MatchesList
-    ) -> None:
-        for match in request_body.matches:
-            self.equivalence_table.add_semantic_match(match)
-            # Todo: Figure out how to properly return 200
+            request_body: List[algorithm.SemanticMatch]
+    ) -> Response:
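+        """
+        Store the posted list of :class:`~semantic_matcher.algorithm.SemanticMatch` objects in the local
+        match graph and return HTTP 200 on success.
+        """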
+        for match in request_body:
+            self.graph.add_semantic_match(
+                base_semantic_id=match.base_semantic_id,
+                match_semantic_id=match.match_semantic_id,
+                score=match.score,
+            )
+        return Response(status_code=200)
 
-    def _get_matcher_from_semantic_id(self, semantic_id: str) -> str:
+    @staticmethod
+    def _get_matcher_from_semantic_id(semantic_id: str) -> Optional[str]:
         """
         Finds the suiting `SemanticMatchingService` for the given `semantic_id`.
@@ -162,19 +190,18 @@ def _get_matcher_from_semantic_id(semantic_id: str) -> Optional[str]:
         os.path.abspath(os.path.join(os.path.dirname(__file__), "../config.ini")),
     ])
 
-    # Read in equivalence table
-    # Note, this construct takes the path in the config.ini relative to the
-    # location of the config.ini
-    EQUIVALENCES = model.EquivalenceTable.from_file(
+    # Read in the `SemanticMatchGraph`.
+    # Note that this takes the path in the config.ini relative to the location of the config.ini.
+    match_graph = algorithm.SemanticMatchGraph.from_file(
         filename=os.path.abspath(os.path.join(
             os.path.dirname(__file__),
             "..",
-            config["SERVICE"]["equivalence_table_file"]
+            config["SERVICE"]["match_graph_file"]
         ))
     )
     SEMANTIC_MATCHING_SERVICE = SemanticMatchingService(
         endpoint=config["SERVICE"]["endpoint"],
-        equivalences=EQUIVALENCES
+        graph=match_graph,
     )
     APP = FastAPI()
     APP.include_router(
diff --git a/semantic_matcher/service_model.py b/semantic_matcher/service_model.py
deleted file mode 100644
index 64d2a1a..0000000
--- a/semantic_matcher/service_model.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from typing import Optional, List
-
-from pydantic import BaseModel
-
-from semantic_matcher import model
-
-
-class MatchRequest(BaseModel):
-    """
-    Request body for the :func:`service.SemanticMatchingService.get_match`
-
-    :ivar semantic_id: The semantic ID that we want to find matches for
-    :ivar local_only: If `True`, only check at the local service and do not request other services
-    :ivar name: Optional name of the resolved semantic ID for NLP matching
-    :ivar definition: Optional definition of the resolved semantic ID for NLP matching
-    """
-    semantic_id: str
-    score_limit: float
-    local_only: bool = True
-    name: Optional[str] = None
-    definition: Optional[str] = None
-
-
-class MatchesList(BaseModel):
-    matches: List[model.SemanticMatch]
diff --git a/semantic_matcher/visualization.py b/semantic_matcher/visualization.py
new file mode 100644
index 0000000..2c0d862
--- /dev/null
+++ b/semantic_matcher/visualization.py
@@ -0,0 +1,33 @@
+import matplotlib.pyplot as plt  # type: ignore
+import networkx as nx
+
+from semantic_matcher.algorithm import SemanticMatchGraph
+
+# Todo: This is WIP
+
+
+def save_graph_as_figure(g: SemanticMatchGraph, filename: str) -> None:
+    """
+    A simple visualization of a `SemanticMatchGraph`, saved as a picture
+    """
+    # Draw the graph
+    plt.figure()
+    pos = nx.spring_layout(g)  # Positions for nodes
+    nx.draw(g, pos, with_labels=True, node_color='lightblue', edge_color='gray', node_size=3000, font_size=10)
+
+    # Add edge labels
+    edge_labels = {(u, v): f"{d['weight']:.2f}" for u, v, d in g.edges(data=True)}
+    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)
+
+    plt.savefig(filename)
+
+
+if __name__ == "__main__":
+    graph_complex = SemanticMatchGraph()
+    graph_complex.add_edge("A", "B", weight=0.9, source="dataset1")
+    graph_complex.add_edge("A", "C", weight=0.8, source="dataset2")
+    graph_complex.add_edge("B", "D", weight=0.7, source="dataset3")
+    graph_complex.add_edge("C", "D", weight=0.6, source="dataset4")
+    graph_complex.add_edge("D", "E", weight=0.5, source="dataset5")
+
+    save_graph_as_figure(graph_complex, "temp.png")
diff --git a/test/test_algorithm.py b/test/test_algorithm.py
new file mode 100644
index 0000000..6dd1ef7
--- /dev/null
+++ b/test/test_algorithm.py
@@ -0,0 +1,357 @@
+import unittest
+from typing import List
+import os
+import json
+
+from semantic_matcher import algorithm
+
+
+class TestSemanticMatchGraph(unittest.TestCase):
+    TEST_FILE = "test_graph.json"
+
+    def setUp(self):
+        """Set up a test graph before each test."""
+        self.graph = algorithm.SemanticMatchGraph()
+        self.graph.add_semantic_match("A", "B", 0.8)
+        self.graph.add_semantic_match("B", "C", 0.6)
+        self.graph.add_semantic_match("C", "D", 0.9)
+
+    def tearDown(self):
+        """Remove the test file after each test."""
+        if os.path.exists(self.TEST_FILE):
+            os.remove(self.TEST_FILE)
+
+    def test_get_all_matches_basic(self):
+        """Test that all direct semantic matches are returned correctly."""
+        matches = self.graph.get_all_matches()
+
+        expected_matches = [
+            algorithm.SemanticMatch(base_semantic_id="A", match_semantic_id="B", score=0.8, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="B", match_semantic_id="C", score=0.6, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="C", match_semantic_id="D", score=0.9, path=[]),
+        ]
+
+        self.assertEqual(len(matches), 3, "Incorrect number of matches retrieved.")
+        self.assertCountEqual(expected_matches, matches, "Matches do not match expected results.")
+
+    def test_get_all_matches_empty_graph(self):
+        """Test that an empty graph returns an empty list."""
+        empty_graph = algorithm.SemanticMatchGraph()
+        matches = empty_graph.get_all_matches()
+        self.assertEqual([], matches, "Empty graph should return an empty list.")
+
+    def test_get_all_matches_duplicate_edges(self):
+        """Test handling of duplicate edges with different scores."""
+        self.graph.add_semantic_match("A", "B", 0.9)  # Overwriting edge
+        matches = self.graph.get_all_matches()
+
+        expected_matches = [
+            algorithm.SemanticMatch(base_semantic_id="A", match_semantic_id="B", score=0.9, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="B", match_semantic_id="C", score=0.6, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="C", match_semantic_id="D", score=0.9, path=[]),
+        ]
+
+        self.assertEqual(len(matches), 3, "Duplicate edge handling failed.")
+        self.assertCountEqual(expected_matches, matches, "Matches do not match expected results.")
+
+    def test_get_all_matches_varying_weights(self):
+        """Test that matches with different weights are retrieved correctly."""
+        self.graph.add_semantic_match("D", "E", 0.3)
+        self.graph.add_semantic_match("E", "F", 1.0)
+        matches = self.graph.get_all_matches()
+
+        expected_matches = [
+            algorithm.SemanticMatch(base_semantic_id="A", match_semantic_id="B", score=0.8, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="B", match_semantic_id="C", score=0.6, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="C", match_semantic_id="D", score=0.9, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="D", match_semantic_id="E", score=0.3, path=[]),
+            algorithm.SemanticMatch(base_semantic_id="E", match_semantic_id="F", score=1.0, path=[]),
+        ]
+
+        self.assertEqual(len(matches), 5, "Incorrect number of matches retrieved.")
+        self.assertCountEqual(expected_matches, matches, "Matches do not match expected results.")
+
+    def test_to_file(self):
+        """Test that the graph is correctly saved to a file."""
+        self.graph.to_file(self.TEST_FILE)
+
+        # Check if file exists
+        self.assertTrue(os.path.exists(self.TEST_FILE), "File was not created.")
+
+        # Load file content and verify JSON structure
+        with open(self.TEST_FILE, "r") as file:
+            data = json.load(file)
+
+        self.assertIsInstance(data, list, "File content should be a list of matches.")
+        self.assertEqual(len(data), 3, "Incorrect number of matches stored in file.")
+
+        expected_data = [
+            {"base_semantic_id": "A", "match_semantic_id": "B", "score": 0.8, "path": []},
+            {"base_semantic_id": "B", "match_semantic_id": "C", "score": 0.6, "path": []},
"match_semantic_id": "C", "score": 0.6, "path": []}, + {"base_semantic_id": "C", "match_semantic_id": "D", "score": 0.9, "path": []}, + ] + + self.assertEqual(data, expected_data, "File content does not match expected data.") + + def test_from_file(self): + """Test that a graph can be correctly loaded from a file.""" + self.graph.to_file(self.TEST_FILE) + loaded_graph = algorithm.SemanticMatchGraph.from_file(self.TEST_FILE) + + # Check if the loaded graph has the same edges and weights + self.assertEqual(len(loaded_graph.edges()), 3, "Loaded graph has incorrect number of edges.") + + for u, v, data in self.graph.edges(data=True): + self.assertTrue(loaded_graph.has_edge(u, v), f"Edge {u} -> {v} is missing in loaded graph.") + self.assertEqual(loaded_graph[u][v]["weight"], data["weight"], f"Edge weight mismatch for {u} -> {v}") + + def test_empty_graph(self): + """Test saving and loading an empty graph.""" + empty_graph = algorithm.SemanticMatchGraph() + empty_graph.to_file(self.TEST_FILE) + loaded_graph = algorithm.SemanticMatchGraph.from_file(self.TEST_FILE) + + self.assertEqual(len(loaded_graph.edges()), 0, "Loaded graph should be empty.") + + +class TestSemanticMatch(unittest.TestCase): + def test_str_representation(self): + """Test that __str__ correctly formats the path and score.""" + match = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + + expected_str = "A -> B = 0.8" + self.assertEqual(expected_str, str(match), "__str__ method output is incorrect") + + def test_str_representation_longer_path(self): + """Test __str__ output with a longer path.""" + match = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="D", + score=0.6, + path=["A", "B", "C"] + ) + + expected_str = "A -> B -> C -> D = 0.6" + self.assertEqual(expected_str, str(match), "__str__ method output is incorrect for longer paths") + + def test_str_representation_no_path(self): + """Test __str__ output when there's no path (direct match).""" + match = algorithm.SemanticMatch( + base_semantic_id="X", + match_semantic_id="Y", + score=1.0, + path=[] + ) + + expected_str = "Y = 1.0" # No path, just the match + self.assertEqual(expected_str, str(match), "__str__ method output is incorrect for empty path") + + def test_hash_consistency(self): + """Test that identical SemanticMatch instances have the same hash.""" + match1 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + match2 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + + self.assertEqual(hash(match1), hash(match2), "Hashes of identical objects should be the same") + + def test_hash_uniqueness(self): + """Test that different SemanticMatch instances have different hashes.""" + match1 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + match2 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="C", + score=0.8, + path=["A"] + ) + + self.assertNotEqual(hash(match1), hash(match2), "Hashes of different objects should be different") + + def test_hash_set_usage(self): + """Test that SemanticMatch instances can be used in a set (ensuring uniqueness).""" + match1 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + match2 = algorithm.SemanticMatch( + base_semantic_id="A", + match_semantic_id="B", + score=0.8, + path=["A"] + ) + match3 = algorithm.SemanticMatch( + 
+            base_semantic_id="A",
+            match_semantic_id="C",
+            score=0.9,
+            path=["A"]
+        )
+
+        match_set = {match1, match2, match3}
+        self.assertEqual(len(match_set), 2, "Set should contain unique elements based on hash")
+
+    def test_hash_dict_usage(self):
+        """Test that SemanticMatch instances can be used as dictionary keys."""
+        match1 = algorithm.SemanticMatch(
+            base_semantic_id="A",
+            match_semantic_id="B",
+            score=0.8,
+            path=["A"]
+        )
+        match2 = algorithm.SemanticMatch(
+            base_semantic_id="A",
+            match_semantic_id="B",
+            score=0.8,
+            path=["A"]
+        )
+
+        match_dict = {match1: "First Entry", match2: "Second Entry"}
+
+        self.assertEqual(len(match_dict), 1, "Identical objects should overwrite each other in a dictionary")
+        self.assertEqual(match_dict[match1], "Second Entry", "Latest value should be stored in dictionary")
+
+
+class TestFindSemanticMatches(unittest.TestCase):
+    def setUp(self):
+        """Set up test graphs for various cases."""
+        self.graph = algorithm.SemanticMatchGraph()
+
+        # Populate the graph
+        self.graph.add_edge("A", "B", weight=0.8)
+        self.graph.add_edge("B", "C", weight=0.7)
+        self.graph.add_edge("C", "D", weight=0.9)
+        self.graph.add_edge("B", "D", weight=0.6)
+        self.graph.add_edge("D", "E", weight=0.5)
+
+    def test_basic_functionality(self):
+        """Test basic propagation of semantic matches."""
+        matches: List[algorithm.SemanticMatch] = algorithm.find_semantic_matches(self.graph, "A", min_score=0)
+        str_matches: List[str] = [str(i) for i in matches]
+        expected = [
+            f"A -> B = 0.8",
+            f"A -> B -> C = {0.8*0.7}",  # 0.56
+            f"A -> B -> C -> D = {0.8*0.7*0.9}",  # 0.504
+            f"A -> B -> D = {0.8*0.6}",  # 0.48
+            f"A -> B -> C -> D -> E = {0.8 * 0.7 * 0.9 * 0.5}",  # 0.252
+            f"A -> B -> D -> E = {0.8 * 0.6 * 0.5}",  # 0.24
+        ]
+        self.assertEqual(expected, str_matches)
+
+    def test_loop_prevention(self):
+        """Ensure that loops do not cause infinite recursion."""
+        self.graph.add_edge("D", "A", weight=0.4)  # Creates a loop
+
+        matches: List[algorithm.SemanticMatch] = algorithm.find_semantic_matches(
+            self.graph,
+            semantic_id="A",
+            min_score=0.1
+        )
+
+        # We expect the algorithm to have traversed the graph exactly once, despite the loop!
+        self.assertEqual(6, len(matches))
+        # To simplify the analysis, we remove everything but the `semantic_id`s of the found matches
+        matched_semantic_ids: List[str] = [i.match_semantic_id for i in matches]
+        self.assertIn("D", matched_semantic_ids)
+        self.assertNotIn("A", matched_semantic_ids)  # "A" should not be revisited
+
+    def test_minimum_threshold(self):
+        """Ensure that results below the minimum score are excluded."""
+        matches = algorithm.find_semantic_matches(self.graph, "A", min_score=0.6)
+        str_matches: List[str] = [str(i) for i in matches]
+        expected: List[str] = [
+            "A -> B = 0.8"
+        ]
+        self.assertEqual(expected, str_matches)
+
+    def test_not_in_graph(self):
+        """Test that matches that are not in the graph are not found"""
+        matches = algorithm.find_semantic_matches(self.graph, semantic_id="X", min_score=0)
+        self.assertEqual(0, len(matches))
+
+    def test_disconnected_graph(self):
+        """Test behavior when the graph has disconnected components."""
+        graph_disconnected = algorithm.SemanticMatchGraph()
+        graph_disconnected.add_edge("A", "B", weight=0.8)
+        graph_disconnected.add_edge("X", "Y", weight=0.9)
+
+        matches = algorithm.find_semantic_matches(graph_disconnected, "A", min_score=0.1)
+        str_matches: List[str] = [str(i) for i in matches]
+        expected: List[str] = [
+            "A -> B = 0.8"
+        ]
+
+        self.assertEqual(expected, str_matches)
+
+    def test_empty_graph(self):
+        """Test behavior when the graph is empty."""
+        graph_empty = algorithm.SemanticMatchGraph()
+        matches = algorithm.find_semantic_matches(graph_empty, "A", min_score=0.1)
+
+        self.assertEqual(0, len(matches))
+
+    def test_single_node_no_edges(self):
+        """Test behavior when the graph has only one node and no edges."""
+        graph_single = algorithm.SemanticMatchGraph()
+        graph_single.add_node("A")
+
+        matches = algorithm.find_semantic_matches(graph_single, "A", min_score=0.1)
+
+        self.assertEqual(0, len(matches))
+
+    def test_edge_case_weights(self):
+        """Test behavior with edge weights close to zero and one."""
+        graph_edge_cases = algorithm.SemanticMatchGraph()
+        graph_edge_cases.add_edge("A", "B", weight=1.0, source="perfect match")
+        graph_edge_cases.add_edge("B", "C", weight=0.01, source="weak match")
+
+        matches = algorithm.find_semantic_matches(graph_edge_cases, "A", min_score=0.01)
+        matches_str: List[str] = [str(i) for i in matches]
+        expected: List[str] = [
+            f"A -> B = {1.0}",
+            f"A -> B -> C = {1.0*0.01}",
+        ]
+        self.assertEqual(expected, matches_str)
+
+    def test_complex_graph(self):
+        """Test behavior in a complex graph with multiple branches."""
+        graph_complex = algorithm.SemanticMatchGraph()
+        graph_complex.add_edge("A", "B", weight=0.9, source="dataset1")
+        graph_complex.add_edge("A", "C", weight=0.8, source="dataset2")
+        graph_complex.add_edge("B", "D", weight=0.7, source="dataset3")
+        graph_complex.add_edge("C", "D", weight=0.6, source="dataset4")
+        graph_complex.add_edge("D", "E", weight=0.5, source="dataset5")
+
+        matches = algorithm.find_semantic_matches(graph_complex, "A", min_score=0.3)
+        matches_str: List[str] = [str(i) for i in matches]
+        expected: List[str] = [
+            f"A -> B = {0.9}",  # 0.9
+            f"A -> C = {0.8}",  # 0.8
+            f"A -> B -> D = {0.9*0.7}",  # 0.63
+            f"A -> C -> D = {0.8*0.6}",  # 0.48
+            f"A -> B -> D -> E = {0.9*0.7*0.5}",  # 0.315
+            # f"A -> C -> D -> E = {0.8*0.6*0.5}",  # 0.24 => below min_score, excluded
+        ]
+        self.assertEqual(expected, matches_str)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/test_resources/example_graph.json b/test/test_resources/example_graph.json
new file mode 100644
index 0000000..395217b
--- /dev/null
+++ b/test/test_resources/example_graph.json
@@ -0,0 +1,20 @@
+[
+    {
+        "base_semantic_id": "https://s-heppner.com/semantic_id/one",
+        "match_semantic_id": "https://s-heppner.com/semantic_id/uno",
+        "score": 0.9,
+        "path": []
+    },
+    {
+        "base_semantic_id": "https://s-heppner.com/semantic_id/one",
+        "match_semantic_id": "https://remote.com/semantic_id/deux",
+        "score": 0.7,
+        "path": []
+    },
+    {
+        "base_semantic_id": "https://s-heppner.com/semantic_id/uno",
+        "match_semantic_id": "https://s-heppner.com/semantic_id/trois",
+        "score": 0.6,
+        "path": []
+    }
+]
\ No newline at end of file
diff --git a/test/test_semantic_matcher.py b/test/test_semantic_matcher.py
index f7f8719..2aa3ae6 100644
--- a/test/test_semantic_matcher.py
+++ b/test/test_semantic_matcher.py
@@ -1,5 +1,4 @@
 import os
-import configparser
 import multiprocessing
 
 import requests
@@ -8,8 +7,7 @@
 from fastapi import FastAPI
 import uvicorn
 
-from semantic_matcher import model
-from semantic_matcher.model import SemanticMatch
+from semantic_matcher import algorithm
 from semantic_matcher.service import SemanticMatchingService
 
 from contextlib import contextmanager
@@ -20,28 +18,21 @@ def run_server():
-    # Load test configuration
-    config = configparser.ConfigParser()
-    config.read([
-        os.path.abspath(os.path.join(os.path.dirname(__file__), "../test_resources/config.ini")),
-    ])
-    # Read in equivalence table
-    EQUIVALENCES = model.EquivalenceTable.from_file(
+    match_graph = algorithm.SemanticMatchGraph.from_file(
         filename=os.path.abspath(os.path.join(
             os.path.dirname(__file__),
-            "..",
-            config["SERVICE"]["equivalence_table_file"]
+            "test_resources/example_graph.json"
        ))
    )
     # Initialise SemanticMatchingService
     semantic_matching_service = SemanticMatchingService(
-        endpoint=config["SERVICE"]["endpoint"],
-        equivalences=EQUIVALENCES
+        endpoint="localhost",
+        graph=match_graph
     )
 
-    # Mock resolver
+    # Mock semantic_id Resolver
     def mock_get_matcher(self, semantic_id):
         return "http://remote-service:8000"
@@ -57,12 +48,11 @@ def __init__(self, content: str, status_code: int = 200):
 
     def mock_requests_get(url, json):
         if url == "http://remote-service:8000/get_matches":
-            match_one = SemanticMatch(
+            match_one = algorithm.SemanticMatch(
                 base_semantic_id="s-heppner.com/semanticID/three",
                 match_semantic_id="remote-service.com/semanticID/tres",
                 score=1.0,
-                meta_information={"matchSource": "Defined by Moritz Sommer",
-                                  "path": ["remote-service.com/semanticID/trois"]}
+                path=[]
             )
             matches_data = {
                 "matches": [match_one.model_dump()]
             }
@@ -77,7 +67,7 @@ def mock_requests_get(url, json):
     # Run server
     app = FastAPI()
     app.include_router(semantic_matching_service.router)
-    uvicorn.run(app, host=config["SERVICE"]["ENDPOINT"], port=int(config["SERVICE"]["PORT"]), log_level="error")
+    uvicorn.run(app, host="localhost", port=8000, log_level="error")
 
 
 @contextmanager
@@ -96,137 +86,85 @@ def run_server_context():
 
 
 class TestSemanticMatchingService(unittest.TestCase):
-
     def test_get_all_matches(self):
         with run_server_context():
             response = requests.get("http://localhost:8000/all_matches")
-            expected_matches = {
-                's-heppner.com/semanticID/one': [
-                    {
-                        'base_semantic_id': 's-heppner.com/semanticID/one',
-                        'match_semantic_id': 's-heppner.com/semanticID/1',
-                        'score': 1.0,
-                        'meta_information': {'matchSource': 'Defined by Sebastian Heppner'}
-                    },
-                    {
-                        'base_semantic_id': 's-heppner.com/semanticID/one',
-                        'match_semantic_id': 's-heppner.com/semanticID/two',
-                        'score': 0.8,
-                        'meta_information': {'matchSource': 'Defined by Sebastian Heppner'}
-                    }
-                ],
-                's-heppner.com/semanticID/two': [
-                    {
-                        'base_semantic_id': 's-heppner.com/semanticID/two',
-                        'match_semantic_id': 's-heppner.com/semanticID/2',
-                        'score': 1.0,
-                        'meta_information': {'matchSource': 'Defined by Sebastian Heppner'}
-                    }
-                ],
-                's-heppner.com/semanticID/three': [
-                    {
-                        'base_semantic_id': 's-heppner.com/semanticID/three',
-                        'match_semantic_id': 'remote-service.com/semanticID/trois',
-                        'score': 1.0,
-                        'meta_information': {'matchSource': 'Defined by Moritz Sommer'}
-                    }
-                ]
-            }
+            expected_matches = [
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://s-heppner.com/semantic_id/uno',
+                 'path': [],
+                 'score': 0.9},
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://remote.com/semantic_id/deux',
+                 'path': [],
+                 'score': 0.7},
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/uno',
+                 'match_semantic_id': 'https://s-heppner.com/semantic_id/trois',
+                 'path': [],
+                 'score': 0.6}
+            ]
             actual_matches = response.json()
             self.assertEqual(expected_matches, actual_matches)
 
     def test_post_matches(self):
         with run_server_context():
             new_match = {
-                "base_semantic_id": "s-heppner.com/semanticID/new",
-                "match_semantic_id": "s-heppner.com/semanticID/3",
+                "base_semantic_id": "https://s-heppner.com/semantic_id/new",
+                "match_semantic_id": "https://s-heppner.com/semantic_id/nouveaux",
                 "score": 0.95,
-                "meta_information": {"matchSource": "Defined by UnitTest"}
+                "path": [],
             }
-            matches_list = {
-                "matches": [new_match]
-            }
-            requests.post(
+            response = requests.post(
                 "http://localhost:8000/post_matches",
-                json=matches_list
-            )
-            response = requests.get("http://localhost:8000/all_matches")
-            actual_matches = response.json()
-            self.assertIn("s-heppner.com/semanticID/new", actual_matches)
-            self.assertEqual(
-                actual_matches["s-heppner.com/semanticID/new"][0]["match_semantic_id"],
-                "s-heppner.com/semanticID/3"
-            )
-
-            self.assertEqual(
-                actual_matches["s-heppner.com/semanticID/new"][0]["score"],
-                0.95
-            )
-
-            self.assertEqual(
-                actual_matches["s-heppner.com/semanticID/new"][0]["meta_information"]["matchSource"],
-                "Defined by UnitTest"
+                json=[new_match]
             )
+            self.assertEqual(200, response.status_code)
+            # Todo: Make sure this does not become a problem in other tests
 
     def test_get_matches_local_only(self):
         with run_server_context():
             match_request = {
-                "semantic_id": "s-heppner.com/semanticID/one",
+                "semantic_id": "https://s-heppner.com/semantic_id/one",
                 "score_limit": 0.5,
                 "local_only": True
             }
             response = requests.get("http://localhost:8000/get_matches", json=match_request)
-            expected_matches = {
-                "matches": [
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/1",
-                        "score": 1.0,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner"}
-                    },
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/two",
-                        "score": 0.8,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner"}
-                    },
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/2",
-                        "score": 0.8,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner",
-                                             "path": ["s-heppner.com/semanticID/two"]}
-                    }
-                ]
-            }
+            expected_matches = [
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://s-heppner.com/semantic_id/uno',
+                 'path': ['https://s-heppner.com/semantic_id/one'],
+                 'score': 0.9},
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://remote.com/semantic_id/deux',
+                 'path': ['https://s-heppner.com/semantic_id/one'],
+                 'score': 0.7},
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://s-heppner.com/semantic_id/trois',
+                 'path': ['https://s-heppner.com/semantic_id/one',
+                          'https://s-heppner.com/semantic_id/uno'],
+                 'score': 0.54}
+            ]
             actual_matches = response.json()
             self.assertEqual(expected_matches, actual_matches)
 
     def test_get_matches_local_and_remote(self):
         with run_server_context():
             match_request = {
-                "semantic_id": "s-heppner.com/semanticID/three",
+                "semantic_id": "https://s-heppner.com/semantic_id/one",
                 "score_limit": 0.7,
                 "local_only": False
             }
             response = requests.get("http://localhost:8000/get_matches", json=match_request)
-            expected_matches = {
-                "matches": [
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/three",
-                        "match_semantic_id": "remote-service.com/semanticID/trois",
-                        "score": 1.0,
-                        "meta_information": {"matchSource": "Defined by Moritz Sommer"}
-                    },
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/three",
-                        "match_semantic_id": "remote-service.com/semanticID/tres",
-                        "score": 1.0,
-                        "meta_information": {"matchSource": "Defined by Moritz Sommer",
-                                             "path": ["remote-service.com/semanticID/trois"]}
-                    },
-                ]
-            }
+            expected_matches = [
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://s-heppner.com/semantic_id/uno',
+                 'path': ['https://s-heppner.com/semantic_id/one'],
+                 'score': 0.9},
+                {'base_semantic_id': 'https://s-heppner.com/semantic_id/one',
+                 'match_semantic_id': 'https://remote.com/semantic_id/deux',
+                 'path': ['https://s-heppner.com/semantic_id/one'],
+                 'score': 0.7}
+            ]
             actual_matches = response.json()
             self.assertEqual(expected_matches, actual_matches)
@@ -238,74 +176,8 @@ def test_get_matches_no_matches(self):
                 "local_only": True
             }
             response = requests.get("http://localhost:8000/get_matches", json=match_request)
-            expected_matches = {"matches": []}
             actual_matches = response.json()
-            self.assertEqual(expected_matches, actual_matches)
-
-    def test_get_matches_with_low_score_limit(self):
-        with run_server_context():
-            match_request = {
-                "semantic_id": "s-heppner.com/semanticID/one",
-                "score_limit": 0.9,
-                "local_only": True
-            }
-            response = requests.get("http://localhost:8000/get_matches", json=match_request)
-            expected_matches = {
-                "matches": [
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/1",
-                        "score": 1.0,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner"}
-                    }
-                ]
-            }
-            actual_matches = response.json()
-            self.assertEqual(expected_matches, actual_matches)
-
-    def test_get_matches_with_nlp_parameters(self):
-        with run_server_context():
-            match_request = {
-                "semantic_id": "s-heppner.com/semanticID/one",
-                "score_limit": 0.5,
-                "local_only": True,
-                "name": "Example Name",
-                "definition": "Example Definition"
-            }
-            response = requests.get("http://localhost:8000/get_matches", json=match_request)
-            expected_matches = {
-                "matches": [
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/1",
-                        "score": 1.0,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner"}
-                    },
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/two",
-                        "score": 0.8,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner"}
-                    },
-                    {
-                        "base_semantic_id": "s-heppner.com/semanticID/one",
-                        "match_semantic_id": "s-heppner.com/semanticID/2",
-                        "score": 0.8,
-                        "meta_information": {"matchSource": "Defined by Sebastian Heppner",
-                                             "path": ["s-heppner.com/semanticID/two"]}
-                    }
-                ]
-            }
-            actual_matches = response.json()
-            self.assertEqual(expected_matches, actual_matches)
-
-    def test_remove_all_matches(self):
-        with run_server_context():
-            requests.post("http://localhost:8000/clear")
-            response = requests.get("http://localhost:8000/all_matches")
-            expected_matches = {}
-            actual_matches = response.json()
-            self.assertEqual(expected_matches, actual_matches)
+            self.assertEqual([], actual_matches)
 
 
 if __name__ == '__main__':
diff --git a/test_resources/config.ini b/test_resources/config.ini
deleted file mode 100644
index ae230aa..0000000
--- a/test_resources/config.ini
+++ /dev/null
@@ -1,8 +0,0 @@
-[SERVICE]
-endpoint=127.0.0.1
-port=8000
-equivalence_table_file=./test_resources/equivalence_table.json
-
-[RESOLVER]
-endpoint=http://semantic_id_resolver
-port=8125
diff --git a/test_resources/equivalence_table.json b/test_resources/equivalence_table.json
deleted file mode 100644
index 34c8ac6..0000000
--- a/test_resources/equivalence_table.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-    "matches": {
-        "s-heppner.com/semanticID/one": [
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/one",
-                "match_semantic_id": "s-heppner.com/semanticID/1",
-                "score": 1.0,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            },
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/one",
-                "match_semantic_id": "s-heppner.com/semanticID/two",
-                "score": 0.8,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            }
-        ],
-        "s-heppner.com/semanticID/two": [
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/two",
-                "match_semantic_id": "s-heppner.com/semanticID/2",
-                "score": 1.0,
-                "meta_information": {
-                    "matchSource": "Defined by Sebastian Heppner"
-                }
-            }
-        ],
-        "s-heppner.com/semanticID/three": [
-            {
-                "base_semantic_id": "s-heppner.com/semanticID/three",
-                "match_semantic_id": "remote-service.com/semanticID/trois",
-                "score": 1.0,
-                "meta_information": {
-                    "matchSource": "Defined by Moritz Sommer"
-                }
-            }
-        ]
-    }
-}
\ No newline at end of file