From 5a26b0c0be8f3b40c4365c13c3824d39e7d3325d Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 21 Jan 2025 18:34:10 -0500 Subject: [PATCH 1/2] Removed boosting. --- api/server.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/api/server.py b/api/server.py index 13f8f463..9ca93b07 100755 --- a/api/server.py +++ b/api/server.py @@ -429,11 +429,25 @@ async def lookup(string: str, # https://solr.apache.org/guide/solr/latest/query-guide/dismax-query-parser.html#pf-phrase-fields-parameter "pf": "preferred_name_exactish^10 names_exactish^5 preferred_name names", # Boosts - "bq": [], + "bq": [ + # We don't like results whose clique only has a single identifier, so we slightly un-boost these results. + # Unfortunately this is quite slow and doesn't seem to be very useful. + # "clique_identifier_count:[0 TO 1]^0.8" + ], "boost": [ # The boost is multiplied with score -- calculating the log() reduces how quickly this increases - # the score for increasing clique identifier counts. - "log(clique_identifier_count)" + # the score for increasing clique identifier counts. However, this can lead to confusing results, + # where we pick a non-exact match instead of an exact match (see + # https://github.com/TranslatorSRI/NameResolution/issues/174 and + # https://github.com/TranslatorSRI/NameResolution/issues/161). + # + # The downside to turning this off is that: + # 1. We get a ton of UMLS results coming back in (which can be filtered out if needed, but that's + # not ideal). + # 2. We get back a random gene instead of the human gene (although you could filter that with + # the taxon name, but also not ideal). + # + # "log(clique_identifier_count)" ], }, }, From b87f95aa6540836147509e3e687c21a38a287b35 Mon Sep 17 00:00:00 2001 From: Gaurav Vaidya Date: Tue, 21 Jan 2025 18:34:39 -0500 Subject: [PATCH 2/2] Added on:push trigger for testing. 
--- .github/workflows/release-name-resolution.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release-name-resolution.yml b/.github/workflows/release-name-resolution.yml index e5773778..1b349b27 100644 --- a/.github/workflows/release-name-resolution.yml +++ b/.github/workflows/release-name-resolution.yml @@ -1,6 +1,7 @@ name: 'Release a new version of NameResolution to Github Packages' on: + push: release: types: [published]