Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import au.org.aodn.ogcapi.server.core.model.enumeration.CQLFields;
import au.org.aodn.ogcapi.server.core.model.enumeration.CQLFieldsInterface;
import au.org.aodn.ogcapi.server.core.model.enumeration.StacBasicField;
import au.org.aodn.ogcapi.server.core.model.enumeration.StacSummeries;
import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch._types.*;
import co.elastic.clients.elasticsearch._types.aggregations.*;
Expand Down Expand Up @@ -157,31 +158,104 @@ protected SearchResult<StacCollectionModel> searchCollectionBy(final List<Query>
final List<SortOptions> sortOptions,
final Double score,
final Long maxSize) {

Supplier<SearchRequest.Builder> builderSupplier = () -> {
SearchRequest.Builder builder = new SearchRequest.Builder();
builder.index(indexName)
// If the user requests a page that is smaller than the internal default, then
// we use the smaller one. The internal page size is used to get the result in
// batches; let's say the page is 20 and the internal size is 10, then we do it in two batches.
// But if only 5 are requested, then there is no point in loading 10
.size(maxSize != null && maxSize < pageSize ? maxSize.intValue() : pageSize)
.query(q -> q.bool(createBoolQueryForProperties(queries, should, filters)));
.size(maxSize != null && maxSize < pageSize ? maxSize.intValue() : pageSize);

// use script score if search with text, in such case, the final score depends on both relevance and metadata quality
// put query in script block
// determine to use script score block or not
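// only use script_score if sort options are provided and the should field is not empty
// NOTE(review): the check below does not verify that one of the sort options is "score" - confirm intent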
boolean useScriptScore = (sortOptions != null && !sortOptions.isEmpty()) && (should != null && !should.isEmpty());

if (useScriptScore) {
String summaryScore = StacSummeries.Score.searchField;
builder.query(q -> q.scriptScore(ss -> ss
// to get the original _score from Elasticsearch
.query(bq -> bq.bool(createBoolQueryForProperties(queries, should, filters)))
.script(s -> s.inline(i -> i
.lang("painless")
.source(
// Step 1: Retrieve internal quality score from summaries.score field
// Default to 0 if field doesn't exist or is empty
"double internalScore = doc.containsKey('"+summaryScore+"') && " +
"!doc['"+summaryScore+"'].empty ? doc['"+summaryScore+"'].value : 0.0; " +

// Step 2: Normalize internal score to 0-1 range
// Assuming summaries.score is in range 0-106
"double normalizedScore = internalScore / 106.0; " +

// Step 3: Ensure minimum multiplier to avoid zero scores
"double multiplier = Math.max(normalizedScore, 0.01); " +

// Step 4: Calculate final score
// Final score = Elasticsearch relevance * normalized quality
"return _score * multiplier;"
)
)
))
);
}
// use original query logic
else {
builder.query(q -> q.bool(createBoolQueryForProperties(queries, should, filters)));
}

if(searchAfter != null) {
builder.searchAfter(searchAfter);
}

if(sortOptions != null) {
builder.sort(sortOptions);
}
// to use sort by uuid as a tiebreaker
boolean hasUuidSort = false;

builder.sort(so -> so
// We need a unique key for the search, cannot use _id in v8 anymore, so we need
// to sort using the keyword, this field is not for search and therefore not in enum
.field(FieldSort.of(f -> f
.field(StacBasicField.UUID.sortField)
.order(SortOrder.Asc))));
// apply sort options
if (useScriptScore) {
// add sort options
if (sortOptions != null && !sortOptions.isEmpty()) {
for (SortOptions sortOption : sortOptions) {
builder.sort(sortOption);

// check if it has sort by id option
if (sortOption.isField() &&
sortOption.field().field().equals(StacBasicField.UUID.sortField)) {
hasUuidSort = true;
}
}
}
}
else {
// when not using script_score, apply all sort options
if (sortOptions != null && !sortOptions.isEmpty()) {
for (SortOptions sortOption : sortOptions) {
builder.sort(sortOption);

// check if it has sort by id option
if (sortOption.isField() &&
sortOption.field().field().equals(StacBasicField.UUID.sortField)) {
hasUuidSort = true;
}
}
}
else if (should != null && !should.isEmpty()) {
// If no sortOptions provided but there are text queries,
// default to sorting by _score
builder.sort(so -> so.score(sc -> sc.order(SortOrder.Desc)));
}
}
// add sort by uuid as the final tiebreaker if it was not already applied
if (!hasUuidSort) {
builder.sort(so -> so
// We need a unique key for the search, cannot use _id in v8 anymore, so we need
// to sort using the keyword, this field is not for search and therefore not in enum
.field(FieldSort.of(f -> f
.field(StacBasicField.UUID.sortField)
.order(SortOrder.Asc))));
}

if(score != null) {
// By default we do not setup any min_score, the api caller should pass it in so
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ public void verifyCQLPropertyScore() throws IOException {

// Increasing the score will drop one record
collections = testRestTemplate.getForEntity(getBasePath() + "/collections?q='dataset includes'&filter=score>=3", Collections.class);
assertEquals(3, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3");
assertEquals(2, Objects.requireNonNull(collections.getBody()).getCollections().size(), "hit 3, with score 3");
assertEquals("bf287dfe-9ce4-4969-9c59-51c39ea4d011", Objects.requireNonNull(collections.getBody()).getCollections().get(0).getId(), "bf287dfe-9ce4-4969-9c59-51c39ea4d011");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,17 @@ public void verifyCorrectInternalPagingLargeData() throws IOException {
// Given 6 records and a page size of 4, each Elasticsearch query returns only 4 records
// and the logic to load the rest can kick in.
super.insertJsonToElasticRecordIndex(
// set test summaries.score 90
"5c418118-2581-4936-b6fd-d6bedfe74f62.json",
// set test summaries.score 106
"19da2ce7-138f-4427-89de-a50c724f5f54.json",
// set test summaries.score 70
"516811d7-cd1e-207a-e0440003ba8c79dd.json",
// set test summaries.score 60
"7709f541-fc0c-4318-b5b9-9053aa474e0e.json",
// set test summaries.score 50
"bc55eff4-7596-3565-e044-00144fdd4fa6.json",
// set test summaries.score 100
"bf287dfe-9ce4-4969-9c59-51c39ea4d011.json");

// Call rest api directly and get query result
Expand Down Expand Up @@ -97,6 +103,7 @@ public void verifyCorrectInternalPagingLargeData() throws IOException {
}
/**
* With page_size set, the maximum number of records returned will equal page_size.
* With the default search, the sort should follow uuid order.
*/
@Test
public void verifyCorrectPageSizeDataReturn() throws IOException {
Expand All @@ -105,11 +112,17 @@ public void verifyCorrectPageSizeDataReturn() throws IOException {
// Given 6 records and a page size of 4, each Elasticsearch query returns only 4 records
// and the logic to load the rest can kick in.
super.insertJsonToElasticRecordIndex(
// set test summaries.score 90
"5c418118-2581-4936-b6fd-d6bedfe74f62.json",
// set test summaries.score 106
"19da2ce7-138f-4427-89de-a50c724f5f54.json",
// set test summaries.score 70
"516811d7-cd1e-207a-e0440003ba8c79dd.json",
// set test summaries.score 60
"7709f541-fc0c-4318-b5b9-9053aa474e0e.json",
// set test summaries.score 50
"bc55eff4-7596-3565-e044-00144fdd4fa6.json",
// set test summaries.score 100
"bf287dfe-9ce4-4969-9c59-51c39ea4d011.json");

// Call rest api directly and get query result
Expand All @@ -132,12 +145,12 @@ public void verifyCorrectPageSizeDataReturn() throws IOException {
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
assertEquals("1.0", collections.getBody().getSearchAfter().get(0), "Search after 1 value");
assertEquals(
"100",
"90",
collections.getBody().getSearchAfter().get(1),
"search_after 2 arg"
);
assertEquals(
"str:bf287dfe-9ce4-4969-9c59-51c39ea4d011",
"str:5c418118-2581-4936-b6fd-d6bedfe74f62",
collections.getBody().getSearchAfter().get(2),
"search_after 3 arg"
);
Expand Down Expand Up @@ -185,6 +198,11 @@ public void verifyCorrectPageSizeDataReturn() throws IOException {
* Extreme case: page size is set to 1 with query text "dataset", and we page through the results one by one. Only part of the json
* will be returned; the sort value should give you the next item so that you can go to the next one.
* The first sort value is the relevance score, and because of the query text the value will be something greater than 1.0
* After weighted sorting, the actual order is (for the first 4 records):
* Document 0: UUID=bf287dfe-9ce4-4969-9c59-51c39ea4d011
* Document 1: UUID=19da2ce7-138f-4427-89de-a50c724f5f54
* Document 2: UUID=bc55eff4-7596-3565-e044-00144fdd4fa6
* Document 3: UUID=7709f541-fc0c-4318-b5b9-9053aa474e0e
*/
@Test
public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
Expand All @@ -193,11 +211,17 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
// Given 6 records and a page size of 4, each Elasticsearch query returns only 4 records
// and the logic to load the rest can kick in.
super.insertJsonToElasticRecordIndex(
// set test summaries.score 90
"5c418118-2581-4936-b6fd-d6bedfe74f62.json",
// set test summaries.score 106
"19da2ce7-138f-4427-89de-a50c724f5f54.json",
// set test summaries.score 70
"516811d7-cd1e-207a-e0440003ba8c79dd.json",
// set test summaries.score 60
"7709f541-fc0c-4318-b5b9-9053aa474e0e.json",
// set test summaries.score 50
"bc55eff4-7596-3565-e044-00144fdd4fa6.json",
// set test summaries.score 100
"bf287dfe-9ce4-4969-9c59-51c39ea4d011.json");

// Call rest api directly and get query result with search on "dataset"
Expand All @@ -220,7 +244,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
// The search after gives you the value to go to the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
assertEquals(
"str:bc55eff4-7596-3565-e044-00144fdd4fa6",
"str:bf287dfe-9ce4-4969-9c59-51c39ea4d011",
collections.getBody().getSearchAfter().get(2),
"search_after 2 arg"
);
Expand All @@ -232,7 +256,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
String.format("'%s||%s||%s'",
collections.getBody().getSearchAfter().get(0),
collections.getBody().getSearchAfter().get(1),
"bc55eff4-7596-3565-e044-00144fdd4fa6"),
"bf287dfe-9ce4-4969-9c59-51c39ea4d011"),
HttpMethod.GET,
null,
new ParameterizedTypeReference<>() {
Expand All @@ -249,7 +273,7 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
// The search after gives you the value to go to the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
assertEquals(
"str:7709f541-fc0c-4318-b5b9-9053aa474e0e",
"str:19da2ce7-138f-4427-89de-a50c724f5f54",
collections.getBody().getSearchAfter().get(2),
"search_after 3 arg"
);
Expand Down Expand Up @@ -278,14 +302,19 @@ public void verifyCorrectPageSizeDataReturnWithQuery() throws IOException {
// The search after gives you the value to go to the next batch
assertEquals(3, collections.getBody().getSearchAfter().size(), "search_after three fields");
assertEquals(
"str:19da2ce7-138f-4427-89de-a50c724f5f54",
"str:5c418118-2581-4936-b6fd-d6bedfe74f62",
collections.getBody().getSearchAfter().get(2),
"search_after 3 value"
);
}
/**
* Similar to verifyCorrectPageSizeDataReturnWithQuery and add score in the query,
* this is used to verify a bug fix where page_size and score crash the query
* After weighted sorting, the actual order is (for the first 4 records):
* Document 0: UUID=bf287dfe-9ce4-4969-9c59-51c39ea4d011
* Document 1: UUID=19da2ce7-138f-4427-89de-a50c724f5f54
* Document 2: UUID=bc55eff4-7596-3565-e044-00144fdd4fa6
* Document 3: UUID=7709f541-fc0c-4318-b5b9-9053aa474e0e
*/
@Test
public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
Expand All @@ -296,11 +325,17 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
// Given 6 records and a page size of 4, each Elasticsearch query returns only 4 records
// and the logic to load the rest can kick in.
super.insertJsonToElasticRecordIndex(
// set test summaries.score 90
"5c418118-2581-4936-b6fd-d6bedfe74f62.json",
// set test summaries.score 106
"19da2ce7-138f-4427-89de-a50c724f5f54.json",
// set test summaries.score 70
"516811d7-cd1e-207a-e0440003ba8c79dd.json",
// set test summaries.score 60
"7709f541-fc0c-4318-b5b9-9053aa474e0e.json",
// set test summaries.score 50
"bc55eff4-7596-3565-e044-00144fdd4fa6.json",
// set test summaries.score 100
"bf287dfe-9ce4-4969-9c59-51c39ea4d011.json");

// Call rest api directly and get query result with search on "dataset"
Expand Down Expand Up @@ -329,12 +364,12 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
logger.debug("verifyCorrectPageSizeAndScoreWithQuery - search after {}", collections.getBody().getSearchAfter());

assertEquals(
"80",
"100",
collections.getBody().getSearchAfter().get(1),
"search_after 2 value"
);
assertEquals(
"str:bc55eff4-7596-3565-e044-00144fdd4fa6",
"str:bf287dfe-9ce4-4969-9c59-51c39ea4d011",
collections.getBody().getSearchAfter().get(2),
"search_after 3 value"
);
Expand All @@ -346,7 +381,7 @@ public void verifyCorrectPageSizeAndScoreWithQuery() throws IOException {
String.format("'%s|| %s || %s'",
collections.getBody().getSearchAfter().get(0),
collections.getBody().getSearchAfter().get(1),
"bc55eff4-7596-3565-e044-00144fdd4fa6"),
"bf287dfe-9ce4-4969-9c59-51c39ea4d011"),
HttpMethod.GET,
null,
new ParameterizedTypeReference<>() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
]
},
"summaries": {
"score": 100,
"score": 106,
"status": "completed",
"credits": [
"Australia’s Integrated Marine Observing System (IMOS) is enabled by the National Collaborative Research Infrastructure Strategy (NCRIS). It is operated by a consortium of institutions as an unincorporated joint venture, with the University of Tasmania as Lead Agent.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@
}
],
"summaries": {
"score": 100,
"score": 90,
"dataset_provider": null,
"dataset_group": ["aodn"],
"proj:geometry": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
]
},
"summaries": {
"score": 95,
"score": 60,
"status": "completed",
"credits": [
"Australian Climate Change Science Program",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
]
},
"summaries": {
"score": 80,
"score": 50,
"status": "",
"scope": {
"code": "nonGeographicDataset",
Expand Down
Loading