From d82dfa070cac88d3f83077332f1a778d28cdd0e0 Mon Sep 17 00:00:00 2001 From: totodore Date: Sun, 11 Jan 2026 14:59:38 +0100 Subject: [PATCH 1/5] fix(qw-search): unsupported OwnedValue deser with postcard --- quickwit/quickwit-query/src/aggregations.rs | 163 +++++++++++++++++++- 1 file changed, 158 insertions(+), 5 deletions(-) diff --git a/quickwit/quickwit-query/src/aggregations.rs b/quickwit/quickwit-query/src/aggregations.rs index 6b7755c4fcc..57d6c69e815 100644 --- a/quickwit/quickwit-query/src/aggregations.rs +++ b/quickwit/quickwit-query/src/aggregations.rs @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::net::Ipv6Addr; + use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; +use tantivy::DateTime; use tantivy::aggregation::Key as TantivyKey; use tantivy::aggregation::agg_result::{ AggregationResult as TantivyAggregationResult, AggregationResults as TantivyAggregationResults, @@ -24,8 +27,10 @@ use tantivy::aggregation::agg_result::{ use tantivy::aggregation::metric::{ ExtendedStats, PercentileValues as TantivyPercentileValues, PercentileValuesVecEntry, PercentilesMetricResult as TantivyPercentilesMetricResult, SingleMetricResult, Stats, - TopHitsMetricResult, + TopHitsMetricResult as TantivyTopHitsMetricResult, TopHitsVecEntry as TantivyTopHitsVecEntry, }; +use tantivy::schema::{Facet, OwnedValue as TantivyOwnedValue}; +use tantivy::tokenizer::PreTokenizedString; // hopefully all From in this module are no-ops, otherwise, this is a very sad situation @@ -116,7 +121,7 @@ impl From for MetricResult { TantivyMetricResult::ExtendedStats(val) => MetricResult::ExtendedStats(val), TantivyMetricResult::Sum(val) => MetricResult::Sum(val), TantivyMetricResult::Percentiles(val) => MetricResult::Percentiles(val.into()), - TantivyMetricResult::TopHits(val) => MetricResult::TopHits(val), + TantivyMetricResult::TopHits(val) => MetricResult::TopHits(val.into()), 
TantivyMetricResult::Cardinality(val) => MetricResult::Cardinality(val), } } @@ -133,7 +138,7 @@ impl From<MetricResult> for TantivyMetricResult { MetricResult::ExtendedStats(val) => TantivyMetricResult::ExtendedStats(val), MetricResult::Sum(val) => TantivyMetricResult::Sum(val), MetricResult::Percentiles(val) => TantivyMetricResult::Percentiles(val.into()), - MetricResult::TopHits(val) => TantivyMetricResult::TopHits(val), + MetricResult::TopHits(val) => TantivyMetricResult::TopHits(val.into()), MetricResult::Cardinality(val) => TantivyMetricResult::Cardinality(val), } } @@ -229,7 +234,8 @@ pub enum BucketEntries<T> { } impl<T, U> From<TantivyBucketEntries<T>> for BucketEntries<U> -where U: From<T> +where + U: From<T>, { fn from(value: TantivyBucketEntries<T>) -> BucketEntries<U> { match value { @@ -244,7 +250,8 @@ where U: From<T> } impl<T, U> From<BucketEntries<T>> for TantivyBucketEntries<U> -where U: From<T> +where + U: From<T>, { fn from(value: BucketEntries<T>) -> TantivyBucketEntries<U> { match value { @@ -413,3 +420,149 @@ impl From<PercentilesMetricResult> for TantivyPercentilesMetricResult { TantivyPercentilesMetricResult { values } } } + +// Redefine the tantivy TopHitsVecEntry to use our own `OwnedValue` +// and avoid skip_serializing_if so postcard can (de)-serialize it. +/// The top_hits metric results entry +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TopHitsVecEntry { + /// The sort values of the document, depending on the sort criteria in the request. + pub sort: Vec<Option<u64>>, + + /// Search results, for queries that include field retrieval requests + /// (`docvalue_fields`). + #[serde(rename = "docvalue_fields")] + pub doc_value_fields: FxHashMap<String, OwnedValue>, +} + +/// The top_hits metric aggregation returns a list of top hits by sort criteria. +/// +/// The main reason for wrapping it in `hits` is to match elasticsearch output structure. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TopHitsMetricResult { + /// The result of the top_hits metric. 
+ pub hits: Vec<TopHitsVecEntry>, +} + +/// Redefinition of [`TantivyOwnedValue`] to have it work +/// with postcard de-serialization. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum OwnedValue { + /// A null value. + Null, + /// The str type is used for any text information. + Str(String), + /// Pre-tokenized str type, + PreTokStr(PreTokenizedString), + /// Unsigned 64-bits Integer `u64` + U64(u64), + /// Signed 64-bits Integer `i64` + I64(i64), + /// 64-bits Float `f64` + F64(f64), + /// Bool value + Bool(bool), + /// Date/time with nanoseconds precision + Date(DateTime), + /// Facet + Facet(Facet), + /// Arbitrarily sized byte array + Bytes(Vec<u8>), + /// A set of values. + Array(Vec<Self>), + /// Dynamic object value. + Object(Vec<(String, Self)>), + /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. + IpAddr(Ipv6Addr), +} + +impl From<TopHitsVecEntry> for TantivyTopHitsVecEntry { + fn from(value: TopHitsVecEntry) -> TantivyTopHitsVecEntry { + TantivyTopHitsVecEntry { + sort: value.sort, + doc_value_fields: value + .doc_value_fields + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + } + } +} +impl From<TantivyTopHitsVecEntry> for TopHitsVecEntry { + fn from(value: TantivyTopHitsVecEntry) -> Self { + TopHitsVecEntry { + sort: value.sort, + doc_value_fields: value + .doc_value_fields + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + } + } +} +impl From<TopHitsMetricResult> for TantivyTopHitsMetricResult { + fn from(value: TopHitsMetricResult) -> Self { + TantivyTopHitsMetricResult { + hits: value + .hits + .into_iter() + .map(TantivyTopHitsVecEntry::from) + .collect(), + } + } +} +impl From<TantivyTopHitsMetricResult> for TopHitsMetricResult { + fn from(value: TantivyTopHitsMetricResult) -> Self { + TopHitsMetricResult { + hits: value.hits.into_iter().map(TopHitsVecEntry::from).collect(), + } + } +} + +impl From<TantivyOwnedValue> for OwnedValue { + fn from(value: TantivyOwnedValue) -> Self { + match value { + TantivyOwnedValue::Null => OwnedValue::Null, + TantivyOwnedValue::Str(v) => OwnedValue::Str(v), 
+ TantivyOwnedValue::PreTokStr(v) => OwnedValue::PreTokStr(v), + TantivyOwnedValue::U64(v) => OwnedValue::U64(v), + TantivyOwnedValue::I64(v) => OwnedValue::I64(v), + TantivyOwnedValue::F64(v) => OwnedValue::F64(v), + TantivyOwnedValue::Bool(v) => OwnedValue::Bool(v), + TantivyOwnedValue::Date(v) => OwnedValue::Date(v), + TantivyOwnedValue::Facet(v) => OwnedValue::Facet(v), + TantivyOwnedValue::Bytes(v) => OwnedValue::Bytes(v), + TantivyOwnedValue::Array(v) => { + OwnedValue::Array(v.into_iter().map(OwnedValue::from).collect()) + } + TantivyOwnedValue::Object(v) => { + OwnedValue::Object(v.into_iter().map(|(k, v)| (k, v.into())).collect()) + } + TantivyOwnedValue::IpAddr(v) => OwnedValue::IpAddr(v), + } + } +} + +impl From<OwnedValue> for TantivyOwnedValue { + fn from(value: OwnedValue) -> Self { + match value { + OwnedValue::Null => TantivyOwnedValue::Null, + OwnedValue::Str(v) => TantivyOwnedValue::Str(v), + OwnedValue::PreTokStr(v) => TantivyOwnedValue::PreTokStr(v), + OwnedValue::F64(v) => TantivyOwnedValue::F64(v), + OwnedValue::U64(v) => TantivyOwnedValue::U64(v), + OwnedValue::I64(v) => TantivyOwnedValue::I64(v), + OwnedValue::Bool(v) => TantivyOwnedValue::Bool(v), + OwnedValue::Date(v) => TantivyOwnedValue::Date(v), + OwnedValue::Facet(v) => TantivyOwnedValue::Facet(v), + OwnedValue::Bytes(v) => TantivyOwnedValue::Bytes(v), + OwnedValue::Array(v) => { + TantivyOwnedValue::Array(v.into_iter().map(TantivyOwnedValue::from).collect()) + } + OwnedValue::Object(v) => { + TantivyOwnedValue::Object(v.into_iter().map(|(k, v)| (k, v.into())).collect()) + } + OwnedValue::IpAddr(v) => TantivyOwnedValue::IpAddr(v), + } + } +} From 8382a8e6d31e93142d530891d2e1e2e00fd1e076 Mon Sep 17 00:00:00 2001 From: tprevot Date: Tue, 13 Jan 2026 10:43:25 +0100 Subject: [PATCH 2/5] fix: fmt --- quickwit/quickwit-query/src/aggregations.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-query/src/aggregations.rs b/quickwit/quickwit-query/src/aggregations.rs 
index 57d6c69e815..42675bd8c98 100644 --- a/quickwit/quickwit-query/src/aggregations.rs +++ b/quickwit/quickwit-query/src/aggregations.rs @@ -234,8 +234,7 @@ pub enum BucketEntries<T> { } impl<T, U> From<TantivyBucketEntries<T>> for BucketEntries<U> -where - U: From<T>, +where U: From<T> { fn from(value: TantivyBucketEntries<T>) -> BucketEntries<U> { match value { @@ -250,8 +249,7 @@ where } impl<T, U> From<BucketEntries<T>> for TantivyBucketEntries<U> -where - U: From<T>, +where U: From<T> { fn from(value: BucketEntries<T>) -> TantivyBucketEntries<U> { match value { From da328e9708bec43fd427c1851123270d27b431db Mon Sep 17 00:00:00 2001 From: tprevot Date: Wed, 14 Jan 2026 11:00:20 +0100 Subject: [PATCH 3/5] tests: add top_hits scenario for es compatible API --- .../es_compatibility/0032-top_hits.yaml | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml new file mode 100644 index 00000000000..2c661a750c8 --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml @@ -0,0 +1,128 @@ +# Test top_hits aggregation grouped by actor.login +params: + size: 0 +json: + aggs: + logins: + terms: + field: actor.login + aggs: + recent_events: + top_hits: + size: 1 + sort: + - created_at: desc + docvalue_fields: + - created_at + - repo.name + +expected: + hits: + total: + value: 100 + hits: [] + + aggregations: + logins: + doc_count_error_upper_bound: 0 + sum_other_doc_count: 86 + buckets: + - key: jadonk + doc_count: 2 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["beagleboard/beagleboard-org"] + created_at: ["2015-02-01T00:00:16Z"] + + - key: teozfrank + doc_count: 2 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["teozfrank/DuelMe"] + created_at: ["2015-02-01T00:00:06Z"] + + - key: hodgies + 
doc_count: 2 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["comp4560-4R-assignemnt-git-wrecked/git-wrecked"] + created_at: ["2015-02-01T00:00:06Z"] + + - key: freeside + doc_count: 2 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["freeside/Freeside"] + created_at: ["2015-02-01T00:00:12Z"] + + - key: enist + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["t4t5/sweetalert"] + created_at: ["2015-02-01T00:00:11Z"] + + - key: brandon1011 + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["jazzTheJackRabbit/recommendationSystems"] + created_at: ["2015-02-01T00:00:11Z"] + + - key: wmfgerrit + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["wikimedia/mediawiki-extensions"] + created_at: ["2015-02-01T00:00:06Z"] + + - key: raxacoricofallapatorius + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["raxacoricofallapatorius/astro"] + created_at: ["2015-02-01T00:00:11Z"] + + - key: manfredtremmel + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["ManfredTremmel/gwt-commons-lang3"] + created_at: ["2015-02-01T00:00:13Z"] + + - key: cn-nytimes + doc_count: 1 + recent_events: + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["cn-nytimes/mirrors"] + created_at: ["2015-02-01T00:00:11Z"] From 268366cf94b56cb02d9c4f66fb421d751b09fe57 Mon Sep 17 00:00:00 2001 From: tprevot Date: Wed, 14 Jan 2026 14:56:00 +0100 Subject: [PATCH 4/5] tests: fix top_hits test --- .../es_compatibility/0032-top_hits.yaml | 69 ++++++++++--------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml 
b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml index 2c661a750c8..b49d258fe47 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml @@ -6,6 +6,8 @@ json: logins: terms: field: actor.login + order: + _key: asc aggs: recent_events: top_hits: @@ -24,105 +26,104 @@ expected: aggregations: logins: - doc_count_error_upper_bound: 0 - sum_other_doc_count: 86 + sum_other_doc_count: 90 buckets: - - key: jadonk - doc_count: 2 + - key: 1maria + doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["beagleboard/beagleboard-org"] - created_at: ["2015-02-01T00:00:16Z"] + repo.name: ["Health-Tracker/health_tracker"] + created_at: ["2015-02-01T00:00:11Z"] - - key: teozfrank - doc_count: 2 + - key: aborruso + doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["teozfrank/DuelMe"] - created_at: ["2015-02-01T00:00:06Z"] + repo.name: ["SiciliaHub/albopretoriopa"] + created_at: ["2015-02-01T00:00:07Z"] - - key: hodgies - doc_count: 2 + - key: adius + doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["comp4560-4R-assignemnt-git-wrecked/git-wrecked"] - created_at: ["2015-02-01T00:00:06Z"] + repo.name: ["casertap/UkuleleSongbook"] + created_at: ["2015-02-01T00:00:11Z"] - - key: freeside - doc_count: 2 + - key: amosnier + doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["freeside/Freeside"] - created_at: ["2015-02-01T00:00:12Z"] + repo.name: ["amosnier/code"] + created_at: ["2015-02-01T00:00:06Z"] - - key: enist + - key: anantax13 doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["t4t5/sweetalert"] + repo.name: ["anantax13/aboutme"] created_at: ["2015-02-01T00:00:11Z"] - - key: brandon1011 + - key: ancurio doc_count: 1 recent_events: 
hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["jazzTheJackRabbit/recommendationSystems"] - created_at: ["2015-02-01T00:00:11Z"] + repo.name: ["Ancurio/mkxp"] + created_at: ["2015-02-01T00:00:15Z"] - - key: wmfgerrit + - key: athal7 doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["wikimedia/mediawiki-extensions"] - created_at: ["2015-02-01T00:00:06Z"] + repo.name: ["spree/spree"] + created_at: ["2015-02-01T00:00:15Z"] - - key: raxacoricofallapatorius + - key: basuco doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["raxacoricofallapatorius/astro"] - created_at: ["2015-02-01T00:00:11Z"] + repo.name: ["carlops/Haskinator"] + created_at: ["2015-02-01T00:00:06Z"] - - key: manfredtremmel + - key: boecko doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["ManfredTremmel/gwt-commons-lang3"] - created_at: ["2015-02-01T00:00:13Z"] + repo.name: ["toy/blueutil"] + created_at: ["2015-02-01T00:00:12Z"] - - key: cn-nytimes + - key: bptripp doc_count: 1 recent_events: hits: - sort: $expect: "len(val) == 1" docvalue_fields: - repo.name: ["cn-nytimes/mirrors"] + repo.name: ["bptripp/nengo-FPGA"] created_at: ["2015-02-01T00:00:11Z"] From c1d2b13ab9bf36b05b9d441bf8d155d4722bcc37 Mon Sep 17 00:00:00 2001 From: tprevot Date: Tue, 20 Jan 2026 18:54:36 +0100 Subject: [PATCH 5/5] tests(rest): fix top hits test --- .../es_compatibility/0032-top_hits.yaml | 110 ++++++++++-------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml index b49d258fe47..23a99b6b327 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0032-top_hits.yaml @@ -32,98 +32,108 @@ expected: doc_count: 1 recent_events: 
hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["Health-Tracker/health_tracker"] - created_at: ["2015-02-01T00:00:11Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["Health-Tracker/health_tracker"] + created_at: ["2015-02-01T00:00:11Z"] - key: aborruso doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["SiciliaHub/albopretoriopa"] - created_at: ["2015-02-01T00:00:07Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["SiciliaHub/albopretoriopa"] + created_at: ["2015-02-01T00:00:07Z"] - key: adius doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["casertap/UkuleleSongbook"] - created_at: ["2015-02-01T00:00:11Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["casertap/UkuleleSongbook"] + created_at: ["2015-02-01T00:00:11Z"] - key: amosnier doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["amosnier/code"] - created_at: ["2015-02-01T00:00:06Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["amosnier/code"] + created_at: ["2015-02-01T00:00:06Z"] - key: anantax13 doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["anantax13/aboutme"] - created_at: ["2015-02-01T00:00:11Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["anantax13/aboutme"] + created_at: ["2015-02-01T00:00:11Z"] - key: ancurio doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["Ancurio/mkxp"] - created_at: ["2015-02-01T00:00:15Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["Ancurio/mkxp"] + created_at: ["2015-02-01T00:00:15Z"] - key: athal7 doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - 
docvalue_fields: - repo.name: ["spree/spree"] - created_at: ["2015-02-01T00:00:15Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["spree/spree"] + created_at: ["2015-02-01T00:00:15Z"] - key: basuco doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["carlops/Haskinator"] - created_at: ["2015-02-01T00:00:06Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["carlops/Haskinator"] + created_at: ["2015-02-01T00:00:06Z"] - key: boecko doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["toy/blueutil"] - created_at: ["2015-02-01T00:00:12Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["toy/blueutil"] + created_at: ["2015-02-01T00:00:12Z"] - key: bptripp doc_count: 1 recent_events: hits: - - sort: - $expect: "len(val) == 1" - docvalue_fields: - repo.name: ["bptripp/nengo-FPGA"] - created_at: ["2015-02-01T00:00:11Z"] + hits: + - sort: + $expect: "len(val) == 1" + docvalue_fields: + repo.name: ["bptripp/nengo-FPGA"] + created_at: ["2015-02-01T00:00:11Z"]