From c56d1a64c96da6feccefff1286e43ec29a2ddb01 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Mon, 23 Jan 2017 15:08:28 -0800 Subject: [PATCH] Add custom_field_analyzers option This new option allows users to create separate analyzers for fields that will be used for text indexes. COUCHDB-3278 --- src/mango_idx_text.erl | 21 +++++++-- test/07-text-custom-field-list-test.py | 64 ++++++++++++++++++++++++++ test/mango.py | 5 +- 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/mango_idx_text.erl b/src/mango_idx_text.erl index ad9d2e8..0ef0260 100644 --- a/src/mango_idx_text.erl +++ b/src/mango_idx_text.erl @@ -222,6 +222,11 @@ opts() -> {tag, default_field}, {optional, true}, {default, {[]}} + ]}, + {<<"custom_field_analyzers">>, [ + {tag, custom_field_analyzers}, + {optional, true}, + {default, []} ]}, {<<"selector">>, [ {tag, selector}, @@ -267,24 +272,30 @@ get_default_field_options(Props) -> end. +get_custom_analyzers(Props)-> + Analyzers = couch_util:get_value(custom_field_analyzers, Props, []), + [{mango_util:lucene_escape_user(F), A} || {[{F, A}]} <- Analyzers]. + + construct_analyzer({Props}) -> DefaultAnalyzer = couch_util:get_value(default_analyzer, Props, <<"keyword">>), + CustomAnalyzers = get_custom_analyzers(Props), {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props), - DefaultAnalyzerDef = case DefaultField of + AnalyzerDef = case DefaultField of true -> - [{<<"$default">>, DefaultFieldAnalyzer}]; + [{<<"$default">>, DefaultFieldAnalyzer}] ++ CustomAnalyzers; _ -> - [] + CustomAnalyzers end, - case DefaultAnalyzerDef of + case AnalyzerDef of [] -> <<"keyword">>; _ -> {[ {<<"name">>, <<"perfield">>}, {<<"default">>, DefaultAnalyzer}, - {<<"fields">>, {DefaultAnalyzerDef}} + {<<"fields">>, {AnalyzerDef}} ]} end. diff --git a/test/07-text-custom-field-list-test.py b/test/07-text-custom-field-list-test.py index 029c91c..cb7ac99 100644 --- a/test/07-text-custom-field-list-test.py +++ b/test/07-text-custom-field-list-test.py @@ -145,3 +145,67 @@ def test_two_or(self): {"location.state": "Don't Exist"}]}) assert len(docs) == 1 assert docs[0]["user_id"] == 10 + + +@unittest.skipUnless(mango.has_text_service(), "requires text service") +class CustomAnalyzerTests(mango.UserDocsTests): + + @classmethod + def setUpClass(klass): + super(CustomAnalyzerTests, klass).setUpClass() + if mango.has_text_service(): + klass.db.create_text_index(ddoc="cfa_1", + analyzer="standard", + custom_field_analyzers =[ + {"location.address.street:string" : "keyword"} + ] + ) + klass.db.create_text_index(ddoc="cfa_2", + custom_field_analyzers=[ + {"location.address.street:string" : "standard"} + ] + ) + klass.db.create_text_index(ddoc="cfa_3", + fields = [ + {"name": "location.state", "type": "string"}, + {"name": "location.address.street", "type": "string"} + ], + custom_field_analyzers=[ + {"location.state:string" : "standard"}, + {"location.address.street:string" : "keyword"} + ] + ) + + # Because of our filter, we need to add in $text:ignorecase as + # a no-op to bypass the filter + def test_standard_default_custom_keyword(self): + q = {"$or": [{"$text": "ignorecase"}, {"location.state": "new"}, + {"location.state": "hawaii"}]} + docs = self.db.find(q, sort=["location.address.street:string"], + use_index="_design/cfa_1") + + assert len(docs) == 3 + assert docs[0]["location"]["address"]["street"] == "Bancroft Place" + assert docs[1]["location"]["address"]["street"] == "Miller Avenue" + assert docs[2]["location"]["address"]["street"] == "Nostrand Avenue" + + def test_keyword_default_custom_standard(self): + q = {"$or": [{"$text": "ignorecase"}, + {"location.address.street": "avenue"}]} + docs = self.db.find(q, sort=["location.state:string"], + use_index="_design/cfa_2") + + assert len(docs) == 6 + assert docs[0]["location"]["state"] == "Maine" + assert docs[5]["location"]["state"] == "North Dakota" + + def test_custom_fields_custom_analyzers(self): + q = {"$or": [{"$text": "ignorecase"}, {"location.state": "new"}, + {"location.state": "hawaii"}]} + docs = self.db.find(q, sort=["location.address.street:string"], + use_index="_design/cfa_3") + + assert len(docs) == 3 + assert docs[0]["location"]["address"]["street"] == "Bancroft Place" + assert docs[1]["location"]["address"]["street"] == "Miller Avenue" + assert docs[2]["location"]["address"]["street"] == "Nostrand Avenue" diff --git a/test/mango.py b/test/mango.py index da51180..9d32043 100644 --- a/test/mango.py +++ b/test/mango.py @@ -104,7 +104,8 @@ def create_index(self, fields, idx_type="json", name=None, ddoc=None): return r.json()["result"] == "created" def create_text_index(self, analyzer=None, selector=None, idx_type="text", - default_field=None, fields=None, name=None, ddoc=None,index_array_lengths=None): + default_field=None, fields=None, name=None, ddoc=None, + index_array_lengths=None, custom_field_analyzers=None): body = { "index": { }, @@ -123,6 +124,8 @@ def create_text_index(self, analyzer=None, selector=None, idx_type="text", body["selector"] = selector if fields is not None: body["index"]["fields"] = fields + if custom_field_analyzers is not None: + body["index"]["custom_field_analyzers"] = custom_field_analyzers if ddoc is not None: body["ddoc"] = ddoc body = json.dumps(body)