From c56d1a64c96da6feccefff1286e43ec29a2ddb01 Mon Sep 17 00:00:00 2001
From: Tony Sun <tony.sun@cloudant.com>
Date: Mon, 23 Jan 2017 15:08:28 -0800
Subject: [PATCH] Add custom_field_analyzers option

This new option lets users assign a dedicated analyzer to individual
fields of a text index, overriding the index's default analyzer for
those fields.

COUCHDB-3278
---
 src/mango_idx_text.erl                 | 21 +++++++--
 test/07-text-custom-field-list-test.py | 64 ++++++++++++++++++++++++++
 test/mango.py                          |  5 +-
 3 files changed, 84 insertions(+), 6 deletions(-)

diff --git a/src/mango_idx_text.erl b/src/mango_idx_text.erl
index ad9d2e8..0ef0260 100644
--- a/src/mango_idx_text.erl
+++ b/src/mango_idx_text.erl
@@ -222,6 +222,11 @@ opts() ->
             {tag, default_field},
             {optional, true},
             {default, {[]}}
+        ]},
+        {<<"custom_field_analyzers">>, [
+            {tag, custom_field_analyzers},
+            {optional, true},
+            {default, []}
         ]},
          {<<"selector">>, [
             {tag, selector},
@@ -267,24 +272,42 @@ get_default_field_options(Props) ->
     end.
 
 
+%% Build the per-field analyzer list from the custom_field_analyzers
+%% index option. Each entry is expected to be a single-member object in
+%% EJSON form, {[{Field, Analyzer}]}; the field name is passed through
+%% mango_util:lucene_escape_user/1 before it is used as a key.
+%% NOTE: entries of any other shape (for example an object with several
+%% members) do not match the generator pattern and are skipped silently
+%% instead of being rejected.
+get_custom_analyzers(Props) ->
+    Analyzers = couch_util:get_value(custom_field_analyzers, Props, []),
+    [{mango_util:lucene_escape_user(F), A} || {[{F, A}]} <- Analyzers].
+
+
+%% Build the analyzer definition for a text index. When there is neither
+%% a default field nor any custom field analyzer the result is the plain
+%% <<"keyword">> analyzer. Otherwise it is a "perfield" analyzer whose
+%% "default" is the default_analyzer option and whose "fields" hold the
+%% $default entry (when enabled) followed by the custom analyzers.
 construct_analyzer({Props}) ->
     DefaultAnalyzer = couch_util:get_value(default_analyzer, Props,
         <<"keyword">>),
+    CustomAnalyzers = get_custom_analyzers(Props),
     {DefaultField, DefaultFieldAnalyzer} = get_default_field_options(Props),
-    DefaultAnalyzerDef = case DefaultField of
+    AnalyzerDef = case DefaultField of
         true ->
-            [{<<"$default">>, DefaultFieldAnalyzer}];
+            [{<<"$default">>, DefaultFieldAnalyzer} | CustomAnalyzers];
         _ ->
-            []
+            CustomAnalyzers
     end,
-    case DefaultAnalyzerDef of
+    case AnalyzerDef of
         [] ->
             <<"keyword">>;
         _ ->
             {[
                 {<<"name">>, <<"perfield">>},
                 {<<"default">>, DefaultAnalyzer},
-                {<<"fields">>, {DefaultAnalyzerDef}}
+                {<<"fields">>, {AnalyzerDef}}
             ]}
     end.
 
diff --git a/test/07-text-custom-field-list-test.py b/test/07-text-custom-field-list-test.py
index 029c91c..cb7ac99 100644
--- a/test/07-text-custom-field-list-test.py
+++ b/test/07-text-custom-field-list-test.py
@@ -145,3 +145,76 @@ def test_two_or(self):
             {"location.state": "Don't Exist"}]})
         assert len(docs) == 1
         assert docs[0]["user_id"] == 10
+
+
+# Tests for the custom_field_analyzers text index option: every test
+# queries through an index that assigns its own analyzer to one or
+# more fields.
+@unittest.skipUnless(mango.has_text_service(), "requires text service")
+class CustomAnalyzerTests(mango.UserDocsTests):
+
+    @classmethod
+    def setUpClass(klass):
+        super(CustomAnalyzerTests, klass).setUpClass()
+        if mango.has_text_service():
+            # cfa_1: analyzer="standard", "keyword" for the street field.
+            klass.db.create_text_index(ddoc="cfa_1",
+                analyzer="standard",
+                custom_field_analyzers=[
+                    {"location.address.street:string": "keyword"}
+                ]
+            )
+            # cfa_2: no analyzer given, "standard" for the street field.
+            klass.db.create_text_index(ddoc="cfa_2",
+                custom_field_analyzers=[
+                    {"location.address.street:string": "standard"}
+                ]
+            )
+            # cfa_3: explicit field list, one custom analyzer per field.
+            klass.db.create_text_index(ddoc="cfa_3",
+                fields=[
+                    {"name": "location.state", "type": "string"},
+                    {"name": "location.address.street", "type": "string"}
+                ],
+                custom_field_analyzers=[
+                    {"location.state:string": "standard"},
+                    {"location.address.street:string": "keyword"}
+                ]
+            )
+
+    # Because of our filter, we need to add in $text:ignorecase as
+    # a no-op to bypass the filter
+    def test_standard_default_custom_keyword(self):
+        # cfa_1 query on location.state; results are sorted by street.
+        q = {"$or": [{"$text": "ignorecase"}, {"location.state": "new"},
+            {"location.state": "hawaii"}]}
+        docs = self.db.find(q, sort=["location.address.street:string"],
+            use_index="_design/cfa_1")
+
+        assert len(docs) == 3
+        assert docs[0]["location"]["address"]["street"] == "Bancroft Place"
+        assert docs[1]["location"]["address"]["street"] == "Miller Avenue"
+        assert docs[2]["location"]["address"]["street"] == "Nostrand Avenue"
+
+    def test_keyword_default_custom_standard(self):
+        # cfa_2 query on the street field; results are sorted by state.
+        q = {"$or": [{"$text": "ignorecase"},
+            {"location.address.street": "avenue"}]}
+        docs = self.db.find(q, sort=["location.state:string"],
+            use_index="_design/cfa_2")
+
+        assert len(docs) == 6
+        assert docs[0]["location"]["state"] == "Maine"
+        assert docs[5]["location"]["state"] == "North Dakota"
+
+    def test_custom_fields_custom_analyzers(self):
+        # Same query and expectations as for cfa_1, run against cfa_3.
+        q = {"$or": [{"$text": "ignorecase"}, {"location.state": "new"},
+            {"location.state": "hawaii"}]}
+        docs = self.db.find(q, sort=["location.address.street:string"],
+            use_index="_design/cfa_3")
+
+        assert len(docs) == 3
+        assert docs[0]["location"]["address"]["street"] == "Bancroft Place"
+        assert docs[1]["location"]["address"]["street"] == "Miller Avenue"
+        assert docs[2]["location"]["address"]["street"] == "Nostrand Avenue"
diff --git a/test/mango.py b/test/mango.py
index da51180..9d32043 100644
--- a/test/mango.py
+++ b/test/mango.py
@@ -104,7 +104,8 @@ def create_index(self, fields, idx_type="json", name=None, ddoc=None):
         return r.json()["result"] == "created"
 
     def create_text_index(self, analyzer=None, selector=None, idx_type="text",
-        default_field=None, fields=None, name=None, ddoc=None,index_array_lengths=None):
+        default_field=None, fields=None, name=None, ddoc=None,
+        index_array_lengths=None, custom_field_analyzers=None):
         body = {
             "index": {
             },
@@ -123,6 +124,8 @@ def create_text_index(self, analyzer=None, selector=None, idx_type="text",
             body["selector"] = selector
         if fields is not None:
             body["index"]["fields"] = fields
+        if custom_field_analyzers is not None:
+            body["index"]["custom_field_analyzers"] = custom_field_analyzers
         if ddoc is not None:
             body["ddoc"] = ddoc
         body = json.dumps(body)