diff --git a/src/c2/search/reranker/browser/configure.zcml b/src/c2/search/reranker/browser/configure.zcml index 9f628ac..d452597 100644 --- a/src/c2/search/reranker/browser/configure.zcml +++ b/src/c2/search/reranker/browser/configure.zcml @@ -20,4 +20,23 @@ permission="cmf.ManagePortal" /> + + + + + diff --git a/src/c2/search/reranker/browser/hybrid_search.py b/src/c2/search/reranker/browser/hybrid_search.py index c14d497..58967f9 100644 --- a/src/c2/search/reranker/browser/hybrid_search.py +++ b/src/c2/search/reranker/browser/hybrid_search.py @@ -1,12 +1,19 @@ """Browser view for hybrid search combining keyword and vector search.""" -from c2.search.reranker import logger from c2.search.reranker.interfaces import IRerankerSettings from c2.search.reranker.reranker import ( RerankerSettings, calculate_time_decay, get_content_age_days, ) +from c2.search.reranker.search import ( + compute_rrf_scores, + find_vector_index, + get_brain_by_rid, + is_vectorsearch_available, + keyword_search, + vector_search, +) from DateTime import DateTime from plone.registry.interfaces import IRegistry from Products.CMFCore.utils import getToolByName @@ -23,9 +30,6 @@ class HybridSearchView(BrowserView): Access via: @@hybrid-search?SearchableText=keyword """ - # RRF constant: prevents top-ranked items from dominating too much. - RRF_K = 60 - def __call__(self): self.search_text = self.request.form.get("SearchableText", "") self.results = [] @@ -50,90 +54,13 @@ def __call__(self): return self.index() - def _is_vectorsearch_available(self): - """Check if collective.vectorsearch is importable.""" - try: - from collective.vectorsearch.vector_index import VectorIndex # noqa: F401 - - return True - except ImportError: - return False - - def _find_vector_index(self, catalog): - """Find the first VectorIndex in catalog. - - Returns the index object or None. - """ - for idx_name in catalog.Indexes: - idx = catalog.Indexes[idx_name] - if getattr(idx, "meta_type", "") == "VectorIndex": - return idx - return None - - def _keyword_search(self, catalog): - """Execute keyword search via catalog. - - Returns dict: {rid: (brain, normalized_score)} - """ - query = {"SearchableText": self.search_text, "sort_limit": 200} - brains = list(catalog.searchResults(**query)) - result = {} - max_score = 0.0 - for brain in brains: - rid = brain.getRID() - score = getattr(brain, "data_record_normalized_score_", None) - score = 1.0 if score is None or score == 0 else float(score) - if score > max_score: - max_score = score - result[rid] = (brain, score) - - # Normalize to 0.0-1.0 range - if max_score > 0: - result = { - rid: (brain, score / max_score) - for rid, (brain, score) in result.items() - } - return result - - def _vector_search(self, vector_index): - """Execute vector search via VectorIndex. - - Returns dict: {rid: vector_score_normalized} - """ - - class QueryRecord: - def __init__(self, text): - self.keys = [text] - - record = QueryRecord(self.search_text) - bucket = vector_index.query_index(record) - - if bucket is None: - return {} - - result = {} - for rid, int_score in bucket.items(): - result[rid] = float(int_score) / 100_000_000.0 - return result - - def _get_brain_by_rid(self, catalog, rid): - """Get a catalog brain for a given RID.""" - try: - path = catalog.getpath(rid) - results = catalog.searchResults(path={"query": path, "depth": 0}) - if results: - return results[0] - except Exception: - logger.debug("Could not resolve brain for RID %s", rid) - return None - def _prepare_vector_search(self, catalog): """Prepare and execute vector search if possible. Returns (vector_results, keyword_ratio) tuple. Sets self.vector_message and self.vector_index_name as side effects. """ - if not self._is_vectorsearch_available(): + if not is_vectorsearch_available(): self.vector_message = ( "collective.vectorsearch is not installed. " "Showing keyword-only results." @@ -147,7 +74,7 @@ def _prepare_vector_search(self, catalog): ) return {}, 100 - vector_index = self._find_vector_index(catalog) + vector_index = find_vector_index(catalog) if vector_index is None: self.vector_message = ( "No VectorIndex found in catalog. Please add a VectorIndex." @@ -156,40 +83,11 @@ def _prepare_vector_search(self, catalog): self.vector_index_name = vector_index.id try: - return self._vector_search(vector_index), self.keyword_ratio + return vector_search(vector_index, self.search_text), self.keyword_ratio except Exception as e: self.vector_message = f"Vector search error: {e}" return {}, 100 - def _compute_rrf_scores(self, keyword_results, vector_results): - """Compute RRF (Reciprocal Rank Fusion) scores from rank positions. - - Returns dict: {rid: (keyword_rrf, vector_rrf)} - """ - k = self.RRF_K - - # Sort keyword results by score descending → assign ranks - kw_ranked = sorted( - keyword_results.keys(), - key=lambda rid: keyword_results[rid][1], - reverse=True, - ) - kw_rrf = {} - for rank, rid in enumerate(kw_ranked, start=1): - kw_rrf[rid] = 1.0 / (k + rank) - - # Sort vector results by score descending → assign ranks - vec_ranked = sorted( - vector_results.keys(), - key=lambda rid: vector_results[rid], - reverse=True, - ) - vec_rrf = {} - for rank, rid in enumerate(vec_ranked, start=1): - vec_rrf[rid] = 1.0 / (k + rank) - - return kw_rrf, vec_rrf - def _search_hybrid(self): """Execute hybrid search combining keyword and vector results.""" catalog = getToolByName(self.context, "portal_catalog") @@ -197,7 +95,7 @@ def _search_hybrid(self): reranker_settings = RerankerSettings() # Step 1: Keyword search (always) - keyword_results = self._keyword_search(catalog) + keyword_results = keyword_search(catalog, self.search_text) # Step 2: Vector search (if available and enabled) vector_results, keyword_ratio = self._prepare_vector_search(catalog) @@ -213,7 +111,7 @@ def _search_hybrid(self): kw_rrf = {} vec_rrf = {} if use_rrf and vector_results: - kw_rrf, vec_rrf = self._compute_rrf_scores(keyword_results, vector_results) + kw_rrf, vec_rrf = compute_rrf_scores(keyword_results, vector_results) all_rids = set(keyword_results.keys()) | set(vector_results.keys()) results = [] @@ -226,7 +124,7 @@ def _search_hybrid(self): brain, ks = kw_data else: ks = 0.0 - brain = self._get_brain_by_rid(catalog, rid) + brain = get_brain_by_rid(catalog, rid) if brain is None: continue diff --git a/src/c2/search/reranker/browser/search_override.py b/src/c2/search/reranker/browser/search_override.py new file mode 100644 index 0000000..8ee992a --- /dev/null +++ b/src/c2/search/reranker/browser/search_override.py @@ -0,0 +1,187 @@ +"""Override Plone's classic @@search and @@ajax-search with reranking.""" + +import os + +from c2.search.reranker import _ +from c2.search.reranker.search import is_reranker_enabled +from c2.search.reranker.search import search_and_rerank +from plone.app.contentlisting.interfaces import IContentListing +from Products.CMFPlone.browser.search import AjaxSearch +from Products.CMFPlone.browser.search import Search +from Products.CMFPlone.browser.search import SortOption +from Products.CMFCore.utils import getToolByName +from Products.Five.browser.pagetemplatefile import ViewPageTemplateFile +from Products.ZCTextIndex.ParseTree import ParseError +from zope.i18nmessageid import MessageFactory + +try: + from plone.base.batch import Batch +except ImportError: + from Products.CMFPlone.PloneBatch import Batch + +import Products.CMFPlone.browser + +_plone = MessageFactory("plone") + +_SEARCH_TEMPLATE = os.path.join( + os.path.dirname(Products.CMFPlone.browser.__file__), + "templates", + "search.pt", +) + +# Sort key used for our custom reranker algorithm +RERANKER_SORT_KEY = "reranker" + + +class RerankedSearch(Search): + """Search view that applies reranking when enabled.""" + + index = ViewPageTemplateFile(_SEARCH_TEMPLATE) + + def __call__(self): + return self.index() + + def sort_options(self): + """Add 'custom algorithm' sort option when reranker is enabled.""" + if not is_reranker_enabled(): + return super().sort_options() + + if "sort_on" not in self.request.form: + self.request.form["sort_on"] = RERANKER_SORT_KEY + return ( + SortOption( + self.request, + _( + "label_sort_reranker", + default="custom algorithm", + ), + RERANKER_SORT_KEY, + ), + SortOption(self.request, _plone("relevance"), "relevance"), + SortOption( + self.request, + _plone("date (newest first)"), + "Date", + reverse=True, + ), + SortOption(self.request, _plone("alphabetically"), "sortable_title"), + ) + + def filter_query(self, query): + """Handle 'reranker' sort key before parent processing.""" + form_sort = self.request.form.get("sort_on", "") + is_reranker_sort = form_sort == RERANKER_SORT_KEY + + if is_reranker_sort: + # Temporarily set to "relevance" so parent doesn't pass + # unknown sort key to the catalog + self.request.form["sort_on"] = "relevance" + + result = super().filter_query(query) + + if is_reranker_sort: + self.request.form["sort_on"] = RERANKER_SORT_KEY + + return result + + def results( + self, + query=None, + batch=True, + b_size=10, + b_start=0, + use_content_listing=True, + ): + if not is_reranker_enabled(): + return super().results( + query=query, + batch=batch, + b_size=b_size, + b_start=b_start, + use_content_listing=use_content_listing, + ) + + # Only apply reranking when "reranker" sort is active + sort_on = self.request.form.get("sort_on", "") + if sort_on and sort_on != RERANKER_SORT_KEY: + return super().results( + query=query, + batch=batch, + b_size=b_size, + b_start=b_start, + use_content_listing=use_content_listing, + ) + + return self._reranked_results( + query=query, + batch=batch, + b_size=b_size, + b_start=b_start, + use_content_listing=use_content_listing, + ) + + def _reranked_results( + self, + query=None, + batch=True, + b_size=10, + b_start=0, + use_content_listing=True, + ): + """Execute search with reranking algorithm.""" + if query is None: + query = {} + if batch: + b_start = int(b_start) + query = self.filter_query(query) + + if query is None: + results = [] + else: + search_text = query.get("SearchableText") + if not search_text: + # No text search: reranking is not meaningful, + # fall back to normal catalog search + catalog = getToolByName(self.context, "portal_catalog") + try: + results = catalog(**query) + except ParseError: + return [] + else: + # Build query extras (excluding search text and sort/batch params) + query_extras = { + k: v + for k, v in query.items() + if k + not in ( + "SearchableText", + "sort_on", + "sort_order", + "sort_limit", + "b_start", + "b_size", + ) + } + try: + results = search_and_rerank( + self.context, + search_text, + query_extras or None, + ) + except ParseError: + return [] + + if use_content_listing: + results = IContentListing(results) + if batch: + results = Batch(results, b_size, b_start) + return results + + +class RerankedAjaxSearch(RerankedSearch, AjaxSearch): + """Ajax search view that uses reranked results. + + Inherits results() from RerankedSearch and __call__ from AjaxSearch. + """ + + pass diff --git a/src/c2/search/reranker/configure.zcml b/src/c2/search/reranker/configure.zcml index 6e61105..3e6f598 100644 --- a/src/c2/search/reranker/configure.zcml +++ b/src/c2/search/reranker/configure.zcml @@ -19,6 +19,7 @@ + - 1000 + 1001 diff --git a/src/c2/search/reranker/profiles/default/registry/main.xml b/src/c2/search/reranker/profiles/default/registry/main.xml index 602a799..c8917d0 100644 --- a/src/c2/search/reranker/profiles/default/registry/main.xml +++ b/src/c2/search/reranker/profiles/default/registry/main.xml @@ -4,6 +4,7 @@ > + False False 50 rrf diff --git a/src/c2/search/reranker/search.py b/src/c2/search/reranker/search.py new file mode 100644 index 0000000..0daf5f5 --- /dev/null +++ b/src/c2/search/reranker/search.py @@ -0,0 +1,279 @@ +"""Shared search-and-rerank logic for integrating with Plone's default search. + +This module provides the core function ``search_and_rerank`` that can be used +by both the REST API ``@search`` service and the classic ``@@search`` view. +It optionally combines keyword search with vector search (hybrid mode) and +applies content-type boost and time-decay reranking. +""" + +from c2.search.reranker import logger +from c2.search.reranker.interfaces import IRerankerSettings +from c2.search.reranker.reranker import ( + RerankerSettings, + calculate_time_decay, + get_content_age_days, + rerank_brains, +) +from DateTime import DateTime +from plone.registry.interfaces import IRegistry +from Products.CMFCore.utils import getToolByName +from zope.component import getUtility + +# RRF constant: prevents top-ranked items from dominating too much. +RRF_K = 60 + + +def is_reranker_enabled(): + """Check whether the reranker is enabled in the control panel.""" + try: + registry = getUtility(IRegistry) + settings = registry.forInterface(IRerankerSettings) + return settings.reranker_enabled + except Exception: + logger.debug("Could not read reranker_enabled setting, defaulting to False") + return False + + +def is_vectorsearch_available(): + """Check if collective.vectorsearch is importable.""" + try: + from collective.vectorsearch.vector_index import VectorIndex # noqa: F401 + + return True + except ImportError: + return False + + +def find_vector_index(catalog): + """Find the first VectorIndex in catalog, or None.""" + for idx_name in catalog.Indexes: + idx = catalog.Indexes[idx_name] + if getattr(idx, "meta_type", "") == "VectorIndex": + return idx + return None + + +def keyword_search(catalog, search_text, query_extras=None): + """Execute keyword search via catalog. + + Returns dict: {rid: (brain, normalized_score)} + """ + query = {"SearchableText": search_text, "sort_limit": 200} + if query_extras: + query.update(query_extras) + brains = list(catalog.searchResults(**query)) + result = {} + max_score = 0.0 + for brain in brains: + rid = brain.getRID() + score = getattr(brain, "data_record_normalized_score_", None) + score = 1.0 if score is None or score == 0 else float(score) + if score > max_score: + max_score = score + result[rid] = (brain, score) + + # Normalize to 0.0-1.0 range + if max_score > 0: + result = { + rid: (brain, score / max_score) for rid, (brain, score) in result.items() + } + return result + + +def vector_search(vector_index, search_text): + """Execute vector search via VectorIndex. + + Returns dict: {rid: vector_score_normalized} + """ + + class QueryRecord: + def __init__(self, text): + self.keys = [text] + + record = QueryRecord(search_text) + bucket = vector_index.query_index(record) + + if bucket is None: + return {} + + result = {} + for rid, int_score in bucket.items(): + result[rid] = float(int_score) / 100_000_000.0 + return result + + +def get_brain_by_rid(catalog, rid): + """Get a catalog brain for a given RID.""" + try: + path = catalog.getpath(rid) + results = catalog.searchResults(path={"query": path, "depth": 0}) + if results: + return results[0] + except Exception: + logger.debug("Could not resolve brain for RID %s", rid) + return None + + +def compute_rrf_scores(keyword_results, vector_results): + """Compute RRF (Reciprocal Rank Fusion) scores from rank positions. + + Returns (kw_rrf, vec_rrf) dicts mapping rid to RRF score. + """ + k = RRF_K + + kw_ranked = sorted( + keyword_results.keys(), + key=lambda rid: keyword_results[rid][1], + reverse=True, + ) + kw_rrf = {} + for rank, rid in enumerate(kw_ranked, start=1): + kw_rrf[rid] = 1.0 / (k + rank) + + vec_ranked = sorted( + vector_results.keys(), + key=lambda rid: vector_results[rid], + reverse=True, + ) + vec_rrf = {} + for rank, rid in enumerate(vec_ranked, start=1): + vec_rrf[rid] = 1.0 / (k + rank) + + return kw_rrf, vec_rrf + + +def get_vector_index(catalog, settings): + """Get the vector index if vector search is available and enabled. + + Returns the VectorIndex object, or None. + """ + if not settings.vector_search_enabled: + return None + if not is_vectorsearch_available(): + return None + return find_vector_index(catalog) + + +def search_and_rerank(context, search_text, query_extras=None): + """Execute search with reranking, optionally with hybrid vector search. + + When vector_search_enabled is True and collective.vectorsearch is available: + - Combines keyword + vector search using RRF or weighted scoring + - Applies content-type boost and time-decay + + When vector_search_enabled is False (or vector search unavailable): + - Executes keyword search only + - Applies content-type boost and time-decay via rerank_brains() + + Args: + context: Plone context (for catalog access) + search_text: SearchableText query string + query_extras: additional catalog query params (path, portal_type, etc.) + Note: sort_on/sort_order/sort_limit/b_start/b_size should be + excluded since reranking produces its own ordering. + + Returns: + list of catalog brains in reranked order + """ + catalog = getToolByName(context, "portal_catalog") + registry = getUtility(IRegistry) + settings = registry.forInterface(IRerankerSettings) + + vector_index = get_vector_index(catalog, settings) + + if vector_index is not None: + return _hybrid_search_and_rerank( + catalog, + search_text, + vector_index, + settings.keyword_search_ratio, + settings.scoring_mode, + query_extras, + ) + + # Keyword-only mode: use simple rerank_brains + return _keyword_search_and_rerank(catalog, search_text, query_extras) + + +def _keyword_search_and_rerank(catalog, search_text, query_extras=None): + """Keyword search only, reranked with boost and decay.""" + query = {"SearchableText": search_text, "sort_limit": 200} + if query_extras: + query.update(query_extras) + brains = list(catalog.searchResults(**query)) + if not brains: + return [] + ranked = rerank_brains(brains) + return [brain for brain, _score_details in ranked] + + +def _hybrid_search_and_rerank( + catalog, + search_text, + vector_index, + keyword_ratio, + scoring_mode, + query_extras=None, +): + """Hybrid keyword + vector search, with boost and decay.""" + now = DateTime() + reranker_settings = RerankerSettings() + + # Step 1: Keyword search + keyword_results = keyword_search(catalog, search_text, query_extras) + + # Step 2: Vector search + try: + vector_results = vector_search(vector_index, search_text) + except Exception as e: + logger.warning("Vector search failed, falling back to keyword only: %s", e) + vector_results = {} + keyword_ratio = 100 + + if not vector_results: + keyword_ratio = 100 + + # Step 3: Combine scores + kw = keyword_ratio / 100.0 + vw = 1.0 - kw + use_rrf = scoring_mode == "rrf" + + kw_rrf = {} + vec_rrf = {} + if use_rrf and vector_results: + kw_rrf, vec_rrf = compute_rrf_scores(keyword_results, vector_results) + + all_rids = set(keyword_results.keys()) | set(vector_results.keys()) + scored = [] + + for rid in all_rids: + kw_data = keyword_results.get(rid) + vs = vector_results.get(rid, 0.0) + + if kw_data is not None: + brain, ks = kw_data + else: + ks = 0.0 + brain = get_brain_by_rid(catalog, rid) + if brain is None: + continue + + if use_rrf and vector_results: + kr = kw_rrf.get(rid, 0.0) + vr = vec_rrf.get(rid, 0.0) + combined = kr * kw + vr * vw + else: + combined = ks * kw + vs * vw + + # Step 4: Apply boost and decay + portal_type = brain.portal_type + boost = reranker_settings.get_boost(portal_type) + halflife = reranker_settings.get_halflife(portal_type) + age_days = get_content_age_days(brain, now=now) + decay = calculate_time_decay(age_days, halflife) + + final_score = combined * boost * decay + scored.append((brain, final_score)) + + scored.sort(key=lambda x: x[1], reverse=True) + return [brain for brain, _score in scored] diff --git a/src/c2/search/reranker/services/__init__.py b/src/c2/search/reranker/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/c2/search/reranker/services/configure.zcml b/src/c2/search/reranker/services/configure.zcml new file mode 100644 index 0000000..7263b70 --- /dev/null +++ b/src/c2/search/reranker/services/configure.zcml @@ -0,0 +1,15 @@ + + + + + diff --git a/src/c2/search/reranker/services/search.py b/src/c2/search/reranker/services/search.py new file mode 100644 index 0000000..6da8567 --- /dev/null +++ b/src/c2/search/reranker/services/search.py @@ -0,0 +1,144 @@ +"""Custom @search GET service that optionally applies reranking.""" + +from c2.search.reranker.search import is_reranker_enabled +from c2.search.reranker.search import search_and_rerank +from plone.restapi.batching import HypermediaBatch +from plone.restapi.interfaces import ISerializeToJson +from plone.restapi.interfaces import ISerializeToJsonSummary +from plone.restapi.search.handler import SearchHandler +from plone.restapi.search.utils import unflatten_dotted_dict +from plone.restapi.services import Service +from zope.component import getMultiAdapter + +import logging + + +log = logging.getLogger(__name__) + +# Keys to exclude from query_extras when building reranker query +_SORT_AND_BATCH_KEYS = frozenset(( + "SearchableText", + "sort_on", + "sort_order", + "sort_limit", + "b_start", + "b_size", +)) + + +class RerankerSearchGet(Service): + """Custom @search GET service that optionally applies reranking. + + When reranker_enabled is True in the control panel settings, + this service intercepts the catalog results, applies the + reranking algorithm (content-type boost + time-decay, and + optionally hybrid vector search), and returns the reranked + results with proper batching. + + When reranker_enabled is False, this delegates entirely to the + default SearchHandler with zero overhead. + """ + + def reply(self): + query = self.request.form.copy() + query = unflatten_dotted_dict(query) + + if not is_reranker_enabled(): + return SearchHandler(self.context, self.request).search(query) + + return self._search_with_reranking(query) + + def _prepare_query(self, query): + """Parse and prepare the query, extracting flags. + + Returns (query, fullobjects) tuple. + """ + handler = SearchHandler(self.context, self.request) + + fullobjects = query.pop("fullobjects", None) is not None + use_site_search_settings = ( + query.pop( + "use_site_search_settings", + None, + ) + is not None + ) + + if use_site_search_settings: + query = handler.filter_query(query) + + if "SearchableText" in query: + # quote_chars may not exist in older plone.restapi (Plone 5.2) + quote_fn = getattr(handler, "quote_chars", None) + if quote_fn: + query["SearchableText"] = quote_fn(query["SearchableText"]) + if not query["SearchableText"] or query["SearchableText"] == "*": + return None, fullobjects + + # These private methods may not exist in older plone.restapi + constrain_fn = getattr(handler, "_constrain_query_by_path", None) + if constrain_fn: + constrain_fn(query) + parse_fn = getattr(handler, "_parse_query", None) + if parse_fn: + query = parse_fn(query) + return query, fullobjects + + def _search_with_reranking(self, query): + """Execute search, apply reranking, and return batched JSON results.""" + query, fullobjects = self._prepare_query(query) + if query is None: + return [] + + search_text = query.get("SearchableText") + if not search_text: + # No text search: delegate to normal serialization + handler = SearchHandler(self.context, self.request) + lazy_resultset = handler.catalog.searchResults(**query) + return getMultiAdapter((lazy_resultset, self.request), ISerializeToJson)( + fullobjects=fullobjects + ) + + query_extras = {k: v for k, v in query.items() if k not in _SORT_AND_BATCH_KEYS} + + reranked_brains = search_and_rerank( + self.context, + search_text, + query_extras or None, + ) + + return self._serialize_results(reranked_brains, fullobjects) + + def _serialize_results(self, brains, fullobjects=False): + """Serialize a list of brains with batching.""" + batch = HypermediaBatch(self.request, brains) + + results = { + "@id": batch.canonical_url, + "items_total": batch.items_total, + } + links = batch.links + if links: + results["batching"] = links + + results["items"] = [] + for brain in batch: + if fullobjects: + try: + obj = brain.getObject() + except KeyError: + log.warning( + "Brain getObject error: %s doesn't exist anymore", + brain.getPath(), + ) + continue + result = getMultiAdapter((obj, self.request), ISerializeToJson)( + include_items=False + ) + else: + result = getMultiAdapter( + (brain, self.request), ISerializeToJsonSummary + )() + results["items"].append(result) + + return results diff --git a/src/c2/search/reranker/upgrades/configure.zcml b/src/c2/search/reranker/upgrades/configure.zcml index 3b2354a..cccfc7d 100644 --- a/src/c2/search/reranker/upgrades/configure.zcml +++ b/src/c2/search/reranker/upgrades/configure.zcml @@ -3,18 +3,16 @@ xmlns:genericsetup="http://namespaces.zope.org/genericsetup" > - diff --git a/test_plone52.cfg b/test_plone52.cfg index 96178dc..455f597 100644 --- a/test_plone52.cfg +++ b/test_plone52.cfg @@ -9,3 +9,7 @@ update-versions-file = test_plone52.cfg [versions] createcoverage = 1.5 +# Fix finalizeSchemas TypeError with Provides objects +# caused by plone.dexterity 2.11.0 memory leak fix. +# See: https://github.com/plone/plone.supermodel/pull/55 +plone.supermodel = 1.7.0 diff --git a/tests/conftest.py b/tests/conftest.py index 5b606a3..29b6b9b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,62 @@ +# Fix plone.supermodel < 1.7.0 finalizeSchemas TypeError on Plone 5.2. +# plone.dexterity 2.11.0 introduces Provides objects in schema.dependents, +# but plone.supermodel < 1.7.0's walk() yields all objects (including Provides), +# causing sorted() to fail with TypeError when comparing Provides with SchemaClass. +# We backport the fix from plone.supermodel >= 1.7.0: filter to SchemaClass only. +# See: https://github.com/plone/plone.supermodel/pull/55 +try: + import pkg_resources + + _version = tuple( + int(x) + for x in pkg_resources.get_distribution("plone.supermodel").version.split(".")[ + :3 + ] + ) + if _version < (1, 7, 0): + import logging + + from plone.supermodel import model as _psm + from plone.supermodel.model import SchemaClass + from zope.interface.interface import InterfaceClass + + _logger = logging.getLogger("plone.supermodel") + + def _patched_finalizeSchemas(parent=None): + if parent is None: + parent = _psm.Schema + if not isinstance(parent, SchemaClass): + raise TypeError( + "Only instances of plone.supermodel.model.SchemaClass " + "can be finalized." + ) + + def walk(schema): + if isinstance(schema, SchemaClass): + yield schema + try: + children = schema.dependents.keys() + except AttributeError: + children = () + for child in children: + yield from walk(child) + + schemas = set(walk(parent)) + for schema in sorted(schemas): + if hasattr(schema, "_SchemaClass_finalize"): + schema._SchemaClass_finalize() + elif isinstance(schema, InterfaceClass): + _logger.warn( + f"{schema.__module__}.{schema.__name__} is not an " + f"instance of SchemaClass. This can happen if the " + f"first base class of a schema is not a SchemaClass. " + f"See https://bugs.launchpad.net/zope.interface/+bug/791218" + ) + + _psm.finalizeSchemas = _patched_finalizeSchemas +except Exception: # noqa: S110 + pass + from c2.search.reranker.testing import ACCEPTANCE_TESTING from c2.search.reranker.testing import FUNCTIONAL_TESTING from c2.search.reranker.testing import INTEGRATION_TESTING diff --git a/tests/setup/test_setup_install.py b/tests/setup/test_setup_install.py index 94261af..5f36563 100644 --- a/tests/setup/test_setup_install.py +++ b/tests/setup/test_setup_install.py @@ -14,4 +14,4 @@ def test_browserlayer(self, browser_layers): def test_latest_version(self, profile_last_version): """Test latest version of default profile.""" - assert profile_last_version(f"{PACKAGE_NAME}:default") == "1000" + assert profile_last_version(f"{PACKAGE_NAME}:default") == "1001" diff --git a/tests/test_imports.py b/tests/test_imports.py new file mode 100644 index 0000000..d5c694b --- /dev/null +++ b/tests/test_imports.py @@ -0,0 +1,116 @@ +"""Diagnostic tests to identify import failures on Plone 5.2. + +These are UNIT tests that don't require the integration layer, +so they run even when ZCML loading fails. +""" + +import importlib +import sys + +import pytest + + +class TestModuleImports: + """Test that all modules can be imported without errors.""" + + @pytest.mark.parametrize( + "module_name", + [ + "c2.search.reranker", + "c2.search.reranker.interfaces", + "c2.search.reranker.reranker", + "c2.search.reranker.search", + "c2.search.reranker.browser.search_override", + "c2.search.reranker.browser.hybrid_search", + "c2.search.reranker.services", + "c2.search.reranker.services.search", + ], + ) + def test_import_module(self, module_name): + """Module should be importable without errors.""" + try: + mod = importlib.import_module(module_name) + assert mod is not None + except Exception as exc: + pytest.fail(f"Failed to import {module_name}: {type(exc).__name__}: {exc}") + + +class TestZCMLDependencies: + """Test that ZCML dependencies are available.""" + + def test_plone_rest_service_directive(self): + """plone.rest should provide the plone:service ZCML directive.""" + import plone.rest # noqa: F401 + + def test_plone_restapi_search_handler(self): + """plone.restapi.search.handler.SearchHandler should be importable.""" + from plone.restapi.search.handler import SearchHandler # noqa: F401 + + def test_plone_restapi_search_utils(self): + """plone.restapi.search.utils.unflatten_dotted_dict should exist.""" + from plone.restapi.search.utils import unflatten_dotted_dict # noqa: F401 + + def test_cmfplone_search_classes(self): + """Products.CMFPlone.browser.search should have required classes.""" + from Products.CMFPlone.browser.search import AjaxSearch # noqa: F401 + from Products.CMFPlone.browser.search import Search # noqa: F401 + from Products.CMFPlone.browser.search import SortOption # noqa: F401 + + def test_batch_import(self): + """Batch class should be importable (Plone 5.2 or 6).""" + try: + from plone.base.batch import Batch + + source = "plone.base.batch" + except ImportError: + from Products.CMFPlone.PloneBatch import Batch + + source = "Products.CMFPlone.PloneBatch" + assert Batch is not None, f"Batch imported from {source}" + + def test_zctextindex_parseError(self): + """Products.ZCTextIndex.ParseTree.ParseError should exist.""" + from Products.ZCTextIndex.ParseTree import ParseError # noqa: F401 + + def test_content_listing(self): + """plone.app.contentlisting.interfaces.IContentListing should exist.""" + from plone.app.contentlisting.interfaces import IContentListing # noqa: F401 + + def test_viewpagetemplatefile(self): + """ViewPageTemplateFile should work with CMFPlone's search.pt.""" + import os + + import Products.CMFPlone.browser + from Products.Five.browser.pagetemplatefile import ViewPageTemplateFile + + template_path = os.path.join( + os.path.dirname(Products.CMFPlone.browser.__file__), + "templates", + "search.pt", + ) + assert os.path.isfile(template_path), f"Template not found: {template_path}" + # Try creating the ViewPageTemplateFile + vpt = ViewPageTemplateFile(template_path) + assert vpt is not None + + +class TestZCMLLoading: + """Test ZCML loading directly.""" + + def test_python_version(self): + """Report Python version for debugging.""" + assert sys.version_info >= (3, 8), f"Python {sys.version}" + + def test_zcml_condition_installed(self): + """zcml:condition 'installed' verb should work.""" + from zope.configuration.config import ConfigurationContext + from zope.configuration.xmlconfig import ConfigurationHandler + + context = ConfigurationContext() + handler = ConfigurationHandler(context, testing=True) + # zope.interface is always available + assert handler.evaluateCondition("installed zope.interface") is True + assert handler.evaluateCondition("not-installed zope.interface") is False + # plone.nonexistent should not be available + assert handler.evaluateCondition("installed plone.nonexistent") is False + assert handler.evaluateCondition("not-installed plone.nonexistent") is True diff --git a/tests/test_search_override.py b/tests/test_search_override.py new file mode 100644 index 0000000..f1ca5af --- /dev/null +++ b/tests/test_search_override.py @@ -0,0 +1,176 @@ +"""Integration tests for the classic @@search view override with reranking.""" + +from c2.search.reranker.browser.search_override import RERANKER_SORT_KEY +from c2.search.reranker.interfaces import IBrowserLayer +from c2.search.reranker.interfaces import IRerankerSettings +from plone import api +from plone.app.testing import setRoles +from plone.app.testing import TEST_USER_ID +from plone.registry.interfaces import IRegistry +from zope.component import getUtility +from zope.interface import alsoProvides + +import pytest + + +class TestRerankedSearchView: + """Test the classic @@search view override.""" + + @pytest.fixture(autouse=True) + def _setup(self, integration): + self.portal = integration["portal"] + self.request = integration["request"] + setRoles(self.portal, TEST_USER_ID, ["Manager"]) + # Ensure IBrowserLayer is marked on the request. + # On Plone 5.2, plone.app.testing does not automatically apply + # registered browser layers to the test request. + alsoProvides(self.request, IBrowserLayer) + + def _set_reranker_enabled(self, enabled): + registry = getUtility(IRegistry) + settings = registry.forInterface(IRerankerSettings) + settings.reranker_enabled = enabled + + def _get_view(self): + return api.content.get_view( + name="search", + context=self.portal, + request=self.request, + ) + + def test_search_view_registered(self): + """The @@search view should be our override when layer is active.""" + from c2.search.reranker.browser.search_override import RerankedSearch + + view = self._get_view() + assert isinstance(view, RerankedSearch) + + def test_disabled_returns_normal_results(self): + """When disabled, @@search should return normal catalog results.""" + self._set_reranker_enabled(False) + api.content.create( + container=self.portal, + type="Document", + id="test-classic-doc", + title="Classic Search Test Epsilon Document", + ) + + self.request.form["SearchableText"] = "Epsilon" + results = self._get_view().results(batch=False) + assert len(results) > 0 + + def test_enabled_returns_reranked_results(self): + """When enabled with reranker sort, should return reranked results.""" + self._set_reranker_enabled(True) + api.content.create( + container=self.portal, + type="Document", + id="test-classic-reranked", + title="Classic Reranked Test Zeta Document", + ) + + self.request.form["SearchableText"] = "Zeta" + self.request.form["sort_on"] = RERANKER_SORT_KEY + results = self._get_view().results(batch=False) + assert len(results) > 0 + + def test_enabled_with_batching(self): + """When enabled, @@search batching should work correctly.""" + self._set_reranker_enabled(True) + for i in range(5): + api.content.create( + container=self.portal, + type="Document", + id=f"test-batch-doc-{i}", + title=f"Batch Test Eta {i} Document", + ) + + self.request.form["SearchableText"] = "Eta" + self.request.form["sort_on"] = RERANKER_SORT_KEY + results = self._get_view().results(batch=True, b_size=2, b_start=0) + # Batch should limit to b_size + assert len(list(results)) <= 2 + + def test_no_searchable_text_falls_back(self): + """When no SearchableText, should fall back to normal catalog.""" + self._set_reranker_enabled(True) + self.request.form["sort_on"] = RERANKER_SORT_KEY + # Without SearchableText, results() should handle gracefully + results = self._get_view().results(batch=False) + # Should return an empty or normal result + assert results is not None + + def test_enabled_relevance_sort_uses_normal_catalog(self): + """When enabled but sort_on=relevance, should use normal catalog.""" + self._set_reranker_enabled(True) + api.content.create( + container=self.portal, + type="Document", + id="test-relevance-sort", + title="Relevance Sort Test Theta Document", + ) + + self.request.form["SearchableText"] = "Theta" + self.request.form["sort_on"] = "relevance" + results = self._get_view().results(batch=False) + assert len(results) > 0 + + def test_enabled_default_sort_is_reranker(self): + """When enabled and no sort specified, should default to reranker.""" + self._set_reranker_enabled(True) + api.content.create( + container=self.portal, + type="Document", + id="test-default-sort", + title="Default Sort Test Iota Document", + ) + + self.request.form["SearchableText"] = "Iota" + # No sort_on in request.form — should default to reranker + results = self._get_view().results(batch=False) + assert len(results) > 0 + + def test_sort_options_disabled_normal(self): + """When disabled, sort_options should return standard options.""" + self._set_reranker_enabled(False) + view = self._get_view() + options = view.sort_options() + sortkeys = [opt.sortkey for opt in options] + assert sortkeys == ["relevance", "Date", "sortable_title"] + + def test_sort_options_enabled_has_reranker(self): + """When enabled, sort_options should include reranker as first.""" + self._set_reranker_enabled(True) + view = self._get_view() + options = view.sort_options() + sortkeys = [opt.sortkey for opt in options] + assert sortkeys == [RERANKER_SORT_KEY, "relevance", "Date", "sortable_title"] + + def test_sort_options_enabled_default_selected(self): + """When enabled with no sort_on, reranker should be the default.""" + self._set_reranker_enabled(True) + view = self._get_view() + view.sort_options() + assert self.request.form.get("sort_on") == RERANKER_SORT_KEY + + +class TestRerankedAjaxSearchView: + """Test the @@ajax-search view override.""" + + @pytest.fixture(autouse=True) + def _setup(self, integration): + self.portal = integration["portal"] + self.request = integration["request"] + setRoles(self.portal, TEST_USER_ID, ["Manager"]) + alsoProvides(self.request, IBrowserLayer) + + def test_ajax_search_view_registered(self): + """The @@ajax-search view should be our override.""" + from c2.search.reranker.browser.search_override import RerankedAjaxSearch + + view = api.content.get_view( + name="ajax-search", + context=self.portal, + request=self.request, + ) + assert isinstance(view, RerankedAjaxSearch) diff --git a/tests/test_search_service.py b/tests/test_search_service.py new file mode 100644 index 0000000..d892480 --- /dev/null +++ b/tests/test_search_service.py @@ -0,0 +1,95 @@ +"""Integration tests for the custom @search REST API service with reranking.""" + +from c2.search.reranker.interfaces import IRerankerSettings +from plone import api +from plone.app.testing import setRoles +from plone.app.testing import TEST_USER_ID +from plone.registry.interfaces import IRegistry +from zope.component import getUtility + +import pytest + + +def _make_service(context, request): + """Create and configure a RerankerSearchGet service instance.""" + from c2.search.reranker.services.search import RerankerSearchGet + + service = RerankerSearchGet.__new__(RerankerSearchGet) + service.context = context + service.request = request + return service + + +class TestRerankerSearchService: + """Test the custom @search REST API service.""" + + @pytest.fixture(autouse=True) + def _setup(self, integration): + self.portal = integration["portal"] + self.request = integration["request"] + setRoles(self.portal, TEST_USER_ID, ["Manager"]) + + def _get_settings(self): + registry = getUtility(IRegistry) + return registry.forInterface(IRerankerSettings) + + def _set_reranker_enabled(self, enabled): + settings = self._get_settings() + settings.reranker_enabled = enabled + + def test_reranker_enabled_setting_exists(self): + """The reranker_enabled setting should exist in the registry.""" + settings = self._get_settings() + assert hasattr(settings, "reranker_enabled") + assert settings.reranker_enabled is False + + def test_disabled_delegates_to_default(self): + """When disabled, @search should work normally.""" + self._set_reranker_enabled(False) + api.content.create( + container=self.portal, + type="Document", + id="test-doc-service", + title="Service Test Gamma Document", + ) + + self.request.form["SearchableText"] = "Gamma" + service = _make_service(self.portal, self.request) + result = service.reply() + assert isinstance(result, dict) + assert "items" in result or "items_total" in result + + def test_enabled_returns_results(self): + """When enabled, @search should return reranked results.""" + self._set_reranker_enabled(True) + api.content.create( + container=self.portal, + type="Document", + id="test-doc-reranked", + title="Reranked Service Test Delta Document", + ) + + self.request.form["SearchableText"] = "Delta" + service = _make_service(self.portal, self.request) + result = service.reply() + assert isinstance(result, dict) + assert "items" in result + assert "items_total" in result + + def test_empty_search_returns_structure(self): + """Empty search should return proper JSON structure.""" + self._set_reranker_enabled(True) + + self.request.form["SearchableText"] = "nonexistent_xyzzy_12345" + service = _make_service(self.portal, self.request) + result = service.reply() + assert isinstance(result, dict) + assert result.get("items_total", 0) == 0 + + def test_no_searchable_text_delegates(self): + """When no SearchableText, should delegate to normal search.""" + self._set_reranker_enabled(True) + + service = _make_service(self.portal, self.request) + result = service.reply() + assert isinstance(result, (dict, list)) diff --git a/tox.ini b/tox.ini index f9a5993..7fd1a68 100644 --- a/tox.ini +++ b/tox.ini @@ -37,7 +37,7 @@ deps = Plone52: -rrequirements_plone52.txt Plone60: -rrequirements_plone60.txt Plone61: -rrequirements_plone61.txt - coverage + coverage[toml] [testenv:ruff-check] @@ -69,7 +69,7 @@ skip_install = true basepython = python3.11 deps = - coverage + coverage[toml] -cconstraints_plone61.txt setenv =