From 7cd7beb7e02ddf5afd9fa0aa6b56db9342cfbd90 Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Mon, 27 Apr 2026 19:27:41 +0530 Subject: [PATCH 01/12] feat: Operational metrics for offline store and SOX metrics for both Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- sdk/python/feast/feature_server.py | 79 ++- .../infra/feature_servers/base_config.py | 11 + .../infra/offline_stores/offline_store.py | 65 +- sdk/python/feast/metrics.py | 103 ++- sdk/python/tests/unit/test_metrics.py | 661 +++++++++++++++++- 5 files changed, 899 insertions(+), 20 deletions(-) diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index f60eeb9d87d..1daf59fa2ec 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -148,28 +148,72 @@ class ChatRequest(BaseModel): messages: List[ChatMessage] -def _resolve_feature_counts( +def _parse_feature_info( features: Union[List[str], "feast.FeatureService"], ) -> tuple: - """Return (feature_count, feature_view_count) from the resolved features. + """Return ``(feature_view_names, feature_count)`` from resolved features. ``features`` is either a list of ``"feature_view:feature"`` strings or a ``FeatureService`` with ``feature_view_projections``. + + Returns: + (fv_names, feat_count) where fv_names is a list of unique feature + view name strings and feat_count is the total number of features. 
""" from feast.feature_service import FeatureService if isinstance(features, FeatureService): projections = features.feature_view_projections - fv_count = len(projections) + fv_names = [p.name for p in projections] feat_count = sum(len(p.features) for p in projections) elif isinstance(features, list): feat_count = len(features) - fv_names = {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref} - fv_count = len(fv_names) + fv_names = list( + {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref} + ) else: + fv_names = [] feat_count = 0 - fv_count = 0 - return str(feat_count), str(fv_count) + return fv_names, feat_count + + +def _resolve_feature_counts( + features: Union[List[str], "feast.FeatureService"], +) -> tuple: + """Return ``(feature_count_str, feature_view_count_str)`` for Prometheus labels.""" + fv_names, feat_count = _parse_feature_info(features) + return str(feat_count), str(len(fv_names)) + + +def _emit_online_audit( + request: GetOnlineFeaturesRequest, + features: Union[List[str], "feast.FeatureService"], + entity_count: int, + status: str, + latency_ms: float, +): + """Best-effort audit log emission for online feature requests.""" + try: + from feast.permissions.security_manager import get_security_manager + + requestor_id = "anonymous" + sm = get_security_manager() + if sm and sm.current_user: + requestor_id = sm.current_user.username or "anonymous" + + fv_names, feat_count = _parse_feature_info(features) + + feast_metrics.emit_online_audit_log( + requestor_id=requestor_id, + entity_keys=list(request.entities.keys()), + entity_count=entity_count, + feature_views=fv_names, + feature_count=feat_count, + status=status, + latency_ms=latency_ms, + ) + except Exception: + logger.warning("Failed to emit online audit log", exc_info=True) async def _get_features( @@ -387,11 +431,22 @@ async def get_online_features(request: GetOnlineFeaturesRequest) -> Any: 
include_feature_view_version_metadata=request.include_feature_view_version_metadata, ) - if store._get_provider().async_supported.online.read: - response = await store.get_online_features_async(**read_params) # type: ignore - else: - response = await run_in_threadpool( - lambda: store.get_online_features(**read_params) # type: ignore + audit_start_ms = time.monotonic() * 1000 + audit_status = "success" + try: + if store._get_provider().async_supported.online.read: + response = await store.get_online_features_async(**read_params) # type: ignore + else: + response = await run_in_threadpool( + lambda: store.get_online_features(**read_params) # type: ignore + ) + except Exception: + audit_status = "error" + raise + finally: + audit_latency_ms = time.monotonic() * 1000 - audit_start_ms + _emit_online_audit( + request, features, entity_count, audit_status, audit_latency_ms ) response_dict = await run_in_threadpool( diff --git a/sdk/python/feast/infra/feature_servers/base_config.py b/sdk/python/feast/infra/feature_servers/base_config.py index df324dc57d3..14ad2fe505e 100644 --- a/sdk/python/feast/infra/feature_servers/base_config.py +++ b/sdk/python/feast/infra/feature_servers/base_config.py @@ -82,6 +82,17 @@ class MetricsConfig(FeastConfigBaseModel): """Emit per-feature-view freshness gauges (feast_feature_freshness_seconds).""" + offline_features: StrictBool = True + """Emit offline store retrieval metrics + (feast_offline_store_request_total, + feast_offline_store_request_latency_seconds, + feast_offline_store_row_count).""" + + audit_logging: StrictBool = False + """Emit structured JSON audit log entries for online and offline + feature requests via the ``feast.audit`` logger. 
Captures requestor + identity, entity keys, feature views, row counts, and latency.""" + class BaseFeatureServerConfig(FeastConfigBaseModel): """Base Feature Server config that should be extended""" diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 4ae0c680c3b..9d9fee22623 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging +import time import warnings from abc import ABC -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import ( TYPE_CHECKING, @@ -70,6 +72,21 @@ def __init__( self.max_event_timestamp = max_event_timestamp +def _extract_retrieval_metadata(job: "RetrievalJob") -> tuple: + """Return ``(feature_view_names, feature_count)`` from a RetrievalJob's metadata.""" + try: + meta = job.metadata + if meta: + feature_count = len(meta.features) + feature_views = list( + {ref.split(":")[0] for ref in meta.features if ":" in ref} + ) + return feature_views, feature_count + except (NotImplementedError, AttributeError): + pass + return [], 0 + + class RetrievalJob(ABC): """A RetrievalJob manages the execution of a query to retrieve data from the offline store.""" @@ -152,7 +169,51 @@ def to_arrow( validation_reference (optional): The validation to apply against the retrieved dataframe. timeout (optional): The query timeout if applicable. 
""" - features_table = self._to_arrow_internal(timeout=timeout) + start_wall = time.monotonic() + status_label = "success" + row_count = 0 + try: + features_table = self._to_arrow_internal(timeout=timeout) + row_count = features_table.num_rows + except Exception: + status_label = "error" + raise + finally: + try: + from feast import metrics as feast_metrics + + elapsed = time.monotonic() - start_wall + + if feast_metrics._config.offline_features: + feast_metrics.offline_store_request_total.labels( + method="to_arrow", status=status_label + ).inc() + feast_metrics.offline_store_request_latency_seconds.labels( + method="to_arrow" + ).observe(elapsed) + if row_count > 0: + feast_metrics.offline_store_row_count.labels( + method="to_arrow" + ).observe(row_count) + + if feast_metrics._config.audit_logging: + feature_views, feature_count = _extract_retrieval_metadata(self) + now_iso = datetime.now(tz=timezone.utc).isoformat() + feast_metrics.emit_offline_audit_log( + method="to_arrow", + feature_views=feature_views, + feature_count=feature_count, + row_count=row_count, + status=status_label, + start_time=now_iso, + end_time=now_iso, + duration_ms=elapsed * 1000, + ) + except Exception: + logging.getLogger(__name__).debug( + "Failed to record offline store metrics", exc_info=True + ) + if self.on_demand_feature_views: # Build a mapping of ODFV name to requested feature names # This ensures we only return the features that were explicitly requested diff --git a/sdk/python/feast/metrics.py b/sdk/python/feast/metrics.py index 694f25a687e..f827f6e31ee 100644 --- a/sdk/python/feast/metrics.py +++ b/sdk/python/feast/metrics.py @@ -42,6 +42,7 @@ """ import atexit +import json import logging import os import shutil @@ -51,7 +52,7 @@ from contextlib import contextmanager from dataclasses import dataclass from datetime import datetime, timezone -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional import psutil @@ -123,6 +124,8 @@ class 
_MetricsFlags: push: bool = False materialization: bool = False freshness: bool = False + offline_features: bool = False + audit_logging: bool = False _config = _MetricsFlags() @@ -144,6 +147,8 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag push=True, materialization=True, freshness=True, + offline_features=True, + audit_logging=False, ) return _MetricsFlags( enabled=True, @@ -153,6 +158,8 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag push=getattr(metrics_config, "push", True), materialization=getattr(metrics_config, "materialization", True), freshness=getattr(metrics_config, "freshness", True), + offline_features=getattr(metrics_config, "offline_features", True), + audit_logging=getattr(metrics_config, "audit_logging", False), ) @@ -260,6 +267,33 @@ def build_metrics_flags(metrics_config: Optional[object] = None) -> _MetricsFlag multiprocess_mode="max", ) +# --------------------------------------------------------------------------- +# Offline store retrieval metrics +# --------------------------------------------------------------------------- +offline_store_request_total = Counter( + "feast_offline_store_request_total", + "Total offline store retrieval requests", + ["method", "status"], +) +offline_store_request_latency_seconds = Histogram( + "feast_offline_store_request_latency_seconds", + "Latency of offline store retrieval operations in seconds", + ["method"], + buckets=(0.1, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0, 600.0), +) +offline_store_row_count = Histogram( + "feast_offline_store_row_count", + "Number of rows returned by offline store retrieval", + ["method"], + buckets=(100, 1000, 10000, 100000, 500000, 1000000, 5000000), +) + +# --------------------------------------------------------------------------- +# Audit logger — separate from the main feast logger so operators can +# route SOX-style audit entries to a dedicated sink. 
+# --------------------------------------------------------------------------- +audit_logger = logging.getLogger("feast.audit") + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -388,6 +422,71 @@ def track_materialization( ) +def emit_online_audit_log( + *, + requestor_id: str, + entity_keys: List[str], + entity_count: int, + feature_views: List[str], + feature_count: int, + status: str, + latency_ms: float, +): + """Emit a structured JSON audit log entry for an online feature request.""" + if not _config.audit_logging: + return + audit_logger.info( + _json_dumps( + { + "event": "online_feature_request", + "timestamp": datetime.now(tz=timezone.utc).isoformat(), + "requestor_id": requestor_id, + "entity_keys": entity_keys, + "entity_count": entity_count, + "feature_views": feature_views, + "feature_count": feature_count, + "status": status, + "latency_ms": round(latency_ms, 2), + } + ) + ) + + +def emit_offline_audit_log( + *, + method: str, + feature_views: List[str], + feature_count: int, + row_count: int, + status: str, + start_time: str, + end_time: str, + duration_ms: float, +): + """Emit a structured JSON audit log entry for an offline feature retrieval.""" + if not _config.audit_logging: + return + audit_logger.info( + _json_dumps( + { + "event": "offline_feature_retrieval", + "method": method, + "start_time": start_time, + "end_time": end_time, + "feature_views": feature_views, + "feature_count": feature_count, + "row_count": row_count, + "status": status, + "duration_ms": round(duration_ms, 2), + } + ) + ) + + +def _json_dumps(obj: dict) -> str: + return json.dumps(obj, separators=(",", ":")) + + def update_feature_freshness( store: "FeatureStore", ) -> None: @@ -507,6 +606,8 @@ def start_metrics_server( push=True, materialization=True, freshness=True, + offline_features=True, + audit_logging=False, ) from prometheus_client import 
CollectorRegistry, make_wsgi_app diff --git a/sdk/python/tests/unit/test_metrics.py b/sdk/python/tests/unit/test_metrics.py index bffde73dd91..2750757f67a 100644 --- a/sdk/python/tests/unit/test_metrics.py +++ b/sdk/python/tests/unit/test_metrics.py @@ -18,9 +18,14 @@ import pytest from feast.metrics import ( + emit_offline_audit_log, + emit_online_audit_log, feature_freshness_seconds, materialization_duration_seconds, materialization_result_total, + offline_store_request_latency_seconds, + offline_store_request_total, + offline_store_row_count, online_features_entity_count, online_features_request_count, online_features_status_total, @@ -42,13 +47,11 @@ ) -@pytest.fixture(autouse=True) -def _enable_metrics(): - """Enable all metric categories for each test, then restore.""" +def _all_enabled_flags(): + """Return a _MetricsFlags with every category enabled.""" import feast.metrics as m - original = m._config - m._config = m._MetricsFlags( + return m._MetricsFlags( enabled=True, resource=True, request=True, @@ -56,7 +59,18 @@ def _enable_metrics(): push=True, materialization=True, freshness=True, + offline_features=True, + audit_logging=True, ) + + +@pytest.fixture(autouse=True) +def _enable_metrics(): + """Enable all metric categories for each test, then restore.""" + import feast.metrics as m + + original = m._config + m._config = _all_enabled_flags() yield m._config = original @@ -1081,3 +1095,640 @@ def test_separate_from_read_transform_metric(self): assert abs(read_delta - 0.01) < 0.001 assert abs(write_delta - 0.05) < 0.001 + + +class TestOfflineStoreMetrics: + """Tests for the offline store Prometheus metrics (RED pattern).""" + + def test_request_total_increments_on_success(self): + before = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="success").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + 
)._value.get() + == before + 1 + ) + + def test_request_total_increments_on_error(self): + before = offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="error").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + == before + 1 + ) + + def test_latency_histogram_records(self): + before_sum = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + + offline_store_request_latency_seconds.labels(method="to_arrow").observe(2.5) + + after_sum = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + assert pytest.approx(after_sum - before_sum, abs=0.01) == 2.5 + + def test_row_count_histogram_records(self): + before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + + offline_store_row_count.labels(method="to_arrow").observe(1000) + + after_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + assert pytest.approx(after_sum - before_sum, abs=1) == 1000 + + def test_different_methods_tracked_independently(self): + before_a = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + before_b = offline_store_request_total.labels( + method="other", status="success" + )._value.get() + + offline_store_request_total.labels(method="to_arrow", status="success").inc() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_a + 1 + ) + assert ( + offline_store_request_total.labels( + method="other", status="success" + )._value.get() + == before_b + ) + + +class TestEmitAuditLogs: + """Tests for structured JSON audit log emission.""" + + def test_emit_online_audit_log_writes_json(self): + import json + import logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + 
emit_online_audit_log( + requestor_id="user@example.com", + entity_keys=["driver_id", "customer_id"], + entity_count=10, + feature_views=["driver_fv", "order_fv"], + feature_count=5, + status="success", + latency_ms=42.0, + ) + + mock_info.assert_called_once() + logged_json = mock_info.call_args[0][0] + record = json.loads(logged_json) + + assert record["event"] == "online_feature_request" + assert record["requestor_id"] == "user@example.com" + assert record["entity_keys"] == ["driver_id", "customer_id"] + assert record["entity_count"] == 10 + assert record["feature_views"] == ["driver_fv", "order_fv"] + assert record["feature_count"] == 5 + assert record["status"] == "success" + assert record["latency_ms"] == pytest.approx(42.0) + assert "timestamp" in record + + def test_emit_online_audit_log_noop_when_disabled(self): + import logging + + import feast.metrics as m + + m._config = m._MetricsFlags(enabled=True, audit_logging=False) + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_online_audit_log( + requestor_id="user@example.com", + entity_keys=["driver_id"], + entity_count=1, + feature_views=["driver_fv"], + feature_count=1, + status="success", + latency_ms=10.0, + ) + mock_info.assert_not_called() + + def test_emit_offline_audit_log_writes_json(self): + import json + import logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_offline_audit_log( + method="to_arrow", + feature_views=["driver_fv"], + feature_count=3, + row_count=500, + status="success", + start_time="2026-04-27T12:00:00+00:00", + end_time="2026-04-27T12:00:01+00:00", + duration_ms=1230.0, + ) + + mock_info.assert_called_once() + logged_json = mock_info.call_args[0][0] + record = json.loads(logged_json) + + assert record["event"] == "offline_feature_retrieval" + assert record["method"] == "to_arrow" + assert record["feature_views"] == ["driver_fv"] + assert 
record["feature_count"] == 3 + assert record["row_count"] == 500 + assert record["status"] == "success" + assert record["duration_ms"] == pytest.approx(1230.0) + assert record["start_time"] == "2026-04-27T12:00:00+00:00" + assert record["end_time"] == "2026-04-27T12:00:01+00:00" + + def test_emit_offline_audit_log_noop_when_disabled(self): + import logging + + import feast.metrics as m + + m._config = m._MetricsFlags(enabled=True, audit_logging=False) + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_offline_audit_log( + method="to_arrow", + feature_views=["fv"], + feature_count=1, + row_count=10, + status="success", + start_time="t0", + end_time="t1", + duration_ms=500.0, + ) + mock_info.assert_not_called() + + def test_emit_online_audit_log_with_error_status(self): + import json + import logging + + _audit_logger = logging.getLogger("feast.audit") + with patch.object(_audit_logger, "info") as mock_info: + emit_online_audit_log( + requestor_id="unknown", + entity_keys=[], + entity_count=0, + feature_views=[], + feature_count=0, + status="error", + latency_ms=1.0, + ) + + record = json.loads(mock_info.call_args[0][0]) + assert record["status"] == "error" + + +class TestBuildMetricsFlagsOfflineAndAudit: + """Tests for the new offline_features and audit_logging flags.""" + + def test_no_config_defaults_for_new_flags(self): + from feast.metrics import build_metrics_flags + + flags = build_metrics_flags(None) + assert flags.offline_features is True + assert flags.audit_logging is False + + def test_explicit_enable(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + offline_features=True, + audit_logging=True, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is True + assert 
flags.audit_logging is True + + def test_explicit_disable(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + offline_features=False, + audit_logging=False, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is False + assert flags.audit_logging is False + + def test_missing_new_attrs_fall_back_to_defaults(self): + from types import SimpleNamespace + + from feast.metrics import build_metrics_flags + + mc = SimpleNamespace( + enabled=True, + resource=True, + request=True, + online_features=True, + push=True, + materialization=True, + freshness=True, + ) + flags = build_metrics_flags(mc) + assert flags.offline_features is True + assert flags.audit_logging is False + + +class TestExtractRetrievalMetadata: + """Tests for _extract_retrieval_metadata helper.""" + + def test_extracts_feature_views_and_count(self): + from feast.infra.offline_stores.offline_store import ( + RetrievalMetadata, + _extract_retrieval_metadata, + ) + + job = MagicMock() + job.metadata = RetrievalMetadata( + features=[ + "driver_fv:conv_rate", + "driver_fv:acc_rate", + "vehicle_fv:mileage", + ], + keys=["driver_id"], + ) + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert feat_count == 3 + assert set(fv_names) == {"driver_fv", "vehicle_fv"} + + def test_returns_empty_when_no_metadata(self): + from feast.infra.offline_stores.offline_store import ( + _extract_retrieval_metadata, + ) + + job = MagicMock() + job.metadata = None + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert fv_names == [] + assert feat_count == 0 + + def test_handles_not_implemented_metadata(self): + from feast.infra.offline_stores.offline_store import ( + _extract_retrieval_metadata, + ) + + job = MagicMock() + type(job).metadata = property( + lambda self: (_ for _ in 
()).throw(NotImplementedError()) + ) + + fv_names, feat_count = _extract_retrieval_metadata(job) + assert fv_names == [] + assert feat_count == 0 + + +class TestRetrievalJobToArrowInstrumentation: + """Tests for the metrics/audit instrumentation in RetrievalJob.to_arrow().""" + + def _make_job( + self, table, on_demand_fvs=None, metadata=None, raise_on_internal=None + ): + """Create a concrete RetrievalJob subclass for testing.""" + from feast.infra.offline_stores.offline_store import RetrievalJob + + class _TestJob(RetrievalJob): + def __init__(self): + self._table = table + self._odfvs = on_demand_fvs or [] + self._metadata = metadata + self._raise = raise_on_internal + + def _to_arrow_internal(self, timeout=None): + if self._raise: + raise self._raise + return self._table + + @property + def full_feature_names(self): + return False + + @property + def on_demand_feature_views(self): + return self._odfvs + + @property + def metadata(self): + return self._metadata + + return _TestJob() + + def test_success_increments_counter_and_records_latency(self): + import pyarrow as pa + + table = pa.table({"col": [1, 2, 3]}) + job = self._make_job(table) + + before_count = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + before_latency = offline_store_request_latency_seconds.labels( + method="to_arrow" + )._sum.get() + + result = job.to_arrow() + + assert result.num_rows == 3 + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_count + 1 + ) + assert ( + offline_store_request_latency_seconds.labels(method="to_arrow")._sum.get() + > before_latency + ) + + def test_error_increments_error_counter(self): + job = self._make_job(None, raise_on_internal=RuntimeError("query failed")) + + before_error = offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + + with pytest.raises(RuntimeError, match="query failed"): + job.to_arrow() + + assert ( 
+ offline_store_request_total.labels( + method="to_arrow", status="error" + )._value.get() + == before_error + 1 + ) + + def test_row_count_recorded_on_success(self): + import pyarrow as pa + + table = pa.table({"a": list(range(500))}) + job = self._make_job(table) + + before_sum = offline_store_row_count.labels(method="to_arrow")._sum.get() + + job.to_arrow() + + assert ( + offline_store_row_count.labels(method="to_arrow")._sum.get() + >= before_sum + 500 + ) + + def test_row_count_not_recorded_when_zero(self): + import pyarrow as pa + + table = pa.table({"a": pa.array([], type=pa.int64())}) + job = self._make_job(table) + + before_count = offline_store_row_count.labels(method="to_arrow")._sum.get() + + job.to_arrow() + + assert ( + offline_store_row_count.labels(method="to_arrow")._sum.get() == before_count + ) + + def test_metrics_skipped_when_offline_features_disabled(self): + import pyarrow as pa + + import feast.metrics as m + + m._config = m._MetricsFlags( + enabled=True, offline_features=False, audit_logging=False + ) + + table = pa.table({"col": [1, 2]}) + job = self._make_job(table) + + before_count = offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + + job.to_arrow() + + assert ( + offline_store_request_total.labels( + method="to_arrow", status="success" + )._value.get() + == before_count + ) + + def test_audit_log_emitted_on_success(self): + import pyarrow as pa + + from feast.infra.offline_stores.offline_store import RetrievalMetadata + + meta = RetrievalMetadata( + features=["driver_fv:conv_rate", "driver_fv:acc_rate"], + keys=["driver_id"], + ) + table = pa.table({"col": [1, 2, 3]}) + job = self._make_job(table, metadata=meta) + + with patch("feast.metrics.emit_offline_audit_log") as mock_audit: + job.to_arrow() + + mock_audit.assert_called_once() + call_kwargs = mock_audit.call_args[1] + assert call_kwargs["method"] == "to_arrow" + assert call_kwargs["status"] == "success" + assert call_kwargs["row_count"] 
== 3 + assert call_kwargs["feature_count"] == 2 + assert set(call_kwargs["feature_views"]) == {"driver_fv"} + + def test_audit_log_skipped_when_disabled(self): + import pyarrow as pa + + import feast.metrics as m + + m._config = m._MetricsFlags( + enabled=True, offline_features=True, audit_logging=False + ) + + table = pa.table({"col": [1]}) + job = self._make_job(table) + + with patch("feast.metrics.emit_offline_audit_log") as mock_audit: + job.to_arrow() + mock_audit.assert_not_called() + + def test_instrumentation_failure_does_not_mask_query_error(self): + """If metrics code itself throws, the original query error still propagates.""" + import pyarrow as pa + + table = pa.table({"col": [1]}) + job = self._make_job(table) + + with patch( + "feast.metrics._config", + new_callable=lambda: property( + lambda self: (_ for _ in ()).throw(RuntimeError("metrics broken")) + ), + ): + result = job.to_arrow() + assert result.num_rows == 1 + + +class TestParseFeatureInfo: + """Tests for _parse_feature_info in feature_server.""" + + def test_feature_ref_list(self): + from feast.feature_server import _parse_feature_info + + refs = ["driver_fv:conv_rate", "driver_fv:acc_rate", "vehicle_fv:mileage"] + fv_names, feat_count = _parse_feature_info(refs) + assert feat_count == 3 + assert set(fv_names) == {"driver_fv", "vehicle_fv"} + + def test_empty_list(self): + from feast.feature_server import _parse_feature_info + + fv_names, feat_count = _parse_feature_info([]) + assert fv_names == [] + assert feat_count == 0 + + def test_feature_service(self): + from feast.feature_server import _parse_feature_info + + proj1 = MagicMock() + proj1.name = "driver_fv" + proj1.features = [MagicMock(), MagicMock()] + proj2 = MagicMock() + proj2.name = "order_fv" + proj2.features = [MagicMock()] + + fs_svc = MagicMock() + fs_svc.feature_view_projections = [proj1, proj2] + + from feast.feature_service import FeatureService + + fs_svc.__class__ = FeatureService + + fv_names, feat_count = 
_parse_feature_info(fs_svc) + assert feat_count == 3 + assert fv_names == ["driver_fv", "order_fv"] + + def test_strips_version_suffix(self): + from feast.feature_server import _parse_feature_info + + refs = ["driver_fv@v2:conv_rate"] + fv_names, feat_count = _parse_feature_info(refs) + assert feat_count == 1 + assert fv_names == ["driver_fv"] + + +class TestEmitOnlineAudit: + """Tests for the _emit_online_audit helper in feature_server.""" + + def test_emits_audit_log_with_anonymous_user(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1, 2]}, + features=["driver_fv:conv_rate"], + ) + + with ( + patch("feast.feature_server.feast_metrics") as mock_metrics, + patch( + "feast.permissions.security_manager.get_security_manager", + return_value=None, + ), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=2, + status="success", + latency_ms=15.0, + ) + + mock_metrics.emit_online_audit_log.assert_called_once() + kwargs = mock_metrics.emit_online_audit_log.call_args[1] + assert kwargs["requestor_id"] == "anonymous" + assert kwargs["entity_keys"] == ["driver_id"] + assert kwargs["entity_count"] == 2 + assert kwargs["status"] == "success" + + def test_emits_audit_log_with_authenticated_user(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1]}, + features=["driver_fv:conv_rate"], + ) + + mock_sm = MagicMock() + mock_sm.current_user.username = "jdoe" + + with ( + patch("feast.feature_server.feast_metrics") as mock_metrics, + patch( + "feast.permissions.security_manager.get_security_manager", + return_value=mock_sm, + ), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=1, + status="success", + latency_ms=10.0, + ) + + kwargs = mock_metrics.emit_online_audit_log.call_args[1] + assert 
kwargs["requestor_id"] == "jdoe" + + def test_does_not_raise_on_failure(self): + from feast.feature_server import GetOnlineFeaturesRequest, _emit_online_audit + + request = GetOnlineFeaturesRequest( + entities={"driver_id": [1]}, + features=["driver_fv:conv_rate"], + ) + + with patch( + "feast.permissions.security_manager.get_security_manager", + side_effect=RuntimeError("auth broken"), + ): + _emit_online_audit( + request=request, + features=request.features, + entity_count=1, + status="error", + latency_ms=5.0, + ) From 7050f541489d2ee7a3c046f483fa739d60ca919b Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Mon, 11 May 2026 14:29:18 +0530 Subject: [PATCH 02/12] fix: Resolve comments from review Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- .../feature-servers/python-feature-server.md | 72 ++++++++++++++++++- .../samples/v1_featurestore_serving.yaml | 4 +- sdk/python/feast/feature_server.py | 5 +- .../infra/offline_stores/offline_store.py | 16 ++--- sdk/python/feast/metrics.py | 1 + sdk/python/tests/unit/test_metrics.py | 10 +-- 6 files changed, 89 insertions(+), 19 deletions(-) diff --git a/docs/reference/feature-servers/python-feature-server.md b/docs/reference/feature-servers/python-feature-server.md index 654c4b9f938..4802599866d 100644 --- a/docs/reference/feature-servers/python-feature-server.md +++ b/docs/reference/feature-servers/python-feature-server.md @@ -352,11 +352,14 @@ feature_server: push: true # push request counters materialization: true # materialization counters & duration freshness: true # feature freshness gauges + offline_features: true # offline store retrieval counters & latency + audit_logging: false # structured JSON audit logs (see below) ``` Any category set to `false` will emit no metrics and start no background threads (e.g., setting `freshness: false` prevents the registry polling -thread from starting). All categories default to `true`. 
+thread from starting). All categories default to `true` except +`audit_logging`, which defaults to `false`. ### Available metrics @@ -375,6 +378,9 @@ thread from starting). All categories default to `true`. | `feast_materialization_result_total` | Counter | `feature_view`, `status` | `materialization` | Materialization runs (success/failure) | | `feast_materialization_duration_seconds` | Histogram | `feature_view` | `materialization` | Materialization duration per feature view | | `feast_feature_freshness_seconds` | Gauge | `feature_view`, `project` | `freshness` | Seconds since last materialization | +| `feast_offline_store_request_total` | Counter | `method`, `status` | `offline_features` | Total offline store retrieval requests | +| `feast_offline_store_request_latency_seconds` | Histogram | `method` | `offline_features` | Latency of offline store retrieval operations | +| `feast_offline_store_row_count` | Histogram | `method` | `offline_features` | Rows returned by offline store retrieval | ### Per-ODFV transformation metrics @@ -405,6 +411,70 @@ The `odfv_name` label lets you filter or group by individual ODFV, and the `mode` label (`python`, `pandas`, `substrait`) lets you compare transformation engines. +### Audit logging + +Feast can emit structured JSON audit log entries for every online and offline +feature retrieval. These are written via the standard `feast.audit` Python +logger, so you can route them to a dedicated file, SIEM, or log aggregator +independently of application logs. + +Audit logging is **disabled by default**. 
Enable it in `feature_store.yaml`: + +```yaml +feature_server: + type: local + metrics: + enabled: true + audit_logging: true +``` + +**Online audit log** (emitted per `/get-online-features` call): + +```json +{ + "event": "online_feature_request", + "timestamp": "2026-05-11T08:30:00.123456+00:00", + "requestor_id": "user@example.com", + "entity_keys": ["driver_id"], + "entity_count": 3, + "feature_views": ["driver_hourly_stats"], + "feature_count": 3, + "status": "success", + "latency_ms": 12.34 +} +``` + +**Offline audit log** (emitted per `RetrievalJob.to_arrow()` call): + +```json +{ + "event": "offline_feature_retrieval", + "timestamp": "2026-05-11T08:31:00.456789+00:00", + "method": "to_arrow", + "start_time": "2026-05-11T08:30:59.226789+00:00", + "end_time": "2026-05-11T08:31:00.456789+00:00", + "feature_views": ["driver_hourly_stats"], + "feature_count": 3, + "row_count": 500, + "status": "success", + "duration_ms": 1230.0 +} +``` + +The `requestor_id` field in online audit logs is populated from the +security manager's current user when authentication is configured, and +falls back to `"anonymous"` otherwise. 
+ +To route audit logs to a separate file: + +```python +import logging + +handler = logging.FileHandler("/var/log/feast/audit.log") +handler.setFormatter(logging.Formatter("%(message)s")) +logging.getLogger("feast.audit").addHandler(handler) +``` + ### Scraping with Prometheus ```yaml diff --git a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml index f60640624c9..412499412e6 100644 --- a/infra/feast-operator/config/samples/v1_featurestore_serving.yaml +++ b/infra/feast-operator/config/samples/v1_featurestore_serving.yaml @@ -26,8 +26,8 @@ spec: push: true # push/write request counters materialization: true # materialization counters and duration histograms freshness: false # feature freshness gauges (can be expensive at scale) - # Example: when a future SDK adds "registry_sync", enable it here - # registry_sync: false + offline_features: true # offline store retrieval counters, latency, row count + audit_logging: false # structured JSON audit logs via the feast.audit logger offlinePushBatching: enabled: true batchSize: 1000 # max rows per offline write batch diff --git a/sdk/python/feast/feature_server.py b/sdk/python/feast/feature_server.py index 1daf59fa2ec..58cd06f49e2 100644 --- a/sdk/python/feast/feature_server.py +++ b/sdk/python/feast/feature_server.py @@ -161,6 +161,7 @@ def _parse_feature_info( view name strings and feat_count is the total number of features. 
""" from feast.feature_service import FeatureService + from feast.utils import _parse_feature_ref if isinstance(features, FeatureService): projections = features.feature_view_projections @@ -168,9 +169,7 @@ def _parse_feature_info( feat_count = sum(len(p.features) for p in projections) elif isinstance(features, list): feat_count = len(features) - fv_names = list( - {ref.split(":")[0].split("@")[0] for ref in features if ":" in ref} - ) + fv_names = list({_parse_feature_ref(ref)[0] for ref in features if ":" in ref}) else: fv_names = [] feat_count = 0 diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 9d9fee22623..2803b15526e 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -15,7 +15,7 @@ import time import warnings from abc import ABC -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import ( TYPE_CHECKING, @@ -191,22 +191,22 @@ def to_arrow( feast_metrics.offline_store_request_latency_seconds.labels( method="to_arrow" ).observe(elapsed) - if row_count > 0: - feast_metrics.offline_store_row_count.labels( - method="to_arrow" - ).observe(row_count) + feast_metrics.offline_store_row_count.labels( + method="to_arrow" + ).observe(row_count) if feast_metrics._config.audit_logging: feature_views, feature_count = _extract_retrieval_metadata(self) - now_iso = datetime.now(tz=timezone.utc).isoformat() + end_dt = datetime.now(tz=timezone.utc) + start_dt = end_dt - timedelta(seconds=elapsed) feast_metrics.emit_offline_audit_log( method="to_arrow", feature_views=feature_views, feature_count=feature_count, row_count=row_count, status=status_label, - start_time=now_iso, - end_time=now_iso, + start_time=start_dt.isoformat(), + end_time=end_dt.isoformat(), duration_ms=elapsed * 1000, ) except Exception: diff --git a/sdk/python/feast/metrics.py 
b/sdk/python/feast/metrics.py index f827f6e31ee..13a855d587b 100644 --- a/sdk/python/feast/metrics.py +++ b/sdk/python/feast/metrics.py @@ -470,6 +470,7 @@ def emit_offline_audit_log( _json_dumps( { "event": "offline_feature_retrieval", + "timestamp": datetime.now(tz=timezone.utc).isoformat(), "method": method, "start_time": start_time, "end_time": end_time, diff --git a/sdk/python/tests/unit/test_metrics.py b/sdk/python/tests/unit/test_metrics.py index 2750757f67a..abf2a35e389 100644 --- a/sdk/python/tests/unit/test_metrics.py +++ b/sdk/python/tests/unit/test_metrics.py @@ -1246,6 +1246,7 @@ def test_emit_offline_audit_log_writes_json(self): record = json.loads(logged_json) assert record["event"] == "offline_feature_retrieval" + assert "timestamp" in record assert record["method"] == "to_arrow" assert record["feature_views"] == ["driver_fv"] assert record["feature_count"] == 3 @@ -1508,19 +1509,18 @@ def test_row_count_recorded_on_success(self): >= before_sum + 500 ) - def test_row_count_not_recorded_when_zero(self): + def test_row_count_recorded_when_zero(self): import pyarrow as pa table = pa.table({"a": pa.array([], type=pa.int64())}) job = self._make_job(table) - before_count = offline_store_row_count.labels(method="to_arrow")._sum.get() + hist = offline_store_row_count.labels(method="to_arrow") + before_bucket = hist._buckets[0].get() job.to_arrow() - assert ( - offline_store_row_count.labels(method="to_arrow")._sum.get() == before_count - ) + assert hist._buckets[0].get() == before_bucket + 1 def test_metrics_skipped_when_offline_features_disabled(self): import pyarrow as pa From b28265e4f260312f3df4d78b3987cbdb1988e3a9 Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Tue, 31 Mar 2026 15:56:33 +0530 Subject: [PATCH 03/12] feat: Statistical/Distribution metrics in Feast Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- .../feast/api/registry/rest/__init__.py | 2 + 
.../feast/api/registry/rest/monitoring.py | 300 ++++ sdk/python/feast/cli/cli.py | 2 + sdk/python/feast/cli/monitor.py | 122 ++ .../postgres_offline_store/postgres.py | 647 ++++++++- .../infra/offline_stores/offline_store.py | 136 +- sdk/python/feast/monitoring/__init__.py | 7 + .../feast/monitoring/dqm_job_manager.py | 202 +++ .../feast/monitoring/metrics_calculator.py | 158 +++ .../feast/monitoring/monitoring_service.py | 912 +++++++++++++ .../feast/monitoring/monitoring_store.py | 415 ++++++ sdk/python/feast/repo_operations.py | 1216 +++++++++-------- .../tests/integration/monitoring/__init__.py | 1 + .../monitoring/test_monitoring_integration.py | 804 +++++++++++ sdk/python/tests/unit/monitoring/__init__.py | 1 + .../monitoring/test_metrics_calculator.py | 169 +++ .../unit/monitoring/test_monitoring_store.py | 191 +++ 17 files changed, 4689 insertions(+), 596 deletions(-) create mode 100644 sdk/python/feast/api/registry/rest/monitoring.py create mode 100644 sdk/python/feast/cli/monitor.py create mode 100644 sdk/python/feast/monitoring/__init__.py create mode 100644 sdk/python/feast/monitoring/dqm_job_manager.py create mode 100644 sdk/python/feast/monitoring/metrics_calculator.py create mode 100644 sdk/python/feast/monitoring/monitoring_service.py create mode 100644 sdk/python/feast/monitoring/monitoring_store.py create mode 100644 sdk/python/tests/integration/monitoring/__init__.py create mode 100644 sdk/python/tests/integration/monitoring/test_monitoring_integration.py create mode 100644 sdk/python/tests/unit/monitoring/__init__.py create mode 100644 sdk/python/tests/unit/monitoring/test_metrics_calculator.py create mode 100644 sdk/python/tests/unit/monitoring/test_monitoring_store.py diff --git a/sdk/python/feast/api/registry/rest/__init__.py b/sdk/python/feast/api/registry/rest/__init__.py index 14db40d7af6..6cc5a99934a 100644 --- a/sdk/python/feast/api/registry/rest/__init__.py +++ b/sdk/python/feast/api/registry/rest/__init__.py @@ -7,6 +7,7 @@ from 
feast.api.registry.rest.features import get_feature_router from feast.api.registry.rest.lineage import get_lineage_router from feast.api.registry.rest.metrics import get_metrics_router +from feast.api.registry.rest.monitoring import get_monitoring_router from feast.api.registry.rest.permissions import get_permission_router from feast.api.registry.rest.projects import get_project_router from feast.api.registry.rest.saved_datasets import get_saved_dataset_router @@ -25,3 +26,4 @@ def register_all_routes(app: FastAPI, grpc_handler, server=None): app.include_router(get_saved_dataset_router(grpc_handler)) app.include_router(get_search_router(grpc_handler)) app.include_router(get_metrics_router(grpc_handler, server)) + app.include_router(get_monitoring_router(grpc_handler, server)) diff --git a/sdk/python/feast/api/registry/rest/monitoring.py b/sdk/python/feast/api/registry/rest/monitoring.py new file mode 100644 index 00000000000..001270df0b6 --- /dev/null +++ b/sdk/python/feast/api/registry/rest/monitoring.py @@ -0,0 +1,300 @@ +import logging +from datetime import date +from typing import List, Optional + +from fastapi import APIRouter, HTTPException, Query +from pydantic import BaseModel, Field + +from feast.infra.offline_stores.offline_store import OfflineStore +from feast.permissions.action import AuthzedAction +from feast.permissions.security_manager import assert_permissions + +VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES + + +logger = logging.getLogger(__name__) + + +class ComputeMetricsRequest(BaseModel): + project: str + feature_view_name: Optional[str] = None + feature_names: Optional[List[str]] = None + start_date: Optional[str] = None + end_date: Optional[str] = None + granularity: str = Field("daily") + set_baseline: bool = False + + +class AutoComputeRequest(BaseModel): + project: str + feature_view_name: Optional[str] = None + + +class ComputeTransientRequest(BaseModel): + project: str + feature_view_name: str + feature_names: 
Optional[List[str]] = None + start_date: Optional[str] = None + end_date: Optional[str] = None + + +def get_monitoring_router(grpc_handler, server=None): + router = APIRouter() + + def _get_monitoring_service(): + from feast.monitoring.monitoring_service import MonitoringService + + store = server.store if server else grpc_handler.store + return MonitoringService(store) + + def _get_store(): + return server.store if server else grpc_handler.store + + # ------------------------------------------------------------------ # + # DQM Job: submit and track + # ------------------------------------------------------------------ # + + @router.post("/monitoring/compute", tags=["Monitoring"]) + async def compute_metrics(request: ComputeMetricsRequest): + """Submit a DQM job to compute and store metrics. Returns job_id.""" + if request.granularity not in VALID_GRANULARITIES: + raise HTTPException( + status_code=400, + detail=f"Invalid granularity '{request.granularity}'. " + f"Must be one of {VALID_GRANULARITIES}", + ) + + store = _get_store() + if request.feature_view_name: + fv = store.registry.get_feature_view( + name=request.feature_view_name, project=request.project + ) + assert_permissions(fv, actions=[AuthzedAction.UPDATE]) + + svc = _get_monitoring_service() + + params = {} + if request.start_date: + params["start_date"] = request.start_date + if request.end_date: + params["end_date"] = request.end_date + if request.feature_names: + params["feature_names"] = request.feature_names + params["granularity"] = request.granularity + params["set_baseline"] = request.set_baseline + + job_id = svc.submit_job( + project=request.project, + job_type="compute", + feature_view_name=request.feature_view_name, + parameters=params, + ) + + # Execute synchronously for now; async worker is a future enhancement + try: + result = svc.execute_job(job_id) + return {"job_id": job_id, **result} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + 
@router.post("/monitoring/auto_compute", tags=["Monitoring"]) + async def auto_compute(request: AutoComputeRequest): + """Auto-detect date ranges and compute all granularities.""" + store = _get_store() + if request.feature_view_name: + fv = store.registry.get_feature_view( + name=request.feature_view_name, project=request.project + ) + assert_permissions(fv, actions=[AuthzedAction.UPDATE]) + + svc = _get_monitoring_service() + + job_id = svc.submit_job( + project=request.project, + job_type="auto_compute", + feature_view_name=request.feature_view_name, + ) + + try: + result = svc.execute_job(job_id) + return {"job_id": job_id, **result} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/monitoring/jobs/{job_id}", tags=["Monitoring"]) + async def get_job_status(job_id: str): + svc = _get_monitoring_service() + job = svc.get_job(job_id) + if job is None: + raise HTTPException(status_code=404, detail=f"Job '{job_id}' not found") + return job + + # ------------------------------------------------------------------ # + # Transient compute (not stored) + # ------------------------------------------------------------------ # + + @router.post("/monitoring/compute/transient", tags=["Monitoring"]) + async def compute_transient(request: ComputeTransientRequest): + """Compute metrics on-the-fly for an arbitrary date range. 
Results are + returned directly and NOT persisted to the monitoring tables.""" + store = _get_store() + fv = store.registry.get_feature_view( + name=request.feature_view_name, project=request.project + ) + assert_permissions(fv, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + + start_d = date.fromisoformat(request.start_date) if request.start_date else None + end_d = date.fromisoformat(request.end_date) if request.end_date else None + + result = svc.compute_transient( + project=request.project, + feature_view_name=request.feature_view_name, + feature_names=request.feature_names, + start_date=start_d, + end_date=end_d, + ) + return result + + # ------------------------------------------------------------------ # + # Read endpoints + # ------------------------------------------------------------------ # + + @router.get("/monitoring/metrics/features", tags=["Monitoring"]) + async def get_feature_metrics( + project: str = Query(...), + feature_view_name: Optional[str] = Query(None), + feature_name: Optional[str] = Query(None), + feature_service_name: Optional[str] = Query(None), + granularity: Optional[str] = Query(None), + data_source_type: Optional[str] = Query(None), + start_date: Optional[str] = Query(None), + end_date: Optional[str] = Query(None), + ): + store = _get_store() + if feature_view_name: + fv = store.registry.get_feature_view( + name=feature_view_name, project=project + ) + assert_permissions(fv, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + return svc.get_feature_metrics( + project=project, + feature_service_name=feature_service_name, + feature_view_name=feature_view_name, + feature_name=feature_name, + granularity=granularity, + data_source_type=data_source_type, + start_date=date.fromisoformat(start_date) if start_date else None, + end_date=date.fromisoformat(end_date) if end_date else None, + ) + + @router.get("/monitoring/metrics/feature_views", tags=["Monitoring"]) + async def 
get_feature_view_metrics( + project: str = Query(...), + feature_view_name: Optional[str] = Query(None), + feature_service_name: Optional[str] = Query(None), + granularity: Optional[str] = Query(None), + data_source_type: Optional[str] = Query(None), + start_date: Optional[str] = Query(None), + end_date: Optional[str] = Query(None), + ): + store = _get_store() + if feature_view_name: + fv = store.registry.get_feature_view( + name=feature_view_name, project=project + ) + assert_permissions(fv, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + return svc.get_feature_view_metrics( + project=project, + feature_service_name=feature_service_name, + feature_view_name=feature_view_name, + granularity=granularity, + data_source_type=data_source_type, + start_date=date.fromisoformat(start_date) if start_date else None, + end_date=date.fromisoformat(end_date) if end_date else None, + ) + + @router.get("/monitoring/metrics/feature_services", tags=["Monitoring"]) + async def get_feature_service_metrics( + project: str = Query(...), + feature_service_name: Optional[str] = Query(None), + granularity: Optional[str] = Query(None), + data_source_type: Optional[str] = Query(None), + start_date: Optional[str] = Query(None), + end_date: Optional[str] = Query(None), + ): + store = _get_store() + if feature_service_name: + fs = store.registry.get_feature_service( + name=feature_service_name, project=project + ) + assert_permissions(fs, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + return svc.get_feature_service_metrics( + project=project, + feature_service_name=feature_service_name, + granularity=granularity, + data_source_type=data_source_type, + start_date=date.fromisoformat(start_date) if start_date else None, + end_date=date.fromisoformat(end_date) if end_date else None, + ) + + @router.get("/monitoring/metrics/baseline", tags=["Monitoring"]) + async def get_baseline( + project: str = Query(...), + feature_view_name: Optional[str] = 
Query(None), + feature_name: Optional[str] = Query(None), + data_source_type: Optional[str] = Query(None), + ): + store = _get_store() + if feature_view_name: + fv = store.registry.get_feature_view( + name=feature_view_name, project=project + ) + assert_permissions(fv, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + return svc.get_baseline( + project=project, + feature_view_name=feature_view_name, + feature_name=feature_name, + data_source_type=data_source_type, + ) + + @router.get("/monitoring/metrics/timeseries", tags=["Monitoring"]) + async def get_timeseries( + project: str = Query(...), + feature_view_name: Optional[str] = Query(None), + feature_name: Optional[str] = Query(None), + feature_service_name: Optional[str] = Query(None), + granularity: Optional[str] = Query(None), + data_source_type: Optional[str] = Query(None), + start_date: Optional[str] = Query(None), + end_date: Optional[str] = Query(None), + ): + store = _get_store() + if feature_view_name: + fv = store.registry.get_feature_view( + name=feature_view_name, project=project + ) + assert_permissions(fv, actions=[AuthzedAction.DESCRIBE]) + + svc = _get_monitoring_service() + return svc.get_timeseries( + project=project, + feature_view_name=feature_view_name, + feature_name=feature_name, + feature_service_name=feature_service_name, + granularity=granularity, + data_source_type=data_source_type, + start_date=date.fromisoformat(start_date) if start_date else None, + end_date=date.fromisoformat(end_date) if end_date else None, + ) + + return router diff --git a/sdk/python/feast/cli/cli.py b/sdk/python/feast/cli/cli.py index 886c91f69ae..726e215a1b9 100644 --- a/sdk/python/feast/cli/cli.py +++ b/sdk/python/feast/cli/cli.py @@ -35,6 +35,7 @@ get_historical_features, get_online_features, ) +from feast.cli.monitor import monitor_cmd from feast.cli.on_demand_feature_views import on_demand_feature_views_cmd from feast.cli.permissions import feast_permissions_cmd from feast.cli.projects 
import projects_cmd @@ -650,6 +651,7 @@ def demo_notebooks_command(ctx: click.Context, output_dir: str, overwrite: bool) cli.add_command(serve_registry_command) cli.add_command(serve_transformations_command) cli.add_command(dbt_cmd) +cli.add_command(monitor_cmd) if __name__ == "__main__": cli() diff --git a/sdk/python/feast/cli/monitor.py b/sdk/python/feast/cli/monitor.py new file mode 100644 index 00000000000..d4f0953ff61 --- /dev/null +++ b/sdk/python/feast/cli/monitor.py @@ -0,0 +1,122 @@ +from datetime import date +from typing import List, Optional + +import click + +from feast.infra.offline_stores.offline_store import OfflineStore +from feast.repo_operations import create_feature_store + +VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES + + +@click.group(name="monitor") +def monitor_cmd(): + """Feature monitoring commands.""" + pass + + +@monitor_cmd.command("run") +@click.option( + "--project", + "-p", + default=None, + help="Feast project name. Defaults to the project in feature_store.yaml.", +) +@click.option( + "--feature-view", + "-v", + default=None, + help="Feature view name. If omitted, all feature views are computed.", +) +@click.option( + "--feature-name", + "-f", + multiple=True, + help="Feature name(s) to compute. Can be specified multiple times.", +) +@click.option( + "--start-date", + default=None, + help="Start date (YYYY-MM-DD). If omitted, auto-detected from source data.", +) +@click.option( + "--end-date", + default=None, + help="End date (YYYY-MM-DD). If omitted, auto-detected from source data.", +) +@click.option( + "--granularity", + "-g", + default=None, + type=click.Choice(list(VALID_GRANULARITIES)), + help="Metric granularity. 
If omitted, all granularities are computed (auto mode).", +) +@click.option( + "--set-baseline", + is_flag=True, + default=False, + help="Mark this computation as the baseline for drift detection.", +) +@click.pass_context +def monitor_run( + ctx: click.Context, + project: Optional[str], + feature_view: Optional[str], + feature_name: tuple, + start_date: Optional[str], + end_date: Optional[str], + granularity: Optional[str], + set_baseline: bool, +): + """Compute feature quality metrics. + + Without --start-date/--end-date/--granularity, runs in auto mode: + detects date ranges from source data and computes all granularities. + """ + store = create_feature_store(ctx) + + if project is None: + project = store.project + + from feast.monitoring.monitoring_service import MonitoringService + + svc = MonitoringService(store) + + auto_mode = start_date is None and end_date is None and granularity is None + feat_names: Optional[List[str]] = list(feature_name) if feature_name else None + + if auto_mode and not set_baseline: + click.echo("Auto-computing metrics for all granularities...") + result = svc.auto_compute( + project=project, + feature_view_name=feature_view, + ) + click.echo(f"Status: {result['status']}") + click.echo(f"Feature views computed: {result['computed_feature_views']}") + click.echo(f"Features computed: {result['computed_features']}") + click.echo(f"Granularities: {', '.join(result['granularities'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + else: + start_d = date.fromisoformat(start_date) if start_date else None + end_d = date.fromisoformat(end_date) if end_date else None + + result = svc.compute_metrics( + project=project, + feature_view_name=feature_view, + feature_names=feat_names, + start_date=start_d, + end_date=end_d, + granularity=granularity or "daily", + set_baseline=set_baseline, + ) + + click.echo(f"Status: {result['status']}") + click.echo(f"Granularity: {result['granularity']}") + click.echo(f"Features computed: 
{result['computed_features']}") + click.echo(f"Feature views computed: {result['computed_feature_views']}") + click.echo(f"Feature services computed: {result['computed_feature_services']}") + click.echo(f"Metric dates: {', '.join(result['metric_dates'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + + if set_baseline: + click.echo("Baseline: SET") diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index 50e48208647..03b77fa0e0e 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -1,6 +1,6 @@ import contextlib from dataclasses import asdict -from datetime import datetime, timezone +from datetime import date, datetime, timezone from enum import Enum from typing import ( Any, @@ -289,6 +289,240 @@ def pull_all_from_table_or_query( on_demand_feature_views=None, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + assert isinstance(data_source, PostgreSQLSource) + + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + cast_style="timestamptz", + date_time_separator=" ", + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + with _get_conn(config.offline_store) as conn: + conn.read_only = True + + if numeric_features: + 
results.extend( + _sql_numeric_stats( + conn, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _sql_categorical_stats( + conn, + from_expression, + col_name, + ts_filter, + top_n, + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + assert isinstance(data_source, PostgreSQLSource) + + from_expression = data_source.get_table_query_string() + + with _get_conn(config.offline_store) as conn: + conn.read_only = True + with conn.cursor() as cur: + cur.execute(f'SELECT MAX("{timestamp_field}") FROM {from_expression}') + row = cur.fetchone() + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + + # ------------------------------------------------------------------ # + # Monitoring metrics storage (native PostgreSQL) + # ------------------------------------------------------------------ # + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_MON_FEATURE_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + feature_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + feature_type VARCHAR(50) NOT NULL, + row_count BIGINT, + null_count BIGINT, + 
null_rate DOUBLE PRECISION, + mean DOUBLE PRECISION, + stddev DOUBLE PRECISION, + min_val DOUBLE PRECISION, + max_val DOUBLE PRECISION, + p50 DOUBLE PRECISION, + p75 DOUBLE PRECISION, + p90 DOUBLE PRECISION, + p95 DOUBLE PRECISION, + p99 DOUBLE PRECISION, + histogram JSONB, + PRIMARY KEY (project_id, feature_view_name, feature_name, + metric_date, granularity, data_source_type) + ); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_project + ON {_MON_FEATURE_TABLE} (project_id); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_view + ON {_MON_FEATURE_TABLE} (project_id, feature_view_name); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_date + ON {_MON_FEATURE_TABLE} (metric_date); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_granularity + ON {_MON_FEATURE_TABLE} (granularity); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_baseline + ON {_MON_FEATURE_TABLE} (project_id, feature_view_name, feature_name) + WHERE is_baseline = TRUE; + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_MON_VIEW_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_row_count BIGINT, + total_features INTEGER, + features_with_nulls INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_view_name, metric_date, + granularity, data_source_type) + ); + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_MON_SERVICE_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_service_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, 
+ total_feature_views INTEGER, + total_features INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_service_name, metric_date, + granularity, data_source_type) + ); + """) + conn.commit() + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + table, columns, pk_columns = _mon_table_meta(metric_type) + _mon_upsert(config.offline_store, table, columns, pk_columns, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional["date"] = None, + end_date: Optional["date"] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + _, columns, _ = _mon_table_meta(metric_type) + return _mon_query( + config.offline_store, + metric_type, + columns, + project, + filters, + start_date, + end_date, + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) + + conditions = [sql.SQL("project_id = %s")] + params: list = [project] + + if feature_view_name: + conditions.append(sql.SQL("feature_view_name = %s")) + params.append(feature_view_name) + if feature_name: + conditions.append(sql.SQL("feature_name = %s")) + params.append(feature_name) + if data_source_type: + conditions.append(sql.SQL("data_source_type = %s")) + params.append(data_source_type) + + conditions.append(sql.SQL("is_baseline = TRUE")) + + query = sql.SQL("UPDATE {} SET is_baseline = FALSE WHERE {}").format( + sql.Identifier(_MON_FEATURE_TABLE), + sql.SQL(" AND 
").join(conditions), + ) + + with _get_conn(config.offline_store) as conn, conn.cursor() as cur: + cur.execute(query, params) + conn.commit() + class PostgreSQLRetrievalJob(RetrievalJob): def __init__( @@ -782,3 +1016,414 @@ def _get_entity_schema( {% endfor %} {% endif %} """ + + +# ------------------------------------------------------------------ # +# Monitoring SQL push-down helpers +# ------------------------------------------------------------------ # + +_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { + "feature_type": "numeric", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _sql_numeric_stats( + conn, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + """Batch-compute numeric statistics via one SQL query, then histograms.""" + # 10 aggregate columns per feature (non_null, mean..p99) + 1 row_count + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f'"{col}"' + c = f"{q}::float8" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_filter}" + ) + + with conn.cursor() as cur: + cur.execute(query) + row = cur.fetchone() + + if row is None: + return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = 
row[base] or 0 + null_count = row_count - non_null + + min_val = _opt_float(row[base + 3]) + max_val = _opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _opt_float(row[base + 1]), + "stddev": _opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": _opt_float(row[base + 5]), + "p75": _opt_float(row[base + 6]), + "p90": _opt_float(row[base + 7]), + "p95": _opt_float(row[base + 8]), + "p99": _opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _sql_numeric_histogram( + conn, + from_expression, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _sql_numeric_histogram( + conn, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + if min_val == max_val: + with conn.cursor() as cur: + cur.execute( + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter}" + ) + cnt = (cur.fetchone() or (0,))[0] + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + upper = max_val + (max_val - min_val) * 1e-10 + bin_width = (max_val - min_val) / bins + + query = ( + f"SELECT width_bucket({q_col}::float8, {min_val}, {upper}, {bins}) AS bucket, " + f"COUNT(*) AS cnt " + f"FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_filter} " + f"GROUP BY bucket ORDER BY bucket" + ) + + with conn.cursor() as cur: + cur.execute(query) + rows = cur.fetchall() + + counts = [0] * bins + for bucket, cnt in rows: + if 1 <= bucket <= bins: + counts[bucket - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": 
[float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _sql_categorical_stats( + conn, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_filter}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" {q_col}::text AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}" + ) + + with conn.cursor() as cur: + cur.execute(query) + rows = cur.fetchall() + + if not rows: + return { + **_EMPTY_METRIC_TEMPLATE, + "feature_name": col_name, + "feature_type": "categorical", + } + + row_count = rows[0][0] + null_count = rows[0][1] + unique_count = rows[0][2] + + top_entries = [{"value": r[3], "count": r[4]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _opt_float(val: Any) -> Optional[float]: + """Convert a DB aggregate result to float, preserving None.""" + return float(val) if val is not None else None + + +# ------------------------------------------------------------------ # +# Monitoring metrics storage helpers +# 
------------------------------------------------------------------ # + +_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" +_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" +_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" + +_MON_FEATURE_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + + +def _mon_table_meta(metric_type: str): + if metric_type == "feature": + return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK + if metric_type == "feature_view": + return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK + if metric_type == "feature_service": + return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _mon_upsert( + pg_config: PostgreSQLConfig, + table: str, + columns: List[str], + pk_columns: List[str], 
+ rows: List[Dict[str, Any]], +) -> None: + import json as _json + + non_pk = [c for c in columns if c not in pk_columns] + col_ids = sql.SQL(", ").join(sql.Identifier(c) for c in columns) + placeholders = sql.SQL(", ").join(sql.Placeholder() for _ in columns) + update_clause = sql.SQL(", ").join( + sql.SQL("{} = EXCLUDED.{}").format(sql.Identifier(c), sql.Identifier(c)) + for c in non_pk + ) + pk_ids = sql.SQL(", ").join(sql.Identifier(c) for c in pk_columns) + + query = sql.SQL( + "INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) DO UPDATE SET {}" + ).format(sql.Identifier(table), col_ids, placeholders, pk_ids, update_clause) + + with _get_conn(pg_config) as conn, conn.cursor() as cur: + for row in rows: + values = [] + for col in columns: + val = row.get(col) + if col == "histogram" and val is not None: + val = _json.dumps(val) + values.append(val) + cur.execute(query, values) + conn.commit() + + +def _mon_query( + pg_config: PostgreSQLConfig, + metric_type: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional["date"] = None, + end_date: Optional["date"] = None, +) -> List[Dict[str, Any]]: + import json as _json + from datetime import date as _date + from datetime import datetime as _datetime + + table, _, _ = _mon_table_meta(metric_type) + + conditions = [sql.SQL("project_id = %s")] + params: list = [project] + + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(sql.SQL("{} = %s").format(sql.Identifier(key))) + params.append(value) + + if start_date: + conditions.append(sql.SQL("metric_date >= %s")) + params.append(start_date) + if end_date: + conditions.append(sql.SQL("metric_date <= %s")) + params.append(end_date) + + col_ids = sql.SQL(", ").join(sql.Identifier(c) for c in columns) + query = sql.SQL("SELECT {} FROM {} WHERE {} ORDER BY metric_date ASC").format( + col_ids, + sql.Identifier(table), + sql.SQL(" AND ").join(conditions), + ) + + with 
_get_conn(pg_config) as conn, conn.cursor() as cur: + conn.read_only = True + cur.execute(query, params) + rows = cur.fetchall() + + results = [] + for row in rows: + record = dict(zip(columns, row)) + if "histogram" in record and isinstance(record["histogram"], str): + record["histogram"] = _json.loads(record["histogram"]) + if "metric_date" in record and isinstance(record["metric_date"], _date): + record["metric_date"] = record["metric_date"].isoformat() + if "computed_at" in record and isinstance(record["computed_at"], _datetime): + record["computed_at"] = record["computed_at"].isoformat() + results.append(record) + + return results diff --git a/sdk/python/feast/infra/offline_stores/offline_store.py b/sdk/python/feast/infra/offline_stores/offline_store.py index 2803b15526e..816b8f454af 100644 --- a/sdk/python/feast/infra/offline_stores/offline_store.py +++ b/sdk/python/feast/infra/offline_stores/offline_store.py @@ -15,7 +15,7 @@ import time import warnings from abc import ABC -from datetime import datetime, timedelta, timezone +from datetime import date, datetime, timedelta, timezone from pathlib import Path from typing import ( TYPE_CHECKING, @@ -620,3 +620,137 @@ def get_table_column_names_and_types_from_data_source( data_source: DataSource object """ return data_source.get_table_column_names_and_types(config=config) + + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + """ + Compute monitoring metrics (stats, percentiles, histograms) directly + in the offline store using its native compute engine. + + Backends that don't support this should leave it unimplemented; + the monitoring service will fall back to Python-based computation. + + Args: + config: The config for the current feature store. 
+ data_source: The data source to compute metrics from. + feature_columns: List of (feature_name, feature_type) where + feature_type is "numeric" or "categorical". + timestamp_field: Column used for time-range filtering. + start_date: Start of the time range. + end_date: End of the time range. + histogram_bins: Number of bins for numeric histograms. + top_n: Number of top values for categorical histograms. + + Returns: + A list of metric dicts, one per feature, matching the format + produced by MetricsCalculator.compute_all. + """ + raise NotImplementedError + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + """ + Return the maximum event timestamp from the data source. + + Used by the monitoring service to determine date ranges for + auto-compute. Backends that don't support this should leave it + unimplemented; the caller will fall back to a full-table scan. + + Args: + config: The config for the current feature store. + data_source: The data source to query. + timestamp_field: The timestamp column name. + + Returns: + The maximum timestamp, or None if no data exists. + """ + raise NotImplementedError + + # ------------------------------------------------------------------ # + # Monitoring metrics storage (native) + # ------------------------------------------------------------------ # + + MONITORING_VALID_GRANULARITIES = ( + "daily", + "weekly", + "biweekly", + "monthly", + "quarterly", + ) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + """Create the monitoring metrics tables if they do not exist. + + Backends that don't support native monitoring storage should + leave this unimplemented; the monitoring service will raise an + error indicating the backend lacks storage support. 
+ """ + raise NotImplementedError + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + """Persist monitoring metrics (upsert semantics). + + Args: + config: The config for the current feature store. + metric_type: One of "feature", "feature_view", "feature_service". + metrics: List of metric dicts to upsert. + """ + raise NotImplementedError + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + """Read monitoring metrics with optional filtering. + + Args: + config: The config for the current feature store. + project: Feast project name. + metric_type: One of "feature", "feature_view", "feature_service". + filters: Column-value pairs for WHERE clauses. + start_date: Inclusive lower bound on metric_date. + end_date: Inclusive upper bound on metric_date. + + Returns: + List of metric dicts ordered by metric_date ascending. + """ + raise NotImplementedError + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + """Set is_baseline=FALSE for matching feature metric rows. + + Used to ensure only one baseline exists per feature before + writing a new baseline. 
+ """ + raise NotImplementedError diff --git a/sdk/python/feast/monitoring/__init__.py b/sdk/python/feast/monitoring/__init__.py new file mode 100644 index 00000000000..69a921060a5 --- /dev/null +++ b/sdk/python/feast/monitoring/__init__.py @@ -0,0 +1,7 @@ +from feast.monitoring.dqm_job_manager import DQMJobManager +from feast.monitoring.metrics_calculator import MetricsCalculator + +__all__ = [ + "DQMJobManager", + "MetricsCalculator", +] diff --git a/sdk/python/feast/monitoring/dqm_job_manager.py b/sdk/python/feast/monitoring/dqm_job_manager.py new file mode 100644 index 00000000000..1d6fd9ea384 --- /dev/null +++ b/sdk/python/feast/monitoring/dqm_job_manager.py @@ -0,0 +1,202 @@ +import json +import logging +import uuid +from datetime import datetime, timezone +from typing import Any, Dict, Optional + +from psycopg import sql + +from feast.infra.utils.postgres.connection_utils import _get_conn +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig + +logger = logging.getLogger(__name__) + +_JOBS_TABLE = "feast_monitoring_jobs" + +JOB_STATUS_PENDING = "pending" +JOB_STATUS_RUNNING = "running" +JOB_STATUS_COMPLETED = "completed" +JOB_STATUS_FAILED = "failed" + + +class DQMJobManager: + def __init__(self, config: PostgreSQLConfig): + self._config = config + + def ensure_table(self) -> None: + with _get_conn(self._config) as conn, conn.cursor() as cur: + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_JOBS_TABLE} ( + job_id VARCHAR(36) PRIMARY KEY, + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255), + job_type VARCHAR(50) NOT NULL, + status VARCHAR(20) NOT NULL DEFAULT 'pending', + parameters JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ, + error_message TEXT, + result_summary JSONB + ); + CREATE INDEX IF NOT EXISTS idx_fm_jobs_status + ON {_JOBS_TABLE} (status); + CREATE INDEX IF NOT EXISTS idx_fm_jobs_project + ON {_JOBS_TABLE} (project_id); + """) + conn.commit() + + 
def submit( + self, + project: str, + job_type: str, + feature_view_name: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ) -> str: + job_id = str(uuid.uuid4()) + now = datetime.now(timezone.utc) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + cur.execute( + sql.SQL( + "INSERT INTO {} (job_id, project_id, feature_view_name, " + "job_type, status, parameters, created_at) " + "VALUES (%s, %s, %s, %s, %s, %s, %s)" + ).format(sql.Identifier(_JOBS_TABLE)), + ( + job_id, + project, + feature_view_name, + job_type, + JOB_STATUS_PENDING, + json.dumps(parameters) if parameters else None, + now, + ), + ) + conn.commit() + + return job_id + + def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: + columns = [ + "job_id", + "project_id", + "feature_view_name", + "job_type", + "status", + "parameters", + "created_at", + "started_at", + "completed_at", + "error_message", + "result_summary", + ] + col_sql = sql.SQL(", ").join(sql.Identifier(c) for c in columns) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + conn.read_only = True + cur.execute( + sql.SQL("SELECT {} FROM {} WHERE job_id = %s").format( + col_sql, sql.Identifier(_JOBS_TABLE) + ), + (job_id,), + ) + row = cur.fetchone() + + if row is None: + return None + + record = dict(zip(columns, row)) + for key in ("parameters", "result_summary"): + if isinstance(record.get(key), str): + record[key] = json.loads(record[key]) + for key in ("created_at", "started_at", "completed_at"): + if isinstance(record.get(key), datetime): + record[key] = record[key].isoformat() + return record + + def update_status( + self, + job_id: str, + status: str, + error_message: Optional[str] = None, + result_summary: Optional[Dict[str, Any]] = None, + ) -> None: + now = datetime.now(timezone.utc) + sets = [sql.SQL("status = %s")] + params: list = [status] + + if status == JOB_STATUS_RUNNING: + sets.append(sql.SQL("started_at = %s")) + params.append(now) + elif status in 
(JOB_STATUS_COMPLETED, JOB_STATUS_FAILED): + sets.append(sql.SQL("completed_at = %s")) + params.append(now) + + if error_message is not None: + sets.append(sql.SQL("error_message = %s")) + params.append(error_message) + + if result_summary is not None: + sets.append(sql.SQL("result_summary = %s")) + params.append(json.dumps(result_summary)) + + params.append(job_id) + + query = sql.SQL("UPDATE {} SET {} WHERE job_id = %s").format( + sql.Identifier(_JOBS_TABLE), + sql.SQL(", ").join(sets), + ) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + cur.execute(query, params) + conn.commit() + + def execute_job(self, job_id: str, monitoring_service) -> Dict[str, Any]: + """Execute a DQM job synchronously. Manages status transitions.""" + job = self.get_job(job_id) + if job is None: + raise ValueError(f"Failed to find DQM job '{job_id}'") + + self.update_status(job_id, JOB_STATUS_RUNNING) + + try: + params = job.get("parameters") or {} + job_type = job["job_type"] + project = job["project_id"] + + if job_type == "auto_compute": + result = monitoring_service.auto_compute( + project=project, + feature_view_name=job.get("feature_view_name"), + ) + elif job_type == "baseline": + result = monitoring_service.compute_baseline( + project=project, + feature_view_name=job.get("feature_view_name"), + feature_names=params.get("feature_names"), + ) + elif job_type == "compute": + from datetime import date as date_type + + result = monitoring_service.compute_metrics( + project=project, + feature_view_name=job.get("feature_view_name"), + feature_names=params.get("feature_names"), + start_date=date_type.fromisoformat(params["start_date"]) + if params.get("start_date") + else None, + end_date=date_type.fromisoformat(params["end_date"]) + if params.get("end_date") + else None, + granularity=params.get("granularity", "daily"), + ) + else: + raise ValueError(f"Unknown job type '{job_type}'") + + self.update_status(job_id, JOB_STATUS_COMPLETED, result_summary=result) + return 
result + + except Exception as e: + self.update_status(job_id, JOB_STATUS_FAILED, error_message=str(e)) + raise diff --git a/sdk/python/feast/monitoring/metrics_calculator.py b/sdk/python/feast/monitoring/metrics_calculator.py new file mode 100644 index 00000000000..9160fabc6b0 --- /dev/null +++ b/sdk/python/feast/monitoring/metrics_calculator.py @@ -0,0 +1,158 @@ +import logging +from typing import Dict, List, Optional, Tuple + +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc + +from feast.types import PrimitiveFeastType + +logger = logging.getLogger(__name__) + +_NUMERIC_TYPES = { + PrimitiveFeastType.INT32, + PrimitiveFeastType.INT64, + PrimitiveFeastType.FLOAT32, + PrimitiveFeastType.FLOAT64, + PrimitiveFeastType.DECIMAL, +} + +_CATEGORICAL_TYPES = { + PrimitiveFeastType.STRING, + PrimitiveFeastType.BOOL, +} + + +class MetricsCalculator: + def __init__(self, histogram_bins: int = 20, top_n: int = 10): + self.histogram_bins = histogram_bins + self.top_n = top_n + + @staticmethod + def classify_feature(dtype) -> Optional[str]: + primitive = dtype + if hasattr(dtype, "base_type"): + primitive = dtype.base_type if dtype.base_type else dtype + + if isinstance(primitive, PrimitiveFeastType): + if primitive in _NUMERIC_TYPES: + return "numeric" + if primitive in _CATEGORICAL_TYPES: + return "categorical" + return None + + def compute_numeric(self, array: pa.Array) -> Dict: + total = len(array) + null_count = array.null_count + result = { + "feature_type": "numeric", + "row_count": total, + "null_count": null_count, + "null_rate": null_count / total if total > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + valid = pc.drop_null(array) + if len(valid) == 0: + return result + + float_array = pc.cast(valid, pa.float64()) + result["mean"] = pc.mean(float_array).as_py() + result["stddev"] = pc.stddev(float_array, 
ddof=1).as_py() + + min_max = pc.min_max(float_array) + result["min_val"] = min_max["min"].as_py() + result["max_val"] = min_max["max"].as_py() + + quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99]) + q_values = quantiles.to_pylist() + result["p50"] = q_values[0] + result["p75"] = q_values[1] + result["p90"] = q_values[2] + result["p95"] = q_values[3] + result["p99"] = q_values[4] + + np_array = float_array.to_numpy() + counts, bin_edges = np.histogram(np_array, bins=self.histogram_bins) + result["histogram"] = { + "bins": bin_edges.tolist(), + "counts": counts.tolist(), + "bin_width": float(bin_edges[1] - bin_edges[0]) + if len(bin_edges) > 1 + else 0, + } + + return result + + def compute_categorical(self, array: pa.Array) -> Dict: + total = len(array) + null_count = array.null_count + result = { + "feature_type": "categorical", + "row_count": total, + "null_count": null_count, + "null_rate": null_count / total if total > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, + } + + valid = pc.drop_null(array) + if len(valid) == 0: + return result + + value_counts = pc.value_counts(valid) + entries = [ + {"value": vc["values"].as_py(), "count": vc["counts"].as_py()} + for vc in value_counts + ] + entries.sort(key=lambda x: x["count"], reverse=True) + + unique_count = len(entries) + top_entries = entries[: self.top_n] + other_count = sum(e["count"] for e in entries[self.top_n :]) + + result["histogram"] = { + "values": top_entries, + "other_count": other_count, + "unique_count": unique_count, + } + + return result + + def compute_all( + self, + table: pa.Table, + feature_fields: List[Tuple[str, str]], + ) -> List[Dict]: + results = [] + for name, ftype in feature_fields: + if name not in table.column_names: + logger.warning("Column '%s' not found in arrow table, skipping", name) + continue + column = table.column(name) + 
if ftype == "numeric": + metrics = self.compute_numeric(column) + elif ftype == "categorical": + metrics = self.compute_categorical(column) + else: + continue + metrics["feature_name"] = name + results.append(metrics) + return results diff --git a/sdk/python/feast/monitoring/monitoring_service.py b/sdk/python/feast/monitoring/monitoring_service.py new file mode 100644 index 00000000000..8e2fb2cf04b --- /dev/null +++ b/sdk/python/feast/monitoring/monitoring_service.py @@ -0,0 +1,912 @@ +import logging +import time +from datetime import date, datetime, timedelta, timezone +from typing import Any, Dict, List, Optional, Tuple + +from feast.infra.offline_stores.offline_store import OfflineStore +from feast.monitoring.dqm_job_manager import DQMJobManager +from feast.monitoring.metrics_calculator import MetricsCalculator + +logger = logging.getLogger(__name__) + +VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES + +_EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc) +_FAR_FUTURE = datetime(2099, 12, 31, 23, 59, 59, tzinfo=timezone.utc) + +GRANULARITY_WINDOWS = { + "daily": timedelta(days=1), + "weekly": timedelta(days=7), + "biweekly": timedelta(days=14), + "monthly": timedelta(days=30), + "quarterly": timedelta(days=90), +} + + +class MonitoringService: + def __init__(self, store: "FeatureStore"): # noqa: F821 + self._store = store + self._job_manager: Optional[DQMJobManager] = None + self._calculator = MetricsCalculator() + self._monitoring_tables_ensured = False + + def _get_offline_store(self): + return self._store._get_provider().offline_store + + def _ensure_monitoring_tables(self): + if not self._monitoring_tables_ensured: + self._get_offline_store().ensure_monitoring_tables(self._store.config) + self._monitoring_tables_ensured = True + + @property + def job_manager(self) -> DQMJobManager: + if self._job_manager is None: + offline_store_config = self._store.config.offline_store + self._job_manager = DQMJobManager(offline_store_config) + 
self._job_manager.ensure_table() + return self._job_manager + + # ------------------------------------------------------------------ # + # Auto-compute: detect dates, compute all granularities + # ------------------------------------------------------------------ # + + def auto_compute( + self, + project: Optional[str] = None, + feature_view_name: Optional[str] = None, + ) -> Dict[str, Any]: + """Detect date ranges from source data and compute all granularities.""" + start_time = time.time() + self._ensure_monitoring_tables() + if project is None: + project = self._store.config.project + + feature_views = self._resolve_feature_views(project, feature_view_name) + total_features = 0 + total_views = 0 + granularities_computed = set() + + for fv in feature_views: + try: + feature_fields = self._classify_fields(fv) + if not feature_fields: + continue + + max_ts = self._get_max_timestamp(fv) + if max_ts is None: + logger.warning( + "No data found for feature view '%s', skipping", fv.name + ) + continue + + now = datetime.now(timezone.utc) + + for granularity, window in GRANULARITY_WINDOWS.items(): + window_start = max_ts - window + metrics_list = self._compute_feature_metrics( + fv, + feature_fields, + window_start, + max_ts, + ) + self._save_computed_metrics( + project=project, + feature_view=fv, + metrics_list=metrics_list, + metric_date=window_start.date(), + granularity=granularity, + set_baseline=False, + now=now, + ) + total_features += len(metrics_list) + granularities_computed.add(granularity) + + self._compute_feature_service_metrics( + project=project, + granularity="daily", + metric_dates=[max_ts.date() - timedelta(days=1)], + set_baseline=False, + ) + total_views += 1 + except Exception: + logger.exception( + "Failed to auto-compute metrics for feature view '%s'", fv.name + ) + + duration_ms = int((time.time() - start_time) * 1000) + + return { + "status": "completed", + "computed_feature_views": total_views, + "computed_features": total_features, + 
"granularities": sorted(granularities_computed), + "duration_ms": duration_ms, + } + + # ------------------------------------------------------------------ # + # Baseline: compute from all available source data + # ------------------------------------------------------------------ # + + def compute_baseline( + self, + project: Optional[str] = None, + feature_view_name: Optional[str] = None, + feature_names: Optional[List[str]] = None, + ) -> Dict[str, Any]: + """Compute baseline metrics from all available source data. + + Idempotent: only features without existing baselines are computed. + """ + start_time = time.time() + self._ensure_monitoring_tables() + if project is None: + project = self._store.config.project + + feature_views = self._resolve_feature_views(project, feature_view_name) + total_features = 0 + total_views = 0 + + for fv in feature_views: + try: + fields_needing_baseline = self._get_features_without_baseline( + project, fv, feature_names + ) + if not fields_needing_baseline: + logger.info( + "All features in '%s' already have baselines, skipping", + fv.name, + ) + continue + + feature_fields = self._classify_fields( + fv, fields=fields_needing_baseline + ) + if not feature_fields: + continue + + metrics_list = self._compute_feature_metrics( + fv, + feature_fields, + _EPOCH, + _FAR_FUTURE, + ) + + now = datetime.now(timezone.utc) + offline_store = self._get_offline_store() + offline_store.clear_monitoring_baseline( + config=self._store.config, + project=project, + feature_view_name=fv.name, + ) + + self._save_computed_metrics( + project=project, + feature_view=fv, + metrics_list=metrics_list, + metric_date=date.today(), + granularity="daily", + set_baseline=True, + now=now, + ) + + total_features += len(metrics_list) + total_views += 1 + except Exception: + logger.exception( + "Failed to compute baseline for feature view '%s'", fv.name + ) + + duration_ms = int((time.time() - start_time) * 1000) + + return { + "status": "completed", + 
"computed_features": total_features, + "computed_feature_views": total_views, + "is_baseline": True, + "duration_ms": duration_ms, + } + + # ------------------------------------------------------------------ # + # Compute: explicit dates + granularity (stored) + # ------------------------------------------------------------------ # + + def compute_metrics( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_names: Optional[List[str]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + granularity: str = "daily", + set_baseline: bool = False, + ) -> Dict[str, Any]: + self._ensure_monitoring_tables() + if granularity not in VALID_GRANULARITIES: + raise ValueError( + f"Invalid granularity '{granularity}'. " + f"Must be one of {VALID_GRANULARITIES}" + ) + + start_time = time.time() + start_dt, end_dt = self._to_date_range(start_date, end_date) + + feature_views = self._resolve_feature_views(project, feature_view_name) + + total_features = 0 + total_views = 0 + computed_dates: set = set() + + for fv in feature_views: + try: + fv_metrics = self._compute_for_feature_view( + project=project, + feature_view=fv, + feature_names=feature_names, + start_dt=start_dt, + end_dt=end_dt, + granularity=granularity, + set_baseline=set_baseline, + ) + total_features += fv_metrics["feature_count"] + total_views += 1 + computed_dates.update(fv_metrics["dates"]) + except Exception: + logger.exception( + "Failed to compute metrics for feature view '%s'", fv.name + ) + + total_services = self._compute_feature_service_metrics( + project=project, + granularity=granularity, + metric_dates=list(computed_dates), + set_baseline=set_baseline, + ) + + duration_ms = int((time.time() - start_time) * 1000) + + return { + "status": "completed", + "granularity": granularity, + "computed_features": total_features, + "computed_feature_views": total_views, + "computed_feature_services": total_services, + "metric_dates": sorted(d.isoformat() for d in 
computed_dates), + "duration_ms": duration_ms, + } + + # ------------------------------------------------------------------ # + # Transient compute (not stored) + # ------------------------------------------------------------------ # + + def compute_transient( + self, + project: str, + feature_view_name: str, + feature_names: Optional[List[str]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> Dict[str, Any]: + """Compute metrics on-the-fly for an arbitrary date range without persisting.""" + start_time = time.time() + start_dt, end_dt = self._to_date_range(start_date, end_date) + effective_start = start_date or (date.today() - timedelta(days=1)) + effective_end = end_date or date.today() + + fv = self._store.registry.get_feature_view( + name=feature_view_name, project=project + ) + + feature_fields = self._classify_fields(fv, feature_names=feature_names) + if not feature_fields: + return { + "status": "completed", + "feature_view_name": feature_view_name, + "start_date": effective_start.isoformat(), + "end_date": effective_end.isoformat(), + "metrics": [], + "duration_ms": int((time.time() - start_time) * 1000), + } + + metrics_list = self._compute_feature_metrics( + fv, + feature_fields, + start_dt, + end_dt, + ) + + for m in metrics_list: + m["feature_view_name"] = feature_view_name + m["start_date"] = effective_start.isoformat() + m["end_date"] = effective_end.isoformat() + + return { + "status": "completed", + "feature_view_name": feature_view_name, + "start_date": effective_start.isoformat(), + "end_date": effective_end.isoformat(), + "metrics": metrics_list, + "duration_ms": int((time.time() - start_time) * 1000), + } + + # ------------------------------------------------------------------ # + # DQM Job helpers + # ------------------------------------------------------------------ # + + def submit_job( + self, + project: str, + job_type: str, + feature_view_name: Optional[str] = None, + parameters: Optional[Dict[str, 
Any]] = None, + ) -> str: + return self.job_manager.submit( + project=project, + job_type=job_type, + feature_view_name=feature_view_name, + parameters=parameters, + ) + + def get_job(self, job_id: str) -> Optional[Dict[str, Any]]: + return self.job_manager.get_job(job_id) + + def execute_job(self, job_id: str) -> Dict[str, Any]: + return self.job_manager.execute_job(job_id, self) + + # ------------------------------------------------------------------ # + # Read helpers (delegate to offline store) + # ------------------------------------------------------------------ # + + def _query( + self, + metric_type: str, + project: str, + filters=None, + start_date=None, + end_date=None, + ): + self._ensure_monitoring_tables() + return self._get_offline_store().query_monitoring_metrics( + config=self._store.config, + project=project, + metric_type=metric_type, + filters=filters, + start_date=start_date, + end_date=end_date, + ) + + def get_feature_metrics( + self, + project: str, + feature_service_name: Optional[str] = None, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + filters = { + "feature_view_name": feature_view_name, + "feature_name": feature_name, + "granularity": granularity, + "data_source_type": data_source_type, + } + if feature_service_name: + return self._get_metrics_by_service( + project, + feature_service_name, + lambda fv_name: self._query( + "feature", + project, + {**filters, "feature_view_name": fv_name}, + start_date, + end_date, + ), + ) + return self._query("feature", project, filters, start_date, end_date) + + def get_feature_view_metrics( + self, + project: str, + feature_service_name: Optional[str] = None, + feature_view_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + 
start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + filters = { + "feature_view_name": feature_view_name, + "granularity": granularity, + "data_source_type": data_source_type, + } + if feature_service_name: + return self._get_metrics_by_service( + project, + feature_service_name, + lambda fv_name: self._query( + "feature_view", + project, + {**filters, "feature_view_name": fv_name}, + start_date, + end_date, + ), + ) + return self._query("feature_view", project, filters, start_date, end_date) + + def get_feature_service_metrics( + self, + project: str, + feature_service_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + filters = { + "feature_service_name": feature_service_name, + "granularity": granularity, + "data_source_type": data_source_type, + } + return self._query("feature_service", project, filters, start_date, end_date) + + def get_baseline( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> List[Dict[str, Any]]: + filters = { + "feature_view_name": feature_view_name, + "feature_name": feature_name, + "data_source_type": data_source_type, + "is_baseline": True, + } + return self._query("feature", project, filters) + + def get_timeseries( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + feature_service_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + return self.get_feature_metrics( + project=project, + feature_service_name=feature_service_name, + feature_view_name=feature_view_name, + feature_name=feature_name, + 
granularity=granularity, + data_source_type=data_source_type, + start_date=start_date, + end_date=end_date, + ) + + # ------------------------------------------------------------------ # + # Auto-baseline trigger for feast apply + # ------------------------------------------------------------------ # + + def submit_baseline_for_new_features( + self, + project: str, + feature_views: Optional[List] = None, + ) -> List[str]: + """Submit baseline DQM jobs for feature views with new features. + + Called from feast apply. Returns list of submitted job IDs. + Idempotent — only features without existing baselines are included. + """ + if project is None: + project = self._store.config.project + + if feature_views is None: + feature_views = self._store.registry.list_feature_views(project=project) + + job_ids = [] + for fv in feature_views: + new_features = self._get_features_without_baseline(project, fv) + if not new_features: + continue + + feature_names = [f.name for f in new_features] + job_id = self.job_manager.submit( + project=project, + job_type="baseline", + feature_view_name=fv.name, + parameters={"feature_names": feature_names}, + ) + job_ids.append(job_id) + logger.info( + "Queued baseline computation for '%s' features %s (job: %s)", + fv.name, + feature_names, + job_id, + ) + + return job_ids + + # ------------------------------------------------------------------ # + # Private: compute engine dispatch (SQL push-down → Python fallback) + # ------------------------------------------------------------------ # + + def _compute_feature_metrics( + self, + feature_view, + feature_fields: List[Tuple[str, str]], + start_dt: datetime, + end_dt: datetime, + ) -> List[Dict[str, Any]]: + """Compute metrics, preferring offline store SQL push-down. + + Falls back to Python-based (PyArrow/NumPy) computation when the + offline store does not implement compute_monitoring_metrics. 
+ """ + provider = self._store._get_provider() + offline_store = provider.offline_store + try: + return offline_store.compute_monitoring_metrics( + config=self._store.config, + data_source=feature_view.batch_source, + feature_columns=feature_fields, + timestamp_field=feature_view.batch_source.timestamp_field, + start_date=start_dt, + end_date=end_dt, + histogram_bins=self._calculator.histogram_bins, + top_n=self._calculator.top_n, + ) + except NotImplementedError: + logger.debug( + "Offline store does not support compute_monitoring_metrics, " + "falling back to Python-based computation" + ) + arrow_table = self._read_batch_source( + feature_view, + feature_fields, + start_dt, + end_dt, + ) + return self._calculator.compute_all(arrow_table, feature_fields) + + def _get_max_timestamp(self, feature_view) -> Optional[datetime]: + """Query the batch source for MAX(event_timestamp). + + Prefers the offline store's native push-down; falls back to reading + the full table and computing max in Python. 
+ """ + provider = self._store._get_provider() + offline_store = provider.offline_store + try: + return offline_store.get_monitoring_max_timestamp( + config=self._store.config, + data_source=feature_view.batch_source, + timestamp_field=feature_view.batch_source.timestamp_field, + ) + except NotImplementedError: + return self._get_max_timestamp_fallback(feature_view) + + def _get_max_timestamp_fallback(self, feature_view) -> Optional[datetime]: + """Pull all data and compute max timestamp in Python (fallback).""" + import pyarrow.compute as pc + + data_source = feature_view.batch_source + ts_field = data_source.timestamp_field + + provider = self._store._get_provider() + offline_store = provider.offline_store + + retrieval_job = offline_store.pull_all_from_table_or_query( + config=self._store.config, + data_source=data_source, + join_key_columns=self._resolve_join_key_columns(feature_view), + feature_name_columns=[], + timestamp_field=ts_field, + created_timestamp_column=data_source.created_timestamp_column, + start_date=_EPOCH, + end_date=_FAR_FUTURE, + ) + + table = retrieval_job.to_arrow() + if ts_field not in table.column_names or len(table) == 0: + return None + + max_val = pc.max(table.column(ts_field)).as_py() + if max_val is None: + return None + + if isinstance(max_val, datetime): + return max_val if max_val.tzinfo else max_val.replace(tzinfo=timezone.utc) + + return datetime.combine(max_val, datetime.min.time(), tzinfo=timezone.utc) + + # ------------------------------------------------------------------ # + # Private: shared helpers (DRY) + # ------------------------------------------------------------------ # + + @staticmethod + def _to_date_range( + start_date: Optional[date], end_date: Optional[date] + ) -> Tuple[datetime, datetime]: + today = date.today() + if end_date is None: + end_date = today + if start_date is None: + start_date = end_date - timedelta(days=1) + start_dt = datetime( + start_date.year, start_date.month, start_date.day, 
tzinfo=timezone.utc + ) + end_dt = datetime( + end_date.year, end_date.month, end_date.day, 23, 59, 59, tzinfo=timezone.utc + ) + return start_dt, end_dt + + @staticmethod + def _classify_fields( + feature_view, + feature_names=None, + fields=None, + ) -> List[Tuple[str, str]]: + """Extract and classify features as numeric/categorical. + + Args: + feature_view: FeatureView to extract fields from (used if fields is None). + feature_names: Optional filter list of feature names. + fields: Optional pre-selected Field objects (e.g., from idempotency check). + """ + if fields is None: + fields = feature_view.features + if feature_names: + fields = [f for f in fields if f.name in feature_names] + + result = [] + for field in fields: + ftype = MetricsCalculator.classify_feature(field.dtype) + if ftype is None: + logger.warning( + "Unsupported dtype '%s' for feature '%s', skipping", + field.dtype, + field.name, + ) + continue + result.append((field.name, ftype)) + return result + + def _save_computed_metrics( + self, + project: str, + feature_view, + metrics_list: List[Dict[str, Any]], + metric_date: date, + granularity: str, + set_baseline: bool, + now: datetime, + ) -> None: + if not metrics_list: + return + + offline_store = self._get_offline_store() + config = self._store.config + + if set_baseline: + offline_store.clear_monitoring_baseline( + config=config, + project=project, + feature_view_name=feature_view.name, + ) + + for m in metrics_list: + m["project_id"] = project + m["feature_view_name"] = feature_view.name + m["metric_date"] = metric_date + m["granularity"] = granularity + m["data_source_type"] = "batch" + m["computed_at"] = now + m["is_baseline"] = set_baseline + + offline_store.save_monitoring_metrics(config, "feature", metrics_list) + + null_rates = [ + m["null_rate"] for m in metrics_list if m.get("null_rate") is not None + ] + view_metric = { + "project_id": project, + "feature_view_name": feature_view.name, + "metric_date": metric_date, + "granularity": 
granularity, + "data_source_type": "batch", + "computed_at": now, + "is_baseline": set_baseline, + "total_row_count": metrics_list[0]["row_count"] if metrics_list else 0, + "total_features": len(metrics_list), + "features_with_nulls": sum( + 1 for m in metrics_list if (m.get("null_count") or 0) > 0 + ), + "avg_null_rate": sum(null_rates) / len(null_rates) if null_rates else 0.0, + "max_null_rate": max(null_rates) if null_rates else 0.0, + } + offline_store.save_monitoring_metrics(config, "feature_view", [view_metric]) + + def _resolve_join_key_columns(self, feature_view) -> List[str]: + config = self._store.config + return ( + [ + entity.name + for entity in self._store.registry.list_entities(project=config.project) + if entity.name in (feature_view.entities or []) + ] + or feature_view.entities + or [] + ) + + def _get_metrics_by_service( + self, project: str, feature_service_name: str, query_fn + ): + fs = self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + fv_names = [proj.name for proj in fs.feature_view_projections] + results = [] + for fv_name in fv_names: + results.extend(query_fn(fv_name)) + return results + + def _resolve_feature_views(self, project: str, feature_view_name: Optional[str]): + if feature_view_name: + fv = self._store.registry.get_feature_view( + name=feature_view_name, project=project + ) + return [fv] + return self._store.registry.list_feature_views(project=project) + + def _get_features_without_baseline(self, project, feature_view, feature_names=None): + existing = self.get_baseline( + project=project, + feature_view_name=feature_view.name, + ) + existing_names = {m["feature_name"] for m in existing} + + fields = feature_view.features + if feature_names: + fields = [f for f in fields if f.name in feature_names] + + return [f for f in fields if f.name not in existing_names] + + def _compute_for_feature_view( + self, + project: str, + feature_view, + feature_names: Optional[List[str]], + start_dt: 
datetime, + end_dt: datetime, + granularity: str, + set_baseline: bool, + ) -> Dict[str, Any]: + feature_fields = self._classify_fields( + feature_view, feature_names=feature_names + ) + if not feature_fields: + return {"feature_count": 0, "dates": set()} + + metrics_list = self._compute_feature_metrics( + feature_view, + feature_fields, + start_dt, + end_dt, + ) + + now = datetime.now(timezone.utc) + metric_date = start_dt.date() + + self._save_computed_metrics( + project=project, + feature_view=feature_view, + metrics_list=metrics_list, + metric_date=metric_date, + granularity=granularity, + set_baseline=set_baseline, + now=now, + ) + + return {"feature_count": len(metrics_list), "dates": {metric_date}} + + def _read_batch_source(self, feature_view, feature_fields, start_dt, end_dt): + config = self._store.config + data_source = feature_view.batch_source + + provider = self._store._get_provider() + offline_store = provider.offline_store + + retrieval_job = offline_store.pull_all_from_table_or_query( + config=config, + data_source=data_source, + join_key_columns=self._resolve_join_key_columns(feature_view), + feature_name_columns=[name for name, _ in feature_fields], + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + start_date=start_dt, + end_date=end_dt, + ) + + return retrieval_job.to_arrow() + + def _compute_feature_service_metrics( + self, + project: str, + granularity: str, + metric_dates: List[date], + set_baseline: bool, + ) -> int: + if not metric_dates: + return 0 + + feature_services = self._store.registry.list_feature_services(project=project) + if not feature_services: + return 0 + + offline_store = self._get_offline_store() + config = self._store.config + now = datetime.now(timezone.utc) + count = 0 + + for fs in feature_services: + try: + fv_names = [proj.name for proj in fs.feature_view_projections] + + for metric_date in metric_dates: + fv_metrics = 
offline_store.query_monitoring_metrics( + config=config, + project=project, + metric_type="feature_view", + filters={ + "granularity": granularity, + "data_source_type": "batch", + }, + start_date=metric_date, + end_date=metric_date, + ) + + relevant = [ + m for m in fv_metrics if m.get("feature_view_name") in fv_names + ] + if not relevant: + continue + + null_rates = [ + m["avg_null_rate"] + for m in relevant + if m.get("avg_null_rate") is not None + ] + + service_metric = { + "project_id": project, + "feature_service_name": fs.name, + "metric_date": metric_date + if isinstance(metric_date, date) + else date.fromisoformat(str(metric_date)), + "granularity": granularity, + "data_source_type": "batch", + "computed_at": now, + "is_baseline": set_baseline, + "total_feature_views": len(relevant), + "total_features": sum( + m.get("total_features", 0) for m in relevant + ), + "avg_null_rate": ( + sum(null_rates) / len(null_rates) if null_rates else 0.0 + ), + "max_null_rate": max(null_rates) if null_rates else 0.0, + } + offline_store.save_monitoring_metrics( + config, + "feature_service", + [service_metric], + ) + count += 1 + except Exception: + logger.exception("Failed to compute service metrics for '%s'", fs.name) + + return count diff --git a/sdk/python/feast/monitoring/monitoring_store.py b/sdk/python/feast/monitoring/monitoring_store.py new file mode 100644 index 00000000000..c6ec4943c90 --- /dev/null +++ b/sdk/python/feast/monitoring/monitoring_store.py @@ -0,0 +1,415 @@ +import json +import logging +from datetime import date, datetime +from typing import Any, Dict, List, Optional + +from psycopg import sql + +from feast.infra.utils.postgres.connection_utils import _get_conn +from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig + +logger = logging.getLogger(__name__) + +VALID_GRANULARITIES = ("daily", "weekly", "biweekly", "monthly", "quarterly") + +_FEATURE_METRICS_TABLE = "feast_monitoring_feature_metrics" +_FEATURE_VIEW_METRICS_TABLE = 
"feast_monitoring_feature_view_metrics" +_FEATURE_SERVICE_METRICS_TABLE = "feast_monitoring_feature_service_metrics" + +_FEATURE_METRICS_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] + +_FEATURE_VIEW_METRICS_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] + +_FEATURE_SERVICE_METRICS_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] + + +class MonitoringStore: + def __init__(self, config: PostgreSQLConfig): + self._config = config + + def ensure_tables(self) -> None: + with _get_conn(self._config) as conn, conn.cursor() as cur: + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_FEATURE_METRICS_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + feature_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + feature_type VARCHAR(50) NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate DOUBLE PRECISION, + mean DOUBLE PRECISION, + stddev DOUBLE PRECISION, + min_val DOUBLE PRECISION, + max_val DOUBLE PRECISION, + p50 DOUBLE PRECISION, + p75 DOUBLE PRECISION, + p90 DOUBLE PRECISION, + p95 DOUBLE PRECISION, + p99 DOUBLE PRECISION, + histogram JSONB, + PRIMARY KEY 
(project_id, feature_view_name, feature_name, + metric_date, granularity, data_source_type) + ); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_project + ON {_FEATURE_METRICS_TABLE} (project_id); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_view + ON {_FEATURE_METRICS_TABLE} (project_id, feature_view_name); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_date + ON {_FEATURE_METRICS_TABLE} (metric_date); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_granularity + ON {_FEATURE_METRICS_TABLE} (granularity); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_source_type + ON {_FEATURE_METRICS_TABLE} (data_source_type); + CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_baseline + ON {_FEATURE_METRICS_TABLE} (project_id, feature_view_name, feature_name) + WHERE is_baseline = TRUE; + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_FEATURE_VIEW_METRICS_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_row_count BIGINT, + total_features INTEGER, + features_with_nulls INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_view_name, metric_date, + granularity, data_source_type) + ); + """) + + cur.execute(f""" + CREATE TABLE IF NOT EXISTS {_FEATURE_SERVICE_METRICS_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_service_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_feature_views INTEGER, + total_features INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + 
PRIMARY KEY (project_id, feature_service_name, metric_date, + granularity, data_source_type) + ); + """) + conn.commit() + + def save_feature_metrics(self, metrics: List[Dict[str, Any]]) -> None: + if not metrics: + return + self._upsert( + _FEATURE_METRICS_TABLE, + _FEATURE_METRICS_COLUMNS, + [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + ], + metrics, + ) + + def save_feature_view_metrics(self, metrics: List[Dict[str, Any]]) -> None: + if not metrics: + return + self._upsert( + _FEATURE_VIEW_METRICS_TABLE, + _FEATURE_VIEW_METRICS_COLUMNS, + [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + ], + metrics, + ) + + def save_feature_service_metrics(self, metrics: List[Dict[str, Any]]) -> None: + if not metrics: + return + self._upsert( + _FEATURE_SERVICE_METRICS_TABLE, + _FEATURE_SERVICE_METRICS_COLUMNS, + [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + ], + metrics, + ) + + def get_feature_metrics( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + return self._query_metrics( + _FEATURE_METRICS_TABLE, + _FEATURE_METRICS_COLUMNS, + project=project, + filters={ + "feature_view_name": feature_view_name, + "feature_name": feature_name, + "granularity": granularity, + "data_source_type": data_source_type, + }, + start_date=start_date, + end_date=end_date, + ) + + def get_feature_view_metrics( + self, + project: str, + feature_view_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + return self._query_metrics( + 
_FEATURE_VIEW_METRICS_TABLE, + _FEATURE_VIEW_METRICS_COLUMNS, + project=project, + filters={ + "feature_view_name": feature_view_name, + "granularity": granularity, + "data_source_type": data_source_type, + }, + start_date=start_date, + end_date=end_date, + ) + + def get_feature_service_metrics( + self, + project: str, + feature_service_name: Optional[str] = None, + granularity: Optional[str] = None, + data_source_type: Optional[str] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + return self._query_metrics( + _FEATURE_SERVICE_METRICS_TABLE, + _FEATURE_SERVICE_METRICS_COLUMNS, + project=project, + filters={ + "feature_service_name": feature_service_name, + "granularity": granularity, + "data_source_type": data_source_type, + }, + start_date=start_date, + end_date=end_date, + ) + + def get_baseline( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> List[Dict[str, Any]]: + return self._query_metrics( + _FEATURE_METRICS_TABLE, + _FEATURE_METRICS_COLUMNS, + project=project, + filters={ + "feature_view_name": feature_view_name, + "feature_name": feature_name, + "data_source_type": data_source_type, + "is_baseline": True, + }, + ) + + def clear_baseline( + self, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + conditions = [sql.SQL("project_id = %s")] + params: list = [project] + + if feature_view_name: + conditions.append(sql.SQL("feature_view_name = %s")) + params.append(feature_view_name) + if feature_name: + conditions.append(sql.SQL("feature_name = %s")) + params.append(feature_name) + if data_source_type: + conditions.append(sql.SQL("data_source_type = %s")) + params.append(data_source_type) + + conditions.append(sql.SQL("is_baseline = TRUE")) + + query = sql.SQL("UPDATE {} SET is_baseline = FALSE 
WHERE {}").format( + sql.Identifier(_FEATURE_METRICS_TABLE), + sql.SQL(" AND ").join(conditions), + ) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + cur.execute(query, params) + conn.commit() + + # -- Private helpers -- + + def _upsert( + self, + table: str, + columns: List[str], + pk_columns: List[str], + rows: List[Dict[str, Any]], + ) -> None: + non_pk_columns = [c for c in columns if c not in pk_columns] + + col_identifiers = sql.SQL(", ").join(sql.Identifier(c) for c in columns) + placeholders = sql.SQL(", ").join(sql.Placeholder() for _ in columns) + update_clause = sql.SQL(", ").join( + sql.SQL("{} = EXCLUDED.{}").format(sql.Identifier(c), sql.Identifier(c)) + for c in non_pk_columns + ) + pk_identifiers = sql.SQL(", ").join(sql.Identifier(c) for c in pk_columns) + + query = sql.SQL( + "INSERT INTO {} ({}) VALUES ({}) ON CONFLICT ({}) DO UPDATE SET {}" + ).format( + sql.Identifier(table), + col_identifiers, + placeholders, + pk_identifiers, + update_clause, + ) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + for row in rows: + values = [] + for col in columns: + val = row.get(col) + if col == "histogram" and val is not None: + val = json.dumps(val) + values.append(val) + cur.execute(query, values) + conn.commit() + + def _query_metrics( + self, + table: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + conditions = [sql.SQL("project_id = %s")] + params: list = [project] + + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(sql.SQL("{} = %s").format(sql.Identifier(key))) + params.append(value) + + if start_date: + conditions.append(sql.SQL("metric_date >= %s")) + params.append(start_date) + if end_date: + conditions.append(sql.SQL("metric_date <= %s")) + params.append(end_date) + + col_identifiers = sql.SQL(", ").join(sql.Identifier(c) for c 
in columns) + query = sql.SQL("SELECT {} FROM {} WHERE {} ORDER BY metric_date ASC").format( + col_identifiers, + sql.Identifier(table), + sql.SQL(" AND ").join(conditions), + ) + + with _get_conn(self._config) as conn, conn.cursor() as cur: + conn.read_only = True + cur.execute(query, params) + rows = cur.fetchall() + + results = [] + for row in rows: + record = dict(zip(columns, row)) + if "histogram" in record and isinstance(record["histogram"], str): + record["histogram"] = json.loads(record["histogram"]) + if "metric_date" in record and isinstance(record["metric_date"], date): + record["metric_date"] = record["metric_date"].isoformat() + if "computed_at" in record and isinstance(record["computed_at"], datetime): + record["computed_at"] = record["computed_at"].isoformat() + results.append(record) + + return results diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 28fe86602ad..199a5f5503e 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -1,594 +1,622 @@ -import base64 -import importlib -import json -import logging -import os -import random -import re -import sys -import tempfile -from importlib.abc import Loader -from importlib.machinery import ModuleSpec -from pathlib import Path -from typing import List, Optional, Set, Union - -import click -from click.exceptions import BadParameter - -from feast import PushSource -from feast.batch_feature_view import BatchFeatureView -from feast.constants import FEATURE_STORE_YAML_ENV_NAME -from feast.data_source import DataSource, KafkaSource, KinesisSource -from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add -from feast.entity import Entity -from feast.feature_service import FeatureService -from feast.feature_store import FeatureStore -from feast.feature_view import DUMMY_ENTITY, FeatureView -from feast.file_utils import replace_str_in_file -from feast.infra.registry.base_registry import BaseRegistry -from 
feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry -from feast.names import adjectives, animals -from feast.on_demand_feature_view import OnDemandFeatureView -from feast.permissions.permission import Permission -from feast.project import Project -from feast.repo_config import RepoConfig -from feast.repo_contents import RepoContents -from feast.stream_feature_view import StreamFeatureView - -logger = logging.getLogger(__name__) - - -def py_path_to_module(path: Path) -> str: - return ( - str(path.relative_to(os.getcwd()))[: -len(".py")] - .replace("./", "") - .replace("/", ".") - .replace("\\", ".") - ) - - -def read_feastignore(repo_root: Path) -> List[str]: - """Read .feastignore in the repo root directory (if exists) and return the list of user-defined ignore paths""" - feast_ignore = repo_root / ".feastignore" - if not feast_ignore.is_file(): - return [] - lines = feast_ignore.read_text().strip().split("\n") - ignore_paths = [] - for line in lines: - # Remove everything after the first occurance of "#" symbol (comments) - if line.find("#") >= 0: - line = line[: line.find("#")] - # Strip leading or ending whitespaces - line = line.strip() - # Add this processed line to ignore_paths if it's not empty - if len(line) > 0: - ignore_paths.append(line) - return ignore_paths - - -def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]: - """Get all ignore files that match any of the user-defined ignore paths""" - ignore_files = set() - for ignore_path in set(ignore_paths): - # ignore_path may contains matchers (* or **). 
Use glob() to match user-defined path to actual paths - for matched_path in repo_root.glob(ignore_path): - if matched_path.is_file(): - # If the matched path is a file, add that to ignore_files set - ignore_files.add(matched_path.resolve()) - else: - # Otherwise, list all Python files in that directory and add all of them to ignore_files set - ignore_files |= { - sub_path.resolve() - for sub_path in matched_path.glob("**/*.py") - if sub_path.is_file() - } - return ignore_files - - -def get_repo_files(repo_root: Path) -> List[Path]: - """Get the list of all repo files, ignoring undesired files & directories specified in .feastignore""" - # Read ignore paths from .feastignore and create a set of all files that match any of these paths - ignore_paths = read_feastignore(repo_root) + [ - ".git", - ".feastignore", - ".venv", - "**/.ipynb_checkpoints", - "**/.pytest_cache", - "**/__pycache__", - ] - ignore_files = get_ignore_files(repo_root, ignore_paths) - - # List all Python files in the root directory (recursively) - repo_files = { - p.resolve() - for p in repo_root.glob("**/*.py") - if p.is_file() and "__init__.py" != p.name - } - # Ignore all files that match any of the ignore paths in .feastignore - repo_files -= ignore_files - - # Sort repo_files to read them in the same order every time - return sorted(repo_files) - - -def parse_repo(repo_root: Path) -> RepoContents: - """ - Collects unique Feast object definitions from the given feature repo. - - Specifically, if an object foo has already been added, bar will still be added if - (bar == foo), but not if (bar is foo). This ensures that import statements will - not result in duplicates, but defining two equal objects will. 
- """ - res = RepoContents( - projects=[], - data_sources=[], - entities=[], - feature_views=[], - feature_services=[], - on_demand_feature_views=[], - stream_feature_views=[], - permissions=[], - ) - - for repo_file in get_repo_files(repo_root): - module_path = py_path_to_module(repo_file) - module = importlib.import_module(module_path) - - for attr_name in dir(module): - obj = getattr(module, attr_name) - - if isinstance(obj, DataSource) and not any( - (obj is ds) for ds in res.data_sources - ): - res.data_sources.append(obj) - - # Handle batch sources defined within stream sources. - if ( - isinstance(obj, PushSource) - or isinstance(obj, KafkaSource) - or isinstance(obj, KinesisSource) - ): - batch_source = obj.batch_source - - if batch_source and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - if ( - isinstance(obj, FeatureView) - and not any((obj is fv) for fv in res.feature_views) - and not isinstance(obj, StreamFeatureView) - and not isinstance(obj, BatchFeatureView) - ): - res.feature_views.append(obj) - - # Handle batch sources defined with feature views. - batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - - # Handle stream sources defined with feature views. - if obj.stream_source: - stream_source = obj.stream_source - if not any((stream_source is ds) for ds in res.data_sources): - res.data_sources.append(stream_source) - elif isinstance(obj, StreamFeatureView) and not any( - (obj is sfv) for sfv in res.stream_feature_views - ): - res.stream_feature_views.append(obj) - - # Handle batch sources defined with feature views. 
- batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - assert obj.stream_source - stream_source = obj.stream_source - if not any((stream_source is ds) for ds in res.data_sources): - res.data_sources.append(stream_source) - elif isinstance(obj, BatchFeatureView) and not any( - (obj is bfv) for bfv in res.feature_views - ): - res.feature_views.append(obj) - - # Handle batch sources defined with feature views. - batch_source = obj.batch_source - if batch_source is not None and not any( - (batch_source is ds) for ds in res.data_sources - ): - res.data_sources.append(batch_source) - elif isinstance(obj, Entity) and not any( - (obj is entity) for entity in res.entities - ): - res.entities.append(obj) - elif isinstance(obj, FeatureService) and not any( - (obj is fs) for fs in res.feature_services - ): - res.feature_services.append(obj) - elif isinstance(obj, OnDemandFeatureView) and not any( - (obj is odfv) for odfv in res.on_demand_feature_views - ): - res.on_demand_feature_views.append(obj) - elif isinstance(obj, Permission) and not any( - (obj is p) for p in res.permissions - ): - res.permissions.append(obj) - elif isinstance(obj, Project) and not any((obj is p) for p in res.projects): - res.projects.append(obj) - - res.entities.append(DUMMY_ENTITY) - return res - - -def plan( - repo_config: RepoConfig, - repo_path: Path, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, -): - os.chdir(repo_path) - repo = _get_repo_contents(repo_path, repo_config.project, repo_config) - for project in repo.projects: - repo_config.project = project.name - store, registry = _get_store_and_registry(repo_config) - # TODO: When we support multiple projects in a single repo, we should filter repo contents by project - if not skip_source_validation: - provider = store._get_provider() - data_sources = [ - t.batch_source for t in repo.feature_views 
if t.batch_source is not None - ] - # Make sure the data source used by this feature view is supported by Feast - for data_source in data_sources: - provider.validate_data_source(store.config, data_source) - - registry_diff, infra_diff, _ = store.plan( - repo, skip_feature_view_validation=skip_feature_view_validation - ) - click.echo(registry_diff.to_string()) - click.echo(infra_diff.to_string()) - - -def _get_repo_contents( - repo_path, - project_name: Optional[str] = None, - repo_config: Optional[RepoConfig] = None, -): - sys.dont_write_bytecode = True - repo = parse_repo(repo_path) - - if len(repo.projects) < 1: - if project_name: - print( - f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml" - ) - project_description = ( - repo_config.project_description if repo_config else None - ) - repo.projects.append( - Project(name=project_name, description=project_description or "") - ) - else: - print( - "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml" - ) - sys.exit(1) - elif len(repo.projects) == 1: - if repo.projects[0].name != project_name: - print( - "Project object name should match with the project name defined in feature_store.yaml" - ) - sys.exit(1) - else: - print( - "Multiple projects found in the repository. Currently no support for multiple projects" - ) - sys.exit(1) - - return repo - - -def _get_store_and_registry(repo_config): - store = FeatureStore(config=repo_config) - registry = store.registry - return store, registry - - -def extract_objects_for_apply_delete(project, registry, repo): - # TODO(achals): This code path should be refactored to handle added & kept entities separately. 
- ( - _, - objs_to_delete, - objs_to_update, - objs_to_add, - ) = extract_objects_for_keep_delete_update_add(registry, project, repo) - - all_to_apply: List[ - Union[ - Entity, - FeatureView, - OnDemandFeatureView, - StreamFeatureView, - FeatureService, - ] - ] = [] - for object_type in FEAST_OBJECT_TYPES: - to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) - all_to_apply.extend(to_apply) - - all_to_delete: List[ - Union[ - Entity, - FeatureView, - OnDemandFeatureView, - StreamFeatureView, - FeatureService, - ] - ] = [] - for object_type in FEAST_OBJECT_TYPES: - all_to_delete.extend(objs_to_delete[object_type]) - - return ( - all_to_apply, - all_to_delete, - set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union( - set(objs_to_update[FeastObjectType.FEATURE_VIEW]) - ), - objs_to_delete[FeastObjectType.FEATURE_VIEW], - ) - - -def apply_total_with_repo_instance( - store: FeatureStore, - project_name: str, - registry: BaseRegistry, - repo: RepoContents, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, - no_promote: bool = False, -): - if not skip_source_validation: - provider = store._get_provider() - data_sources = [ - t.batch_source for t in repo.feature_views if t.batch_source is not None - ] - # Make sure the data source used by this feature view is supported by Feast - for data_source in data_sources: - provider.validate_data_source(store.config, data_source) - - # For each object in the registry, determine whether it should be kept or deleted. 
- ( - all_to_apply, - all_to_delete, - views_to_keep, - views_to_delete, - ) = extract_objects_for_apply_delete(project_name, registry, repo) - - try: - if store._should_use_plan(): - # Planning phase - compute diffs first without progress bars - registry_diff, infra_diff, new_infra = store.plan( - repo, - skip_feature_view_validation=skip_feature_view_validation, - ) - click.echo(registry_diff.to_string()) - - # Only show progress bars if there are actual infrastructure changes - progress_ctx = None - if len(infra_diff.infra_object_diffs) > 0: - from feast.diff.apply_progress import ApplyProgressContext - - progress_ctx = ApplyProgressContext() - progress_ctx.start_overall_progress() - - # Apply phase - store._apply_diffs( - registry_diff, - infra_diff, - new_infra, - progress_ctx=progress_ctx, - no_promote=no_promote, - ) - click.echo(infra_diff.to_string()) - else: - # Legacy apply path - no progress bars for legacy path - store.apply( - all_to_apply, - objects_to_delete=all_to_delete, - partial=False, - skip_feature_view_validation=skip_feature_view_validation, - no_promote=no_promote, - ) - log_infra_changes(views_to_keep, views_to_delete) - finally: - # Cleanup is handled in the new _apply_diffs method - pass - - -def log_infra_changes( - views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView] -): - from colorama import Fore, Style - - for view in views_to_keep: - click.echo( - f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" - ) - for view in views_to_delete: - click.echo( - f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" - ) - - -def create_feature_store( - ctx: click.Context, -) -> FeatureStore: - repo = ctx.obj["CHDIR"] - # If we received a base64 encoded version of feature_store.yaml, use that - config_base64 = os.getenv(FEATURE_STORE_YAML_ENV_NAME) - if config_base64: - print("Received base64 encoded feature_store.yaml") - config_bytes = 
base64.b64decode(config_base64) - # Create a new unique directory for writing feature_store.yaml - repo_path = Path(tempfile.mkdtemp()) - with open(repo_path / "feature_store.yaml", "wb") as f: - f.write(config_bytes) - return FeatureStore(repo_path=str(repo_path.resolve())) - else: - fs_yaml_file = ctx.obj["FS_YAML_FILE"] - cli_check_repo(repo, fs_yaml_file) - return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file) - - -def apply_total( - repo_config: RepoConfig, - repo_path: Path, - skip_source_validation: bool, - skip_feature_view_validation: bool = False, - no_promote: bool = False, -): - os.chdir(repo_path) - repo = _get_repo_contents(repo_path, repo_config.project, repo_config) - for project in repo.projects: - repo_config.project = project.name - store, registry = _get_store_and_registry(repo_config) - if not is_valid_name(project.name): - print( - f"{project.name} is not valid. Project name should only have " - f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen." - ) - sys.exit(1) - # TODO: When we support multiple projects in a single repo, we should filter repo contents by project. Currently there is no way to associate Feast objects to project. - print(f"Applying changes for project {project.name}") - apply_total_with_repo_instance( - store, - project.name, - registry, - repo, - skip_source_validation, - skip_feature_view_validation, - no_promote=no_promote, - ) - - -def teardown(repo_config: RepoConfig, repo_path: Optional[str]): - # Cannot pass in both repo_path and repo_config to FeatureStore. 
- feature_store = FeatureStore(repo_path=repo_path, config=repo_config) - feature_store.teardown() - - -def registry_dump(repo_config: RepoConfig, repo_path: Path) -> str: - """For debugging only: output contents of the metadata registry""" - registry_config = repo_config.registry - project = repo_config.project - registry = Registry( - project, - registry_config=registry_config, - repo_path=repo_path, - auth_config=repo_config.auth_config, - ) - registry_dict = registry.to_dict(project=project) - return json.dumps(registry_dict, indent=2, sort_keys=True) - - -def cli_check_repo(repo_path: Path, fs_yaml_file: Path): - sys.path.append(str(repo_path)) - if not fs_yaml_file.exists(): - print( - f"Can't find feature repo configuration file at {fs_yaml_file}. " - "Make sure you're running feast from an initialized feast repository." - ) - sys.exit(1) - - -def init_repo(repo_name: str, template: str, repo_path: Optional[str] = None): - import os - from pathlib import Path - from shutil import copytree - - from colorama import Fore, Style - - # Validate project name - if not is_valid_name(repo_name): - raise BadParameter( - message="Name should be alphanumeric values, underscores, and hyphens but not start with an underscore or hyphen", - param_hint="PROJECT_DIRECTORY", - ) - - # Determine where to create the repository - if repo_path: - # User specified a custom path - target_path = Path(repo_path).resolve() - target_path.mkdir(parents=True, exist_ok=True) - display_path = repo_path - else: - # Default behavior: create subdirectory with project name - target_path = Path(os.path.join(Path.cwd(), repo_name)) - target_path.mkdir(exist_ok=True) - display_path = repo_name - - repo_config_path = target_path / "feature_store.yaml" - - if repo_config_path.exists(): - print( - f"The directory {Style.BRIGHT + Fore.GREEN}{display_path}{Style.RESET_ALL} contains an existing feature " - f"store repository that may cause a conflict" - ) - print() - sys.exit(1) - - # Copy template 
directory - template_path = str(Path(Path(__file__).parent / "templates" / template).absolute()) - if not os.path.exists(template_path): - raise IOError(f"Could not find template {template}") - copytree(template_path, str(target_path), dirs_exist_ok=True) - - # Rename gitignore files back to .gitignore - for gitignore_path in target_path.rglob("gitignore"): - gitignore_path.rename(gitignore_path.with_name(".gitignore")) - - # Seed the repository - bootstrap_path = target_path / "bootstrap.py" - if os.path.exists(bootstrap_path): - import importlib.util - - spec = importlib.util.spec_from_file_location("bootstrap", str(bootstrap_path)) - assert isinstance(spec, ModuleSpec) - bootstrap = importlib.util.module_from_spec(spec) - assert isinstance(spec.loader, Loader) - spec.loader.exec_module(bootstrap) - bootstrap.bootstrap() # type: ignore - os.remove(bootstrap_path) - - # Template the feature_store.yaml file - feature_store_yaml_path = target_path / "feature_repo" / "feature_store.yaml" - replace_str_in_file( - feature_store_yaml_path, "project: my_project", f"project: {repo_name}" - ) - - # Remove the __pycache__ folder if it exists - import shutil - - shutil.rmtree(target_path / "__pycache__", ignore_errors=True) - - import click - - click.echo() - click.echo( - f"Creating a new Feast repository in {Style.BRIGHT + Fore.GREEN}{target_path}{Style.RESET_ALL}." 
- ) - click.echo() - - -def is_valid_name(name: str) -> bool: - """A name should be alphanumeric values, underscores, and hyphens but not start with an underscore""" - return ( - not name.startswith(("_", "-")) and re.compile(r"[^\w-]+").search(name) is None - ) - - -def generate_project_name() -> str: - """Generates a unique project name""" - return f"{random.choice(adjectives)}_{random.choice(animals)}" +import base64 +import importlib +import json +import logging +import os +import random +import re +import sys +import tempfile +from importlib.abc import Loader +from importlib.machinery import ModuleSpec +from pathlib import Path +from typing import List, Optional, Set, Union + +import click +from click.exceptions import BadParameter + +from feast import PushSource +from feast.batch_feature_view import BatchFeatureView +from feast.constants import FEATURE_STORE_YAML_ENV_NAME +from feast.data_source import DataSource, KafkaSource, KinesisSource +from feast.diff.registry_diff import extract_objects_for_keep_delete_update_add +from feast.entity import Entity +from feast.feature_service import FeatureService +from feast.feature_store import FeatureStore +from feast.feature_view import DUMMY_ENTITY, FeatureView +from feast.file_utils import replace_str_in_file +from feast.infra.registry.base_registry import BaseRegistry +from feast.infra.registry.registry import FEAST_OBJECT_TYPES, FeastObjectType, Registry +from feast.names import adjectives, animals +from feast.on_demand_feature_view import OnDemandFeatureView +from feast.permissions.permission import Permission +from feast.project import Project +from feast.repo_config import RepoConfig +from feast.repo_contents import RepoContents +from feast.stream_feature_view import StreamFeatureView + +logger = logging.getLogger(__name__) + + +def py_path_to_module(path: Path) -> str: + return ( + str(path.relative_to(os.getcwd()))[: -len(".py")] + .replace("./", "") + .replace("/", ".") + .replace("\\", ".") + ) + + +def 
read_feastignore(repo_root: Path) -> List[str]: + """Read .feastignore in the repo root directory (if exists) and return the list of user-defined ignore paths""" + feast_ignore = repo_root / ".feastignore" + if not feast_ignore.is_file(): + return [] + lines = feast_ignore.read_text().strip().split("\n") + ignore_paths = [] + for line in lines: + # Remove everything after the first occurance of "#" symbol (comments) + if line.find("#") >= 0: + line = line[: line.find("#")] + # Strip leading or ending whitespaces + line = line.strip() + # Add this processed line to ignore_paths if it's not empty + if len(line) > 0: + ignore_paths.append(line) + return ignore_paths + + +def get_ignore_files(repo_root: Path, ignore_paths: List[str]) -> Set[Path]: + """Get all ignore files that match any of the user-defined ignore paths""" + ignore_files = set() + for ignore_path in set(ignore_paths): + # ignore_path may contains matchers (* or **). Use glob() to match user-defined path to actual paths + for matched_path in repo_root.glob(ignore_path): + if matched_path.is_file(): + # If the matched path is a file, add that to ignore_files set + ignore_files.add(matched_path.resolve()) + else: + # Otherwise, list all Python files in that directory and add all of them to ignore_files set + ignore_files |= { + sub_path.resolve() + for sub_path in matched_path.glob("**/*.py") + if sub_path.is_file() + } + return ignore_files + + +def get_repo_files(repo_root: Path) -> List[Path]: + """Get the list of all repo files, ignoring undesired files & directories specified in .feastignore""" + # Read ignore paths from .feastignore and create a set of all files that match any of these paths + ignore_paths = read_feastignore(repo_root) + [ + ".git", + ".feastignore", + ".venv", + "**/.ipynb_checkpoints", + "**/.pytest_cache", + "**/__pycache__", + ] + ignore_files = get_ignore_files(repo_root, ignore_paths) + + # List all Python files in the root directory (recursively) + repo_files = { + 
p.resolve() + for p in repo_root.glob("**/*.py") + if p.is_file() and "__init__.py" != p.name + } + # Ignore all files that match any of the ignore paths in .feastignore + repo_files -= ignore_files + + # Sort repo_files to read them in the same order every time + return sorted(repo_files) + + +def parse_repo(repo_root: Path) -> RepoContents: + """ + Collects unique Feast object definitions from the given feature repo. + + Specifically, if an object foo has already been added, bar will still be added if + (bar == foo), but not if (bar is foo). This ensures that import statements will + not result in duplicates, but defining two equal objects will. + """ + res = RepoContents( + projects=[], + data_sources=[], + entities=[], + feature_views=[], + feature_services=[], + on_demand_feature_views=[], + stream_feature_views=[], + permissions=[], + ) + + for repo_file in get_repo_files(repo_root): + module_path = py_path_to_module(repo_file) + module = importlib.import_module(module_path) + + for attr_name in dir(module): + obj = getattr(module, attr_name) + + if isinstance(obj, DataSource) and not any( + (obj is ds) for ds in res.data_sources + ): + res.data_sources.append(obj) + + # Handle batch sources defined within stream sources. + if ( + isinstance(obj, PushSource) + or isinstance(obj, KafkaSource) + or isinstance(obj, KinesisSource) + ): + batch_source = obj.batch_source + + if batch_source and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + if ( + isinstance(obj, FeatureView) + and not any((obj is fv) for fv in res.feature_views) + and not isinstance(obj, StreamFeatureView) + and not isinstance(obj, BatchFeatureView) + ): + res.feature_views.append(obj) + + # Handle batch sources defined with feature views. 
+ batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + + # Handle stream sources defined with feature views. + if obj.stream_source: + stream_source = obj.stream_source + if not any((stream_source is ds) for ds in res.data_sources): + res.data_sources.append(stream_source) + elif isinstance(obj, StreamFeatureView) and not any( + (obj is sfv) for sfv in res.stream_feature_views + ): + res.stream_feature_views.append(obj) + + # Handle batch sources defined with feature views. + batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + assert obj.stream_source + stream_source = obj.stream_source + if not any((stream_source is ds) for ds in res.data_sources): + res.data_sources.append(stream_source) + elif isinstance(obj, BatchFeatureView) and not any( + (obj is bfv) for bfv in res.feature_views + ): + res.feature_views.append(obj) + + # Handle batch sources defined with feature views. 
+ batch_source = obj.batch_source + if batch_source is not None and not any( + (batch_source is ds) for ds in res.data_sources + ): + res.data_sources.append(batch_source) + elif isinstance(obj, Entity) and not any( + (obj is entity) for entity in res.entities + ): + res.entities.append(obj) + elif isinstance(obj, FeatureService) and not any( + (obj is fs) for fs in res.feature_services + ): + res.feature_services.append(obj) + elif isinstance(obj, OnDemandFeatureView) and not any( + (obj is odfv) for odfv in res.on_demand_feature_views + ): + res.on_demand_feature_views.append(obj) + elif isinstance(obj, Permission) and not any( + (obj is p) for p in res.permissions + ): + res.permissions.append(obj) + elif isinstance(obj, Project) and not any((obj is p) for p in res.projects): + res.projects.append(obj) + + res.entities.append(DUMMY_ENTITY) + return res + + +def plan( + repo_config: RepoConfig, + repo_path: Path, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, +): + os.chdir(repo_path) + repo = _get_repo_contents(repo_path, repo_config.project, repo_config) + for project in repo.projects: + repo_config.project = project.name + store, registry = _get_store_and_registry(repo_config) + # TODO: When we support multiple projects in a single repo, we should filter repo contents by project + if not skip_source_validation: + provider = store._get_provider() + data_sources = [ + t.batch_source for t in repo.feature_views if t.batch_source is not None + ] + # Make sure the data source used by this feature view is supported by Feast + for data_source in data_sources: + provider.validate_data_source(store.config, data_source) + + registry_diff, infra_diff, _ = store.plan( + repo, skip_feature_view_validation=skip_feature_view_validation + ) + click.echo(registry_diff.to_string()) + click.echo(infra_diff.to_string()) + + +def _get_repo_contents( + repo_path, + project_name: Optional[str] = None, + repo_config: Optional[RepoConfig] = None, +): + 
sys.dont_write_bytecode = True + repo = parse_repo(repo_path) + + if len(repo.projects) < 1: + if project_name: + print( + f"No project found in the repository. Using project name {project_name} defined in feature_store.yaml" + ) + project_description = ( + repo_config.project_description if repo_config else None + ) + repo.projects.append( + Project(name=project_name, description=project_description or "") + ) + else: + print( + "No project found in the repository. Either define Project in repository or define a project in feature_store.yaml" + ) + sys.exit(1) + elif len(repo.projects) == 1: + if repo.projects[0].name != project_name: + print( + "Project object name should match with the project name defined in feature_store.yaml" + ) + sys.exit(1) + else: + print( + "Multiple projects found in the repository. Currently no support for multiple projects" + ) + sys.exit(1) + + return repo + + +def _get_store_and_registry(repo_config): + store = FeatureStore(config=repo_config) + registry = store.registry + return store, registry + + +def extract_objects_for_apply_delete(project, registry, repo): + # TODO(achals): This code path should be refactored to handle added & kept entities separately. 
+ ( + _, + objs_to_delete, + objs_to_update, + objs_to_add, + ) = extract_objects_for_keep_delete_update_add(registry, project, repo) + + all_to_apply: List[ + Union[ + Entity, + FeatureView, + OnDemandFeatureView, + StreamFeatureView, + FeatureService, + ] + ] = [] + for object_type in FEAST_OBJECT_TYPES: + to_apply = set(objs_to_add[object_type]).union(objs_to_update[object_type]) + all_to_apply.extend(to_apply) + + all_to_delete: List[ + Union[ + Entity, + FeatureView, + OnDemandFeatureView, + StreamFeatureView, + FeatureService, + ] + ] = [] + for object_type in FEAST_OBJECT_TYPES: + all_to_delete.extend(objs_to_delete[object_type]) + + return ( + all_to_apply, + all_to_delete, + set(objs_to_add[FeastObjectType.FEATURE_VIEW]).union( + set(objs_to_update[FeastObjectType.FEATURE_VIEW]) + ), + objs_to_delete[FeastObjectType.FEATURE_VIEW], + ) + + +def apply_total_with_repo_instance( + store: FeatureStore, + project_name: str, + registry: BaseRegistry, + repo: RepoContents, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, + no_promote: bool = False, +): + if not skip_source_validation: + provider = store._get_provider() + data_sources = [ + t.batch_source for t in repo.feature_views if t.batch_source is not None + ] + # Make sure the data source used by this feature view is supported by Feast + for data_source in data_sources: + provider.validate_data_source(store.config, data_source) + + # For each object in the registry, determine whether it should be kept or deleted. 
+ ( + all_to_apply, + all_to_delete, + views_to_keep, + views_to_delete, + ) = extract_objects_for_apply_delete(project_name, registry, repo) + + try: + if store._should_use_plan(): + # Planning phase - compute diffs first without progress bars + registry_diff, infra_diff, new_infra = store.plan( + repo, + skip_feature_view_validation=skip_feature_view_validation, + ) + click.echo(registry_diff.to_string()) + + # Only show progress bars if there are actual infrastructure changes + progress_ctx = None + if len(infra_diff.infra_object_diffs) > 0: + from feast.diff.apply_progress import ApplyProgressContext + + progress_ctx = ApplyProgressContext() + progress_ctx.start_overall_progress() + + # Apply phase + store._apply_diffs( + registry_diff, + infra_diff, + new_infra, + progress_ctx=progress_ctx, + no_promote=no_promote, + ) + click.echo(infra_diff.to_string()) + else: + # Legacy apply path - no progress bars for legacy path + store.apply( + all_to_apply, + objects_to_delete=all_to_delete, + partial=False, + skip_feature_view_validation=skip_feature_view_validation, + no_promote=no_promote, + ) + log_infra_changes(views_to_keep, views_to_delete) + finally: + # Cleanup is handled in the new _apply_diffs method + pass + + _submit_baseline_jobs_if_needed(store, project_name, repo) + + +def _submit_baseline_jobs_if_needed(store, project_name, repo): + """Submit async baseline DQM jobs for new features after feast apply.""" + try: + from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig + + if not isinstance(store.config.offline_store, PostgreSQLConfig): + return + + from feast.monitoring.monitoring_service import MonitoringService + + svc = MonitoringService(store) + feature_views = list(repo.feature_views) + if not feature_views: + return + + job_ids = svc.submit_baseline_for_new_features( + project=project_name, feature_views=feature_views + ) + for job_id in job_ids: + click.echo(f" → Queued baseline metrics computation (DQM job: {job_id})") + except 
Exception: + logging.getLogger(__name__).debug( + "Monitoring baseline submission skipped (non-critical)", exc_info=True + ) + + +def log_infra_changes( + views_to_keep: Set[FeatureView], views_to_delete: Set[FeatureView] +): + from colorama import Fore, Style + + for view in views_to_keep: + click.echo( + f"Deploying infrastructure for {Style.BRIGHT + Fore.GREEN}{view.name}{Style.RESET_ALL}" + ) + for view in views_to_delete: + click.echo( + f"Removing infrastructure for {Style.BRIGHT + Fore.RED}{view.name}{Style.RESET_ALL}" + ) + + +def create_feature_store( + ctx: click.Context, +) -> FeatureStore: + repo = ctx.obj["CHDIR"] + # If we received a base64 encoded version of feature_store.yaml, use that + config_base64 = os.getenv(FEATURE_STORE_YAML_ENV_NAME) + if config_base64: + print("Received base64 encoded feature_store.yaml") + config_bytes = base64.b64decode(config_base64) + # Create a new unique directory for writing feature_store.yaml + repo_path = Path(tempfile.mkdtemp()) + with open(repo_path / "feature_store.yaml", "wb") as f: + f.write(config_bytes) + return FeatureStore(repo_path=str(repo_path.resolve())) + else: + fs_yaml_file = ctx.obj["FS_YAML_FILE"] + cli_check_repo(repo, fs_yaml_file) + return FeatureStore(repo_path=str(repo), fs_yaml_file=fs_yaml_file) + + +def apply_total( + repo_config: RepoConfig, + repo_path: Path, + skip_source_validation: bool, + skip_feature_view_validation: bool = False, + no_promote: bool = False, +): + os.chdir(repo_path) + repo = _get_repo_contents(repo_path, repo_config.project, repo_config) + for project in repo.projects: + repo_config.project = project.name + store, registry = _get_store_and_registry(repo_config) + if not is_valid_name(project.name): + print( + f"{project.name} is not valid. Project name should only have " + f"alphanumerical values, underscores, and hyphens but not start with an underscore or hyphen." 
def cli_check_repo(repo_path: Path, fs_yaml_file: Path):
    """Make the repo importable and abort if its configuration file is missing.

    ``repo_path`` is always appended to ``sys.path`` first. If
    ``fs_yaml_file`` does not exist, an explanatory message is printed and
    the process exits with status 1.

    Args:
        repo_path: Directory of the feature repository.
        fs_yaml_file: Path of the ``feature_store.yaml`` to look for.
    """
    sys.path.append(str(repo_path))

    if fs_yaml_file.exists():
        return

    message = (
        f"Can't find feature repo configuration file at {fs_yaml_file}. "
        "Make sure you're running feast from an initialized feast repository."
    )
    print(message)
    sys.exit(1)
def is_valid_name(name: str) -> bool:
    """Return whether ``name`` is an acceptable project name.

    A valid name is non-empty, contains only word characters (letters,
    digits, underscores; Unicode-aware for ``str`` patterns) and hyphens, and
    does not start with an underscore or a hyphen.

    Args:
        name: Candidate project name.

    Returns:
        ``True`` if the name satisfies all of the rules above, else ``False``.
    """
    # An empty name has no disallowed characters but is not a usable name.
    if not name or name.startswith(("_", "-")):
        return False
    # fullmatch demands that every character is allowed, which is the
    # positive form of "no character outside [\w-] occurs anywhere".
    return re.fullmatch(r"[\w-]+", name) is not None
+ +Tests cover: +- Auto-compute (all granularities from source timestamps) +- Compute baseline (idempotent) +- Transient compute +- DQM job lifecycle +- CLI commands +- REST API endpoints +- RBAC enforcement +- Compute engine dispatch (SQL push-down vs Python fallback) +""" + +from datetime import date, datetime, timezone +from unittest.mock import MagicMock, patch + +import pyarrow as pa +import pytest +from click.testing import CliRunner + +from feast.monitoring.monitoring_service import VALID_GRANULARITIES, MonitoringService +from feast.types import PrimitiveFeastType + +# ------------------------------------------------------------------ # +# Shared helpers +# ------------------------------------------------------------------ # + + +def _mock_pg_conn(): + mock_conn = MagicMock() + mock_cursor = MagicMock() + mock_conn.cursor.return_value.__enter__ = MagicMock(return_value=mock_cursor) + mock_conn.cursor.return_value.__exit__ = MagicMock(return_value=False) + mock_conn.__enter__ = MagicMock(return_value=mock_conn) + mock_conn.__exit__ = MagicMock(return_value=False) + return mock_conn, mock_cursor + + +def _make_feature_field(name, dtype): + field = MagicMock() + field.name = name + field.dtype = dtype + return field + + +def _make_feature_view(name, features, entities=None, batch_source=None): + fv = MagicMock() + fv.name = name + fv.features = features + fv.entities = entities or [] + if batch_source is None: + batch_source = MagicMock() + batch_source.timestamp_field = "event_timestamp" + batch_source.created_timestamp_column = "" + fv.batch_source = batch_source + return fv + + +def _make_feature_service(name, fv_names): + fs = MagicMock() + fs.name = name + fs.feature_view_projections = [MagicMock(name=n) for n in fv_names] + for proj, n in zip(fs.feature_view_projections, fv_names): + proj.name = n + return fs + + +def _make_mock_store(feature_views, feature_services=None): + """Create a mock FeatureStore with offline store that uses Python fallback.""" + 
class TestAutoCompute:
    """Exercises ``MonitoringService.auto_compute`` against a mocked store."""

    def test_auto_compute_all_granularities(self):
        """Without a view filter, the result lists every valid granularity
        and metrics are saved through the offline store."""
        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
        driver_stats = _make_feature_view("driver_stats", [conv_rate])
        mock_store = _make_mock_store([driver_stats])
        service = MonitoringService(mock_store)

        outcome = service.auto_compute(project="test_project")

        assert outcome["status"] == "completed"
        assert outcome["computed_feature_views"] == 1
        assert len(outcome["granularities"]) == len(VALID_GRANULARITIES)
        for granularity in VALID_GRANULARITIES:
            assert granularity in outcome["granularities"]

        offline_store = mock_store._get_provider.return_value.offline_store
        offline_store.save_monitoring_metrics.assert_called()

    def test_auto_compute_specific_view(self):
        """Restricting the run to one named view completes for that view."""
        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
        driver_stats = _make_feature_view("driver_stats", [conv_rate])
        service = MonitoringService(_make_mock_store([driver_stats]))

        outcome = service.auto_compute(
            feature_view_name="driver_stats",
            project="test_project",
        )

        assert outcome["status"] == "completed"
        assert outcome["computed_feature_views"] == 1
class TestTransientCompute:
    """Exercises ``MonitoringService.compute_transient`` on a mocked store."""

    def test_transient_returns_metrics_without_saving(self):
        """A transient run returns one metric per feature and persists nothing."""
        feature_fields = [
            _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64),
            _make_feature_field("city", PrimitiveFeastType.STRING),
        ]
        mock_store = _make_mock_store(
            [_make_feature_view("driver_stats", feature_fields)]
        )
        service = MonitoringService(mock_store)

        outcome = service.compute_transient(
            project="test_project",
            feature_view_name="driver_stats",
            start_date=date(2025, 1, 1),
            end_date=date(2025, 1, 15),
        )

        assert outcome["status"] == "completed"
        assert outcome["start_date"] == "2025-01-01"
        assert outcome["end_date"] == "2025-01-15"
        assert len(outcome["metrics"]) == 2

        # Transient results must never reach the save path.
        offline_store = mock_store._get_provider.return_value.offline_store
        offline_store.save_monitoring_metrics.assert_not_called()

    def test_transient_empty_features(self):
        """A view whose only feature is a UNIX_TIMESTAMP yields no metrics."""
        timestamp_field = _make_feature_field(
            "ts", PrimitiveFeastType.UNIX_TIMESTAMP
        )
        mock_store = _make_mock_store([_make_feature_view("fv", [timestamp_field])])
        service = MonitoringService(mock_store)

        outcome = service.compute_transient(
            feature_view_name="fv",
            project="test_project",
        )
        assert outcome["metrics"] == []
@patch("feast.cli.monitor.create_feature_store")
@patch("feast.monitoring.monitoring_service.MonitoringService.auto_compute")
def test_run_auto_mode(self, mock_auto, mock_create_store):
    """``monitor run`` with no options goes through ``auto_compute`` once
    and prints the auto-compute summary."""
    from feast.cli.monitor import monitor_cmd

    # The CLI obtains its store from the patched factory.
    fake_store = MagicMock()
    fake_store.project = "proj"
    mock_create_store.return_value = fake_store

    summary = {
        "status": "completed",
        "computed_feature_views": 2,
        "computed_features": 5,
        "granularities": list(VALID_GRANULARITIES),
        "duration_ms": 1200,
    }
    mock_auto.return_value = summary

    invocation = CliRunner().invoke(monitor_cmd, ["run"])

    assert invocation.exit_code == 0
    printed = invocation.output
    assert "Auto-computing" in printed
    assert "Features computed: 5" in printed
    mock_auto.assert_called_once()
@patch("feast.api.registry.rest.monitoring.assert_permissions")
def test_transient_compute_endpoint(self, mock_perms, app):
    """POSTing a date range to the transient endpoint returns HTTP 200 with
    a completed status and at least one metric."""
    test_client, _server = app
    payload = {
        "project": "test_project",
        "feature_view_name": "driver_stats",
        "start_date": "2025-01-05",
        "end_date": "2025-01-20",
    }

    reply = test_client.post("/monitoring/compute/transient", json=payload)

    assert reply.status_code == 200
    body = reply.json()
    assert body["status"] == "completed"
    assert len(body["metrics"]) >= 1
@patch("feast.monitoring.dqm_job_manager._get_conn")
@patch("feast.api.registry.rest.monitoring.assert_permissions")
def test_compute_requires_update(self, mock_perms, mock_job_conn, app):
    """The compute endpoint must check permissions with the UPDATE action."""
    from feast.permissions.action import AuthzedAction

    test_client, _server = app
    pg_conn, _cursor = _mock_pg_conn()
    mock_job_conn.return_value = pg_conn

    test_client.post(
        "/monitoring/compute",
        json={
            "project": "test_project",
            "feature_view_name": "driver_stats",
        },
    )

    mock_perms.assert_called()
    # Only the keyword arguments of the most recent call are inspected; a
    # missing ``actions`` keyword is treated as an empty list.
    requested_actions = mock_perms.call_args.kwargs.get("actions", [])
    assert AuthzedAction.UPDATE in requested_actions
"project": "test_project", + "feature_view_name": "driver_stats", + }, + ) + + mock_perms.assert_called() + call_args = mock_perms.call_args + assert AuthzedAction.DESCRIBE in call_args.kwargs.get( + "actions", call_args[1].get("actions", []) + ) + + @patch("feast.api.registry.rest.monitoring.assert_permissions") + def test_read_requires_describe(self, mock_perms, app): + client, _ = app + + from feast.permissions.action import AuthzedAction + + client.get( + "/monitoring/metrics/features", + params={"project": "test_project", "feature_view_name": "driver_stats"}, + ) + + mock_perms.assert_called() + call_args = mock_perms.call_args + assert AuthzedAction.DESCRIBE in call_args.kwargs.get( + "actions", call_args[1].get("actions", []) + ) + + +# ------------------------------------------------------------------ # +# Test: SQL push-down dispatch +# ------------------------------------------------------------------ # + + +class TestComputeEngineDispatch: + """Verify that MonitoringService prefers SQL push-down and falls back + to Python-based computation when the offline store doesn't support it.""" + + def _make_store_with_pushdown(self, pushdown_result): + """Create a mock store where the offline store supports push-down.""" + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + store = _make_mock_store([fv]) + provider = store._get_provider.return_value + provider.offline_store.compute_monitoring_metrics.side_effect = None + provider.offline_store.compute_monitoring_metrics.return_value = pushdown_result + provider.offline_store.get_monitoring_max_timestamp.side_effect = None + provider.offline_store.get_monitoring_max_timestamp.return_value = datetime( + 2025, 3, 27, tzinfo=timezone.utc + ) + return store, fv + + def test_uses_sql_pushdown_when_available(self): + """When the offline store supports compute_monitoring_metrics, + 
def test_falls_back_to_python_when_not_supported(self):
    """When compute_monitoring_metrics raises NotImplementedError,
    the service falls back to pulling data + Python compute."""
    conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
    # The default mock store raises NotImplementedError for push-down calls.
    mock_store = _make_mock_store([_make_feature_view("driver_stats", [conv_rate])])
    service = MonitoringService(mock_store)

    outcome = service.compute_transient(
        project="test_project",
        feature_view_name="driver_stats",
        start_date=date(2025, 1, 1),
        end_date=date(2025, 1, 15),
    )

    assert outcome["status"] == "completed"
    computed = outcome["metrics"]
    assert len(computed) == 1
    assert computed[0]["feature_name"] == "conv_rate"

    offline_store = mock_store._get_provider.return_value.offline_store
    offline_store.pull_all_from_table_or_query.assert_called()
class TestNativeStorageDispatch:
    """Verify that MonitoringService uses OfflineStore for all storage
    operations (save, query, clear_baseline, ensure_tables)."""

    @staticmethod
    def _service_and_offline_store():
        """Return ``(service, offline_store_mock)`` for a one-feature
        ``driver_stats`` view backed by the default mock store."""
        conv_rate = _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)
        mock_store = _make_mock_store(
            [_make_feature_view("driver_stats", [conv_rate])]
        )
        service = MonitoringService(mock_store)
        return service, mock_store._get_provider.return_value.offline_store

    def test_save_goes_through_offline_store(self):
        """compute_metrics ensures the tables exist and saves both feature
        and feature-view metrics via the offline store."""
        service, offline_store = self._service_and_offline_store()

        service.compute_metrics(granularity="daily", project="test_project")

        offline_store.ensure_monitoring_tables.assert_called()
        offline_store.save_monitoring_metrics.assert_called()

        # The metric type is the second positional argument of each save call.
        saved_types = {
            saved.args[1]
            for saved in offline_store.save_monitoring_metrics.call_args_list
        }
        assert "feature" in saved_types
        assert "feature_view" in saved_types

    def test_query_goes_through_offline_store(self):
        """get_feature_metrics queries the offline store for feature metrics."""
        service, offline_store = self._service_and_offline_store()

        service.get_feature_metrics(granularity="daily", project="test_project")

        query_mock = offline_store.query_monitoring_metrics
        query_mock.assert_called()
        assert query_mock.call_args.kwargs["metric_type"] == "feature"

    def test_baseline_clear_goes_through_offline_store(self):
        """compute_baseline clears the previous baseline via the offline store."""
        service, offline_store = self._service_and_offline_store()

        service.compute_baseline(project="test_project")

        offline_store.clear_monitoring_baseline.assert_called()

    def test_transient_does_not_save(self):
        """compute_transient never calls the offline store's save method."""
        service, offline_store = self._service_and_offline_store()

        service.compute_transient(
            feature_view_name="driver_stats",
            project="test_project",
        )

        offline_store.save_monitoring_metrics.assert_not_called()
class TestClassifyFeature:
    """Checks how ``MetricsCalculator.classify_feature`` maps Feast types."""

    # Expected classification for each primitive type; ``None`` is the value
    # the test expects for BYTES and UNIX_TIMESTAMP.
    _EXPECTED_BY_DTYPE = [
        (PrimitiveFeastType.INT32, "numeric"),
        (PrimitiveFeastType.INT64, "numeric"),
        (PrimitiveFeastType.FLOAT32, "numeric"),
        (PrimitiveFeastType.FLOAT64, "numeric"),
        (PrimitiveFeastType.STRING, "categorical"),
        (PrimitiveFeastType.BOOL, "categorical"),
        (PrimitiveFeastType.BYTES, None),
        (PrimitiveFeastType.UNIX_TIMESTAMP, None),
    ]

    @pytest.mark.parametrize("dtype, expected", _EXPECTED_BY_DTYPE)
    def test_classification(self, dtype, expected):
        """Each dtype is classified as the expected kind."""
        classified = MetricsCalculator.classify_feature(dtype)
        assert classified == expected
def test_percentiles_order(self):
    """Percentiles computed over 0..999 are non-decreasing from p50 to p99."""
    values = pa.array(list(range(1000)), type=pa.float64())
    stats = _make_calc().compute_numeric(values)

    # Compare each percentile with the next higher one, in ascending order.
    ascending = ("p50", "p75", "p90", "p95", "p99")
    for lower, upper in zip(ascending, ascending[1:]):
        assert stats[lower] <= stats[upper]
class TestComputeAll:
    """Exercises ``MetricsCalculator.compute_all`` over whole tables."""

    def test_mixed_features(self):
        """One result per requested field, in request order, carrying the
        requested name and type."""
        requested = [("age", "numeric"), ("city", "categorical")]
        table = pa.table(
            {
                "age": [25, 30, 35, 40],
                "city": ["NYC", "LA", "NYC", "SF"],
            }
        )

        computed = _make_calc().compute_all(table, requested)

        assert len(computed) == 2
        for entry, (name, kind) in zip(computed, requested):
            assert entry["feature_name"] == name
            assert entry["feature_type"] == kind

    def test_missing_column_skipped(self):
        """A requested field with no matching column produces no result."""
        table = pa.table({"age": [25, 30]})
        requested = [("age", "numeric"), ("missing_col", "numeric")]

        computed = _make_calc().compute_all(table, requested)

        assert len(computed) == 1
        assert computed[0]["feature_name"] == "age"
store(): + return MonitoringStore(MagicMock()) + + +class TestEnsureTables: + @patch("feast.monitoring.monitoring_store._get_conn") + def test_creates_three_tables(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + + store.ensure_tables() + + assert mock_cursor.execute.call_count == 3 + calls = [str(c) for c in mock_cursor.execute.call_args_list] + assert any(_FEATURE_METRICS_TABLE in c for c in calls) + assert any(_FEATURE_VIEW_METRICS_TABLE in c for c in calls) + assert any(_FEATURE_SERVICE_METRICS_TABLE in c for c in calls) + mock_conn.commit.assert_called_once() + + +class TestSaveAndGetMetrics: + @patch("feast.monitoring.monitoring_store._get_conn") + def test_save_feature_metrics_upsert(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + + metrics = [ + { + "project_id": "proj", + "feature_view_name": "fv1", + "feature_name": "feat1", + "metric_date": date(2025, 1, 1), + "granularity": "daily", + "data_source_type": "batch", + "computed_at": datetime.now(timezone.utc), + "is_baseline": False, + "feature_type": "numeric", + "row_count": 100, + "null_count": 5, + "null_rate": 0.05, + "mean": 10.0, + "stddev": 2.0, + "min_val": 1.0, + "max_val": 20.0, + "p50": 10.0, + "p75": 15.0, + "p90": 18.0, + "p95": 19.0, + "p99": 19.9, + "histogram": {"bins": [0, 10, 20], "counts": [40, 60]}, + } + ] + store.save_feature_metrics(metrics) + + mock_cursor.execute.assert_called_once() + mock_conn.commit.assert_called_once() + + @patch("feast.monitoring.monitoring_store._get_conn") + def test_save_empty_list_is_noop(self, mock_get_conn, store): + store.save_feature_metrics([]) + mock_get_conn.assert_not_called() + + @patch("feast.monitoring.monitoring_store._get_conn") + def test_get_feature_metrics_with_granularity_filter(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + 
mock_cursor.fetchall.return_value = [] + + store.get_feature_metrics( + project="proj", + feature_view_name="fv1", + granularity="weekly", + data_source_type="batch", + ) + + call_args = mock_cursor.execute.call_args + params = call_args[0][1] + assert "weekly" in params + assert "batch" in params + + @patch("feast.monitoring.monitoring_store._get_conn") + def test_get_feature_metrics_date_range(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + mock_cursor.fetchall.return_value = [] + + store.get_feature_metrics( + project="proj", + start_date=date(2025, 1, 1), + end_date=date(2025, 1, 31), + ) + + call_args = mock_cursor.execute.call_args + params = call_args[0][1] + assert date(2025, 1, 1) in params + assert date(2025, 1, 31) in params + + +class TestBaseline: + @patch("feast.monitoring.monitoring_store._get_conn") + def test_get_baseline_filters_is_baseline(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + mock_cursor.fetchall.return_value = [] + + store.get_baseline(project="proj", feature_view_name="fv1") + + call_args = mock_cursor.execute.call_args + params = call_args[0][1] + assert True in params + + @patch("feast.monitoring.monitoring_store._get_conn") + def test_clear_baseline(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + + store.clear_baseline(project="proj", feature_view_name="fv1") + + mock_cursor.execute.assert_called_once() + call_args = mock_cursor.execute.call_args + params = call_args[0][1] + assert "proj" in params + assert "fv1" in params + mock_conn.commit.assert_called_once() + + +class TestHistogramSerialization: + @patch("feast.monitoring.monitoring_store._get_conn") + def test_histogram_json_serialized_on_save(self, mock_get_conn, store): + mock_conn, mock_cursor = _mock_pg_conn() + mock_get_conn.return_value = mock_conn + + histogram_data = 
{"bins": [0.0, 5.0, 10.0], "counts": [3, 7]} + metrics = [ + { + "project_id": "proj", + "feature_view_name": "fv1", + "feature_name": "feat1", + "metric_date": date(2025, 1, 1), + "granularity": "daily", + "data_source_type": "batch", + "computed_at": datetime.now(timezone.utc), + "is_baseline": False, + "feature_type": "numeric", + "row_count": 10, + "null_count": 0, + "null_rate": 0.0, + "mean": 5.0, + "stddev": 2.0, + "min_val": 0.0, + "max_val": 10.0, + "p50": 5.0, + "p75": 7.5, + "p90": 9.0, + "p95": 9.5, + "p99": 9.9, + "histogram": histogram_data, + } + ] + store.save_feature_metrics(metrics) + + import json + + call_values = mock_cursor.execute.call_args[0][1] + assert json.dumps(histogram_data) in [ + v for v in call_values if isinstance(v, str) + ] From 0022cd22aac7e8c64814786db1fb25c94d9b401b Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Mon, 20 Apr 2026 21:36:15 +0530 Subject: [PATCH 04/12] chore: Serving Time metrics and monitoring rollout to more backends Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- .../feast/api/registry/rest/monitoring.py | 77 +- sdk/python/feast/cli/monitor.py | 86 +- .../feast/infra/offline_stores/bigquery.py | 748 +++++++++++++++++- .../contrib/oracle_offline_store/oracle.py | 686 +++++++++++++++- .../contrib/spark_offline_store/spark.py | 613 +++++++++++++- sdk/python/feast/infra/offline_stores/dask.py | 467 ++++++++++- .../feast/infra/offline_stores/duckdb.py | 583 +++++++++++++- .../feast/infra/offline_stores/redshift.py | 687 +++++++++++++++- .../feast/infra/offline_stores/snowflake.py | 705 ++++++++++++++++- .../feast/monitoring/metrics_calculator.py | 17 + .../feast/monitoring/monitoring_service.py | 410 ++++++++++ sdk/python/feast/repo_operations.py | 5 - .../monitoring/test_monitoring_integration.py | 214 ++++- 13 files changed, 5278 insertions(+), 20 deletions(-) diff --git a/sdk/python/feast/api/registry/rest/monitoring.py 
b/sdk/python/feast/api/registry/rest/monitoring.py index 001270df0b6..92132becdbb 100644 --- a/sdk/python/feast/api/registry/rest/monitoring.py +++ b/sdk/python/feast/api/registry/rest/monitoring.py @@ -9,11 +9,10 @@ from feast.permissions.action import AuthzedAction from feast.permissions.security_manager import assert_permissions -VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES - - logger = logging.getLogger(__name__) +VALID_GRANULARITIES = OfflineStore.MONITORING_VALID_GRANULARITIES + class ComputeMetricsRequest(BaseModel): project: str @@ -30,6 +29,20 @@ class AutoComputeRequest(BaseModel): feature_view_name: Optional[str] = None +class ComputeLogMetricsRequest(BaseModel): + project: str + feature_service_name: str + start_date: Optional[str] = None + end_date: Optional[str] = None + granularity: str = Field("daily") + set_baseline: bool = False + + +class AutoComputeLogRequest(BaseModel): + project: str + feature_service_name: Optional[str] = None + + class ComputeTransientRequest(BaseModel): project: str feature_view_name: str @@ -121,6 +134,64 @@ async def auto_compute(request: AutoComputeRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) + # ------------------------------------------------------------------ # + # Log source: compute from feature serving logs + # ------------------------------------------------------------------ # + + @router.post("/monitoring/compute/log", tags=["Monitoring"]) + async def compute_log_metrics(request: ComputeLogMetricsRequest): + """Compute metrics from feature serving logs for a feature service.""" + if request.granularity not in VALID_GRANULARITIES: + raise HTTPException( + status_code=400, + detail=f"Invalid granularity '{request.granularity}'. 
" + f"Must be one of {VALID_GRANULARITIES}", + ) + + store = _get_store() + fs = store.registry.get_feature_service( + name=request.feature_service_name, project=request.project + ) + assert_permissions(fs, actions=[AuthzedAction.UPDATE]) + + svc = _get_monitoring_service() + + start_d = date.fromisoformat(request.start_date) if request.start_date else None + end_d = date.fromisoformat(request.end_date) if request.end_date else None + + try: + result = svc.compute_log_metrics( + project=request.project, + feature_service_name=request.feature_service_name, + start_date=start_d, + end_date=end_d, + granularity=request.granularity, + set_baseline=request.set_baseline, + ) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + @router.post("/monitoring/auto_compute/log", tags=["Monitoring"]) + async def auto_compute_log(request: AutoComputeLogRequest): + """Auto-detect date ranges from log data and compute all granularities.""" + store = _get_store() + if request.feature_service_name: + fs = store.registry.get_feature_service( + name=request.feature_service_name, project=request.project + ) + assert_permissions(fs, actions=[AuthzedAction.UPDATE]) + + svc = _get_monitoring_service() + try: + result = svc.auto_compute_log_metrics( + project=request.project, + feature_service_name=request.feature_service_name, + ) + return result + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + @router.get("/monitoring/jobs/{job_id}", tags=["Monitoring"]) async def get_job_status(job_id: str): svc = _get_monitoring_service() diff --git a/sdk/python/feast/cli/monitor.py b/sdk/python/feast/cli/monitor.py index d4f0953ff61..3741700cea0 100644 --- a/sdk/python/feast/cli/monitor.py +++ b/sdk/python/feast/cli/monitor.py @@ -57,6 +57,12 @@ def monitor_cmd(): default=False, help="Mark this computation as the baseline for drift detection.", ) +@click.option( + "--source-type", + type=click.Choice(["batch", "log", "all"]), 
+ default="batch", + help="Data source type: 'batch' (offline store), 'log' (serving logs), or 'all'.", +) @click.pass_context def monitor_run( ctx: click.Context, @@ -67,11 +73,15 @@ def monitor_run( end_date: Optional[str], granularity: Optional[str], set_baseline: bool, + source_type: str, ): """Compute feature quality metrics. Without --start-date/--end-date/--granularity, runs in auto mode: detects date ranges from source data and computes all granularities. + + Use --source-type log to compute metrics from feature serving logs + (requires feature services with logging configured). """ store = create_feature_store(ctx) @@ -85,8 +95,44 @@ def monitor_run( auto_mode = start_date is None and end_date is None and granularity is None feat_names: Optional[List[str]] = list(feature_name) if feature_name else None + if source_type in ("batch", "all"): + _run_batch_monitoring( + svc, + project, + feature_view, + feat_names, + start_date, + end_date, + granularity, + set_baseline, + auto_mode, + ) + + if source_type in ("log", "all"): + _run_log_monitoring( + svc, + project, + feature_view, + start_date, + end_date, + granularity, + auto_mode, + ) + + +def _run_batch_monitoring( + svc, + project, + feature_view, + feat_names, + start_date, + end_date, + granularity, + set_baseline, + auto_mode, +): if auto_mode and not set_baseline: - click.echo("Auto-computing metrics for all granularities...") + click.echo("Auto-computing batch metrics for all granularities...") result = svc.auto_compute( project=project, feature_view_name=feature_view, @@ -120,3 +166,41 @@ def monitor_run( if set_baseline: click.echo("Baseline: SET") + + +def _run_log_monitoring( + svc, project, feature_service_name, start_date, end_date, granularity, auto_mode +): + if auto_mode: + click.echo("Auto-computing log metrics for all granularities...") + result = svc.auto_compute_log_metrics( + project=project, + feature_service_name=feature_service_name, + ) + click.echo(f"Status: {result['status']}") + 
click.echo(f"Feature services computed: {result['computed_feature_services']}") + click.echo(f"Features computed: {result['computed_features']}") + click.echo(f"Granularities: {', '.join(result['granularities'])}") + click.echo(f"Duration: {result['duration_ms']}ms") + else: + if not feature_service_name: + click.echo( + "Error: --feature-view (as feature service name) is required for log source with explicit dates." + ) + return + + start_d = date.fromisoformat(start_date) if start_date else None + end_d = date.fromisoformat(end_date) if end_date else None + + result = svc.compute_log_metrics( + project=project, + feature_service_name=feature_service_name, + start_date=start_d, + end_date=end_d, + granularity=granularity or "daily", + ) + + click.echo(f"Status: {result['status']}") + click.echo("Source: log") + click.echo(f"Features computed: {result.get('computed_features', 0)}") + click.echo(f"Duration: {result['duration_ms']}ms") diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index 3d0f84bb3a5..c982383c895 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -1,7 +1,8 @@ import contextlib +import json import tempfile import uuid -from datetime import date, datetime, timedelta +from datetime import date, datetime, timedelta, timezone from pathlib import Path from typing import ( Any, @@ -477,6 +478,751 @@ def offline_write_batch( def _escape_query_columns(columns: List[str]) -> List[str]: return [f"`{x}`" for x in columns] + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + assert 
isinstance(data_source, BigQuerySource) + return _bq_compute_monitoring_metrics( + config, + data_source, + feature_columns, + timestamp_field, + start_date=start_date, + end_date=end_date, + histogram_bins=histogram_bins, + top_n=top_n, + ) + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + assert isinstance(data_source, BigQuerySource) + return _bq_get_monitoring_max_timestamp(config, data_source, timestamp_field) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_ensure_monitoring_tables(config) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_save_monitoring_metrics(config, metric_type, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + return _bq_query_monitoring_metrics( + config, project, metric_type, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + _bq_clear_monitoring_baseline( + config, project, feature_view_name, feature_name, data_source_type + ) + + +# ------------------------------------------------------------------ # +# BigQuery 
monitoring metrics (native) +# ------------------------------------------------------------------ # + +_BQ_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" +_BQ_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" +_BQ_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" + +_BQ_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { + "feature_type": "numeric", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _bq_monitoring_table_fqn(config: RepoConfig, table_name: str) -> str: + assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) + project_id = config.offline_store.project_id + if not project_id: + client = _get_bigquery_client( + project=config.offline_store.billing_project_id, + location=config.offline_store.location, + ) + project_id = client.project + return f"`{project_id}.{config.offline_store.dataset}.{table_name}`" + + +def _bq_opt_float(val: Any) -> Optional[float]: + return float(val) if val is not None else None + + +def _bq_mon_table_meta(metric_type: str) -> Tuple[str, List[str], List[str]]: + if metric_type == "feature": + return ( + _BQ_MON_FEATURE_TABLE, + [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", + ], + [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + ], + ) + if metric_type == "feature_view": + return ( + _BQ_MON_VIEW_TABLE, + [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + 
"features_with_nulls", + "avg_null_rate", + "max_null_rate", + ], + [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + ], + ) + if metric_type == "feature_service": + return ( + _BQ_MON_SERVICE_TABLE, + [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", + ], + [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + ], + ) + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _bq_scalar_param_type(column: str) -> str: + if column == "is_baseline": + return "BOOL" + if column == "metric_date": + return "DATE" + if column == "computed_at": + return "TIMESTAMP" + if column in { + "row_count", + "null_count", + "total_row_count", + "total_features", + "features_with_nulls", + "total_feature_views", + }: + return "INT64" + if column in { + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "avg_null_rate", + "max_null_rate", + }: + return "FLOAT64" + return "STRING" + + +def _bq_merge_row( + config: RepoConfig, + table_fqn: str, + columns: List[str], + pk_columns: List[str], + row: Dict[str, Any], +) -> None: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + non_pk = [c for c in columns if c not in pk_columns] + params: List[Any] = [] + using_parts: List[str] = [] + on_parts: List[str] = [] + merge_idx = 0 + for col in columns: + p = f"p{merge_idx}" + merge_idx += 1 + val = row.get(col) + if col == "histogram" and val is not None and not isinstance(val, str): + val = json.dumps(val) + param_type = _bq_scalar_param_type(col) + params.append(bigquery.ScalarQueryParameter(p, param_type, val)) + 
using_parts.append(f"@{p} AS {col}") + on_parts.append(f"T.{col} = S.{col}") + update_set = ", ".join(f"{c} = S.{c}" for c in non_pk) + merge_sql = f""" +MERGE {table_fqn} T +USING (SELECT {", ".join(using_parts)}) S +ON {" AND ".join(on_parts)} +WHEN MATCHED THEN UPDATE SET {update_set} +WHEN NOT MATCHED THEN INSERT ({", ".join(columns)}) VALUES ({", ".join(f"S.{c}" for c in columns)}) +""" + job_config = bigquery.QueryJobConfig(query_parameters=params) + client.query(merge_sql, job_config=job_config).result() + + +def _bq_save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], +) -> None: + table_short, columns, pk_columns = _bq_mon_table_meta(metric_type) + table_fqn = _bq_monitoring_table_fqn(config, table_short) + for row in metrics: + _bq_merge_row(config, table_fqn, columns, pk_columns, row) + + +def _bq_query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, +) -> List[Dict[str, Any]]: + table_short, columns, _ = _bq_mon_table_meta(metric_type) + table_fqn = _bq_monitoring_table_fqn(config, table_short) + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + params: List[Any] = [ + bigquery.ScalarQueryParameter("project", "STRING", project), + ] + conditions = ["project_id = @project"] + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(f"`{key}` = @{key}") + params.append( + bigquery.ScalarQueryParameter( + key, _bq_scalar_param_type(key), value + ) + ) + if start_date: + conditions.append("metric_date >= @start_date") + params.append(bigquery.ScalarQueryParameter("start_date", "DATE", start_date)) + if end_date: + conditions.append("metric_date <= @end_date") + 
params.append(bigquery.ScalarQueryParameter("end_date", "DATE", end_date)) + col_list = ", ".join(f"`{c}`" for c in columns) + where_sql = " AND ".join(conditions) + sql = ( + f"SELECT {col_list} FROM {table_fqn} WHERE {where_sql} ORDER BY metric_date ASC" + ) + job_config = bigquery.QueryJobConfig(query_parameters=params) + job = client.query(sql, job_config=job_config) + job.result() + results: List[Dict[str, Any]] = [] + for r in job: + record = {columns[i]: r[i] for i in range(len(columns))} + if "histogram" in record and isinstance(record["histogram"], str): + record["histogram"] = json.loads(record["histogram"]) + if "metric_date" in record and isinstance(record["metric_date"], date): + record["metric_date"] = record["metric_date"].isoformat() + if "computed_at" in record and isinstance(record["computed_at"], datetime): + record["computed_at"] = record["computed_at"].isoformat() + results.append(record) + return results + + +def _bq_clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, +) -> None: + table_fqn = _bq_monitoring_table_fqn(config, _BQ_MON_FEATURE_TABLE) + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + params: List[Any] = [ + bigquery.ScalarQueryParameter("project", "STRING", project), + ] + conditions = ["project_id = @project", "is_baseline = TRUE"] + if feature_view_name: + conditions.append("feature_view_name = @feature_view_name") + params.append( + bigquery.ScalarQueryParameter( + "feature_view_name", "STRING", feature_view_name + ) + ) + if feature_name: + conditions.append("feature_name = @feature_name") + params.append( + bigquery.ScalarQueryParameter("feature_name", "STRING", feature_name) + ) + if data_source_type: + conditions.append("data_source_type = 
@data_source_type") + params.append( + bigquery.ScalarQueryParameter( + "data_source_type", "STRING", data_source_type + ) + ) + sql = f"UPDATE {table_fqn} SET is_baseline = FALSE WHERE {' AND '.join(conditions)}" + job_config = bigquery.QueryJobConfig(query_parameters=params) + client.query(sql, job_config=job_config).result() + + +def _bq_ensure_monitoring_tables(config: RepoConfig) -> None: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + ds = config.offline_store.dataset + proj = config.offline_store.project_id or client.project + feature_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_FEATURE_TABLE}` ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + feature_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + feature_type STRING NOT NULL, + row_count INT64, + null_count INT64, + null_rate FLOAT64, + mean FLOAT64, + stddev FLOAT64, + min_val FLOAT64, + max_val FLOAT64, + p50 FLOAT64, + p75 FLOAT64, + p90 FLOAT64, + p95 FLOAT64, + p99 FLOAT64, + histogram STRING +) +PRIMARY KEY (project_id, feature_view_name, feature_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + view_ddl = f""" +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_VIEW_TABLE}` ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + total_row_count INT64, + total_features INT64, + features_with_nulls INT64, + avg_null_rate FLOAT64, + max_null_rate FLOAT64 +) +PRIMARY KEY (project_id, feature_view_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + service_ddl = f""" +CREATE 
TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_SERVICE_TABLE}` ( + project_id STRING NOT NULL, + feature_service_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOL NOT NULL, + total_feature_views INT64, + total_features INT64, + avg_null_rate FLOAT64, + max_null_rate FLOAT64 +) +PRIMARY KEY (project_id, feature_service_name, metric_date, granularity, data_source_type) NOT ENFORCED +""" + for ddl in (feature_ddl, view_ddl, service_ddl): + client.query(ddl).result() + + +def _bq_get_monitoring_max_timestamp( + config: RepoConfig, + data_source: BigQuerySource, + timestamp_field: str, +) -> Optional[datetime]: + from_expression = data_source.get_table_query_string() + ts_col = f"`{timestamp_field}`" + sql = f"SELECT MAX({ts_col}) AS _max_ts FROM {from_expression}" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + job = client.query(sql) + job.result() + rows = list(job) + if not rows or rows[0][0] is None: + return None + val = rows[0][0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return val # type: ignore[no-any-return] + + +def _bq_numeric_histogram( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + if min_val == max_val: + sql = ( + f"SELECT COUNT(*) AS cnt FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND 
{ts_filter}" + ) + job = client.query(sql) + job.result() + hrows = list(job) + cnt = int(hrows[0][0]) if hrows else 0 + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + sql = f""" +SELECT + LEAST( + GREATEST( + CAST(FLOOR((CAST({q_col} AS FLOAT64) - {min_val}) / {bin_width}) AS INT64) + 1, + 1 + ), + {bins} + ) AS bucket, + COUNT(*) AS cnt +FROM {from_expression} AS _src +WHERE {q_col} IS NOT NULL AND {ts_filter} +GROUP BY bucket +ORDER BY bucket +""" + job = client.query(sql) + job.result() + rows = list(job) + counts = [0] * bins + for bucket, cnt in rows: + b = int(bucket) + if 1 <= b <= bins: + counts[b - 1] += int(cnt) + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _bq_numeric_stats( + config: RepoConfig, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + select_parts: List[str] = ["COUNT(*) AS _row_count"] + for i, col in enumerate(feature_names): + q = f"`{col}`" + c = f"CAST({q} AS FLOAT64)" + select_parts.extend( + [ + f"COUNT({q}) AS c{i}_nn", + f"AVG({c}) AS c{i}_avg", + f"STDDEV_SAMP({c}) AS c{i}_stddev", + f"MIN({c}) AS c{i}_min", + f"MAX({c}) AS c{i}_max", + f"APPROX_QUANTILES({c}, 100)[OFFSET(50)] AS c{i}_p50", + f"APPROX_QUANTILES({c}, 100)[OFFSET(75)] AS c{i}_p75", + f"APPROX_QUANTILES({c}, 100)[OFFSET(90)] AS c{i}_p90", + f"APPROX_QUANTILES({c}, 100)[OFFSET(95)] AS c{i}_p95", + f"APPROX_QUANTILES({c}, 100)[OFFSET(99)] AS c{i}_p99", + ] + ) + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_filter}" + ) + job = client.query(query) + 
job.result() + rows = list(job) + if not rows: + return [{**_BQ_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + row = rows[0] + row_count = row["_row_count"] or 0 + results: List[Dict[str, Any]] = [] + for i, col in enumerate(feature_names): + base = f"c{i}_" + non_null = row[f"{base}nn"] or 0 + null_count = int(row_count) - int(non_null) + min_v = _bq_opt_float(row[f"{base}min"]) + max_v = _bq_opt_float(row[f"{base}max"]) + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": int(row_count), + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _bq_opt_float(row[f"{base}avg"]), + "stddev": _bq_opt_float(row[f"{base}stddev"]), + "min_val": min_v, + "max_val": max_v, + "p50": _bq_opt_float(row[f"{base}p50"]), + "p75": _bq_opt_float(row[f"{base}p75"]), + "p90": _bq_opt_float(row[f"{base}p90"]), + "p95": _bq_opt_float(row[f"{base}p95"]), + "p99": _bq_opt_float(row[f"{base}p99"]), + "histogram": None, + } + if min_v is not None and max_v is not None and non_null and int(non_null) > 0: + result["histogram"] = _bq_numeric_histogram( + config, + from_expression, + col, + ts_filter, + histogram_bins, + min_v, + max_v, + ) + results.append(result) + return results + + +def _bq_categorical_stats( + config: RepoConfig, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + project_id = ( + config.offline_store.billing_project_id or config.offline_store.project_id + ) + client = _get_bigquery_client( + project=project_id, + location=config.offline_store.location, + ) + query = f""" +WITH filtered AS ( + SELECT * FROM {from_expression} AS _src WHERE {ts_filter} +) +SELECT + (SELECT COUNT(*) FROM filtered) AS row_count, + (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, + (SELECT COUNT(DISTINCT {q_col}) FROM filtered WHERE {q_col} IS NOT NULL) AS unique_count, + CAST({q_col} AS STRING) AS 
def _bq_compute_monitoring_metrics(
    config: RepoConfig,
    data_source: BigQuerySource,
    feature_columns: List[Tuple[str, str]],
    timestamp_field: str,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None,
    histogram_bins: int = 20,
    top_n: int = 10,
) -> List[Dict[str, Any]]:
    """Compute monitoring metrics for feature columns of a BigQuery source.

    Args:
        config: Repo configuration, forwarded to the per-column helpers.
        data_source: Source whose table/query expression is profiled.
        feature_columns: ``(column_name, kind)`` pairs. Only the kinds
            ``"numeric"`` and ``"categorical"`` are processed; any other
            kind is silently skipped.
        timestamp_field: Column used to restrict rows to the date range.
        start_date: Optional lower bound of the range.
        end_date: Optional upper bound of the range.
        histogram_bins: Bin count forwarded to the numeric helper.
        top_n: Number of most frequent values kept per categorical column.

    Returns:
        One metrics dict per processed column: all numeric columns first
        (computed in a single helper call), then the categorical columns
        in input order.
    """
    from_expression = data_source.get_table_query_string()
    # The helpers splice this predicate directly after ``WHERE``, so it must
    # never be empty. NOTE(review): assumes get_timestamp_filter_sql yields a
    # falsy value when no bound applies (the other offline stores guard for
    # exactly that) -- fall back to an always-true predicate in that case.
    ts_filter = (
        get_timestamp_filter_sql(
            start_date,
            end_date,
            timestamp_field,
            date_partition_column=data_source.date_partition_column,
            quote_fields=False,
            cast_style="timestamp_func",
        )
        or "1=1"
    )
    numeric_features = [n for n, t in feature_columns if t == "numeric"]
    categorical_features = [n for n, t in feature_columns if t == "categorical"]
    results: List[Dict[str, Any]] = []
    if numeric_features:
        results.extend(
            _bq_numeric_stats(
                config,
                from_expression,
                numeric_features,
                ts_filter,
                histogram_bins,
            )
        )
    # Categorical columns need one query each (top-N grouping per column).
    results.extend(
        _bq_categorical_stats(config, from_expression, col_name, ts_filter, top_n)
        for col_name in categorical_features
    )
    return results
+ "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + + +def _oracle_mon_table_meta(metric_type: str): + if metric_type == "feature": + return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK + if metric_type == "feature_view": + return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK + if metric_type == "feature_service": + return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _oracle_quote_ident(name: str) -> str: + return f'"{name}"' + + +def _oracle_ts_where(ts_filter: str) -> str: + return f"({ts_filter})" if (ts_filter and ts_filter.strip()) else "1=1" + + +_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { + "feature_type": "numeric", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _oracle_opt_float(val: Any) -> Optional[float]: + return float(val) if val is not None else 
None + + +def _oracle_fetchall(con, sql: str): + cur = con.raw_sql(sql) + try: + return cur.fetchall() + finally: + if hasattr(cur, "close"): + cur.close() + + +def _oracle_exec(con, sql: str) -> None: + cur = con.raw_sql(sql) + try: + pass + finally: + if hasattr(cur, "close"): + cur.close() + + +def _oracle_numeric_histogram( + con, + from_expression: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = _oracle_quote_ident(col_name) + + if min_val == max_val: + tw = _oracle_ts_where(ts_filter) + cnt_row = _oracle_fetchall( + con, + f"SELECT COUNT(*) FROM {from_expression} _src " + f"WHERE {q_col} IS NOT NULL AND {tw}", + ) + cnt = (cnt_row[0][0] if cnt_row else 0) or 0 + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + upper = max_val + (max_val - min_val) * 1e-10 + bin_width = (max_val - min_val) / bins + bw = bin_width if bin_width != 0 else 1e-300 + + tw = _oracle_ts_where(ts_filter) + query = ( + f"SELECT bucket, COUNT(*) AS cnt FROM (" + f" SELECT " + f" CASE WHEN {q_col} IS NULL THEN NULL " + f" WHEN {min_val} = {upper} THEN 1 " + f" ELSE LEAST(GREATEST(" + f" FLOOR((CAST({q_col} AS NUMBER) - {min_val}) / {bw}) + 1, " + f" 1), {bins}) " + f" END AS bucket " + f" FROM {from_expression} _src " + f" WHERE {q_col} IS NOT NULL AND {tw}" + f") sub " + f"WHERE bucket IS NOT NULL " + f"GROUP BY bucket ORDER BY bucket" + ) + + rows = _oracle_fetchall(con, query) + counts = [0] * bins + for bucket, cnt in rows: + b = int(bucket) + if 1 <= b <= bins: + counts[b - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _oracle_numeric_stats( + con, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = 
_oracle_quote_ident(col) + c = f"CAST({q} AS NUMBER)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + tw = _oracle_ts_where(ts_filter) + query = f"SELECT {', '.join(select_parts)} FROM {from_expression} _src WHERE {tw}" + + row = (_oracle_fetchall(con, query) or [None])[0] + + if row is None: + return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = _oracle_opt_float(row[base + 3]) + max_val = _oracle_opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _oracle_opt_float(row[base + 1]), + "stddev": _oracle_opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": _oracle_opt_float(row[base + 5]), + "p75": _oracle_opt_float(row[base + 6]), + "p90": _oracle_opt_float(row[base + 7]), + "p95": _oracle_opt_float(row[base + 8]), + "p99": _oracle_opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _oracle_numeric_histogram( + con, + from_expression, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _oracle_categorical_stats( + con, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = 
_oracle_quote_ident(col_name) + + tw = _oracle_ts_where(ts_filter) + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} _src WHERE {tw}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" TO_CHAR({q_col}) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} " + f"ORDER BY cnt DESC " + f"FETCH FIRST {int(top_n)} ROWS ONLY" + ) + + rows = _oracle_fetchall(con, query) + + if not rows: + return { + **_EMPTY_METRIC_TEMPLATE, + "feature_name": col_name, + "feature_type": "categorical", + } + + row_count = rows[0][0] + null_count = rows[0][1] + unique_count = rows[0][2] + + top_entries = [{"value": r[3], "count": r[4]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _oracle_escape_literal(val: Any) -> str: + if val is None: + return "NULL" + if isinstance(val, bool): + return "1" if val else "0" + if isinstance(val, (int, float)) and not isinstance(val, bool): + return str(val) + if isinstance(val, datetime): + s = val.isoformat(sep=" ", timespec="seconds") + return f"TIMESTAMP '{s}'" + if isinstance(val, date): + return f"DATE '{val.isoformat()}'" + s = str(val).replace("'", "''") + return f"'{s}'" + + +def _oracle_merge_metric_row( + con, table: str, columns: List[str], pk_cols: 
def _oracle_try_execute_ddl(con, ddl: str) -> None:
    """Execute a DDL statement, tolerating "object already exists".

    The statement is wrapped in an anonymous PL/SQL block that runs it via
    ``EXECUTE IMMEDIATE`` and re-raises every error except SQLCODE -955
    (ORA-00955, name already used by an existing object).

    Args:
        con: Connection object handed to ``_oracle_exec``.
        ddl: The DDL text; surrounding whitespace is stripped and single
            quotes are doubled so it can be embedded in a PL/SQL string.
    """
    statement = ddl.strip().replace("'", "''")
    block_lines = [
        "BEGIN",
        f" EXECUTE IMMEDIATE '{statement}';",
        "EXCEPTION",
        " WHEN OTHERS THEN",
        " IF SQLCODE != -955 THEN RAISE;",
        " END IF;",
        "END;",
    ]
    _oracle_exec(con, "\n".join(block_lines))
@staticmethod
def ensure_monitoring_tables(config: RepoConfig) -> None:
    """Create the three Oracle monitoring tables if they do not exist yet.

    Column names are created as *quoted* lower-case identifiers. The code
    that reads and writes these tables addresses every column through
    double-quoted lower-case names (``_oracle_quote_ident`` and the
    MERGE built by ``_oracle_merge_metric_row``). Oracle folds unquoted
    names in DDL to upper case, so unquoted columns would never match
    those quoted references (ORA-00904).

    Args:
        config: Repo configuration; its offline store must be an
            ``OracleOfflineStoreConfig``.
    """
    assert isinstance(config.offline_store, OracleOfflineStoreConfig)
    con = get_ibis_connection(config)

    # NOTE(review): "histogram" holds a JSON document in VARCHAR2(4000);
    # large top-N payloads could exceed that -- confirm the size budget.
    _oracle_try_execute_ddl(
        con,
        f"""
        CREATE TABLE {_MON_FEATURE_TABLE} (
            "project_id" VARCHAR2(255) NOT NULL,
            "feature_view_name" VARCHAR2(255) NOT NULL,
            "feature_name" VARCHAR2(255) NOT NULL,
            "metric_date" DATE NOT NULL,
            "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
            "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
            "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
            "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
            "feature_type" VARCHAR2(50) NOT NULL,
            "row_count" NUMBER,
            "null_count" NUMBER,
            "null_rate" NUMBER,
            "mean" NUMBER,
            "stddev" NUMBER,
            "min_val" NUMBER,
            "max_val" NUMBER,
            "p50" NUMBER,
            "p75" NUMBER,
            "p90" NUMBER,
            "p95" NUMBER,
            "p99" NUMBER,
            "histogram" VARCHAR2(4000),
            CONSTRAINT pk_fm PRIMARY KEY ("project_id", "feature_view_name",
                "feature_name", "metric_date", "granularity", "data_source_type")
        )
        """,
    )

    _oracle_try_execute_ddl(
        con,
        f"""
        CREATE TABLE {_MON_VIEW_TABLE} (
            "project_id" VARCHAR2(255) NOT NULL,
            "feature_view_name" VARCHAR2(255) NOT NULL,
            "metric_date" DATE NOT NULL,
            "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
            "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
            "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
            "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
            "total_row_count" NUMBER,
            "total_features" NUMBER,
            "features_with_nulls" NUMBER,
            "avg_null_rate" NUMBER,
            "max_null_rate" NUMBER,
            CONSTRAINT pk_fvm PRIMARY KEY ("project_id", "feature_view_name",
                "metric_date", "granularity", "data_source_type")
        )
        """,
    )

    _oracle_try_execute_ddl(
        con,
        f"""
        CREATE TABLE {_MON_SERVICE_TABLE} (
            "project_id" VARCHAR2(255) NOT NULL,
            "feature_service_name" VARCHAR2(255) NOT NULL,
            "metric_date" DATE NOT NULL,
            "granularity" VARCHAR2(20) DEFAULT 'daily' NOT NULL,
            "data_source_type" VARCHAR2(50) DEFAULT 'batch' NOT NULL,
            "computed_at" TIMESTAMP WITH TIME ZONE DEFAULT SYSTIMESTAMP NOT NULL,
            "is_baseline" NUMBER(1) DEFAULT 0 NOT NULL,
            "total_feature_views" NUMBER,
            "total_features" NUMBER,
            "avg_null_rate" NUMBER,
            "max_null_rate" NUMBER,
            CONSTRAINT pk_fsm PRIMARY KEY ("project_id", "feature_service_name",
                "metric_date", "granularity", "data_source_type")
        )
        """,
    )
@staticmethod
def query_monitoring_metrics(
    config: RepoConfig,
    project: str,
    metric_type: str,
    filters: Optional[Dict[str, Any]] = None,
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
) -> List[Dict[str, Any]]:
    """Read stored monitoring metrics for a project, oldest first.

    Args:
        config: Repo configuration; its offline store must be an
            ``OracleOfflineStoreConfig``.
        project: Value matched against ``project_id``.
        metric_type: ``"feature"``, ``"feature_view"`` or
            ``"feature_service"``; selects the table and its columns.
        filters: Optional ``column -> value`` equality filters. Entries
            whose value is ``None`` are ignored.
        start_date: Optional inclusive lower bound on ``metric_date``.
        end_date: Optional inclusive upper bound on ``metric_date``.

    Returns:
        One dict per row keyed by column name. ``histogram`` is decoded
        from JSON when possible, ``metric_date`` is an ISO date string,
        ``computed_at`` an ISO timestamp string and ``is_baseline`` a bool.

    Raises:
        ValueError: If ``metric_type`` is unknown or a filter key is not a
            column of the selected table. Filter keys are spliced into the
            SQL as identifiers, so only known columns are accepted.
    """
    assert isinstance(config.offline_store, OracleOfflineStoreConfig)

    table, columns, _ = _oracle_mon_table_meta(metric_type)
    known_columns = set(columns)

    conditions = [
        f"{_oracle_quote_ident('project_id')} = {_oracle_escape_literal(project)}"
    ]
    for key, value in (filters or {}).items():
        if value is None:
            continue
        if key not in known_columns:
            raise ValueError(
                f"Unknown filter column '{key}' for metric_type '{metric_type}'"
            )
        conditions.append(
            f"{_oracle_quote_ident(key)} = {_oracle_escape_literal(value)}"
        )
    if start_date:
        conditions.append(
            f"{_oracle_quote_ident('metric_date')} >= {_oracle_escape_literal(start_date)}"
        )
    if end_date:
        conditions.append(
            f"{_oracle_quote_ident('metric_date')} <= {_oracle_escape_literal(end_date)}"
        )

    where_sql = " AND ".join(conditions)
    col_list = ", ".join(_oracle_quote_ident(c) for c in columns)

    con = get_ibis_connection(config)
    rows = _oracle_fetchall(
        con,
        f"SELECT {col_list} FROM {table} WHERE {where_sql} "
        f"ORDER BY {_oracle_quote_ident('metric_date')} ASC",
    )

    results = []
    for row in rows:
        record = dict(zip(columns, row))

        histogram = record.get("histogram")
        if isinstance(histogram, str):
            try:
                record["histogram"] = json.loads(histogram)
            except json.JSONDecodeError:
                # Keep the raw text rather than failing the whole query.
                pass

        metric_date = record.get("metric_date")
        # NOTE(review): the driver is expected to hand Oracle DATE values
        # back as datetime objects -- drop the (midnight) time part so the
        # result is a plain ISO date.
        if isinstance(metric_date, datetime):
            metric_date = metric_date.date()
        if hasattr(metric_date, "isoformat"):
            record["metric_date"] = metric_date.isoformat()

        computed_at = record.get("computed_at")
        if hasattr(computed_at, "isoformat"):
            record["computed_at"] = computed_at.isoformat()

        # Stored as NUMBER(1); expose it as a real boolean.
        if record.get("is_baseline") is not None:
            record["is_baseline"] = bool(int(record["is_baseline"]))
        results.append(record)

    return results
compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + assert isinstance(data_source, SparkSource) + + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + quote_fields=False, + ) + ts_clause = ts_filter if ts_filter else "1=1" + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + if numeric_features: + results.extend( + _spark_sql_numeric_stats( + spark_session, + from_expression, + numeric_features, + ts_clause, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _spark_sql_categorical_stats( + spark_session, + from_expression, + col_name, + ts_clause, + top_n, + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, SparkOfflineStoreConfig) + assert isinstance(data_source, SparkSource) + + spark_session = get_spark_session_or_start_new_with_repoconfig( + store_config=config.offline_store + ) + from_expression = data_source.get_table_query_string() + q_ts = f"`{timestamp_field}`" + sql = f"SELECT MAX({q_ts}) AS max_ts FROM {from_expression} AS _src" + row = spark_session.sql(sql).collect() + if not row or row[0][0] is None: + return None + val = row[0][0] + if isinstance(val, datetime): + return val if 
@staticmethod
def save_monitoring_metrics(
    config: RepoConfig,
    metric_type: str,
    metrics: List[Dict[str, Any]],
) -> None:
    """Upsert monitoring metrics into the Spark table for ``metric_type``.

    The target table is read completely into pandas, merged with the new
    rows on the table's primary-key columns, and written back with
    ``mode("overwrite")``. Nothing happens when ``metrics`` is empty.

    Args:
        config: Repo configuration; its offline store must be a
            ``SparkOfflineStoreConfig``.
        metric_type: Selects table, column list and key columns via
            ``_spark_mon_table_meta``.
        metrics: Metric dicts; keys missing from a dict become ``None``.
    """
    if not metrics:
        return
    assert isinstance(config.offline_store, SparkOfflineStoreConfig)

    table, columns, pk_columns = _spark_mon_table_meta(metric_type)
    records = [{name: metric.get(name) for name in columns} for metric in metrics]
    incoming = _spark_normalize_histogram_column(pd.DataFrame(records))

    session = get_spark_session_or_start_new_with_repoconfig(
        store_config=config.offline_store
    )
    merged = incoming
    if session.catalog.tableExists(table):
        existing = session.table(table).toPandas()
        merged = _spark_pandas_upsert(existing, incoming, pk_columns)

    writer = session.createDataFrame(merged).write.mode("overwrite")
    writer.saveAsTable(table)
@staticmethod
def clear_monitoring_baseline(
    config: RepoConfig,
    project: str,
    feature_view_name: Optional[str] = None,
    feature_name: Optional[str] = None,
    data_source_type: Optional[str] = None,
) -> None:
    """Reset the ``is_baseline`` flag on matching feature-metric rows.

    The feature metrics table is loaded into pandas, the flag is set to
    ``False`` on every baseline row of ``project`` that also matches the
    optional filters, and the whole table is written back with
    ``mode("overwrite")``. Returns without doing anything when the table
    does not exist.

    Args:
        config: Repo configuration; its offline store must be a
            ``SparkOfflineStoreConfig``.
        project: Value matched against ``project_id``.
        feature_view_name: Optional equality filter.
        feature_name: Optional equality filter.
        data_source_type: Optional equality filter.
    """
    assert isinstance(config.offline_store, SparkOfflineStoreConfig)
    session = get_spark_session_or_start_new_with_repoconfig(
        store_config=config.offline_store
    )
    if not session.catalog.tableExists(_SPARK_MON_FEATURE_TABLE):
        return

    frame = session.table(_SPARK_MON_FEATURE_TABLE).toPandas()
    selected = (frame["project_id"] == project) & (frame["is_baseline"] == True)  # noqa: E712

    # Narrow the selection by each filter the caller actually supplied.
    optional_filters = (
        ("feature_view_name", feature_view_name),
        ("feature_name", feature_name),
        ("data_source_type", data_source_type),
    )
    for column, wanted in optional_filters:
        if wanted is not None:
            selected &= frame[column] == wanted

    frame.loc[selected, "is_baseline"] = False
    writer = session.createDataFrame(frame).write.mode("overwrite")
    writer.saveAsTable(_SPARK_MON_FEATURE_TABLE)
"p90", + "p95", + "p99", + "histogram", +] +_SPARK_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_SPARK_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_SPARK_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +_SPARK_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_SPARK_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + +_SPARK_MONITORING_DDL_STATEMENTS = [ + f""" +CREATE TABLE IF NOT EXISTS {_SPARK_MON_FEATURE_TABLE} ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + feature_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + feature_type STRING NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate DOUBLE, + mean DOUBLE, + stddev DOUBLE, + min_val DOUBLE, + max_val DOUBLE, + p50 DOUBLE, + p75 DOUBLE, + p90 DOUBLE, + p95 DOUBLE, + p99 DOUBLE, + histogram STRING +) USING PARQUET +""", + f""" +CREATE TABLE IF NOT EXISTS {_SPARK_MON_VIEW_TABLE} ( + project_id STRING NOT NULL, + feature_view_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + total_row_count BIGINT, + total_features INT, + features_with_nulls INT, + avg_null_rate DOUBLE, + max_null_rate 
DOUBLE +) USING PARQUET +""", + f""" +CREATE TABLE IF NOT EXISTS {_SPARK_MON_SERVICE_TABLE} ( + project_id STRING NOT NULL, + feature_service_name STRING NOT NULL, + metric_date DATE NOT NULL, + granularity STRING NOT NULL, + data_source_type STRING NOT NULL, + computed_at TIMESTAMP NOT NULL, + is_baseline BOOLEAN NOT NULL, + total_feature_views INT, + total_features INT, + avg_null_rate DOUBLE, + max_null_rate DOUBLE +) USING PARQUET +""", +] + +_SPARK_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { + "feature_name": "", + "feature_type": "categorical", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _spark_mon_table_meta(metric_type: str): + if metric_type == "feature": + return ( + _SPARK_MON_FEATURE_TABLE, + _SPARK_MON_FEATURE_COLUMNS, + _SPARK_MON_FEATURE_PK, + ) + if metric_type == "feature_view": + return _SPARK_MON_VIEW_TABLE, _SPARK_MON_VIEW_COLUMNS, _SPARK_MON_VIEW_PK + if metric_type == "feature_service": + return ( + _SPARK_MON_SERVICE_TABLE, + _SPARK_MON_SERVICE_COLUMNS, + _SPARK_MON_SERVICE_PK, + ) + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _spark_normalize_histogram_column(pdf: pd.DataFrame) -> pd.DataFrame: + if "histogram" not in pdf.columns: + return pdf + out = pdf.copy() + + def _ser(x: Any) -> Any: + if x is None: + return None + if isinstance(x, str): + return x + return json.dumps(x) + + out["histogram"] = out["histogram"].map(_ser) + return out + + +def _spark_pandas_upsert( + pdf_old: pd.DataFrame, + pdf_new: pd.DataFrame, + pk_columns: List[str], +) -> pd.DataFrame: + if pdf_old.empty: + return pdf_new + old_idx = pdf_old.set_index(pk_columns) + new_idx = pdf_new.set_index(pk_columns) + kept = old_idx.loc[~old_idx.index.isin(new_idx.index)] + kept_df = kept.reset_index() + return pd.concat([kept_df, pdf_new], ignore_index=True) + + 
+def _spark_opt_float(val: Any) -> Optional[float]: + return float(val) if val is not None else None + + +def _spark_sql_numeric_stats( + spark_session: SparkSession, + from_expression: str, + feature_names: List[str], + ts_clause: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f"`{col}`" + c = f"CAST({q} AS DOUBLE)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_APPROX({c}, 0.50)", + f"PERCENTILE_APPROX({c}, 0.75)", + f"PERCENTILE_APPROX({c}, 0.90)", + f"PERCENTILE_APPROX({c}, 0.95)", + f"PERCENTILE_APPROX({c}, 0.99)", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_clause}" + ) + rows = spark_session.sql(query).collect() + if not rows or rows[0] is None: + return [ + {**_SPARK_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names + ] + + row = rows[0] + row_count = int(row[0] or 0) + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = int(row[base] or 0) + null_count = row_count - non_null + + min_val = _spark_opt_float(row[base + 3]) + max_val = _spark_opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _spark_opt_float(row[base + 1]), + "stddev": _spark_opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": _spark_opt_float(row[base + 5]), + "p75": _spark_opt_float(row[base + 6]), + "p90": _spark_opt_float(row[base + 7]), + "p95": _spark_opt_float(row[base + 8]), + "p99": _spark_opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _spark_sql_numeric_histogram( + spark_session, + 
from_expression, + col, + ts_clause, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _spark_sql_numeric_histogram( + spark_session: SparkSession, + from_expression: str, + col_name: str, + ts_clause: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + + if min_val == max_val: + sql = ( + f"SELECT COUNT(*) FROM {from_expression} AS _src " + f"WHERE {q_col} IS NOT NULL AND {ts_clause}" + ) + cnt = int(spark_session.sql(sql).collect()[0][0] or 0) + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + bin_width = (max_val - min_val) / bins + cast_col = f"CAST({q_col} AS DOUBLE)" + inner = ( + f"CASE WHEN {min_val} = {max_val} THEN 1 " + f"ELSE LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins}) " + f"END AS bucket" + ) + + query = ( + f"SELECT bucket, COUNT(*) AS cnt FROM (" + f" SELECT {inner} " + f" FROM {from_expression} AS _src " + f" WHERE {q_col} IS NOT NULL AND {ts_clause}" + f") AS _b WHERE bucket IS NOT NULL " + f"GROUP BY bucket ORDER BY bucket" + ) + hrows = spark_session.sql(query).collect() + counts = [0] * bins + for hr in hrows: + bucket = int(hr[0] or 0) + cnt = int(hr[1] or 0) + if 1 <= bucket <= bins: + counts[bucket - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _spark_sql_categorical_stats( + spark_session: SparkSession, + from_expression: str, + col_name: str, + ts_clause: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f"`{col_name}`" + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_clause}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS 
def _spark_rows_to_metric_dicts(
    rows: List[Any],
    columns: List[str],
) -> List[Dict[str, Any]]:
    """Convert result rows into plain metric dictionaries.

    Each row must expose ``asDict()``. Only the keys named in ``columns`` are
    kept (missing keys become ``None``). A string ``histogram`` is decoded
    from JSON when it parses, and ``metric_date`` / ``computed_at`` values
    are rendered with ``isoformat()`` when they are date / datetime objects.

    Args:
        rows: Row objects providing ``asDict()``.
        columns: Names of the fields to copy into each record.

    Returns:
        One dictionary per input row, in input order.
    """
    from datetime import date as _date
    from datetime import datetime as _datetime

    # (field, type that triggers ISO formatting) for the temporal fields.
    iso_fields = (("metric_date", _date), ("computed_at", _datetime))

    def _to_record(row: Any) -> Dict[str, Any]:
        raw = row.asDict()
        record = {name: raw.get(name) for name in columns}

        encoded = record.get("histogram")
        if isinstance(encoded, str):
            try:
                record["histogram"] = json.loads(encoded)
            except json.JSONDecodeError:
                # Leave the raw string in place when it is not valid JSON.
                pass

        for field, expected_type in iso_fields:
            value = record.get(field)
            if isinstance(value, expected_type):
                record[field] = value.isoformat()
        return record

    return [_to_record(row) for row in rows]
a/sdk/python/feast/infra/offline_stores/dask.py +++ b/sdk/python/feast/infra/offline_stores/dask.py @@ -1,15 +1,18 @@ +import json import os import uuid -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union import dask import dask.dataframe as dd +import numpy as np import pandas as pd import pyarrow +import pyarrow.compute as pc import pyarrow.dataset -import pyarrow.parquet +import pyarrow.parquet as pq import pytz from feast.data_source import DataSource @@ -582,6 +585,466 @@ def offline_write_batch( writer.write_table(new_table) writer.close() + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + table = _dask_read_batch_arrow(data_source, config.repo_path) + table = _dask_filter_arrow_by_timestamp( + table, timestamp_field, start_date, end_date + ) + + results: List[Dict[str, Any]] = [] + for name, ftype in feature_columns: + if name not in table.column_names: + continue + col = table[name] + if ftype == "numeric": + m = _dask_compute_numeric_metrics(col, histogram_bins) + elif ftype == "categorical": + m = _dask_compute_categorical_metrics(col, top_n) + else: + continue + m["feature_name"] = name + results.append(m) + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + absolute_path = FileSource.get_uri_for_file_path( + 
repo_path=config.repo_path, + uri=data_source.file_options.uri, + ) + filesystem, path = FileSource.create_filesystem_and_path( + str(absolute_path), data_source.file_options.s3_endpoint_override + ) + try: + t = pq.read_table(path, filesystem=filesystem, columns=[timestamp_field]) + except Exception: + return None + if t.num_rows == 0: + return None + arr = t[timestamp_field] + mx = pc.max(arr) + val = mx.as_py() + if val is None: + return None + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return None + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + base = os.path.join(_dask_monitoring_base(config), _DASK_MON_DIR) + os.makedirs(base, exist_ok=True) + + tables = [ + (_DASK_FEATURE_METRICS_FILE, _DASK_MON_FEATURE_COLUMNS), + (_DASK_VIEW_METRICS_FILE, _DASK_MON_VIEW_COLUMNS), + (_DASK_SERVICE_METRICS_FILE, _DASK_MON_SERVICE_COLUMNS), + ] + for fname, columns in tables: + fpath = _dask_monitoring_path(config, fname) + if not os.path.isfile(fpath): + os.makedirs(os.path.dirname(fpath), exist_ok=True) + pd.DataFrame(columns=columns).to_parquet(fpath, index=False) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + + fname, columns, pk = _dask_mon_table_meta(metric_type) + path = _dask_monitoring_path(config, fname) + _dask_parquet_upsert(path, columns, pk, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert 
isinstance(config.offline_store, DaskOfflineStoreConfig) + + fname, columns, _ = _dask_mon_table_meta(metric_type) + path = _dask_monitoring_path(config, fname) + return _dask_parquet_query( + path, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, DaskOfflineStoreConfig) + + path = _dask_monitoring_path(config, _DASK_FEATURE_METRICS_FILE) + tab = _dask_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return + + df = tab.to_pandas() + mask = df["project_id"] == project + if feature_view_name is not None: + mask = mask & (df["feature_view_name"] == feature_view_name) + if feature_name is not None: + mask = mask & (df["feature_name"] == feature_name) + if data_source_type is not None: + mask = mask & (df["data_source_type"] == data_source_type) + mask = mask & (df["is_baseline"].isin([True, 1])) + df.loc[mask, "is_baseline"] = False + pq.write_table(pyarrow.Table.from_pandas(df, preserve_index=False), path) + + +_DASK_MON_DIR = "feast_monitoring" +_DASK_FEATURE_METRICS_FILE = "feature_metrics.parquet" +_DASK_VIEW_METRICS_FILE = "feature_view_metrics.parquet" +_DASK_SERVICE_METRICS_FILE = "feature_service_metrics.parquet" + +_DASK_MON_FEATURE_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_DASK_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_DASK_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + 
def _dask_monitoring_base(config: RepoConfig) -> str:
    """Return the base directory for monitoring files as a string.

    Uses ``config.repo_path`` when it is truthy and falls back to ``"."``
    (the current directory) otherwise.
    """
    repo_root = config.repo_path
    if not repo_root:
        return "."
    return str(repo_root)
def _dask_compute_numeric_metrics(
    column: pyarrow.ChunkedArray, histogram_bins: int
) -> Dict[str, Any]:
    """Summarise a numeric Arrow column for monitoring.

    Args:
        column: Column values; nulls are counted but excluded from the
            statistics.
        histogram_bins: Number of equal-width bins for the histogram.

    Returns:
        A metric dict with ``row_count``, ``null_count``, ``null_rate`` and,
        when at least one non-null value exists, ``mean``, sample ``stddev``
        (``ddof=1``), ``min_val``, ``max_val``, the p50/p75/p90/p95/p99
        quantiles and a ``histogram`` of bin edges, counts and bin width.
        With no non-null values those fields stay ``None``.
    """
    # ``ChunkedArray.length`` is a method, not a property, so reading it
    # without calling it yields a bound method and ``total > 0`` raises
    # TypeError. ``len()`` returns the integer row count.
    total = len(column)
    null_count = column.null_count
    result: Dict[str, Any] = {
        "feature_type": "numeric",
        "row_count": total,
        "null_count": null_count,
        "null_rate": null_count / total if total > 0 else 0.0,
        "mean": None,
        "stddev": None,
        "min_val": None,
        "max_val": None,
        "p50": None,
        "p75": None,
        "p90": None,
        "p95": None,
        "p99": None,
        "histogram": None,
    }

    valid = pc.drop_null(column)
    if len(valid) == 0:
        return result

    # Work in float64 so integer and decimal-like inputs share one code path.
    float_array = pc.cast(valid, pyarrow.float64())
    result["mean"] = pc.mean(float_array).as_py()
    result["stddev"] = pc.stddev(float_array, ddof=1).as_py()

    min_max = pc.min_max(float_array)
    result["min_val"] = min_max["min"].as_py()
    result["max_val"] = min_max["max"].as_py()

    quantiles = pc.quantile(float_array, q=[0.50, 0.75, 0.90, 0.95, 0.99])
    (
        result["p50"],
        result["p75"],
        result["p90"],
        result["p95"],
        result["p99"],
    ) = quantiles.to_pylist()

    np_array = float_array.to_numpy()
    counts, bin_edges = np.histogram(np_array, bins=histogram_bins)
    result["histogram"] = {
        "bins": bin_edges.tolist(),
        "counts": counts.tolist(),
        "bin_width": (
            float(bin_edges[1] - bin_edges[0]) if len(bin_edges) > 1 else 0.0
        ),
    }

    return result
def _dask_parquet_upsert(
    path: str,
    columns: List[str],
    pk_cols: List[str],
    new_rows: List[Dict[str, Any]],
) -> None:
    """Merge metric rows into a Parquet file, replacing rows with equal keys.

    Incoming rows are appended after any rows already stored at ``path``;
    duplicates on ``pk_cols`` are then dropped keeping the last occurrence,
    so incoming rows win. The whole file is rewritten.

    Args:
        path: Target Parquet file; its parent directory is created if needed.
        columns: Column layout used for the incoming rows.
        pk_cols: Columns that identify a row for de-duplication.
        new_rows: Metric dictionaries to store. A non-string, non-``None``
            ``histogram`` value is serialised to a JSON string first.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)

    def _serialise(row: Dict[str, Any]) -> Dict[str, Any]:
        # Copy so the caller's dictionaries are not mutated.
        record = dict(row)
        hist = record.get("histogram")
        if hist is not None and not isinstance(hist, str):
            record["histogram"] = json.dumps(hist)
        return record

    incoming = pd.DataFrame([_serialise(row) for row in new_rows], columns=columns)

    stored = _dask_read_parquet_if_exists(path)
    if stored is None:
        merged = incoming
    else:
        merged = pd.concat([stored.to_pandas(), incoming], ignore_index=True)

    deduped = merged.drop_duplicates(subset=pk_cols, keep="last")
    pq.write_table(pyarrow.Table.from_pandas(deduped, preserve_index=False), path)
Any]], + start_date: Optional[date], + end_date: Optional[date], +) -> List[Dict[str, Any]]: + tab = _dask_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return [] + + df = tab.to_pandas() + df = df[df["project_id"] == project] + if filters: + for key, value in filters.items(): + if value is not None: + df = df[df[key] == value] + if start_date is not None: + df = df[df["metric_date"] >= start_date] + if end_date is not None: + df = df[df["metric_date"] <= end_date] + df = df.sort_values("metric_date", ascending=True) + + results = [] + for _, row in df.iterrows(): + record = {c: row.get(c) for c in columns} + if "histogram" in record and isinstance(record["histogram"], str): + try: + record["histogram"] = json.loads(record["histogram"]) + except json.JSONDecodeError: + pass + if "metric_date" in record and hasattr(record["metric_date"], "isoformat"): + record["metric_date"] = record["metric_date"].isoformat() + if "computed_at" in record and hasattr(record["computed_at"], "isoformat"): + record["computed_at"] = record["computed_at"].isoformat() + results.append(record) + + return results + def _get_entity_df_event_timestamp_range( entity_df: Union[pd.DataFrame, str], diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py index e0a69e53c56..3f848e2870e 100644 --- a/sdk/python/feast/infra/offline_stores/duckdb.py +++ b/sdk/python/feast/infra/offline_stores/duckdb.py @@ -1,11 +1,15 @@ +import json import os -from datetime import datetime +from datetime import date, datetime, timezone from pathlib import Path -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +import duckdb import ibis import pandas as pd import pyarrow +import pyarrow as pa +import pyarrow.parquet as pq from ibis.expr.types import Table from pydantic import StrictStr @@ -23,6 +27,7 @@ write_logged_features_ibis, ) from 
feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry from feast.repo_config import FeastConfigBaseModel, RepoConfig @@ -113,6 +118,427 @@ def _write_data_source( ) +# ------------------------------------------------------------------ # +# DuckDB monitoring (Parquet-backed) +# ------------------------------------------------------------------ # + +MONITORING_DIR = "feast_monitoring" +FEATURE_METRICS_FILE = "feature_metrics.parquet" +VIEW_METRICS_FILE = "feature_view_metrics.parquet" +SERVICE_METRICS_FILE = "feature_service_metrics.parquet" + +_MON_FEATURE_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + + +def _duckdb_monitoring_base(config: RepoConfig) -> str: 
def _duckdb_quote_ident(name: str) -> str:
    """Return ``name`` as a double-quoted SQL identifier.

    Embedded double quotes are escaped by doubling them, so a name that
    contains ``"`` cannot terminate the identifier early and change the
    meaning of the surrounding statement.
    """
    escaped = name.replace('"', '""')
    return f'"{escaped}"'
[{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = _duckdb_opt_float(row[base + 3]) + max_val = _duckdb_opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _duckdb_opt_float(row[base + 1]), + "stddev": _duckdb_opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": _duckdb_opt_float(row[base + 5]), + "p75": _duckdb_opt_float(row[base + 6]), + "p90": _duckdb_opt_float(row[base + 7]), + "p95": _duckdb_opt_float(row[base + 8]), + "p99": _duckdb_opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _duckdb_numeric_histogram( + conn, + from_expr, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _duckdb_numeric_histogram( + conn: duckdb.DuckDBPyConnection, + from_expr: str, + col_name: str, + ts_filter: str, + bins: int, + min_val: float, + max_val: float, +) -> Dict[str, Any]: + q_col = _duckdb_quote_ident(col_name) + + tw = _duckdb_ts_where(ts_filter) + if min_val == max_val: + cnt = conn.execute( + f"SELECT COUNT(*) FROM {from_expr} AS _src " + f"WHERE {q_col} IS NOT NULL AND {tw}" + ).fetchone()[0] + return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0} + + upper = max_val + (max_val - min_val) * 1e-10 + bin_width = (max_val - min_val) / bins + + query = ( + f"SELECT width_bucket(CAST({q_col} AS DOUBLE), {min_val}, {upper}, {bins}) AS bucket, " + f"COUNT(*) AS cnt " + f"FROM {from_expr} AS _src " + f"WHERE {q_col} IS NOT NULL AND {tw} " + f"GROUP BY bucket ORDER BY 
def _duckdb_categorical_stats(
    conn: duckdb.DuckDBPyConnection,
    from_expr: str,
    col_name: str,
    ts_filter: str,
    top_n: int,
) -> Dict[str, Any]:
    """Compute categorical monitoring metrics for one column with DuckDB.

    The main query returns one row per distinct non-null value (the
    ``top_n`` most frequent), each repeating the window totals (row count,
    null count, distinct count) obtained from scalar subqueries.

    Args:
        conn: Open DuckDB connection.
        from_expr: SQL fragment placed after ``FROM`` (aliased ``_src``).
        col_name: Column to summarise.
        ts_filter: Optional timestamp predicate; empty means no filter.
        top_n: Maximum number of distinct values reported individually.

    Returns:
        A metric dict whose ``histogram`` holds the top values, the count of
        all remaining non-null values (``other_count``) and ``unique_count``.
        When the main query yields no rows, ``histogram`` is ``None`` but
        ``row_count`` / ``null_count`` / ``null_rate`` still reflect the data.
    """
    q_col = _duckdb_quote_ident(col_name)

    tw = _duckdb_ts_where(ts_filter)
    query = (
        f"WITH filtered AS ("
        f" SELECT * FROM {from_expr} AS _src WHERE {tw}"
        f") "
        f"SELECT "
        f" (SELECT COUNT(*) FROM filtered) AS row_count, "
        f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, "
        f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered "
        f" WHERE {q_col} IS NOT NULL) AS unique_count, "
        f" CAST({q_col} AS VARCHAR) AS value, COUNT(*) AS cnt "
        f"FROM filtered WHERE {q_col} IS NOT NULL "
        f"GROUP BY {q_col} "
        f"ORDER BY cnt DESC LIMIT {int(top_n)}"
    )

    rows = conn.execute(query).fetchall()

    if not rows:
        # The grouped query has no rows when the window is empty, when every
        # value is NULL, or when top_n <= 0. The totals ride on the group
        # rows, so they must be fetched separately here; otherwise an
        # all-NULL column would be reported as row_count=0 / null_rate=0.0.
        totals = conn.execute(
            f"SELECT COUNT(*), COUNT({q_col}) FROM {from_expr} AS _src WHERE {tw}"
        ).fetchone()
        total_rows = int(totals[0] or 0) if totals else 0
        non_null_rows = int(totals[1] or 0) if totals else 0
        missing = total_rows - non_null_rows
        return {
            **_EMPTY_METRIC_TEMPLATE,
            "feature_name": col_name,
            "feature_type": "categorical",
            "row_count": total_rows,
            "null_count": missing,
            "null_rate": missing / total_rows if total_rows > 0 else 0.0,
        }

    # The totals are identical on every row; read them from the first one.
    row_count = rows[0][0]
    null_count = rows[0][1]
    unique_count = rows[0][2]

    top_entries = [{"value": r[3], "count": r[4]} for r in rows]
    top_total = sum(e["count"] for e in top_entries)
    other_count = (row_count - null_count) - top_total

    return {
        "feature_name": col_name,
        "feature_type": "categorical",
        "row_count": row_count,
        "null_count": null_count,
        "null_rate": null_count / row_count if row_count > 0 else 0.0,
        "mean": None,
        "stddev": None,
        "min_val": None,
        "max_val": None,
        "p50": None,
        "p75": None,
        "p90": None,
        "p95": None,
        "p99": None,
        "histogram": {
            "values": top_entries,
            "other_count": max(other_count, 0),
            "unique_count": unique_count,
        },
    }
def _duckdb_parquet_query(
    path: str,
    columns: List[str],
    project: str,
    filters: Optional[Dict[str, Any]],
    start_date: Optional[date],
    end_date: Optional[date],
) -> List[Dict[str, Any]]:
    """Read stored metrics from a Parquet file and return matching records.

    Rows are restricted to ``project``, then to every non-``None`` entry of
    ``filters`` (column equality), then to the optional ``metric_date``
    bounds (both inclusive), and finally sorted by ``metric_date`` ascending.

    Args:
        path: Parquet file holding the metrics.
        columns: Fields to include in each returned record.
        project: Value matched against ``project_id``.
        filters: Optional ``{column: value}`` equality filters.
        start_date: Optional lower bound for ``metric_date``.
        end_date: Optional upper bound for ``metric_date``.

    Returns:
        One dict per matching row. A string ``histogram`` is JSON-decoded
        when it parses; ``metric_date`` and ``computed_at`` are rendered via
        ``isoformat()`` when the value supports it. An empty list is returned
        when the file is missing or has no rows.
    """
    stored = _duckdb_read_parquet_if_exists(path)
    if stored is None or stored.num_rows == 0:
        return []

    frame = stored.to_pandas()
    frame = frame[frame["project_id"] == project]
    for column, expected in (filters or {}).items():
        if expected is not None:
            frame = frame[frame[column] == expected]
    if start_date is not None:
        frame = frame[frame["metric_date"] >= start_date]
    if end_date is not None:
        frame = frame[frame["metric_date"] <= end_date]
    frame = frame.sort_values("metric_date", ascending=True)

    def _to_record(row: Any) -> Dict[str, Any]:
        record = {name: row.get(name) for name in columns}

        encoded = record.get("histogram")
        if isinstance(encoded, str):
            try:
                record["histogram"] = json.loads(encoded)
            except json.JSONDecodeError:
                # Keep the raw string when it is not valid JSON.
                pass

        for field in ("metric_date", "computed_at"):
            if field in record and hasattr(record[field], "isoformat"):
                record[field] = record[field].isoformat()
        return record

    return [_to_record(row) for _, row in frame.iterrows()]
+ ) + + class DuckDBOfflineStoreConfig(FeastConfigBaseModel): type: StrictStr = "duckdb" # """ Offline store type selector""" @@ -229,3 +655,156 @@ def write_logged_features( logging_config=logging_config, registry=registry, ) + + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + from_expr = _duckdb_sql_from_expression(config, data_source) + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + cast_style="timestamp", + date_time_separator=" ", + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + conn = duckdb.connect() + if numeric_features: + results.extend( + _duckdb_numeric_stats( + conn, + from_expr, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + for col_name in categorical_features: + results.append( + _duckdb_categorical_stats( + conn, + from_expr, + col_name, + ts_filter, + top_n, + ) + ) + conn.close() + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + assert isinstance(data_source, FileSource) + + from_expr = _duckdb_sql_from_expression(config, data_source) + ts_col = _duckdb_quote_ident(timestamp_field) + conn = duckdb.connect() + row = conn.execute( + f"SELECT MAX({ts_col}) AS m FROM {from_expr} AS _src" + ).fetchone() + conn.close() + + if row is None or row[0] is None: + return None 
+ val = row[0] + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + if isinstance(val, date): + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + return None + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + base = os.path.join(_duckdb_monitoring_base(config), MONITORING_DIR) + os.makedirs(base, exist_ok=True) + + tables = [ + (FEATURE_METRICS_FILE, _MON_FEATURE_COLUMNS), + (VIEW_METRICS_FILE, _MON_VIEW_COLUMNS), + (SERVICE_METRICS_FILE, _MON_SERVICE_COLUMNS), + ] + for fname, columns in tables: + path = _duckdb_monitoring_path(config, fname) + if not os.path.isfile(path): + os.makedirs(os.path.dirname(path), exist_ok=True) + pd.DataFrame(columns=columns).to_parquet(path, index=False) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + fname, columns, pk = _duckdb_mon_table_meta(metric_type) + path = _duckdb_monitoring_path(config, fname) + _duckdb_parquet_upsert(path, columns, pk, metrics) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + fname, columns, _ = _duckdb_mon_table_meta(metric_type) + path = _duckdb_monitoring_path(config, fname) + return _duckdb_parquet_query( + path, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> 
None: + assert isinstance(config.offline_store, DuckDBOfflineStoreConfig) + + path = _duckdb_monitoring_path(config, FEATURE_METRICS_FILE) + tab = _duckdb_read_parquet_if_exists(path) + if tab is None or tab.num_rows == 0: + return + + df = tab.to_pandas() + mask = df["project_id"] == project + if feature_view_name is not None: + mask = mask & (df["feature_view_name"] == feature_view_name) + if feature_name is not None: + mask = mask & (df["feature_name"] == feature_name) + if data_source_type is not None: + mask = mask & (df["data_source_type"] == data_source_type) + mask = mask & (df["is_baseline"].isin([True, 1])) + df.loc[mask, "is_baseline"] = False + pq.write_table(pa.Table.from_pandas(df, preserve_index=False), path) diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 900dfcfab80..3f7ab94aecd 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -1,6 +1,7 @@ import contextlib +import json import uuid -from datetime import datetime, timezone +from datetime import date, datetime, timezone from pathlib import Path from typing import ( Any, @@ -378,6 +379,690 @@ def offline_write_batch( fail_if_exists=False, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + assert isinstance(data_source, RedshiftSource) + + from_expression = data_source.get_table_query_string() + ts_filter = get_timestamp_filter_sql( + start_date, + end_date, + timestamp_field, + tz=timezone.utc, + ) + ts_clause = ts_filter if ts_filter else "1=1" + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + 
categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + if numeric_features: + results.extend( + _redshift_sql_numeric_stats( + config, + from_expression, + numeric_features, + ts_clause, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _redshift_sql_categorical_stats( + config, from_expression, col_name, ts_clause, top_n + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + assert isinstance(data_source, RedshiftSource) + + from_expression = data_source.get_table_query_string() + q_ts = f'"{timestamp_field}"' + sql = f"SELECT MAX({q_ts}) AS max_ts FROM {from_expression} AS _src" + rows = _redshift_execute_fetch_rows(config, sql) + if not rows or not rows[0]: + return None + val = _redshift_field_value(rows[0][0]) + if val is None: + return None + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return parser.parse(str(val)) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + for stmt in _REDSHIFT_MONITORING_DDL_STATEMENTS: + _redshift_execute_statement(config, stmt) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + table, columns, pk_columns = _redshift_mon_table_meta(metric_type) + for row in metrics: + _redshift_merge_metric_row(config, table, columns, pk_columns, row) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: 
Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + _, columns, _ = _redshift_mon_table_meta(metric_type) + return _redshift_mon_query( + config, metric_type, columns, project, filters, start_date, end_date + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) + parts = [ + f"project_id = {_redshift_sql_literal(project)}", + "is_baseline = TRUE", + ] + if feature_view_name is not None: + parts.append( + f"feature_view_name = {_redshift_sql_literal(feature_view_name)}" + ) + if feature_name is not None: + parts.append(f"feature_name = {_redshift_sql_literal(feature_name)}") + if data_source_type is not None: + parts.append( + f"data_source_type = {_redshift_sql_literal(data_source_type)}" + ) + where_sql = " AND ".join(parts) + sql = f"UPDATE {_MON_FEATURE_TABLE} SET is_baseline = FALSE WHERE {where_sql}" + _redshift_execute_statement(config, sql) + + +_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" +_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" +_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" + +_MON_FEATURE_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + 
"data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + +_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + +_REDSHIFT_MONITORING_DDL_STATEMENTS = [ + f""" +CREATE TABLE IF NOT EXISTS {_MON_FEATURE_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + feature_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + feature_type VARCHAR(50) NOT NULL, + row_count BIGINT, + null_count BIGINT, + null_rate DOUBLE PRECISION, + mean DOUBLE PRECISION, + stddev DOUBLE PRECISION, + min_val DOUBLE PRECISION, + max_val DOUBLE PRECISION, + p50 DOUBLE PRECISION, + p75 DOUBLE PRECISION, + p90 DOUBLE PRECISION, + p95 DOUBLE PRECISION, + p99 DOUBLE PRECISION, + histogram VARCHAR(65535), + PRIMARY KEY (project_id, feature_view_name, feature_name, + metric_date, granularity, data_source_type) +); +""", + f""" +CREATE TABLE IF NOT EXISTS {_MON_VIEW_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_view_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_row_count BIGINT, + 
total_features INTEGER, + features_with_nulls INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_view_name, metric_date, + granularity, data_source_type) +); +""", + f""" +CREATE TABLE IF NOT EXISTS {_MON_SERVICE_TABLE} ( + project_id VARCHAR(255) NOT NULL, + feature_service_name VARCHAR(255) NOT NULL, + metric_date DATE NOT NULL, + granularity VARCHAR(20) NOT NULL DEFAULT 'daily', + data_source_type VARCHAR(50) NOT NULL DEFAULT 'batch', + computed_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + is_baseline BOOLEAN NOT NULL DEFAULT FALSE, + total_feature_views INTEGER, + total_features INTEGER, + avg_null_rate DOUBLE PRECISION, + max_null_rate DOUBLE PRECISION, + PRIMARY KEY (project_id, feature_service_name, metric_date, + granularity, data_source_type) +); +""", +] + +_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { + "feature_name": "", + "feature_type": "categorical", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _redshift_mon_table_meta(metric_type: str): + if metric_type == "feature": + return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK + if metric_type == "feature_view": + return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK + if metric_type == "feature_service": + return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK + raise ValueError(f"Unknown metric_type '{metric_type}'") + + +def _redshift_execute_statement(config: RepoConfig, sql: str) -> str: + client = aws_utils.get_redshift_data_client(config.offline_store.region) + return aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + sql, + ) + + +def _redshift_get_statement_pages( + client: Any, 
statement_id: str +) -> Tuple[List[Dict[str, Any]], List[List[Dict[str, Any]]]]: + column_metadata: List[Dict[str, Any]] = [] + all_records: List[List[Dict[str, Any]]] = [] + next_token: Optional[str] = None + while True: + kwargs: Dict[str, Any] = {"Id": statement_id} + if next_token: + kwargs["NextToken"] = next_token + resp = client.get_statement_result(**kwargs) + if not column_metadata: + column_metadata = resp.get("ColumnMetadata", []) + all_records.extend(resp.get("Records", [])) + next_token = resp.get("NextToken") + if not next_token: + break + return column_metadata, all_records + + +def _redshift_execute_fetch_rows( + config: RepoConfig, sql: str +) -> List[List[Dict[str, Any]]]: + client = aws_utils.get_redshift_data_client(config.offline_store.region) + sid = aws_utils.execute_redshift_statement( + client, + config.offline_store.cluster_id, + config.offline_store.workgroup, + config.offline_store.database, + config.offline_store.user, + sql, + ) + _, records = _redshift_get_statement_pages(client, sid) + return records + + +def _redshift_field_value(field: Dict[str, Any]) -> Any: + if field.get("isNull"): + return None + if "stringValue" in field: + return field["stringValue"] + if "longValue" in field: + return field["longValue"] + if "doubleValue" in field: + return field["doubleValue"] + if "booleanValue" in field: + return field["booleanValue"] + return None + + +def _redshift_opt_float(val: Any) -> Optional[float]: + return float(val) if val is not None else None + + +def _redshift_sql_literal(val: Any) -> str: + if val is None: + return "NULL" + if isinstance(val, bool): + return "TRUE" if val else "FALSE" + if isinstance(val, (int, float)) and not isinstance(val, bool): + return str(val) + if isinstance(val, date) and not isinstance(val, datetime): + return f"DATE '{val.isoformat()}'" + if isinstance(val, datetime): + s = val.isoformat(sep=" ", timespec="seconds") + return f"TIMESTAMP '{s}'" + s = str(val).replace("'", "''") + return f"'{s}'" + 
def _redshift_mon_query(
    config: RepoConfig,
    metric_type: str,
    columns: List[str],
    project: str,
    filters: Optional[Dict[str, Any]],
    start_date: Optional[date],
    end_date: Optional[date],
) -> List[Dict[str, Any]]:
    """Read stored monitoring metrics for one project from Redshift.

    Args:
        config: Repo configuration holding the Redshift connection settings.
        metric_type: Selects the table via ``_redshift_mon_table_meta``.
        columns: Columns to select; also the set of column names a filter
            may reference.
        project: Value matched against ``project_id``.
        filters: Optional ``{column: value}`` equality filters. Entries whose
            value is ``None`` are ignored.
        start_date: Optional inclusive lower bound on ``metric_date``.
        end_date: Optional inclusive upper bound on ``metric_date``.

    Returns:
        One dict per row, ordered by ``metric_date`` ascending. ``histogram``
        strings are decoded from JSON, ``metric_date`` is reduced to its
        ``YYYY-MM-DD`` form and ``computed_at`` is returned as a string.

    Raises:
        ValueError: If a filter names a column that is not in ``columns``.
    """
    table, _, _ = _redshift_mon_table_meta(metric_type)
    conditions = [f"project_id = {_redshift_sql_literal(project)}"]
    if filters:
        known_columns = set(columns)
        for key, value in filters.items():
            if value is None:
                continue
            # The key is spliced into the statement as an identifier, so it
            # must be one of the table's own columns.
            if key not in known_columns:
                raise ValueError(
                    f"Unknown filter column '{key}' for metric_type '{metric_type}'"
                )
            conditions.append(f'"{key}" = {_redshift_sql_literal(value)}')
    if start_date:
        conditions.append(f"metric_date >= DATE '{start_date.isoformat()}'")
    if end_date:
        conditions.append(f"metric_date <= DATE '{end_date.isoformat()}'")
    where_sql = " AND ".join(conditions)
    col_sql = ", ".join(f'"{c}"' for c in columns)
    sql = f'SELECT {col_sql} FROM "{table}" WHERE {where_sql} ORDER BY metric_date ASC'

    client = aws_utils.get_redshift_data_client(config.offline_store.region)
    sid = aws_utils.execute_redshift_statement(
        client,
        config.offline_store.cluster_id,
        config.offline_store.workgroup,
        config.offline_store.database,
        config.offline_store.user,
        sql,
    )
    meta, rows = _redshift_get_statement_pages(client, sid)
    col_names = [c["name"] for c in meta]

    out: List[Dict[str, Any]] = []
    for rec in rows:
        record = {
            name: _redshift_field_value(field) for name, field in zip(col_names, rec)
        }
        # The histogram is stored as JSON text; decode it for the caller.
        if isinstance(record.get("histogram"), str):
            record["histogram"] = json.loads(record["histogram"])
        md = record.get("metric_date")
        if isinstance(md, str):
            record["metric_date"] = md[:10]
        elif isinstance(md, date):
            record["metric_date"] = md.isoformat()
        ca = record.get("computed_at")
        if ca is not None:
            record["computed_at"] = ca if isinstance(ca, str) else str(ca)
        out.append(record)
    return out
def _redshift_sql_numeric_histogram(
    config: RepoConfig,
    from_expression: str,
    col_name: str,
    ts_clause: str,
    bins: int,
    min_val: float,
    max_val: float,
) -> Dict[str, Any]:
    """Compute an equal-width histogram of a numeric column in Redshift.

    Args:
        config: Repo configuration holding the Redshift connection settings.
        from_expression: Table or sub-query to read from.
        col_name: Column to bucket (double-quoted in the generated SQL).
        ts_clause: SQL predicate restricting the rows considered.
        bins: Requested number of buckets; values below 1 are treated as 1.
        min_val: Lower edge of the first bucket.
        max_val: Upper edge of the last bucket.

    Returns:
        ``{"bins": [edges...], "counts": [per-bucket counts], "bin_width": w}``.
        When ``min_val == max_val`` a single bucket holding every non-null
        row is returned with ``bin_width`` 0.0.
    """
    q_col = f'"{col_name}"'

    if min_val == max_val:
        # Degenerate range: all non-null values are equal, so just count them.
        sql = (
            f"SELECT COUNT(*) FROM {from_expression} AS _src "
            f"WHERE {q_col} IS NOT NULL AND {ts_clause}"
        )
        r = _redshift_execute_fetch_rows(config, sql)
        cnt = int(_redshift_field_value(r[0][0]) or 0) if r and r[0] else 0
        return {"bins": [min_val, max_val], "counts": [cnt], "bin_width": 0.0}

    # Guard against a zero or negative bucket count, which would divide by
    # zero or yield an empty counts list.
    bins = max(int(bins), 1)
    bin_width = (max_val - min_val) / bins
    cast_col = f"CAST({q_col} AS DOUBLE PRECISION)"

    # 1-based bucket index, clamped so that max_val lands in the last bucket.
    bucket_expr = (
        f"LEAST(GREATEST(FLOOR(({cast_col} - {min_val}) / {bin_width}) + 1, 1), {bins}) "
        f"AS bucket"
    )

    query = (
        f"SELECT bucket, COUNT(*) AS cnt FROM ("
        f" SELECT {bucket_expr} "
        f" FROM {from_expression} AS _src "
        f" WHERE {q_col} IS NOT NULL AND {ts_clause}"
        f") AS _b WHERE bucket IS NOT NULL "
        f"GROUP BY bucket ORDER BY bucket"
    )
    hrows = _redshift_execute_fetch_rows(config, query)

    counts = [0] * bins
    for hr in hrows:
        bucket = int(_redshift_field_value(hr[0]) or 0)
        cnt = int(_redshift_field_value(hr[1]) or 0)
        if 1 <= bucket <= bins:
            counts[bucket - 1] = cnt

    bin_edges = [min_val + i * bin_width for i in range(bins + 1)]
    return {
        "bins": [float(b) for b in bin_edges],
        "counts": counts,
        "bin_width": float(bin_width),
    }
int(_redshift_field_value(rows[0][0]) or 0) + null_count = int(_redshift_field_value(rows[0][1]) or 0) + unique_count = int(_redshift_field_value(rows[0][2]) or 0) + + top_entries = [ + { + "value": _redshift_field_value(r[3]), + "count": int(_redshift_field_value(r[4]) or 0), + } + for r in rows + ] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + class RedshiftRetrievalJob(RetrievalJob): def __init__( diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 7226c908d13..7cae609ef12 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -3,7 +3,8 @@ import os import uuid import warnings -from datetime import datetime, timezone +from datetime import date, datetime, timezone +from decimal import Decimal from functools import reduce from pathlib import Path from typing import ( @@ -421,6 +422,234 @@ def offline_write_batch( auto_create_table=True, ) + @staticmethod + def compute_monitoring_metrics( + config: RepoConfig, + data_source: DataSource, + feature_columns: List[Tuple[str, str]], + timestamp_field: str, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + histogram_bins: int = 20, + top_n: int = 10, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + assert isinstance(data_source, SnowflakeSource) + + from_expression = 
data_source.get_table_query_string() + from_expression = _qualify_snowflake_from_expression( + config, data_source, from_expression + ) + ts_filter = get_timestamp_filter_sql( + start_date, end_date, timestamp_field, tz=timezone.utc + ) + + numeric_features = [n for n, t in feature_columns if t == "numeric"] + categorical_features = [n for n, t in feature_columns if t == "categorical"] + results: List[Dict[str, Any]] = [] + + with GetSnowflakeConnection(config.offline_store) as conn: + if numeric_features: + results.extend( + _snowflake_sql_numeric_stats( + conn, + from_expression, + numeric_features, + ts_filter, + histogram_bins, + ) + ) + + for col_name in categorical_features: + results.append( + _snowflake_sql_categorical_stats( + conn, from_expression, col_name, ts_filter, top_n + ) + ) + + return results + + @staticmethod + def get_monitoring_max_timestamp( + config: RepoConfig, + data_source: DataSource, + timestamp_field: str, + ) -> Optional[datetime]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + assert isinstance(data_source, SnowflakeSource) + + from_expression = data_source.get_table_query_string() + from_expression = _qualify_snowflake_from_expression( + config, data_source, from_expression + ) + + with GetSnowflakeConnection(config.offline_store) as conn: + cursor = execute_snowflake_statement( + conn, + f'SELECT MAX("{timestamp_field}") FROM {from_expression}', + ) + row = cursor.fetchone() + + if row is None or row[0] is None: + return None + val = row[0] + if isinstance(val, pd.Timestamp): + val = val.to_pydatetime() + if isinstance(val, datetime): + return val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return datetime.combine(val, datetime.min.time(), tzinfo=timezone.utc) + + @staticmethod + def ensure_monitoring_tables(config: RepoConfig) -> None: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + fq_feature = _snowflake_monitoring_table_fqn( + config, _SNOWFLAKE_MON_FEATURE_TABLE + 
) + fq_view = _snowflake_monitoring_table_fqn(config, _SNOWFLAKE_MON_VIEW_TABLE) + fq_service = _snowflake_monitoring_table_fqn( + config, _SNOWFLAKE_MON_SERVICE_TABLE + ) + + ddl_feature = f""" + CREATE TABLE IF NOT EXISTS {fq_feature} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_view_name" VARCHAR(255) NOT NULL, + "feature_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "feature_type" VARCHAR(50) NOT NULL, + "row_count" BIGINT, + "null_count" BIGINT, + "null_rate" DOUBLE, + "mean" DOUBLE, + "stddev" DOUBLE, + "min_val" DOUBLE, + "max_val" DOUBLE, + "p50" DOUBLE, + "p75" DOUBLE, + "p90" DOUBLE, + "p95" DOUBLE, + "p99" DOUBLE, + "histogram" VARIANT, + PRIMARY KEY ("project_id", "feature_view_name", "feature_name", + "metric_date", "granularity", "data_source_type") + ) + """ + ddl_view = f""" + CREATE TABLE IF NOT EXISTS {fq_view} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_view_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "total_row_count" BIGINT, + "total_features" INTEGER, + "features_with_nulls" INTEGER, + "avg_null_rate" DOUBLE, + "max_null_rate" DOUBLE, + PRIMARY KEY ("project_id", "feature_view_name", "metric_date", + "granularity", "data_source_type") + ) + """ + ddl_service = f""" + CREATE TABLE IF NOT EXISTS {fq_service} ( + "project_id" VARCHAR(255) NOT NULL, + "feature_service_name" VARCHAR(255) NOT NULL, + "metric_date" DATE NOT NULL, + "granularity" VARCHAR(20) NOT NULL DEFAULT 'daily', + "data_source_type" VARCHAR(50) NOT NULL DEFAULT 'batch', + "computed_at" 
TIMESTAMP_TZ NOT NULL DEFAULT CURRENT_TIMESTAMP(), + "is_baseline" BOOLEAN NOT NULL DEFAULT FALSE, + "total_feature_views" INTEGER, + "total_features" INTEGER, + "avg_null_rate" DOUBLE, + "max_null_rate" DOUBLE, + PRIMARY KEY ("project_id", "feature_service_name", "metric_date", + "granularity", "data_source_type") + ) + """ + + with GetSnowflakeConnection(config.offline_store) as conn: + execute_snowflake_statement(conn, ddl_feature) + execute_snowflake_statement(conn, ddl_view) + execute_snowflake_statement(conn, ddl_service) + + @staticmethod + def save_monitoring_metrics( + config: RepoConfig, + metric_type: str, + metrics: List[Dict[str, Any]], + ) -> None: + if not metrics: + return + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + table, columns, pk_columns = _snowflake_mon_table_meta(metric_type) + _snowflake_mon_merge_upsert( + config.offline_store, table, columns, pk_columns, metrics + ) + + @staticmethod + def query_monitoring_metrics( + config: RepoConfig, + project: str, + metric_type: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + ) -> List[Dict[str, Any]]: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + _, columns, _ = _snowflake_mon_table_meta(metric_type) + return _snowflake_mon_query( + config.offline_store, + metric_type, + columns, + project, + filters, + start_date, + end_date, + ) + + @staticmethod + def clear_monitoring_baseline( + config: RepoConfig, + project: str, + feature_view_name: Optional[str] = None, + feature_name: Optional[str] = None, + data_source_type: Optional[str] = None, + ) -> None: + assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) + + fq_table = _snowflake_monitoring_table_fqn(config, _SNOWFLAKE_MON_FEATURE_TABLE) + conditions = [f'"project_id" = {_snowflake_sql_literal(project)}'] + if feature_view_name: + conditions.append( + f'"feature_view_name" = 
{_snowflake_sql_literal(feature_view_name)}' + ) + if feature_name: + conditions.append( + f'"feature_name" = {_snowflake_sql_literal(feature_name)}' + ) + if data_source_type: + conditions.append( + f'"data_source_type" = {_snowflake_sql_literal(data_source_type)}' + ) + conditions.append('"is_baseline" = TRUE') + + sql = f'UPDATE {fq_table} SET "is_baseline" = FALSE WHERE ' + " AND ".join( + conditions + ) + + with GetSnowflakeConnection(config.offline_store) as conn: + execute_snowflake_statement(conn, sql) + class SnowflakeRetrievalJob(RetrievalJob): def __init__( @@ -640,6 +869,480 @@ def _get_file_names_from_copy_into(self, cursor, native_export_path) -> List[str ] +# ------------------------------------------------------------------ # +# Snowflake monitoring SQL push-down & storage helpers +# ------------------------------------------------------------------ # + +_SNOWFLAKE_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" +_SNOWFLAKE_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" +_SNOWFLAKE_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" + +_SNOWFLAKE_MON_FEATURE_COLUMNS = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "feature_type", + "row_count", + "null_count", + "null_rate", + "mean", + "stddev", + "min_val", + "max_val", + "p50", + "p75", + "p90", + "p95", + "p99", + "histogram", +] +_SNOWFLAKE_MON_FEATURE_PK = [ + "project_id", + "feature_view_name", + "feature_name", + "metric_date", + "granularity", + "data_source_type", +] + +_SNOWFLAKE_MON_VIEW_COLUMNS = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_row_count", + "total_features", + "features_with_nulls", + "avg_null_rate", + "max_null_rate", +] +_SNOWFLAKE_MON_VIEW_PK = [ + "project_id", + "feature_view_name", + "metric_date", + "granularity", + "data_source_type", +] + 
+_SNOWFLAKE_MON_SERVICE_COLUMNS = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", + "computed_at", + "is_baseline", + "total_feature_views", + "total_features", + "avg_null_rate", + "max_null_rate", +] +_SNOWFLAKE_MON_SERVICE_PK = [ + "project_id", + "feature_service_name", + "metric_date", + "granularity", + "data_source_type", +] + +_EMPTY_SNOWFLAKE_NUMERIC_METRIC: Dict[str, Any] = { + "feature_type": "numeric", + "row_count": 0, + "null_count": 0, + "null_rate": 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": None, +} + + +def _opt_float(val: Any) -> Optional[float]: + if val is None: + return None + if isinstance(val, Decimal): + return float(val) + return float(val) + + +def _escape_snowflake_sql_string(value: str) -> str: + return value.replace("'", "''") + + +def _snowflake_sql_literal(val: Any) -> str: + if val is None: + return "NULL" + if isinstance(val, bool): + return "TRUE" if val else "FALSE" + if isinstance(val, (int, float)) and not isinstance(val, bool): + if isinstance(val, float) and (np.isnan(val) or np.isinf(val)): + return "NULL" + return str(val) + if isinstance(val, Decimal): + return str(val) + if isinstance(val, date) and not isinstance(val, datetime): + return f"DATE '{val.isoformat()}'" + if isinstance(val, datetime): + dt = val if val.tzinfo else val.replace(tzinfo=timezone.utc) + return f"TIMESTAMP_TZ '{dt.isoformat()}'" + if isinstance(val, str): + return f"'{_escape_snowflake_sql_string(val)}'" + return f"'{_escape_snowflake_sql_string(str(val))}'" + + +def _qualify_snowflake_from_expression( + config: RepoConfig, + data_source: SnowflakeSource, + from_expression: str, +) -> str: + if not data_source.database and not data_source.schema and data_source.table: + return ( + f'"{config.offline_store.database}"."{config.offline_store.schema_}".' 
def _snowflake_monitoring_table_fqn(
    config: RepoConfig,
    table_name: str,
) -> str:
    """Build the double-quoted ``"database"."schema"."table"`` name.

    The database and schema come from the Snowflake offline store
    configuration in ``config``.
    """
    # Named ``store_config`` rather than ``os`` so the local does not shadow
    # the ``os`` module.
    store_config = config.offline_store
    assert isinstance(store_config, SnowflakeOfflineStoreConfig)
    return f'"{store_config.database}"."{store_config.schema_}"."{table_name}"'
in rows: + if bucket is not None and 1 <= int(bucket) <= bins: + counts[int(bucket) - 1] = cnt + + bin_edges = [min_val + i * bin_width for i in range(bins + 1)] + return { + "bins": [float(b) for b in bin_edges], + "counts": counts, + "bin_width": float(bin_width), + } + + +def _snowflake_sql_numeric_stats( + conn: SnowflakeConnection, + from_expression: str, + feature_names: List[str], + ts_filter: str, + histogram_bins: int, +) -> List[Dict[str, Any]]: + select_parts = ["COUNT(*)"] + for col in feature_names: + q = f'"{col}"' + c = f"CAST({q} AS DOUBLE)" + select_parts.extend( + [ + f"COUNT({q})", + f"AVG({c})", + f"STDDEV_SAMP({c})", + f"MIN({c})", + f"MAX({c})", + f"PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY {c})", + f"PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY {c})", + ] + ) + + query = ( + f"SELECT {', '.join(select_parts)} " + f"FROM {from_expression} AS _src WHERE {ts_filter}" + ) + + cursor = execute_snowflake_statement(conn, query) + row = cursor.fetchone() + + if row is None: + return [ + {**_EMPTY_SNOWFLAKE_NUMERIC_METRIC, "feature_name": n} + for n in feature_names + ] + + row_count = row[0] + results: List[Dict[str, Any]] = [] + + for i, col in enumerate(feature_names): + base = 1 + i * 10 + non_null = row[base] or 0 + null_count = row_count - non_null + + min_val = _opt_float(row[base + 3]) + max_val = _opt_float(row[base + 4]) + + result: Dict[str, Any] = { + "feature_name": col, + "feature_type": "numeric", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": _opt_float(row[base + 1]), + "stddev": _opt_float(row[base + 2]), + "min_val": min_val, + "max_val": max_val, + "p50": _opt_float(row[base + 5]), + "p75": _opt_float(row[base + 6]), + "p90": _opt_float(row[base + 7]), + "p95": _opt_float(row[base + 
8]), + "p99": _opt_float(row[base + 9]), + "histogram": None, + } + + if min_val is not None and max_val is not None and non_null > 0: + result["histogram"] = _snowflake_sql_numeric_histogram( + conn, + from_expression, + col, + ts_filter, + histogram_bins, + min_val, + max_val, + ) + + results.append(result) + + return results + + +def _snowflake_sql_categorical_stats( + conn: SnowflakeConnection, + from_expression: str, + col_name: str, + ts_filter: str, + top_n: int, +) -> Dict[str, Any]: + q_col = f'"{col_name}"' + + query = ( + f"WITH filtered AS (" + f" SELECT * FROM {from_expression} AS _src WHERE {ts_filter}" + f") " + f"SELECT " + f" (SELECT COUNT(*) FROM filtered) AS row_count, " + f" (SELECT COUNT(*) - COUNT({q_col}) FROM filtered) AS null_count, " + f" (SELECT COUNT(DISTINCT {q_col}) FROM filtered " + f" WHERE {q_col} IS NOT NULL) AS unique_count, " + f" TO_VARCHAR({q_col}) AS value, COUNT(*) AS cnt " + f"FROM filtered WHERE {q_col} IS NOT NULL " + f"GROUP BY {q_col} ORDER BY cnt DESC LIMIT {int(top_n)}" + ) + + cursor = execute_snowflake_statement(conn, query) + rows = cursor.fetchall() + + if not rows: + return { + **_EMPTY_SNOWFLAKE_NUMERIC_METRIC, + "feature_name": col_name, + "feature_type": "categorical", + } + + row_count = rows[0][0] + null_count = rows[0][1] + unique_count = rows[0][2] + + top_entries = [{"value": r[3], "count": r[4]} for r in rows] + top_total = sum(e["count"] for e in top_entries) + other_count = (row_count - null_count) - top_total + + return { + "feature_name": col_name, + "feature_type": "categorical", + "row_count": row_count, + "null_count": null_count, + "null_rate": null_count / row_count if row_count > 0 else 0.0, + "mean": None, + "stddev": None, + "min_val": None, + "max_val": None, + "p50": None, + "p75": None, + "p90": None, + "p95": None, + "p99": None, + "histogram": { + "values": top_entries, + "other_count": max(other_count, 0), + "unique_count": unique_count, + }, + } + + +def _snowflake_mon_merge_upsert( + 
offline_store: SnowflakeOfflineStoreConfig, + table: str, + columns: List[str], + pk_columns: List[str], + rows: List[Dict[str, Any]], +) -> None: + fq = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"' + non_pk = [c for c in columns if c not in pk_columns] + + with GetSnowflakeConnection(offline_store) as conn: + for row in rows: + select_parts: List[str] = [] + for col in columns: + val = row.get(col) + if col == "histogram": + if val is not None: + json_str = json.dumps(val) + select_parts.append( + f'PARSE_JSON({_snowflake_sql_literal(json_str)}) AS "{col}"' + ) + else: + select_parts.append(f'NULL AS "{col}"') + else: + select_parts.append(f'{_snowflake_sql_literal(val)} AS "{col}"') + + using = ", ".join(select_parts) + on_parts = [f't."{pk}" = s."{pk}"' for pk in pk_columns] + update_parts = [f't."{c}" = s."{c}"' for c in non_pk] + insert_cols = ", ".join(f'"{c}"' for c in columns) + insert_vals = ", ".join(f's."{c}"' for c in columns) + + sql = ( + f"MERGE INTO {fq} AS t " + f"USING (SELECT {using}) AS s " + f"ON {' AND '.join(on_parts)} " + f"WHEN MATCHED THEN UPDATE SET {', '.join(update_parts)} " + f"WHEN NOT MATCHED THEN INSERT ({insert_cols}) VALUES ({insert_vals})" + ) + + execute_snowflake_statement(conn, sql) + + +def _snowflake_mon_query( + offline_store: SnowflakeOfflineStoreConfig, + metric_type: str, + columns: List[str], + project: str, + filters: Optional[Dict[str, Any]] = None, + start_date: Optional[date] = None, + end_date: Optional[date] = None, +) -> List[Dict[str, Any]]: + table, _, _ = _snowflake_mon_table_meta(metric_type) + fq = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"' + + conditions = [f'"project_id" = {_snowflake_sql_literal(project)}'] + if filters: + for key, value in filters.items(): + if value is not None: + conditions.append(f'"{key}" = {_snowflake_sql_literal(value)}') + + if start_date: + conditions.append(f'"metric_date" >= {_snowflake_sql_literal(start_date)}') + if end_date: + 
conditions.append(f'"metric_date" <= {_snowflake_sql_literal(end_date)}') + + col_list = ", ".join(f'"{c}"' for c in columns) + sql = ( + f"SELECT {col_list} FROM {fq} WHERE {' AND '.join(conditions)} " + f'ORDER BY "metric_date" ASC' + ) + + with GetSnowflakeConnection(offline_store) as conn: + cursor = execute_snowflake_statement(conn, sql) + rows = cursor.fetchall() + + results: List[Dict[str, Any]] = [] + for row in rows: + record = dict(zip(columns, row)) + hist = record.get("histogram") + if hist is not None and isinstance(hist, str): + record["histogram"] = json.loads(hist) + md = record.get("metric_date") + if md is not None: + if isinstance(md, datetime): + record["metric_date"] = md.date().isoformat() + elif isinstance(md, date): + record["metric_date"] = md.isoformat() + ca = record.get("computed_at") + if ca is not None and isinstance(ca, datetime): + record["computed_at"] = ca.isoformat() + results.append(record) + + return results + + def _get_entity_schema( entity_df: Union[pd.DataFrame, str], snowflake_conn: SnowflakeConnection, diff --git a/sdk/python/feast/monitoring/metrics_calculator.py b/sdk/python/feast/monitoring/metrics_calculator.py index 9160fabc6b0..b4ee16f8885 100644 --- a/sdk/python/feast/monitoring/metrics_calculator.py +++ b/sdk/python/feast/monitoring/metrics_calculator.py @@ -41,6 +41,23 @@ def classify_feature(dtype) -> Optional[str]: return "categorical" return None + @staticmethod + def classify_feature_arrow(arrow_type: pa.DataType) -> Optional[str]: + """Classify a PyArrow data type as numeric or categorical.""" + if ( + pa.types.is_integer(arrow_type) + or pa.types.is_floating(arrow_type) + or pa.types.is_decimal(arrow_type) + ): + return "numeric" + if ( + pa.types.is_string(arrow_type) + or pa.types.is_large_string(arrow_type) + or pa.types.is_boolean(arrow_type) + ): + return "categorical" + return None + def compute_numeric(self, array: pa.Array) -> Dict: total = len(array) null_count = array.null_count diff --git 
a/sdk/python/feast/monitoring/monitoring_service.py b/sdk/python/feast/monitoring/monitoring_service.py index 8e2fb2cf04b..d131d613171 100644 --- a/sdk/python/feast/monitoring/monitoring_service.py +++ b/sdk/python/feast/monitoring/monitoring_service.py @@ -1,8 +1,10 @@ import logging import time +from collections import defaultdict from datetime import date, datetime, timedelta, timezone from typing import Any, Dict, List, Optional, Tuple +from feast.feature_logging import LOG_TIMESTAMP_FIELD, FeatureServiceLoggingSource from feast.infra.offline_stores.offline_store import OfflineStore from feast.monitoring.dqm_job_manager import DQMJobManager from feast.monitoring.metrics_calculator import MetricsCalculator @@ -123,6 +125,163 @@ def auto_compute( "duration_ms": duration_ms, } + # ------------------------------------------------------------------ # + # Log source: compute metrics from feature serving logs + # ------------------------------------------------------------------ # + + def compute_log_metrics( + self, + project: str, + feature_service_name: str, + start_date: Optional[date] = None, + end_date: Optional[date] = None, + granularity: str = "daily", + set_baseline: bool = False, + ) -> Dict[str, Any]: + """Compute monitoring metrics from feature serving logs. + + Requires the feature service to have a logging_config with a + LoggingDestination that can be converted to a DataSource. + """ + self._ensure_monitoring_tables() + if granularity not in VALID_GRANULARITIES: + raise ValueError( + f"Invalid granularity '{granularity}'. 
" + f"Must be one of {VALID_GRANULARITIES}" + ) + + start_time = time.time() + start_dt, end_dt = self._to_date_range(start_date, end_date) + + if project is None: + project = self._store.config.project + + fs = self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + log_source = self._resolve_log_source(fs) + if log_source is None: + return { + "status": "skipped", + "reason": f"Feature service '{feature_service_name}' has no logging configured", + "duration_ms": int((time.time() - start_time) * 1000), + } + + data_source, ts_field, feature_fields, log_col_map = log_source + metrics_list = self._compute_from_source( + data_source, + ts_field, + feature_fields, + start_dt, + end_dt, + ) + + now = datetime.now(timezone.utc) + metric_date = start_dt.date() + + self._save_log_metrics( + project=project, + feature_service_name=feature_service_name, + log_col_map=log_col_map, + metrics_list=metrics_list, + metric_date=metric_date, + granularity=granularity, + set_baseline=set_baseline, + now=now, + ) + + duration_ms = int((time.time() - start_time) * 1000) + return { + "status": "completed", + "data_source_type": "log", + "feature_service_name": feature_service_name, + "granularity": granularity, + "computed_features": len(metrics_list), + "metric_date": metric_date.isoformat(), + "duration_ms": duration_ms, + } + + def auto_compute_log_metrics( + self, + project: Optional[str] = None, + feature_service_name: Optional[str] = None, + ) -> Dict[str, Any]: + """Auto-detect date ranges from log data and compute all granularities.""" + start_time = time.time() + self._ensure_monitoring_tables() + if project is None: + project = self._store.config.project + + if feature_service_name: + services = [ + self._store.registry.get_feature_service( + name=feature_service_name, project=project + ) + ] + else: + services = self._store.registry.list_feature_services(project=project) + + total_features = 0 + total_services = 0 + 
granularities_computed: set = set() + + for fs in services: + try: + log_source = self._resolve_log_source(fs) + if log_source is None: + continue + + data_source, ts_field, feature_fields, log_col_map = log_source + + max_ts = self._get_max_timestamp_for_source(data_source, ts_field) + if max_ts is None: + logger.warning( + "No log data found for feature service '%s', skipping", + fs.name, + ) + continue + + now = datetime.now(timezone.utc) + + for gran, window in GRANULARITY_WINDOWS.items(): + window_start = max_ts - window + metrics_list = self._compute_from_source( + data_source, + ts_field, + feature_fields, + window_start, + max_ts, + ) + self._save_log_metrics( + project=project, + feature_service_name=fs.name, + log_col_map=log_col_map, + metrics_list=metrics_list, + metric_date=window_start.date(), + granularity=gran, + set_baseline=False, + now=now, + ) + total_features += len(metrics_list) + granularities_computed.add(gran) + + total_services += 1 + except Exception: + logger.exception( + "Failed to auto-compute log metrics for feature service '%s'", + fs.name, + ) + + duration_ms = int((time.time() - start_time) * 1000) + return { + "status": "completed", + "data_source_type": "log", + "computed_feature_services": total_services, + "computed_features": total_features, + "granularities": sorted(granularities_computed), + "duration_ms": duration_ms, + } + # ------------------------------------------------------------------ # # Baseline: compute from all available source data # ------------------------------------------------------------------ # @@ -813,6 +972,257 @@ def _compute_for_feature_view( return {"feature_count": len(metrics_list), "dates": {metric_date}} + # ------------------------------------------------------------------ # + # Private: log source helpers + # ------------------------------------------------------------------ # + + def _resolve_log_source(self, feature_service): + """Resolve log data source for a feature service. 
+ + Returns (DataSource, timestamp_field, feature_fields, log_col_map) + or None if the feature service has no logging configured. + + ``feature_fields`` uses the raw log column names (needed for + SQL/PyArrow column access). ``log_col_map`` maps each raw log + column to ``(feature_view_name, normalized_feature_name)`` so + callers can store metrics under the correct view and feature + name — critical for drift detection across batch and log sources. + """ + if not feature_service.logging_config: + return None + + destination = feature_service.logging_config.destination + try: + data_source = destination.to_data_source() + except NotImplementedError: + logger.warning( + "Logging destination for '%s' does not support to_data_source()", + feature_service.name, + ) + return None + + logging_source = FeatureServiceLoggingSource( + feature_service, + self._store.config.project, + ) + schema = logging_source.get_schema(self._store.registry) + + skip_cols = { + LOG_TIMESTAMP_FIELD, + "__log_date", + "__request_id", + } + entity_columns = set() + view_feature_names: dict = {} + for proj in feature_service.feature_view_projections: + view_alias = proj.name_to_use() + try: + fv = self._store.registry.get_feature_view( + name=proj.name, project=self._store.config.project + ) + for ec in fv.entity_columns: + entity_columns.add(ec.name) + except Exception: + pass + for feat in proj.features: + log_col = f"{view_alias}__{feat.name}" + view_feature_names[log_col] = (proj.name, feat.name) + + feature_fields = [] + log_col_map: dict = {} + for field in schema: + if field.name in skip_cols or field.name in entity_columns: + continue + if field.name.endswith("__timestamp") or field.name.endswith("__status"): + continue + ftype = MetricsCalculator.classify_feature_arrow(field.type) + if ftype is not None: + feature_fields.append((field.name, ftype)) + if field.name in view_feature_names: + log_col_map[field.name] = view_feature_names[field.name] + + if not feature_fields: + return 
None + + return data_source, LOG_TIMESTAMP_FIELD, feature_fields, log_col_map + + def _get_max_timestamp_for_source(self, data_source, ts_field): + """Get MAX timestamp from an arbitrary data source.""" + provider = self._store._get_provider() + offline_store = provider.offline_store + try: + return offline_store.get_monitoring_max_timestamp( + config=self._store.config, + data_source=data_source, + timestamp_field=ts_field, + ) + except NotImplementedError: + return self._get_max_timestamp_for_source_fallback( + data_source, + ts_field, + ) + + def _get_max_timestamp_for_source_fallback(self, data_source, ts_field): + """Pull data and compute max timestamp in Python (fallback).""" + import pyarrow.compute as pc + + provider = self._store._get_provider() + offline_store = provider.offline_store + + retrieval_job = offline_store.pull_all_from_table_or_query( + config=self._store.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[], + timestamp_field=ts_field, + start_date=_EPOCH, + end_date=_FAR_FUTURE, + ) + + table = retrieval_job.to_arrow() + if ts_field not in table.column_names or len(table) == 0: + return None + + max_val = pc.max(table.column(ts_field)).as_py() + if max_val is None: + return None + + if isinstance(max_val, datetime): + return max_val if max_val.tzinfo else max_val.replace(tzinfo=timezone.utc) + return datetime.combine(max_val, datetime.min.time(), tzinfo=timezone.utc) + + def _compute_from_source( + self, + data_source, + ts_field: str, + feature_fields: List[Tuple[str, str]], + start_dt: datetime, + end_dt: datetime, + ) -> List[Dict[str, Any]]: + """Compute metrics from an arbitrary data source (batch or log).""" + provider = self._store._get_provider() + offline_store = provider.offline_store + try: + return offline_store.compute_monitoring_metrics( + config=self._store.config, + data_source=data_source, + feature_columns=feature_fields, + timestamp_field=ts_field, + start_date=start_dt, + end_date=end_dt, + 
histogram_bins=self._calculator.histogram_bins, + top_n=self._calculator.top_n, + ) + except NotImplementedError: + logger.debug( + "Offline store does not support compute_monitoring_metrics, " + "falling back to Python-based computation for log source" + ) + retrieval_job = offline_store.pull_all_from_table_or_query( + config=self._store.config, + data_source=data_source, + join_key_columns=[], + feature_name_columns=[name for name, _ in feature_fields], + timestamp_field=ts_field, + start_date=start_dt, + end_date=end_dt, + ) + arrow_table = retrieval_job.to_arrow() + return self._calculator.compute_all(arrow_table, feature_fields) + + def _save_log_metrics( + self, + project: str, + feature_service_name: str, + log_col_map: Dict[str, Tuple[str, str]], + metrics_list: List[Dict[str, Any]], + metric_date: date, + granularity: str, + set_baseline: bool, + now: datetime, + ) -> None: + """Save log-sourced metrics tagged with data_source_type='log'. + + Normalizes log column names (``driver_stats__conv_rate``) back to + their originating ``feature_view_name`` and ``feature_name`` so + that drift detection can join batch and log metrics on the same + feature identity. 
+ """ + if not metrics_list: + return + + offline_store = self._get_offline_store() + config = self._store.config + + for m in metrics_list: + log_col = m.get("feature_name", "") + view_name, feat_name = log_col_map.get( + log_col, (feature_service_name, log_col) + ) + m["project_id"] = project + m["feature_view_name"] = view_name + m["feature_name"] = feat_name + m["metric_date"] = metric_date + m["granularity"] = granularity + m["data_source_type"] = "log" + m["computed_at"] = now + m["is_baseline"] = set_baseline + + offline_store.save_monitoring_metrics(config, "feature", metrics_list) + + # --- per-feature-view aggregates (grouped by originating view) --- + by_view: Dict[str, List[Dict[str, Any]]] = defaultdict(list) + for m in metrics_list: + by_view[m["feature_view_name"]].append(m) + + view_metrics = [] + for vname, vmetrics in by_view.items(): + null_rates = [ + m["null_rate"] for m in vmetrics if m.get("null_rate") is not None + ] + view_metrics.append( + { + "project_id": project, + "feature_view_name": vname, + "metric_date": metric_date, + "granularity": granularity, + "data_source_type": "log", + "computed_at": now, + "is_baseline": set_baseline, + "total_row_count": vmetrics[0]["row_count"] if vmetrics else 0, + "total_features": len(vmetrics), + "features_with_nulls": sum( + 1 for m in vmetrics if (m.get("null_count") or 0) > 0 + ), + "avg_null_rate": ( + sum(null_rates) / len(null_rates) if null_rates else 0.0 + ), + "max_null_rate": max(null_rates) if null_rates else 0.0, + } + ) + offline_store.save_monitoring_metrics(config, "feature_view", view_metrics) + + # --- feature service aggregate --- + all_null_rates = [ + m["null_rate"] for m in metrics_list if m.get("null_rate") is not None + ] + svc_metric = { + "project_id": project, + "feature_service_name": feature_service_name, + "metric_date": metric_date, + "granularity": granularity, + "data_source_type": "log", + "computed_at": now, + "is_baseline": set_baseline, + "total_feature_views": 
len(by_view), + "total_features": len(metrics_list), + "avg_null_rate": ( + sum(all_null_rates) / len(all_null_rates) if all_null_rates else 0.0 + ), + "max_null_rate": max(all_null_rates) if all_null_rates else 0.0, + } + offline_store.save_monitoring_metrics(config, "feature_service", [svc_metric]) + def _read_batch_source(self, feature_view, feature_fields, start_dt, end_dt): config = self._store.config data_source = feature_view.batch_source diff --git a/sdk/python/feast/repo_operations.py b/sdk/python/feast/repo_operations.py index 199a5f5503e..5712b75c7bf 100644 --- a/sdk/python/feast/repo_operations.py +++ b/sdk/python/feast/repo_operations.py @@ -412,11 +412,6 @@ def apply_total_with_repo_instance( def _submit_baseline_jobs_if_needed(store, project_name, repo): """Submit async baseline DQM jobs for new features after feast apply.""" try: - from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig - - if not isinstance(store.config.offline_store, PostgreSQLConfig): - return - from feast.monitoring.monitoring_service import MonitoringService svc = MonitoringService(store) diff --git a/sdk/python/tests/integration/monitoring/test_monitoring_integration.py b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py index 1d0da72de1b..03f1927e810 100644 --- a/sdk/python/tests/integration/monitoring/test_monitoring_integration.py +++ b/sdk/python/tests/integration/monitoring/test_monitoring_integration.py @@ -9,6 +9,7 @@ - REST API endpoints - RBAC enforcement - Compute engine dispatch (SQL push-down vs Python fallback) +- Log source monitoring (feature serving logs) """ from datetime import date, datetime, timezone @@ -56,15 +57,42 @@ def _make_feature_view(name, features, entities=None, batch_source=None): return fv -def _make_feature_service(name, fv_names): +def _make_feature_service(name, fv_names, logging_config=None, feature_map=None): + """Create a mock FeatureService. 
+ + Args: + feature_map: optional dict mapping view_name -> list of feature names. + Used to build realistic projections with features and name_to_use(). + """ fs = MagicMock() fs.name = name fs.feature_view_projections = [MagicMock(name=n) for n in fv_names] for proj, n in zip(fs.feature_view_projections, fv_names): proj.name = n + proj.name_to_use.return_value = n + if feature_map and n in feature_map: + feats = [] + for fname in feature_map[n]: + f = MagicMock() + f.name = fname + feats.append(f) + proj.features = feats + else: + proj.features = [] + fs.logging_config = logging_config return fs +def _make_logging_config_with_source(log_table_schema): + """Create a mock LoggingConfig whose destination.to_data_source() returns a DataSource.""" + logging_config = MagicMock() + mock_data_source = MagicMock() + mock_data_source.timestamp_field = "__log_timestamp" + mock_data_source.created_timestamp_column = "" + logging_config.destination.to_data_source.return_value = mock_data_source + return logging_config, mock_data_source + + def _make_mock_store(feature_views, feature_services=None): """Create a mock FeatureStore with offline store that uses Python fallback.""" store = MagicMock() @@ -802,3 +830,187 @@ def test_transient_does_not_save(self): provider = store._get_provider.return_value provider.offline_store.save_monitoring_metrics.assert_not_called() + + +# ------------------------------------------------------------------ # +# Test: Log source monitoring +# ------------------------------------------------------------------ # + + +class TestLogSourceMonitoring: + """Verify that monitoring can compute metrics from feature serving logs.""" + + # Realistic log column names follow the {view}__{feature} convention + # produced by FeatureServiceLoggingSource.get_schema(). 
+ _LOG_SCHEMA = pa.schema( + [ + ("driver_id", pa.int64()), + ("driver_stats__conv_rate", pa.float64()), + ("driver_stats__conv_rate__timestamp", pa.timestamp("us", tz="UTC")), + ("driver_stats__conv_rate__status", pa.int32()), + ("driver_stats__city", pa.utf8()), + ("driver_stats__city__timestamp", pa.timestamp("us", tz="UTC")), + ("driver_stats__city__status", pa.int32()), + ("__log_timestamp", pa.timestamp("us", tz="UTC")), + ("__log_date", pa.date32()), + ("__request_id", pa.utf8()), + ] + ) + + def _make_log_store(self): + """Create a mock store with a feature service that has logging configured.""" + fv = _make_feature_view( + "driver_stats", + [ + _make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64), + _make_feature_field("city", PrimitiveFeastType.STRING), + ], + ) + + logging_config, log_data_source = _make_logging_config_with_source( + self._LOG_SCHEMA + ) + + fs = _make_feature_service( + "driver_service", + ["driver_stats"], + logging_config=logging_config, + feature_map={"driver_stats": ["conv_rate", "city"]}, + ) + store = _make_mock_store([fv], feature_services=[fs]) + + log_arrow_table = pa.table( + { + "driver_stats__conv_rate": [0.1, 0.5, 0.9, 0.3, 0.7], + "driver_stats__city": ["NYC", "LA", "NYC", "SF", "LA"], + "__log_timestamp": [ + datetime(2025, 3, 25, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 26, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + datetime(2025, 3, 27, tzinfo=timezone.utc), + ], + } + ) + + mock_log_retrieval = MagicMock() + mock_log_retrieval.to_arrow.return_value = log_arrow_table + + provider = store._get_provider.return_value + provider.offline_store.pull_all_from_table_or_query.return_value = ( + mock_log_retrieval + ) + + entity_col = MagicMock() + entity_col.name = "driver_id" + fv.entity_columns = [entity_col] + + return store, fs + + def test_compute_log_metrics(self): + store, fs = self._make_log_store() + svc = MonitoringService(store) + + 
with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + result = svc.compute_log_metrics( + project="test_project", + feature_service_name="driver_service", + start_date=date(2025, 3, 25), + end_date=date(2025, 3, 27), + granularity="daily", + ) + + assert result["status"] == "completed" + assert result["data_source_type"] == "log" + assert result["computed_features"] == 2 + + provider = store._get_provider.return_value + provider.offline_store.save_monitoring_metrics.assert_called() + + save_calls = provider.offline_store.save_monitoring_metrics.call_args_list + feature_calls = [c for c in save_calls if c[0][1] == "feature"] + assert len(feature_calls) >= 1 + saved_metrics = feature_calls[0][0][2] + assert all(m["data_source_type"] == "log" for m in saved_metrics) + # Feature names normalized: driver_stats__conv_rate -> conv_rate + saved_names = {m["feature_name"] for m in saved_metrics} + assert saved_names == {"conv_rate", "city"} + # Feature view name is the actual view, not the service + assert all(m["feature_view_name"] == "driver_stats" for m in saved_metrics) + + # Feature service aggregate saved to the service table + svc_calls = [c for c in save_calls if c[0][1] == "feature_service"] + assert len(svc_calls) >= 1 + svc_metric = svc_calls[0][0][2][0] + assert svc_metric["feature_service_name"] == "driver_service" + assert svc_metric["data_source_type"] == "log" + assert svc_metric["total_features"] == 2 + + def test_compute_log_metrics_no_logging_config(self): + fv = _make_feature_view( + "driver_stats", + [_make_feature_field("conv_rate", PrimitiveFeastType.FLOAT64)], + ) + fs = _make_feature_service("no_log_service", ["driver_stats"]) + fs.logging_config = None + store = _make_mock_store([fv], feature_services=[fs]) + svc = MonitoringService(store) + + result = 
svc.compute_log_metrics( + project="test_project", + feature_service_name="no_log_service", + ) + + assert result["status"] == "skipped" + assert "no logging configured" in result["reason"] + + def test_auto_compute_log_metrics(self): + store, fs = self._make_log_store() + svc = MonitoringService(store) + + with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + result = svc.auto_compute_log_metrics(project="test_project") + + assert result["status"] == "completed" + assert result["data_source_type"] == "log" + assert result["computed_feature_services"] == 1 + assert len(result["granularities"]) == len(VALID_GRANULARITIES) + + def test_log_metrics_tagged_differently_from_batch(self): + """Log metrics should have data_source_type='log', batch should have 'batch'.""" + store, fs = self._make_log_store() + svc = MonitoringService(store) + + with patch( + "feast.monitoring.monitoring_service.FeatureServiceLoggingSource" + ) as mock_cls: + mock_instance = MagicMock() + mock_instance.get_schema.return_value = self._LOG_SCHEMA + mock_cls.return_value = mock_instance + + svc.compute_log_metrics( + project="test_project", + feature_service_name="driver_service", + granularity="daily", + ) + + provider = store._get_provider.return_value + save_calls = provider.offline_store.save_monitoring_metrics.call_args_list + feature_calls = [c for c in save_calls if c[0][1] == "feature"] + for call in feature_calls: + for m in call[0][2]: + assert m["data_source_type"] == "log" + assert m["feature_view_name"] == "driver_stats" + assert m["feature_name"] in ("conv_rate", "city") From 59624979f8c2ecaf567e048d8d93f5c5b7f16f97 Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Mon, 20 Apr 2026 21:59:19 +0530 Subject: [PATCH 05/12] chore: Performance Improvements 
Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- .../feast/infra/offline_stores/bigquery.py | 170 +++------------ .../contrib/oracle_offline_store/oracle.py | 170 +++------------ .../postgres_offline_store/postgres.py | 184 +++------------- .../contrib/spark_offline_store/spark.py | 190 +++-------------- sdk/python/feast/infra/offline_stores/dask.py | 123 +++-------- .../feast/infra/offline_stores/duckdb.py | 170 ++++----------- .../feast/infra/offline_stores/redshift.py | 175 +++------------- .../feast/infra/offline_stores/snowflake.py | 198 +++--------------- .../feast/monitoring/monitoring_service.py | 197 +++++------------ 9 files changed, 296 insertions(+), 1281 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/bigquery.py b/sdk/python/feast/infra/offline_stores/bigquery.py index c982383c895..3f9507b6583 100644 --- a/sdk/python/feast/infra/offline_stores/bigquery.py +++ b/sdk/python/feast/infra/offline_stores/bigquery.py @@ -43,6 +43,16 @@ RetrievalMetadata, ) from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -560,27 +570,6 @@ def clear_monitoring_baseline( # BigQuery monitoring metrics (native) # ------------------------------------------------------------------ # -_BQ_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_BQ_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_BQ_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_BQ_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_type": "numeric", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - 
"mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - def _bq_monitoring_table_fqn(config: RepoConfig, table_name: str) -> str: assert isinstance(config.offline_store, BigQueryOfflineStoreConfig) @@ -594,99 +583,6 @@ def _bq_monitoring_table_fqn(config: RepoConfig, table_name: str) -> str: return f"`{project_id}.{config.offline_store.dataset}.{table_name}`" -def _bq_opt_float(val: Any) -> Optional[float]: - return float(val) if val is not None else None - - -def _bq_mon_table_meta(metric_type: str) -> Tuple[str, List[str], List[str]]: - if metric_type == "feature": - return ( - _BQ_MON_FEATURE_TABLE, - [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", - ], - [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - ], - ) - if metric_type == "feature_view": - return ( - _BQ_MON_VIEW_TABLE, - [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", - ], - [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - ], - ) - if metric_type == "feature_service": - return ( - _BQ_MON_SERVICE_TABLE, - [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", - ], - [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", 
- ], - ) - raise ValueError(f"Unknown metric_type '{metric_type}'") - - def _bq_scalar_param_type(column: str) -> str: if column == "is_baseline": return "BOOL" @@ -767,7 +663,7 @@ def _bq_save_monitoring_metrics( metric_type: str, metrics: List[Dict[str, Any]], ) -> None: - table_short, columns, pk_columns = _bq_mon_table_meta(metric_type) + table_short, columns, pk_columns = monitoring_table_meta(metric_type) table_fqn = _bq_monitoring_table_fqn(config, table_short) for row in metrics: _bq_merge_row(config, table_fqn, columns, pk_columns, row) @@ -781,7 +677,7 @@ def _bq_query_monitoring_metrics( start_date: Optional[date] = None, end_date: Optional[date] = None, ) -> List[Dict[str, Any]]: - table_short, columns, _ = _bq_mon_table_meta(metric_type) + table_short, columns, _ = monitoring_table_meta(metric_type) table_fqn = _bq_monitoring_table_fqn(config, table_short) project_id = ( config.offline_store.billing_project_id or config.offline_store.project_id @@ -820,13 +716,7 @@ def _bq_query_monitoring_metrics( results: List[Dict[str, Any]] = [] for r in job: record = {columns[i]: r[i] for i in range(len(columns))} - if "histogram" in record and isinstance(record["histogram"], str): - record["histogram"] = json.loads(record["histogram"]) - if "metric_date" in record and isinstance(record["metric_date"], date): - record["metric_date"] = record["metric_date"].isoformat() - if "computed_at" in record and isinstance(record["computed_at"], datetime): - record["computed_at"] = record["computed_at"].isoformat() - results.append(record) + results.append(normalize_monitoring_row(record)) return results @@ -837,7 +727,7 @@ def _bq_clear_monitoring_baseline( feature_name: Optional[str] = None, data_source_type: Optional[str] = None, ) -> None: - table_fqn = _bq_monitoring_table_fqn(config, _BQ_MON_FEATURE_TABLE) + table_fqn = _bq_monitoring_table_fqn(config, MON_TABLE_FEATURE) project_id = ( config.offline_store.billing_project_id or config.offline_store.project_id ) @@ 
-884,7 +774,7 @@ def _bq_ensure_monitoring_tables(config: RepoConfig) -> None: ds = config.offline_store.dataset proj = config.offline_store.project_id or client.project feature_ddl = f""" -CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_FEATURE_TABLE}` ( +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE}` ( project_id STRING NOT NULL, feature_view_name STRING NOT NULL, feature_name STRING NOT NULL, @@ -911,7 +801,7 @@ def _bq_ensure_monitoring_tables(config: RepoConfig) -> None: PRIMARY KEY (project_id, feature_view_name, feature_name, metric_date, granularity, data_source_type) NOT ENFORCED """ view_ddl = f""" -CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_VIEW_TABLE}` ( +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_VIEW}` ( project_id STRING NOT NULL, feature_view_name STRING NOT NULL, metric_date DATE NOT NULL, @@ -928,7 +818,7 @@ def _bq_ensure_monitoring_tables(config: RepoConfig) -> None: PRIMARY KEY (project_id, feature_view_name, metric_date, granularity, data_source_type) NOT ENFORCED """ service_ddl = f""" -CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{_BQ_MON_SERVICE_TABLE}` ( +CREATE TABLE IF NOT EXISTS `{proj}.{ds}.{MON_TABLE_FEATURE_SERVICE}` ( project_id STRING NOT NULL, feature_service_name STRING NOT NULL, metric_date DATE NOT NULL, @@ -1075,7 +965,7 @@ def _bq_numeric_stats( job.result() rows = list(job) if not rows: - return [{**_BQ_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + return [empty_numeric_metric(n) for n in feature_names] row = rows[0] row_count = row["_row_count"] or 0 results: List[Dict[str, Any]] = [] @@ -1083,23 +973,23 @@ def _bq_numeric_stats( base = f"c{i}_" non_null = row[f"{base}nn"] or 0 null_count = int(row_count) - int(non_null) - min_v = _bq_opt_float(row[f"{base}min"]) - max_v = _bq_opt_float(row[f"{base}max"]) + min_v = opt_float(row[f"{base}min"]) + max_v = opt_float(row[f"{base}max"]) result: Dict[str, Any] = { "feature_name": col, "feature_type": "numeric", "row_count": 
int(row_count), "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _bq_opt_float(row[f"{base}avg"]), - "stddev": _bq_opt_float(row[f"{base}stddev"]), + "mean": opt_float(row[f"{base}avg"]), + "stddev": opt_float(row[f"{base}stddev"]), "min_val": min_v, "max_val": max_v, - "p50": _bq_opt_float(row[f"{base}p50"]), - "p75": _bq_opt_float(row[f"{base}p75"]), - "p90": _bq_opt_float(row[f"{base}p90"]), - "p95": _bq_opt_float(row[f"{base}p95"]), - "p99": _bq_opt_float(row[f"{base}p99"]), + "p50": opt_float(row[f"{base}p50"]), + "p75": opt_float(row[f"{base}p75"]), + "p90": opt_float(row[f"{base}p90"]), + "p95": opt_float(row[f"{base}p95"]), + "p99": opt_float(row[f"{base}p99"]), "histogram": None, } if min_v is not None and max_v is not None and non_null and int(non_null) > 0: @@ -1151,11 +1041,7 @@ def _bq_categorical_stats( job.result() rows = list(job) if not rows: - return { - **_BQ_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + return empty_categorical_metric(col_name) row_count = rows[0]["row_count"] null_count = rows[0]["null_count"] unique_count = rows[0]["unique_count"] diff --git a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py index c8f41457127..b7416e302c1 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py +++ b/sdk/python/feast/infra/offline_stores/contrib/oracle_offline_store/oracle.py @@ -25,6 +25,16 @@ from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + 
normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.utils import compute_non_entity_date_range @@ -185,96 +195,6 @@ def _build_entity_df_from_feature_sources( # Oracle monitoring helpers # ------------------------------------------------------------------ # -_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - - -def _oracle_mon_table_meta(metric_type: str): - if metric_type == "feature": - return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK - if metric_type == "feature_view": - return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK - if metric_type == 
"feature_service": - return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK - raise ValueError(f"Unknown metric_type '{metric_type}'") - def _oracle_quote_ident(name: str) -> str: return f'"{name}"' @@ -284,28 +204,6 @@ def _oracle_ts_where(ts_filter: str) -> str: return f"({ts_filter})" if (ts_filter and ts_filter.strip()) else "1=1" -_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_type": "numeric", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - - -def _oracle_opt_float(val: Any) -> Optional[float]: - return float(val) if val is not None else None - - def _oracle_fetchall(con, sql: str): cur = con.raw_sql(sql) try: @@ -413,7 +311,7 @@ def _oracle_numeric_stats( row = (_oracle_fetchall(con, query) or [None])[0] if row is None: - return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + return [empty_numeric_metric(n) for n in feature_names] row_count = row[0] results: List[Dict[str, Any]] = [] @@ -423,8 +321,8 @@ def _oracle_numeric_stats( non_null = row[base] or 0 null_count = row_count - non_null - min_val = _oracle_opt_float(row[base + 3]) - max_val = _oracle_opt_float(row[base + 4]) + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) result: Dict[str, Any] = { "feature_name": col, @@ -432,15 +330,15 @@ def _oracle_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _oracle_opt_float(row[base + 1]), - "stddev": _oracle_opt_float(row[base + 2]), + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), "min_val": min_val, "max_val": max_val, - "p50": _oracle_opt_float(row[base + 5]), - "p75": _oracle_opt_float(row[base + 6]), - "p90": _oracle_opt_float(row[base + 7]), - "p95": _oracle_opt_float(row[base + 8]), - "p99": 
_oracle_opt_float(row[base + 9]), + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), "histogram": None, } @@ -489,11 +387,7 @@ def _oracle_categorical_stats( rows = _oracle_fetchall(con, query) if not rows: - return { - **_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + return empty_categorical_metric(col_name) row_count = rows[0][0] null_count = rows[0][1] @@ -808,7 +702,7 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: _oracle_try_execute_ddl( con, f""" - CREATE TABLE {_MON_FEATURE_TABLE} ( + CREATE TABLE {MON_TABLE_FEATURE} ( project_id VARCHAR2(255) NOT NULL, feature_view_name VARCHAR2(255) NOT NULL, feature_name VARCHAR2(255) NOT NULL, @@ -840,7 +734,7 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: _oracle_try_execute_ddl( con, f""" - CREATE TABLE {_MON_VIEW_TABLE} ( + CREATE TABLE {MON_TABLE_FEATURE_VIEW} ( project_id VARCHAR2(255) NOT NULL, feature_view_name VARCHAR2(255) NOT NULL, metric_date DATE NOT NULL, @@ -862,7 +756,7 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: _oracle_try_execute_ddl( con, f""" - CREATE TABLE {_MON_SERVICE_TABLE} ( + CREATE TABLE {MON_TABLE_FEATURE_SERVICE} ( project_id VARCHAR2(255) NOT NULL, feature_service_name VARCHAR2(255) NOT NULL, metric_date DATE NOT NULL, @@ -890,7 +784,7 @@ def save_monitoring_metrics( return assert isinstance(config.offline_store, OracleOfflineStoreConfig) - table, columns, pk_columns = _oracle_mon_table_meta(metric_type) + table, columns, pk_columns = monitoring_table_meta(metric_type) con = get_ibis_connection(config) for row in metrics: _oracle_merge_metric_row(con, table, columns, pk_columns, row) @@ -906,7 +800,7 @@ def query_monitoring_metrics( ) -> List[Dict[str, Any]]: assert isinstance(config.offline_store, OracleOfflineStoreConfig) - table, columns, _ = _oracle_mon_table_meta(metric_type) + 
table, columns, _ = monitoring_table_meta(metric_type) conditions = [ f"{_oracle_quote_ident('project_id')} = {_oracle_escape_literal(project)}" @@ -939,15 +833,7 @@ def query_monitoring_metrics( results = [] for row in rows: record = dict(zip(columns, row)) - if "histogram" in record and isinstance(record["histogram"], str): - record["histogram"] = json.loads(record["histogram"]) - if "metric_date" in record and hasattr(record["metric_date"], "isoformat"): - record["metric_date"] = record["metric_date"].isoformat() - if "computed_at" in record and hasattr(record["computed_at"], "isoformat"): - record["computed_at"] = record["computed_at"].isoformat() - if "is_baseline" in record and record["is_baseline"] is not None: - record["is_baseline"] = bool(int(record["is_baseline"])) - results.append(record) + results.append(normalize_monitoring_row(record)) return results @@ -985,6 +871,6 @@ def clear_monitoring_baseline( con = get_ibis_connection(config) _oracle_exec( con, - f"UPDATE {_MON_FEATURE_TABLE} SET {_oracle_quote_ident('is_baseline')} = 0 " + f"UPDATE {MON_TABLE_FEATURE} SET {_oracle_quote_ident('is_baseline')} = 0 " f"WHERE {where_sql}", ) diff --git a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py index 03b77fa0e0e..83aede66978 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py +++ b/sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres.py @@ -42,6 +42,16 @@ get_query_schema, ) from feast.infra.utils.postgres.postgres_config import PostgreSQLConfig +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import 
RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -377,7 +387,7 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) with _get_conn(config.offline_store) as conn, conn.cursor() as cur: cur.execute(f""" - CREATE TABLE IF NOT EXISTS {_MON_FEATURE_TABLE} ( + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( project_id VARCHAR(255) NOT NULL, feature_view_name VARCHAR(255) NOT NULL, feature_name VARCHAR(255) NOT NULL, @@ -404,20 +414,20 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: metric_date, granularity, data_source_type) ); CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_project - ON {_MON_FEATURE_TABLE} (project_id); + ON {MON_TABLE_FEATURE} (project_id); CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_view - ON {_MON_FEATURE_TABLE} (project_id, feature_view_name); + ON {MON_TABLE_FEATURE} (project_id, feature_view_name); CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_date - ON {_MON_FEATURE_TABLE} (metric_date); + ON {MON_TABLE_FEATURE} (metric_date); CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_granularity - ON {_MON_FEATURE_TABLE} (granularity); + ON {MON_TABLE_FEATURE} (granularity); CREATE INDEX IF NOT EXISTS idx_fm_feature_metrics_baseline - ON {_MON_FEATURE_TABLE} (project_id, feature_view_name, feature_name) + ON {MON_TABLE_FEATURE} (project_id, feature_view_name, feature_name) WHERE is_baseline = TRUE; """) cur.execute(f""" - CREATE TABLE IF NOT EXISTS {_MON_VIEW_TABLE} ( + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( project_id VARCHAR(255) NOT NULL, feature_view_name VARCHAR(255) NOT NULL, metric_date DATE NOT NULL, @@ -436,7 +446,7 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: """) cur.execute(f""" - CREATE TABLE IF NOT EXISTS {_MON_SERVICE_TABLE} ( + CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( project_id VARCHAR(255) NOT NULL, feature_service_name VARCHAR(255) NOT NULL, metric_date DATE NOT NULL, 
@@ -464,7 +474,7 @@ def save_monitoring_metrics( return assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) - table, columns, pk_columns = _mon_table_meta(metric_type) + table, columns, pk_columns = monitoring_table_meta(metric_type) _mon_upsert(config.offline_store, table, columns, pk_columns, metrics) @staticmethod @@ -478,7 +488,7 @@ def query_monitoring_metrics( ) -> List[Dict[str, Any]]: assert isinstance(config.offline_store, PostgreSQLOfflineStoreConfig) - _, columns, _ = _mon_table_meta(metric_type) + _, columns, _ = monitoring_table_meta(metric_type) return _mon_query( config.offline_store, metric_type, @@ -515,7 +525,7 @@ def clear_monitoring_baseline( conditions.append(sql.SQL("is_baseline = TRUE")) query = sql.SQL("UPDATE {} SET is_baseline = FALSE WHERE {}").format( - sql.Identifier(_MON_FEATURE_TABLE), + sql.Identifier(MON_TABLE_FEATURE), sql.SQL(" AND ").join(conditions), ) @@ -1022,23 +1032,6 @@ def _get_entity_schema( # Monitoring SQL push-down helpers # ------------------------------------------------------------------ # -_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_type": "numeric", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - def _sql_numeric_stats( conn, @@ -1078,7 +1071,7 @@ def _sql_numeric_stats( row = cur.fetchone() if row is None: - return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + return [empty_numeric_metric(n) for n in feature_names] row_count = row[0] results: List[Dict[str, Any]] = [] @@ -1088,8 +1081,8 @@ def _sql_numeric_stats( non_null = row[base] or 0 null_count = row_count - non_null - min_val = _opt_float(row[base + 3]) - max_val = _opt_float(row[base + 4]) + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) result: Dict[str, Any] = { "feature_name": col, @@ -1097,15 
+1090,15 @@ def _sql_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _opt_float(row[base + 1]), - "stddev": _opt_float(row[base + 2]), + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), "min_val": min_val, "max_val": max_val, - "p50": _opt_float(row[base + 5]), - "p75": _opt_float(row[base + 6]), - "p90": _opt_float(row[base + 7]), - "p95": _opt_float(row[base + 8]), - "p99": _opt_float(row[base + 9]), + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), "histogram": None, } @@ -1201,11 +1194,7 @@ def _sql_categorical_stats( rows = cur.fetchall() if not rows: - return { - **_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + return empty_categorical_metric(col_name) row_count = rows[0][0] null_count = rows[0][1] @@ -1238,105 +1227,10 @@ def _sql_categorical_stats( } -def _opt_float(val: Any) -> Optional[float]: - """Convert a DB aggregate result to float, preserving None.""" - return float(val) if val is not None else None - - # ------------------------------------------------------------------ # # Monitoring metrics storage helpers # ------------------------------------------------------------------ # -_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - 
"feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - - -def _mon_table_meta(metric_type: str): - if metric_type == "feature": - return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK - if metric_type == "feature_view": - return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK - if metric_type == "feature_service": - return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK - raise ValueError(f"Unknown metric_type '{metric_type}'") - def _mon_upsert( pg_config: PostgreSQLConfig, @@ -1381,11 +1275,7 @@ def _mon_query( start_date: Optional["date"] = None, end_date: Optional["date"] = None, ) -> List[Dict[str, Any]]: - import json as _json - from datetime import date as _date - from datetime import datetime as _datetime - - table, _, _ = _mon_table_meta(metric_type) + table, _, _ = monitoring_table_meta(metric_type) conditions = [sql.SQL("project_id = %s")] params: list = [project] @@ -1418,12 +1308,6 @@ def _mon_query( results = [] for row in rows: record = dict(zip(columns, row)) - if "histogram" in record and isinstance(record["histogram"], str): - record["histogram"] = _json.loads(record["histogram"]) - if "metric_date" in record and isinstance(record["metric_date"], _date): - 
record["metric_date"] = record["metric_date"].isoformat() - if "computed_at" in record and isinstance(record["computed_at"], _datetime): - record["computed_at"] = record["computed_at"].isoformat() - results.append(record) + results.append(normalize_monitoring_row(record)) return results diff --git a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py index 3a188ace582..dbd8c4da7e4 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py +++ b/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py @@ -52,6 +52,15 @@ ) from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage from feast.type_map import spark_schema_to_np_dtypes @@ -523,7 +532,7 @@ def save_monitoring_metrics( if not metrics: return assert isinstance(config.offline_store, SparkOfflineStoreConfig) - table, columns, pk_columns = _spark_mon_table_meta(metric_type) + table, columns, pk_columns = monitoring_table_meta(metric_type) pdf_new = pd.DataFrame([{c: m.get(c) for c in columns} for m in metrics]) pdf_new = _spark_normalize_histogram_column(pdf_new) @@ -550,7 +559,7 @@ def query_monitoring_metrics( end_date: Optional[date] = None, ) -> List[Dict[str, Any]]: assert isinstance(config.offline_store, SparkOfflineStoreConfig) - table, columns, _ = _spark_mon_table_meta(metric_type) + table, columns, _ = monitoring_table_meta(metric_type) spark_session = get_spark_session_or_start_new_with_repoconfig( store_config=config.offline_store ) @@ -584,10 
+593,10 @@ def clear_monitoring_baseline( spark_session = get_spark_session_or_start_new_with_repoconfig( store_config=config.offline_store ) - if not spark_session.catalog.tableExists(_SPARK_MON_FEATURE_TABLE): + if not spark_session.catalog.tableExists(MON_TABLE_FEATURE): return - pdf = spark_session.table(_SPARK_MON_FEATURE_TABLE).toPandas() + pdf = spark_session.table(MON_TABLE_FEATURE).toPandas() mask = (pdf["project_id"] == project) & (pdf["is_baseline"] == True) # noqa: E712 if feature_view_name is not None: mask &= pdf["feature_view_name"] == feature_view_name @@ -598,93 +607,13 @@ def clear_monitoring_baseline( pdf.loc[mask, "is_baseline"] = False spark_session.createDataFrame(pdf).write.mode("overwrite").saveAsTable( - _SPARK_MON_FEATURE_TABLE + MON_TABLE_FEATURE ) -_SPARK_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_SPARK_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_SPARK_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_SPARK_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_SPARK_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_SPARK_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_SPARK_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_SPARK_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - 
"data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_SPARK_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - _SPARK_MONITORING_DDL_STATEMENTS = [ f""" -CREATE TABLE IF NOT EXISTS {_SPARK_MON_FEATURE_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( project_id STRING NOT NULL, feature_view_name STRING NOT NULL, feature_name STRING NOT NULL, @@ -710,7 +639,7 @@ def clear_monitoring_baseline( ) USING PARQUET """, f""" -CREATE TABLE IF NOT EXISTS {_SPARK_MON_VIEW_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( project_id STRING NOT NULL, feature_view_name STRING NOT NULL, metric_date DATE NOT NULL, @@ -726,7 +655,7 @@ def clear_monitoring_baseline( ) USING PARQUET """, f""" -CREATE TABLE IF NOT EXISTS {_SPARK_MON_SERVICE_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( project_id STRING NOT NULL, feature_service_name STRING NOT NULL, metric_date DATE NOT NULL, @@ -742,42 +671,6 @@ def clear_monitoring_baseline( """, ] -_SPARK_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_name": "", - "feature_type": "categorical", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - - -def _spark_mon_table_meta(metric_type: str): - if metric_type == "feature": - return ( - _SPARK_MON_FEATURE_TABLE, - _SPARK_MON_FEATURE_COLUMNS, - _SPARK_MON_FEATURE_PK, - ) - if metric_type == "feature_view": - return _SPARK_MON_VIEW_TABLE, _SPARK_MON_VIEW_COLUMNS, _SPARK_MON_VIEW_PK - if metric_type == "feature_service": - return ( - _SPARK_MON_SERVICE_TABLE, - _SPARK_MON_SERVICE_COLUMNS, - _SPARK_MON_SERVICE_PK, - ) - raise ValueError(f"Unknown metric_type '{metric_type}'") - def _spark_normalize_histogram_column(pdf: 
pd.DataFrame) -> pd.DataFrame: if "histogram" not in pdf.columns: @@ -809,10 +702,6 @@ def _spark_pandas_upsert( return pd.concat([kept_df, pdf_new], ignore_index=True) -def _spark_opt_float(val: Any) -> Optional[float]: - return float(val) if val is not None else None - - def _spark_sql_numeric_stats( spark_session: SparkSession, from_expression: str, @@ -845,9 +734,7 @@ def _spark_sql_numeric_stats( ) rows = spark_session.sql(query).collect() if not rows or rows[0] is None: - return [ - {**_SPARK_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names - ] + return [empty_categorical_metric(n) for n in feature_names] row = rows[0] row_count = int(row[0] or 0) @@ -858,8 +745,8 @@ def _spark_sql_numeric_stats( non_null = int(row[base] or 0) null_count = row_count - non_null - min_val = _spark_opt_float(row[base + 3]) - max_val = _spark_opt_float(row[base + 4]) + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) result: Dict[str, Any] = { "feature_name": col, @@ -867,15 +754,15 @@ def _spark_sql_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _spark_opt_float(row[base + 1]), - "stddev": _spark_opt_float(row[base + 2]), + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), "min_val": min_val, "max_val": max_val, - "p50": _spark_opt_float(row[base + 5]), - "p75": _spark_opt_float(row[base + 6]), - "p90": _spark_opt_float(row[base + 7]), - "p95": _spark_opt_float(row[base + 8]), - "p99": _spark_opt_float(row[base + 9]), + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), "histogram": None, } @@ -971,11 +858,7 @@ def _spark_sql_categorical_stats( rows = spark_session.sql(query).collect() if not rows: - return { - **_SPARK_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + 
return empty_categorical_metric(col_name) row_count = int(rows[0][0] or 0) null_count = int(rows[0][1] or 0) @@ -1012,26 +895,11 @@ def _spark_rows_to_metric_dicts( rows: List[Any], columns: List[str], ) -> List[Dict[str, Any]]: - from datetime import date as date_type - from datetime import datetime as datetime_type - out: List[Dict[str, Any]] = [] for r in rows: d = r.asDict() rec = {c: d.get(c) for c in columns} - h = rec.get("histogram") - if isinstance(h, str): - try: - rec["histogram"] = json.loads(h) - except json.JSONDecodeError: - pass - md = rec.get("metric_date") - if isinstance(md, date_type): - rec["metric_date"] = md.isoformat() - ca = rec.get("computed_at") - if isinstance(ca, datetime_type): - rec["computed_at"] = ca.isoformat() - out.append(rec) + out.append(normalize_monitoring_row(rec)) return out diff --git a/sdk/python/feast/infra/offline_stores/dask.py b/sdk/python/feast/infra/offline_stores/dask.py index a9bc1908f54..4f2e71a9ec8 100644 --- a/sdk/python/feast/infra/offline_stores/dask.py +++ b/sdk/python/feast/infra/offline_stores/dask.py @@ -37,6 +37,15 @@ get_pyarrow_schema_from_batch_source, ) from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + FEATURE_METRICS_COLUMNS, + FEATURE_METRICS_PK, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + normalize_monitoring_row, +) from feast.on_demand_feature_view import OnDemandFeatureView from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -659,9 +668,9 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: os.makedirs(base, exist_ok=True) tables = [ - (_DASK_FEATURE_METRICS_FILE, _DASK_MON_FEATURE_COLUMNS), - (_DASK_VIEW_METRICS_FILE, _DASK_MON_VIEW_COLUMNS), - (_DASK_SERVICE_METRICS_FILE, _DASK_MON_SERVICE_COLUMNS), + (_DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS), + 
(_DASK_VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS), + (_DASK_SERVICE_METRICS_FILE, FEATURE_SERVICE_METRICS_COLUMNS), ] for fname, columns in tables: fpath = _dask_monitoring_path(config, fname) @@ -733,82 +742,6 @@ def clear_monitoring_baseline( _DASK_VIEW_METRICS_FILE = "feature_view_metrics.parquet" _DASK_SERVICE_METRICS_FILE = "feature_service_metrics.parquet" -_DASK_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_DASK_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_DASK_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_DASK_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_DASK_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_DASK_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - def _dask_monitoring_base(config: RepoConfig) -> str: base = config.repo_path @@ -821,18 +754,18 @@ def _dask_monitoring_path(config: RepoConfig, filename: str) -> str: def _dask_mon_table_meta(metric_type: str): if metric_type == "feature": + return _DASK_FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK + if metric_type == "feature_view": return 
( - _DASK_FEATURE_METRICS_FILE, - _DASK_MON_FEATURE_COLUMNS, - _DASK_MON_FEATURE_PK, + _DASK_VIEW_METRICS_FILE, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, ) - if metric_type == "feature_view": - return _DASK_VIEW_METRICS_FILE, _DASK_MON_VIEW_COLUMNS, _DASK_MON_VIEW_PK if metric_type == "feature_service": return ( _DASK_SERVICE_METRICS_FILE, - _DASK_MON_SERVICE_COLUMNS, - _DASK_MON_SERVICE_PK, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, ) raise ValueError(f"Unknown metric_type '{metric_type}'") @@ -1032,15 +965,15 @@ def _dask_parquet_query( results = [] for _, row in df.iterrows(): record = {c: row.get(c) for c in columns} - if "histogram" in record and isinstance(record["histogram"], str): - try: - record["histogram"] = json.loads(record["histogram"]) - except json.JSONDecodeError: - pass - if "metric_date" in record and hasattr(record["metric_date"], "isoformat"): - record["metric_date"] = record["metric_date"].isoformat() - if "computed_at" in record and hasattr(record["computed_at"], "isoformat"): - record["computed_at"] = record["computed_at"].isoformat() + normalize_monitoring_row(record) + for key in ("metric_date", "computed_at"): + val = record.get(key) + if ( + val is not None + and not isinstance(val, str) + and hasattr(val, "isoformat") + ): + record[key] = val.isoformat() results.append(record) return results diff --git a/sdk/python/feast/infra/offline_stores/duckdb.py b/sdk/python/feast/infra/offline_stores/duckdb.py index 3f848e2870e..559200f3b67 100644 --- a/sdk/python/feast/infra/offline_stores/duckdb.py +++ b/sdk/python/feast/infra/offline_stores/duckdb.py @@ -29,6 +29,18 @@ from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob from feast.infra.offline_stores.offline_utils import get_timestamp_filter_sql from feast.infra.registry.base_registry import BaseRegistry +from feast.monitoring.monitoring_utils import ( + FEATURE_METRICS_COLUMNS, + FEATURE_METRICS_PK, + 
FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + FEATURE_VIEW_METRICS_COLUMNS, + FEATURE_VIEW_METRICS_PK, + empty_categorical_metric, + empty_numeric_metric, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig @@ -127,82 +139,6 @@ def _write_data_source( VIEW_METRICS_FILE = "feature_view_metrics.parquet" SERVICE_METRICS_FILE = "feature_service_metrics.parquet" -_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - def _duckdb_monitoring_base(config: RepoConfig) -> str: base = config.repo_path @@ -229,28 +165,6 @@ def _duckdb_ts_where(ts_filter: str) -> str: return f"({ts_filter})" if (ts_filter and ts_filter.strip()) else "1=1" -_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_type": "numeric", - "row_count": 0, - "null_count": 0, - 
"null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - - -def _duckdb_opt_float(val: Any) -> Optional[float]: - return float(val) if val is not None else None - - def _duckdb_numeric_stats( conn: duckdb.DuckDBPyConnection, from_expr: str, @@ -282,7 +196,7 @@ def _duckdb_numeric_stats( row = conn.execute(query).fetchone() if row is None: - return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + return [empty_numeric_metric(n) for n in feature_names] row_count = row[0] results: List[Dict[str, Any]] = [] @@ -292,8 +206,8 @@ def _duckdb_numeric_stats( non_null = row[base] or 0 null_count = row_count - non_null - min_val = _duckdb_opt_float(row[base + 3]) - max_val = _duckdb_opt_float(row[base + 4]) + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) result: Dict[str, Any] = { "feature_name": col, @@ -301,15 +215,15 @@ def _duckdb_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _duckdb_opt_float(row[base + 1]), - "stddev": _duckdb_opt_float(row[base + 2]), + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), "min_val": min_val, "max_val": max_val, - "p50": _duckdb_opt_float(row[base + 5]), - "p75": _duckdb_opt_float(row[base + 6]), - "p90": _duckdb_opt_float(row[base + 7]), - "p95": _duckdb_opt_float(row[base + 8]), - "p99": _duckdb_opt_float(row[base + 9]), + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), "histogram": None, } @@ -402,11 +316,7 @@ def _duckdb_categorical_stats( rows = conn.execute(query).fetchall() if not rows: - return { - **_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + return 
empty_categorical_metric(col_name) row_count = rows[0][0] null_count = rows[0][1] @@ -441,11 +351,15 @@ def _duckdb_categorical_stats( def _duckdb_mon_table_meta(metric_type: str): if metric_type == "feature": - return FEATURE_METRICS_FILE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK + return FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS, FEATURE_METRICS_PK if metric_type == "feature_view": - return VIEW_METRICS_FILE, _MON_VIEW_COLUMNS, _MON_VIEW_PK + return VIEW_METRICS_FILE, FEATURE_VIEW_METRICS_COLUMNS, FEATURE_VIEW_METRICS_PK if metric_type == "feature_service": - return SERVICE_METRICS_FILE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK + return ( + SERVICE_METRICS_FILE, + FEATURE_SERVICE_METRICS_COLUMNS, + FEATURE_SERVICE_METRICS_PK, + ) raise ValueError(f"Unknown metric_type '{metric_type}'") @@ -514,15 +428,15 @@ def _duckdb_parquet_query( results = [] for _, row in df.iterrows(): record = {c: row.get(c) for c in columns} - if "histogram" in record and isinstance(record["histogram"], str): - try: - record["histogram"] = json.loads(record["histogram"]) - except json.JSONDecodeError: - pass - if "metric_date" in record and hasattr(record["metric_date"], "isoformat"): - record["metric_date"] = record["metric_date"].isoformat() - if "computed_at" in record and hasattr(record["computed_at"], "isoformat"): - record["computed_at"] = record["computed_at"].isoformat() + normalize_monitoring_row(record) + for key in ("metric_date", "computed_at"): + val = record.get(key) + if ( + val is not None + and not isinstance(val, str) + and hasattr(val, "isoformat") + ): + record[key] = val.isoformat() results.append(record) return results @@ -741,9 +655,9 @@ def ensure_monitoring_tables(config: RepoConfig) -> None: os.makedirs(base, exist_ok=True) tables = [ - (FEATURE_METRICS_FILE, _MON_FEATURE_COLUMNS), - (VIEW_METRICS_FILE, _MON_VIEW_COLUMNS), - (SERVICE_METRICS_FILE, _MON_SERVICE_COLUMNS), + (FEATURE_METRICS_FILE, FEATURE_METRICS_COLUMNS), + (VIEW_METRICS_FILE, 
FEATURE_VIEW_METRICS_COLUMNS), + (SERVICE_METRICS_FILE, FEATURE_SERVICE_METRICS_COLUMNS), ] for fname, columns in tables: path = _duckdb_monitoring_path(config, fname) diff --git a/sdk/python/feast/infra/offline_stores/redshift.py b/sdk/python/feast/infra/offline_stores/redshift.py index 3f7ab94aecd..203ed97dd8d 100644 --- a/sdk/python/feast/infra/offline_stores/redshift.py +++ b/sdk/python/feast/infra/offline_stores/redshift.py @@ -41,6 +41,16 @@ ) from feast.infra.registry.base_registry import BaseRegistry from feast.infra.utils import aws_utils +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage @@ -463,7 +473,7 @@ def save_monitoring_metrics( if not metrics: return assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) - table, columns, pk_columns = _redshift_mon_table_meta(metric_type) + table, columns, pk_columns = monitoring_table_meta(metric_type) for row in metrics: _redshift_merge_metric_row(config, table, columns, pk_columns, row) @@ -477,7 +487,7 @@ def query_monitoring_metrics( end_date: Optional[date] = None, ) -> List[Dict[str, Any]]: assert isinstance(config.offline_store, RedshiftOfflineStoreConfig) - _, columns, _ = _redshift_mon_table_meta(metric_type) + _, columns, _ = monitoring_table_meta(metric_type) return _redshift_mon_query( config, metric_type, columns, project, filters, start_date, end_date ) @@ -506,93 +516,13 @@ def clear_monitoring_baseline( f"data_source_type = {_redshift_sql_literal(data_source_type)}" ) where_sql = " AND ".join(parts) - sql = f"UPDATE {_MON_FEATURE_TABLE} SET is_baseline = FALSE WHERE {where_sql}" + sql = f"UPDATE {MON_TABLE_FEATURE} SET is_baseline = FALSE WHERE {where_sql}" 
_redshift_execute_statement(config, sql) -_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - _REDSHIFT_MONITORING_DDL_STATEMENTS = [ f""" -CREATE TABLE IF NOT EXISTS {_MON_FEATURE_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE} ( project_id VARCHAR(255) NOT NULL, feature_view_name VARCHAR(255) NOT NULL, feature_name VARCHAR(255) NOT NULL, @@ -620,7 +550,7 @@ def clear_monitoring_baseline( ); """, f""" -CREATE TABLE IF NOT EXISTS {_MON_VIEW_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_VIEW} ( project_id VARCHAR(255) NOT NULL, feature_view_name VARCHAR(255) NOT NULL, metric_date DATE NOT NULL, @@ -638,7 +568,7 
@@ def clear_monitoring_baseline( ); """, f""" -CREATE TABLE IF NOT EXISTS {_MON_SERVICE_TABLE} ( +CREATE TABLE IF NOT EXISTS {MON_TABLE_FEATURE_SERVICE} ( project_id VARCHAR(255) NOT NULL, feature_service_name VARCHAR(255) NOT NULL, metric_date DATE NOT NULL, @@ -656,34 +586,6 @@ def clear_monitoring_baseline( """, ] -_EMPTY_METRIC_TEMPLATE: Dict[str, Any] = { - "feature_name": "", - "feature_type": "categorical", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - - -def _redshift_mon_table_meta(metric_type: str): - if metric_type == "feature": - return _MON_FEATURE_TABLE, _MON_FEATURE_COLUMNS, _MON_FEATURE_PK - if metric_type == "feature_view": - return _MON_VIEW_TABLE, _MON_VIEW_COLUMNS, _MON_VIEW_PK - if metric_type == "feature_service": - return _MON_SERVICE_TABLE, _MON_SERVICE_COLUMNS, _MON_SERVICE_PK - raise ValueError(f"Unknown metric_type '{metric_type}'") - def _redshift_execute_statement(config: RepoConfig, sql: str) -> str: client = aws_utils.get_redshift_data_client(config.offline_store.region) @@ -747,10 +649,6 @@ def _redshift_field_value(field: Dict[str, Any]) -> Any: return None -def _redshift_opt_float(val: Any) -> Optional[float]: - return float(val) if val is not None else None - - def _redshift_sql_literal(val: Any) -> str: if val is None: return "NULL" @@ -820,7 +718,7 @@ def _redshift_mon_query( start_date: Optional[date], end_date: Optional[date], ) -> List[Dict[str, Any]]: - table, _, _ = _redshift_mon_table_meta(metric_type) + table, _, _ = monitoring_table_meta(metric_type) conditions = [f"project_id = {_redshift_sql_literal(project)}"] if filters: for key, value in filters.items(): @@ -848,18 +746,7 @@ def _redshift_mon_query( out: List[Dict[str, Any]] = [] for rec in rows: record = {col_names[i]: _redshift_field_value(rec[i]) for i in range(len(rec))} - if 
"histogram" in record and isinstance(record["histogram"], str): - record["histogram"] = json.loads(record["histogram"]) - if "metric_date" in record and record["metric_date"] is not None: - md = record["metric_date"] - if isinstance(md, str): - record["metric_date"] = md[:10] - elif isinstance(md, date): - record["metric_date"] = md.isoformat() - if "computed_at" in record and record["computed_at"] is not None: - ca = record["computed_at"] - record["computed_at"] = ca if isinstance(ca, str) else str(ca) - out.append(record) + out.append(normalize_monitoring_row(record)) return out @@ -895,7 +782,7 @@ def _redshift_sql_numeric_stats( ) rows = _redshift_execute_fetch_rows(config, query) if not rows or not rows[0]: - return [{**_EMPTY_METRIC_TEMPLATE, "feature_name": n} for n in feature_names] + return [empty_numeric_metric(n) for n in feature_names] row = rows[0] row_count = int(_redshift_field_value(row[0]) or 0) @@ -906,8 +793,8 @@ def _redshift_sql_numeric_stats( non_null = int(_redshift_field_value(row[base]) or 0) null_count = row_count - non_null - min_val = _redshift_opt_float(_redshift_field_value(row[base + 3])) - max_val = _redshift_opt_float(_redshift_field_value(row[base + 4])) + min_val = opt_float(_redshift_field_value(row[base + 3])) + max_val = opt_float(_redshift_field_value(row[base + 4])) result: Dict[str, Any] = { "feature_name": col, @@ -915,15 +802,15 @@ def _redshift_sql_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _redshift_opt_float(_redshift_field_value(row[base + 1])), - "stddev": _redshift_opt_float(_redshift_field_value(row[base + 2])), + "mean": opt_float(_redshift_field_value(row[base + 1])), + "stddev": opt_float(_redshift_field_value(row[base + 2])), "min_val": min_val, "max_val": max_val, - "p50": _redshift_opt_float(_redshift_field_value(row[base + 5])), - "p75": _redshift_opt_float(_redshift_field_value(row[base + 6])), - "p90": 
_redshift_opt_float(_redshift_field_value(row[base + 7])), - "p95": _redshift_opt_float(_redshift_field_value(row[base + 8])), - "p99": _redshift_opt_float(_redshift_field_value(row[base + 9])), + "p50": opt_float(_redshift_field_value(row[base + 5])), + "p75": opt_float(_redshift_field_value(row[base + 6])), + "p90": opt_float(_redshift_field_value(row[base + 7])), + "p95": opt_float(_redshift_field_value(row[base + 8])), + "p99": opt_float(_redshift_field_value(row[base + 9])), "histogram": None, } @@ -1021,11 +908,7 @@ def _redshift_sql_categorical_stats( rows = _redshift_execute_fetch_rows(config, query) if not rows: - return { - **_EMPTY_METRIC_TEMPLATE, - "feature_name": col_name, - "feature_type": "categorical", - } + return empty_categorical_metric(col_name) row_count = int(_redshift_field_value(rows[0][0]) or 0) null_count = int(_redshift_field_value(rows[0][1]) or 0) diff --git a/sdk/python/feast/infra/offline_stores/snowflake.py b/sdk/python/feast/infra/offline_stores/snowflake.py index 7cae609ef12..bcf347ae2e9 100644 --- a/sdk/python/feast/infra/offline_stores/snowflake.py +++ b/sdk/python/feast/infra/offline_stores/snowflake.py @@ -51,6 +51,16 @@ write_pandas, write_parquet, ) +from feast.monitoring.monitoring_utils import ( + MON_TABLE_FEATURE, + MON_TABLE_FEATURE_SERVICE, + MON_TABLE_FEATURE_VIEW, + empty_categorical_metric, + empty_numeric_metric, + monitoring_table_meta, + normalize_monitoring_row, + opt_float, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig from feast.saved_dataset import SavedDatasetStorage from feast.types import ( @@ -503,13 +513,9 @@ def get_monitoring_max_timestamp( def ensure_monitoring_tables(config: RepoConfig) -> None: assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) - fq_feature = _snowflake_monitoring_table_fqn( - config, _SNOWFLAKE_MON_FEATURE_TABLE - ) - fq_view = _snowflake_monitoring_table_fqn(config, _SNOWFLAKE_MON_VIEW_TABLE) - fq_service = _snowflake_monitoring_table_fqn( - 
config, _SNOWFLAKE_MON_SERVICE_TABLE - ) + fq_feature = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE) + fq_view = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_VIEW) + fq_service = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE_SERVICE) ddl_feature = f""" CREATE TABLE IF NOT EXISTS {fq_feature} ( @@ -590,7 +596,7 @@ def save_monitoring_metrics( return assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) - table, columns, pk_columns = _snowflake_mon_table_meta(metric_type) + table, columns, pk_columns = monitoring_table_meta(metric_type) _snowflake_mon_merge_upsert( config.offline_store, table, columns, pk_columns, metrics ) @@ -606,7 +612,7 @@ def query_monitoring_metrics( ) -> List[Dict[str, Any]]: assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) - _, columns, _ = _snowflake_mon_table_meta(metric_type) + _, columns, _ = monitoring_table_meta(metric_type) return _snowflake_mon_query( config.offline_store, metric_type, @@ -627,7 +633,7 @@ def clear_monitoring_baseline( ) -> None: assert isinstance(config.offline_store, SnowflakeOfflineStoreConfig) - fq_table = _snowflake_monitoring_table_fqn(config, _SNOWFLAKE_MON_FEATURE_TABLE) + fq_table = _snowflake_monitoring_table_fqn(config, MON_TABLE_FEATURE) conditions = [f'"project_id" = {_snowflake_sql_literal(project)}'] if feature_view_name: conditions.append( @@ -873,111 +879,6 @@ def _get_file_names_from_copy_into(self, cursor, native_export_path) -> List[str # Snowflake monitoring SQL push-down & storage helpers # ------------------------------------------------------------------ # -_SNOWFLAKE_MON_FEATURE_TABLE = "feast_monitoring_feature_metrics" -_SNOWFLAKE_MON_VIEW_TABLE = "feast_monitoring_feature_view_metrics" -_SNOWFLAKE_MON_SERVICE_TABLE = "feast_monitoring_feature_service_metrics" - -_SNOWFLAKE_MON_FEATURE_COLUMNS = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", - 
"computed_at", - "is_baseline", - "feature_type", - "row_count", - "null_count", - "null_rate", - "mean", - "stddev", - "min_val", - "max_val", - "p50", - "p75", - "p90", - "p95", - "p99", - "histogram", -] -_SNOWFLAKE_MON_FEATURE_PK = [ - "project_id", - "feature_view_name", - "feature_name", - "metric_date", - "granularity", - "data_source_type", -] - -_SNOWFLAKE_MON_VIEW_COLUMNS = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_row_count", - "total_features", - "features_with_nulls", - "avg_null_rate", - "max_null_rate", -] -_SNOWFLAKE_MON_VIEW_PK = [ - "project_id", - "feature_view_name", - "metric_date", - "granularity", - "data_source_type", -] - -_SNOWFLAKE_MON_SERVICE_COLUMNS = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", - "computed_at", - "is_baseline", - "total_feature_views", - "total_features", - "avg_null_rate", - "max_null_rate", -] -_SNOWFLAKE_MON_SERVICE_PK = [ - "project_id", - "feature_service_name", - "metric_date", - "granularity", - "data_source_type", -] - -_EMPTY_SNOWFLAKE_NUMERIC_METRIC: Dict[str, Any] = { - "feature_type": "numeric", - "row_count": 0, - "null_count": 0, - "null_rate": 0.0, - "mean": None, - "stddev": None, - "min_val": None, - "max_val": None, - "p50": None, - "p75": None, - "p90": None, - "p95": None, - "p99": None, - "histogram": None, -} - - -def _opt_float(val: Any) -> Optional[float]: - if val is None: - return None - if isinstance(val, Decimal): - return float(val) - return float(val) - def _escape_snowflake_sql_string(value: str) -> str: return value.replace("'", "''") @@ -1028,28 +929,6 @@ def _snowflake_monitoring_table_fqn( return f'"{os.database}"."{os.schema_}"."{table_name}"' -def _snowflake_mon_table_meta(metric_type: str) -> Tuple[str, List[str], List[str]]: - if metric_type == "feature": - return ( - _SNOWFLAKE_MON_FEATURE_TABLE, - _SNOWFLAKE_MON_FEATURE_COLUMNS, 
- _SNOWFLAKE_MON_FEATURE_PK, - ) - if metric_type == "feature_view": - return ( - _SNOWFLAKE_MON_VIEW_TABLE, - _SNOWFLAKE_MON_VIEW_COLUMNS, - _SNOWFLAKE_MON_VIEW_PK, - ) - if metric_type == "feature_service": - return ( - _SNOWFLAKE_MON_SERVICE_TABLE, - _SNOWFLAKE_MON_SERVICE_COLUMNS, - _SNOWFLAKE_MON_SERVICE_PK, - ) - raise ValueError(f"Unknown metric_type '{metric_type}'") - - def _snowflake_sql_numeric_histogram( conn: SnowflakeConnection, from_expression: str, @@ -1133,10 +1012,7 @@ def _snowflake_sql_numeric_stats( row = cursor.fetchone() if row is None: - return [ - {**_EMPTY_SNOWFLAKE_NUMERIC_METRIC, "feature_name": n} - for n in feature_names - ] + return [empty_numeric_metric(n) for n in feature_names] row_count = row[0] results: List[Dict[str, Any]] = [] @@ -1146,8 +1022,8 @@ def _snowflake_sql_numeric_stats( non_null = row[base] or 0 null_count = row_count - non_null - min_val = _opt_float(row[base + 3]) - max_val = _opt_float(row[base + 4]) + min_val = opt_float(row[base + 3]) + max_val = opt_float(row[base + 4]) result: Dict[str, Any] = { "feature_name": col, @@ -1155,15 +1031,15 @@ def _snowflake_sql_numeric_stats( "row_count": row_count, "null_count": null_count, "null_rate": null_count / row_count if row_count > 0 else 0.0, - "mean": _opt_float(row[base + 1]), - "stddev": _opt_float(row[base + 2]), + "mean": opt_float(row[base + 1]), + "stddev": opt_float(row[base + 2]), "min_val": min_val, "max_val": max_val, - "p50": _opt_float(row[base + 5]), - "p75": _opt_float(row[base + 6]), - "p90": _opt_float(row[base + 7]), - "p95": _opt_float(row[base + 8]), - "p99": _opt_float(row[base + 9]), + "p50": opt_float(row[base + 5]), + "p75": opt_float(row[base + 6]), + "p90": opt_float(row[base + 7]), + "p95": opt_float(row[base + 8]), + "p99": opt_float(row[base + 9]), "histogram": None, } @@ -1210,11 +1086,7 @@ def _snowflake_sql_categorical_stats( rows = cursor.fetchall() if not rows: - return { - **_EMPTY_SNOWFLAKE_NUMERIC_METRIC, - "feature_name": 
col_name, - "feature_type": "categorical", - } + return empty_categorical_metric(col_name) row_count = rows[0][0] null_count = rows[0][1] @@ -1299,7 +1171,7 @@ def _snowflake_mon_query( start_date: Optional[date] = None, end_date: Optional[date] = None, ) -> List[Dict[str, Any]]: - table, _, _ = _snowflake_mon_table_meta(metric_type) + table, _, _ = monitoring_table_meta(metric_type) fq = f'"{offline_store.database}"."{offline_store.schema_}"."{table}"' conditions = [f'"project_id" = {_snowflake_sql_literal(project)}'] @@ -1326,19 +1198,7 @@ def _snowflake_mon_query( results: List[Dict[str, Any]] = [] for row in rows: record = dict(zip(columns, row)) - hist = record.get("histogram") - if hist is not None and isinstance(hist, str): - record["histogram"] = json.loads(hist) - md = record.get("metric_date") - if md is not None: - if isinstance(md, datetime): - record["metric_date"] = md.date().isoformat() - elif isinstance(md, date): - record["metric_date"] = md.isoformat() - ca = record.get("computed_at") - if ca is not None and isinstance(ca, datetime): - record["computed_at"] = ca.isoformat() - results.append(record) + results.append(normalize_monitoring_row(record)) return results diff --git a/sdk/python/feast/monitoring/monitoring_service.py b/sdk/python/feast/monitoring/monitoring_service.py index d131d613171..5e4a3f5016b 100644 --- a/sdk/python/feast/monitoring/monitoring_service.py +++ b/sdk/python/feast/monitoring/monitoring_service.py @@ -8,6 +8,7 @@ from feast.infra.offline_stores.offline_store import OfflineStore from feast.monitoring.dqm_job_manager import DQMJobManager from feast.monitoring.metrics_calculator import MetricsCalculator +from feast.monitoring.monitoring_utils import build_view_aggregate logger = logging.getLogger(__name__) @@ -31,9 +32,12 @@ def __init__(self, store: "FeatureStore"): # noqa: F821 self._job_manager: Optional[DQMJobManager] = None self._calculator = MetricsCalculator() self._monitoring_tables_ensured = False + 
self._offline_store_cache = None def _get_offline_store(self): - return self._store._get_provider().offline_store + if self._offline_store_cache is None: + self._offline_store_cache = self._store._get_provider().offline_store + return self._offline_store_cache def _ensure_monitoring_tables(self): if not self._monitoring_tables_ensured: @@ -331,13 +335,6 @@ def compute_baseline( ) now = datetime.now(timezone.utc) - offline_store = self._get_offline_store() - offline_store.clear_monitoring_baseline( - config=self._store.config, - project=project, - feature_view_name=fv.name, - ) - self._save_computed_metrics( project=project, feature_view=fv, @@ -700,88 +697,22 @@ def _compute_feature_metrics( start_dt: datetime, end_dt: datetime, ) -> List[Dict[str, Any]]: - """Compute metrics, preferring offline store SQL push-down. - - Falls back to Python-based (PyArrow/NumPy) computation when the - offline store does not implement compute_monitoring_metrics. - """ - provider = self._store._get_provider() - offline_store = provider.offline_store - try: - return offline_store.compute_monitoring_metrics( - config=self._store.config, - data_source=feature_view.batch_source, - feature_columns=feature_fields, - timestamp_field=feature_view.batch_source.timestamp_field, - start_date=start_dt, - end_date=end_dt, - histogram_bins=self._calculator.histogram_bins, - top_n=self._calculator.top_n, - ) - except NotImplementedError: - logger.debug( - "Offline store does not support compute_monitoring_metrics, " - "falling back to Python-based computation" - ) - arrow_table = self._read_batch_source( - feature_view, - feature_fields, - start_dt, - end_dt, - ) - return self._calculator.compute_all(arrow_table, feature_fields) + """Compute metrics from a feature view's batch source.""" + return self._compute_from_source( + feature_view.batch_source, + feature_view.batch_source.timestamp_field, + feature_fields, + start_dt, + end_dt, + ) def _get_max_timestamp(self, feature_view) -> 
Optional[datetime]: - """Query the batch source for MAX(event_timestamp). - - Prefers the offline store's native push-down; falls back to reading - the full table and computing max in Python. - """ - provider = self._store._get_provider() - offline_store = provider.offline_store - try: - return offline_store.get_monitoring_max_timestamp( - config=self._store.config, - data_source=feature_view.batch_source, - timestamp_field=feature_view.batch_source.timestamp_field, - ) - except NotImplementedError: - return self._get_max_timestamp_fallback(feature_view) - - def _get_max_timestamp_fallback(self, feature_view) -> Optional[datetime]: - """Pull all data and compute max timestamp in Python (fallback).""" - import pyarrow.compute as pc - - data_source = feature_view.batch_source - ts_field = data_source.timestamp_field - - provider = self._store._get_provider() - offline_store = provider.offline_store - - retrieval_job = offline_store.pull_all_from_table_or_query( - config=self._store.config, - data_source=data_source, - join_key_columns=self._resolve_join_key_columns(feature_view), - feature_name_columns=[], - timestamp_field=ts_field, - created_timestamp_column=data_source.created_timestamp_column, - start_date=_EPOCH, - end_date=_FAR_FUTURE, + """Query the batch source for MAX(event_timestamp).""" + return self._get_max_timestamp_for_source( + feature_view.batch_source, + feature_view.batch_source.timestamp_field, ) - table = retrieval_job.to_arrow() - if ts_field not in table.column_names or len(table) == 0: - return None - - max_val = pc.max(table.column(ts_field)).as_py() - if max_val is None: - return None - - if isinstance(max_val, datetime): - return max_val if max_val.tzinfo else max_val.replace(tzinfo=timezone.utc) - - return datetime.combine(max_val, datetime.min.time(), tzinfo=timezone.utc) - # ------------------------------------------------------------------ # # Private: shared helpers (DRY) # 
------------------------------------------------------------------ # @@ -868,9 +799,6 @@ def _save_computed_metrics( offline_store.save_monitoring_metrics(config, "feature", metrics_list) - null_rates = [ - m["null_rate"] for m in metrics_list if m.get("null_rate") is not None - ] view_metric = { "project_id": project, "feature_view_name": feature_view.name, @@ -879,13 +807,7 @@ def _save_computed_metrics( "data_source_type": "batch", "computed_at": now, "is_baseline": set_baseline, - "total_row_count": metrics_list[0]["row_count"] if metrics_list else 0, - "total_features": len(metrics_list), - "features_with_nulls": sum( - 1 for m in metrics_list if (m.get("null_count") or 0) > 0 - ), - "avg_null_rate": sum(null_rates) / len(null_rates) if null_rates else 0.0, - "max_null_rate": max(null_rates) if null_rates else 0.0, + **build_view_aggregate(metrics_list), } offline_store.save_monitoring_metrics(config, "feature_view", [view_metric]) @@ -1047,9 +969,12 @@ def _resolve_log_source(self, feature_service): return data_source, LOG_TIMESTAMP_FIELD, feature_fields, log_col_map def _get_max_timestamp_for_source(self, data_source, ts_field): - """Get MAX timestamp from an arbitrary data source.""" - provider = self._store._get_provider() - offline_store = provider.offline_store + """Get MAX timestamp from an arbitrary data source. + + Prefers the offline store's native push-down; falls back to reading + the table and computing max in Python. 
+ """ + offline_store = self._get_offline_store() try: return offline_store.get_monitoring_max_timestamp( config=self._store.config, @@ -1057,18 +982,10 @@ def _get_max_timestamp_for_source(self, data_source, ts_field): timestamp_field=ts_field, ) except NotImplementedError: - return self._get_max_timestamp_for_source_fallback( - data_source, - ts_field, - ) + pass - def _get_max_timestamp_for_source_fallback(self, data_source, ts_field): - """Pull data and compute max timestamp in Python (fallback).""" import pyarrow.compute as pc - provider = self._store._get_provider() - offline_store = provider.offline_store - retrieval_job = offline_store.pull_all_from_table_or_query( config=self._store.config, data_source=data_source, @@ -1099,9 +1016,11 @@ def _compute_from_source( start_dt: datetime, end_dt: datetime, ) -> List[Dict[str, Any]]: - """Compute metrics from an arbitrary data source (batch or log).""" - provider = self._store._get_provider() - offline_store = provider.offline_store + """Compute metrics from an arbitrary data source (batch or log). + + Prefers SQL push-down; falls back to Python-based computation. 
+ """ + offline_store = self._get_offline_store() try: return offline_store.compute_monitoring_metrics( config=self._store.config, @@ -1175,37 +1094,23 @@ def _save_log_metrics( for m in metrics_list: by_view[m["feature_view_name"]].append(m) - view_metrics = [] - for vname, vmetrics in by_view.items(): - null_rates = [ - m["null_rate"] for m in vmetrics if m.get("null_rate") is not None - ] - view_metrics.append( - { - "project_id": project, - "feature_view_name": vname, - "metric_date": metric_date, - "granularity": granularity, - "data_source_type": "log", - "computed_at": now, - "is_baseline": set_baseline, - "total_row_count": vmetrics[0]["row_count"] if vmetrics else 0, - "total_features": len(vmetrics), - "features_with_nulls": sum( - 1 for m in vmetrics if (m.get("null_count") or 0) > 0 - ), - "avg_null_rate": ( - sum(null_rates) / len(null_rates) if null_rates else 0.0 - ), - "max_null_rate": max(null_rates) if null_rates else 0.0, - } - ) + view_metrics = [ + { + "project_id": project, + "feature_view_name": vname, + "metric_date": metric_date, + "granularity": granularity, + "data_source_type": "log", + "computed_at": now, + "is_baseline": set_baseline, + **build_view_aggregate(vmetrics), + } + for vname, vmetrics in by_view.items() + ] offline_store.save_monitoring_metrics(config, "feature_view", view_metrics) # --- feature service aggregate --- - all_null_rates = [ - m["null_rate"] for m in metrics_list if m.get("null_rate") is not None - ] + svc_agg = build_view_aggregate(metrics_list) svc_metric = { "project_id": project, "feature_service_name": feature_service_name, @@ -1215,20 +1120,16 @@ def _save_log_metrics( "computed_at": now, "is_baseline": set_baseline, "total_feature_views": len(by_view), - "total_features": len(metrics_list), - "avg_null_rate": ( - sum(all_null_rates) / len(all_null_rates) if all_null_rates else 0.0 - ), - "max_null_rate": max(all_null_rates) if all_null_rates else 0.0, + "total_features": svc_agg["total_features"], + 
"avg_null_rate": svc_agg["avg_null_rate"], + "max_null_rate": svc_agg["max_null_rate"], } offline_store.save_monitoring_metrics(config, "feature_service", [svc_metric]) def _read_batch_source(self, feature_view, feature_fields, start_dt, end_dt): config = self._store.config data_source = feature_view.batch_source - - provider = self._store._get_provider() - offline_store = provider.offline_store + offline_store = self._get_offline_store() retrieval_job = offline_store.pull_all_from_table_or_query( config=config, @@ -1264,7 +1165,7 @@ def _compute_feature_service_metrics( for fs in feature_services: try: - fv_names = [proj.name for proj in fs.feature_view_projections] + fv_names = {proj.name for proj in fs.feature_view_projections} for metric_date in metric_dates: fv_metrics = offline_store.query_monitoring_metrics( From 52576c2e8923edc9ed5756989c1399dcc952531b Mon Sep 17 00:00:00 2001 From: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> Date: Tue, 21 Apr 2026 13:03:33 +0530 Subject: [PATCH 06/12] docs: Monitoring User Guide Signed-off-by: Jitendra Yejare <11752425+jyejare@users.noreply.github.com> --- .secrets.baseline | 23 + docs/SUMMARY.md | 2 + docs/how-to-guides/feature-monitoring.md | 368 +++++ .../monitoring/monitoring-quickstart.ipynb | 1256 +++++++++++++++++ .../contrib/oracle_offline_store/oracle.py | 2 +- .../postgres_offline_store/postgres.py | 2 +- .../feast/infra/offline_stores/snowflake.py | 2 +- 7 files changed, 1652 insertions(+), 3 deletions(-) create mode 100644 docs/how-to-guides/feature-monitoring.md create mode 100644 examples/monitoring/monitoring-quickstart.ipynb diff --git a/.secrets.baseline b/.secrets.baseline index e0030466f1f..dd65312d57b 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -136,6 +136,29 @@ } ], "results": { + "examples/monitoring/monitoring-quickstart.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": 
"8d921d6d629bc22832e5fae42dfc828b8ce5cf47", + "is_verified": false, + "line_number": 606 + }, + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": "37b47d0b2461457e316f1b0be0eef0f9599d440d", + "is_verified": false, + "line_number": 780 + }, + { + "type": "Base64 High Entropy String", + "filename": "examples/monitoring/monitoring-quickstart.ipynb", + "hashed_secret": "be6715cc8d40a964c7bd1fd8eff5e840d61ad598", + "is_verified": false, + "line_number": 875 + } + ], ".github/workflows/publish.yml": [ { "type": "Secret Keyword", diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 1b0b0961d79..ab1d5a80e1b 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -57,6 +57,7 @@ * [MCP - AI Agent Example](../examples/mcp_feature_store/README.md) * [Feast-Powered AI Agent](../examples/agent_feature_store/README.md) * [Demo Notebooks](tutorials/demo-notebooks.md) +* [Feature Quality Monitoring Quickstart](../examples/monitoring/monitoring-quickstart.ipynb) ## How-to Guides @@ -90,6 +91,7 @@ * [Adding or reusing tests](how-to-guides/adding-or-reusing-tests.md) * [Starting Feast servers in TLS(SSL) Mode](how-to-guides/starting-feast-servers-tls-mode.md) * [Importing Features from dbt](how-to-guides/dbt-integration.md) +* [Feature Quality Monitoring](how-to-guides/feature-monitoring.md) ## Reference diff --git a/docs/how-to-guides/feature-monitoring.md b/docs/how-to-guides/feature-monitoring.md new file mode 100644 index 00000000000..aca9f1abf45 --- /dev/null +++ b/docs/how-to-guides/feature-monitoring.md @@ -0,0 +1,368 @@ +# Feature Quality Monitoring + +## Overview + +Feast's data quality monitoring system computes, stores, and serves statistical metrics for every registered feature. It gives you visibility into feature health — distributions, null rates, percentiles, histograms — across batch data and feature serving logs. + +This guide covers: + +1. [Prerequisites](#1-prerequisites) +2. 
[Auto-baseline on registration](#2-auto-baseline-on-registration) +3. [Scheduled monitoring with the CLI](#3-scheduled-monitoring-with-the-cli) +4. [Monitoring feature serving logs](#4-monitoring-feature-serving-logs) +5. [Reading metrics via REST API](#5-reading-metrics-via-rest-api) +6. [On-demand exploration (transient compute)](#6-on-demand-exploration) +7. [Integrating with orchestrators](#7-integrating-with-orchestrators) +8. [Supported backends](#8-supported-backends) + +## 1. Prerequisites + +Monitoring works with any supported offline store backend. No additional infrastructure or configuration is needed — monitoring tables are created automatically on first use. + +**Minimum setup:** + +- A Feast project with at least one feature view and a configured offline store +- Feast SDK installed (`pip install feast`) + +**For serving log monitoring:** + +- At least one feature service with `logging_config` set (see [step 4](#4-monitoring-feature-serving-logs)) + +## 2. Auto-baseline on registration + +When you run `feast apply` to register new features, Feast automatically queues baseline metric computation: + +```bash +$ feast apply +Applying changes... +Created feature view 'driver_stats' with 3 features + → Queued baseline metrics computation (DQM job: abc-123) +Done! +``` + +The baseline reads all available source data and stores the resulting statistics with `is_baseline=TRUE`. This serves as the reference distribution for future drift detection. + +Baseline computation is: +- **Non-blocking** — `feast apply` returns immediately; computation runs asynchronously +- **Idempotent** — only features without existing baselines are computed; re-running `feast apply` won't recompute existing baselines + +## 3. 
Scheduled monitoring with the CLI + +### Auto mode (recommended for production) + +Schedule a single daily job that computes all granularities automatically: + +```bash +feast monitor run +``` + +This detects the latest event timestamp in the source data and computes metrics for 5 time windows: + +| Granularity | Window | +|-------------|--------| +| `daily` | Last 1 day | +| `weekly` | Last 7 days | +| `biweekly` | Last 14 days | +| `monthly` | Last 30 days | +| `quarterly` | Last 90 days | + +No date arguments needed. One scheduled job produces all granularities. + +### Targeting a specific feature view + +```bash +feast monitor run --feature-view driver_stats +``` + +### Explicit date range and granularity + +```bash +feast monitor run \ + --feature-view driver_stats \ + --start-date 2025-01-01 \ + --end-date 2025-01-07 \ + --granularity weekly +``` + +### Setting a manual baseline + +```bash +feast monitor run \ + --feature-view driver_stats \ + --start-date 2025-01-01 \ + --end-date 2025-03-31 \ + --granularity daily \ + --set-baseline +``` + +### CLI reference + +``` +Usage: feast monitor run [OPTIONS] + +Options: + -p, --project TEXT Feast project name (defaults to feature_store.yaml) + -v, --feature-view TEXT Feature view name (omit for all) + -f, --feature-name TEXT Feature name(s), repeatable (omit for all) + --start-date TEXT Start date YYYY-MM-DD (omit for auto-detect) + --end-date TEXT End date YYYY-MM-DD (omit for auto-detect) + -g, --granularity One of: daily, weekly, biweekly, monthly, quarterly + --set-baseline Mark this computation as baseline + --source-type One of: batch, log, all (default: batch) + --help Show this message and exit. +``` + +## 4. Monitoring feature serving logs + +If your feature services have logging configured, you can compute metrics from the actual features served to models in production. 
+ +### Setting up feature service logging + +In your feature definitions: + +```python +from feast import FeatureService, LoggingConfig +from feast.infra.offline_stores.contrib.postgres_offline_store.postgres_source import ( + PostgreSQLLoggingDestination, +) + +driver_service = FeatureService( + name="driver_service", + features=[driver_stats_fv], + logging_config=LoggingConfig( + destination=PostgreSQLLoggingDestination(table_name="feast_driver_logs"), + sample_rate=1.0, + ), +) +``` + +### Computing log metrics + +**Auto mode (all feature services with logging):** + +```bash +feast monitor run --source-type log +``` + +**Specific feature service:** + +```bash +feast monitor run --source-type log --feature-view driver_service +``` + +**Both batch and log in one run:** + +```bash +feast monitor run --source-type all +``` + +Log metrics are stored with `data_source_type="log"` alongside batch metrics in the same monitoring tables. Feature names from the log schema (e.g., `driver_stats__conv_rate`) are automatically normalized back to their original names (`conv_rate`) and associated with the correct feature view — enabling batch-vs-log comparison and drift detection. + +### Via REST API + +```bash +# Compute log metrics +POST /monitoring/compute/log +{ + "project": "my_project", + "feature_service_name": "driver_service", + "granularity": "daily" +} + +# Auto-compute all log metrics +POST /monitoring/auto_compute/log +{ + "project": "my_project" +} +``` + +## 5. Reading metrics via REST API + +All read endpoints support cascading filters: `project` → `feature_service_name` → `feature_view_name` → `feature_name` → `granularity` → `data_source_type`. 
+ +### Per-feature metrics + +``` +GET /monitoring/metrics/features?project=my_project&feature_view_name=driver_stats&granularity=daily +``` + +**Response:** + +```json +[ + { + "project_id": "my_project", + "feature_view_name": "driver_stats", + "feature_name": "conv_rate", + "feature_type": "numeric", + "metric_date": "2025-03-26", + "granularity": "daily", + "data_source_type": "batch", + "row_count": 15000, + "null_count": 12, + "null_rate": 0.0008, + "mean": 0.523, + "stddev": 0.189, + "min_val": 0.001, + "max_val": 0.998, + "p50": 0.51, + "p75": 0.68, + "p90": 0.82, + "p95": 0.89, + "p99": 0.96, + "histogram": { + "bins": [0.0, 0.05, 0.1, "..."], + "counts": [120, 340, 560, "..."], + "bin_width": 0.05 + } + } +] +``` + +### Per-feature-view aggregates + +``` +GET /monitoring/metrics/feature_views?project=my_project&feature_view_name=driver_stats +``` + +### Per-feature-service aggregates + +``` +GET /monitoring/metrics/feature_services?project=my_project&feature_service_name=driver_service +``` + +### Baseline + +``` +GET /monitoring/metrics/baseline?project=my_project&feature_view_name=driver_stats +``` + +### Time-series (for trend charts) + +``` +GET /monitoring/metrics/timeseries?project=my_project&feature_name=conv_rate&granularity=daily&start_date=2025-01-01&end_date=2025-03-31 +``` + +### Filtering batch vs. 
log metrics + +Add `data_source_type=batch` or `data_source_type=log` to any read endpoint: + +``` +GET /monitoring/metrics/features?project=my_project&data_source_type=log +``` + +### Full endpoint reference + +| Method | Endpoint | Description | +|--------|----------|-------------| +| `POST` | `/monitoring/compute` | Submit batch DQM job | +| `POST` | `/monitoring/auto_compute` | Auto-detect dates, all granularities | +| `POST` | `/monitoring/compute/transient` | On-demand compute (not stored) | +| `POST` | `/monitoring/compute/log` | Compute from serving logs | +| `POST` | `/monitoring/auto_compute/log` | Auto-detect log dates, all granularities | +| `GET` | `/monitoring/jobs/{job_id}` | DQM job status | +| `GET` | `/monitoring/metrics/features` | Per-feature metrics | +| `GET` | `/monitoring/metrics/feature_views` | Per-view aggregates | +| `GET` | `/monitoring/metrics/feature_services` | Per-service aggregates | +| `GET` | `/monitoring/metrics/baseline` | Baseline metrics | +| `GET` | `/monitoring/metrics/timeseries` | Time-series data | + +## 6. On-demand exploration + +When you need metrics for an arbitrary date range (e.g., "show me the distribution for Jan 5 to Jan 20"), use the transient compute endpoint. It reads source data for the exact range, computes fresh statistics, and returns them directly without storing. + +```bash +POST /monitoring/compute/transient +{ + "project": "my_project", + "feature_view_name": "driver_stats", + "feature_names": ["conv_rate"], + "start_date": "2025-01-05", + "end_date": "2025-01-20" +} +``` + +This is necessary because pre-computed histograms from different date ranges have different bin edges and cannot be merged losslessly. + +## 7. 
Integrating with orchestrators + +### Airflow + +```python +from airflow.operators.bash import BashOperator + +monitor_task = BashOperator( + task_id="feast_monitor", + bash_command="feast monitor run", + cwd="/path/to/feast/repo", +) +``` + +### Kubeflow Pipelines (KFP) + +```python +from kfp import dsl + +@dsl.component(base_image="feast-image:latest") +def monitor_features(): + import subprocess + subprocess.run(["feast", "monitor", "run"], check=True, cwd="/feast/repo") +``` + +### Cron + +```cron +# Daily at 2:00 AM UTC +0 2 * * * cd /path/to/feast/repo && feast monitor run >> /var/log/feast-monitor.log 2>&1 +``` + +### Monitoring both batch and log in one job + +```bash +feast monitor run --source-type all +``` + +## 8. Supported backends + +Monitoring works natively with all offline stores that serve as compute engines for Feast materialization: + +| Backend | Compute | Storage | +|---------|---------|---------| +| PostgreSQL | SQL push-down | `INSERT ON CONFLICT` | +| Snowflake | SQL push-down | `MERGE` with `VARIANT` JSON | +| BigQuery | SQL push-down | `MERGE` into BQ tables | +| Redshift | SQL push-down | `MERGE` via Data API | +| Spark | SparkSQL push-down | Parquet tables | +| Oracle | SQL via Ibis | `MERGE` from `DUAL` | +| DuckDB | In-memory SQL | Parquet files | +| Dask | PyArrow compute | Parquet files | + +Backends not listed above fall back to Python-based computation — the offline store's `pull_all_from_table_or_query()` returns a PyArrow Table, and metrics are computed using `pyarrow.compute` and `numpy`. 
+ +## What metrics are computed + +**Per-feature (full profile):** + +| Metric | Numeric | Categorical | +|--------|:-------:|:-----------:| +| row_count, null_count, null_rate | Yes | Yes | +| mean, stddev, min, max | Yes | — | +| p50, p75, p90, p95, p99 | Yes | — | +| histogram (JSONB) | Binned distribution | Top-N values with counts | + +**Per-feature-view and per-feature-service (aggregate summaries):** + +| Metric | Description | +|--------|-------------| +| total_row_count | Total rows in the view | +| total_features | Number of features | +| features_with_nulls | Count of features with any nulls | +| avg_null_rate, max_null_rate | Aggregate null rate statistics | + +## RBAC + +Monitoring respects Feast's existing RBAC: + +- **Compute operations** (`POST /monitoring/compute`, `/auto_compute`, `/compute/log`, `/auto_compute/log`) require `AuthzedAction.UPDATE` +- **Transient compute** (`POST /monitoring/compute/transient`) requires `AuthzedAction.DESCRIBE` +- **Read operations** (`GET /monitoring/metrics/*`) require `AuthzedAction.DESCRIBE` diff --git a/examples/monitoring/monitoring-quickstart.ipynb b/examples/monitoring/monitoring-quickstart.ipynb new file mode 100644 index 00000000000..02739c96b0a --- /dev/null +++ b/examples/monitoring/monitoring-quickstart.ipynb @@ -0,0 +1,1256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature Quality Monitoring — Quickstart\n", + "\n", + "This notebook walks you through Feast's data quality monitoring end-to-end:\n", + "\n", + "1. Set up a feature store with a PostgreSQL offline store\n", + "2. Register features and trigger baseline computation\n", + "3. Compute metrics across multiple granularities\n", + "4. Read metrics via the Python SDK and REST API\n", + "5. Set up serving log monitoring\n", + "6. Use on-demand exploration for custom date ranges\n", + "\n", + "**Prerequisites:** A running PostgreSQL instance and `feast[postgres]` installed." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Install Feast" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "!uv pip install -q 'feast[postgres]'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Configure the Feature Store\n", + "\n", + "Create a minimal `feature_store.yaml` with a PostgreSQL offline store." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Working directory: /var/folders/cn/z7vz24yj25d8fjqdrs9jbsh00000gn/T/feast_monitoring_demo_kze7m3sk\n" + ] + } + ], + "source": [ + "import os\n", + "import tempfile\n", + "\n", + "REPO_DIR = tempfile.mkdtemp(prefix=\"feast_monitoring_demo_\")\n", + "os.makedirs(REPO_DIR, exist_ok=True)\n", + "print(f\"Working directory: {REPO_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "feature_store.yaml written.\n" + ] + } + ], + "source": [ + "# Adjust these to match your PostgreSQL instance\n", + "PG_HOST = os.environ.get(\"FEAST_PG_HOST\", \"localhost\")\n", + "PG_PORT = os.environ.get(\"FEAST_PG_PORT\", \"5432\")\n", + "PG_DB = os.environ.get(\"FEAST_PG_DB\", \"feast\")\n", + "PG_USER = os.environ.get(\"FEAST_PG_USER\", \"feast\")\n", + "PG_PASS = os.environ.get(\"FEAST_PG_PASS\", \"feast\")\n", + "\n", + "PG_SSLMODE = os.environ.get(\"FEAST_PG_SSLMODE\", \"disable\")\n", + "\n", + "feature_store_yaml = f\"\"\"\n", + "project: monitoring_demo\n", + "registry:\n", + " registry_type: sql\n", + " path: postgresql://{PG_USER}:{PG_PASS}@{PG_HOST}:{PG_PORT}/{PG_DB}?sslmode={PG_SSLMODE}\n", + "provider: local\n", + "offline_store:\n", + " type: postgres\n", + " host: {PG_HOST}\n", + " port: {PG_PORT}\n", + " database: {PG_DB}\n", + " user: {PG_USER}\n", + " 
password: {PG_PASS}\n", + " sslmode: {PG_SSLMODE}\n", + "online_store:\n", + " type: sqlite\n", + " path: {REPO_DIR}/online_store.db\n", + "entity_key_serialization_version: 3\n", + "\"\"\"\n", + "\n", + "with open(os.path.join(REPO_DIR, \"feature_store.yaml\"), \"w\") as f:\n", + " f.write(feature_store_yaml)\n", + "\n", + "print(\"feature_store.yaml written.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Create Sample Data and Feature Definitions" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample data: 5000 rows, 60 days\n" + ] + }, + { + "data": { + "text/html": [ + "
| \n", + " | driver_id | \n", + "event_timestamp | \n", + "conv_rate | \n", + "acc_rate | \n", + "avg_daily_trips | \n", + "vehicle_type | \n", + "created | \n", + "
|---|---|---|---|---|---|---|---|
| 0 | \n", + "1148 | \n", + "2025-02-08 | \n", + "0.348307 | \n", + "0.794390 | \n", + "14 | \n", + "compact | \n", + "2025-02-08 | \n", + "
| 1 | \n", + "1539 | \n", + "2025-02-21 | \n", + "0.305945 | \n", + "0.749046 | \n", + "25 | \n", + "van | \n", + "2025-02-21 | \n", + "
| 2 | \n", + "1487 | \n", + "2025-01-29 | \n", + "0.791641 | \n", + "0.784492 | \n", + "17 | \n", + "sedan | \n", + "2025-01-29 | \n", + "
| 3 | \n", + "1821 | \n", + "2025-01-15 | \n", + "0.267308 | \n", + "0.726226 | \n", + "17 | \n", + "sedan | \n", + "2025-01-15 | \n", + "
| 4 | \n", + "1437 | \n", + "2025-02-12 | \n", + "0.544618 | \n", + "0.729568 | \n", + "11 | \n", + "suv | \n", + "2025-02-12 | \n", + "
+ No monitoring metrics available for this feature. Run a + monitoring compute job to generate data quality metrics. +
+ } + actions={ +Histogram data is not available.
} + /> + )} ++ No monitoring metrics found for feature{" "} + {featureName} in feature view{" "} + {featureViewName}. Run a monitoring + compute job first. +
+ } + actions={ +Histogram data is not available for this metric.
} + /> + )} ++ Aggregated data quality metrics across feature services. +
++ {description} +
+ )} ++ Could not connect to the monitoring API. Make sure the Feast + registry server is running with monitoring enabled. +
+
+ No monitoring data has been computed for this project. Click
+ "Compute Metrics" to run data quality analysis on your
+ feature views, or use the CLI:{" "}
+ feast monitor run --source-type batch
+
+ Data quality metrics have been computed. The table above has + been refreshed. +
+{(computeMutation.error as Error)?.message}
+