Skip to content

Commit 5095862

Browse files
authored
[AINode]: Integrate toto as a builtin forecasting model (#17322)
1 parent 83addf9 commit 5095862

26 files changed

Lines changed: 3066 additions & 4 deletions

NOTICE

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ grant the users the right to the use of patent under the requirement of Apache 2
1717

1818
============================================================================
1919

20+
This product includes source code derived from the DataDog/toto project:
21+
22+
Toto – Timeseries-Optimized Transformer for Observability
23+
Copyright 2025 Datadog, Inc.
24+
Licensed under the Apache License, Version 2.0
25+
https://github.com/DataDog/toto
26+
27+
============================================================================
28+
2029
Apache Commons Collections
2130
Copyright 2001-2019 The Apache Software Foundation
2231

integration-test/src/test/java/org/apache/iotdb/ainode/utils/AINodeTestUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,9 @@ public class AINodeTestUtils {
5858
new AbstractMap.SimpleEntry<>(
5959
"chronos2", new FakeModelInfo("chronos2", "t5", "builtin", "active")),
6060
new AbstractMap.SimpleEntry<>(
61-
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")))
61+
"moirai2", new FakeModelInfo("moirai2", "moirai", "builtin", "active")),
62+
new AbstractMap.SimpleEntry<>(
63+
"toto", new FakeModelInfo("toto", "toto", "builtin", "active")))
6264
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
6365

6466
public static final Map<String, FakeModelInfo> BUILTIN_MODEL_MAP;

iotdb-core/ainode/build_binary.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -423,26 +423,28 @@ def verify_poetry_env():
423423
[str(poetry_exe), "lock"],
424424
cwd=str(script_dir),
425425
env=venv_env,
426-
check=True,
426+
check=False,
427427
capture_output=True,
428428
text=True,
429429
)
430430
if result.stdout:
431431
print(result.stdout)
432432
if result.stderr:
433433
print(result.stderr)
434+
if result.returncode != 0:
435+
print(f"ERROR: poetry lock failed with exit code {result.returncode}")
436+
sys.exit(1)
434437
verify_poetry_env() # Verify after lock
435438

436439
accelerator = detect_accelerator()
437440
print(f"Selected accelerator: {accelerator}")
438441

439442
print("Running poetry install...")
440443
subprocess.run(
441-
[str(poetry_exe), "lock"],
444+
[str(poetry_exe), "install", "--no-root"],
442445
cwd=str(script_dir),
443446
env=venv_env,
444447
check=True,
445-
capture_output=True,
446448
text=True,
447449
)
448450
verify_poetry_env() # Verify before install

iotdb-core/ainode/iotdb/ainode/core/model/model_info.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,17 @@ def __repr__(self):
160160
},
161161
transformers_registered=True,
162162
),
163+
"toto": ModelInfo(
164+
model_id="toto",
165+
category=ModelCategory.BUILTIN,
166+
state=ModelStates.INACTIVE,
167+
model_type="toto",
168+
pipeline_cls="pipeline_toto.TotoPipeline",
169+
repo_id="Datadog/Toto-Open-Base-1.0",
170+
auto_map={
171+
"AutoConfig": "configuration_toto.TotoConfig",
172+
"AutoModelForCausalLM": "modeling_toto.TotoForPrediction",
173+
},
174+
transformers_registered=True,
175+
),
163176
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
18+
19+
from typing import List, Optional
20+
21+
from transformers import PretrainedConfig
22+
23+
24+
class TotoConfig(PretrainedConfig):
    """
    Configuration class for the Toto time series forecasting model.

    Toto (Time Series Optimized Transformer for Observability) is a foundation model
    for multivariate time series forecasting developed by Datadog. It uses a decoder-only
    architecture with per-variate patch-based causal scaling, proportional time-variate
    factorized attention, and a Student-T mixture prediction head.

    Reference: https://github.com/DataDog/toto
    """

    model_type = "toto"

    def __init__(
        self,
        patch_size: int = 32,
        stride: int = 32,
        embed_dim: int = 1024,
        num_layers: int = 18,
        num_heads: int = 16,
        mlp_hidden_dim: int = 2816,
        dropout: float = 0.0,
        spacewise_every_n_layers: int = 3,
        scaler_cls: str = "per_variate_causal",
        output_distribution_classes: Optional[List[str]] = None,
        output_distribution_kwargs: Optional[dict] = None,
        spacewise_first: bool = True,
        use_memory_efficient_attention: bool = True,
        stabilize_with_global: bool = True,
        scale_factor_exponent: float = 10.0,
        **kwargs,
    ):
        # Patch embedding geometry.
        self.patch_size = patch_size
        self.stride = stride
        # Transformer backbone dimensions.
        self.embed_dim = embed_dim
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.mlp_hidden_dim = mlp_hidden_dim
        self.dropout = dropout
        self.spacewise_every_n_layers = spacewise_every_n_layers
        self.scaler_cls = scaler_cls
        # A falsy value (None or empty) falls back to the published default
        # output head: a Student-T mixture.
        if output_distribution_classes:
            self.output_distribution_classes = output_distribution_classes
        else:
            self.output_distribution_classes = ["student_t_mixture"]
        # k_components=5 is the default used by Datadog/Toto-Open-Base-1.0
        if output_distribution_kwargs:
            self.output_distribution_kwargs = output_distribution_kwargs
        else:
            self.output_distribution_kwargs = {"k_components": 5}
        self.spacewise_first = spacewise_first
        self.use_memory_efficient_attention = use_memory_efficient_attention
        self.stabilize_with_global = stabilize_with_global
        self.scale_factor_exponent = scale_factor_exponent

        super().__init__(**kwargs)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
18+
# This file includes code derived from DataDog/toto
19+
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
20+
# Copyright 2025 Datadog, Inc.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
18+
# This file includes code derived from DataDog/toto
19+
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
20+
# Copyright 2025 Datadog, Inc.
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
18+
# This file includes code derived from DataDog/toto
19+
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
20+
# Copyright 2025 Datadog, Inc.
21+
22+
from functools import reduce
23+
from typing import NamedTuple
24+
25+
import numpy as np
26+
import torch
27+
import torch.utils.data
28+
from einops import repeat
29+
from jaxtyping import Bool, Float, Int, Shaped
30+
31+
32+
def pad_array(values: torch.Tensor, patch_stride: int) -> torch.Tensor:
    """
    Left-pad *values* with zeros so the last (time) dimension becomes
    divisible by ``patch_stride``; the original data ends up flush against
    the right edge.

    Accepts a 2-D ``(variates, series_len)`` or 3-D
    ``(batch, variates, series_len)`` tensor; a ``np.ndarray`` input is
    converted to a tensor first.

    Raises:
        ValueError: if the input is neither 2-D nor 3-D.
    """
    if isinstance(values, np.ndarray):
        values = torch.from_numpy(values)
    if values.ndim not in (2, 3):
        raise ValueError(f"Unsupported number of dimensions: {values.ndim}")
    series_len = values.shape[-1]
    # Round the time dimension up to the next multiple of patch_stride.
    padded_length = int(np.ceil(series_len / patch_stride) * patch_stride)
    padded = torch.zeros(
        (*values.shape[:-1], padded_length),
        dtype=values.dtype,
        device=values.device,
    )
    # Zeros form the left padding; data occupies the right-most slots.
    padded[..., -series_len:] = values
    return padded
57+
58+
59+
def pad_id_mask(id_mask: torch.Tensor, patch_stride: int) -> torch.Tensor:
    """
    Left-pad *id_mask* so the last (time) dimension becomes divisible by
    ``patch_stride``.

    Unlike :func:`pad_array`, the padding is not zeros: each row's left-edge
    id value is replicated into the padded region, so the padding belongs to
    the same id group as the first real timestep.

    Raises:
        ValueError: if the input is neither 2-D nor 3-D.
    """
    if id_mask.ndim not in (2, 3):
        raise ValueError(f"Unsupported number of dimensions: {id_mask.ndim}")
    series_len = id_mask.shape[-1]
    padded_length = int(np.ceil(series_len / patch_stride) * patch_stride)
    padding_amount = padded_length - series_len
    # Broadcast the first timestep across the padding width (equivalent to
    # einops.repeat in the upstream implementation, without a copy).
    padding = id_mask[..., :1].expand(*id_mask.shape[:-1], padding_amount)
    return torch.cat([padding, id_mask], dim=-1)
89+
90+
91+
class MaskedTimeseries(NamedTuple):
    """
    Immutable bundle of a (possibly batched) multivariate time series plus
    its masks and timestamp metadata, shaped ``(*batch, variates, series_len)``
    per the upstream annotations.
    """

    # Observed values, float tensor.
    series: torch.Tensor
    # Boolean mask over the same shape — presumably True marks real
    # (non-padding) entries; confirm against the caller.
    padding_mask: torch.Tensor
    # Integer id mask grouping variates/timesteps.
    id_mask: torch.Tensor
    # Per-point timestamps, integer seconds.
    timestamp_seconds: torch.Tensor
    # Sampling interval per variate, integer seconds.
    time_interval_seconds: torch.Tensor
    num_exogenous_variables: int = 0

    def to(self, device: torch.device) -> "MaskedTimeseries":
        """Return a copy with every tensor field moved to *device*."""
        # The first five fields are tensors; the trailing int is passed through.
        moved = [field.to(device) for field in self[:5]]
        return MaskedTimeseries(*moved, self.num_exogenous_variables)
108+
109+
110+
def is_extreme_value(t: torch.Tensor) -> torch.Tensor:
    """
    Return a boolean tensor flagging entries that are inf, NaN, or whose
    magnitude is at least half the maximum representable value of *t*'s
    dtype (float or integer).
    """
    if torch.is_floating_point(t):
        dtype_info = torch.finfo(t.dtype)
    else:
        dtype_info = torch.iinfo(t.dtype)
    # Values within a factor of two of the dtype limit count as extreme.
    near_limit = t.abs() >= dtype_info.max / 2
    return torch.isinf(t) | torch.isnan(t) | near_limit
124+
125+
126+
def replace_extreme_values(t: torch.Tensor, replacement: float = 0.0) -> torch.Tensor:
    """
    Return a copy of *t* in which extreme entries (per ``is_extreme_value``)
    are substituted with *replacement*, cast to *t*'s dtype and device.
    """
    mask = is_extreme_value(t)
    fill = torch.tensor(replacement, dtype=t.dtype, device=t.device)
    return torch.where(mask, fill, t)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
#
18+
# This file includes code derived from DataDog/toto
19+
# (https://github.com/DataDog/toto), licensed under the Apache-2.0 License.
20+
# Copyright 2025 Datadog, Inc.

0 commit comments

Comments
 (0)