apache · HTHou · Apr 9, 2026
diff --git a/python/examples/example.py b/python/examples/example.py
@@ -19,7 +19,14 @@
 
 from tsfile import ColumnSchema, TableSchema
 from tsfile import Tablet
-from tsfile import TsFileTableWriter, TsFileReader, TSDataType, TSEncoding, Compressor, ColumnCategory
+from tsfile import (
+    TsFileTableWriter,
+    TsFileReader,
+    TSDataType,
+    TSEncoding,
+    Compressor,
+    ColumnCategory,
+)
 
 ##  Write
 table_data_dir = os.path.join(os.path.dirname(__file__), "table_data.tsfile")
@@ -36,9 +43,10 @@
 with TsFileTableWriter(table_data_dir, table_schema) as writer:
     tablet_row_num = 100
     tablet = Tablet(
-                    ["id", "id2", "value"],
-                    [TSDataType.STRING, TSDataType.STRING, TSDataType.FLOAT],
-                    tablet_row_num)
+        ["id", "id2", "value"],
+        [TSDataType.STRING, TSDataType.STRING, TSDataType.FLOAT],
+        tablet_row_num,
+    )
 
     for i in range(tablet_row_num):
         tablet.add_timestamp(i, i * 10)
@@ -57,4 +65,3 @@
             print(result.get_value_by_name("id2"))
             print(result.get_value_by_name("value"))
             print(result.read_data_frame())
-
diff --git a/python/pom.xml b/python/pom.xml
@@ -36,6 +36,36 @@
     <build>
         <sourceDirectory>${project.basedir}</sourceDirectory>
         <plugins>
+            <plugin>
+                <groupId>com.diffplug.spotless</groupId>
+                <artifactId>spotless-maven-plugin</artifactId>
+                <version>${spotless.version}</version>
+                <configuration>
+                    <python>
+                        <includes>
+                            <include>examples/**/*.py</include>
+                            <include>tests/**/*.py</include>
+                            <include>tsfile/**/*.py</include>
+                            <include>setup.py</include>
+                        </includes>
+                        <black>
+                            <version>24.10.0</version>
+                            <pathToExe>${project.basedir}/${python.venv.bin}black</pathToExe>
+                        </black>
+                    </python>
+                    <lineEndings>UNIX</lineEndings>
+                    <skip>${spotless.skip}</skip>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>spotless-check</id>
+                        <goals>
+                            <goal>check</goal>
+                        </goals>
+                        <phase>validate</phase>
+                    </execution>
+                </executions>
+            </plugin>
             <plugin>
                 <groupId>org.codehaus.mojo</groupId>
                 <artifactId>exec-maven-plugin</artifactId>

diff --git a/python/requirements.txt b/python/requirements.txt
@@ -18,9 +18,9 @@
 #
 
 cython==3.0.10
+black==24.10.0
 numpy>=2.0.0,<3
 pandas==2.2.2
 setuptools==78.1.1
 wheel==0.46.2
 pyarrow>=8.0.0
-
diff --git a/python/setup.py b/python/setup.py
@@ -41,7 +41,9 @@
     shutil.rmtree(PKG / "include")
 shutil.copytree(CPP_INC, PKG / "include")
 if sys.platform.startswith("linux"):
-    candidates = sorted(CPP_LIB.glob("libtsfile.so*"), key=lambda p: len(p.name), reverse=True)
+    candidates = sorted(
+        CPP_LIB.glob("libtsfile.so*"), key=lambda p: len(p.name), reverse=True
+    )
     if not candidates:
         raise FileNotFoundError("missing libtsfile.so* in build output")
     src = candidates[0]
@@ -51,7 +53,9 @@
     shutil.copy2(src, link_name)
 
 elif sys.platform == "darwin":
-    candidates = sorted(CPP_LIB.glob("libtsfile.*.dylib")) or list(CPP_LIB.glob("libtsfile.dylib"))
+    candidates = sorted(CPP_LIB.glob("libtsfile.*.dylib")) or list(
+        CPP_LIB.glob("libtsfile.dylib")
+    )
     if not candidates:
         raise FileNotFoundError("missing libtsfile*.dylib in build output")
     src = candidates[0]
@@ -61,8 +65,12 @@
     shutil.copy2(src, link_name)
 elif sys.platform == "win32":
     for base_name in ("libtsfile",):
-        dll_candidates = sorted(CPP_LIB.glob(f"{base_name}*.dll"), key=lambda p: len(p.name), reverse=True)
-        dll_a_candidates = sorted(CPP_LIB.glob(f"{base_name}*.dll.a"), key=lambda p: len(p.name), reverse=True)
+        dll_candidates = sorted(
+            CPP_LIB.glob(f"{base_name}*.dll"), key=lambda p: len(p.name), reverse=True
+        )
+        dll_a_candidates = sorted(
+            CPP_LIB.glob(f"{base_name}*.dll.a"), key=lambda p: len(p.name), reverse=True
+        )
 
         if not dll_candidates:
             raise FileNotFoundError(f"missing {base_name}*.dll in build output")
@@ -119,8 +127,14 @@ def finalize_options(self):
     extra_link_args += ["-Wl,-rpath,@loader_path", "-stdlib=libc++"]
 elif sys.platform == "win32":
     libraries = ["tsfile"]
-    extra_compile_args += ["-O2", "-std=c++11", "-DSIZEOF_VOID_P=8", "-D__USE_MINGW_ANSI_STDIO=1", "-DMS_WIN64",
-                           "-D_WIN64"]
+    extra_compile_args += [
+        "-O2",
+        "-std=c++11",
+        "-DSIZEOF_VOID_P=8",
+        "-D__USE_MINGW_ANSI_STDIO=1",
+        "-DMS_WIN64",
+        "-D_WIN64",
+    ]
     extra_link_args += []
 else:
     raise RuntimeError(f"Unsupported platform: {sys.platform}")

diff --git a/python/tests/bench_batch_arrow_vs_dataframe.py b/python/tests/bench_batch_arrow_vs_dataframe.py
@@ -61,12 +61,15 @@ def _ensure_bench_tsfile(file_path: str, row_count: int) -> None:
         remove(file_path)
     # Build data with pandas/numpy (vectorized, much faster than row-by-row Tablet)
     import numpy as np
-    df = pd.DataFrame({
-        "time": np.arange(row_count, dtype=np.int64),
-        "device": pd.Series([f"device" for i in range(row_count)]),
-        "value1": np.arange(0, row_count * 10, 10, dtype=np.int64),
-        "value2": np.arange(row_count, dtype=np.float64) * 1.5,
-    })
+
+    df = pd.DataFrame(
+        {
+            "time": np.arange(row_count, dtype=np.int64),
+            "device": pd.Series([f"device" for i in range(row_count)]),
+            "value1": np.arange(0, row_count * 10, 10, dtype=np.int64),
+            "value2": np.arange(row_count, dtype=np.float64) * 1.5,
+        }
+    )
 
     table = TableSchema(
         TABLE_NAME,
@@ -135,7 +138,9 @@ def _run_timed(name: str, func, *args, rounds: int = DEFAULT_TIMED_ROUNDS):
     avg = sum(times) / len(times)
     total_rows = n
     rows_per_sec = total_rows / avg if avg > 0 else 0
-    print(f"  {name}: {avg:.3f}s avg ({min(times):.3f}s min)  rows={total_rows}  {rows_per_sec:.0f} rows/s")
+    print(
+        f"  {name}: {avg:.3f}s avg ({min(times):.3f}s min)  rows={total_rows}  {rows_per_sec:.0f} rows/s"
+    )
     return avg, total_rows
 
 
@@ -148,7 +153,9 @@ def run_benchmark(
     _ensure_bench_tsfile(file_path, row_count)
     end_time = row_count + 1
 
-    print(f"Benchmark: {row_count} rows, batch_size={batch_size}, timed_rounds={timed_rounds}")
+    print(
+        f"Benchmark: {row_count} rows, batch_size={batch_size}, timed_rounds={timed_rounds}"
+    )
 
     df_avg, df_rows = _run_timed(
         "query_table + read_data_frame",
@@ -170,7 +177,9 @@ def run_benchmark(
     print()
     if df_avg > 0:
         speedup = arrow_avg / df_avg
-        print(f"  Arrow vs DataFrame time ratio: {speedup:.2f}x ({'Arrow faster' if speedup < 1 else 'DataFrame faster'})")
+        print(
+            f"  Arrow vs DataFrame time ratio: {speedup:.2f}x ({'Arrow faster' if speedup < 1 else 'DataFrame faster'})"
+        )
     assert df_rows == row_count, f"DataFrame path row count {df_rows} != {row_count}"
     assert arrow_rows == row_count, f"Arrow path row count {arrow_rows} != {row_count}"