diff --git a/benchmarks/pandas/bench_at_iat.py b/benchmarks/pandas/bench_at_iat.py
new file mode 100644
index 00000000..662c5e43
--- /dev/null
+++ b/benchmarks/pandas/bench_at_iat.py
@@ -0,0 +1,45 @@
+"""Benchmark scalar access via Series.at / Series.iat / DataFrame.at / DataFrame.iat.
+
+Every timed pass does one label-based and one position-based lookup of the
+middle row on a 100k-row Series and on a two-column DataFrame.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import pandas as pd
+
+N = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+labels = [f"r{i}" for i in range(N)]
+values = [i * 1.5 for i in range(N)]
+
+s = pd.Series(values, index=labels)
+df = pd.DataFrame({"a": values, "b": [v * 2 for v in values]}, index=labels)
+
+# Label of the middle row; N // 2 is its position.
+mid_label = f"r{N // 2}"
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    _ = s.at[mid_label]
+    _ = s.iat[N // 2]
+    _ = df.at[mid_label, "a"]
+    _ = df.iat[N // 2, 0]
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    _ = s.at[mid_label]
+    _ = s.iat[N // 2]
+    _ = df.at[mid_label, "a"]
+    _ = df.iat[N // 2, 0]
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "at_iat",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_autocorr.py b/benchmarks/pandas/bench_autocorr.py
new file mode 100644
index 00000000..ee5c00e0
--- /dev/null
+++ b/benchmarks/pandas/bench_autocorr.py
@@ -0,0 +1,40 @@
+"""
+Benchmark Series.autocorr on a 100k-element float Series.
+
+Counterpart of the tsb autoCorr benchmark.
+Each timed pass computes the autocorrelation at lags 1, 5 and 20.
+Prints JSON: {"function": "autocorr", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import math
+import time
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# Sine wave plus a small period-7 sawtooth offset.
+samples = [math.sin(i * 0.05) + (i % 7) * 0.01 for i in range(SIZE)]
+s = pd.Series(samples)
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.autocorr(lag=1)
+    s.autocorr(lag=5)
+    s.autocorr(lag=20)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.autocorr(lag=1)
+    s.autocorr(lag=5)
+    s.autocorr(lag=20)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "autocorr",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_convert_dtypes.py b/benchmarks/pandas/bench_convert_dtypes.py
new file mode 100644
index 00000000..543fa870
--- /dev/null
+++ b/benchmarks/pandas/bench_convert_dtypes.py
@@ -0,0 +1,52 @@
+"""
+Benchmark Series.convert_dtypes() and DataFrame.convert_dtypes().
+
+Builds 50k-element int, float, string and boolean lists with periodic None
+entries. The two Series are created with dtype=object; the DataFrame columns
+are left to the constructor's default dtype inference.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+N = 50_000
+WARMUP = 3
+ITERATIONS = 20
+
+# Source lists with None at fixed strides (same structure as the TypeScript version)
+int_data = [i if i % 17 else None for i in range(N)]
+float_data = [i * 1.5 if i % 13 else None for i in range(N)]
+str_data = [f"str_{i}" if i % 11 else None for i in range(N)]
+bool_data = [(i % 2 == 0) if i % 7 else None for i in range(N)]
+
+int_obj = pd.Series(int_data, dtype=object)
+float_obj = pd.Series(float_data, dtype=object)
+
+df = pd.DataFrame({
+    "int_col": int_data,
+    "float_col": float_data,
+    "str_col": str_data,
+    "bool_col": bool_data,
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    int_obj.convert_dtypes()
+    float_obj.convert_dtypes()
+    df.convert_dtypes()
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    int_obj.convert_dtypes()
+    float_obj.convert_dtypes()
+    df.convert_dtypes()
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "convert_dtypes",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_cross_join.py b/benchmarks/pandas/bench_cross_join.py
new file mode 100644
index 00000000..ad1de45b
--- /dev/null
+++ b/benchmarks/pandas/bench_cross_join.py
@@ -0,0 +1,38 @@
+"""Benchmark a cross join: pd.merge(how="cross") of two 300-row DataFrames.
+
+The Cartesian product has 300 * 300 = 90,000 rows.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import pandas as pd
+
+N = 300
+WARMUP = 3
+ITERATIONS = 10
+
+left = pd.DataFrame({
+    "id_a": list(range(N)),
+    "val_a": [i * 1.5 for i in range(N)],
+})
+right = pd.DataFrame({
+    "id_b": list(range(N)),
+    "val_b": [i * 2.5 for i in range(N)],
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    pd.merge(left, right, how="cross")
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge(left, right, how="cross")
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "cross_join",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_cut_bins_to_frame.py b/benchmarks/pandas/bench_cut_bins_to_frame.py
new file mode 100644
index 00000000..5ae5908c
--- /dev/null
+++ b/benchmarks/pandas/bench_cut_bins_to_frame.py
@@ -0,0 +1,64 @@
+"""Benchmark cut_bins_to_frame: pd.cut plus per-bin counts and summaries on 100k rows.
+
+pandas counterpart of cutBinsToFrame / cutBinCounts / binEdges: bin the data
+into NUM_BINS equal-width intervals, count the members of each bin, then build
+a summary frame, a label -> count dict and a frame of interval edges.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+NUM_BINS = 20
+WARMUP = 5
+ITERATIONS = 50
+
+data = np.array([(i % 1000) * 0.1 for i in range(SIZE)])
+
+for _ in range(WARMUP):
+    # pd.cut on an ndarray returns a Categorical. Categorical.value_counts()
+    # accepts only `dropna` (passing sort=False raises TypeError) and already
+    # reports one entry per category, in category order.
+    cut_result = pd.cut(data, NUM_BINS)
+    counts = cut_result.value_counts()
+    # Summary DataFrame equivalent to cutBinsToFrame
+    summary = pd.DataFrame({
+        "bin": counts.index.astype(str),
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+        "count": counts.values,
+        "frequency": counts.values / len(data),
+    })
+    # cutBinCounts equivalent: counts dict
+    count_dict = dict(zip(counts.index.astype(str), counts.values))
+    # binEdges equivalent: DataFrame of interval edges
+    edges = pd.DataFrame({
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+    })
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    cut_result = pd.cut(data, NUM_BINS)
+    counts = cut_result.value_counts()
+    summary = pd.DataFrame({
+        "bin": counts.index.astype(str),
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+        "count": counts.values,
+        "frequency": counts.values / len(data),
+    })
+    count_dict = dict(zip(counts.index.astype(str), counts.values))
+    edges = pd.DataFrame({
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+    })
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "cut_bins_to_frame",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_compare_pair.py b/benchmarks/pandas/bench_dataframe_compare_pair.py
new file mode 100644
index 00000000..4dd28ff4
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_compare_pair.py
@@ -0,0 +1,52 @@
+"""
+Benchmark element-wise comparisons between two DataFrames.
+
+Complements the scalar-only dataframe_compare benchmark by timing
+df1.eq(df2), df1.ne(df2), df1.gt(df2) and df1.le(df2) on 50k-row frames.
+Counterpart of tsb dataFrameEq / dataFrameNe / dataFrameGt / dataFrameLe.
+
+Prints JSON: {"function": "dataframe_compare_pair", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 50_000
+WARMUP = 5
+ITERATIONS = 50
+
+df1 = pd.DataFrame({
+    "a": np.array([(i * 1.7) % 1000 for i in range(SIZE)]),
+    "b": np.array([(i * 2.3) % 1000 for i in range(SIZE)]),
+    "c": np.array([i % 100 for i in range(SIZE)]),
+})
+
+df2 = pd.DataFrame({
+    "a": np.array([(i * 2.1) % 1000 for i in range(SIZE)]),
+    "b": np.array([(i * 1.9) % 1000 for i in range(SIZE)]),
+    "c": np.array([(i + 7) % 100 for i in range(SIZE)]),
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    df1.eq(df2)
+    df1.ne(df2)
+    df1.gt(df2)
+    df1.le(df2)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    df1.eq(df2)
+    df1.ne(df2)
+    df1.gt(df2)
+    df1.le(df2)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "dataframe_compare_pair",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_dataframe_itertuples.py b/benchmarks/pandas/bench_dataframe_itertuples.py
new file mode 100644
index 00000000..18ac5108
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_itertuples.py
@@ -0,0 +1,35 @@
+"""Benchmark: DataFrame.itertuples() — iterate over rows as namedtuples.
+
+Each timed pass walks all rows of a 1,000-row, three-column DataFrame.
+Prints one JSON line with function, mean_ms, iterations and total_ms.
+"""
+import time
+import pandas as pd
+
+ROWS = 1_000
+WARMUP = 5
+ITERATIONS = 50
+
+df = pd.DataFrame({
+    "x": [i * 1.5 for i in range(ROWS)],
+    "y": [i * 2.5 for i in range(ROWS)],
+    "z": [i * 3.5 for i in range(ROWS)],
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    for _row in df.itertuples():
+        pass
+
+# Time every pass separately; the timings are summed for the report.
+times = []
+for _ in range(ITERATIONS):
+    t0 = time.perf_counter()
+    for _row in df.itertuples():
+        pass
+    times.append(time.perf_counter() - t0)
+
+total = sum(times)
+mean_ms = (total / ITERATIONS) * 1000
+total_ms = total * 1000
+print(f'{{"function": "dataframe_itertuples", "mean_ms": {mean_ms:.6f}, "iterations": {ITERATIONS}, "total_ms": {total_ms:.6f}}}')
diff --git a/benchmarks/pandas/bench_dataframe_transform_named.py b/benchmarks/pandas/bench_dataframe_transform_named.py
new file mode 100644
index 00000000..045650e9
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_transform_named.py
@@ -0,0 +1,42 @@
+"""
+Benchmark: pandas named column-wise operations ("mean", "cumsum", ["sum", "mean"]).
+
+Mirrors tsb dataFrameTransform with string names applied column-wise.
+DataFrame.transform() only accepts functions whose result keeps the caller's
+index and raises "Function did not transform" for reducers, so the reducing
+names go through DataFrame.agg() while "cumsum" stays on transform().
+
+Uses 10k-row DataFrame to match the TypeScript benchmark.
+"""
+import json
+import time
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 3
+ITERATIONS = 20
+
+a = [(i % 100) * 1.5 + 1 for i in range(ROWS)]
+b = [((i * 3) % 200) * 0.5 + 2 for i in range(ROWS)]
+c = [((i * 7) % 50) * 2.0 + 0.5 for i in range(ROWS)]
+df = pd.DataFrame({"a": a, "b": b, "c": c})
+
+# Warm-up
+for _ in range(WARMUP):
+    df.agg("mean")
+    df.transform("cumsum")
+    df.agg(["sum", "mean"])
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    df.agg("mean")
+    df.transform("cumsum")
+    df.agg(["sum", "mean"])
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "dataframe_transform_named",
+    "mean_ms": total_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_dataframe_update.py b/benchmarks/pandas/bench_dataframe_update.py
new file mode 100644
index 00000000..cea97283
--- /dev/null
+++ b/benchmarks/pandas/bench_dataframe_update.py
@@ -0,0 +1,46 @@
+"""
+Benchmark DataFrame.update() on 10k-row, two-column frames.
+
+Counterpart of tsb dataFrameUpdate.
+update() mutates its caller, so every pass updates a fresh copy of `df` with
+the non-NaN values of `other`; the copy is part of the timed work.
+Prints JSON: {"function": "dataframe_update", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import numpy as np
+import pandas as pd
+
+N = 10_000
+WARMUP = 20
+ITERATIONS = 200
+
+# `other` holds a value on every third row and NaN elsewhere (~2/3 of rows).
+a_data = [i * 1.0 for i in range(N)]
+b_data = [i * 2.0 for i in range(N)]
+a_other = [np.nan if i % 3 else i * 10.0 for i in range(N)]
+b_other = [np.nan if i % 3 else i * 20.0 for i in range(N)]
+
+df = pd.DataFrame({"a": a_data, "b": b_data})
+other = pd.DataFrame({"a": a_other, "b": b_other})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    scratch = df.copy()
+    scratch.update(other)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    scratch = df.copy()
+    scratch.update(other)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "dataframe_update",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_errors.py b/benchmarks/pandas/bench_errors.py
new file mode 100644
index 00000000..b5a28b35
--- /dev/null
+++ b/benchmarks/pandas/bench_errors.py
@@ -0,0 +1,57 @@
+"""Benchmark the pd.errors namespace: build and inspect pandas error classes.
+
+Counterpart of tsb's errors namespace benchmark. Each pass instantiates two
+built-in and eight pandas.errors classes, then checks them with isinstance,
+type(...).__name__ and str().
+"""
+import json
+import time
+import pandas.errors as pd_errors
+
+WARMUP = 5
+ITERATIONS = 200
+
+
+def _run():
+    """Build ten exception instances and return ten boolean checks on them."""
+    value_err = ValueError("bad value")
+    key_err = KeyError("missing key")
+    merge_err = pd_errors.MergeError("incompatible merge")
+    empty_err = pd_errors.EmptyDataError("no data")
+    option_err = pd_errors.OptionError("unknown option")
+    cast_err = pd_errors.IntCastingNaNError()
+    unsorted_err = pd_errors.UnsortedIndexError("MultiIndex slicing requires the index to be lexsorted")
+    parser_err = pd_errors.ParserError("unexpected token")
+    perf_warn = pd_errors.PerformanceWarning("slow path")
+    index_err = pd_errors.InvalidIndexError("bad index")
+
+    return [
+        isinstance(value_err, ValueError),
+        isinstance(key_err, KeyError),
+        isinstance(merge_err, Exception),
+        type(empty_err).__name__ == "EmptyDataError",
+        "unknown" in str(option_err),
+        isinstance(cast_err, pd_errors.IntCastingNaNError),
+        isinstance(unsorted_err, pd_errors.UnsortedIndexError),
+        type(parser_err).__name__ == "ParserError",
+        type(perf_warn).__name__ == "PerformanceWarning",
+        isinstance(index_err, pd_errors.InvalidIndexError),
+    ]
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    _run()
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    _run()
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "errors",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_extensions.py b/benchmarks/pandas/bench_extensions.py
new file mode 100644
index 00000000..5aa00fe9
--- /dev/null
+++ b/benchmarks/pandas/bench_extensions.py
@@ -0,0 +1,141 @@
+"""Benchmark: pd.api.extensions — ExtensionDtype / ExtensionArray subclassing and
+accessor registration.
+
+Mirrors tsb's extensions benchmark:
+  - ExtensionDtype subclass construction
+  - ExtensionArray subclass instantiation, getitem, slice, dtype access
+  - register_extension_dtype() → tsb registerExtensionDtype()
+  - register_series_accessor() → tsb registerSeriesAccessor()
+  - register_dataframe_accessor() → tsb registerDataFrameAccessor()
+  - Accessor registry introspection via hasattr
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+import pandas.api.extensions as pd_ext
+
+WARMUP = 5
+ITERATIONS = 200
+
+
+# Minimal object-backed extension dtype, registered under the string alias "tag".
+@pd_ext.register_extension_dtype
+class TagDtype(pd_ext.ExtensionDtype):
+    name = "tag"
+    type = object
+    kind = "O"
+
+    @classmethod
+    def construct_array_type(cls):
+        return TagArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        if string == "tag":
+            return cls()
+        raise TypeError(f"Cannot construct a 'TagDtype' from '{string}'")
+
+
+# Extension array that keeps its values in a NumPy object array.
+class TagArray(pd_ext.ExtensionArray):
+    def __init__(self, data):
+        self._data = np.asarray(data, dtype=object)
+
+    @classmethod
+    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
+        return cls(scalars)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values)
+
+    def __getitem__(self, key):
+        # NOTE(review): slices and array keys come back as a bare ndarray, not a
+        # TagArray — confirm this is intended for the benchmark.
+        return self._data[key]
+
+    def __setitem__(self, key, value):
+        self._data[key] = value
+
+    def __len__(self):
+        return len(self._data)
+
+    @property
+    def dtype(self):
+        return TagDtype()
+
+    @property
+    def nbytes(self):
+        return self._data.nbytes
+
+    def isna(self):
+        return np.array([v is None for v in self._data])
+
+    def take(self, indices, *, allow_fill=False, fill_value=None):
+        return type(self)(self._data.take(indices))
+
+    def copy(self):
+        return type(self)(self._data.copy())
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        return cls(np.concatenate([a._data for a in to_concat]))
+
+
+# The same accessor name is registered on both Series and DataFrame.
+@pd_ext.register_series_accessor("geo_bench")
+class GeoAccessor:
+    def __init__(self, obj):
+        self._obj = obj
+
+    def distance(self):
+        return 0
+
+
+@pd_ext.register_dataframe_accessor("geo_bench")
+class GeoDataFrameAccessor:
+    def __init__(self, obj):
+        self._obj = obj
+
+    def distance(self):
+        return 0
+
+
+_TAGS = ["alpha", "beta", "gamma", "delta", "epsilon"]
+_s = pd.Series(TagArray(_TAGS))
+_df = pd.DataFrame({"a": [1, 2, 3]})
+
+
+def _run():
+    arr = TagArray(_TAGS)
+    _len = len(arr)
+    _item = arr[2]
+    _sliced = arr[1:4]
+    _dtype_name = arr.dtype.name
+    _numeric = False
+
+    _has_series = hasattr(_s, "geo_bench")
+    _has_df = hasattr(_df, "geo_bench")
+
+    return [_len, _item, _sliced, _dtype_name, _numeric, _has_series, _has_df]
+
+
+for _ in range(WARMUP):
+    _run()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    _run()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+    json.dumps(
+        {
+            "function": "extensions",
+            "mean_ms": total_ms / ITERATIONS,
+            "iterations": ITERATIONS,
+            "total_ms": total_ms,
+        }
+    )
+)
diff --git a/benchmarks/pandas/bench_filter_series.py b/benchmarks/pandas/bench_filter_series.py
new file mode 100644
index 00000000..ec653243
--- /dev/null
+++ b/benchmarks/pandas/bench_filter_series.py
@@ -0,0 +1,38 @@
+"""Benchmark Series.filter on a 100k-element, string-labelled Series.
+
+Each timed pass runs one items= filter (1,000 labels) and one like= filter.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import pandas as pd
+
+N = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+labels = [f"label_{i}" for i in range(N)]
+values = [i * 0.5 for i in range(N)]
+s = pd.Series(values, index=labels)
+
+# Every 100th label, 1,000 in total.
+keep_items = [f"label_{i * 100}" for i in range(1_000)]
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.filter(items=keep_items)
+    s.filter(like="label_5")
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.filter(items=keep_items)
+    s.filter(like="label_5")
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "filter_series",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_get_set_option.py b/benchmarks/pandas/bench_get_set_option.py
new file mode 100644
index 00000000..df9c675e
--- /dev/null
+++ b/benchmarks/pandas/bench_get_set_option.py
@@ -0,0 +1,43 @@
+"""
+Benchmark the pandas options API: get_option / set_option / reset_option.
+
+Counterpart of tsb getOption / setOption / resetOption. Each pass reads, sets
+and resets "display.max_rows" and then "display.precision".
+Prints JSON: {"function": "get_set_option", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+WARMUP = 10
+ITERATIONS = 10_000
+
+# Untimed warm-up passes with fixed values.
+for _ in range(WARMUP):
+    pd.get_option("display.max_rows")
+    pd.set_option("display.max_rows", 50)
+    pd.reset_option("display.max_rows")
+    pd.get_option("display.precision")
+    pd.set_option("display.precision", 3)
+    pd.reset_option("display.precision")
+
+# Timed passes vary the values with the loop index.
+started = time.perf_counter()
+for step in range(ITERATIONS):
+    pd.get_option("display.max_rows")
+    pd.set_option("display.max_rows", (step % 90) + 10)
+    pd.reset_option("display.max_rows")
+    pd.get_option("display.precision")
+    pd.set_option("display.precision", (step % 8) + 2)
+    pd.reset_option("display.precision")
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "get_set_option",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_item_bool_extract.py b/benchmarks/pandas/bench_item_bool_extract.py
new file mode 100644
index 00000000..39839448
--- /dev/null
+++ b/benchmarks/pandas/bench_item_bool_extract.py
@@ -0,0 +1,39 @@
+"""
+Benchmark: single-element scalar extraction — Series.item() plus boolean
+extraction from a one-element Series and a 1x1 DataFrame.
+
+Mirrors tsb bench_item_bool_extract.
+bool(Series) and bool(DataFrame) always raise ValueError ("The truth value of
+a ... is ambiguous"), and the .bool() helper is deprecated since pandas 2.1, so
+the booleans are extracted with Series.item() and DataFrame.squeeze().
+Outputs JSON: {"function": "item_bool_extract", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import pandas as pd
+
+WARMUP = 20
+ITERATIONS = 100_000
+
+numeric_series = pd.Series([42.5])
+true_series = pd.Series([True])
+true_df = pd.DataFrame({"x": [True]})
+
+for _ in range(WARMUP):
+    numeric_series.item()
+    bool(true_series.item())
+    bool(true_df.squeeze())
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    numeric_series.item()
+    bool(true_series.item())
+    bool(true_df.squeeze())
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "item_bool_extract",
+    "mean_ms": total_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_join_all.py b/benchmarks/pandas/bench_join_all.py
new file mode 100644
index 00000000..040aa028
--- /dev/null
+++ b/benchmarks/pandas/bench_join_all.py
@@ -0,0 +1,36 @@
+"""Benchmark join_all: one DataFrame.join call attaching three frames to a base frame.
+
+All four frames have 5k rows and share the same string index.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import pandas as pd
+
+N = 5_000
+WARMUP = 3
+ITERATIONS = 10
+
+idx = [str(i) for i in range(N)]
+
+base = pd.DataFrame({"a": list(range(N))}, index=idx)
+df1 = pd.DataFrame({"b": [i * 2 for i in range(N)]}, index=idx)
+df2 = pd.DataFrame({"c": [i * 3 for i in range(N)]}, index=idx)
+df3 = pd.DataFrame({"d": [i * 4 for i in range(N)]}, index=idx)
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    base.join([df1, df2, df3])
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    base.join([df1, df2, df3])
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "join_all",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_math_ops.py b/benchmarks/pandas/bench_math_ops.py
new file mode 100644
index 00000000..1159ec02
--- /dev/null
+++ b/benchmarks/pandas/bench_math_ops.py
@@ -0,0 +1,44 @@
+"""Benchmark math_ops: abs() and round(1) on a 100k-row Series and DataFrame.
+
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# Alternate the sign: even positions negative, odd positions positive.
+positions = np.arange(SIZE)
+shifted = positions + 0.567
+s = pd.Series(np.where(positions % 2 == 0, -shifted, shifted))
+df = pd.DataFrame({
+    "a": -(np.arange(SIZE) + 0.123),
+    "b": np.arange(SIZE) + 0.456,
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.abs()
+    df.abs()
+    s.round(1)
+    df.round(1)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.abs()
+    df.abs()
+    s.round(1)
+    df.round(1)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "math_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_merge_asof.py b/benchmarks/pandas/bench_merge_asof.py
new file mode 100644
index 00000000..5517d2f8
--- /dev/null
+++ b/benchmarks/pandas/bench_merge_asof.py
@@ -0,0 +1,40 @@
+"""Benchmark pd.merge_asof: asof join of two 10k-row frames on an integer "time" key.
+
+No direction is passed, so pandas uses its default (backward) matching.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import pandas as pd
+
+N = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+# Trades sorted by time: 0, 2, 4, ...
+trade_times = list(range(0, N * 2, 2))
+prices = [100.0 + i * 0.5 for i in range(N)]
+
+# Quotes sorted by time, sparser: 0, 3, 6, ...
+quote_times = list(range(0, N * 3, 3))
+bids = [99.0 + i * 0.5 for i in range(N)]
+
+trades = pd.DataFrame({"time": trade_times, "price": prices})
+quotes = pd.DataFrame({"time": quote_times, "bid": bids})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    pd.merge_asof(trades, quotes, on="time")
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.merge_asof(trades, quotes, on="time")
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "merge_asof",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_na_ops.py b/benchmarks/pandas/bench_na_ops.py
new file mode 100644
index 00000000..b7d0adf0
--- /dev/null
+++ b/benchmarks/pandas/bench_na_ops.py
@@ -0,0 +1,53 @@
+"""Benchmark na_ops: isna / notna / ffill / bfill on 100k rows.
+
+pd.isna and pd.notna run on the Series; ffill and bfill run on both the
+Series and a two-column DataFrame.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# Every fifth value is missing: nullable Int64 input converted to float64.
+nullable_ints = pd.array([pd.NA if i % 5 == 0 else i for i in range(SIZE)], dtype="Int64")
+s = pd.Series(nullable_ints, dtype="float64")
+# The same positions are then set to NaN explicitly.
+every_fifth = np.arange(SIZE) % 5 == 0
+s[every_fifth] = np.nan
+
+df = pd.DataFrame({
+    "a": s,
+    "b": pd.Series([np.nan if i % 7 == 0 else float(i * 2) for i in range(SIZE)]),
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    pd.isna(s)
+    pd.notna(s)
+    s.ffill()
+    s.bfill()
+    df.ffill()
+    df.bfill()
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.isna(s)
+    pd.notna(s)
+    s.ffill()
+    s.bfill()
+    df.ffill()
+    df.bfill()
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "na_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_nanprod.py b/benchmarks/pandas/bench_nanprod.py
new file mode 100644
index 00000000..ec5fcfda
--- /dev/null
+++ b/benchmarks/pandas/bench_nanprod.py
@@ -0,0 +1,30 @@
+"""Benchmark nanprod: NaN-skipping product of a 100k-element Series via Series.prod().
+
+Every 13th element is missing. Each pass is timed separately and the timings
+are summed for the report.
+"""
+import time
+import pandas as pd
+import numpy as np
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+data = [None if i % 13 == 0 else 1 + (i % 7) * 0.0001 for i in range(SIZE)]
+s = pd.Series(data, dtype=float)
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.prod(skipna=True)
+
+durations = []
+for _ in range(ITERATIONS):
+    began = time.perf_counter()
+    s.prod(skipna=True)
+    durations.append(time.perf_counter() - began)
+
+total = sum(durations)
+mean_ms = (total / ITERATIONS) * 1000
+total_ms = total * 1000
+print(f'{{"function": "nanprod", "mean_ms": {mean_ms:.6f}, "iterations": {ITERATIONS}, "total_ms": {total_ms:.6f}}}')
diff --git a/benchmarks/pandas/bench_notna_boolean.py b/benchmarks/pandas/bench_notna_boolean.py
new file mode 100644
index 00000000..96c0a59d
--- /dev/null
+++ b/benchmarks/pandas/bench_notna_boolean.py
@@ -0,0 +1,44 @@
+"""Benchmark notna_boolean: boolean-mask selection on 100k rows.
+
+Indexes a Series with a boolean Series mask and its negation, and a DataFrame
+with a NumPy boolean array.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE))
+mask = pd.Series(np.arange(SIZE) % 2 == 0)
+bool_arr = np.arange(SIZE) % 3 != 0
+
+df = pd.DataFrame({
+    "a": np.arange(SIZE),
+    "b": np.arange(SIZE) * 2,
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s[mask]
+    s[~mask]
+    df[bool_arr]
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s[mask]
+    s[~mask]
+    df[bool_arr]
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "notna_boolean",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_numeric_ops_log2_exp.py b/benchmarks/pandas/bench_numeric_ops_log2_exp.py
new file mode 100644
index 00000000..89208443
--- /dev/null
+++ b/benchmarks/pandas/bench_numeric_ops_log2_exp.py
@@ -0,0 +1,53 @@
+"""
+Benchmark NumPy ufuncs np.log2 / np.log10 / np.exp / np.sign on pandas objects.
+
+Counterpart of tsb seriesLog2, seriesLog10, seriesExp, seriesSign and their
+DataFrame variants. Inputs have 100k rows to match the TypeScript benchmark.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 30
+
+# Strictly positive inputs, as required by log2/log10.
+data = [(i + 1) * 0.1 for i in range(SIZE)]
+s = pd.Series(data, dtype=float)
+df = pd.DataFrame({
+    "a": [(i + 1) * 0.1 for i in range(SIZE)],
+    "b": [(i + 1) * 0.2 for i in range(SIZE)],
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    np.log2(s)
+    np.log10(s)
+    np.exp(s)
+    np.sign(s)
+    np.log2(df)
+    np.log10(df)
+    np.exp(df)
+    np.sign(df)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    np.log2(s)
+    np.log10(s)
+    np.exp(s)
+    np.sign(s)
+    np.log2(df)
+    np.log10(df)
+    np.exp(df)
+    np.sign(df)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "numeric_ops_log2_exp",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_option_context.py b/benchmarks/pandas/bench_option_context.py
new file mode 100644
index 00000000..6f6aa310
--- /dev/null
+++ b/benchmarks/pandas/bench_option_context.py
@@ -0,0 +1,36 @@
+"""
+Benchmark: pd.describe_option() / pd.option_context() — pandas options describe and context manager.
+
+Mirrors tsb bench_option_context (describeOption + optionContext enter/exit).
+describe_option() prints the description to stdout by default; it is called
+with _print_desc=False so the text is returned instead and stdout carries
+only the final JSON line.
+Outputs JSON: {"function": "option_context", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import pandas as pd
+
+WARMUP = 20
+ITERATIONS = 50_000
+
+for _ in range(WARMUP):
+    pd.describe_option("display.max_rows", _print_desc=False)
+    pd.describe_option("display.precision", _print_desc=False)
+    with pd.option_context("display.max_rows", 50, "display.precision", 3):
+        pass
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.describe_option("display.max_rows", _print_desc=False)
+    pd.describe_option("display.precision", _print_desc=False)
+    with pd.option_context("display.max_rows", 50, "display.precision", 3):
+        pass
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "option_context",
+    "mean_ms": total_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_pow_mod.py b/benchmarks/pandas/bench_pow_mod.py
new file mode 100644
index 00000000..3458eb26
--- /dev/null
+++ b/benchmarks/pandas/bench_pow_mod.py
@@ -0,0 +1,41 @@
+"""Benchmark Series.pow(2), Series.mod(7) and DataFrame.pow(2) on 100k rows.
+
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+# Float values cycling through 1..100 (and 1..50 for column "b").
+base_values = (np.arange(ROWS) % 100) + 1
+s = pd.Series(base_values.astype(float))
+df = pd.DataFrame({
+    "a": ((np.arange(ROWS) % 100) + 1).astype(float),
+    "b": ((np.arange(ROWS) % 50) + 1).astype(float),
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.pow(2)
+    s.mod(7)
+    df.pow(2)
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.pow(2)
+    s.mod(7)
+    df.pow(2)
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "pow_mod",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_read_html.py b/benchmarks/pandas/bench_read_html.py
new file mode 100644
index 00000000..03dd0199
--- /dev/null
+++ b/benchmarks/pandas/bench_read_html.py
@@ -0,0 +1,56 @@
+"""
+Benchmark: pd.read_html — parse HTML tables into DataFrames.
+Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import io
+import json
+import time
+import math
+
+try:
+    import pandas as pd
+except ImportError:
+    import subprocess, sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "--quiet"])
+    import pandas as pd
+
+try:
+    import lxml  # noqa: F401
+except ImportError:
+    import subprocess, sys
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "lxml", "--quiet"])
+
+ROWS = 1_000
+WARMUP = 3
+ITERATIONS = 20
+
+
+def build_html(rows: int) -> str:
+    """Return an HTML <table> with an id/name/value/score header and `rows` data rows."""
+    header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>"
+    body_rows = [
+        f"<tr><td>{i}</td><td>item_{i % 100}</td><td>{i * 1.5:.2f}</td><td>{math.sin(i * 0.01):.6f}</td></tr>"
+        for i in range(rows)
+    ]
+    return f"{header}{''.join(body_rows)}</tbody></table>"
+
+
+html = build_html(ROWS)
+
+# Warm-up
+# read_html is fed a fresh StringIO each time: passing literal HTML is
+# deprecated since pandas 2.1, and a StringIO is consumed by one read.
+for _ in range(WARMUP):
+    pd.read_html(io.StringIO(html))
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    pd.read_html(io.StringIO(html))
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "read_html",
+    "mean_ms": total_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_reduce_ops.py b/benchmarks/pandas/bench_reduce_ops.py
new file mode 100644
index 00000000..2be36963
--- /dev/null
+++ b/benchmarks/pandas/bench_reduce_ops.py
@@ -0,0 +1,45 @@
+"""Benchmark reduce_ops: nunique / any / all on 100k rows.
+
+Times Series.nunique, any() and all() on a boolean Series, and
+DataFrame.nunique on a three-column frame.
+Prints a single JSON object with function, mean_ms, iterations and total_ms.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE) % 1000)
+bool_s = pd.Series(np.arange(SIZE) > 0)
+df = pd.DataFrame({
+    "a": np.arange(SIZE) % 500,
+    "b": np.arange(SIZE) % 200,
+    "c": np.arange(SIZE) % 100,
+})
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    s.nunique()
+    bool_s.any()
+    bool_s.all()
+    df.nunique()
+
+started = time.perf_counter()
+for _ in range(ITERATIONS):
+    s.nunique()
+    bool_s.any()
+    bool_s.all()
+    df.nunique()
+elapsed_ms = (time.perf_counter() - started) * 1000
+
+report = {
+    "function": "reduce_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_rename_ops.py b/benchmarks/pandas/bench_rename_ops.py
new file mode 100644
index 00000000..897f520b
--- /dev/null
+++ b/benchmarks/pandas/bench_rename_ops.py
@@ -0,0 +1,36 @@
+"""Benchmark: rename_ops — rename / add_prefix / add_suffix on Series/DataFrame of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(np.arange(SIZE), index=[f"row_{i}" for i in range(SIZE)])
+df = pd.DataFrame({
+ "col_a": np.arange(SIZE),
+ "col_b": np.arange(SIZE) * 2,
+ "col_c": np.arange(SIZE) * 3,
+})
+
+for _ in range(WARMUP):
+ s.rename(lambda lbl: f"new_{lbl}")
+ df.rename(columns={"col_a": "a", "col_b": "b"})
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rename(lambda lbl: f"new_{lbl}")
+ df.rename(columns={"col_a": "a", "col_b": "b"})
+ df.add_prefix("pre_")
+ df.add_suffix("_suf")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "rename_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_resample_dataframe.py b/benchmarks/pandas/bench_resample_dataframe.py
new file mode 100644
index 00000000..da5b555b
--- /dev/null
+++ b/benchmarks/pandas/bench_resample_dataframe.py
@@ -0,0 +1,45 @@
+"""
+Benchmark: DataFrame resampling with multiple aggregations.
+
+The existing resample benchmark only covers Series. This exercises
+df.resample("1h").mean() / .sum() / .min() on a multi-column datetime-indexed DataFrame.
+Mirrors tsb resampleDataFrame(df, "H").mean() / .sum() / .min().
+
+Outputs JSON: {"function": "resample_dataframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 50_000
+WARMUP = 3
+ITERATIONS = 30
+
+idx = pd.date_range("2020-01-01", periods=SIZE, freq="1min")
+rng = np.random.default_rng(42)
+
+df = pd.DataFrame({
+ "a": np.sin(np.arange(SIZE) * 0.01) * 50 + 50,
+ "b": np.cos(np.arange(SIZE) * 0.02) * 30 + 30,
+ "c": (np.arange(SIZE) % 100) * 1.5,
+}, index=idx)
+
+for _ in range(WARMUP):
+ df.resample("1h").mean()
+ df.resample("1h").sum()
+ df.resample("1h").min()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.resample("1h").mean()
+ df.resample("1h").sum()
+ df.resample("1h").min()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "resample_dataframe",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_compare_pair.py b/benchmarks/pandas/bench_series_compare_pair.py
new file mode 100644
index 00000000..dbbb2043
--- /dev/null
+++ b/benchmarks/pandas/bench_series_compare_pair.py
@@ -0,0 +1,39 @@
+"""
+Benchmark: pandas Series-to-Series comparison operations.
+
+Mirrors tsb seriesNe(a, b), seriesGt(a, b), seriesLe(a, b), seriesEq(a, b).
+The existing compare benchmark tests scalar comparison; this tests Series-to-Series.
+Uses 100k-element Series to match the TypeScript benchmark.
+"""
+import json
+import time
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 100
+
+a = pd.Series([(i * 1.7) % 1000 for i in range(SIZE)], dtype=float)
+b = pd.Series([(i * 2.3) % 1000 for i in range(SIZE)], dtype=float)
+
+# Warm-up
+for _ in range(WARMUP):
+ a.ne(b)
+ a.gt(b)
+ a.le(b)
+ a.eq(b)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ a.ne(b)
+ a.gt(b)
+ a.le(b)
+ a.eq(b)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_compare_pair",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_dot_dataframe.py b/benchmarks/pandas/bench_series_dot_dataframe.py
new file mode 100644
index 00000000..59f85f24
--- /dev/null
+++ b/benchmarks/pandas/bench_series_dot_dataframe.py
@@ -0,0 +1,38 @@
+"""
+Benchmark: pd.Series.dot(DataFrame) and pd.DataFrame.dot(Series) — cross-form dot products.
+
+Mirrors tsb seriesDotDataFrame and dataFrameDotSeries.
+Dataset: 1000-element Series, 1000-row × 20-column DataFrame.
+Outputs JSON: {"function": "series_dot_dataframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import pandas as pd
+
+N = 1_000
+K = 20
+WARMUP = 5
+ITERATIONS = 50
+
+s_data = [(i + 1) * 0.01 for i in range(N)]
+s = pd.Series(s_data)
+
+cols = {f"c{c}": [(i * K + c) * 0.001 for i in range(N)] for c in range(K)}
+df = pd.DataFrame(cols)
+
+for _ in range(WARMUP):
+ s.dot(df)
+ df.dot(s)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.dot(df)
+ df.dot(s)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_dot_dataframe",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_format_table.py b/benchmarks/pandas/bench_series_format_table.py
new file mode 100644
index 00000000..48abadd1
--- /dev/null
+++ b/benchmarks/pandas/bench_series_format_table.py
@@ -0,0 +1,42 @@
+"""
+Benchmark: pandas Series.to_markdown() and Series.to_latex() on a 500-element Series.
+
+Mirrors the tsb seriesToMarkdown and seriesToLaTeX benchmark.
+Exercises table-rendering of both numeric and string series.
+"""
+import json
+import time
+import math
+import pandas as pd
+
+N = 500
+WARMUP = 3
+ITERATIONS = 30
+
+num_data = [math.sin(i * 0.05) * 100 for i in range(N)]
+str_data = [None if i % 10 == 0 else f"item_{i}" for i in range(N)]
+
+num_series = pd.Series(num_data)
+str_series = pd.Series(str_data)
+
+# Warm-up
+for _ in range(WARMUP):
+ num_series.to_markdown()
+ num_series.to_latex()
+ str_series.to_markdown()
+ str_series.to_latex()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ num_series.to_markdown()
+ num_series.to_latex()
+ str_series.to_markdown()
+ str_series.to_latex()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_format_table",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_items_iter.py b/benchmarks/pandas/bench_series_items_iter.py
new file mode 100644
index 00000000..bba399eb
--- /dev/null
+++ b/benchmarks/pandas/bench_series_items_iter.py
@@ -0,0 +1,28 @@
+"""Benchmark: Series.items() / Series.iteritems() — iterate over (label, value) pairs."""
+import time
+import pandas as pd
+
+SIZE = 10_000
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series(
+ data=[i * 1.1 for i in range(SIZE)],
+ index=[f"row_{i}" for i in range(SIZE)],
+)
+
+for _ in range(WARMUP):
+ for _pair in s.items():
+ pass
+
+times = []
+for _ in range(ITERATIONS):
+ t0 = time.perf_counter()
+ for _pair in s.items():
+ pass
+ times.append(time.perf_counter() - t0)
+
+total = sum(times)
+mean_ms = (total / ITERATIONS) * 1000
+total_ms = total * 1000
+print(f'{{"function": "series_items_iter", "mean_ms": {mean_ms:.6f}, "iterations": {ITERATIONS}, "total_ms": {total_ms:.6f}}}')
diff --git a/benchmarks/pandas/bench_series_setaxis_toframe.py b/benchmarks/pandas/bench_series_setaxis_toframe.py
new file mode 100644
index 00000000..d23537b1
--- /dev/null
+++ b/benchmarks/pandas/bench_series_setaxis_toframe.py
@@ -0,0 +1,56 @@
+"""
+Benchmark: Series.to_frame() / Series.set_axis() / DataFrame.set_axis() /
+ Series.add_prefix() / Series.add_suffix()
+
+Mirrors tsb bench_series_setaxis_toframe.
+Dataset: 50 000-element numeric Series; 50 000-row × 3-column DataFrame.
+Outputs JSON: {"function": "series_setaxis_toframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+SIZE = 50_000
+WARMUP = 5
+ITERATIONS = 50
+
+data = [i * 1.5 for i in range(SIZE)]
+idx = [f"r{i}" for i in range(SIZE)]
+new_idx = [f"row_{i}" for i in range(SIZE)]
+
+s = pd.Series(data, index=idx, name="values")
+df = pd.DataFrame(
+ {
+ "a": list(range(SIZE)),
+ "b": [i * 2 for i in range(SIZE)],
+ "c": [i * 3 for i in range(SIZE)],
+ },
+ index=idx,
+)
+new_cols = ["col_a", "col_b", "col_c"]
+
+for _ in range(WARMUP):
+ s.to_frame()
+ s.set_axis(new_idx)
+ df.set_axis(new_idx, axis=0)
+ df.set_axis(new_cols, axis=1)
+ s.add_prefix("pre_")
+ s.add_suffix("_suf")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.to_frame()
+ s.set_axis(new_idx)
+ df.set_axis(new_idx, axis=0)
+ df.set_axis(new_cols, axis=1)
+ s.add_prefix("pre_")
+ s.add_suffix("_suf")
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_setaxis_toframe",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_series_to_markdown.py b/benchmarks/pandas/bench_series_to_markdown.py
new file mode 100644
index 00000000..e219f33e
--- /dev/null
+++ b/benchmarks/pandas/bench_series_to_markdown.py
@@ -0,0 +1,32 @@
+"""
+Benchmark: Series.to_markdown() and Series.to_latex() on a 500-element numeric Series.
+
+Mirrors tsb seriesToMarkdown and seriesToLaTeX.
+Outputs JSON: {"function": "series_to_markdown", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import pandas as pd
+
+SIZE = 500
+WARMUP = 5
+ITERATIONS = 50
+
+s = pd.Series([(i * 1.7) % 100 for i in range(SIZE)], name="values")
+
+for _ in range(WARMUP):
+ s.to_markdown()
+ s.to_latex()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.to_markdown()
+ s.to_latex()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "series_to_markdown",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_shift_diff.py b/benchmarks/pandas/bench_shift_diff.py
new file mode 100644
index 00000000..878d05c6
--- /dev/null
+++ b/benchmarks/pandas/bench_shift_diff.py
@@ -0,0 +1,28 @@
+"""Benchmark: Series.shift and Series.diff on 100k-element Series"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.arange(ROWS, dtype=float) * 1.5
+s = pd.Series(data)
+
+for _ in range(WARMUP):
+ s.shift(1)
+ s.diff(1)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.shift(1)
+ s.diff(1)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "shift_diff",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_sort_ops.py b/benchmarks/pandas/bench_sort_ops.py
new file mode 100644
index 00000000..929558f3
--- /dev/null
+++ b/benchmarks/pandas/bench_sort_ops.py
@@ -0,0 +1,32 @@
+"""Benchmark: Series.sort_values and DataFrame.sort_values on 100k rows"""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+data = np.sin(np.arange(ROWS, dtype=float)) * 1000
+s = pd.Series(data)
+df = pd.DataFrame({
+ "a": np.sin(np.arange(ROWS, dtype=float)) * 1000,
+ "b": np.cos(np.arange(ROWS, dtype=float)) * 500,
+})
+
+for _ in range(WARMUP):
+ s.sort_values()
+ df.sort_values("a")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.sort_values()
+ df.sort_values("a")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "sort_ops",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_str_findall_expand.py b/benchmarks/pandas/bench_str_findall_expand.py
new file mode 100644
index 00000000..54bf92fb
--- /dev/null
+++ b/benchmarks/pandas/bench_str_findall_expand.py
@@ -0,0 +1,36 @@
+"""
+Benchmark: pandas Series.str.extract() with named capture groups on a 5k-element Series.
+
+Mirrors the tsb strFindallExpand benchmark.
+Each string has the form "userN scoreM levelL" and the regex extracts
+named groups: word, num, score, level.
+"""
+import json
+import time
+import pandas as pd
+
+N = 5_000
+WARMUP = 3
+ITERATIONS = 20
+
+data = [None if i % 20 == 0 else f"user{i} score{(i * 7) % 100} level{(i % 5) + 1}" for i in range(N)]
+s = pd.Series(data, dtype="object")
+
+# Named capture-group pattern matching the TypeScript version
+pat = r"(?P[a-z]+)(?P\d+)\s+score(?P\d+)\s+level(?P\d+)"
+
+# Warm-up
+for _ in range(WARMUP):
+ s.str.extract(pat)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.str.extract(pat)
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "str_findall_expand",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_styler_format.py b/benchmarks/pandas/bench_styler_format.py
new file mode 100644
index 00000000..a3c98e22
--- /dev/null
+++ b/benchmarks/pandas/bench_styler_format.py
@@ -0,0 +1,59 @@
+"""Benchmark: Styler.format / apply / applymap / to_html — Styler formatting chain on 100 rows.
+
+Mirrors tsb Styler: format / formatIndex / apply / applymap / toHtml.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 100
+WARMUP = 3
+ITERATIONS = 20
+
+
+df = pd.DataFrame(
+ {
+ "a": np.arange(ROWS) * 1.5,
+ "b": np.arange(ROWS, 0, -1) * 2.0,
+ "c": np.sin(np.arange(ROWS) / 10) * 50 + 50,
+ }
+)
+
+
+def _apply_red(vals):
+ return ["color: navy"] * len(vals)
+
+
+def _applymap_bold(v):
+ return "font-weight: bold" if isinstance(v, float) and v > 50 else ""
+
+
+def _run():
+ styler = df.style.format("{:.2f}").apply(_apply_red)
+ try:
+ # pandas 2.1+ renamed applymap → map
+ styler = styler.map(_applymap_bold)
+ except AttributeError:
+ styler = styler.applymap(_applymap_bold)
+ styler.to_html()
+
+
+for _ in range(WARMUP):
+ _run()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ _run()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+ json.dumps(
+ {
+ "function": "styler_format",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_styler_highlight_adv.py b/benchmarks/pandas/bench_styler_highlight_adv.py
new file mode 100644
index 00000000..d2eb702e
--- /dev/null
+++ b/benchmarks/pandas/bench_styler_highlight_adv.py
@@ -0,0 +1,54 @@
+"""Benchmark: Styler advanced — highlight_null / highlight_between / text_gradient /
+bar / set_caption / to_latex on 100 rows.
+
+Mirrors tsb Styler: highlightNull / highlightBetween / textGradient / barChart /
+setCaption / toLatex.
+"""
+import json
+import time
+import warnings
+import numpy as np
+import pandas as pd
+
+ROWS = 100
+WARMUP = 3
+ITERATIONS = 20
+
+a_data = np.arange(ROWS, dtype=float)
+b_data = np.where(np.arange(ROWS) % 10 == 0, np.nan, np.arange(ROWS) * 2.0)
+c_data = np.sin(np.arange(ROWS) / 10) * 50 + 50
+
+df = pd.DataFrame({"a": a_data, "b": b_data, "c": c_data})
+
+
+def _run():
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ (
+ df.style.highlight_null(color="red")
+ .highlight_between(left=20, right=80, color="lightyellow")
+ .text_gradient(cmap="Blues")
+ .bar(align="mid", color="#aec6cf")
+ .set_caption("Benchmark Table")
+ .to_latex()
+ )
+
+
+for _ in range(WARMUP):
+ _run()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ _run()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+ json.dumps(
+ {
+ "function": "styler_highlight_adv",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_styler_table_props.py b/benchmarks/pandas/bench_styler_table_props.py
new file mode 100644
index 00000000..4d9b6e42
--- /dev/null
+++ b/benchmarks/pandas/bench_styler_table_props.py
@@ -0,0 +1,68 @@
+"""Benchmark: Styler table-level configuration — set_properties / set_table_styles /
+set_table_attributes / hide / set_precision / set_na_rep / clear / to_html.
+
+Mirrors tsb Styler: setProperties / setTableStyles / setTableAttributes /
+hide / setPrecision / setNaRep / clearStyles / toHtml.
+"""
+import json
+import time
+import warnings
+import numpy as np
+import pandas as pd
+
+ROWS = 100
+WARMUP = 3
+ITERATIONS = 20
+
+a_data = np.arange(ROWS, dtype=float) * 1.5
+b_data = np.where(np.arange(ROWS) % 10 == 0, np.nan, np.arange(ROWS) * 2.0)
+c_data = np.sin(np.arange(ROWS) / 10) * 50 + 50
+
+df = pd.DataFrame({"a": a_data, "b": b_data, "c": c_data})
+
+
+def _run():
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore")
+ (
+ df.style.set_precision(3)
+ .set_na_rep("\u2014")
+ .set_properties(subset=["a", "b"], **{"font-size": "12px", "color": "navy"})
+ .set_table_styles(
+ [
+ {
+ "selector": "th",
+ "props": [("background-color", "#4a90d9"), ("color", "white")],
+ },
+ {
+ "selector": "tr:nth-child(even) td",
+ "props": [("background-color", "#f5f5f5")],
+ },
+ ]
+ )
+ .set_table_attributes('class="data-table" id="bench-table"')
+ .hide(axis="index")
+ .hide(subset=["c"], axis="columns")
+ .clear()
+ .to_html()
+ )
+
+
+for _ in range(WARMUP):
+ _run()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ _run()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+ json.dumps(
+ {
+ "function": "styler_table_props",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_to_json_denormalize.py b/benchmarks/pandas/bench_to_json_denormalize.py
new file mode 100644
index 00000000..ae51decf
--- /dev/null
+++ b/benchmarks/pandas/bench_to_json_denormalize.py
@@ -0,0 +1,41 @@
+"""Benchmark: to_json_denormalize — json orient variants on 10k-row DataFrame."""
+import json, time
+import numpy as np
+import pandas as pd
+
+ROWS = 10_000
+WARMUP = 5
+ITERATIONS = 30
+
+# DataFrame matching the tsb benchmark (nested-structure-like columns)
+df = pd.DataFrame({
+ "name": [f"user_{i}" for i in range(ROWS)],
+ "address.city": [f"city_{i % 100}" for i in range(ROWS)],
+ "address.zip": [str(10000 + (i % 9000)) for i in range(ROWS)],
+ "score": np.arange(ROWS) * 0.01,
+})
+
+for _ in range(WARMUP):
+ # pandas equivalent of toJsonDenormalize: to_dict("records") then reconstruct nesting
+ recs = df.to_dict("records")
+ # pandas equivalent of toJsonRecords: orient="records"
+ df.to_json(orient="records")
+ # pandas equivalent of toJsonSplit: orient="split"
+ df.to_json(orient="split")
+ # pandas equivalent of toJsonIndex: orient="index"
+ df.to_json(orient="index")
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ recs = df.to_dict("records")
+ df.to_json(orient="records")
+ df.to_json(orient="split")
+ df.to_json(orient="index")
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "to_json_denormalize",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_to_latex.py b/benchmarks/pandas/bench_to_latex.py
new file mode 100644
index 00000000..c8289694
--- /dev/null
+++ b/benchmarks/pandas/bench_to_latex.py
@@ -0,0 +1,44 @@
+"""Benchmark: toLaTeX / seriesToLaTeX — DataFrame.to_latex() and Series.to_latex() on 500 rows.
+
+Mirrors tsb toLaTeX(df) / seriesToLaTeX(s) from src/stats/format_table.ts.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 500
+WARMUP = 5
+ITERATIONS = 100
+
+df = pd.DataFrame(
+ {
+ "name": [f"item_{i}" for i in range(ROWS)],
+ "value": np.arange(ROWS) * 1.23,
+ "count": np.arange(ROWS, dtype=float),
+ }
+)
+s = pd.Series(np.arange(ROWS) * 0.5)
+
+for _ in range(WARMUP):
+ df.to_latex()
+ df.to_latex(index=False)
+ s.to_latex()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.to_latex()
+ df.to_latex(index=False)
+ s.to_latex()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+ json.dumps(
+ {
+ "function": "to_latex",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/pandas/bench_truncate_df.py b/benchmarks/pandas/bench_truncate_df.py
new file mode 100644
index 00000000..4f8b0c2a
--- /dev/null
+++ b/benchmarks/pandas/bench_truncate_df.py
@@ -0,0 +1,31 @@
+"""Benchmark: DataFrame.truncate — slice rows by before/after on 100k-row DataFrame"""
+import json
+import time
+import pandas as pd
+import numpy as np
+
+N = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+index = list(range(N))
+df = pd.DataFrame({
+ "a": np.arange(N, dtype=float),
+ "b": np.arange(N, dtype=float) * 2,
+ "c": np.arange(N, dtype=float) * 3,
+}, index=index)
+
+for _ in range(WARMUP):
+ df.truncate(before=10_000, after=90_000)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ df.truncate(before=10_000, after=90_000)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "truncate_df",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_value_counts_full.py b/benchmarks/pandas/bench_value_counts_full.py
new file mode 100644
index 00000000..284bb8ed
--- /dev/null
+++ b/benchmarks/pandas/bench_value_counts_full.py
@@ -0,0 +1,28 @@
+"""Benchmark: value_counts_full — value_counts(bins=N) on Series of 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+rng = np.random.default_rng(42)
+s = pd.Series(rng.random(SIZE) * 100)
+
+for _ in range(WARMUP):
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.value_counts(bins=10)
+ s.value_counts(bins=20)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "value_counts_full",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_window_extended.py b/benchmarks/pandas/bench_window_extended.py
new file mode 100644
index 00000000..ddafc28a
--- /dev/null
+++ b/benchmarks/pandas/bench_window_extended.py
@@ -0,0 +1,32 @@
+"""Benchmark: window_extended — rolling sem/skew/kurt/quantile on 100k rows."""
+import json, time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 3
+ITERATIONS = 20
+WINDOW = 10
+
+s = pd.Series(np.sin(np.arange(SIZE) / 100) * 100 + np.arange(SIZE) * 0.001)
+
+for _ in range(WARMUP):
+ s.rolling(WINDOW).sem()
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(WINDOW).sem()
+ s.rolling(WINDOW).skew()
+ s.rolling(WINDOW).kurt()
+ s.rolling(WINDOW).quantile(0.5)
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "window_extended",
+ "mean_ms": total / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_window_indexers.py b/benchmarks/pandas/bench_window_indexers.py
new file mode 100644
index 00000000..0c3b32f1
--- /dev/null
+++ b/benchmarks/pandas/bench_window_indexers.py
@@ -0,0 +1,60 @@
+"""
+Benchmark: FixedForwardWindowIndexer and custom variable-offset BaseIndexer via rolling.
+
+Mirrors tsb FixedForwardWindowIndexer, VariableOffsetWindowIndexer, and applyIndexer.
+Uses a 50k-row Series. Each iteration:
+- Applies rolling(FixedForwardWindowIndexer(window_size=5)).sum() (forward-looking).
+- Applies rolling(custom IntegerOffsetIndexer).sum() (variable look-back, mirrors tsb).
+Outputs JSON: {"function": "window_indexers", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+from pandas.api.indexers import BaseIndexer, FixedForwardWindowIndexer
+
+
+class IntegerOffsetIndexer(BaseIndexer):
+ """Variable look-back window where each row uses a per-row integer offset."""
+
+ def __init__(self, offsets):
+ super().__init__()
+ self._offsets = offsets
+
+ def get_window_bounds(self, num_values=0, min_periods=None, center=None, closed=None, step=1):
+ start = np.empty(num_values, dtype=np.int64)
+ end = np.empty(num_values, dtype=np.int64)
+ for i in range(num_values):
+ offset = self._offsets[i % len(self._offsets)]
+ start[i] = max(0, i - offset)
+ end[i] = i + 1
+ return start, end
+
+
+SIZE = 50_000
+WARMUP = 5
+ITERATIONS = 50
+
+values = [(i * 0.1) % 100 for i in range(SIZE)]
+s = pd.Series(values)
+
+fwd_indexer = FixedForwardWindowIndexer(window_size=5)
+offsets = [(i % 10) + 1 for i in range(SIZE)]
+var_indexer = IntegerOffsetIndexer(offsets=offsets)
+
+for _ in range(WARMUP):
+ s.rolling(fwd_indexer).sum()
+ s.rolling(var_indexer).sum()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+ s.rolling(fwd_indexer).sum()
+ s.rolling(var_indexer).sum()
+total_ms = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+ "function": "window_indexers",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+}))
diff --git a/benchmarks/pandas/bench_xs_series.py b/benchmarks/pandas/bench_xs_series.py
new file mode 100644
index 00000000..41dab0aa
--- /dev/null
+++ b/benchmarks/pandas/bench_xs_series.py
@@ -0,0 +1,55 @@
+"""
+Benchmark: Series.xs() — cross-section lookup on Series.
+
+Mirrors tsb xsSeries.
+Tests flat-index lookup (returns scalar) and MultiIndex lookup (returns sub-Series).
+Outputs JSON: {"function": "xs_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+"""
+
+import json
+import time
+
+import pandas as pd
+
+N = 1_000
+WARMUP = 10
+ITERATIONS = 5_000
+
+# Flat-index Series: each key appears once → xs returns a scalar.
+flat_series = pd.Series(
+ [i * 1.5 for i in range(N)],
+ index=[f"k{i}" for i in range(N)],
+ name="flat",
+)
+
+# MultiIndex Series: 10 outer keys × 100 inner keys → xs returns a sub-Series (100 rows).
+outer_keys = [f"g{i // 100}" for i in range(N)]
+inner_keys = [i % 100 for i in range(N)]
+multi_index = pd.MultiIndex.from_arrays([outer_keys, inner_keys], names=["outer", "inner"])
+multi_series = pd.Series(
+ [i * 2.0 for i in range(N)],
+ index=multi_index,
+ name="multi",
+)
+
+# Warm-up
+for i in range(WARMUP):
+ flat_series.xs(f"k{i % N}")
+ multi_series.xs(f"g{i % 10}")
+
+start = time.perf_counter()
+for i in range(ITERATIONS):
+ flat_series.xs(f"k{i % N}")
+ multi_series.xs(f"g{i % 10}")
+total_ms = (time.perf_counter() - start) * 1000
+
+print(
+ json.dumps(
+ {
+ "function": "xs_series",
+ "mean_ms": total_ms / ITERATIONS,
+ "iterations": ITERATIONS,
+ "total_ms": total_ms,
+ }
+ )
+)
diff --git a/benchmarks/tsb/bench_at_iat.ts b/benchmarks/tsb/bench_at_iat.ts
new file mode 100644
index 00000000..ed33ba07
--- /dev/null
+++ b/benchmarks/tsb/bench_at_iat.ts
@@ -0,0 +1,45 @@
+/**
+ * Benchmark: seriesAt, seriesIat, dataFrameAt, dataFrameIat — fast scalar access
+ * Outputs JSON: {"function": "at_iat", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, seriesAt, seriesIat, dataFrameAt, dataFrameIat } from "../../src/index.ts";
+
+const N = 100_000;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Row labels "r0".."r{N-1}" and the matching float values.
+const labels = Array.from({ length: N }, (_, i) => `r${i}`);
+const values = Array.from({ length: N }, (_, i) => i * 1.5);
+
+const s = new Series({ data: values, index: labels });
+const doubled = values.map((v) => v * 2);
+const df = DataFrame.fromColumns({ a: values, b: doubled }, { index: labels });
+
+const midLabel = `r${Math.floor(N / 2)}`;
+
+// Untimed warm-up passes.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  seriesAt(s, midLabel);
+  seriesIat(s, N / 2);
+  dataFrameAt(df, midLabel, "a");
+  dataFrameIat(df, N / 2, 0);
+}
+
+// Timed passes: label-based and position-based lookups on Series and DataFrame.
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  seriesAt(s, midLabel);
+  seriesIat(s, N / 2);
+  dataFrameAt(df, midLabel, "a");
+  dataFrameIat(df, N / 2, 0);
+}
+const total = performance.now() - startedAt;
+
+const report = {
+  function: "at_iat",
+  mean_ms: total / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: total,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_autocorr.ts b/benchmarks/tsb/bench_autocorr.ts
new file mode 100644
index 00000000..97c62646
--- /dev/null
+++ b/benchmarks/tsb/bench_autocorr.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: autoCorr — lag-N autocorrelation for a 100k-element numeric Series.
+ *
+ * Mirrors pandas Series.autocorr(lag).
+ * Benchmarks lag=1, lag=5, and lag=20.
+ *
+ * Outputs JSON: {"function": "autocorr", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, autoCorr } from "../../src/index.ts";
+
+const SIZE = 100_000;
+const WARMUP = 5;
+const ITERATIONS = 50;
+
+// Sine wave plus a small term that repeats every 7 samples.
+const data = Array.from({ length: SIZE }, (_, i) => Math.sin(i * 0.05) + (i % 7) * 0.01);
+const s = new Series({ data });
+
+// Untimed warm-up passes.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  autoCorr(s, 1);
+  autoCorr(s, 5);
+  autoCorr(s, 20);
+}
+
+// Timed passes: lags 1, 5 and 20.
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  autoCorr(s, 1);
+  autoCorr(s, 5);
+  autoCorr(s, 20);
+}
+const total_ms = performance.now() - startedAt;
+
+const report = {
+  function: "autocorr",
+  mean_ms: total_ms / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: total_ms,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_convert_dtypes.ts b/benchmarks/tsb/bench_convert_dtypes.ts
new file mode 100644
index 00000000..2ba7f4d3
--- /dev/null
+++ b/benchmarks/tsb/bench_convert_dtypes.ts
@@ -0,0 +1,53 @@
+/**
+ * Benchmark: convertDtypesSeries and convertDtypesDataFrame
+ *
+ * Mirrors pandas Series.convert_dtypes() and DataFrame.convert_dtypes().
+ * Creates a 50k-row dataset with object-typed numeric, boolean, and string
+ * columns, then measures how fast tsb can infer and convert to best dtypes.
+ */
+import { Series, DataFrame, convertDtypesSeries, convertDtypesDataFrame } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 50_000;
+const WARMUP = 3;
+const ITERATIONS = 20;
+
+// Scalar arrays (not typed arrays); each has null at a different stride.
+const intData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 17 === 0 ? null : i));
+const floatData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 13 === 0 ? null : i * 1.5));
+const strData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 11 === 0 ? null : `str_${i}`));
+const boolData: Scalar[] = Array.from({ length: N }, (_, i) => (i % 7 === 0 ? null : i % 2 === 0));
+
+const intSeries = new Series({ data: intData });
+const floatSeries = new Series({ data: floatData });
+
+const columns = {
+  int_col: intData,
+  float_col: floatData,
+  str_col: strData,
+  bool_col: boolData,
+};
+const df = DataFrame.fromColumns(columns);
+
+// Warm-up (untimed)
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  convertDtypesSeries(intSeries);
+  convertDtypesSeries(floatSeries);
+  convertDtypesDataFrame(df);
+}
+
+// Timed passes: two Series conversions plus one DataFrame conversion.
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  convertDtypesSeries(intSeries);
+  convertDtypesSeries(floatSeries);
+  convertDtypesDataFrame(df);
+}
+const total_ms = performance.now() - startedAt;
+
+const report = {
+  function: "convert_dtypes",
+  mean_ms: total_ms / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: total_ms,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_cross_join.ts b/benchmarks/tsb/bench_cross_join.ts
new file mode 100644
index 00000000..0bdf02fb
--- /dev/null
+++ b/benchmarks/tsb/bench_cross_join.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: crossJoin — Cartesian product of two 300-row DataFrames (90k result rows).
+ * Outputs JSON: {"function": "cross_join", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, crossJoin } from "../../src/index.ts";
+
+const N = 300;
+const WARMUP = 3;
+const ITERATIONS = 10;
+
+// Distinct column names on each side so no suffix is needed.
+const rowNumbers = (): number[] => Array.from({ length: N }, (_, i) => i);
+const left = DataFrame.fromColumns({
+  id_a: rowNumbers(),
+  val_a: Array.from({ length: N }, (_, i) => i * 1.5),
+});
+const right = DataFrame.fromColumns({
+  id_b: rowNumbers(),
+  val_b: Array.from({ length: N }, (_, i) => i * 2.5),
+});
+
+// Untimed warm-up passes.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  crossJoin(left, right);
+}
+
+// Timed passes: one N x N Cartesian product per iteration.
+const startedAt = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  crossJoin(left, right);
+}
+const total = performance.now() - startedAt;
+
+const report = {
+  function: "cross_join",
+  mean_ms: total / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: total,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_cut_bins_to_frame.ts b/benchmarks/tsb/bench_cut_bins_to_frame.ts
new file mode 100644
index 00000000..135fcd91
--- /dev/null
+++ b/benchmarks/tsb/bench_cut_bins_to_frame.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: cut_bins_to_frame — cutBinsToFrame / cutBinCounts / binEdges on 100k data points.
+ * Outputs JSON: {"function": "cut_bins_to_frame", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { cut, cutBinsToFrame, cutBinCounts, binEdges } from "../../src/index.ts";
+
+const SIZE = 100_000; // number of data points
+const NUM_BINS = 20; // bin count passed to cut()
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 50; // timed passes
+
+const data = Array.from({ length: SIZE }, (_, i) => (i % 1000) * 0.1); // repeating ramp 0.0, 0.1, …, 99.9
+const binResult = cut(data, NUM_BINS); // binning happens once here, outside the timed region
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same three calls as the timed loop
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ cutBinsToFrame(binResult, { data });
+ cutBinCounts(binResult);
+ binEdges(binResult);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "cut_bins_to_frame",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = all three calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_compare_pair.ts b/benchmarks/tsb/bench_dataframe_compare_pair.ts
new file mode 100644
index 00000000..8cdbe042
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_compare_pair.ts
@@ -0,0 +1,58 @@
+/**
+ * Benchmark: DataFrame-to-DataFrame element-wise comparisons.
+ *
+ * The existing `dataframe_compare` benchmark only tests scalar comparisons (df vs 50).
+ * This benchmark tests DataFrame-to-DataFrame element-wise comparisons:
+ * dataFrameEq(df1, df2), dataFrameNe(df1, df2), dataFrameGt(df1, df2), dataFrameLe(df1, df2).
+ * Mirrors pandas df1.eq(df2), df1.ne(df2), df1.gt(df2), df1.le(df2).
+ *
+ * Outputs JSON: {"function": "dataframe_compare_pair", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ DataFrame,
+ dataFrameEq,
+ dataFrameNe,
+ dataFrameGt,
+ dataFrameLe,
+} from "../../src/index.ts";
+
+const SIZE = 50_000; // rows per frame
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 50; // timed passes
+
+const df1 = DataFrame.fromColumns({ // left operand: columns a, b, c
+ a: Array.from({ length: SIZE }, (_, i) => (i * 1.7) % 1000),
+ b: Array.from({ length: SIZE }, (_, i) => (i * 2.3) % 1000),
+ c: Array.from({ length: SIZE }, (_, i) => i % 100),
+});
+
+const df2 = DataFrame.fromColumns({ // right operand: same column names, different multipliers/offset
+ a: Array.from({ length: SIZE }, (_, i) => (i * 2.1) % 1000),
+ b: Array.from({ length: SIZE }, (_, i) => (i * 1.9) % 1000),
+ c: Array.from({ length: SIZE }, (_, i) => (i + 7) % 100),
+});
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same four comparisons as the timed loop
+ dataFrameEq(df1, df2);
+ dataFrameNe(df1, df2);
+ dataFrameGt(df1, df2);
+ dataFrameLe(df1, df2);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ dataFrameEq(df1, df2);
+ dataFrameNe(df1, df2);
+ dataFrameGt(df1, df2);
+ dataFrameLe(df1, df2);
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "dataframe_compare_pair",
+ mean_ms: total_ms / ITERATIONS, // average per pass; one pass = all four comparisons
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_itertuples.ts b/benchmarks/tsb/bench_dataframe_itertuples.ts
new file mode 100644
index 00000000..b1500b18
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_itertuples.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: DataFrame.itertuples() — iterate over rows as record objects.
+ * Outputs JSON: {"function": "dataframe_itertuples", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame } from "../../src/index.ts";
+
+const ROWS = 1_000; // rows in the frame being iterated
+const WARMUP = 5; // untimed full iterations before measuring
+const ITERATIONS = 50; // timed full iterations
+
+const df = DataFrame.fromColumns({ // three numeric columns x, y, z
+ x: Array.from({ length: ROWS }, (_, i) => i * 1.5),
+ y: Array.from({ length: ROWS }, (_, i) => i * 2.5),
+ z: Array.from({ length: ROWS }, (_, i) => i * 3.5),
+});
+
+for (let i = 0; i < WARMUP; i++) {
+ for (const _row of df.itertuples()) {
+ /* warm up: drain the iterator, rows are discarded */
+ }
+}
+
+const times: number[] = []; // one elapsed-ms sample per timed iteration
+for (let i = 0; i < ITERATIONS; i++) {
+ const t0 = performance.now(); // each full traversal is timed individually
+ for (const _row of df.itertuples()) {
+ /* iterate: drain the iterator, rows are discarded */
+ }
+ times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0); // sum of the per-iteration samples
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "dataframe_itertuples",
+ mean_ms: total / ITERATIONS, // average per full traversal
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_transform_named.ts b/benchmarks/tsb/bench_dataframe_transform_named.ts
new file mode 100644
index 00000000..d45ab0f1
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_transform_named.ts
@@ -0,0 +1,43 @@
+/**
+ * Benchmark: dataFrameTransform with named aggregation strings.
+ *
+ * Mirrors pandas DataFrame.transform called with function names. Each pass runs
+ * the single-string form ("mean", "cumsum") and the array form (["sum", "mean"]),
+ * exercising the string-name path of dataFrameTransform from stats/transform_agg.ts.
+ *
+ * Outputs JSON: {"function": "dataframe_transform_named", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, dataFrameTransform } from "../../src/index.ts";
+
+const ROWS = 10_000; // rows per column
+const WARMUP = 3; // untimed passes before measuring
+const ITERATIONS = 20; // timed passes
+
+const a = Array.from({ length: ROWS }, (_, i) => (i % 100) * 1.5 + 1);
+const b = Array.from({ length: ROWS }, (_, i) => ((i * 3) % 200) * 0.5 + 2);
+const c = Array.from({ length: ROWS }, (_, i) => ((i * 7) % 50) * 2.0 + 0.5);
+const df = DataFrame.fromColumns({ a, b, c }); // three numeric columns built from the arrays above
+
+// Warm-up: two single-string transforms and one array-of-strings transform, same as the timed loop
+for (let i = 0; i < WARMUP; i++) {
+ dataFrameTransform(df, "mean");
+ dataFrameTransform(df, "cumsum");
+ dataFrameTransform(df, ["sum", "mean"] as const);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ dataFrameTransform(df, "mean");
+ dataFrameTransform(df, "cumsum");
+ dataFrameTransform(df, ["sum", "mean"] as const);
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "dataframe_transform_named",
+ mean_ms: total_ms / ITERATIONS, // average per pass; one pass = all three transforms
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_dataframe_update.ts b/benchmarks/tsb/bench_dataframe_update.ts
new file mode 100644
index 00000000..eaacbe9d
--- /dev/null
+++ b/benchmarks/tsb/bench_dataframe_update.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: dataFrameUpdate — in-place-style DataFrame value update.
+ *
+ * Mirrors pandas `DataFrame.update()`.
+ * Overwrites non-null values from `other` into `self`.
+ * Outputs JSON: {"function": "dataframe_update", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, dataFrameUpdate } from "../../src/index.ts";
+
+const N = 10_000; // rows per frame
+const WARMUP = 20; // untimed calls before measuring
+const ITERATIONS = 200; // timed calls
+
+// Build two DataFrames; `other` has null in ~2/3 of rows (so 1/3 rows are updated).
+const aData = Array.from({ length: N }, (_, i) => i * 1.0);
+const bData = Array.from({ length: N }, (_, i) => i * 2.0);
+
+const aOther = Array.from({ length: N }, (_, i) =>
+ i % 3 === 0 ? i * 10.0 : (null as unknown as number), // null is cast so the array is typed number[]
+);
+const bOther = Array.from({ length: N }, (_, i) =>
+ i % 3 === 0 ? i * 20.0 : (null as unknown as number), // same pattern as aOther: value on every third row
+);
+
+const df = new DataFrame({ a: aData, b: bData }); // target frame, built with the constructor (not fromColumns)
+const other = new DataFrame({ a: aOther, b: bOther }); // source of replacement values
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ dataFrameUpdate(df, other);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ dataFrameUpdate(df, other); // NOTE(review): if this mutates df, later calls rewrite identical values — confirm
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed calls
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "dataframe_update",
+ mean_ms: total_ms / ITERATIONS, // average per dataFrameUpdate call
+ iterations: ITERATIONS,
+ total_ms: total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_errors.ts b/benchmarks/tsb/bench_errors.ts
new file mode 100644
index 00000000..fffbef27
--- /dev/null
+++ b/benchmarks/tsb/bench_errors.ts
@@ -0,0 +1,56 @@
+/**
+ * Benchmark: pd.errors namespace — instantiate and inspect pandas-compatible error classes.
+ *
+ * Covers the `errors` namespace from tsb:
+ * - errors.ValueError, errors.KeyError (base classes; errors.IndexError is not exercised here)
+ * - errors.EmptyDataError, errors.MergeError, errors.OptionError
+ * - errors.IntCastingNaNError, errors.UnsortedIndexError
+ * - errors.ParserError, errors.PerformanceWarning, errors.InvalidIndexError
+ * - instanceof checks and .name/.message property access
+ *
+ * Outputs JSON: {"function": "errors", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { errors } from "../../src/index.ts";
+
+const WARMUP = 5; // untimed run() calls before measuring
+const ITERATIONS = 200; // timed run() calls
+
+function run(): void { // one benchmark pass: construct ten error objects, then inspect each one
+ const e1 = new errors.ValueError("bad value");
+ const e2 = new errors.KeyError("missing key");
+ const e3 = new errors.MergeError("incompatible merge");
+ const e4 = new errors.EmptyDataError("no data");
+ const e5 = new errors.OptionError("unknown option");
+ const e6 = new errors.IntCastingNaNError(); // constructed without a message argument
+ const e7 = new errors.UnsortedIndexError(); // constructed without a message argument
+ const e8 = new errors.ParserError("unexpected token");
+ const e9 = new errors.PerformanceWarning("slow path");
+ const e10 = new errors.InvalidIndexError("bad index");
+
+ const _a = e1 instanceof errors.ValueError; // instanceof against the error's own class
+ const _b = e2 instanceof errors.KeyError;
+ const _c = e3 instanceof Error; // instanceof against the built-in Error
+ const _d = e4.name === "EmptyDataError"; // .name property read
+ const _e = e5.message.includes("unknown"); // .message property read
+ const _f = e6 instanceof errors.IntCastingNaNError;
+ const _g = e7 instanceof errors.UnsortedIndexError;
+ const _h = e8.name === "ParserError";
+ const _i = e9.name === "PerformanceWarning";
+ const _j = e10 instanceof errors.InvalidIndexError;
+ void [_a, _b, _c, _d, _e, _f, _g, _h, _i, _j]; // references every result; presumably so the checks are not flagged as unused
+}
+
+for (let i = 0; i < WARMUP; i++) run(); // warm-up passes, not timed
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) run();
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "errors",
+ mean_ms: total_ms / ITERATIONS, // average per run() call
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_extensions.ts b/benchmarks/tsb/bench_extensions.ts
new file mode 100644
index 00000000..fb21dbbd
--- /dev/null
+++ b/benchmarks/tsb/bench_extensions.ts
@@ -0,0 +1,114 @@
+/**
+ * Benchmark: pd.api.extensions — ExtensionDtype / ExtensionArray / accessor registration.
+ *
+ * Covers:
+ * - ExtensionDtype subclassing → pandas `pandas.api.extensions.ExtensionDtype`
+ * - ExtensionArray subclassing → pandas `pandas.api.extensions.ExtensionArray`
+ * - registerExtensionDtype() → pandas `register_extension_dtype()`
+ * - constructExtensionDtypeFromString() → pandas dtype string resolution
+ * - registerSeriesAccessor() → pandas `register_series_accessor()`
+ * - registerDataFrameAccessor() → pandas `register_dataframe_accessor()`
+ * - getRegisteredAccessors() → accessor registry lookup
+ *
+ * Outputs JSON: {"function": "extensions", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ ExtensionDtype,
+ ExtensionArray,
+ registerExtensionDtype,
+ constructExtensionDtypeFromString,
+ registerSeriesAccessor,
+ registerDataFrameAccessor,
+ getRegisteredAccessors,
+} from "../../src/index.ts";
+
+const WARMUP = 5; // untimed run() calls before measuring
+const ITERATIONS = 200; // timed run() calls
+
+class TagDtype extends ExtensionDtype { // minimal ExtensionDtype subclass used as the benchmark's custom dtype
+ override get name(): string {
+ return "tag"; // the same string construct_from_string matches on
+ }
+ override get type(): abstract new (...args: readonly unknown[]) => unknown {
+ return String as unknown as abstract new (...args: readonly unknown[]) => unknown; // String, cast to the declared constructor type
+ }
+ override get kind(): string {
+ return "O"; // presumably the numpy-style "object" kind code — confirm against ExtensionDtype
+ }
+ override get isNumeric(): boolean {
+ return false;
+ }
+ static override construct_from_string(dtype: string): TagDtype | null {
+ return dtype === "tag" ? new TagDtype() : null; // only the exact string "tag" yields an instance; anything else gives null
+ }
+}
+
+class TagArray extends ExtensionArray { // minimal ExtensionArray subclass backed by a read-only string array
+ private readonly _data: readonly string[]; // backing storage; stored by reference, not copied
+ constructor(data: readonly string[]) {
+ super();
+ this._data = data;
+ }
+ override get dtype(): TagDtype {
+ return new TagDtype(); // a fresh TagDtype is allocated on every access
+ }
+ override get length(): number {
+ return this._data.length;
+ }
+ override getItem(i: number): string | null {
+ const idx = i < 0 ? this._data.length + i : i; // negative i counts back from the end
+ return this._data[idx] ?? null; // an index with no element yields null instead of undefined
+ }
+ override slice(start: number, stop: number): TagArray {
+ return new TagArray(this._data.slice(start, stop)); // new TagArray over a copied sub-range
+ }
+}
+
+class GeoAccessor { // stub accessor class handed to the accessor-registration calls
+ constructor(_obj: unknown) {} // accepts the wrapped object but does not store it
+ distance(): number {
+ return 0; // placeholder result; no computation is performed
+ }
+}
+
+// Register the dtype and both accessors once, before timing. NOTE(review): described as idempotent across runs — confirm the registries tolerate re-registration.
+registerExtensionDtype(TagDtype as unknown as { new (): ExtensionDtype } & typeof ExtensionDtype);
+registerSeriesAccessor("geo_bench", GeoAccessor);
+registerDataFrameAccessor("geo_bench", GeoAccessor);
+
+function run(): void { // one benchmark pass: dtype lookup, TagArray operations, accessor-registry reads
+ const dt = constructExtensionDtypeFromString("tag"); // resolve a dtype from its string name
+ const _name = dt?.name; // optional chaining: the lookup result may be nullish
+
+ const arr = new TagArray(["alpha", "beta", "gamma", "delta", "epsilon"]);
+ const _len = arr.length;
+ const _item = arr.getItem(2); // positive index
+ const _neg = arr.getItem(-1); // negative index, counted from the end
+ const _sliced = arr.slice(1, 4);
+ const _dtype = arr.dtype.name;
+ const _numeric = arr.dtype.isNumeric;
+
+ const seriesMap = getRegisteredAccessors("series");
+ const _hasSeries = seriesMap.has("geo_bench"); // name registered via registerSeriesAccessor
+ const dfMap = getRegisteredAccessors("dataframe");
+ const _hasDf = dfMap.has("geo_bench"); // name registered via registerDataFrameAccessor
+ const idxMap = getRegisteredAccessors("index"); // this file registers no "index" accessor
+ const _idxSize = idxMap.size;
+
+ void [_name, _len, _item, _neg, _sliced, _dtype, _numeric, _hasSeries, _hasDf, _idxSize]; // references every result; presumably so none is flagged as unused
+}
+
+for (let i = 0; i < WARMUP; i++) run(); // warm-up passes, not timed
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) run();
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "extensions",
+ mean_ms: total_ms / ITERATIONS, // average per run() call
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_filter_series.ts b/benchmarks/tsb/bench_filter_series.ts
new file mode 100644
index 00000000..d1bdef87
--- /dev/null
+++ b/benchmarks/tsb/bench_filter_series.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: filterSeries — filter Series index labels (this benchmark times the `items` and `like` selectors; regex is not exercised)
+ * Outputs JSON: {"function": "filter_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, filterSeries } from "../../src/index.ts";
+
+const N = 100_000; // number of labelled elements in the Series
+const WARMUP = 3; // untimed passes before measuring
+const ITERATIONS = 10; // timed passes
+
+// Series with string labels: "label_0", "label_1", ..., "label_N-1"
+const labels = Array.from({ length: N }, (_, i) => `label_${i}`);
+const values = Array.from({ length: N }, (_, i) => i * 0.5);
+const s = new Series({ data: values, index: labels });
+
+// Pre-build an array of 1000 labels to keep: "label_0", "label_100", ..., "label_99900" (all present in `labels`)
+const keepItems = Array.from({ length: 1_000 }, (_, i) => `label_${i * 100}`);
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same two calls as the timed loop
+ filterSeries(s, { items: keepItems });
+ filterSeries(s, { like: "label_5" });
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ filterSeries(s, { items: keepItems }); // select by explicit label list
+ filterSeries(s, { like: "label_5" }); // presumably a substring match on labels — confirm against filterSeries
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "filter_series",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = both filter calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_get_set_option.ts b/benchmarks/tsb/bench_get_set_option.ts
new file mode 100644
index 00000000..c9c4d07a
--- /dev/null
+++ b/benchmarks/tsb/bench_get_set_option.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: getOption / setOption / resetOption — pandas options API.
+ *
+ * Mirrors pandas `pd.get_option`, `pd.set_option`, `pd.reset_option`.
+ * Outputs JSON: {"function": "get_set_option", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { getOption, setOption, resetOption } from "../../src/index.ts";
+
+const WARMUP = 10; // untimed passes before measuring
+const ITERATIONS = 10_000; // timed passes
+
+// Warm-up: get/set/reset on two option keys, using fixed values
+for (let i = 0; i < WARMUP; i++) {
+ getOption("display.max_rows");
+ setOption("display.max_rows", 50);
+ resetOption("display.max_rows");
+ getOption("display.precision");
+ setOption("display.precision", 3);
+ resetOption("display.precision");
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ getOption("display.max_rows");
+ setOption("display.max_rows", (i % 90) + 10); // value cycles through 10..99
+ resetOption("display.max_rows");
+ getOption("display.precision");
+ setOption("display.precision", (i % 8) + 2); // value cycles through 2..9
+ resetOption("display.precision");
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "get_set_option",
+ mean_ms: total_ms / ITERATIONS, // average per pass; one pass = six option calls
+ iterations: ITERATIONS,
+ total_ms: total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_item_bool_extract.ts b/benchmarks/tsb/bench_item_bool_extract.ts
new file mode 100644
index 00000000..6b2a940e
--- /dev/null
+++ b/benchmarks/tsb/bench_item_bool_extract.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: itemSeries / boolSeries / boolDataFrame — single-element scalar extraction.
+ *
+ * Covers functions in scalar_extract.ts not benchmarked by bench_scalar_extract
+ * (which benchmarks squeeze, firstValidIndex, lastValidIndex but not item/bool).
+ *
+ * Mirrors pandas:
+ * - Series.item() → itemSeries
+ * - bool(pd.Series([True])) → boolSeries
+ * - bool(pd.DataFrame([[1]])) → boolDataFrame
+ *
+ * Single-element objects are created once outside the loop; the hot path is
+ * the repeated extraction call itself.
+ *
+ * Outputs JSON: {"function": "item_bool_extract", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, itemSeries, boolSeries, boolDataFrame } from "../../src/index.ts";
+
+const WARMUP = 20; // untimed passes before measuring
+const ITERATIONS = 100_000; // timed passes
+
+// Single-element Series / DataFrames (reused each iteration).
+const numericSeries = new Series({ data: [42.5] }); // input to itemSeries
+const trueSeries = new Series({ data: [true] }); // input to boolSeries
+const trueDF = DataFrame.fromColumns({ x: [true] }); // input to boolDataFrame: one column, one row
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ itemSeries(numericSeries);
+ boolSeries(trueSeries);
+ boolDataFrame(trueDF);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ itemSeries(numericSeries);
+ boolSeries(trueSeries);
+ boolDataFrame(trueDF);
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "item_bool_extract",
+ mean_ms: total_ms / ITERATIONS, // average per pass; one pass = all three extractions
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_join_all.ts b/benchmarks/tsb/bench_join_all.ts
new file mode 100644
index 00000000..2dfb3358
--- /dev/null
+++ b/benchmarks/tsb/bench_join_all.ts
@@ -0,0 +1,36 @@
+/**
+ * Benchmark: joinAll — sequential left-join of 4 DataFrames each with 5k rows.
+ * Outputs JSON: {"function": "join_all", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, joinAll } from "../../src/index.ts";
+
+const N = 5_000; // rows per frame
+const WARMUP = 3; // untimed joinAll calls before measuring
+const ITERATIONS = 10; // timed joinAll calls
+
+const idx = Array.from({ length: N }, (_, i) => String(i)); // string labels "0" … "4999", shared by all four frames
+
+// Base DataFrame and three others — distinct column names (a, b, c, d), shared index
+const base = DataFrame.fromColumns({ a: Array.from({ length: N }, (_, i) => i) }, { index: idx });
+const df1 = DataFrame.fromColumns({ b: Array.from({ length: N }, (_, i) => i * 2) }, { index: idx });
+const df2 = DataFrame.fromColumns({ c: Array.from({ length: N }, (_, i) => i * 3) }, { index: idx });
+const df3 = DataFrame.fromColumns({ d: Array.from({ length: N }, (_, i) => i * 4) }, { index: idx });
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: results discarded, nothing timed
+ joinAll(base, [df1, df2, df3]);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ joinAll(base, [df1, df2, df3]);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed calls
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "join_all",
+ mean_ms: total / ITERATIONS, // average per joinAll call
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_math_ops.ts b/benchmarks/tsb/bench_math_ops.ts
new file mode 100644
index 00000000..5559bde5
--- /dev/null
+++ b/benchmarks/tsb/bench_math_ops.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: math_ops — absSeries / absDataFrame / roundSeries / roundDataFrame on 100k rows.
+ * Outputs JSON: {"function": "math_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, absSeries, absDataFrame, roundSeries, roundDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements per Series / rows per frame
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 50; // timed passes
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => (i % 2 === 0 ? -(i + 0.567) : i + 0.567)) }); // sign alternates: negative at even i
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => -(i + 0.123)), // all negative
+ b: Array.from({ length: SIZE }, (_, i) => i + 0.456), // all positive
+});
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same four calls as the timed loop
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1);
+ roundDataFrame(df, 1);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ absSeries(s);
+ absDataFrame(df);
+ roundSeries(s, 1); // second argument presumably the number of decimals — confirm against roundSeries
+ roundDataFrame(df, 1);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "math_ops",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = all four calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_merge_asof.ts b/benchmarks/tsb/bench_merge_asof.ts
new file mode 100644
index 00000000..9ef2a2b8
--- /dev/null
+++ b/benchmarks/tsb/bench_merge_asof.ts
@@ -0,0 +1,39 @@
+/**
+ * Benchmark: mergeAsof — backward asof join of two 10k-row sorted DataFrames.
+ * Outputs JSON: {"function": "merge_asof", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, mergeAsof } from "../../src/index.ts";
+
+const N = 10_000; // rows per frame
+const WARMUP = 3; // untimed mergeAsof calls before measuring
+const ITERATIONS = 10; // timed mergeAsof calls
+
+// Trades sorted by time: 0, 2, 4, ...
+const tradeTimes = Array.from({ length: N }, (_, i) => i * 2);
+const prices = Array.from({ length: N }, (_, i) => 100.0 + i * 0.5);
+
+// Quotes sorted by time, sparser: 0, 3, 6, ...
+const quoteTimes = Array.from({ length: N }, (_, i) => i * 3);
+const bids = Array.from({ length: N }, (_, i) => 99.0 + i * 0.5);
+
+const trades = DataFrame.fromColumns({ time: tradeTimes, price: prices }); // left frame
+const quotes = DataFrame.fromColumns({ time: quoteTimes, bid: bids }); // right frame; shares the "time" key column
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: results discarded, nothing timed
+ mergeAsof(trades, quotes, { on: "time" });
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ mergeAsof(trades, quotes, { on: "time" }); // no direction option passed; presumably defaults to backward — confirm
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed calls
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "merge_asof",
+ mean_ms: total / ITERATIONS, // average per mergeAsof call
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_na_ops.ts b/benchmarks/tsb/bench_na_ops.ts
new file mode 100644
index 00000000..31990d0c
--- /dev/null
+++ b/benchmarks/tsb/bench_na_ops.ts
@@ -0,0 +1,47 @@
+/**
+ * Benchmark: na_ops — isna / notna / ffillSeries / bfillSeries / dataFrameFfill / dataFrameBfill on 100k rows.
+ * Outputs JSON: {"function": "na_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, isna, notna, ffillSeries, bfillSeries, dataFrameFfill, dataFrameBfill } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements per Series / rows per frame
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 50; // timed passes
+
+const data: (number | null)[] = Array.from({ length: SIZE }, (_, i) =>
+ i % 5 === 0 ? null : i, // every fifth element (starting at index 0) is null
+);
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+ a: data, // same array that backs `s`
+ b: Array.from({ length: SIZE }, (_, i) => (i % 7 === 0 ? null : i * 2)), // every seventh element is null
+});
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same six calls as the timed loop
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ isna(s);
+ notna(s);
+ ffillSeries(s);
+ bfillSeries(s);
+ dataFrameFfill(df);
+ dataFrameBfill(df);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "na_ops",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = all six calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_nanprod.ts b/benchmarks/tsb/bench_nanprod.ts
new file mode 100644
index 00000000..52350baa
--- /dev/null
+++ b/benchmarks/tsb/bench_nanprod.ts
@@ -0,0 +1,33 @@
+/**
+ * Benchmark: nanprod() — product of array values, ignoring NaN/null.
+ * Outputs JSON: {"function": "nanprod", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { nanprod } from "../../src/index.ts";
+
+const SIZE = 100_000; // number of input elements
+const WARMUP = 5; // untimed nanprod calls before measuring
+const ITERATIONS = 50; // timed nanprod calls
+
+const data = Array.from({ length: SIZE }, (_, i) =>
+ i % 13 === 0 ? null : 1 + (i % 7) * 0.0001, // null every 13th element; other factors lie in 1.0000 … 1.0006
+);
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: results discarded, nothing timed
+ nanprod(data);
+}
+
+const times: number[] = []; // one elapsed-ms sample per timed call
+for (let i = 0; i < ITERATIONS; i++) {
+ const t0 = performance.now(); // each call is timed individually
+ nanprod(data);
+ times.push(performance.now() - t0);
+}
+const total = times.reduce((a, b) => a + b, 0); // sum of the per-call samples
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "nanprod",
+ mean_ms: total / ITERATIONS, // average per nanprod call
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_notna_boolean.ts b/benchmarks/tsb/bench_notna_boolean.ts
new file mode 100644
index 00000000..ecd113db
--- /dev/null
+++ b/benchmarks/tsb/bench_notna_boolean.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: notna_boolean — keepTrue / keepFalse / filterBy on 100k rows.
+ * Outputs JSON: {"function": "notna_boolean", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, keepTrue, keepFalse, filterBy } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements per Series / rows per frame
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 50; // timed passes
+
+const s = new Series({ data: Array.from({ length: SIZE }, (_, i) => i) });
+const mask = new Series({ data: Array.from({ length: SIZE }, (_, i) => i % 2 === 0) }); // Series mask: true at even positions
+const boolArr = Array.from({ length: SIZE }, (_, i) => i % 3 !== 0); // plain-array mask: false at every third position
+
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => i),
+ b: Array.from({ length: SIZE }, (_, i) => i * 2),
+});
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same three calls as the timed loop
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ keepTrue(s, mask);
+ keepFalse(s, mask);
+ filterBy(df, boolArr);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "notna_boolean",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = all three calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_numeric_ops_log2_exp.ts b/benchmarks/tsb/bench_numeric_ops_log2_exp.ts
new file mode 100644
index 00000000..b717b219
--- /dev/null
+++ b/benchmarks/tsb/bench_numeric_ops_log2_exp.ts
@@ -0,0 +1,66 @@
+/**
+ * Benchmark: seriesLog2 / seriesLog10 / seriesExp / seriesSign and DataFrame variants.
+ *
+ * Mirrors numpy/pandas element-wise math functions on 100k-row data:
+ * - np.log2(s), np.log10(s), np.exp(s), np.sign(s)
+ * - DataFrame.apply(np.log2), etc.
+ *
+ * Outputs JSON: {"function": "numeric_ops_log2_exp", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ Series,
+ DataFrame,
+ seriesLog2,
+ seriesLog10,
+ seriesExp,
+ seriesSign,
+ dataFrameLog2,
+ dataFrameLog10,
+ dataFrameExp,
+ dataFrameSign,
+} from "../../src/index.ts";
+
+const SIZE = 100_000; // elements per Series / rows per frame
+const WARMUP = 5; // untimed passes before measuring
+const ITERATIONS = 30; // timed passes
+
+// Strictly positive inputs (0.1 … 10_000) for log2/log10. NOTE(review): if seriesExp wraps Math.exp, inputs above ~709 yield Infinity — confirm intended.
+const data = Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.1);
+const s = new Series({ data });
+const df = DataFrame.fromColumns({
+ a: Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.1), // same values as `data`
+ b: Array.from({ length: SIZE }, (_, i) => (i + 1) * 0.2), // twice the values of column a
+});
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same eight calls as the timed loop
+ seriesLog2(s);
+ seriesLog10(s);
+ seriesExp(s);
+ seriesSign(s);
+ dataFrameLog2(df);
+ dataFrameLog10(df);
+ dataFrameExp(df);
+ dataFrameSign(df);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesLog2(s);
+ seriesLog10(s);
+ seriesExp(s);
+ seriesSign(s);
+ dataFrameLog2(df);
+ dataFrameLog10(df);
+ dataFrameExp(df);
+ dataFrameSign(df);
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "numeric_ops_log2_exp",
+ mean_ms: total_ms / ITERATIONS, // average per pass; one pass = all eight calls
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_option_context.ts b/benchmarks/tsb/bench_option_context.ts
new file mode 100644
index 00000000..f52b9729
--- /dev/null
+++ b/benchmarks/tsb/bench_option_context.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: describeOption / optionContext — pandas options describe and context manager.
+ *
+ * The existing bench_get_set_option covers getOption / setOption / resetOption.
+ * This benchmark covers the remaining options API:
+ * - describeOption(key?) → string — describe one or all option(s)
+ * - optionContext("key", value).enter() / .exit() — temporary option override
+ *
+ * Mirrors pandas:
+ * - pd.describe_option("display.max_rows") → describeOption
+ * - with pd.option_context(...) → optionContext + enter/exit
+ *
+ * Outputs JSON: {"function": "option_context", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { describeOption, optionContext } from "../../src/index.ts";
+
+const WARMUP = 20; // untimed passes before measuring
+const ITERATIONS = 50_000; // timed passes
+
+// Warm-up: same statements as the timed loop
+for (let i = 0; i < WARMUP; i++) {
+ describeOption("display.max_rows");
+ describeOption("display.precision");
+ const ctx = optionContext("display.max_rows", 50, "display.precision", 3);
+ ctx.enter();
+ ctx.exit();
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ describeOption("display.max_rows");
+ describeOption("display.precision");
+ const ctx = optionContext("display.max_rows", 50, "display.precision", 3); // flat key, value, key, value argument list
+ ctx.enter(); // context object is created inside the timed loop, then entered and exited immediately
+ ctx.exit();
+}
+const total_ms = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "option_context",
+ mean_ms: total_ms / ITERATIONS, // average per pass: two describes plus one context enter/exit
+ iterations: ITERATIONS,
+ total_ms,
+ }),
+);
diff --git a/benchmarks/tsb/bench_pow_mod.ts b/benchmarks/tsb/bench_pow_mod.ts
new file mode 100644
index 00000000..1873099c
--- /dev/null
+++ b/benchmarks/tsb/bench_pow_mod.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: seriesPow, seriesMod, dataFramePow on 100k rows
+ */
+import { Series, DataFrame, seriesPow, seriesMod, dataFramePow } from "../../src/index.ts";
+
+const ROWS = 100_000; // elements per Series / rows per frame
+const WARMUP = 3; // untimed passes before measuring
+const ITERATIONS = 10; // timed passes
+
+const data = Array.from({ length: ROWS }, (_, i) => (i % 100) + 1); // integers cycling 1..100
+const s = new Series({ data });
+
+const dfData = {
+ a: Array.from({ length: ROWS }, (_, i) => (i % 100) + 1), // integers cycling 1..100
+ b: Array.from({ length: ROWS }, (_, i) => (i % 50) + 1), // integers cycling 1..50
+};
+const df = new DataFrame(dfData); // built with the constructor (not DataFrame.fromColumns)
+
+for (let i = 0; i < WARMUP; i++) { // warm-up: same three calls as the timed loop
+ seriesPow(s, 2);
+ seriesMod(s, 7);
+ dataFramePow(df, 2);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ seriesPow(s, 2);
+ seriesMod(s, 7);
+ dataFramePow(df, 2);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed passes
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "pow_mod",
+ mean_ms: total / ITERATIONS, // average per pass; one pass = all three calls
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_read_html.ts b/benchmarks/tsb/bench_read_html.ts
new file mode 100644
index 00000000..3cbc7149
--- /dev/null
+++ b/benchmarks/tsb/bench_read_html.ts
@@ -0,0 +1,43 @@
+/**
+ * Benchmark: readHtml — parse HTML tables into DataFrames.
+ * Outputs JSON: {"function": "read_html", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { readHtml } from "../../src/index.js";
+
+const ROWS = 1_000; // body rows in the generated HTML table
+const WARMUP = 3; // untimed readHtml calls before measuring
+const ITERATIONS = 20; // timed readHtml calls
+
+// Build an HTML string holding one <table>: a four-column header row (id, name, value, score) followed by `rows` body rows.
+function buildHtml(rows: number): string {
+  const header = "<table><thead><tr><th>id</th><th>name</th><th>value</th><th>score</th></tr></thead><tbody>"; // left open; closed by the return below
+  const bodyRows: string[] = []; // one <tr>…</tr> fragment per row, joined once at the end
+  for (let i = 0; i < rows; i++) {
+    bodyRows.push(
+      `<tr><td>${i}</td><td>item_${i % 100}</td><td>${(i * 1.5).toFixed(2)}</td><td>${Math.sin(i * 0.01).toFixed(6)}</td></tr>`,
+    );
+  }
+  return `${header}${bodyRows.join("")}</tbody></table>`; // rows === 0 still yields a valid header-only table
+}
+
+const html = buildHtml(ROWS); // input string is built once, outside the timed region
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+ readHtml(html);
+}
+
+const start = performance.now(); // timed region begins
+for (let i = 0; i < ITERATIONS; i++) {
+ readHtml(html);
+}
+const total = performance.now() - start; // elapsed milliseconds across all timed calls
+
+console.log( // emit the result as a single JSON line
+ JSON.stringify({
+ function: "read_html",
+ mean_ms: total / ITERATIONS, // average per readHtml call
+ iterations: ITERATIONS,
+ total_ms: total,
+ }),
+);
diff --git a/benchmarks/tsb/bench_reduce_ops.ts b/benchmarks/tsb/bench_reduce_ops.ts
new file mode 100644
index 00000000..f2e524f7
--- /dev/null
+++ b/benchmarks/tsb/bench_reduce_ops.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: reduce_ops — nuniqueSeries / anySeries / allSeries / nunique(df) on 100k rows.
+ * Outputs JSON: {"function": "reduce_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, nuniqueSeries, anySeries, allSeries, nunique } from "../../src/index.ts";
+
+const SIZE = 100_000; // length of every generated column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const numbers = new Series({ data: Array.from({ length: SIZE }, (_, k) => k % 1000) });
+const flags = new Series({ data: Array.from({ length: SIZE }, (_, k) => k > 0) });
+const frame = DataFrame.fromColumns({
+  a: Array.from({ length: SIZE }, (_, k) => k % 500),
+  b: Array.from({ length: SIZE }, (_, k) => k % 200),
+  c: Array.from({ length: SIZE }, (_, k) => k % 100),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  nuniqueSeries(numbers);
+  anySeries(flags);
+  allSeries(flags);
+  nunique(frame);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  nuniqueSeries(numbers);
+  anySeries(flags);
+  allSeries(flags);
+  nunique(frame);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "reduce_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_rename_ops.ts b/benchmarks/tsb/bench_rename_ops.ts
new file mode 100644
index 00000000..9277e6e6
--- /dev/null
+++ b/benchmarks/tsb/bench_rename_ops.ts
@@ -0,0 +1,41 @@
+/**
+ * Benchmark: rename_ops — renameSeriesIndex / renameDataFrame / addPrefixDataFrame / addSuffixDataFrame on 100k rows.
+ * Outputs JSON: {"function": "rename_ops", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, renameSeriesIndex, renameDataFrame, addPrefixDataFrame, addSuffixDataFrame } from "../../src/index.ts";
+
+const SIZE = 100_000; // length of every generated column
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const labelled = new Series({ data: Array.from({ length: SIZE }, (_, k) => k), index: Array.from({ length: SIZE }, (_, k) => `row_${k}`) });
+const frame = DataFrame.fromColumns({
+  col_a: Array.from({ length: SIZE }, (_, k) => k),
+  col_b: Array.from({ length: SIZE }, (_, k) => k * 2),
+  col_c: Array.from({ length: SIZE }, (_, k) => k * 3),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  renameSeriesIndex(labelled, (label) => `new_${String(label)}`);
+  renameDataFrame(frame, { columns: { col_a: "a", col_b: "b" } });
+  addPrefixDataFrame(frame, "pre_");
+  addSuffixDataFrame(frame, "_suf");
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  renameSeriesIndex(labelled, (label) => `new_${String(label)}`);
+  renameDataFrame(frame, { columns: { col_a: "a", col_b: "b" } });
+  addPrefixDataFrame(frame, "pre_");
+  addSuffixDataFrame(frame, "_suf");
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "rename_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_resample_dataframe.ts b/benchmarks/tsb/bench_resample_dataframe.ts
new file mode 100644
index 00000000..f9e656e7
--- /dev/null
+++ b/benchmarks/tsb/bench_resample_dataframe.ts
@@ -0,0 +1,49 @@
+/**
+ * Benchmark: resampleDataFrame — DataFrame resampling with multiple aggregations.
+ *
+ * The existing `resample` benchmark only covers Series. This benchmark exercises
+ * resampleDataFrame on a multi-column datetime-indexed DataFrame, mirroring pandas
+ * df.resample("1h").mean() / .sum() / .min().
+ *
+ * Outputs JSON: {"function": "resample_dataframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, resampleDataFrame } from "../../src/index.ts";
+
+const SIZE = 50_000; // rows; consecutive index entries are 60 000 ms apart
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 30; // timed passes
+
+const epochMs = new Date("2020-01-01T00:00:00Z").getTime();
+const timestamps = Array.from({ length: SIZE }, (_, k) => new Date(epochMs + k * 60_000));
+
+const frame = DataFrame.fromColumns(
+  {
+    a: Array.from({ length: SIZE }, (_, k) => Math.sin(k * 0.01) * 50 + 50),
+    b: Array.from({ length: SIZE }, (_, k) => Math.cos(k * 0.02) * 30 + 30),
+    c: Array.from({ length: SIZE }, (_, k) => (k % 100) * 1.5),
+  },
+  { index: timestamps },
+);
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  resampleDataFrame(frame, "H").mean();
+  resampleDataFrame(frame, "H").sum();
+  resampleDataFrame(frame, "H").min();
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  resampleDataFrame(frame, "H").mean();
+  resampleDataFrame(frame, "H").sum();
+  resampleDataFrame(frame, "H").min();
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "resample_dataframe",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_series_compare_pair.ts b/benchmarks/tsb/bench_series_compare_pair.ts
new file mode 100644
index 00000000..ddf56659
--- /dev/null
+++ b/benchmarks/tsb/bench_series_compare_pair.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: Series-to-Series comparison operations (seriesNe, seriesGt, seriesLe, seriesEq).
+ *
+ * The existing `compare` benchmark only tests scalar comparison (s.eq(500)).
+ * This benchmark tests element-wise comparison between two Series of 100k elements,
+ * mirroring pandas s1.ne(s2), s1.gt(s2), s1.le(s2), s1.eq(s2).
+ *
+ * Outputs JSON: {"function": "series_compare_pair", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, seriesNe, seriesGt, seriesLe, seriesEq } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements in each operand Series
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 100; // timed passes
+
+const lhs = new Series({ data: Array.from({ length: SIZE }, (_, k) => (k * 1.7) % 1000) });
+const rhs = new Series({ data: Array.from({ length: SIZE }, (_, k) => (k * 2.3) % 1000) });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  seriesNe(lhs, rhs);
+  seriesGt(lhs, rhs);
+  seriesLe(lhs, rhs);
+  seriesEq(lhs, rhs);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  seriesNe(lhs, rhs);
+  seriesGt(lhs, rhs);
+  seriesLe(lhs, rhs);
+  seriesEq(lhs, rhs);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "series_compare_pair",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_series_dot_dataframe.ts b/benchmarks/tsb/bench_series_dot_dataframe.ts
new file mode 100644
index 00000000..94e4f546
--- /dev/null
+++ b/benchmarks/tsb/bench_series_dot_dataframe.ts
@@ -0,0 +1,54 @@
+/**
+ * Benchmark: seriesDotDataFrame and dataFrameDotSeries — cross-form dot products.
+ *
+ * The existing bench_dot_matmul covers seriesDotSeries and dataFrameDotDataFrame.
+ * This benchmark exercises the remaining cross-form variants:
+ * - seriesDotDataFrame(s, df) → Series (Series × DataFrame matrix multiply)
+ * - dataFrameDotSeries(df, s) → Series (DataFrame × Series matrix multiply)
+ *
+ * Mirrors pandas:
+ * - pd.Series.dot(DataFrame) → pd.Series
+ * - pd.DataFrame.dot(Series) → pd.Series
+ *
+ * Dataset: 1000-element Series, 1000-row × 20-column DataFrame.
+ *
+ * Outputs JSON: {"function": "series_dot_dataframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, seriesDotDataFrame, dataFrameDotSeries } from "../../src/index.ts";
+
+const N = 1_000; // Series length and DataFrame row count
+const K = 20; // DataFrame column count
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+// Series with N elements, indexed 0..N-1
+const sData = Array.from({ length: N }, (_, i) => (i + 1) * 0.01);
+const s = new Series({ data: sData });
+
+// DataFrame: N rows × K columns, indexed 0..N-1, columns "c0".."c19"
+const cols: Record<string, number[]> = {};
+for (let c = 0; c < K; c++) {
+  cols[`c${c}`] = Array.from({ length: N }, (_, i) => (i * K + c) * 0.001);
+}
+const df = DataFrame.fromColumns(cols);
+
+for (let i = 0; i < WARMUP; i++) {
+  seriesDotDataFrame(s, df);
+  dataFrameDotSeries(df, s);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  seriesDotDataFrame(s, df);
+  dataFrameDotSeries(df, s);
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "series_dot_dataframe",
+    mean_ms: total_ms / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms,
+  }),
+);
diff --git a/benchmarks/tsb/bench_series_format_table.ts b/benchmarks/tsb/bench_series_format_table.ts
new file mode 100644
index 00000000..11683ffc
--- /dev/null
+++ b/benchmarks/tsb/bench_series_format_table.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark: seriesToMarkdown and seriesToLaTeX on a 500-element Series.
+ *
+ * Mirrors pandas Series.to_markdown() and Series.to_latex().
+ * Exercises table-rendering of both numeric and mixed-type series.
+ */
+import { Series, seriesToMarkdown, seriesToLaTeX } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 500; // elements in each Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 30; // timed passes
+
+const numericValues: number[] = Array.from({ length: N }, (_, k) => Math.sin(k * 0.05) * 100);
+const mixedValues: Scalar[] = Array.from({ length: N }, (_, k) => (k % 10 === 0 ? null : `item_${k}`));
+
+const numericSeries = new Series({ data: numericValues });
+const mixedSeries = new Series({ data: mixedValues });
+
+// Untimed passes before measurement starts.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  seriesToMarkdown(numericSeries);
+  seriesToLaTeX(numericSeries);
+  seriesToMarkdown(mixedSeries);
+  seriesToLaTeX(mixedSeries);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  seriesToMarkdown(numericSeries);
+  seriesToLaTeX(numericSeries);
+  seriesToMarkdown(mixedSeries);
+  seriesToLaTeX(mixedSeries);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "series_format_table",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_series_items_iter.ts b/benchmarks/tsb/bench_series_items_iter.ts
new file mode 100644
index 00000000..4750413c
--- /dev/null
+++ b/benchmarks/tsb/bench_series_items_iter.ts
@@ -0,0 +1,44 @@
+/**
+ * Benchmark: Series.items() / Series.iteritems() — iterate over (label, value) pairs.
+ * Outputs JSON: {"function": "series_items_iter", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series } from "../../src/index.ts";
+
+const SIZE = 10_000; // (label, value) pairs in the Series
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const labelled = new Series({
+  data: Array.from({ length: SIZE }, (_, k) => k * 1.1),
+  index: Array.from({ length: SIZE }, (_, k) => `row_${k}`),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  for (const _entry of labelled.items()) {
+    /* warm up */
+  }
+  for (const _entry of labelled.iteritems()) {
+    /* warm up */
+  }
+}
+
+let elapsedMs = 0; // running sum of the per-pass durations
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  const passStart = performance.now();
+  for (const _entry of labelled.items()) {
+    /* iterate */
+  }
+  for (const _entry of labelled.iteritems()) {
+    /* iterate */
+  }
+  elapsedMs += performance.now() - passStart;
+}
+
+const report = {
+  function: "series_items_iter",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_series_setaxis_toframe.ts b/benchmarks/tsb/bench_series_setaxis_toframe.ts
new file mode 100644
index 00000000..e8a24fb3
--- /dev/null
+++ b/benchmarks/tsb/bench_series_setaxis_toframe.ts
@@ -0,0 +1,75 @@
+/**
+ * Benchmark: seriesToFrame / setAxisSeries / setAxisDataFrame / addPrefixSeries / addSuffixSeries
+ *
+ * Covers rename_ops functions not benchmarked by bench_rename_ops (which only benchmarks
+ * renameSeriesIndex, renameDataFrame, addPrefixDataFrame, addSuffixDataFrame).
+ *
+ * Mirrors pandas:
+ * - Series.to_frame() → seriesToFrame
+ * - Series.set_axis(labels) → setAxisSeries
+ * - DataFrame.set_axis(labels) → setAxisDataFrame
+ * - Series.add_prefix(prefix) → addPrefixSeries
+ * - Series.add_suffix(suffix) → addSuffixSeries
+ *
+ * Dataset: 50 000-element numeric Series; 50 000-row × 3-column DataFrame.
+ *
+ * Outputs JSON: {"function": "series_setaxis_toframe", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ Series,
+ DataFrame,
+ seriesToFrame,
+ setAxisSeries,
+ setAxisDataFrame,
+ addPrefixSeries,
+ addSuffixSeries,
+} from "../../src/index.ts";
+
+const SIZE = 50_000; // Series length and DataFrame row count
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const values = Array.from({ length: SIZE }, (_, k) => k * 1.5);
+const originalLabels = Array.from({ length: SIZE }, (_, k) => `r${k}`);
+const replacementLabels = Array.from({ length: SIZE }, (_, k) => `row_${k}`);
+
+const series = new Series({ data: values, index: originalLabels, name: "values" });
+const frame = DataFrame.fromColumns(
+  {
+    a: Array.from({ length: SIZE }, (_, k) => k),
+    b: Array.from({ length: SIZE }, (_, k) => k * 2),
+    c: Array.from({ length: SIZE }, (_, k) => k * 3),
+  },
+  { index: originalLabels },
+);
+const replacementColumns = ["col_a", "col_b", "col_c"];
+
+// Untimed passes before measurement starts.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  seriesToFrame(series);
+  setAxisSeries(series, replacementLabels);
+  setAxisDataFrame(frame, replacementLabels, 0);
+  setAxisDataFrame(frame, replacementColumns, 1);
+  addPrefixSeries(series, "pre_");
+  addSuffixSeries(series, "_suf");
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  seriesToFrame(series);
+  setAxisSeries(series, replacementLabels);
+  setAxisDataFrame(frame, replacementLabels, 0);
+  setAxisDataFrame(frame, replacementColumns, 1);
+  addPrefixSeries(series, "pre_");
+  addSuffixSeries(series, "_suf");
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "series_setaxis_toframe",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_series_to_markdown.ts b/benchmarks/tsb/bench_series_to_markdown.ts
new file mode 100644
index 00000000..bcaffa46
--- /dev/null
+++ b/benchmarks/tsb/bench_series_to_markdown.ts
@@ -0,0 +1,40 @@
+/**
+ * Benchmark: seriesToMarkdown and seriesToLaTeX on a 500-element numeric Series.
+ *
+ * The existing `to_markdown` benchmark covers DataFrames only.
+ * This benchmark exercises the Series variants: seriesToMarkdown / seriesToLaTeX.
+ * Mirrors pandas Series.to_markdown() and Series.to_latex().
+ *
+ * Outputs JSON: {"function": "series_to_markdown", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, seriesToMarkdown, seriesToLaTeX } from "../../src/index.ts";
+
+const SIZE = 500; // elements in the Series
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const series = new Series({
+  data: Array.from({ length: SIZE }, (_, k) => (k * 1.7) % 100),
+  name: "values",
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  seriesToMarkdown(series);
+  seriesToLaTeX(series);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  seriesToMarkdown(series);
+  seriesToLaTeX(series);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "series_to_markdown",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_shift_diff.ts b/benchmarks/tsb/bench_shift_diff.ts
new file mode 100644
index 00000000..49a8ae4a
--- /dev/null
+++ b/benchmarks/tsb/bench_shift_diff.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: shiftSeries and diffSeries on 100k-element Series
+ */
+import { Series, shiftSeries, diffSeries } from "../../src/index.ts";
+
+const ROWS = 100_000; // elements in the Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes
+
+const values = Array.from({ length: ROWS }, (_, k) => k * 1.5);
+const series = new Series({ data: values });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  shiftSeries(series, 1);
+  diffSeries(series, 1);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  shiftSeries(series, 1);
+  diffSeries(series, 1);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "shift_diff",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_sort_ops.ts b/benchmarks/tsb/bench_sort_ops.ts
new file mode 100644
index 00000000..684f1b6e
--- /dev/null
+++ b/benchmarks/tsb/bench_sort_ops.ts
@@ -0,0 +1,38 @@
+/**
+ * Benchmark: sortValuesSeries and sortValuesDataFrame on 100k rows
+ */
+import { Series, DataFrame, sortValuesSeries, sortValuesDataFrame } from "../../src/index.ts";
+
+const ROWS = 100_000; // Series length and DataFrame row count
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes
+
+const values = Array.from({ length: ROWS }, (_, k) => Math.sin(k) * 1000);
+const series = new Series({ data: values });
+
+const columns = {
+  a: Array.from({ length: ROWS }, (_, k) => Math.sin(k) * 1000),
+  b: Array.from({ length: ROWS }, (_, k) => Math.cos(k) * 500),
+};
+const frame = new DataFrame(columns);
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  sortValuesSeries(series);
+  sortValuesDataFrame(frame, "a");
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  sortValuesSeries(series);
+  sortValuesDataFrame(frame, "a");
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "sort_ops",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_str_findall_expand.ts b/benchmarks/tsb/bench_str_findall_expand.ts
new file mode 100644
index 00000000..4b4e5deb
--- /dev/null
+++ b/benchmarks/tsb/bench_str_findall_expand.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: strFindallExpand on a 5k-element string Series.
+ *
+ * Mirrors pandas Series.str.extract() with named capture groups.
+ * Each string has the form "user42 score94 level3", so the regex
+ * captures four groups: the leading word, its number, the score, and the level.
+ */
+import { Series, strFindallExpand } from "../../src/index.ts";
+import type { Scalar } from "../../src/types.ts";
+
+const N = 5_000; // number of entries in the Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes
+
+const data: Scalar[] = Array.from(
+  { length: N },
+  (_, i) => (i % 20 === 0 ? null : `user${i} score${(i * 7) % 100} level${(i % 5) + 1}`),
+);
+const s = new Series({ data });
+
+// Named capture-group pattern: extract word, number, score, and level
+const pat = /(?<word>[a-z]+)(?<number>\d+)\s+score(?<score>\d+)\s+level(?<level>\d+)/;
+
+// Warm-up
+for (let i = 0; i < WARMUP; i++) {
+  strFindallExpand(s, pat);
+}
+
+const start = performance.now();
+for (let i = 0; i < ITERATIONS; i++) {
+  strFindallExpand(s, pat);
+}
+const total_ms = performance.now() - start;
+
+console.log(
+  JSON.stringify({
+    function: "str_findall_expand",
+    mean_ms: total_ms / ITERATIONS,
+    iterations: ITERATIONS,
+    total_ms,
+  }),
+);
diff --git a/benchmarks/tsb/bench_styler_format.ts b/benchmarks/tsb/bench_styler_format.ts
new file mode 100644
index 00000000..294fd472
--- /dev/null
+++ b/benchmarks/tsb/bench_styler_format.ts
@@ -0,0 +1,52 @@
+/**
+ * Benchmark: Styler.format / apply / applymap / toHtml — Styler formatting chain.
+ *
+ * Covers Styler methods not included in bench_styler:
+ * - format(fn) → pandas `df.style.format(fn)`
+ * - formatIndex(fn) → pandas `df.style.format_index(fn)` (pandas 1.4+)
+ * - apply(fn) → pandas `df.style.apply(fn)`
+ * - applymap(fn) → pandas `df.style.applymap(fn)` / `map(fn)` (pandas 2.1+)
+ * - toHtml() → pandas `df.style.to_html()`
+ *
+ * Outputs JSON: {"function": "styler_format", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, dataFrameStyle } from "../../src/index.ts";
+
+const ROWS = 100; // rows in the styled DataFrame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes
+
+const frame = DataFrame.fromColumns({
+  a: Float64Array.from({ length: ROWS }, (_, k) => k * 1.5),
+  b: Float64Array.from({ length: ROWS }, (_, k) => (ROWS - k) * 2.0),
+  c: Float64Array.from({ length: ROWS }, (_, k) => Math.sin(k / 10) * 50 + 50),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  dataFrameStyle(frame)
+    .format((cell) => (typeof cell === "number" ? cell.toFixed(2) : String(cell)))
+    .formatIndex((label) => `r${String(label)}`)
+    .apply((entries) => entries.map(() => "color: navy"))
+    .applymap((cell) => (typeof cell === "number" && (cell as number) > 50 ? "font-weight: bold" : ""))
+    .toHtml();
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  dataFrameStyle(frame)
+    .format((cell) => (typeof cell === "number" ? cell.toFixed(2) : String(cell)))
+    .formatIndex((label) => `r${String(label)}`)
+    .apply((entries) => entries.map(() => "color: navy"))
+    .applymap((cell) => (typeof cell === "number" && (cell as number) > 50 ? "font-weight: bold" : ""))
+    .toHtml();
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "styler_format",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_styler_highlight_adv.ts b/benchmarks/tsb/bench_styler_highlight_adv.ts
new file mode 100644
index 00000000..1e848c26
--- /dev/null
+++ b/benchmarks/tsb/bench_styler_highlight_adv.ts
@@ -0,0 +1,56 @@
+/**
+ * Benchmark: Styler advanced highlighting — highlightNull / highlightBetween /
+ * textGradient / barChart / setCaption / toLatex.
+ *
+ * Covers Styler methods not included in bench_styler:
+ * - highlightNull() → pandas `df.style.highlight_null()`
+ * - highlightBetween() → pandas `df.style.highlight_between()`
+ * - textGradient() → pandas `df.style.text_gradient()`
+ * - barChart() → pandas `df.style.bar()`
+ * - setCaption(caption) → pandas `df.style.set_caption(caption)`
+ * - toLatex() → pandas `df.style.to_latex()`
+ *
+ * Outputs JSON: {"function": "styler_highlight_adv", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, dataFrameStyle } from "../../src/index.ts";
+
+const ROWS = 100; // rows in the styled DataFrame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes
+
+const frame = DataFrame.fromColumns({
+  a: Float64Array.from({ length: ROWS }, (_, k) => k * 1.0),
+  b: Array.from({ length: ROWS }, (_, k): number | null => (k % 10 === 0 ? null : k * 2.0)),
+  c: Float64Array.from({ length: ROWS }, (_, k) => Math.sin(k / 10) * 50 + 50),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  dataFrameStyle(frame)
+    .highlightNull("red")
+    .highlightBetween({ left: 20, right: 80, color: "lightyellow" })
+    .textGradient({ cmap: "Blues" })
+    .barChart({ align: "mid", color: "#aec6cf" })
+    .setCaption("Benchmark Table")
+    .toLatex();
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  dataFrameStyle(frame)
+    .highlightNull("red")
+    .highlightBetween({ left: 20, right: 80, color: "lightyellow" })
+    .textGradient({ cmap: "Blues" })
+    .barChart({ align: "mid", color: "#aec6cf" })
+    .setCaption("Benchmark Table")
+    .toLatex();
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "styler_highlight_adv",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_styler_table_props.ts b/benchmarks/tsb/bench_styler_table_props.ts
new file mode 100644
index 00000000..7ade8b2b
--- /dev/null
+++ b/benchmarks/tsb/bench_styler_table_props.ts
@@ -0,0 +1,59 @@
+/**
+ * Benchmark: Styler table-level configuration — setProperties / setTableStyles /
+ * setTableAttributes / hide / setPrecision / setNaRep / clearStyles / toHtml.
+ *
+ * Covers Styler configuration methods not included in other styler benchmarks:
+ * - setPrecision(n) → pandas `df.style.set_precision(n)`
+ * - setNaRep(s) → pandas `df.style.set_na_rep(s)`
+ * - setProperties(props,subset) → pandas `df.style.set_properties(subset=…)`
+ * - setTableStyles(styles) → pandas `df.style.set_table_styles()`
+ * - setTableAttributes(attrs) → pandas `df.style.set_table_attributes()`
+ * - hide(0) → pandas `df.style.hide(axis="index")`
+ * - hide(1, subset) → pandas `df.style.hide(subset=…, axis="columns")`
+ * - clearStyles() → pandas `df.style.clear()`
+ * - toHtml() → pandas `df.style.to_html()`
+ *
+ * Outputs JSON: {"function": "styler_table_props", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, dataFrameStyle } from "../../src/index.ts";
+
+const ROWS = 100; // rows in the styled DataFrame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes
+
+const frame = DataFrame.fromColumns({
+  a: Float64Array.from({ length: ROWS }, (_, k) => k * 1.5),
+  b: Array.from({ length: ROWS }, (_, k): number | null => (k % 10 === 0 ? null : k * 2.0)),
+  c: Float64Array.from({ length: ROWS }, (_, k) => Math.sin(k / 10) * 50 + 50),
+});
+
+const run = (): void => {
+  dataFrameStyle(frame)
+    .setPrecision(3)
+    .setNaRep("—")
+    .setProperties({ "font-size": "12px", color: "navy" }, ["a", "b"])
+    .setTableStyles([
+      { selector: "th", props: { "background-color": "#4a90d9", color: "white" } },
+      { selector: "tr:nth-child(even) td", props: { "background-color": "#f5f5f5" } },
+    ])
+    .setTableAttributes('class="data-table" id="bench-table"')
+    .hide(0)
+    .hide(1, ["c"])
+    .clearStyles()
+    .toHtml();
+};
+
+for (let pass = 0; pass < WARMUP; pass += 1) run();
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) run();
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "styler_table_props",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_to_json_denormalize.ts b/benchmarks/tsb/bench_to_json_denormalize.ts
new file mode 100644
index 00000000..07a42f5f
--- /dev/null
+++ b/benchmarks/tsb/bench_to_json_denormalize.ts
@@ -0,0 +1,42 @@
+/**
+ * Benchmark: to_json_denormalize — toJsonDenormalize / toJsonRecords / toJsonSplit / toJsonIndex
+ * Outputs JSON: {"function": "to_json_denormalize", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, toJsonDenormalize, toJsonRecords, toJsonSplit, toJsonIndex } from "../../src/index.ts";
+
+const ROWS = 10_000; // rows in the DataFrame
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 30; // timed passes
+
+// Dotted column names ("address.city", "address.zip") imitate a flattened nested record.
+const frame = DataFrame.fromColumns({
+  name: Array.from({ length: ROWS }, (_, k) => `user_${k}`),
+  "address.city": Array.from({ length: ROWS }, (_, k) => `city_${k % 100}`),
+  "address.zip": Array.from({ length: ROWS }, (_, k) => `${10000 + (k % 9000)}`),
+  score: Float64Array.from({ length: ROWS }, (_, k) => k * 0.01),
+});
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  toJsonDenormalize(frame);
+  toJsonRecords(frame);
+  toJsonSplit(frame);
+  toJsonIndex(frame);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  toJsonDenormalize(frame);
+  toJsonRecords(frame);
+  toJsonSplit(frame);
+  toJsonIndex(frame);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "to_json_denormalize",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_to_latex.ts b/benchmarks/tsb/bench_to_latex.ts
new file mode 100644
index 00000000..02c59842
--- /dev/null
+++ b/benchmarks/tsb/bench_to_latex.ts
@@ -0,0 +1,45 @@
+/**
+ * Benchmark: toLaTeX / seriesToLaTeX — render DataFrame/Series to LaTeX tabular format.
+ *
+ * Mirrors pandas:
+ * - `DataFrame.to_latex()` → tsb `toLaTeX(df)`
+ * - `Series.to_latex()` → tsb `seriesToLaTeX(s)`
+ *
+ * Outputs JSON: {"function": "to_latex", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { DataFrame, Series, toLaTeX, seriesToLaTeX } from "../../src/index.ts";
+
+const ROWS = 500; // DataFrame row count and Series length
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 100; // timed passes
+
+const frame = DataFrame.fromColumns({
+  name: Array.from({ length: ROWS }, (_, k) => `item_${k}`),
+  value: Float64Array.from({ length: ROWS }, (_, k) => k * 1.23),
+  count: Float64Array.from({ length: ROWS }, (_, k) => k),
+});
+
+const series = new Series({ data: Float64Array.from({ length: ROWS }, (_, k) => k * 0.5) });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  toLaTeX(frame);
+  toLaTeX(frame, { index: false, booktabs: true });
+  seriesToLaTeX(series);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  toLaTeX(frame);
+  toLaTeX(frame, { index: false, booktabs: true });
+  seriesToLaTeX(series);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "to_latex",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_truncate_df.ts b/benchmarks/tsb/bench_truncate_df.ts
new file mode 100644
index 00000000..f2661ce0
--- /dev/null
+++ b/benchmarks/tsb/bench_truncate_df.ts
@@ -0,0 +1,35 @@
+/**
+ * Benchmark: truncateDataFrame — slice rows by before/after labels on 100k-row DataFrame
+ * Outputs JSON: {"function": "truncate_df", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, DataFrame, truncateDataFrame } from "../../src/index.ts";
+
+const N = 100_000; // rows in the DataFrame
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 10; // timed passes
+
+const rowLabels = Array.from({ length: N }, (_, k) => k);
+const colA = Array.from({ length: N }, (_, k) => k * 1.0);
+const colB = Array.from({ length: N }, (_, k) => k * 2.0);
+const colC = Array.from({ length: N }, (_, k) => k * 3.0);
+
+const frame = DataFrame.fromColumns({ a: colA, b: colB, c: colC }, { index: rowLabels });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  truncateDataFrame(frame, 10_000, 90_000);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  truncateDataFrame(frame, 10_000, 90_000);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "truncate_df",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_value_counts_full.ts b/benchmarks/tsb/bench_value_counts_full.ts
new file mode 100644
index 00000000..d55b5b72
--- /dev/null
+++ b/benchmarks/tsb/bench_value_counts_full.ts
@@ -0,0 +1,32 @@
+/**
+ * Benchmark: value_counts_full — valueCountsBinned on 100k rows.
+ * Outputs JSON: {"function": "value_counts_full", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, valueCountsBinned } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements in the Series
+const WARMUP = 5; // untimed passes
+const ITERATIONS = 50; // timed passes
+
+const series = new Series({ data: Array.from({ length: SIZE }, () => Math.random() * 100) });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  valueCountsBinned(series, { bins: 10 });
+  valueCountsBinned(series, { bins: 20 });
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  valueCountsBinned(series, { bins: 10 });
+  valueCountsBinned(series, { bins: 20 });
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "value_counts_full",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_window_extended.ts b/benchmarks/tsb/bench_window_extended.ts
new file mode 100644
index 00000000..a4b933cb
--- /dev/null
+++ b/benchmarks/tsb/bench_window_extended.ts
@@ -0,0 +1,37 @@
+/**
+ * Benchmark: window_extended — rollingSem / rollingSkew / rollingKurt / rollingQuantile on 100k rows.
+ * Outputs JSON: {"function": "window_extended", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, rollingSem, rollingSkew, rollingKurt, rollingQuantile } from "../../src/index.ts";
+
+const SIZE = 100_000; // elements in the Series
+const WARMUP = 3; // untimed passes
+const ITERATIONS = 20; // timed passes
+const WINDOW = 10; // window argument passed to every rolling call
+
+const series = new Series({ data: Array.from({ length: SIZE }, (_, k) => Math.sin(k / 100) * 100 + k * 0.001) });
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  rollingSem(series, WINDOW);
+  rollingSkew(series, WINDOW);
+  rollingKurt(series, WINDOW);
+  rollingQuantile(series, WINDOW, 0.5);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  rollingSem(series, WINDOW);
+  rollingSkew(series, WINDOW);
+  rollingKurt(series, WINDOW);
+  rollingQuantile(series, WINDOW, 0.5);
+}
+const elapsedMs = performance.now() - t0;
+
+const report = {
+  function: "window_extended",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_window_indexers.ts b/benchmarks/tsb/bench_window_indexers.ts
new file mode 100644
index 00000000..1eef8d23
--- /dev/null
+++ b/benchmarks/tsb/bench_window_indexers.ts
@@ -0,0 +1,50 @@
+/**
+ * Benchmark: FixedForwardWindowIndexer, VariableOffsetWindowIndexer, applyIndexer.
+ *
+ * Mirrors pandas.api.indexers.FixedForwardWindowIndexer and
+ * pandas.api.indexers.VariableOffsetWindowIndexer.
+ *
+ * Uses a 50k-row dataset. Each iteration:
+ * - Generates bounds via FixedForwardWindowIndexer (window=5) on 50k rows.
+ * - Generates bounds via VariableOffsetWindowIndexer with deterministic offsets cycling through 1..10.
+ * - Applies applyIndexer with FixedForwardWindowIndexer to compute rolling sum.
+ *
+ * Outputs JSON: {"function": "window_indexers", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import {
+ FixedForwardWindowIndexer,
+ VariableOffsetWindowIndexer,
+ applyIndexer,
+} from "../../src/index.ts";
+
+const SIZE = 50_000; // row count passed to getWindowBounds and length of the sample array
+const WARMUP = 5; // untimed passes run before measuring
+const ITERATIONS = 50; // timed passes
+
+const forwardIndexer = new FixedForwardWindowIndexer({ windowSize: 5 });
+const windowOffsets = Array.from({ length: SIZE }, (_, k) => (k % 10) + 1); // cycles through 1..10
+const variableIndexer = new VariableOffsetWindowIndexer({ offsets: windowOffsets });
+const samples = Array.from({ length: SIZE }, (_, k) => (k * 0.1) % 100);
+
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  forwardIndexer.getWindowBounds(SIZE);
+  variableIndexer.getWindowBounds(SIZE);
+  applyIndexer(forwardIndexer, samples, (win) => win.reduce((acc, x) => acc + x, 0));
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  forwardIndexer.getWindowBounds(SIZE);
+  variableIndexer.getWindowBounds(SIZE);
+  applyIndexer(forwardIndexer, samples, (win) => win.reduce((acc, x) => acc + x, 0));
+}
+const elapsedMs = performance.now() - t0;
+
+// Emit one JSON line with the timing summary; mean_ms is per timed pass (all three calls).
+const report = {
+  function: "window_indexers",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));
diff --git a/benchmarks/tsb/bench_xs_series.ts b/benchmarks/tsb/bench_xs_series.ts
new file mode 100644
index 00000000..cb630e72
--- /dev/null
+++ b/benchmarks/tsb/bench_xs_series.ts
@@ -0,0 +1,46 @@
+/**
+ * Benchmark: xsSeries — cross-section lookup on Series.
+ *
+ * Mirrors pandas `Series.xs()`.
+ * Tests flat-index lookup (returns scalar) and MultiIndex lookup (returns sub-Series).
+ * Outputs JSON: {"function": "xs_series", "mean_ms": ..., "iterations": ..., "total_ms": ...}
+ */
+import { Series, MultiIndex, xsSeries } from "../../src/index.ts";
+
+const N = 1_000; // rows in each Series
+const WARMUP = 10; // untimed passes run before measuring
+const ITERATIONS = 5_000; // timed passes
+
+// Flat case: N unique string labels "k0".."k999", so each lookup is expected to yield a scalar.
+const flatValues = Array.from({ length: N }, (_, idx) => 1.5 * idx);
+const flatLabels = Array.from({ length: N }, (_, idx) => `k${idx}`);
+const flat = new Series({ data: flatValues, index: flatLabels, name: "flat" });
+
+// MultiIndex case: outer labels "g0".."g9" (100 rows each) paired with inner labels 0..99.
+const outerLabels = Array.from({ length: N }, (_, idx) => `g${Math.floor(idx / 100)}`);
+const innerLabels = Array.from({ length: N }, (_, idx) => idx % 100);
+const pairedIndex = MultiIndex.fromArrays([outerLabels, innerLabels], { names: ["outer", "inner"] });
+const multiValues = Array.from({ length: N }, (_, idx) => 2 * idx);
+const multi = new Series({ data: multiValues, index: pairedIndex, name: "multi" });
+
+// Untimed warm-up passes, using the same lookup pattern as the timed loop.
+for (let pass = 0; pass < WARMUP; pass += 1) {
+  xsSeries(flat, `k${pass % N}`);
+  xsSeries(multi, `g${pass % 10}`);
+}
+
+const t0 = performance.now();
+for (let pass = 0; pass < ITERATIONS; pass += 1) {
+  xsSeries(flat, `k${pass % N}`);
+  xsSeries(multi, `g${pass % 10}`);
+}
+const elapsedMs = performance.now() - t0;
+
+// Emit one JSON line with the timing summary; mean_ms is per timed pass (both lookups).
+const report = {
+  function: "xs_series",
+  mean_ms: elapsedMs / ITERATIONS,
+  iterations: ITERATIONS,
+  total_ms: elapsedMs,
+};
+console.log(JSON.stringify(report));