google · copybara-service · Mar 26, 2026 · Mar 26, 2026
diff --git a/README.md b/README.md
@@ -187,8 +187,9 @@ Built-in standard errors include:
     `'percentile'` (because standard error is not well defined for percentile
     bootstrapping).
 
-+   `PoissonBootstrap(unit, n_replicates, confidence)` : Computes a Poisson
-    bootstrap estimate of the standard error. It's identical to `Bootstrap`
++   `PoissonBootstrap(unit, n_replicates, confidence, ci_method)` : Computes a
+    Poisson bootstrap estimate of the standard error or percentiles. It's
+    identical to `Bootstrap`
     except that we use `Poisson(1)` instead of multinomial distribution in
     sampling. It's faster than `Bootstrap` on large data when computing in SQL.
     See the [post](https://www.unofficialgoogledatascience.com/2015/08/an-introduction-to-poisson-bootstrap26.html)

diff --git a/operations.py b/operations.py
@@ -3274,6 +3274,27 @@ class PoissonBootstrap(Bootstrap):
   instead of multinomial distribution in resampling. See
   https://www.unofficialgoogledatascience.com/2015/08/an-introduction-to-poisson-bootstrap26.html
   for an introduction.
+
+  Attributes:
+    unit: The column representing the blocks to be resampled in block Poisson
+      bootstrap. If specified we sample the unique blocks in the `unit` column,
+      otherwise we sample rows.
+    n_replicates: The number of bootstrap replicates.
+    confidence: The level of the confidence interval, must be in (0, 1). If
+      specified, we return confidence interval range instead of standard error.
+      Additionally, a display() function will be bound to the result so you can
+      visualize the confidence interval nicely in Colab and Jupyter notebook.
+      Required if ci_method is 'percentile'.
+    children: A tuple of a Metric whose result we bootstrap on.
+    enable_optimization: If all leaf Metrics are Sum and/or Count, or can be
+      expressed equivalently by Sum and/or Count, then we can preaggregate the
+      data for faster computation. See compute_slices() for more details.
+    has_been_preaggregated: If the Metric and data has already been
+      preaggregated, this will be set to True. And all other attributes
+      inherited from Operation.
+    ci_method: The method to compute confidence intervals. Can be 'std'
+      (default, uses standard error and normal approximation) or 'percentile'
+      (uses empirical percentiles from the bootstrap distribution).
   """
 
   def __init__(
@@ -3284,6 +3305,7 @@ def __init__(
       confidence: Optional[float] = None,
       enable_optimization=True,
       name_tmpl: Text = '{} Poisson Bootstrap',
+      ci_method: Literal['std', 'percentile'] = 'std',
       **kwargs,
   ):
     super(PoissonBootstrap, self).__init__(
@@ -3293,7 +3315,7 @@ def __init__(
         confidence,
         enable_optimization,
         name_tmpl,
-        ci_method='std',
+        ci_method=ci_method,
         **kwargs,
     )
 

diff --git a/operations_test.py b/operations_test.py
@@ -2053,8 +2053,11 @@ def test_integration(self):
 
 class PoissonBootstrapTests(parameterized.TestCase):
 
-  @parameterized.parameters([([],), (['grp2'],), (('grp2', 'grp3'),)])
-  def test_runs(self, split_by):
+  @parameterized.product(
+      split_by=[[], ['grp2'], ('grp2', 'grp3')],
+      ci_method=['std', 'percentile'],
+  )
+  def test_runs(self, split_by, ci_method):
     df = pd.DataFrame({
         'x': range(6),
         'grp': range(6),
@@ -2067,9 +2070,11 @@ def test_runs(self, split_by):
         metrics.Max('x'),
         metrics.Min('x'),
     ])
-    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9)
-    m2 = operations.PoissonBootstrap('grp', m, 5, 0.9, False)
-    m3 = operations.PoissonBootstrap(None, m, 5, 0.9)
+    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9, ci_method=ci_method)
+    m2 = operations.PoissonBootstrap(
+        'grp', m, 5, 0.9, False, ci_method=ci_method
+    )
+    m3 = operations.PoissonBootstrap(None, m, 5, 0.9, ci_method=ci_method)
     np.random.seed(0)
     res1 = m1.compute_on(df, split_by).display(return_formatted_df=True)
     np.random.seed(0)
@@ -2079,8 +2084,11 @@ def test_runs(self, split_by):
     testing.assert_frame_equal(res1, res2)
     testing.assert_frame_equal(res2, res3)
 
-  @parameterized.parameters([([],), (['grp2'],), (('grp2', 'grp3'),)])
-  def test_each_unit_has_one_row(self, split_by):
+  @parameterized.product(
+      split_by=[[], ['grp2'], ('grp2', 'grp3')],
+      ci_method=['std', 'percentile'],
+  )
+  def test_each_unit_has_one_row(self, split_by, ci_method):
     df = pd.DataFrame({
         'x': range(6),
         'grp': range(6),
@@ -2095,16 +2103,19 @@ def test_each_unit_has_one_row(self, split_by):
         metrics.Min('x'),
     ])
     m = operations.AbsoluteChange('grp4', 1, m)
-    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9)
-    m2 = operations.PoissonBootstrap(None, m, 5, 0.9)
+    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9, ci_method=ci_method)
+    m2 = operations.PoissonBootstrap(None, m, 5, 0.9, ci_method=ci_method)
     np.random.seed(0)
     res1 = m1.compute_on(df, split_by).display(return_formatted_df=True)
     np.random.seed(0)
     res2 = m2.compute_on(df, split_by).display(return_formatted_df=True)
     testing.assert_frame_equal(res1, res2)
 
-  @parameterized.parameters([([],), (['grp2'],), (('grp2', 'grp3'),)])
-  def test_each_unit_has_multiple_rows(self, split_by):
+  @parameterized.product(
+      split_by=[[], ['grp2'], ('grp2', 'grp3')],
+      ci_method=['std', 'percentile'],
+  )
+  def test_each_unit_has_multiple_rows(self, split_by, ci_method):
     df = pd.DataFrame({
         'x': range(6),
         'grp': [0] * 3 + [1] * 3,
@@ -2119,14 +2130,91 @@ def test_each_unit_has_multiple_rows(self, split_by):
         metrics.Min('x'),
     ])
     m = operations.AbsoluteChange('grp4', 0, m)
-    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9)
-    m2 = operations.PoissonBootstrap('grp', m, 5, 0.9, False)
+    m1 = operations.PoissonBootstrap('grp', m, 5, 0.9, ci_method=ci_method)
+    m2 = operations.PoissonBootstrap(
+        'grp', m, 5, 0.9, False, ci_method=ci_method
+    )
     np.random.seed(0)
     res1 = m1.compute_on(df, split_by).display(return_formatted_df=True)
     np.random.seed(0)
     res2 = m2.compute_on(df, split_by).display(return_formatted_df=True)
     testing.assert_frame_equal(res1, res2)
 
+  @parameterized.parameters(['std', 'percentile'])
+  def test_confidence(self, ci_method):
+    np.random.seed(42)
+    n = 100
+    x = np.arange(0, 3, 0.5)
+    df = pd.DataFrame({'x': x, 'grp': ['A'] * 3 + ['B'] * 3})
+    metric = metrics.MetricList((metrics.Sum('x'), metrics.Count('x')))
+    bootstrap_no_unit = operations.PoissonBootstrap(None, metric, n)
+
+    melted = operations.PoissonBootstrap(
+        None, metric, n, 0.9, ci_method=ci_method
+    ).compute_on(df, melted=True)
+    np.random.seed(42)
+    expected = bootstrap_no_unit.compute_on(df, melted=True)
+
+    if ci_method == 'std':
+      multiplier = stats.t.ppf((1 + 0.9) / 2, n - 1)
+      lower_ci = (
+          expected['Value'] - multiplier * expected['Poisson Bootstrap SE']
+      )
+      upper_ci = (
+          expected['Value'] + multiplier * expected['Poisson Bootstrap SE']
+      )
+    else:
+      np.random.seed(42)
+      estimates = []
+      for _ in range(n):
+        weights = np.random.poisson(size=len(x))
+        sample = df.iloc[np.arange(len(x)).repeat(weights)]
+        res = metric.compute_on(sample, return_dataframe=False)
+        estimates.append(res)
+
+      # calculate percentile directly here
+      q_lower = (1 - 0.9) / 2 * 100
+      q_upper = (1 + 0.9) / 2 * 100
+      lower_ci = [
+          np.percentile([e[0] for e in estimates], q_lower),
+          np.percentile([e[1] for e in estimates], q_lower),
+      ]
+      upper_ci = [
+          np.percentile([e[0] for e in estimates], q_upper),
+          np.percentile([e[1] for e in estimates], q_upper),
+      ]
+
+    expected['Poisson Bootstrap CI-lower'] = lower_ci
+    expected['Poisson Bootstrap CI-upper'] = upper_ci
+
+    expected.drop('Poisson Bootstrap SE', axis=1, inplace=True)
+    testing.assert_frame_equal(melted, expected, rtol=0.1)
+    melted.display()  # Check display() runs.
+
+    np.random.seed(42)
+    unmelted = operations.PoissonBootstrap(
+        None, metric, n, 0.9, ci_method=ci_method
+    ).compute_on(df)
+    expected = pd.DataFrame(
+        [
+            list(melted.loc['sum(x)'].values)
+            + list(melted.loc['count(x)'].values)
+        ],
+        columns=pd.MultiIndex.from_product(
+            [
+                ['sum(x)', 'count(x)'],
+                [
+                    'Value',
+                    'Poisson Bootstrap CI-lower',
+                    'Poisson Bootstrap CI-upper',
+                ],
+            ],
+            names=['Metric', None],
+        ),
+    )
+    testing.assert_frame_equal(unmelted, expected, rtol=0.1)
+    unmelted.display()  # Check display() runs.
+
   def test_get_samples_with_unit(self):
     x = range(6)
     df = pd.DataFrame({'x': x, 'grp': ['A'] * 3 + ['B'] * 3})