From 47118da7ddbee1c4fe7b221513c56bfedee7e5c9 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Fri, 12 Apr 2024 01:27:04 +0000
Subject: [PATCH 1/8] feat: Add quantile statistic

---
 bigframes/core/block_transforms.py            | 32 +++++++++++++
 bigframes/core/blocks.py                      | 11 +++--
 bigframes/core/compile/aggregate_compiler.py  |  8 ++++
 bigframes/core/groupby/__init__.py            | 46 ++++++++++++++++---
 bigframes/dataframe.py                        | 17 +++++++
 bigframes/operations/aggregations.py          | 12 +++++
 bigframes/series.py                           | 19 ++++++--
 tests/system/small/test_dataframe.py          | 30 +++++++++++-
 tests/system/small/test_groupby.py            | 35 ++++++++++++++
 tests/system/small/test_series.py             | 21 +++++++++
 .../ibis/backends/bigquery/registry.py        |  8 ++++
 .../bigframes_vendored/pandas/core/frame.py   | 43 ++++++++++++++++-
 .../pandas/core/groupby/__init__.py           | 28 +++++++++++
 .../bigframes_vendored/pandas/core/series.py  | 31 +++++++++++++
 14 files changed, 324 insertions(+), 17 deletions(-)

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index c789b2a69c..61ffdf9a7d 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -15,6 +15,7 @@
 
 import functools
 import typing
+from typing import Sequence
 
 import pandas as pd
 
@@ -105,6 +106,37 @@ def indicate_duplicates(
     )
 
 
+def quantile(
+    block: blocks.Block,
+    columns: Sequence[str],
+    qs: Sequence[float],
+    grouping_column_ids: Sequence[str] = (),
+) -> blocks.Block:
+    # TODO: handle windowing and more interpolation methods
+    window = core.WindowSpec(
+        grouping_keys=tuple(grouping_column_ids),
+    )
+    quantile_cols = []
+    labels = []
+    for col in columns:
+        for q in qs:
+            label = block.col_id_to_label[col]
+            new_label = (*label, q) if isinstance(label, tuple) else (label, q)
+            labels.append(new_label)
+            block, quantile_col = block.apply_window_op(
+                col,
+                agg_ops.QuantileOp(q),
+                window_spec=window,
+            )
+            quantile_cols.append(quantile_col)
+    block, results = block.aggregate(
+        grouping_column_ids,
+        tuple((col, agg_ops.AnyValueOp()) for col in quantile_cols),
+        dropna=True,
+    )
+    return block.select_columns(results).with_column_labels(labels)
+
+
 def interpolate(block: blocks.Block, method: str = "linear") -> blocks.Block:
     supported_methods = [
         "linear",
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 5b411e5416..7ae513028e 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -1498,12 +1498,15 @@ def stack(self, how="left", levels: int = 1):
 
         row_label_tuples = utils.index_as_tuples(row_labels)
 
-        if col_labels is not None:
+        if col_labels is None:
+            result_index: pd.Index = pd.Index([None])
+            result_col_labels: Sequence[Tuple] = list([()])
+        elif all(col_labels.isna()):
+            result_index = pd.Index([None])
+            result_col_labels = utils.index_as_tuples(col_labels.drop_duplicates())
+        else:
             result_index = col_labels.drop_duplicates().dropna(how="all")
             result_col_labels = utils.index_as_tuples(result_index)
-        else:
-            result_index = pd.Index([None])
-            result_col_labels = list([()])
 
         # Get matching columns
         unpivot_columns: List[Tuple[str, List[str]]] = []
diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py
index ae21243506..98d296c779 100644
--- a/bigframes/core/compile/aggregate_compiler.py
+++ b/bigframes/core/compile/aggregate_compiler.py
@@ -148,6 +148,14 @@ def _(
     return cast(ibis_types.NumericValue, value)
 
 
+@compile_unary_agg.register
+@numeric_op
+def _(
+    op: agg_ops.QuantileOp, column: ibis_types.NumericColumn, window=None
+) -> ibis_types.NumericValue:
+    return _apply_window_if_present(column.quantile(op.q), window)
+
+
 @compile_unary_agg.register
 @numeric_op
 def _(
diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index e2b28553c6..4efbdaea7e 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -15,6 +15,7 @@
 from __future__ import annotations
 
 import typing
+from typing import Sequence, Union
 
 import bigframes_vendored.pandas.core.groupby as vendored_pandas_groupby
 import pandas as pd
@@ -115,14 +116,26 @@ def mean(self, numeric_only: bool = False, *args) -> df.DataFrame:
     def median(
         self, numeric_only: bool = False, *, exact: bool = False
     ) -> df.DataFrame:
-        if exact:
-            raise NotImplementedError(
-                f"Only approximate median is supported. {constants.FEEDBACK_LINK}"
-            )
         if not numeric_only:
             self._raise_on_non_numeric("median")
+        if exact:
+            return self.quantile(0.5)
         return self._aggregate_all(agg_ops.median_op, numeric_only=True)
 
+    def quantile(self, q=0.5) -> df.DataFrame:
+        multi_q = utils.is_list_like(q)
+        result = block_ops.quantile(
+            self._block,
+            self._selected_cols,
+            qs=tuple(q) if multi_q else (q,),
+            grouping_column_ids=self._by_col_ids,
+        )
+        result_df = df.DataFrame(result)
+        if multi_q:
+            return result_df.stack()
+        else:
+            return result_df.droplevel(-1, 1)
+
     def min(self, numeric_only: bool = False, *args) -> df.DataFrame:
         return self._aggregate_all(agg_ops.min_op, numeric_only=numeric_only)
 
@@ -466,8 +479,29 @@ def sum(self, *args) -> series.Series:
     def mean(self, *args) -> series.Series:
         return self._aggregate(agg_ops.mean_op)
 
-    def median(self, *args, **kwargs) -> series.Series:
-        return self._aggregate(agg_ops.mean_op)
+    def median(
+        self,
+        *args,
+        exact: bool = False,
+        **kwargs,
+    ) -> series.Series:
+        if exact:
+            return self.quantile(0.5)
+        else:
+            return self._aggregate(agg_ops.median_op)
+
+    def quantile(self, q: Union[float, Sequence[float]] = 0.5) -> series.Series:
+        multi_q = utils.is_list_like(q)
+        result = block_ops.quantile(
+            self._block,
+            (self._value_column,),
+            qs=tuple(q) if multi_q else (q,),  # type: ignore
+            grouping_column_ids=self._by_col_ids,
+        )
+        if multi_q:
+            return series.Series(result.stack())
+        else:
+            return series.Series(result.stack()).droplevel(-1)
 
     def std(self, *args, **kwargs) -> series.Series:
         return self._aggregate(agg_ops.std_op)
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 32f5a36f79..ad21185c5a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -1920,6 +1920,23 @@ def median(
         block = frame._block.aggregate_all_and_stack(agg_ops.median_op)
         return bigframes.series.Series(block.select_column("values"))
 
+    def quantile(self, q):
+        multi_q = utils.is_list_like(q)
+        result = block_ops.quantile(
+            self._block, self._block.value_columns, qs=tuple(q) if multi_q else (q,)
+        )
+        if multi_q:
+            return DataFrame(result.stack()).droplevel(0)
+        else:
+            result_df = (
+                DataFrame(result)
+                .stack(list(range(0, self.columns.nlevels)))
+                .droplevel(0)
+            )
+            result = bigframes.series.Series(result_df._block)
+            result.name = q
+            return result
+
     def std(
         self, axis: typing.Union[str, int] = 0, *, numeric_only: bool = False
     ) -> bigframes.series.Series:
diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py
index 36fa787644..45782d007b 100644
--- a/bigframes/operations/aggregations.py
+++ b/bigframes/operations/aggregations.py
@@ -109,6 +109,18 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
             return input_types[0]
 
 
+@dataclasses.dataclass(frozen=True)
+class QuantileOp(UnaryAggregateOp):
+    q: float  # quantile to compute, as a fraction (0.5 is the median)
+
+    @property
+    def name(self):
+        return f"{self.q * 100:g}%"  # :g rounds, so 0.29 -> "29%" (not "28%")
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        return signatures.UNARY_REAL_NUMERIC.output_type(input_types[0])
+
+
 @dataclasses.dataclass(frozen=True)
 class ApproxQuartilesOp(UnaryAggregateOp):
     quartile: int
diff --git a/bigframes/series.py b/bigframes/series.py
index f11511f969..a4053cec10 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -22,7 +22,7 @@
 import os
 import textwrap
 import typing
-from typing import Any, Literal, Mapping, Optional, Sequence, Tuple, Union
+from typing import Any, cast, Literal, Mapping, Optional, Sequence, Tuple, Union
 
 import bigframes_vendored.pandas.core.series as vendored_pandas_series
 import google.cloud.bigquery as bigquery
@@ -918,10 +918,19 @@ def mean(self) -> float:
 
     def median(self, *, exact: bool = False) -> float:
         if exact:
-            raise NotImplementedError(
-                f"Only approximate median is supported. {constants.FEEDBACK_LINK}"
-            )
-        return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
+            return self.quantile(0.5)
+        else:
+            return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
+
+    def quantile(self, q: float):
+        qs = tuple(q) if utils.is_list_like(q) else (q,)
+        result = block_ops.quantile(self._block, (self._value_column,), qs=qs)
+        if utils.is_list_like(q):
+            result = result.stack()
+            result = result.drop_levels([result.index_columns[0]])
+            return Series(result)
+        else:
+            return cast(float, Series(result).to_pandas().squeeze())
 
     def sum(self) -> float:
         return typing.cast(float, self._apply_aggregation(agg_ops.sum_op))
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index e70764fcc0..7fef7a9dc7 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2504,7 +2504,10 @@ def test_df_melt_default(scalars_dfs):
 
     # Pandas produces int64 index, Bigframes produces Int64 (nullable)
     pd.testing.assert_frame_equal(
-        bf_result, pd_result, check_index_type=False, check_dtype=False
+        bf_result,
+        pd_result,
+        check_index_type=False,
+        check_dtype=False,
     )
 
 
@@ -3029,6 +3032,31 @@ def test_dataframe_aggregates_median(scalars_df_index, scalars_pandas_df_index):
         )
 
 
+def test_dataframe_aggregates_quantile_mono(scalars_df_index, scalars_pandas_df_index):
+    q = 0.45
+    col_names = ["int64_too", "int64_col", "float64_col"]
+    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_dataframe_aggregates_quantile_multi(scalars_df_index, scalars_pandas_df_index):
+    q = [0, 0.33, 0.67, 1.0]
+    col_names = ["int64_too", "int64_col", "float64_col"]
+    bf_result = scalars_df_index[col_names].quantile(q=q).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].quantile(q=q)
+
+    # Pandas may produce narrower numeric types, but bigframes always produces Float64
+    pd_result = pd_result.astype("Float64")
+    pd_result.index = pd_result.index.astype("Float64")
+
+    pd.testing.assert_frame_equal(bf_result, pd_result)
+
+
 @pytest.mark.parametrize(
     ("op"),
     [
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
index ba79ba1ab1..7b36a06f49 100644
--- a/tests/system/small/test_groupby.py
+++ b/tests/system/small/test_groupby.py
@@ -65,6 +65,24 @@ def test_dataframe_groupby_median(scalars_df_index, scalars_pandas_df_index):
     assert ((pd_min <= bf_result_computed) & (bf_result_computed <= pd_max)).all().all()
 
 
+@pytest.mark.parametrize(
+    ("q"),
+    [
+        ([0.2, 0.4, 0.6, 0.8]),
+        (0.11),
+    ],
+)
+def test_dataframe_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q):
+    col_names = ["int64_too", "float64_col", "int64_col", "string_col"]
+    bf_result = (
+        scalars_df_index[col_names].groupby("string_col").quantile(q)
+    ).to_pandas()
+    pd_result = scalars_pandas_df_index[col_names].groupby("string_col").quantile(q)
+    pd.testing.assert_frame_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
 @pytest.mark.parametrize(
     ("operator"),
     [
@@ -389,3 +407,20 @@ def test_dataframe_groupby_nonnumeric_with_mean():
     pd.testing.assert_frame_equal(
         pd_result, bf_result, check_index_type=False, check_dtype=False
     )
+
+
+@pytest.mark.parametrize(
+    ("q"),
+    [
+        ([0.2, 0.4, 0.6, 0.8]),
+        (0.11),
+    ],
+)
+def test_series_groupby_quantile(scalars_df_index, scalars_pandas_df_index, q):
+    bf_result = (
+        scalars_df_index.groupby("string_col")["int64_col"].quantile(q)
+    ).to_pandas()
+    pd_result = scalars_pandas_df_index.groupby("string_col")["int64_col"].quantile(q)
+    pd.testing.assert_series_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 6e4a87df4f..b733c69fd7 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1320,6 +1320,27 @@ def test_median(scalars_dfs):
     assert pd_min < bf_result < pd_max
 
 
+def test_median_exact(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name = "int64_col"
+    bf_result = scalars_df[col_name].median(exact=True)
+    pd_result = scalars_pandas_df[col_name].median()
+    assert math.isclose(pd_result, bf_result)
+
+
+def test_series_quantile(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    col_name = "int64_col"
+    bf_series = scalars_df[col_name]
+    pd_series = scalars_pandas_df[col_name]
+
+    pd_result = pd_series.quantile([0.0, 0.4, 0.6, 1.0])
+    bf_result = bf_series.quantile([0.0, 0.4, 0.6, 1.0])
+    pd.testing.assert_series_equal(
+        pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False
+    )
+
+
 def test_numeric_literal(scalars_dfs):
     scalars_df, _ = scalars_dfs
     col_name = "numeric_col"
diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
index 88826b31ce..fddeab19a2 100644
--- a/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
+++ b/third_party/bigframes_vendored/ibis/backends/bigquery/registry.py
@@ -3,6 +3,7 @@
 
 import bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
 from ibis.backends.bigquery.registry import OPERATION_REGISTRY
+import ibis.expr.operations.reductions as ibis_reductions
 
 
 def _approx_quantiles(translator, op: vendored_ibis_ops.ApproximateMultiQuantile):
@@ -31,12 +32,19 @@ def _generate_array(translator, op: vendored_ibis_ops.GenerateArray):
     return f"GENERATE_ARRAY(0, {arg})"
 
 
+def _quantile(translator, op: ibis_reductions.Quantile):
+    arg = translator.translate(op.arg)
+    quantile = translator.translate(op.quantile)
+    return f"PERCENTILE_CONT({arg}, {quantile})"
+
+
 patched_ops = {
     vendored_ibis_ops.ApproximateMultiQuantile: _approx_quantiles,  # type:ignore
     vendored_ibis_ops.FirstNonNullValue: _first_non_null_value,  # type:ignore
     vendored_ibis_ops.LastNonNullValue: _last_non_null_value,  # type:ignore
     vendored_ibis_ops.ToJsonString: _to_json_string,  # type:ignore
     vendored_ibis_ops.GenerateArray: _generate_array,  # type:ignore
+    ibis_reductions.Quantile: _quantile,  # type:ignore
 }
 
 OPERATION_REGISTRY.update(patched_ops)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 1fc80449d1..7221245cb4 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3786,13 +3786,54 @@ def median(self, *, numeric_only: bool = False, exact: bool = False):
                 Default False. Include only float, int, boolean columns.
             exact (bool. default False):
                 Default False. Get the exact median instead of an approximate
-                one. Note: ``exact=True`` not yet supported.
+                one.
 
         Returns:
             bigframes.series.Series: Series with the median of values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def quantile(
+        self,
+        q=0.5,
+    ):
+        """
+        Return values at the given quantile over requested axis.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
+            ...                   columns=['a', 'b'])
+            >>> df.quantile(.1)
+            a    1.3
+            b    3.7
+            Name: 0.1, dtype: Float64
+            >>> df.quantile([.1, .5])
+                   a     b
+            0.1  1.3   3.7
+            0.5  2.5  55.0
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+        Args:
+            q (float or array-like, default 0.5 (50% quantile)):
+                Value between 0 <= q <= 1, the quantile(s) to compute.
+            axis : {0 or 'index', 1 or 'columns'}, default 0
+                Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
+            numeric_only : bool, default False
+                Include only `float`, `int` or `boolean` data.
+
+        Returns:
+            Series or DataFrame:
+                If ``q`` is an array, a DataFrame will be returned where the
+                index is ``q``, the columns are the columns of self, and the
+                values are the quantiles.
+                If ``q`` is a float, a Series will be returned where the
+                index is the columns of self and the values are the quantiles.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def var(self, axis=0, *, numeric_only: bool = False):
         """Return unbiased variance over requested axis.
 
diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
index ed4ca66f38..9b65fe475c 100644
--- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
+++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
@@ -85,6 +85,34 @@ def median(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def quantile(self, q=0.5):
+        """
+        Return group values at the given quantile, a la numpy.percentile.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> df = bpd.DataFrame([
+            ...     ['a', 1], ['a', 2], ['a', 3],
+            ...     ['b', 1], ['b', 3], ['b', 5]
+            ... ], columns=['key', 'val'])
+            >>> df.groupby('key').quantile()
+                 val
+            key
+            a    2.0
+            b    3.0
+            <BLANKLINE>
+            [2 rows x 1 columns]
+
+        Args:
+            q (float or array-like, default 0.5 (50% quantile)):
+                Value(s) between 0 and 1 providing the quantile(s) to compute.
+
+        Returns:
+            Series or DataFrame: Return type determined by caller of GroupBy object.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def std(
         self,
         *,
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 192e19fa5a..aec20a5c08 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -2757,6 +2757,37 @@ def median(self, *, exact: bool = False):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def quantile(
+        self,
+        q=0.5,
+    ) -> float | Series:
+        """
+        Return value at the given quantile.
+
+        **Examples:**
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series([1, 2, 3, 4])
+            >>> s.quantile(.5)
+            2.5
+            >>> s.quantile([.25, .5, .75])
+            0.25    1.75
+            0.5      2.5
+            0.75    3.25
+            dtype: Float64
+
+        Args:
+            q (float or array-like, default 0.5 (50% quantile)):
+                The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
+
+        Returns:
+            float or Series:
+                If ``q`` is an array, a Series will be returned where the
+                index is ``q`` and the values are the quantiles, otherwise
+                a float will be returned.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def prod(self):
         """Return the product of the values over the requested axis.
 

From 77a982db3290511ba6a9861dc07771b0346a1893 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Mon, 15 Apr 2024 18:42:36 +0000
Subject: [PATCH 2/8] fix series.quantile type annotation

---
 bigframes/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index c8a8db8b1a..9d30bdbb1e 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -972,7 +972,7 @@ def median(self, *, exact: bool = False) -> float:
         else:
             return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
 
-    def quantile(self, q: float):
+    def quantile(self, q: float) -> Union[Series, float]:
         qs = tuple(q) if utils.is_list_like(q) else (q,)
         result = block_ops.quantile(self._block, (self._value_column,), qs=qs)
         if utils.is_list_like(q):

From 856115e6d80a485413af025f12ef882c7fb0d425 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Mon, 15 Apr 2024 22:05:25 +0000
Subject: [PATCH 3/8] add numeric_only param

---
 bigframes/core/groupby/__init__.py            | 19 +++++++++++++++----
 bigframes/dataframe.py                        | 19 ++++++++++++++-----
 .../bigframes_vendored/pandas/core/frame.py   |  9 ++-------
 .../pandas/core/groupby/__init__.py           |  4 +++-
 .../bigframes_vendored/pandas/core/series.py  |  2 +-
 5 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 4efbdaea7e..0f53342352 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -122,12 +122,21 @@ def median(
-            return self.quantile(0.5)
+            return self.quantile(0.5, numeric_only=True)  # non-numerics vetted above
         return self._aggregate_all(agg_ops.median_op, numeric_only=True)
 
-    def quantile(self, q=0.5) -> df.DataFrame:
+    def quantile(
+        self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
+    ) -> df.DataFrame:
+        if not numeric_only:
+            self._raise_on_non_numeric("quantile")
+        q_cols = tuple(
+            col
+            for col in self._selected_cols
+            if self._column_type(col) in dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE
+        )
         multi_q = utils.is_list_like(q)
         result = block_ops.quantile(
             self._block,
-            self._selected_cols,
-            qs=tuple(q) if multi_q else (q,),
+            q_cols,
+            qs=tuple(q) if multi_q else (q,),  # type: ignore
             grouping_column_ids=self._by_col_ids,
         )
         result_df = df.DataFrame(result)
@@ -490,7 +499,9 @@ def median(
         else:
             return self._aggregate(agg_ops.median_op)
 
-    def quantile(self, q: Union[float, Sequence[float]] = 0.5) -> series.Series:
+    def quantile(
+        self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
+    ) -> series.Series:
         multi_q = utils.is_list_like(q)
         result = block_ops.quantile(
             self._block,
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 7a51ef9874..8a338aae61 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2009,20 +2009,29 @@ def median(
             frame = self._raise_on_non_numeric("median")
         else:
             frame = self._drop_non_numeric()
-        block = frame._block.aggregate_all_and_stack(agg_ops.median_op)
-        return bigframes.series.Series(block.select_column("values"))
+        if exact:
+            return frame.quantile()  # reuse the already-filtered numeric frame
+        else:
+            block = frame._block.aggregate_all_and_stack(agg_ops.median_op)
+            return bigframes.series.Series(block.select_column("values"))
 
-    def quantile(self, q):
+    def quantile(
+        self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
+    ):
+        if not numeric_only:
+            frame = self._raise_on_non_numeric("quantile")
+        else:
+            frame = self._drop_non_numeric()
         multi_q = utils.is_list_like(q)
         result = block_ops.quantile(
-            self._block, self._block.value_columns, qs=tuple(q) if multi_q else (q,)
+            frame._block, frame._block.value_columns, qs=tuple(q) if multi_q else (q,)  # type: ignore
         )
         if multi_q:
             return DataFrame(result.stack()).droplevel(0)
         else:
             result_df = (
                 DataFrame(result)
-                .stack(list(range(0, self.columns.nlevels)))
+                .stack(list(range(0, frame.columns.nlevels)))
                 .droplevel(0)
             )
             result = bigframes.series.Series(result_df._block)
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 4a030d0841..0791021bc2 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4516,10 +4516,7 @@ def median(self, *, numeric_only: bool = False, exact: bool = False):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def quantile(
-        self,
-        q=0.5,
-    ):
+    def quantile(self, q=0.5, *, numeric_only: bool = False):
         """
         Return values at the given quantile over requested axis.
 
@@ -4542,9 +4539,7 @@ def quantile(
         Args:
             q (float or array-like, default 0.5 (50% quantile)):
                 Value between 0 <= q <= 1, the quantile(s) to compute.
-            axis : {0 or 'index', 1 or 'columns'}, default 0
-                Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
-            numeric_only : bool, default False
+            numeric_only (bool, default False):
                 Include only `float`, `int` or `boolean` data.
 
         Returns:
diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
index 9b65fe475c..6310d7e271 100644
--- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
+++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
@@ -85,7 +85,7 @@ def median(
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def quantile(self, q=0.5):
+    def quantile(self, q=0.5, *, numeric_only: bool = False):
         """
         Return group values at the given quantile, a la numpy.percentile.
 
@@ -107,6 +107,8 @@ def quantile(self, q=0.5):
         Args:
             q (float or array-like, default 0.5 (50% quantile)):
                 Value(s) between 0 and 1 providing the quantile(s) to compute.
+            numeric_only (bool, default False):
+                Include only `float`, `int` or `boolean` data.
 
         Returns:
             Series or DataFrame: Return type determined by caller of GroupBy object.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 919bd5986d..30341ed680 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -3154,7 +3154,7 @@ def median(self, *, exact: bool = False):
     def quantile(
         self,
         q=0.5,
-    ) -> float | Series:
+    ):
         """
         Return value at the given quantile.
 

From 9983ff0e93c4c8582da13e9118fea1e79546bf22 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Mon, 15 Apr 2024 23:16:00 +0000
Subject: [PATCH 4/8] add limit and fix types

---
 bigframes/constants.py             | 3 +++
 bigframes/core/block_transforms.py | 2 ++
 bigframes/dataframe.py             | 6 +++---
 bigframes/series.py                | 2 +-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/bigframes/constants.py b/bigframes/constants.py
index 0751501085..c6d8f3acc2 100644
--- a/bigframes/constants.py
+++ b/bigframes/constants.py
@@ -92,3 +92,6 @@
 LEP_ENABLED_BIGQUERY_LOCATIONS = frozenset(
     ALL_BIGQUERY_LOCATIONS - REP_ENABLED_BIGQUERY_LOCATIONS
 )
+
+# BigQuery's column limit is 10,000 per table; leave 100 for overhead
+MAX_COLUMNS = 9900
diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index 61ffdf9a7d..1eae73014c 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -118,6 +118,8 @@ def quantile(
     )
     quantile_cols = []
     labels = []
+    if len(columns) * len(qs) > constants.MAX_COLUMNS:
+        raise NotImplementedError("Too many aggregates requested.")
     for col in columns:
         for q in qs:
             label = block.col_id_to_label[col]
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 8a338aae61..953a89c34f 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2034,9 +2034,9 @@ def quantile(
                 .stack(list(range(0, frame.columns.nlevels)))
                 .droplevel(0)
             )
-            result = bigframes.series.Series(result_df._block)
-            result.name = q
-            return result
+            result_series = bigframes.series.Series(result_df._block)
+            result_series.name = q
+            return result_series
 
     def std(
         self, axis: typing.Union[str, int] = 0, *, numeric_only: bool = False
diff --git a/bigframes/series.py b/bigframes/series.py
index 9d30bdbb1e..febfdc3c6a 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -972,7 +972,7 @@ def median(self, *, exact: bool = False) -> float:
         else:
             return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
 
-    def quantile(self, q: float) -> Union[Series, float]:
+    def quantile(self, q: Union[float, Sequence[float]]) -> Union[Series, float]:
         qs = tuple(q) if utils.is_list_like(q) else (q,)
         result = block_ops.quantile(self._block, (self._value_column,), qs=qs)
         if utils.is_list_like(q):

From 04dcb9877cf0281b4056fba9fad41874fea78e8f Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 16 Apr 2024 16:21:47 +0000
Subject: [PATCH 5/8] fix mypy

---
 bigframes/series.py                               |  2 +-
 .../bigframes_vendored/pandas/core/frame.py       |  4 +++-
 .../bigframes_vendored/pandas/core/series.py      | 15 ++++++++++++---
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index febfdc3c6a..0a8e398e08 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -972,7 +972,7 @@ def median(self, *, exact: bool = False) -> float:
         else:
             return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
 
-    def quantile(self, q: Union[float, Sequence[float]]) -> Union[Series, float]:
+    def quantile(self, q: Union[float, Sequence[float]] = 0.5) -> Union[Series, float]:
         qs = tuple(q) if utils.is_list_like(q) else (q,)
         result = block_ops.quantile(self._block, (self._value_column,), qs=qs)
         if utils.is_list_like(q):
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 0791021bc2..e894900646 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -4516,7 +4516,9 @@ def median(self, *, numeric_only: bool = False, exact: bool = False):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def quantile(self, q=0.5, *, numeric_only: bool = False):
+    def quantile(
+        self, q: Union[float, Sequence[float]] = 0.5, *, numeric_only: bool = False
+    ):
         """
         Return values at the given quantile over requested axis.
 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 30341ed680..5e3b4c46ef 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -3,7 +3,16 @@
 """
 from __future__ import annotations
 
-from typing import Hashable, IO, Literal, Mapping, Optional, Sequence, TYPE_CHECKING
+from typing import (
+    Hashable,
+    IO,
+    Literal,
+    Mapping,
+    Optional,
+    Sequence,
+    TYPE_CHECKING,
+    Union,
+)
 
 from bigframes_vendored.pandas.core.generic import NDFrame
 import numpy
@@ -3153,8 +3162,8 @@ def median(self, *, exact: bool = False):
 
     def quantile(
         self,
-        q=0.5,
-    ):
+        q: Union[float, Sequence[float]] = 0.5,
+    ) -> Union[Series, float]:
         """
         Return value at the given quantile.
 

From d0b3fc38770865e10306e81f8d723ca487413780 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 16 Apr 2024 17:34:51 +0000
Subject: [PATCH 6/8] actually fix mypy

---
 bigframes/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/series.py b/bigframes/series.py
index 0a8e398e08..b834411bce 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -968,7 +968,7 @@ def mean(self) -> float:
 
     def median(self, *, exact: bool = False) -> float:
         if exact:
-            return self.quantile(0.5)
+            return typing.cast(float, self.quantile(0.5))
         else:
             return typing.cast(float, self._apply_aggregation(agg_ops.median_op))
 

From b76ad9bd75efbf072eb30b65c77016339eefc753 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 16 Apr 2024 20:10:39 +0000
Subject: [PATCH 7/8] fix issue with multiindex isna not impl

---
 bigframes/core/blocks.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
index 7ae513028e..f6850020df 100644
--- a/bigframes/core/blocks.py
+++ b/bigframes/core/blocks.py
@@ -1501,7 +1501,9 @@ def stack(self, how="left", levels: int = 1):
         if col_labels is None:
             result_index: pd.Index = pd.Index([None])
             result_col_labels: Sequence[Tuple] = list([()])
-        elif all(col_labels.isna()):
+        elif (col_labels.nlevels == 1) and all(
+            col_labels.isna()
+        ):  # isna not implemented for MultiIndex for newer pandas versions
             result_index = pd.Index([None])
             result_col_labels = utils.index_as_tuples(col_labels.drop_duplicates())
         else:

From 677aaf2afcdf6277613ac446e1d9c0d780ec9e81 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Tue, 16 Apr 2024 22:23:30 +0000
Subject: [PATCH 8/8] fix plot accessor doctests printing progress bar

---
 third_party/bigframes_vendored/pandas/plotting/_core.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/third_party/bigframes_vendored/pandas/plotting/_core.py b/third_party/bigframes_vendored/pandas/plotting/_core.py
index 19f56965df..bf016357a6 100644
--- a/third_party/bigframes_vendored/pandas/plotting/_core.py
+++ b/third_party/bigframes_vendored/pandas/plotting/_core.py
@@ -11,6 +11,7 @@ class PlotAccessor:
     For Series:
 
         >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
         >>> ser = bpd.Series([1, 2, 3, 3])
         >>> plot = ser.plot(kind='hist', title="My plot")
 
@@ -57,6 +58,7 @@ def hist(
 
             >>> import bigframes.pandas as bpd
             >>> import numpy as np
+            >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame(np.random.randint(1, 7, 6000), columns=['one'])
             >>> df['two'] = np.random.randint(1, 7, 6000) + np.random.randint(1, 7, 6000)
             >>> ax = df.plot.hist(bins=12, alpha=0.5)
@@ -93,6 +95,7 @@ def line(
         **Examples:**
 
             >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame(
             ...     {
             ...         'one': [1, 2, 3, 4],
@@ -160,6 +163,7 @@ def area(
         Draw an area plot based on basic business metrics:
 
             >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
             >>> df = bpd.DataFrame(
             ...     {
             ...         'sales': [3, 2, 3, 9, 10, 6],