Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 37 additions & 45 deletions bigframes/core/compile/aggregate_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,69 +360,61 @@ def _(
if isinstance(op.bins, int):
col_min = _apply_window_if_present(x.min(), window)
col_max = _apply_window_if_present(x.max(), window)
adj = (col_max - col_min) * 0.001
bin_width = (col_max - col_min) / op.bins

if op.labels is False:
for this_bin in range(op.bins - 1):
if op.right:
case_expr = x <= (col_min + (this_bin + 1) * bin_width)
else:
case_expr = x < (col_min + (this_bin + 1) * bin_width)
out = out.when(
case_expr,
compile_ibis_types.literal_to_ibis_scalar(
this_bin, force_dtype=pd.Int64Dtype()
),
for this_bin in range(op.bins):
if op.labels is False:
value = compile_ibis_types.literal_to_ibis_scalar(
this_bin, force_dtype=pd.Int64Dtype()
)
out = out.when(x.notnull(), op.bins - 1)
else:
interval_struct = None
adj = (col_max - col_min) * 0.001
for this_bin in range(op.bins):
left_edge_adj = adj if this_bin == 0 and op.right else 0
right_edge_adj = adj if this_bin == op.bins - 1 and not op.right else 0
else:
left_adj = adj if this_bin == 0 and op.right else 0
right_adj = adj if this_bin == op.bins - 1 and not op.right else 0

left_edge = col_min + this_bin * bin_width - left_edge_adj
right_edge = col_min + (this_bin + 1) * bin_width + right_edge_adj
left = col_min + this_bin * bin_width - left_adj
right = col_min + (this_bin + 1) * bin_width + right_adj

if op.right:
interval_struct = ibis_types.struct(
{
"left_exclusive": left_edge,
"right_inclusive": right_edge,
}
value = ibis_types.struct(
{"left_exclusive": left, "right_inclusive": right}
)
else:
interval_struct = ibis_types.struct(
{
"left_inclusive": left_edge,
"right_exclusive": right_edge,
}
value = ibis_types.struct(
{"left_inclusive": left, "right_exclusive": right}
)

if this_bin < op.bins - 1:
if op.right:
case_expr = x <= (col_min + (this_bin + 1) * bin_width)
else:
case_expr = x < (col_min + (this_bin + 1) * bin_width)
out = out.when(case_expr, interval_struct)
if this_bin == op.bins - 1:
case_expr = x.notnull()
else:
if op.right:
case_expr = x <= (col_min + (this_bin + 1) * bin_width)
else:
out = out.when(x.notnull(), interval_struct)
case_expr = x < (col_min + (this_bin + 1) * bin_width)
out = out.when(case_expr, value)
else: # Interpret as intervals
for interval in op.bins:
for this_bin, interval in enumerate(op.bins):
left = compile_ibis_types.literal_to_ibis_scalar(interval[0])
right = compile_ibis_types.literal_to_ibis_scalar(interval[1])
if op.right:
condition = (x > left) & (x <= right)
interval_struct = ibis_types.struct(
{"left_exclusive": left, "right_inclusive": right}
)
else:
condition = (x >= left) & (x < right)
interval_struct = ibis_types.struct(
{"left_inclusive": left, "right_exclusive": right}

if op.labels is False:
value = compile_ibis_types.literal_to_ibis_scalar(
this_bin, force_dtype=pd.Int64Dtype()
)
out = out.when(condition, interval_struct)
else:
if op.right:
value = ibis_types.struct(
{"left_exclusive": left, "right_inclusive": right}
)
else:
value = ibis_types.struct(
{"left_inclusive": left, "right_exclusive": right}
)

out = out.when(condition, value)
return out.end()


Expand Down
15 changes: 13 additions & 2 deletions bigframes/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def cut(
"The 'labels' parameter must be either False or None. "
"Please provide a valid value for 'labels'."
)
if x.size == 0:
raise ValueError("Cannot cut empty array.")

if isinstance(bins, int):
if bins <= 0:
Expand All @@ -58,14 +60,19 @@ def cut(
bins = tuple((bin.left.item(), bin.right.item()) for bin in bins)
# To maintain consistency with pandas' behavior
right = True
labels = None
elif len(list(bins)) == 0:
as_index = pd.IntervalIndex.from_tuples(list(bins))
bins = tuple()
# To maintain consistency with pandas' behavior
right = True
labels = None
elif isinstance(list(bins)[0], tuple):
as_index = pd.IntervalIndex.from_tuples(list(bins))
bins = tuple(bins)
# To maintain consistency with pandas' behavior
right = True
labels = None
elif pd.api.types.is_number(list(bins)[0]):
bins_list = list(bins)
as_index = pd.IntervalIndex.from_breaks(bins_list)
Expand All @@ -83,9 +90,13 @@ def cut(
if as_index.is_overlapping:
raise ValueError("Overlapping IntervalIndex is not accepted.")
elif len(as_index) == 0:
op = agg_ops.CutOp(bins, right=right, labels=labels)
dtype = agg_ops.CutOp(bins, right=right, labels=labels).output_type()
return bigframes.series.Series(
[pd.NA] * len(x), dtype=op.output_type(), name=x.name
[pd.NA] * len(x),
dtype=dtype,
name=x.name,
index=x.index,
session=x._session,
)
else:
op = agg_ops.CutOp(bins, right=right, labels=labels)
Expand Down
2 changes: 1 addition & 1 deletion bigframes/operations/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def skips_nulls(self):
return False

def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
if isinstance(self.bins, int) and (self.labels is False):
if self.labels is False:
return dtypes.INT_DTYPE
else:
# Assumption: buckets use same numeric type
Expand Down
Loading