Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions bigframes/core/compile/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,7 @@ def compile_read_table_unordered(
return compiled.UnorderedIR(
ibis_table,
tuple(
bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(
ibis_table[scan_item.source_id].name(scan_item.id.sql)
)
ibis_table[scan_item.source_id].name(scan_item.id.sql)
for scan_item in scan.items
),
)
Expand Down
31 changes: 10 additions & 21 deletions bigframes/core/compile/ibis_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,9 @@ def cast_ibis_value(

Raises:
TypeError: if the type cast cannot be executed"""
if value.type() == to_type:
# normalize to nullable, which doesn't impact compatibility
value_type = value.type().copy(nullable=True)
if value_type == to_type:
return value
# casts that just work
# TODO(bmil): add to this as more casts are verified
Expand Down Expand Up @@ -189,52 +191,39 @@ def cast_ibis_value(
ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,),
}

value = ibis_value_to_canonical_type(value)
if value.type() in good_casts:
if to_type in good_casts[value.type()]:
if value_type in good_casts:
if to_type in good_casts[value_type]:
return value.try_cast(to_type) if safe else value.cast(to_type)
else:
# this should never happen
raise TypeError(
f"Unexpected value type {value.type()}. {constants.FEEDBACK_LINK}"
f"Unexpected value type {value_type}. {constants.FEEDBACK_LINK}"
)

# casts that need some encouragement

# BigQuery casts bools to lower case strings. Capitalize the result to match Pandas
# TODO(bmil): remove this workaround after fixing Ibis
if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.string:
if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.string:
if safe:
return cast(ibis_types.StringValue, value.try_cast(to_type)).capitalize()
else:
return cast(ibis_types.StringValue, value.cast(to_type)).capitalize()

if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.float64:
if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.float64:
if safe:
return value.try_cast(ibis_dtypes.int64).try_cast(ibis_dtypes.float64)
else:
return value.cast(ibis_dtypes.int64).cast(ibis_dtypes.float64)

if value.type() == ibis_dtypes.float64 and to_type == ibis_dtypes.bool:
if value_type == ibis_dtypes.float64 and to_type == ibis_dtypes.bool:
return value != ibis_types.literal(0)

raise TypeError(
f"Unsupported cast {value.type()} to {to_type}. {constants.FEEDBACK_LINK}"
f"Unsupported cast {value_type} to {to_type}. {constants.FEEDBACK_LINK}"
)


def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value:
    """Return ``value`` cast to the nullable variant of its own type.

    More than one Ibis type can correspond to a single BigFrames dtype; this
    normalizes an expression to one canonical representation while keeping
    the expression's name.

    Args:
        value: The Ibis expression to normalize.

    Returns:
        The expression cast to a copy of its type with ``nullable=True``,
        renamed back to the name it had on entry.
    """
    # Allow REQUIRED fields to be joined with NULLABLE fields.
    canonical_type = value.type().copy(nullable=True)
    # The cast result is explicitly given the original name again.
    original_name = value.get_name()
    return value.cast(canonical_type).name(original_name)


def bigframes_dtype_to_ibis_dtype(
bigframes_dtype: bigframes.dtypes.Dtype,
) -> ibis_dtypes.DataType:
Expand Down
30 changes: 10 additions & 20 deletions third_party/bigframes_vendored/ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
from typing import Annotated, Any, Literal, Optional, TypeVar

from bigframes_vendored.ibis.common.annotations import attribute
from bigframes_vendored.ibis.common.collections import (
ConflictingValuesError,
FrozenDict,
FrozenOrderedDict,
)
from bigframes_vendored.ibis.common.collections import FrozenDict, FrozenOrderedDict
from bigframes_vendored.ibis.common.exceptions import (
IbisTypeError,
IntegrityError,
Expand Down Expand Up @@ -342,20 +338,6 @@ class Set(Relation):
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ConflictingValuesError as e:
raise RelationError(err_msg + "\n" + str(e)) from e
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
Expand All @@ -365,7 +347,15 @@ def __init__(self, left, right, **kwargs):

@attribute
def schema(self):
    """Schema of the relation produced by this set operation.

    Column names are taken from the left operand. Each column's type is the
    higher-precedence type of the left and right columns at the same
    position, so the result type accounts for both inputs rather than just
    mirroring the left side.

    Returns:
        A ``Schema`` built from ``(name, dtype)`` pairs.
    """
    left_schema = self.left.schema
    right_schema = self.right.schema
    # Pair columns positionally; the merged type is whichever side has the
    # higher precedence according to ``dt.higher_precedence``.
    merged_types = (
        dt.higher_precedence(ltype, rtype)
        for ltype, rtype in zip(left_schema.values(), right_schema.values())
    )
    return Schema.from_tuples(
        (name, coltype) for name, coltype in zip(left_schema.names, merged_types)
    )


@public
Expand Down