Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.
Merged
8 changes: 7 additions & 1 deletion bigframes/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@
unix_millis,
unix_seconds,
)
from bigframes.bigquery._operations.geo import st_area, st_difference, st_intersection
from bigframes.bigquery._operations.geo import (
st_area,
st_difference,
st_distance,
st_intersection,
)
from bigframes.bigquery._operations.json import (
json_extract,
json_extract_array,
Expand All @@ -49,6 +54,7 @@
# geo ops
"st_area",
"st_difference",
"st_distance",
"st_intersection",
# json ops
"json_set",
Expand Down
161 changes: 116 additions & 45 deletions bigframes/bigquery/_operations/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@

from __future__ import annotations

from typing import Union

import shapely # type: ignore

from bigframes import operations as ops
import bigframes.dtypes
import bigframes.geopandas
import bigframes.series

Expand All @@ -25,7 +28,9 @@
"""


def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
def st_area(
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
) -> bigframes.series.Series:
"""
Returns the area in square meters covered by the polygons in the input
`GEOGRAPHY`.
Expand Down Expand Up @@ -85,6 +90,10 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:
4 0.0
dtype: Float64

Args:
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
A series containing geography objects.

Returns:
bigframes.pandas.Series:
Series of float representing the areas.
Expand All @@ -95,7 +104,10 @@ def st_area(series: bigframes.series.Series) -> bigframes.series.Series:


def st_difference(
series: bigframes.series.Series, other: bigframes.series.Series
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
other: Union[
bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
],
) -> bigframes.series.Series:
"""
Returns a `GEOGRAPHY` that represents the point set difference of
Expand Down Expand Up @@ -166,44 +178,23 @@ def st_difference(
5 None
dtype: geometry

We can also check difference of single shapely geometries:

>>> polygon_s1 = bigframes.geopandas.GeoSeries(
... [
... Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
... ]
... )
>>> polygon_s2 = bigframes.geopandas.GeoSeries(
... [
... Polygon([(4, 2), (6, 2), (8, 6), (4, 2)])
... ]
... )

>>> polygon_s1
0 POLYGON ((0 0, 10 0, 10 10, 0 0))
dtype: geometry

>>> polygon_s2
0 POLYGON ((4 2, 6 2, 8 6, 4 2))
dtype: geometry

>>> bbq.st_difference(polygon_s1, polygon_s2)
0 POLYGON ((0 0, 10 0, 10 10, 0 0), (8 6, 6 2, 4...
dtype: geometry

Additionally, we can check difference of a GeoSeries against a single shapely geometry:

>>> bbq.st_difference(s1, polygon_s2)
0 POLYGON ((0 0, 2 2, 0 2, 0 0))
1 None
2 None
3 None
4 None
>>> polygon = Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])
>>> bbq.st_difference(s1, polygon)
0 POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
1 POLYGON ((1.97082 2.00002, 0 2, 0 0, 1.97082 2...
2 GEOMETRYCOLLECTION EMPTY
3 LINESTRING (0.99265 1.00781, 0 2)
4 POINT (0 1)
dtype: geometry

Args:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Miss series argument here.

other (bigframes.series.Series or geometric object):
The GeoSeries (elementwise) or geometric object to find the difference to.
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
A series containing geography objects.
other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
The series or geometric object to subtract from the geography
objects in ``series``.

Returns:
bigframes.series.Series:
Expand All @@ -213,8 +204,86 @@ def st_difference(
return series._apply_binary_op(other, ops.geo_st_difference_op)


def st_distance(
    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
    other: Union[
        bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
    ],
    *,
    use_spheroid: bool = False,
) -> bigframes.series.Series:
    """
    Returns the shortest distance in meters between two non-empty
    ``GEOGRAPHY`` objects.

    **Examples:**

        >>> import bigframes as bpd
        >>> import bigframes.bigquery as bbq
        >>> import bigframes.geopandas
        >>> from shapely.geometry import Polygon, LineString, Point
        >>> bpd.options.display.progress_bar = None

    Two GeoSeries can be compared against each other, row by row.

        >>> s1 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0, 0),
        ...         Point(0.00001, 0),
        ...         Point(0.00002, 0),
        ...     ],
        ... )
        >>> s2 = bigframes.geopandas.GeoSeries(
        ...     [
        ...         Point(0.00001, 0),
        ...         Point(0.00003, 0),
        ...         Point(0.00005, 0),
        ...     ],
        ... )

        >>> bbq.st_distance(s1, s2, use_spheroid=True)
        0    1.113195
        1     2.22639
        2    3.339585
        dtype: Float64

    The distance between each geometry and a single shapely geometry can
    also be calculated:

        >>> bbq.st_distance(s2, Point(0.00001, 0))
        0         0.0
        1    2.223902
        2    4.447804
        dtype: Float64

    Args:
        series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
            A series containing geography objects.
        other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
            The series or geometric object to calculate the distance in meters
            to from the geography objects in ``series``.
        use_spheroid (bool, optional, default ``False``):
            Determines how this function measures distance. If ``use_spheroid``
            is False, the function measures distance on the surface of a perfect
            sphere. If ``use_spheroid`` is True, the function measures distance
            on the surface of the `WGS84 spheroid
            <https://cloud.google.com/bigquery/docs/geospatial-data>`_.
            Keyword-only.

    Returns:
        bigframes.pandas.Series:
            The Series (elementwise) of the smallest distance between
            each aligned geometry with ``other``.
    """
    # The spheroid flag is carried by the op instance itself, so a new op is
    # built for each call rather than sharing a module-level operator.
    distance_op = ops.GeoStDistanceOp(use_spheroid=use_spheroid)
    return series._apply_binary_op(other, distance_op)


def st_intersection(
series: bigframes.series.Series, other: bigframes.series.Series
series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
other: Union[
bigframes.series.Series, bigframes.geopandas.GeoSeries, shapely.Geometry
],
) -> bigframes.series.Series:
"""
Returns a `GEOGRAPHY` that represents the point set intersection of the two
Expand Down Expand Up @@ -284,18 +353,20 @@ def st_intersection(

We can also do intersection of each geometry and a single shapely geometry:

>>> bbq.st_intersection(s1, bigframes.geopandas.GeoSeries([Polygon([(0, 0), (1, 1), (0, 1)])]))
>>> bbq.st_intersection(s1, Polygon([(0, 0), (1, 1), (0, 1)]))
0 POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
1 None
2 None
3 None
4 None
1 POLYGON ((0 0, 0.99954 1, 0 1, 0 0))
2 LINESTRING (0 0, 0.99954 1)
3 GEOMETRYCOLLECTION EMPTY
4 POINT (0 1)
dtype: geometry

Args:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing series argument here.

other (GeoSeries or geometric object):
The Geoseries (elementwise) or geometric object to find the
intersection with.
series (bigframes.pandas.Series | bigframes.geopandas.GeoSeries):
A series containing geography objects.
other (bigframes.pandas.Series | bigframes.geopandas.GeoSeries | shapely.Geometry):
The series or geometric object to intersect with the geography
objects in ``series``.

Returns:
bigframes.geopandas.GeoSeries:
Expand Down
12 changes: 12 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,13 @@ def geo_st_difference_op_impl(x: ibis_types.Value, y: ibis_types.Value):
)


@scalar_op_compiler.register_binary_op(ops.GeoStDistanceOp, pass_op=True)
def geo_st_distance_op_impl(
    x: ibis_types.Value, y: ibis_types.Value, op: ops.GeoStDistanceOp
):
    """Compile a ``GeoStDistanceOp`` into a call to ``st_distance``.

    The op instance is received as the third argument (``pass_op=True``) so
    that its ``use_spheroid`` setting can be forwarded to ``st_distance``.
    """
    use_spheroid = op.use_spheroid
    return st_distance(x, y, use_spheroid)


@scalar_op_compiler.register_unary_op(ops.geo_st_geogfromtext_op)
def geo_st_geogfromtext_op_impl(x: ibis_types.Value):
# Ibis doesn't seem to provide a dedicated method to cast from string to geography,
Expand Down Expand Up @@ -1989,6 +1996,11 @@ def st_boundary(a: ibis_dtypes.geography) -> ibis_dtypes.geography: # type: ign
"""Find the boundary of a geography."""


@ibis_udf.scalar.builtin
def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float: # type: ignore
    """Find the shortest distance between two geographies."""


@ibis_udf.scalar.builtin
def unix_micros(a: ibis_dtypes.timestamp) -> int: # type: ignore
"""Convert a timestamp to microseconds"""
Expand Down
24 changes: 13 additions & 11 deletions bigframes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,30 +586,32 @@ def _is_bigframes_dtype(dtype) -> bool:
return False


def _infer_dtype_from_python_type(type: type) -> Dtype:
if type in (datetime.timedelta, pd.Timedelta, np.timedelta64):
def _infer_dtype_from_python_type(type_: type) -> Dtype:
if type_ in (datetime.timedelta, pd.Timedelta, np.timedelta64):
# Must check timedelta types first. Otherwise other branches would evaluate to true.
# E.g. np.timedelta64 is a subclass of np.integer
return TIMEDELTA_DTYPE
if issubclass(type, (bool, np.bool_)):
if issubclass(type_, (bool, np.bool_)):
return BOOL_DTYPE
if issubclass(type, (int, np.integer)):
if issubclass(type_, (int, np.integer)):
return INT_DTYPE
if issubclass(type, (float, np.floating)):
if issubclass(type_, (float, np.floating)):
return FLOAT_DTYPE
if issubclass(type, decimal.Decimal):
if issubclass(type_, decimal.Decimal):
return NUMERIC_DTYPE
if issubclass(type, (str, np.str_)):
if issubclass(type_, (str, np.str_)):
return STRING_DTYPE
if issubclass(type, (bytes, np.bytes_)):
if issubclass(type_, (bytes, np.bytes_)):
return BYTES_DTYPE
if issubclass(type, datetime.date):
if issubclass(type_, datetime.date):
return DATE_DTYPE
if issubclass(type, datetime.time):
if issubclass(type_, datetime.time):
return TIME_DTYPE
if issubclass(type_, shapely.Geometry):
return GEO_DTYPE
else:
raise TypeError(
f"No matching datatype for python type: {type}. {constants.FEEDBACK_LINK}"
f"No matching datatype for python type: {type_}. {constants.FEEDBACK_LINK}"
)


Expand Down
22 changes: 5 additions & 17 deletions bigframes/geopandas/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,23 +47,6 @@ def y(self) -> bigframes.series.Series:
# we can.
@property
def area(self, crs=None) -> bigframes.series.Series: # type: ignore
"""Returns a Series containing the area of each geometry in the GeoSeries
expressed in the units of the CRS.

Args:
crs (optional):
Coordinate Reference System of the geometry objects. Can be
anything accepted by pyproj.CRS.from_user_input(), such as an
authority string (eg “EPSG:4326”) or a WKT string.

Returns:
bigframes.pandas.Series:
Series of float representing the areas.

Raises:
NotImplementedError:
GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.
"""
raise NotImplementedError(
f"GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. {constants.FEEDBACK_LINK}"
)
Expand Down Expand Up @@ -97,5 +80,10 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series:
def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
return self._apply_binary_op(other, ops.geo_st_difference_op)

def distance(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series:  # type: ignore
    """Not supported on GeoSeries; always raises.

    Raises:
        NotImplementedError:
            GeoSeries.distance is not supported. Use
            bigframes.bigquery.st_distance(series, other), instead.
    """
    message = f"GeoSeries.distance is not supported. Use bigframes.bigquery.st_distance(series, other), instead. {constants.FEEDBACK_LINK}"
    raise NotImplementedError(message)

def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore
return self._apply_binary_op(other, ops.geo_st_intersection_op)
2 changes: 2 additions & 0 deletions bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
geo_st_intersection_op,
geo_x_op,
geo_y_op,
GeoStDistanceOp,
)
from bigframes.operations.json_ops import (
JSONExtract,
Expand Down Expand Up @@ -375,6 +376,7 @@
"geo_st_intersection_op",
"geo_x_op",
"geo_y_op",
"GeoStDistanceOp",
# Numpy ops mapping
"NUMPY_TO_BINOP",
"NUMPY_TO_OP",
Expand Down
11 changes: 11 additions & 0 deletions bigframes/operations/geo_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import dataclasses

from bigframes import dtypes
from bigframes.operations import base_ops
import bigframes.operations.type as op_typing
Expand Down Expand Up @@ -69,3 +71,12 @@
geo_st_intersection_op = base_ops.create_binary_op(
name="geo_st_intersection", type_signature=op_typing.BinaryGeo()
)


@dataclasses.dataclass(frozen=True)
class GeoStDistanceOp(base_ops.BinaryOp):
    """Binary operator whose result type is always a float.

    Unlike the other geo binary ops in this module, which are created with
    ``base_ops.create_binary_op``, this one is a dataclass because it
    carries a parameter (``use_spheroid``).
    """

    # Operator name; unannotated, so it is a plain class attribute rather
    # than a dataclass field.
    name = "st_distance"
    # Dataclass field with no default, so it must be supplied when the op is
    # constructed.
    use_spheroid: bool

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return ``dtypes.FLOAT_DTYPE``; the input types are not inspected."""
        return dtypes.FLOAT_DTYPE
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
"pyarrow >=15.0.2",
"pydata-google-auth >=1.8.2",
"requests >=2.27.1",
"shapely >=1.8.5",
"shapely >=2.0.0",
"sqlglot >=23.6.3",
"tabulate >=0.9",
"ipywidgets >=7.7.1",
Expand Down
Loading