diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 3898a22b5b..21e61bc4b1 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -22,6 +22,11 @@ array_length, array_to_string, ) +from bigframes.bigquery._operations.datetime import ( + unix_micros, + unix_millis, + unix_seconds, +) from bigframes.bigquery._operations.json import ( json_extract, json_extract_array, @@ -53,4 +58,8 @@ "sql_scalar", # struct ops "struct", + # datetime ops + "unix_micros", + "unix_millis", + "unix_seconds", ] diff --git a/bigframes/bigquery/_operations/datetime.py b/bigframes/bigquery/_operations/datetime.py new file mode 100644 index 0000000000..f8767336dd --- /dev/null +++ b/bigframes/bigquery/_operations/datetime.py @@ -0,0 +1,97 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes import operations as ops +from bigframes import series + + +def unix_seconds(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch seconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_seconds(s) + 0 86400 + 1 172800 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. 
+ + Returns: + bigframes.pandas.Series: A new series of unix epoch in seconds. + + """ + return input._apply_unary_op(ops.UnixSeconds()) + + +def unix_millis(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch milliseconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_millis(s) + 0 86400000 + 1 172800000 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. + + Returns: + bigframes.pandas.Series: A new series of unix epoch in milliseconds. + + """ + return input._apply_unary_op(ops.UnixMillis()) + + +def unix_micros(input: series.Series) -> series.Series: + """Converts a timestamp series to unix epoch microseconds + + **Examples:** + + >>> import pandas as pd + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series([pd.Timestamp("1970-01-02", tz="UTC"), pd.Timestamp("1970-01-03", tz="UTC")]) + >>> bbq.unix_micros(s) + 0 86400000000 + 1 172800000000 + dtype: Int64 + + Args: + input (bigframes.pandas.Series): + A timestamp series. + + Returns: + bigframes.pandas.Series: A new series of unix epoch in microseconds. 
+ + """ + return input._apply_unary_op(ops.UnixMicros()) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index a44c785dec..4f670b51ca 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -722,6 +722,21 @@ def strftime_op_impl(x: ibis_types.Value, op: ops.StrftimeOp): ) +@scalar_op_compiler.register_unary_op(ops.UnixSeconds) +def unix_seconds_op_impl(x: ibis_types.TimestampValue): + return x.epoch_seconds() + + +@scalar_op_compiler.register_unary_op(ops.UnixMicros) +def unix_micros_op_impl(x: ibis_types.TimestampValue): + return unix_micros(x) + + +@scalar_op_compiler.register_unary_op(ops.UnixMillis) +def unix_millis_op_impl(x: ibis_types.TimestampValue): + return unix_millis(x) + + @scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True) def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp): supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"] @@ -1887,6 +1902,16 @@ def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore """Convert string to timestamp.""" +@ibis_udf.scalar.builtin +def unix_millis(a: ibis_dtypes.timestamp) -> int: # type: ignore + """Convert a timestamp to milliseconds""" + + +@ibis_udf.scalar.builtin +def unix_micros(a: ibis_dtypes.timestamp) -> int: # type: ignore + """Convert a timestamp to microseconds""" + + # Need these because ibis otherwise tries to do casts to int that can fail @ibis_udf.scalar.builtin(name="floor") def float_floor(a: float) -> float: diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 6645d1ca2b..e55cbc4925 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -51,6 +51,9 @@ time_op, ToDatetimeOp, ToTimestampOp, + UnixMicros, + UnixMillis, + UnixSeconds, ) from bigframes.operations.distance_ops import ( cosine_distance_op, @@ -243,6 +246,9 @@ "ToDatetimeOp", "ToTimestampOp", "StrftimeOp", + 
"UnixMicros", + "UnixMillis", + "UnixSeconds", # Numeric ops "abs_op", "add_op", diff --git a/bigframes/operations/datetime_ops.py b/bigframes/operations/datetime_ops.py index 54d4f4ffd4..3ee8a00141 100644 --- a/bigframes/operations/datetime_ops.py +++ b/bigframes/operations/datetime_ops.py @@ -77,3 +77,27 @@ class StrftimeOp(base_ops.UnaryOp): def output_type(self, *input_types): return dtypes.STRING_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixSeconds(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_seconds" + + def output_type(self, *input_types): + return dtypes.INT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixMillis(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_millis" + + def output_type(self, *input_types): + return dtypes.INT_DTYPE + + +@dataclasses.dataclass(frozen=True) +class UnixMicros(base_ops.UnaryOp): + name: typing.ClassVar[str] = "unix_micros" + + def output_type(self, *input_types): + return dtypes.INT_DTYPE diff --git a/tests/system/small/bigquery/test_datetime.py b/tests/system/small/bigquery/test_datetime.py new file mode 100644 index 0000000000..984e75ac10 --- /dev/null +++ b/tests/system/small/bigquery/test_datetime.py @@ -0,0 +1,66 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +import pandas as pd + +from bigframes import bigquery + + +def test_unix_seconds(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_seconds(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "s")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def test_unix_millis(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_millis(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "ms")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def test_unix_micros(scalars_dfs): + bigframes_df, pandas_df = scalars_dfs + + actual_res = bigquery.unix_micros(bigframes_df["timestamp_col"]).to_pandas() + + expected_res = ( + pandas_df["timestamp_col"] + .apply(lambda ts: _to_unix_epoch(ts, "us")) + .astype("Int64") + ) + pd.testing.assert_series_equal(actual_res, expected_res) + + +def _to_unix_epoch( + ts: pd.Timestamp, unit: typing.Literal["s", "ms", "us"] +) -> typing.Optional[int]: + if pd.isna(ts): + return None + return (ts - pd.Timestamp("1970-01-01", tz="UTC")) // pd.Timedelta(1, unit)