diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index a39914d6e7..ff52ae8d36 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -27,20 +27,27 @@ json_extract_array, json_extract_string_array, json_set, + parse_json, ) from bigframes.bigquery._operations.search import create_vector_index, vector_search from bigframes.bigquery._operations.struct import struct __all__ = [ + # approximate aggregate ops + "approx_top_count", + # array ops "array_length", "array_agg", "array_to_string", + # json ops "json_set", "json_extract", "json_extract_array", "json_extract_string_array", - "approx_top_count", - "struct", + "parse_json", + # search ops "create_vector_index", "vector_search", + # struct ops + "struct", ] diff --git a/bigframes/bigquery/_operations/json.py b/bigframes/bigquery/_operations/json.py index 843991807e..52b01d3ef7 100644 --- a/bigframes/bigquery/_operations/json.py +++ b/bigframes/bigquery/_operations/json.py @@ -23,6 +23,7 @@ from typing import Any, cast, Optional, Sequence, Tuple, Union +import bigframes.core.utils as utils import bigframes.dtypes import bigframes.operations as ops import bigframes.series as series @@ -30,6 +31,7 @@ from . import array +@utils.preview(name="The JSON-related API `json_set`") def json_set( input: series.Series, json_path_value_pairs: Sequence[Tuple[str, Any]], @@ -37,6 +39,10 @@ def json_set( """Produces a new JSON value within a Series by inserting or replacing values at specified paths. + .. warning:: + The JSON-related API `json_set` is in preview. Its behavior may change in + future versions. + **Examples:** >>> import bigframes.pandas as bpd @@ -223,3 +229,37 @@ def json_extract_string_array( ), ) return array_series + + +@utils.preview(name="The JSON-related API `parse_json`") +def parse_json( + input: series.Series, +) -> series.Series: + """Converts a series with a JSON-formatted STRING value to a JSON value. + + .. 
warning:: + The JSON-related API `parse_json` is in preview. Its behavior may change in + future versions. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import bigframes.bigquery as bbq + >>> bpd.options.display.progress_bar = None + + >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}']) + >>> s + 0 {"class": {"students": [{"id": 5}, {"id": 12}]}} + dtype: string + >>> bbq.parse_json(s) + 0 {"class":{"students":[{"id":5},{"id":12}]}} + dtype: large_string[pyarrow] + + Args: + input (bigframes.series.Series): + The Series containing JSON-formatted strings. + + Returns: + bigframes.series.Series: A new Series with the JSON value. + """ + return input._apply_unary_op(ops.ParseJSON()) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 2b85a97483..d594cb3d68 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -20,7 +20,6 @@ import bigframes_vendored.constants as constants import bigframes_vendored.ibis.expr.api as ibis_api import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes -import bigframes_vendored.ibis.expr.operations as ibis_ops import bigframes_vendored.ibis.expr.operations.generic as ibis_generic import bigframes_vendored.ibis.expr.operations.udf as ibis_udf import bigframes_vendored.ibis.expr.types as ibis_types @@ -1181,13 +1180,13 @@ def json_set_op_impl(x: ibis_types.Value, y: ibis_types.Value, op: ops.JSONSet): ) else: # Enabling JSON type eliminates the need for less efficient string conversions. 
- return ibis_ops.ToJsonString( + return to_json_string( json_set( # type: ignore - json_obj=parse_json(x), + json_obj=parse_json(json_str=x), json_path=op.json_path, json_value=y, ) - ).to_expr() + ) @scalar_op_compiler.register_unary_op(ops.JSONExtract, pass_op=True) @@ -1210,6 +1209,11 @@ def json_extract_string_array_op_impl( return json_extract_string_array(json_obj=x, json_path=op.json_path) +@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True) +def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON): + return parse_json(json_str=x) + + @scalar_op_compiler.register_unary_op(ops.ToJSONString) def to_json_string_op_impl(json_obj: ibis_types.Value): return to_json_string(json_obj=json_obj) diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index e684ac55a4..3bafa380bf 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -11,14 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import functools import re import typing from typing import Hashable, Iterable, List +import warnings import bigframes_vendored.pandas.io.common as vendored_pandas_io_common import pandas as pd import typing_extensions +import bigframes.exceptions as exc + UNNAMED_COLUMN_ID = "bigframes_unnamed_column" UNNAMED_INDEX_ID = "bigframes_unnamed_index" @@ -164,3 +168,24 @@ def merge_column_labels( result_labels.append(col_label) return pd.Index(result_labels) + + +def warn_preview(msg=""): + """Warn that an API is in preview.""" + warnings.warn(msg, exc.PreviewWarning) + + +def preview(*, name: str): + """Decorate to warn of a preview API.""" + + def decorator(func): + msg = f"{name} is in preview. Its behavior may change in future versions." 
+ + @functools.wraps(func) + def wrapper(*args, **kwargs): + warn_preview(msg=msg) + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 03d9d60d5f..37a40b7d01 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -740,6 +740,20 @@ def output_type(self, *input_types): ) +@dataclasses.dataclass(frozen=True) +class ParseJSON(UnaryOp): + name: typing.ClassVar[str] = "parse_json" + + def output_type(self, *input_types): + input_type = input_types[0] + if input_type != dtypes.STRING_DTYPE: + raise TypeError( + "Input type must be an valid JSON-formatted string type." + + f" Received type: {input_type}" + ) + return dtypes.JSON_DTYPE + + @dataclasses.dataclass(frozen=True) class ToJSONString(UnaryOp): name: typing.ClassVar[str] = "to_json_string" @@ -754,9 +768,6 @@ def output_type(self, *input_types): return dtypes.STRING_DTYPE -to_json_string_op = ToJSONString() - - ## Blob Ops @dataclasses.dataclass(frozen=True) class ObjGetAccessUrl(UnaryOp): diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 898d56ab83..d41cdf2b2d 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -110,8 +110,8 @@ def image_blur( ) dst_rt = dst._apply_unary_op(ops.ObjGetAccessUrl(mode="RW")) - src_rt = src_rt._apply_unary_op(ops.to_json_string_op) - dst_rt = dst_rt._apply_unary_op(ops.to_json_string_op) + src_rt = src_rt._apply_unary_op(ops.ToJSONString()) + dst_rt = dst_rt._apply_unary_op(ops.ToJSONString()) df = src_rt.to_frame().join(dst_rt.to_frame(), how="outer") df["ksize_x"], df["ksize_y"] = ksize diff --git a/tests/system/small/bigquery/test_json.py b/tests/system/small/bigquery/test_json.py index 3096897c80..b01ac3aaf2 100644 --- a/tests/system/small/bigquery/test_json.py +++ b/tests/system/small/bigquery/test_json.py @@ -209,3 +209,8 @@ def test_json_in_struct(): "SELECT STRUCT(JSON '{\\\"a\\\": 
1}' AS data, 1 AS number) as struct_col" ) assert df["struct_col"].struct.field("data")[0] == '{"a":1}' + + +def test_parse_json_w_invalid_series_type(): + with pytest.raises(TypeError): + bbq.parse_json(bpd.Series([1, 2]))