-
Notifications
You must be signed in to change notification settings - Fork 63
feat: Add BigQuery ObjectRef functions to bigframes.bigquery.obj
#2380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,107 @@ | ||
| # Copyright 2025 Google LLC | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
|
|
||
| """ | ||
| ObjectRef functions defined from | ||
| https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/objectref_functions | ||
| """ | ||
|
|
||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import datetime | ||
| from typing import Optional, Union | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
|
|
||
| from bigframes.core import log_adapter | ||
| import bigframes.core.utils as utils | ||
| import bigframes.operations as ops | ||
| import bigframes.series as series | ||
|
|
||
|
|
||
@log_adapter.method_logger(custom_base_name="bigquery_obj")
def fetch_metadata(
    objectref: series.Series,
) -> series.Series:
    """Return Cloud Storage metadata for partially populated ObjectRef values.

    Wraps the BigQuery ``OBJ.FETCH_METADATA`` function.

    Args:
        objectref (bigframes.series.Series):
            A partially populated ObjectRef value, in which the uri and
            authorizer fields are populated and the details field isn't.

    Returns:
        bigframes.series.Series: A fully populated ObjectRef value. The
        metadata is provided in the details field of the returned ObjectRef
        value.
    """
    # TODO(review): a reviewer asked for the preview disclaimer to be added
    # to this docstring.
    metadata_op = ops.obj_fetch_metadata_op
    return objectref._apply_unary_op(metadata_op)
|
|
||
|
|
||
@log_adapter.method_logger(custom_base_name="bigquery_obj")
def get_access_url(
    objectref: series.Series,
    mode: str,
    duration: Optional[
        Union[datetime.timedelta, pd.Timedelta, np.timedelta64]
    ] = None,
) -> series.Series:
    """Return JSON with reference information and access URLs for ObjectRef values.

    Wraps the BigQuery ``OBJ.GET_ACCESS_URL`` function. The returned JSON
    contains reference information for the input ObjectRef value, and also
    access URLs that you can use to read or modify the Cloud Storage object.

    Args:
        objectref (bigframes.series.Series):
            An ObjectRef value that represents a Cloud Storage object.
        mode (str):
            A STRING value that identifies the type of URL that you want to
            be returned. The following values are supported:

            * ``'r'``: Returns a URL that lets you read the object.
            * ``'rw'``: Returns two URLs, one that lets you read the object,
              and one that lets you modify the object.
        duration (Union[datetime.timedelta, pandas.Timedelta, numpy.timedelta64], optional):
            An optional INTERVAL value that specifies how long the generated
            access URLs remain valid. You can specify a value between 30
            minutes and 6 hours. For example, you could specify
            INTERVAL 2 HOUR to generate URLs that expire after 2 hours. The
            default value is 6 hours.

    Returns:
        bigframes.series.Series: A JSON value that contains the Cloud Storage
        object reference information from the input ObjectRef value, and also
        one or more URLs that you can use to access the Cloud Storage object.
    """
    # The op carries the duration as microseconds; an omitted duration is
    # passed through as None.
    duration_micros = (
        None if duration is None else utils.timedelta_to_micros(duration)
    )
    access_url_op = ops.ObjGetAccessUrl(mode=mode, duration=duration_micros)
    return objectref._apply_unary_op(access_url_op)
|
|
||
|
|
||
@log_adapter.method_logger(custom_base_name="bigquery_obj")
def make_ref(
    uri_or_json: series.Series,
    authorizer: Optional[series.Series] = None,
) -> series.Series:
    """Create ObjectRef values that reference Cloud Storage objects.

    Wraps the BigQuery ``OBJ.MAKE_REF`` function.

    Args:
        uri_or_json (bigframes.series.Series):
            A STRING value that contains the URI for the Cloud Storage
            object, for example, gs://mybucket/flowers/12345.jpg.
            OR
            A JSON value that represents a Cloud Storage object.
        authorizer (bigframes.series.Series, optional):
            A STRING value that contains the Cloud Resource connection used
            to access the Cloud Storage object.
            Required if uri_or_json is a URI string.

    Returns:
        bigframes.series.Series: An ObjectRef value.
    """
    # NOTE(review): a reviewer pointed out that in SQL the arguments can also
    # be single values, e.g. OBJ.MAKE_REF("gs://something",
    # "us.my_connection"), and asked whether that case should be supported.
    # Only Series inputs are handled here.
    if authorizer is None:
        # If authorizer is not provided, we assume uri_or_json is a JSON
        # objectref.
        return uri_or_json._apply_unary_op(ops.obj_make_ref_json_op)

    return uri_or_json._apply_binary_op(authorizer, ops.obj_make_ref_op)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| # Copyright 2025 Google LLC | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| """This module exposes `BigQuery ObjectRef | ||
| <https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/objectref_functions>`_ functions. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. link doesn't work. |
||
| """ | ||
|
|
||
| from bigframes.bigquery._operations.obj import ( | ||
| fetch_metadata, | ||
| get_access_url, | ||
| make_ref, | ||
| ) | ||
|
|
||
| __all__ = [ | ||
| "fetch_metadata", | ||
| "get_access_url", | ||
| "make_ref", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| # Copyright 2025 Google LLC | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| import datetime | ||
| from unittest.mock import MagicMock | ||
|
|
||
| import pytest | ||
|
|
||
| import bigframes.bigquery.obj as obj | ||
| import bigframes.operations as ops | ||
| import bigframes.series as series | ||
|
|
||
|
|
||
def test_fetch_metadata_op_structure():
    """The fetch-metadata op is exposed under its expected name."""
    assert ops.obj_fetch_metadata_op.name == "obj_fetch_metadata"
|
|
||
def test_get_access_url_op_structure():
    """An access-URL op built with only a mode has no duration."""
    read_op = ops.ObjGetAccessUrl(mode="r")

    assert read_op.name == "obj_get_access_url"
    assert read_op.mode == "r"
    assert read_op.duration is None
|
|
||
def test_get_access_url_with_duration_op_structure():
    """An access-URL op keeps the mode and duration it was built with."""
    one_hour_micros = 3600000000
    read_write_op = ops.ObjGetAccessUrl(mode="rw", duration=one_hour_micros)

    assert read_write_op.name == "obj_get_access_url"
    assert read_write_op.mode == "rw"
    assert read_write_op.duration == one_hour_micros
|
|
||
def test_make_ref_op_structure():
    """The make-ref op is exposed under its expected name."""
    assert ops.obj_make_ref_op.name == "obj_make_ref"
|
|
||
def test_make_ref_json_op_structure():
    """The JSON make-ref op is exposed under its expected name."""
    assert ops.obj_make_ref_json_op.name == "obj_make_ref_json"
|
|
||
def test_fetch_metadata_calls_apply_unary_op():
    """fetch_metadata applies the fetch-metadata op to the series exactly once."""
    mock_series = MagicMock(spec=series.Series)

    obj.fetch_metadata(mock_series)

    apply_unary = mock_series._apply_unary_op
    apply_unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the recorded call.
    positional = apply_unary.call_args[0]
    assert positional[0] == ops.obj_fetch_metadata_op
|
|
||
def test_get_access_url_calls_apply_unary_op_without_duration():
    """Without a duration, get_access_url applies an op whose duration is None."""
    mock_series = MagicMock(spec=series.Series)

    obj.get_access_url(mock_series, mode="r")

    apply_unary = mock_series._apply_unary_op
    apply_unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the recorded call.
    applied_op = apply_unary.call_args[0][0]
    assert isinstance(applied_op, ops.ObjGetAccessUrl)
    assert applied_op.mode == "r"
    assert applied_op.duration is None
|
|
||
def test_get_access_url_calls_apply_unary_op_with_duration():
    """A timedelta duration reaches the applied op as microseconds."""
    mock_series = MagicMock(spec=series.Series)
    one_hour = datetime.timedelta(hours=1)

    obj.get_access_url(mock_series, mode="rw", duration=one_hour)

    apply_unary = mock_series._apply_unary_op
    apply_unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the recorded call.
    applied_op = apply_unary.call_args[0][0]
    assert isinstance(applied_op, ops.ObjGetAccessUrl)
    assert applied_op.mode == "rw"
    # 1 hour = 3600 seconds = 3600 * 1000 * 1000 microseconds
    assert applied_op.duration == 3600 * 1000 * 1000
|
|
||
def test_make_ref_calls_apply_binary_op_with_authorizer():
    """With an authorizer, make_ref applies the binary make-ref op."""
    uri_series = MagicMock(spec=series.Series)
    authorizer_series = MagicMock(spec=series.Series)

    obj.make_ref(uri_series, authorizer=authorizer_series)

    apply_binary = uri_series._apply_binary_op
    apply_binary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the recorded call.
    positional = apply_binary.call_args[0]
    assert positional[0] == authorizer_series
    assert positional[1] == ops.obj_make_ref_op
|
|
||
def test_make_ref_calls_apply_unary_op_without_authorizer():
    """Without an authorizer, make_ref applies the JSON make-ref op."""
    json_series = MagicMock(spec=series.Series)

    obj.make_ref(json_series)

    apply_unary = json_series._apply_unary_op
    apply_unary.assert_called_once()
    # call_args[0] is the tuple of positional arguments of the recorded call.
    positional = apply_unary.call_args[0]
    assert positional[0] == ops.obj_make_ref_json_op
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,7 +11,7 @@ WITH `bfcte_0` AS ( | |
| ), `bfcte_2` AS ( | ||
| SELECT | ||
| *, | ||
| OBJ.GET_ACCESS_URL(`bfcol_4`) AS `bfcol_7` | ||
| OBJ.GET_ACCESS_URL(`bfcol_4`, 'R') AS `bfcol_7` | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We may still need minimum system tests. Otherwise we may fail to catch errors like this. WDYT? |
||
| FROM `bfcte_1` | ||
| ), `bfcte_3` AS ( | ||
| SELECT | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the link doesn't work
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/objectref_functions