Skip to content

Commit 97993a8

Browse files
Merge remote-tracking branch 'github/main' into defer_reproject
2 parents 11802b6 + 52b7786 commit 97993a8

File tree

24 files changed

+1017
-240
lines changed

24 files changed

+1017
-240
lines changed

CHANGELOG.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,26 @@
44

55
[1]: https://pypi.org/project/bigframes/#history
66

7+
## [1.13.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.12.0...v1.13.0) (2024-08-05)
8+
9+
10+
### Features
11+
12+
* `df.apply(axis=1)` to support remote function with multiple params ([#851](https://github.com/googleapis/python-bigquery-dataframes/issues/851)) ([2158818](https://github.com/googleapis/python-bigquery-dataframes/commit/2158818e53e09e55c87ffd574e3ebc2e201285fb))
13+
* Allow windowing in 'partial' ordering mode ([#861](https://github.com/googleapis/python-bigquery-dataframes/issues/861)) ([ca26fe5](https://github.com/googleapis/python-bigquery-dataframes/commit/ca26fe5f9edec519788c276a09eaff33ecd87434))
14+
* Create a separate OrderingModePartialPreviewWarning for more fine-grained warning filters ([#879](https://github.com/googleapis/python-bigquery-dataframes/issues/879)) ([8753bdd](https://github.com/googleapis/python-bigquery-dataframes/commit/8753bdd1e44701e56eae914ebc0e91d9b1a6adf1))
15+
16+
17+
### Bug Fixes
18+
19+
* Fix issue with invalid sql generated by ml distance functions ([#865](https://github.com/googleapis/python-bigquery-dataframes/issues/865)) ([9959fc8](https://github.com/googleapis/python-bigquery-dataframes/commit/9959fc8fcba93441fdd3d9c17e8fdbe6e6a7b504))
20+
21+
22+
### Documentation
23+
24+
* Create sample notebook using `ordering_mode="partial"` ([#880](https://github.com/googleapis/python-bigquery-dataframes/issues/880)) ([c415eb9](https://github.com/googleapis/python-bigquery-dataframes/commit/c415eb91eb71dea53d245ba2bce416062e3f02f8))
25+
* Update streaming notebook ([#875](https://github.com/googleapis/python-bigquery-dataframes/issues/875)) ([e9b0557](https://github.com/googleapis/python-bigquery-dataframes/commit/e9b05571123cf13079772856317ca3cd3d564c5a))
26+
727
## [1.12.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.11.1...v1.12.0) (2024-07-31)
828

929

bigframes/_config/bigquery_options.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from __future__ import annotations
1818

19-
from enum import Enum
2019
from typing import Literal, Optional
2120
import warnings
2221

@@ -25,14 +24,9 @@
2524
import jellyfish
2625

2726
import bigframes.constants
27+
import bigframes.enums
2828
import bigframes.exceptions
2929

30-
31-
class OrderingMode(Enum):
32-
STRICT = "strict"
33-
PARTIAL = "partial"
34-
35-
3630
SESSION_STARTED_MESSAGE = (
3731
"Cannot change '{attribute}' once a session has started. "
3832
"Call bigframes.pandas.close_session() first, if you are using the bigframes.pandas API."
@@ -64,11 +58,11 @@ def _validate_location(value: Optional[str]):
6458
)
6559

6660

67-
def _validate_ordering_mode(value: str) -> OrderingMode:
68-
if value.casefold() == OrderingMode.STRICT.value.casefold():
69-
return OrderingMode.STRICT
70-
if value.casefold() == OrderingMode.PARTIAL.value.casefold():
71-
return OrderingMode.PARTIAL
61+
def _validate_ordering_mode(value: str) -> bigframes.enums.OrderingMode:
62+
if value.casefold() == bigframes.enums.OrderingMode.STRICT.value.casefold():
63+
return bigframes.enums.OrderingMode.STRICT
64+
if value.casefold() == bigframes.enums.OrderingMode.PARTIAL.value.casefold():
65+
return bigframes.enums.OrderingMode.PARTIAL
7266
raise ValueError("Ordering mode must be one of 'strict' or 'partial'.")
7367

7468

bigframes/core/__init__.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,17 @@ def promote_offsets(self, col_id: str) -> ArrayValue:
194194
"""
195195
Convenience function to promote copy of column offsets to a value column. Can be used to reset index.
196196
"""
197-
if self.node.order_ambiguous and not self.session._strictly_ordered:
198-
raise ValueError("Generating offsets not supported in unordered mode")
197+
if self.node.order_ambiguous and not (self.session._strictly_ordered):
198+
if not self.session._allows_ambiguity:
199+
raise ValueError(
200+
"Generating offsets not supported in partial ordering mode"
201+
)
202+
else:
203+
warnings.warn(
204+
"Window ordering may be ambiguous, this can cause unstable results.",
205+
bigframes.exceptions.AmbiguousWindowWarning,
206+
)
207+
199208
return ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id))
200209

201210
def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
@@ -347,9 +356,16 @@ def project_window_op(
347356
# TODO: Support non-deterministic windowing
348357
if window_spec.row_bounded or not op.order_independent:
349358
if self.node.order_ambiguous and not self.session._strictly_ordered:
350-
raise ValueError(
351-
"Order-dependent windowed ops not supported in unordered mode"
352-
)
359+
if not self.session._allows_ambiguity:
360+
raise ValueError(
361+
"Generating offsets not supported in partial ordering mode"
362+
)
363+
else:
364+
warnings.warn(
365+
"Window ordering may be ambiguous, this can cause unstable results.",
366+
bigframes.exceptions.AmbiguousWindowWarning,
367+
)
368+
353369
return ArrayValue(
354370
nodes.WindowOpNode(
355371
child=self.node,

bigframes/core/blocks.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ def index_name_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
280280
mapping[label] = (*mapping.get(label, ()), id)
281281
return mapping
282282

283+
@property
284+
def explicitly_ordered(self) -> bool:
285+
return self.expr.node.explicitly_ordered
286+
283287
def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
284288
"""
285289
Unlike label_to_col_id, this works with partial labels for multi-index.

bigframes/core/compile/compiled.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def to_sql(
256256
ordered: bool = False,
257257
) -> str:
258258
if offset_column or ordered:
259-
raise ValueError("Cannot produce sorted sql in unordered mode")
259+
raise ValueError("Cannot produce sorted sql in partial ordering mode")
260260
sql = ibis_bigquery.Backend().compile(
261261
self._to_ibis_expr(
262262
col_id_overrides=col_id_overrides,

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,19 +191,27 @@ def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
191191

192192
return decorator
193193

194-
def register_nary_op(self, op_ref: typing.Union[ops.NaryOp, type[ops.NaryOp]]):
194+
def register_nary_op(
195+
self, op_ref: typing.Union[ops.NaryOp, type[ops.NaryOp]], pass_op: bool = False
196+
):
195197
"""
196198
Decorator to register a nary op implementation.
197199
198200
Args:
199201
op_ref (NaryOp or NaryOp type):
200202
Class or instance of operator that is implemented by the decorated function.
203+
pass_op (bool):
204+
Set to true if the implementation takes the operator object as a keyword argument named ``op``.
205+
This is needed for parameterized ops where parameters are part of op object.
201206
"""
202207
key = typing.cast(str, op_ref.name)
203208

204209
def decorator(impl: typing.Callable[..., ibis_types.Value]):
205210
def normalized_impl(args: typing.Sequence[ibis_types.Value], op: ops.RowOp):
206-
return impl(*args)
211+
if pass_op:
212+
return impl(*args, op=op)
213+
else:
214+
return impl(*args)
207215

208216
self._register(key, normalized_impl)
209217
return impl
@@ -1468,6 +1476,7 @@ def clip_op(
14681476
)
14691477

14701478

1479+
# N-ary Operations
14711480
@scalar_op_compiler.register_nary_op(ops.case_when_op)
14721481
def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value:
14731482
# ibis can handle most type coercions, but we need to force bool -> int
@@ -1487,6 +1496,19 @@ def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value:
14871496
return case_val.end()
14881497

14891498

1499+
@scalar_op_compiler.register_nary_op(ops.NaryRemoteFunctionOp, pass_op=True)
1500+
def nary_remote_function_op_impl(
1501+
*operands: ibis_types.Value, op: ops.NaryRemoteFunctionOp
1502+
):
1503+
ibis_node = getattr(op.func, "ibis_node", None)
1504+
if ibis_node is None:
1505+
raise TypeError(
1506+
f"only a bigframes remote function is supported as a callable. {constants.FEEDBACK_LINK}"
1507+
)
1508+
result = ibis_node(*operands)
1509+
return result
1510+
1511+
14901512
# Helpers
14911513
def is_null(value) -> bool:
14921514
# float NaN/inf should be treated as distinct from 'true' null values

bigframes/core/groupby/__init__.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def __getitem__(
109109
dropna=self._dropna,
110110
)
111111

112-
@validations.requires_strict_ordering()
112+
@validations.requires_ordering()
113113
def head(self, n: int = 5) -> df.DataFrame:
114114
block = self._block
115115
if self._dropna:
@@ -235,25 +235,25 @@ def count(self) -> df.DataFrame:
235235
def nunique(self) -> df.DataFrame:
236236
return self._aggregate_all(agg_ops.nunique_op)
237237

238-
@validations.requires_strict_ordering()
238+
@validations.requires_ordering()
239239
def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
240240
if not numeric_only:
241241
self._raise_on_non_numeric("cumsum")
242242
return self._apply_window_op(agg_ops.sum_op, numeric_only=True)
243243

244-
@validations.requires_strict_ordering()
244+
@validations.requires_ordering()
245245
def cummin(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
246246
return self._apply_window_op(agg_ops.min_op, numeric_only=numeric_only)
247247

248-
@validations.requires_strict_ordering()
248+
@validations.requires_ordering()
249249
def cummax(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
250250
return self._apply_window_op(agg_ops.max_op, numeric_only=numeric_only)
251251

252-
@validations.requires_strict_ordering()
252+
@validations.requires_ordering()
253253
def cumprod(self, *args, **kwargs) -> df.DataFrame:
254254
return self._apply_window_op(agg_ops.product_op, numeric_only=True)
255255

256-
@validations.requires_strict_ordering()
256+
@validations.requires_ordering()
257257
def shift(self, periods=1) -> series.Series:
258258
window = window_specs.rows(
259259
grouping_keys=tuple(self._by_col_ids),
@@ -262,7 +262,7 @@ def shift(self, periods=1) -> series.Series:
262262
)
263263
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
264264

265-
@validations.requires_strict_ordering()
265+
@validations.requires_ordering()
266266
def diff(self, periods=1) -> series.Series:
267267
window = window_specs.rows(
268268
grouping_keys=tuple(self._by_col_ids),
@@ -271,7 +271,7 @@ def diff(self, periods=1) -> series.Series:
271271
)
272272
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
273273

274-
@validations.requires_strict_ordering()
274+
@validations.requires_ordering()
275275
def rolling(self, window: int, min_periods=None) -> windows.Window:
276276
# To get n size window, need current row and n-1 preceding rows.
277277
window_spec = window_specs.rows(
@@ -287,7 +287,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
287287
block, window_spec, self._selected_cols, drop_null_groups=self._dropna
288288
)
289289

290-
@validations.requires_strict_ordering()
290+
@validations.requires_ordering()
291291
def expanding(self, min_periods: int = 1) -> windows.Window:
292292
window_spec = window_specs.cumulative_rows(
293293
grouping_keys=tuple(self._by_col_ids),
@@ -532,7 +532,7 @@ def __init__(
532532
def _session(self) -> core.Session:
533533
return self._block.session
534534

535-
@validations.requires_strict_ordering()
535+
@validations.requires_ordering()
536536
def head(self, n: int = 5) -> series.Series:
537537
block = self._block
538538
if self._dropna:
@@ -650,31 +650,31 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:
650650

651651
aggregate = agg
652652

653-
@validations.requires_strict_ordering()
653+
@validations.requires_ordering()
654654
def cumsum(self, *args, **kwargs) -> series.Series:
655655
return self._apply_window_op(
656656
agg_ops.sum_op,
657657
)
658658

659-
@validations.requires_strict_ordering()
659+
@validations.requires_ordering()
660660
def cumprod(self, *args, **kwargs) -> series.Series:
661661
return self._apply_window_op(
662662
agg_ops.product_op,
663663
)
664664

665-
@validations.requires_strict_ordering()
665+
@validations.requires_ordering()
666666
def cummax(self, *args, **kwargs) -> series.Series:
667667
return self._apply_window_op(
668668
agg_ops.max_op,
669669
)
670670

671-
@validations.requires_strict_ordering()
671+
@validations.requires_ordering()
672672
def cummin(self, *args, **kwargs) -> series.Series:
673673
return self._apply_window_op(
674674
agg_ops.min_op,
675675
)
676676

677-
@validations.requires_strict_ordering()
677+
@validations.requires_ordering()
678678
def cumcount(self, *args, **kwargs) -> series.Series:
679679
return (
680680
self._apply_window_op(
@@ -684,7 +684,7 @@ def cumcount(self, *args, **kwargs) -> series.Series:
684684
- 1
685685
)
686686

687-
@validations.requires_strict_ordering()
687+
@validations.requires_ordering()
688688
def shift(self, periods=1) -> series.Series:
689689
"""Shift index by desired number of periods."""
690690
window = window_specs.rows(
@@ -694,7 +694,7 @@ def shift(self, periods=1) -> series.Series:
694694
)
695695
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
696696

697-
@validations.requires_strict_ordering()
697+
@validations.requires_ordering()
698698
def diff(self, periods=1) -> series.Series:
699699
window = window_specs.rows(
700700
grouping_keys=tuple(self._by_col_ids),
@@ -703,7 +703,7 @@ def diff(self, periods=1) -> series.Series:
703703
)
704704
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
705705

706-
@validations.requires_strict_ordering()
706+
@validations.requires_ordering()
707707
def rolling(self, window: int, min_periods=None) -> windows.Window:
708708
# To get n size window, need current row and n-1 preceding rows.
709709
window_spec = window_specs.rows(
@@ -723,7 +723,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
723723
is_series=True,
724724
)
725725

726-
@validations.requires_strict_ordering()
726+
@validations.requires_ordering()
727727
def expanding(self, min_periods: int = 1) -> windows.Window:
728728
window_spec = window_specs.cumulative_rows(
729729
grouping_keys=tuple(self._by_col_ids),

bigframes/core/indexes/base.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def empty(self) -> bool:
184184
return self.shape[0] == 0
185185

186186
@property
187-
@validations.requires_strict_ordering()
187+
@validations.requires_ordering()
188188
def is_monotonic_increasing(self) -> bool:
189189
"""
190190
Return a boolean if the values are equal or increasing.
@@ -198,7 +198,7 @@ def is_monotonic_increasing(self) -> bool:
198198
)
199199

200200
@property
201-
@validations.requires_strict_ordering()
201+
@validations.requires_ordering()
202202
def is_monotonic_decreasing(self) -> bool:
203203
"""
204204
Return a boolean if the values are equal or decreasing.
@@ -348,7 +348,7 @@ def max(self) -> typing.Any:
348348
def min(self) -> typing.Any:
349349
return self._apply_aggregation(agg_ops.min_op)
350350

351-
@validations.requires_strict_ordering()
351+
@validations.requires_ordering()
352352
def argmax(self) -> int:
353353
block, row_nums = self._block.promote_offsets()
354354
block = block.order_by(
@@ -361,7 +361,7 @@ def argmax(self) -> int:
361361

362362
return typing.cast(int, series.Series(block.select_column(row_nums)).iloc[0])
363363

364-
@validations.requires_strict_ordering()
364+
@validations.requires_ordering()
365365
def argmin(self) -> int:
366366
block, row_nums = self._block.promote_offsets()
367367
block = block.order_by(

0 commit comments

Comments
 (0)