googleapis
diff --git a/‎CHANGELOG.md
Lines changed: 29 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 29 additions & 0 deletions
diff --git a/‎bigframes/dataframe.py
Lines changed: 155 additions & 34 deletions b/‎bigframes/dataframe.py
Lines changed: 155 additions & 34 deletions
diff --git a/‎bigframes/version.py
Lines changed: 2 additions & 2 deletions b/‎bigframes/version.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎tests/system/small/test_dataframe.py
Lines changed: 96 additions & 6 deletions b/‎tests/system/small/test_dataframe.py
Lines changed: 96 additions & 6 deletions
@@ -4,6 +4,35 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [2.15.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.14.0...v2.15.0) (2025-08-11)
+
+
+### Features
+
+* Add `st_buffer`, `st_centroid`, and `st_convexhull` and their corresponding GeoSeries methods ([#1963](https://github.com/googleapis/python-bigquery-dataframes/issues/1963)) ([c4c7fa5](https://github.com/googleapis/python-bigquery-dataframes/commit/c4c7fa578e135e7f0e31ad3063db379514957acc))
+* Add first, last support to GroupBy ([#1969](https://github.com/googleapis/python-bigquery-dataframes/issues/1969)) ([41dda88](https://github.com/googleapis/python-bigquery-dataframes/commit/41dda889860c0ed8ca2eab81b34a9d71372c69f7))
+* Add value_counts to GroupBy classes ([#1974](https://github.com/googleapis/python-bigquery-dataframes/issues/1974)) ([82175a4](https://github.com/googleapis/python-bigquery-dataframes/commit/82175a4d0fa41d8aee11efdf8778a21bb70b1c0f))
+* Allow callable as a conditional or replacement input in DataFrame.where ([#1971](https://github.com/googleapis/python-bigquery-dataframes/issues/1971)) ([a8d57d2](https://github.com/googleapis/python-bigquery-dataframes/commit/a8d57d2f7075158eff69ec65a14c232756ab72a6))
+* Can cast locally in hybrid engine ([#1944](https://github.com/googleapis/python-bigquery-dataframes/issues/1944)) ([d9bc4a5](https://github.com/googleapis/python-bigquery-dataframes/commit/d9bc4a5940e9930d5e3c3bfffdadd2f91f96b53b))
+* Support `lsuffix` and `rsuffix` in `DataFrame.join` ([#1857](https://github.com/googleapis/python-bigquery-dataframes/issues/1857)) ([26515c3](https://github.com/googleapis/python-bigquery-dataframes/commit/26515c34c4f0a5e4602d2f59bf229d41e0fc9196))
+
+
+### Bug Fixes
+
+* Add warnings for duplicated or conflicting type hints in bigfram… ([#1956](https://github.com/googleapis/python-bigquery-dataframes/issues/1956)) ([d38e42c](https://github.com/googleapis/python-bigquery-dataframes/commit/d38e42ce689e65f57223e9a8b14c4262cba08966))
+* Make `remote_function` more robust when there are `create_function` retries ([#1973](https://github.com/googleapis/python-bigquery-dataframes/issues/1973)) ([cd954ac](https://github.com/googleapis/python-bigquery-dataframes/commit/cd954ac07ad5e5820a20b941d3c6cab7cfcc1f29))
+* Make ExecutionMetrics stats tracking more robust to missing stats ([#1977](https://github.com/googleapis/python-bigquery-dataframes/issues/1977)) ([feb3ff4](https://github.com/googleapis/python-bigquery-dataframes/commit/feb3ff4b543eb8acbf6adf335b67a266a1cf4297))
+
+
+### Performance Improvements
+
+* Remove an unnecessary extra `dry_run` query from `read_gbq_table` ([#1972](https://github.com/googleapis/python-bigquery-dataframes/issues/1972)) ([d17b711](https://github.com/googleapis/python-bigquery-dataframes/commit/d17b711750d281ef3efd42c160f3784cd60021ae))
+
+
+### Documentation
+
+* Divide BQ DataFrames quickstart code cell ([#1975](https://github.com/googleapis/python-bigquery-dataframes/issues/1975)) ([fedb8f2](https://github.com/googleapis/python-bigquery-dataframes/commit/fedb8f23120aa315c7e9dd6f1bf1255ccf1ebc48))
+
 ## [2.14.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v2.13.0...v2.14.0) (2025-08-05)
 
 
 
@@ -3520,70 +3520,138 @@ def join(
         *,
         on: Optional[str] = None,
         how: str = "left",
+        lsuffix: str = "",
+        rsuffix: str = "",
     ) -> DataFrame:
         if isinstance(other, bigframes.series.Series):
             other = other.to_frame()
 
         left, right = self, other
 
-        if not left.columns.intersection(right.columns).empty:
-            raise NotImplementedError(
-                f"Deduping column names is not implemented. {constants.FEEDBACK_LINK}"
-            )
+        col_intersection = left.columns.intersection(right.columns)
+
+        if not col_intersection.empty:
+            if lsuffix == rsuffix == "":
+                raise ValueError(
+                    f"columns overlap but no suffix specified: {col_intersection}"
+                )
+
         if how == "cross":
             if on is not None:
                 raise ValueError("'on' is not supported for cross join.")
             result_block = left._block.merge(
                 right._block,
                 left_join_ids=[],
                 right_join_ids=[],
-                suffixes=("", ""),
+                suffixes=(lsuffix, rsuffix),
                 how="cross",
                 sort=True,
             )
             return DataFrame(result_block)
 
         # Join left columns with right index
         if on is not None:
+            if left._has_index and (on in left.index.names):
+                if on in left.columns:
+                    raise ValueError(
+                        f"'{on}' is both an index level and a column label, which is ambiguous."
+                    )
+                else:
+                    raise NotImplementedError(
+                        f"Joining on index level '{on}' is not yet supported. {constants.FEEDBACK_LINK}"
+                    )
+            if (left.columns == on).sum() > 1:
+                raise ValueError(f"The column label '{on}' is not unique.")
+
             if other._block.index.nlevels != 1:
                 raise ValueError(
                     "Join on columns must match the index level of the other DataFrame. Join on column with multi-index haven't been supported."
                 )
-            # Switch left index with on column
-            left_columns = left.columns
-            left_idx_original_names = left.index.names if left._has_index else ()
-            left_idx_names_in_cols = [
-                f"bigframes_left_idx_name_{i}"
-                for i in range(len(left_idx_original_names))
-            ]
-            if left._has_index:
-                left.index.names = left_idx_names_in_cols
-            left = left.reset_index(drop=False)
-            left = left.set_index(on)
-
-            # Join on index and switch back
-            combined_df = left._perform_join_by_index(right, how=how)
-            combined_df.index.name = on
-            combined_df = combined_df.reset_index(drop=False)
-            combined_df = combined_df.set_index(left_idx_names_in_cols)
-
-            # To be consistent with Pandas
-            if combined_df._has_index:
-                combined_df.index.names = (
-                    left_idx_original_names
-                    if how in ("inner", "left")
-                    else ([None] * len(combined_df.index.names))
-                )
 
-            # Reorder columns
-            combined_df = combined_df[list(left_columns) + list(right.columns)]
-            return combined_df
+            return self._join_on_key(
+                other,
+                on=on,
+                how=how,
+                lsuffix=lsuffix,
+                rsuffix=rsuffix,
+                should_duplicate_on_key=(on in col_intersection),
+            )
 
         # Join left index with right index
         if left._block.index.nlevels != right._block.index.nlevels:
             raise ValueError("Index to join on must have the same number of levels.")
 
-        return left._perform_join_by_index(right, how=how)
+        return left._perform_join_by_index(right, how=how)._add_join_suffix(
+            left.columns, right.columns, lsuffix=lsuffix, rsuffix=rsuffix
+        )
+
+    def _join_on_key(
+        self,
+        other: DataFrame,
+        on: str,
+        how: str,
+        lsuffix: str,
+        rsuffix: str,
+        should_duplicate_on_key: bool,
+    ) -> DataFrame:
+        """Join a column of this frame against the index of ``other``.
+
+        Both sides are relabelled with temporary, unique column names so the
+        combined frame can be reordered unambiguously even when the original
+        labels overlap. The ``on`` column is moved into the left index, the
+        join is delegated to ``_perform_join_by_index``, the original left
+        index is restored, and finally ``_add_join_suffix`` puts the original
+        labels (plus suffixes for overlapping ones) back.
+
+        Args:
+            other (DataFrame):
+                The right-hand frame; its index is matched against ``on``.
+            on (str):
+                Label of the column in this frame to join on.
+            how (str):
+                Join type, forwarded to ``_perform_join_by_index``.
+            lsuffix (str):
+                Suffix for overlapping labels coming from this frame.
+            rsuffix (str):
+                Suffix for overlapping labels coming from ``other``.
+            should_duplicate_on_key (bool):
+                Whether ``on`` is also a column label of ``other``. If so, an
+                extra copy of the key column is placed first in the result
+                under its original, un-suffixed name.
+
+        Returns:
+            DataFrame:
+                The joined frame. Neither ``self`` nor ``other`` is modified.
+        """
+        # Copy BOTH inputs: the ``.columns`` / ``.index.names`` assignments below
+        # are applied directly to ``left`` and ``right``. Without copying
+        # ``other``, the caller's frame would be left carrying the temporary
+        # ``bigframes_right_col_name_*`` labels after the join.
+        left, right = self.copy(), other.copy()
+        # Replace all columns names with unique names for reordering.
+        left_col_original_names = left.columns
+        on_col_name = "bigframes_left_col_on"
+        dup_on_col_name = "bigframes_left_col_on_dup"
+        left_col_temp_names = [
+            f"bigframes_left_col_name_{i}" if col_name != on else on_col_name
+            for i, col_name in enumerate(left_col_original_names)
+        ]
+        left.columns = pandas.Index(left_col_temp_names)
+        # if on column is also in right df, we need to duplicate the column
+        # and set it to be the first column
+        if should_duplicate_on_key:
+            left[dup_on_col_name] = left[on_col_name]
+            on_col_name = dup_on_col_name
+            left_col_temp_names = [on_col_name] + left_col_temp_names
+            left = left[left_col_temp_names]
+
+        # Switch left index with on column
+        left_idx_original_names = left.index.names if left._has_index else ()
+        left_idx_names_in_cols = [
+            f"bigframes_left_idx_name_{i}" for i in range(len(left_idx_original_names))
+        ]
+        if left._has_index:
+            left.index.names = left_idx_names_in_cols
+        left = left.reset_index(drop=False)
+        left = left.set_index(on_col_name)
+
+        right_col_original_names = right.columns
+        right_col_temp_names = [
+            f"bigframes_right_col_name_{i}"
+            for i in range(len(right_col_original_names))
+        ]
+        right.columns = pandas.Index(right_col_temp_names)
+
+        # Join on index and switch back
+        combined_df = left._perform_join_by_index(right, how=how)
+        combined_df.index.name = on_col_name
+        combined_df = combined_df.reset_index(drop=False)
+        combined_df = combined_df.set_index(left_idx_names_in_cols)
+
+        # To be consistent with Pandas
+        if combined_df._has_index:
+            combined_df.index.names = (
+                left_idx_original_names
+                if how in ("inner", "left")
+                else ([None] * len(combined_df.index.names))
+            )
+
+        # Reorder columns
+        combined_df = combined_df[left_col_temp_names + right_col_temp_names]
+        return combined_df._add_join_suffix(
+            left_col_original_names,
+            right_col_original_names,
+            lsuffix=lsuffix,
+            rsuffix=rsuffix,
+            # The duplicated key column (if any) leads the frame and keeps the
+            # original ``on`` label.
+            extra_col=on if should_duplicate_on_key else None,
+        )
 
     def _perform_join_by_index(
         self,
@@ -3597,6 +3665,59 @@ def _perform_join_by_index(
         )
         return DataFrame(block)
 
+    def _add_join_suffix(
+        self,
+        left_columns,
+        right_columns,
+        lsuffix: str = "",
+        rsuffix: str = "",
+        extra_col: typing.Optional[str] = None,
+    ):
+        """Relabel a joined frame, suffixing labels shared by both sides.
+
+        The frame is assumed to hold the left columns followed by the right
+        columns (optionally preceded by one extra column). Labels present in
+        both ``left_columns`` and ``right_columns`` get ``lsuffix`` or
+        ``rsuffix`` appended depending on the side they came from; all other
+        labels are kept unchanged.
+
+        Args:
+            left_columns (pandas.Index):
+                Column labels of the left frame, in order.
+            right_columns (pandas.Index):
+                Column labels of the right frame, in order.
+            lsuffix (str):
+                Suffix appended to overlapping labels from the left side.
+            rsuffix (str):
+                Suffix appended to overlapping labels from the right side.
+            extra_col (typing.Optional[str]):
+                Optional label placed in front of all others. It is used when
+                the join key exists on both sides: pandas then keeps one copy
+                of the key under its original name as the first column, while
+                the remaining copies receive the regular suffixes.
+
+        Returns:
+            DataFrame:
+                A copy of this frame carrying the resolved column labels.
+        """
+        renamed_df = self.copy()
+        shared_labels = left_columns.intersection(right_columns)
+
+        def _suffixed(labels, suffix):
+            # Only labels that occur on both sides are suffixed.
+            return [
+                f"{label}{suffix}" if label in shared_labels else label
+                for label in labels
+            ]
+
+        new_labels = [extra_col] if extra_col is not None else []
+        new_labels.extend(_suffixed(left_columns, lsuffix))
+        new_labels.extend(_suffixed(right_columns, rsuffix))
+
+        renamed_df.columns = pandas.Index(new_labels)
+        return renamed_df
+
     @validations.requires_ordering()
     def rolling(
         self,
 
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "2.14.0"
+__version__ = "2.15.0"
 
 # {x-release-please-start-date}
-__release_date__ = "2025-08-05"
+__release_date__ = "2025-08-11"
 # {x-release-please-end}
@@ -2981,12 +2981,102 @@ def test_join_different_table(
     assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
-def test_join_duplicate_columns_raises_not_implemented(scalars_dfs):
-    scalars_df, _ = scalars_dfs
-    df_a = scalars_df[["string_col", "float64_col"]]
-    df_b = scalars_df[["float64_col"]]
-    with pytest.raises(NotImplementedError):
-        df_a.join(df_b, how="outer").to_pandas()
+@all_joins
+def test_join_different_table_with_duplicate_column_name(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    """Index-on-index join with overlapping labels and suffixes matches pandas."""
+    selected_cols = ["string_col", "int64_col", "int64_too"]
+    # Renaming "int64_too" leaves each frame with two "int64_col" columns.
+    relabel = {"int64_too": "int64_col"}
+    suffixes = {"lsuffix": "_l", "rsuffix": "_r"}
+
+    bf_df_a = scalars_df_index[selected_cols].rename(columns=relabel)
+    bf_df_b = scalars_df_index.dropna()[selected_cols].rename(columns=relabel)
+    bf_result = bf_df_a.join(bf_df_b, how=how, **suffixes).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[selected_cols].rename(columns=relabel)
+    pd_df_b = scalars_pandas_df_index.dropna()[selected_cols].rename(columns=relabel)
+    pd_result = pd_df_a.join(pd_df_b, how=how, **suffixes)
+
+    # Ensure no inplace changes
+    pd.testing.assert_index_equal(bf_df_a.columns, pd_df_a.columns)
+    pd.testing.assert_index_equal(bf_df_b.index.to_pandas(), pd_df_b.index)
+    pd.testing.assert_frame_equal(bf_result, pd_result, check_index_type=False)
+
+
+@all_joins
+def test_join_param_on_with_duplicate_column_name_not_on_col(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    """Join via ``on`` where labels overlap but the key column is left-only."""
+    # This test is for duplicate column names, but the 'on' column is not duplicated.
+    if how == "cross":
+        # The cross case returns immediately without asserting anything.
+        return
+
+    left_cols = ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    right_cols = ["string_col", "datetime_col", "timestamp_col"]
+    relabel = {"timestamp_col": "datetime_col"}
+    join_kwargs = {"on": "int64_too", "how": how, "lsuffix": "_l", "rsuffix": "_r"}
+
+    bf_df_a = scalars_df_index[left_cols].rename(columns=relabel)
+    bf_df_b = scalars_df_index.dropna()[right_cols].rename(columns=relabel)
+    bf_result = bf_df_a.join(bf_df_b, **join_kwargs).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[left_cols].rename(columns=relabel)
+    pd_df_b = scalars_pandas_df_index.dropna()[right_cols].rename(columns=relabel)
+    pd_result = pd_df_a.join(pd_df_b, **join_kwargs)
+
+    # Row order and index names are not compared; column order is checked
+    # separately below because check_like=True ignores it.
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(),
+        pd_result.sort_index(),
+        check_like=True,
+        check_index_type=False,
+        check_names=False,
+    )
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
+
+
+@pytest.mark.skipif(
+    pandas.__version__.startswith("1."), reason="bad left join in pandas 1.x"
+)
+@all_joins
+def test_join_param_on_with_duplicate_column_name_on_col(
+    scalars_df_index, scalars_pandas_df_index, how
+):
+    """Join via ``on`` where the key column itself exists on both sides."""
+    # This test is for duplicate column names, and the 'on' column is duplicated.
+    if how == "cross":
+        # The cross case returns immediately without asserting anything.
+        return
+
+    # Both frames select the same columns, so "int64_too" (the key) overlaps too.
+    selected_cols = ["string_col", "datetime_col", "timestamp_col", "int64_too"]
+    relabel = {"timestamp_col": "datetime_col"}
+    join_kwargs = {"on": "int64_too", "how": how, "lsuffix": "_l", "rsuffix": "_r"}
+
+    bf_df_a = scalars_df_index[selected_cols].rename(columns=relabel)
+    bf_df_b = scalars_df_index.dropna()[selected_cols].rename(columns=relabel)
+    bf_result = bf_df_a.join(bf_df_b, **join_kwargs).to_pandas()
+
+    pd_df_a = scalars_pandas_df_index[selected_cols].rename(columns=relabel)
+    pd_df_b = scalars_pandas_df_index.dropna()[selected_cols].rename(columns=relabel)
+    pd_result = pd_df_a.join(pd_df_b, **join_kwargs)
+
+    # Row order and index names are not compared; column order is checked
+    # separately below because check_like=True ignores it.
+    pd.testing.assert_frame_equal(
+        bf_result.sort_index(),
+        pd_result.sort_index(),
+        check_like=True,
+        check_index_type=False,
+        check_names=False,
+    )
+    pd.testing.assert_index_equal(bf_result.columns, pd_result.columns)
 
 
 @all_joins