56 changes: 38 additions & 18 deletions bigframes/display/anywidget.py
@@ -23,6 +23,7 @@
import pandas as pd

import bigframes
import bigframes.core.blocks
import bigframes.display.html

# anywidget and traitlets are optional dependencies. We don't want the import of this
@@ -45,8 +46,10 @@


class TableWidget(WIDGET_BASE):
"""
An interactive, paginated table widget for BigFrames DataFrames.
"""An interactive, paginated table widget for BigFrames DataFrames.

This widget provides a user-friendly way to display and navigate through
large BigQuery DataFrames within a Jupyter environment.
"""

def __init__(self, dataframe: bigframes.dataframe.DataFrame):
@@ -60,32 +63,37 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
)

super().__init__()
self._dataframe = dataframe
self._initializing = True
super().__init__()

# Initialize attributes that might be needed by observers FIRST
# Initialize attributes that might be needed by observers first
self._table_id = str(uuid.uuid4())
self._all_data_loaded = False
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
self._cached_batches: List[pd.DataFrame] = []

# respect display options for initial page size
# Respect display options for initial page size
initial_page_size = bigframes.options.display.max_rows

# Initialize data fetching attributes.
self._batches = dataframe.to_pandas_batches(page_size=initial_page_size)
execute_result = dataframe._block.session._executor.execute(
dataframe._block.expr,
ordered=True,
use_explicit_destination=True,
Collaborator review comment: I don't think we want to use an explicit destination here. This would result in creating a BigQuery job every time, which is not desirable. We want to keep the faster, job-optional code paths available.

)

# set traitlets properties that trigger observers
self.page_size = initial_page_size
# The query issued by `to_pandas_batches()` already contains metadata
# about how many results there were. Use that to avoid doing an extra
# COUNT(*) query that `len(...)` would do.
self.row_count = execute_result.total_rows or 0
Collaborator review comment: Why did you switch it to use the execute result instead of the PandasBatches object returned by to_pandas_batches()? (A sketch of that alternative follows the __init__ body below.)

# len(dataframe) is expensive, since it will trigger a
# SELECT COUNT(*) query. It is a must have however.
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
# before we get here so that the count might already be cached.
self.row_count = len(dataframe)
# Create pandas batches from the ExecuteResult
self._batches = execute_result.to_pandas_batches(page_size=initial_page_size)

self.page_size = initial_page_size

# get the initial page
self._set_table_html()
self._initializing = False
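
Both review comments above point at the same alternative: the PandasBatches iterator returned by to_pandas_batches() already carries the row-count metadata from the query, so the widget could avoid both the explicit destination table and a separate COUNT(*). A minimal sketch of that approach, assuming the installed bigframes version exposes total_rows on the PandasBatches object:

    # Sketch only: total_rows on the PandasBatches object is an assumption
    # about the installed bigframes version.
    initial_page_size = bigframes.options.display.max_rows

    # No explicit destination is forced here, so the faster, job-optional
    # execution paths remain available.
    batches = dataframe.to_pandas_batches(page_size=initial_page_size)

    # The row count comes from metadata already attached to the batches,
    # avoiding the SELECT COUNT(*) query that len(dataframe) would issue.
    row_count = batches.total_rows or 0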

@functools.cached_property
def _esm(self):
@@ -167,8 +175,7 @@ def _get_next_batch(self) -> bool:
@property
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
"""Lazily initializes and returns the batch iterator."""
if self._batch_iter is None:
self._batch_iter = iter(self._batches)
self._batch_iter = iter(self._batches)
return self._batch_iter

@property
@@ -180,7 +187,16 @@ def _cached_data(self) -> pd.DataFrame:

def _reset_batches_for_new_page_size(self):
"""Reset the batch iterator when page size changes."""
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
# Execute with explicit destination for consistency with __init__
execute_result = self._dataframe._block.session._executor.execute(
self._dataframe._block.expr,
ordered=True,
use_explicit_destination=True,
)

# Create pandas batches from the ExecuteResult
self._batches = execute_result.to_pandas_batches(page_size=self.page_size)

self._cached_batches = []
self._batch_iter = None
self._all_data_loaded = False
Expand Down Expand Up @@ -210,11 +226,15 @@ def _set_table_html(self):
@traitlets.observe("page")
def _page_changed(self, _change: Dict[str, Any]):
"""Handler for when the page number is changed from the frontend."""
if self._initializing:
return
self._set_table_html()

@traitlets.observe("page_size")
def _page_size_changed(self, _change: Dict[str, Any]):
"""Handler for when the page size is changed from the frontend."""
if self._initializing:
return
# Reset the page to 0 when page size changes to avoid invalid page states
self.page = 0
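
For reference, a minimal usage sketch of the widget this file implements. The sample table is an illustrative public dataset; the TableWidget import path and constructor match the module above, and the anywidget extras must be installed (pip install 'bigframes[anywidget]'):

    import bigframes.pandas as bpd
    from bigframes.display.anywidget import TableWidget

    # Illustrative public table; any BigQuery DataFrames object works.
    df = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

    # Construct the widget directly and display it in a notebook cell.
    widget = TableWidget(df)
    widget  # paginated table; widget.row_count and widget.page_size drive paging

Alternatively, if the installed version supports it, setting bigframes.options.display.repr_mode to "anywidget" renders DataFrame reprs through this widget automatically.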

74 changes: 45 additions & 29 deletions notebooks/dataframes/anywidget_mode.ipynb
@@ -73,18 +73,6 @@
"id": "f289d250",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job a643d120-4af9-44fc-ba3c-ed461cf1092b is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:a643d120-4af9-44fc-ba3c-ed461cf1092b&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
@@ -139,15 +127,27 @@
"id": "ce250157",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job 1171b7b3-3f65-4165-a69d-69dad5a100d1 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:1171b7b3-3f65-4165-a69d-69dad5a100d1&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d2d4ef22ea9f414b89ea5bd85f0e6635",
"model_id": "6b70bf0e30a04a3cab11e03b2ed80856",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"table table-striped table-ho…"
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
]
},
"metadata": {},
Expand Down Expand Up @@ -183,6 +183,18 @@
"id": "6920d49b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Query job 3100859b-c57c-42fe-a5fb-abb4f2f25db2 is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3100859b-c57c-42fe-a5fb-abb4f2f25db2&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
@@ -193,12 +205,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "121e3d2f28004036a922e3a11a08d4b7",
"model_id": "4714b0794f55435a8d3e136517158a5c",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"table table-striped table-ho…"
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
]
},
"execution_count": 7,
@@ -273,10 +285,22 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/swast/src/github.com/googleapis/python-bigquery-dataframes/bigframes/core/array_value.py:230: AmbiguousWindowWarning: Window ordering may be ambiguous, this can cause unstable results.\n",
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/array_value.py:230: AmbiguousWindowWarning: Window ordering may be ambiguous, this can cause unstable results.\n",
" warnings.warn(msg, bfe.AmbiguousWindowWarning)\n"
]
},
{
"data": {
"text/html": [
"Query job b4143f15-4bac-44a5-bb29-c5056f95b30b is DONE. 171.4 MB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:b4143f15-4bac-44a5-bb29-c5056f95b30b&page=queryresults\">Open Job</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
@@ -287,12 +311,12 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5ed335bbbc064e5391ea06a9a218642e",
"model_id": "c70b5611db6b4e6a806a16d0a8287cd3",
"version_major": 2,
"version_minor": 1
},
"text/plain": [
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"table table-striped table-hover\" i…"
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
]
},
"execution_count": 9,
@@ -307,19 +331,11 @@
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
"small_widget"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "venv",
"language": "python",
"name": "python3"
},
@@ -333,7 +349,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
"version": "3.10.15"
}
},
"nbformat": 4,
4 changes: 2 additions & 2 deletions notebooks/dataframes/dataframe.ipynb
@@ -5366,7 +5366,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "venv",
"language": "python",
"name": "python3"
},
@@ -5380,7 +5380,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.1"
"version": "3.10.15"
}
},
"nbformat": 4,
26 changes: 21 additions & 5 deletions tests/benchmark/read_gbq_colab/aggregate_output.py
Expand Up @@ -26,8 +26,14 @@ def aggregate_output(*, project_id, dataset_id, table_id):
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")

# Simulate getting the first page, since we'll always do that first in the UI.
df.shape
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
execute_result = df._block.session._executor.execute(
df._block.expr,
ordered=True,
use_explicit_destination=True,
)
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
next(iter(batches))

# To simulate very small rows that can only fit a boolean,
# some tables don't have an integer column. If an integer column is available,
@@ -42,9 +48,19 @@
.groupby("rounded")
.sum(numeric_only=True)
)

df_aggregated.shape
next(iter(df_aggregated.to_pandas_batches(page_size=PAGE_SIZE)))
execute_result_aggregated = df_aggregated._block.session._executor.execute(
df_aggregated._block.expr,
ordered=True,
use_explicit_destination=True,
)
assert (
execute_result_aggregated.total_rows is not None
and execute_result_aggregated.total_rows >= 0
)
batches_aggregated = execute_result_aggregated.to_pandas_batches(
page_size=PAGE_SIZE
)
next(iter(batches_aggregated))


if __name__ == "__main__":
24 changes: 20 additions & 4 deletions tests/benchmark/read_gbq_colab/filter_output.py
@@ -31,16 +31,32 @@ def filter_output(
df = bpd._read_gbq_colab(f"SELECT * FROM `{project_id}`.{dataset_id}.{table_id}")

# Simulate getting the first page, since we'll always do that first in the UI.
df.shape
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
# Force BigQuery execution to get total_rows metadata
execute_result = df._block.session._executor.execute(
df._block.expr,
ordered=True,
use_explicit_destination=True,
)
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
next(iter(batches))

# Simulate the user filtering by a column and visualizing those results
df_filtered = df[df["col_bool_0"]]
rows, _ = df_filtered.shape
# Force BigQuery execution for filtered DataFrame to get total_rows metadata
execute_result_filtered = df_filtered._block.session._executor.execute(
df_filtered._block.expr,
ordered=True,
use_explicit_destination=True,
)

rows = execute_result_filtered.total_rows or 0
assert rows >= 0

batches_filtered = execute_result_filtered.to_pandas_batches(page_size=PAGE_SIZE)

# It's possible we don't have any pages at all, since we filtered out all
# matching rows.
first_page = next(iter(df_filtered.to_pandas_batches(page_size=PAGE_SIZE)))
first_page = next(iter(batches_filtered))
assert len(first_page.index) <= rows


11 changes: 9 additions & 2 deletions tests/benchmark/read_gbq_colab/first_page.py
@@ -28,8 +28,15 @@ def first_page(*, project_id, dataset_id, table_id):
)

# Get number of rows (to calculate number of pages) and the first page.
df.shape
next(iter(df.to_pandas_batches(page_size=PAGE_SIZE)))
execute_result = df._block.session._executor.execute(
df._block.expr,
ordered=True,
use_explicit_destination=True,
)
assert execute_result.total_rows is not None and execute_result.total_rows >= 0
batches = execute_result.to_pandas_batches(page_size=PAGE_SIZE)
first_page = next(iter(batches))
assert first_page is not None


if __name__ == "__main__":
4 changes: 2 additions & 2 deletions tests/benchmark/read_gbq_colab/last_page.py
@@ -28,8 +28,8 @@ def last_page(*, project_id, dataset_id, table_id):
)

# Get number of rows (to calculate number of pages) and then all pages.
df.shape
for _ in df.to_pandas_batches(page_size=PAGE_SIZE):
batches = df.to_pandas_batches(page_size=PAGE_SIZE)
for _ in batches:
pass

