google · copybara-service · Jun 17, 2025
diff --git a/src/google/adk/sessions/vertex_ai_session_service.py b/src/google/adk/sessions/vertex_ai_session_service.py
@@ -15,13 +15,15 @@
 
 import asyncio
 import logging
+import os
 import re
 from typing import Any
 from typing import Dict
 from typing import Optional
 import urllib.parse
 
 from dateutil import parser
+from google.genai.errors import ClientError
 from typing_extensions import override
 
 from google import genai
@@ -93,24 +95,47 @@ async def create_session(
     operation_id = api_response['name'].split('/')[-1]
 
     max_retry_attempt = 5
-    lro_response = None
-    while max_retry_attempt >= 0:
-      lro_response = await api_client.async_request(
-          http_method='GET',
-          path=f'operations/{operation_id}',
-          request_dict={},
-      )
-
-      if lro_response.get('done', None):
-        break
-
-      await asyncio.sleep(1)
-      max_retry_attempt -= 1
 
-    if lro_response is None or not lro_response.get('done', None):
-      raise TimeoutError(
-          f'Timeout waiting for operation {operation_id} to complete.'
-      )
+    if _is_vertex_express_mode(self._project, self._location):
+      # Express mode doesn't support LRO, so we need to poll
+      # the session resource.
+      # TODO: remove this once LRO polling is supported in Express mode.
+      while max_retry_attempt >= 0:
+        try:
+          await api_client.async_request(
+              http_method='GET',
+              path=(
+                  f'reasoningEngines/{reasoning_engine_id}/sessions/{session_id}'
+              ),
+              request_dict={},
+          )
+          break
+        except ClientError as e:
+          logger.info('Polling for session %s: %s', session_id, e)
+          await asyncio.sleep(1)
+          max_retry_attempt -= 1
+          continue
+      if max_retry_attempt < 0:
+        raise TimeoutError('Session creation failed.')
+    else:
+      lro_response = None
+      while max_retry_attempt >= 0:
+        lro_response = await api_client.async_request(
+            http_method='GET',
+            path=f'operations/{operation_id}',
+            request_dict={},
+        )
+
+        if lro_response.get('done', None):
+          break
+
+        await asyncio.sleep(1)
+        max_retry_attempt -= 1
+
+      if lro_response is None or not lro_response.get('done', None):
+        raise TimeoutError(
+            f'Timeout waiting for operation {operation_id} to complete.'
+        )
 
     # Get session resource
     get_session_api_response = await api_client.async_request(
@@ -300,9 +325,24 @@ def _get_api_client(self):
     client = genai.Client(
         vertexai=True, project=self._project, location=self._location
     )
+    client._api_client._http_options.base_url = (
+        'https://staging-aiplatform.sandbox.googleapis.com'
+    )
     return client._api_client
 
 
+def _is_vertex_express_mode(
+    project: Optional[str], location: Optional[str]
+) -> bool:
+  """Check if Vertex AI and API key are both enabled, meaning the user is using the Vertex Express Mode."""
+  return (
+      os.environ.get('GOOGLE_GENAI_USE_VERTEXAI', '0').lower() in ['true', '1']
+      and os.environ.get('GOOGLE_API_KEY', None) is not None
+      and project is None
+      and location is None
+  )
+
+
 def _convert_event_to_json(event: Event) -> Dict[str, Any]:
   metadata_json = {
       'partial': event.partial,