From 1bfc14202485d0496e0b4d88dc0e22c98ed3cb89 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Tue, 4 Apr 2017 19:18:42 -0700 Subject: [PATCH] utils._http: Fix bytes -> str for py3 - The HTTP response content is `bytes`, but downstream code expects a `str` - A previous commit (14ce30b2) fixed the type mismatch in the success code path (`json.loads`) but not in the error code path (`RequestException(..., content)`) Test plan: - Run a simple query to test the success path: - `bq.Query('select 3').to_dataframe()` - Before: returns dataframe with `3` - After: same behavior - Run a failing query that triggers the HTTP error path: - e.g. `bq.Query('select * from table').to_dataframe()` where the table is big and triggers the error `Response too large to return` - Before: ``` Traceback (most recent call last): File "/Users/danb/hack/pydatalab/datalab/utils/_http.py", line 46, in __init__ error = json.loads(content)['error'] File "/Users/danb/miniconda3/envs/dwh/lib/python3.5/json/__init__.py", line 312, in loads s.__class__.__name__)) TypeError: the JSON object must be str, not 'bytes' During handling of the above exception, another exception occurred: Traceback (most recent call last): File "", line 1, in File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 322, in to_dataframe return self.results(use_cache=use_cache, dialect=dialect, billing_tier=billing_tier) \ File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 228, in results self.execute(use_cache=use_cache, dialect=dialect, billing_tier=billing_tier) File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 528, in execute self._results = job.wait() File "/Users/danb/hack/pydatalab/datalab/bigquery/_query_job.py", line 84, in wait raise e File "/Users/danb/hack/pydatalab/datalab/bigquery/_query_job.py", line 82, in wait timeout=poll * 1000) File "/Users/danb/hack/pydatalab/datalab/bigquery/_api.py", line 237, in jobs_query_results return datalab.utils.Http.request(url, args=args, 
credentials=self._credentials) File "/Users/danb/hack/pydatalab/datalab/utils/_http.py", line 154, in request raise RequestException(response.status, content) File "/Users/danb/hack/pydatalab/datalab/utils/_http.py", line 51, in __init__ lines = content.split('\n') if isinstance(content, basestring) else [] TypeError: a bytes-like object is required, not 'str' ``` - After: ``` Traceback (most recent call last): File "", line 1, in File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 322, in to_dataframe return self.results(use_cache=use_cache, dialect=dialect, billing_tier=billing_tier) \ File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 228, in results self.execute(use_cache=use_cache, dialect=dialect, billing_tier=billing_tier) File "/Users/danb/hack/pydatalab/datalab/bigquery/_query.py", line 528, in execute self._results = job.wait() File "/Users/danb/hack/pydatalab/datalab/bigquery/_query_job.py", line 84, in wait raise e File "/Users/danb/hack/pydatalab/datalab/bigquery/_query_job.py", line 82, in wait timeout=poll * 1000) File "/Users/danb/hack/pydatalab/datalab/bigquery/_api.py", line 237, in jobs_query_results return datalab.utils.Http.request(url, args=args, credentials=self._credentials) File "/Users/danb/hack/pydatalab/datalab/utils/_http.py", line 153, in request raise RequestException(response.status, content) datalab.utils._http.RequestException: HTTP request failed: Response too large to return. Consider setting allowLargeResults to true in your job configuration. 
For more information, see https://cloud.google.com/bigquery/troubleshooting-errors ``` --- datalab/utils/_http.py | 5 ++--- google/datalab/utils/_http.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/datalab/utils/_http.py b/datalab/utils/_http.py index b545c9748..aa986cd97 100644 --- a/datalab/utils/_http.py +++ b/datalab/utils/_http.py @@ -143,13 +143,12 @@ def request(url, args=None, data=None, headers=None, method=None, method=method, body=data, headers=headers) + content = content.decode() if 200 <= response.status < 300: if raw_response: return content - if type(content) == str: - return json.loads(content) else: - return json.loads(str(content, encoding='UTF-8')) + return json.loads(content) else: raise RequestException(response.status, content) except ValueError: diff --git a/google/datalab/utils/_http.py b/google/datalab/utils/_http.py index 0254c96fc..115034e68 100644 --- a/google/datalab/utils/_http.py +++ b/google/datalab/utils/_http.py @@ -143,13 +143,12 @@ def request(url, args=None, data=None, headers=None, method=None, method=method, body=data, headers=headers) + content = content.decode() if 200 <= response.status < 300: if raw_response: return content - if type(content) == str: - return json.loads(content) else: - return json.loads(str(content, encoding='UTF-8')) + return json.loads(content) else: raise RequestException(response.status, content) except ValueError: