Skip to content

Commit 55d9750

Browse files
vlasenkoalexey and yongtang
authored and committed
More changes for BigQuery connector (#490)
* Fixing Dockerfile * Returning dataset in a form of Dictionary from BigQuery connector * Adding NULL fields support to BigQuery connector * python style tweak * more style tweaks * Style tweaks, coming from google account
1 parent ea53711 commit 55d9750

File tree

3 files changed

+22
-3
lines changed

3 files changed

+22
-3
lines changed

tensorflow_io/bigquery/kernels/bigquery_lib.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ class BigQueryReaderDatasetIterator : public DatasetIterator<Dataset> {
184184
case avro::AVRO_ENUM:
185185
dtype = DT_STRING;
186186
break;
187+
case avro::AVRO_NULL:
188+
dtype = output_types[i];
189+
break;
187190
default:
188191
return errors::InvalidArgument("unsupported data type: ",
189192
field.type());
@@ -250,6 +253,8 @@ class BigQueryReaderDatasetIterator : public DatasetIterator<Dataset> {
250253
((*out_tensors)[i]).scalar<string>()() =
251254
field.value<avro::GenericEnum>().symbol();
252255
break;
256+
case avro::AVRO_NULL: // Fallthrough;
257+
break;
253258
default:
254259
return errors::InvalidArgument("unsupported data type: ",
255260
field.type());

tensorflow_io/bigquery/python/ops/bigquery_api.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
from __future__ import division
2828
from __future__ import print_function
2929

30+
import collections
31+
from operator import itemgetter
32+
3033
from tensorflow.python.data.experimental.ops import interleave_ops
3134
from tensorflow.python.data.ops import dataset_ops
3235
from tensorflow.python.framework import dtypes
@@ -223,8 +226,19 @@ class _BigQueryDataset(dataset_ops.DatasetSource):
223226

224227
def __init__(self, client_resource, selected_fields, output_types,
225228
avro_schema, stream):
226-
self._element_spec = tuple(
227-
tensor_spec.TensorSpec([], dtype) for dtype in output_types)
229+
230+
# selected_fields and corresponding output_types have to be sorted because
231+
# of b/141251314
232+
sorted_fields_with_types = sorted(
233+
zip(selected_fields, output_types),
234+
key=itemgetter(0))
235+
selected_fields, output_types = list(zip(*sorted_fields_with_types))
236+
selected_fields = list(selected_fields)
237+
output_types = list(output_types)
238+
239+
self._element_spec = collections.OrderedDict(zip(
240+
selected_fields,
241+
(tensor_spec.TensorSpec([], dtype) for dtype in output_types)))
228242

229243
variant_tensor = _bigquery_so.big_query_dataset(
230244
client=client_resource,

tools/dev/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ RUN /bin/bash -c "source activate tfio-dev && python -m pip install \
4949
pyarrow==${ARROW_VERSION} \
5050
pandas \
5151
fastavro \
52-
gast==0.2.2
52+
gast==0.2.2 \
5353
${PIP_ADD_PACKAGES} \
5454
"
5555

0 commit comments

Comments
 (0)