3030
3131@pd .api .extensions .register_extension_dtype
3232class JSONDtype (pd .api .extensions .ExtensionDtype ):
33- """Extension dtype for JSON data."""
33+ """Extension dtype for BigQuery JSON data."""
3434
3535 name = "dbjson"
3636
3737 @property
3838 def na_value (self ) -> pd .NA :
39+ """Default NA value to use for this type."""
3940 return pd .NA
4041
4142 @property
4243 def type (self ) -> type [str ]:
44+ """Return the scalar type for the array, e.g. int."""
4345 return dict
4446
4547 @property
@@ -62,7 +64,9 @@ def __from_arrow__(array: typing.Union[pa.Array, pa.ChunkedArray]) -> JSONArray:
6264
6365
6466class JSONArray (ArrowExtensionArray ):
65- """Extension array containing JSON data."""
67+ """Extension array that handles BigQuery JSON data, leveraging a string-based
68+ pyarrow array for storage. It enables seamless conversion to JSON objects when
69+ accessing individual elements."""
6670
6771 _dtype = JSONDtype ()
6872
@@ -88,18 +92,7 @@ def __init__(self, values, dtype=None, copy=False) -> None:
8892 def _box_pa (
8993 cls , value , pa_type : pa .DataType | None = None
9094 ) -> pa .Array | pa .ChunkedArray | pa .Scalar :
91- """
92- Box value into a pyarrow Array, ChunkedArray or Scalar.
93-
94- Parameters
95- ----------
96- value : any
97- pa_type : pa.DataType | None
98-
99- Returns
100- -------
101- pa.Array or pa.ChunkedArray or pa.Scalar
102- """
95+ """Box value into a pyarrow Array, ChunkedArray or Scalar."""
10396 if isinstance (value , pa .Scalar ) or not (
10497 is_list_like (value ) and not is_dict_like (value )
10598 ):
@@ -108,18 +101,7 @@ def _box_pa(
108101
109102 @classmethod
110103 def _box_pa_scalar (cls , value , pa_type : pa .DataType | None = None ) -> pa .Scalar :
111- """
112- Box value into a pyarrow Scalar.
113-
114- Parameters
115- ----------
116- value : any
117- pa_type : pa.DataType | None
118-
119- Returns
120- -------
121- pa.Scalar
122- """
104+ """Box value into a pyarrow Scalar."""
123105 value = JSONArray ._seralizate_json (value )
124106 pa_scalar = super ()._box_pa_scalar (value , pa_type )
125107 if pa .types .is_string (pa_scalar .type ) and pa_type is None :
@@ -130,18 +112,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
130112 def _box_pa_array (
131113 cls , value , pa_type : pa .DataType | None = None , copy : bool = False
132114 ) -> pa .Array | pa .ChunkedArray :
133- """
134- Box value into a pyarrow Array or ChunkedArray.
135-
136- Parameters
137- ----------
138- value : Sequence
139- pa_type : pa.DataType | None
140-
141- Returns
142- -------
143- pa.Array or pa.ChunkedArray
144- """
115+ """Box value into a pyarrow Array or ChunkedArray."""
145116 if (
146117 not isinstance (value , cls )
147118 and not isinstance (value , (pa .Array , pa .ChunkedArray ))
@@ -155,18 +126,7 @@ def _box_pa_array(
155126
156127 @classmethod
157128 def _from_sequence (cls , scalars , * , dtype = None , copy = False ):
158- # TODO: check _from_arrow APIs etc.
159- # from pandas.core.arrays.masked import BaseMaskedArray
160-
161- # if isinstance(scalars, BaseMaskedArray):
162- # # avoid costly conversion to object dtype in ensure_string_array and
163- # # numerical issues with Float32Dtype
164- # na_values = scalars._mask
165- # result = scalars._data
166- # # result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
167- # return cls(pa.array(result, mask=na_values, type=pa.large_string()))
168- # elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
169- # return cls(pc.cast(scalars, pa.large_string()))
129+ """Construct a new ExtensionArray from a sequence of scalars."""
170130 result = []
171131 for scalar in scalars :
172132 result .append (JSONArray ._seralizate_json (scalar ))
@@ -176,10 +136,12 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
def _from_sequence_of_strings(
    cls, strings, *, dtype: ExtensionDtype, copy: bool = False
) -> JSONArray:
    """Build an array from string scalars by delegating to ``_from_sequence``.

    ``strings`` is passed through positionally; ``dtype`` and ``copy`` are
    forwarded unchanged as keyword arguments.
    """
    forwarded = {"dtype": dtype, "copy": copy}
    return cls._from_sequence(strings, **forwarded)
180141
181142 @staticmethod
182143 def _seralizate_json (value ):
144+ """A static method that converts a JSON value into a string representation."""
183145 if isinstance (value , str ) or pd .isna (value ):
184146 return value
185147 else :
@@ -189,6 +151,7 @@ def _seralizate_json(value):
189151
190152 @staticmethod
191153 def _deserialize_json (value ):
154+ """A static method that converts a JSON string back into its original value."""
192155 if not pd .isna (value ):
193156 return json .loads (value )
194157 else :
@@ -200,40 +163,24 @@ def dtype(self) -> JSONDtype:
200163 return self ._dtype
201164
def __contains__(self, key) -> bool:
    """Implement ``key in self``.

    ``key`` is first passed through ``JSONArray._seralizate_json`` and the
    result is handed to the parent class's membership test.
    """
    serialized_key = JSONArray._seralizate_json(key)
    return super().__contains__(serialized_key)
204168
def insert(self, loc: int, item) -> JSONArray:
    """Return the result of inserting ``item`` at position ``loc``.

    ``item`` is converted with ``JSONArray._seralizate_json`` and the
    insertion itself is delegated to the parent class, which therefore
    decides how ``loc`` (including negative values) is interpreted.
    """
    return super().insert(loc, JSONArray._seralizate_json(item))
208176
@classmethod
def _from_factorized(cls, values, original):
    """Rebuild an array from factorized ``values``.

    The dtype is taken from ``original`` and construction is delegated to
    ``_from_sequence``.
    """
    target_dtype = original.dtype
    return cls._from_sequence(values, dtype=target_dtype)
212181
213182 def __getitem__ (self , item ):
214- """Select a subset of self.
215-
216- Parameters
217- ----------
218- item : int, slice, or ndarray
219- * int: The position in 'self' to get.
220- * slice: A slice object, where 'start', 'stop', and 'step' are
221- integers or None
222- * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
223-
224- Returns
225- -------
226- item : scalar or ExtensionArray
227-
228- Notes
229- -----
230- For scalar ``item``, return a scalar value suitable for the array's
231- type. This should be an instance of ``self.dtype.type``.
232- For slice ``key``, return an instance of ``ExtensionArray``, even
233- if the slice is length 0 or 1.
234- For a boolean mask, return an instance of ``ExtensionArray``, filtered
235- to the values where ``item`` is True.
236- """
183+ """Select a subset of self."""
237184 item = check_array_indexer (self , item )
238185
239186 if isinstance (item , np .ndarray ):
@@ -283,37 +230,17 @@ def __getitem__(self, item):
283230 return scalar
284231
def __iter__(self):
    """Yield each element of the array.

    Every pyarrow scalar is converted with ``as_py()`` and passed through
    ``JSONArray._deserialize_json``. A result of ``None`` is replaced by
    the dtype's ``na_value``.
    """
    for pa_scalar in self._pa_array:
        decoded = JSONArray._deserialize_json(pa_scalar.as_py())
        yield self._dtype.na_value if decoded is None else decoded
295240
296- @classmethod
297- def _result_converter (cls , values , na = None ):
298- return pd .BooleanDtype ().__from_arrow__ (values )
299-
@classmethod
def _concat_same_type(cls, to_concat) -> JSONArray:
    """Concatenate several arrays into a single new one.

    The chunks of each input's ``_pa_array`` are gathered in order and
    combined into one ``large_string`` chunked array, which is wrapped in
    a new instance of ``cls``.
    """
    all_chunks = []
    for ext_array in to_concat:
        all_chunks.extend(ext_array._pa_array.iterchunks())
    combined = pa.chunked_array(all_chunks, type=pa.large_string())
    return cls(combined)
316-
317- def _pad_or_backfill (self , * , method , limit = None , copy = True ):
318- # GH#56616 - test EA method without limit_area argument
319- return super ()._pad_or_backfill (method = method , limit = limit , copy = copy )
0 commit comments