diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 2dac5b056aba0..63ce25ec5f2b2 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -24,6 +24,7 @@ from typing import ( Any, Callable, + Dict, Iterable, IO, List, @@ -905,6 +906,9 @@ def to_json( .. note:: output JSON format is different from pandas'. It always use `orient='records'` for its output. This behaviour might have to change in the near future. + .. note:: Set the `ignoreNullFields` keyword argument to `True` to omit `None` or `NaN` + values when writing JSON objects. This option takes effect only when `path` is provided. + Note NaN's and None will be converted to null and datetime objects will be converted to UNIX timestamps. @@ -981,6 +985,9 @@ def to_json( if "options" in options and isinstance(options.get("options"), dict) and len(options) == 1: options = options.get("options") + default_options: Dict[str, Any] = {"ignoreNullFields": False} + options = {**default_options, **options} + if not lines: raise NotImplementedError("lines=False is not implemented yet.")