larray-project
diff --git a/doc/source/changes/version_0_34.rst.inc b/doc/source/changes/version_0_34.rst.inc
Lines changed: 2 additions & 1 deletion
diff --git a/larray/core/array.py b/larray/core/array.py
Lines changed: 13 additions & 12 deletions
diff --git a/larray/core/session.py b/larray/core/session.py
Lines changed: 19 additions & 9 deletions
diff --git a/larray/inout/common.py b/larray/inout/common.py
Lines changed: 13 additions & 8 deletions
diff --git a/larray/inout/csv.py b/larray/inout/csv.py
Lines changed: 22 additions & 24 deletions
diff --git a/larray/inout/excel.py b/larray/inout/excel.py
Lines changed: 2 additions & 4 deletions
diff --git a/larray/inout/hdf.py b/larray/inout/hdf.py
Lines changed: 5 additions & 1 deletion
@@ -49,7 +49,8 @@ New features
 Miscellaneous improvements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-* improved something.
+* made all I/O functions, methods and constructors accept either a string or a pathlib.Path object
+  for all arguments representing a path (closes :issue:`896`).
 
 
 Fixes
 
@@ -28,8 +28,8 @@
 from collections import OrderedDict
 from itertools import product, chain, groupby
 from collections.abc import Iterable, Sequence
+from pathlib import Path
 import builtins
-import os
 import functools
 import warnings
 
@@ -6891,7 +6891,7 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
 
         Parameters
         ----------
-        filepath : str
+        filepath : str or Path
             path where the csv file has to be written.
         sep : str, optional
             separator for the csv file. Defaults to `,`.
@@ -6912,8 +6912,8 @@ def to_csv(self, filepath, sep=',', na_rep='', wide=True, value_name='value', dr
 
         Examples
         --------
-        >>> tmpdir = getfixture('tmpdir')
-        >>> fname = os.path.join(tmpdir.strpath, 'test.csv')
+        >>> tmp_path = getfixture('tmp_path')
+        >>> fname = tmp_path / 'test.csv'
         >>> a = ndtest('nat=BE,FO;sex=M,F')
         >>> a
         nat\sex  M  F
@@ -6965,7 +6965,7 @@ def to_hdf(self, filepath, key) -> None:
 
         Parameters
         ----------
-        filepath : str
+        filepath : str or Path
             Path where the hdf file has to be written.
         key : str or Group
             Key (path) of the array within the HDF file (see Notes below).
@@ -7037,7 +7037,7 @@ def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=Fals
 
         Parameters
         ----------
-        filepath : str or int or None, optional
+        filepath : str or Path or int or None, optional
             Path where the excel file has to be written. If None (default), creates a new Excel Workbook in a live Excel
             instance (Windows only). Use -1 to use the currently active Excel Workbook. Use a name without extension
             (.xlsx) to use any unsaved* workbook.
@@ -7092,19 +7092,20 @@ def to_excel(self, filepath=None, sheet=None, position='A1', overwrite_file=Fals
         if engine is None:
             engine = 'xlwings' if xw is not None else None
 
+        if isinstance(filepath, str):
+            filepath = Path(filepath)
+
         if engine == 'xlwings':
             from larray.inout.xw_excel import open_excel
 
             close = False
             new_workbook = False
             if filepath is None:
                 new_workbook = True
-            elif isinstance(filepath, str):
-                basename, ext = os.path.splitext(filepath)
-                if ext:
-                    if not os.path.isfile(filepath):
-                        new_workbook = True
-                    close = True
+            elif isinstance(filepath, Path) and filepath.suffix:
+                if not filepath.is_file():
+                    new_workbook = True
+                close = True
             if new_workbook or overwrite_file:
                 new_workbook = overwrite_file = True
 
 
@@ -1,10 +1,10 @@
-import os
 import sys
 import re
 import fnmatch
 import warnings
 from collections import OrderedDict
 from collections.abc import Iterable
+from pathlib import Path
 
 import numpy as np
 
@@ -87,7 +87,7 @@ def __init__(self, *args, **kwargs):
         if len(args) == 1:
             assert len(kwargs) == 0
             a0 = args[0]
-            if isinstance(a0, str):
+            if isinstance(a0, (str, Path)):
                 # assume a0 is a filename
                 self.load(a0)
             else:
@@ -356,7 +356,7 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
 
         Parameters
         ----------
-        fname : str
+        fname : str or Path
             This can be either the path to a single file, a path to a directory containing .csv files or a pattern
             representing several .csv files.
         names : list of str, optional
@@ -427,13 +427,18 @@ def load(self, fname, names=None, engine='auto', display=False, **kwargs):
         if display:
             print("opening", fname)
         if fname is None:
-            if all([os.path.splitext(name)[1] == '.csv' for name in names]):
+            if all([Path(name).suffix == '.csv' for name in names]):
                 engine = ext_default_engine['csv']
             else:
                 raise ValueError(f"List of paths to only CSV files expected. Got {names}")
+        elif isinstance(fname, str):
+            fname = Path(fname)
+        if fname is not None and not isinstance(fname, Path):  # None is valid: 'names' then holds the CSV paths
+            raise TypeError(f"Expected a string or a Path object for the 'fname' argument. "
+                            f"Got object of type '{type(fname).__name__}' instead.")
         if engine == 'auto':
-            _, ext = os.path.splitext(fname)
-            ext = ext.strip('.') if '.' in ext else 'csv'
+            ext = fname.suffix
+            ext = ext.strip('.') if ext else 'csv'
             engine = ext_default_engine[ext]
         handler_cls = get_file_handler(engine)
         if engine == 'pandas_csv' and 'sep' in kwargs:
@@ -455,7 +460,7 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
 
         Parameters
         ----------
-        fname : str
+        fname : str or Path
             Path of the file for the dump.
             If objects are saved in CSV files, the path corresponds to a directory.
         names : list of str or None, optional
@@ -515,9 +520,14 @@ def save(self, fname, names=None, engine='auto', overwrite=True, display=False,
         dumping arr1 ... done
         dumping arr4 ... done
         """
+        if isinstance(fname, str):
+            fname = Path(fname)
+        if not isinstance(fname, Path):
+            raise TypeError(f"Expected a string or a Path object for the 'fname' argument. "
+                            f"Got object of type '{type(fname).__name__}' instead.")
         if engine == 'auto':
-            _, ext = os.path.splitext(fname)
-            ext = ext.strip('.') if '.' in ext else 'csv'
+            ext = fname.suffix
+            ext = ext.strip('.') if ext else 'csv'
             engine = ext_default_engine[ext]
         handler_cls = get_file_handler(engine)
         if engine == 'pandas_csv' and 'sep' in kwargs:
 
@@ -1,8 +1,9 @@
 import os
 from datetime import date, time, datetime
 from collections import OrderedDict
+from pathlib import Path
 
-from typing import List, Tuple
+from typing import Optional, Union, List, Tuple
 
 from larray.core.axis import Axis
 from larray.core.group import Group
@@ -41,15 +42,20 @@ class FileHandler:
 
     Parameters
     ----------
-    fname : str
+    fname : str or Path or None
         Filename.
 
     Attributes
     ----------
-    fname : str
+    fname : Path
         Filename.
     """
-    def __init__(self, fname, overwrite_file=False):
+    def __init__(self, fname: Optional[Union[str, Path]], overwrite_file: bool = False):
+        if isinstance(fname, str):
+            fname = Path(fname)
+        if fname is not None and not isinstance(fname, Path):
+            raise TypeError(f"Expected a string or a pathlib.Path object for the 'fname' argument. "
+                            f"Got an object of type {type(fname).__name__} instead.")
         self.fname = fname
         self.original_file_name = None
         self.overwrite_file = overwrite_file
@@ -96,13 +102,12 @@ def close(self):
         raise NotImplementedError()
 
     def _get_original_file_name(self):
-        if self.overwrite_file and os.path.isfile(self.fname):
+        if self.overwrite_file and self.fname.is_file():
             self.original_file_name = self.fname
-            fname, ext = os.path.splitext(self.fname)
-            self.fname = f'{fname}~{ext}'
+            self.fname = self.fname.parent / (self.fname.stem + '~' + self.fname.suffix)
 
     def _update_original_file(self):
-        if self.original_file_name is not None and os.path.isfile(self.fname):
+        if self.original_file_name is not None and self.fname.is_file():
             os.remove(self.original_file_name)
             os.rename(self.fname, self.original_file_name)
 
 
@@ -1,7 +1,7 @@
 import os
 import csv
 import warnings
-from glob import glob
+from pathlib import Path
 
 import pandas as pd
 import numpy as np
@@ -83,7 +83,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
     Examples
     --------
     >>> csv_dir = get_example_filepath('examples')
-    >>> fname = csv_dir + '/population.csv'
+    >>> fname = csv_dir / 'population.csv'
 
     >>> # The data below is derived from a subset of the demo_pjan table from Eurostat
     >>> read_csv(fname)
@@ -97,7 +97,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
 
     Missing label combinations
 
-    >>> fname = csv_dir + '/population_missing_values.csv'
+    >>> fname = csv_dir / 'population_missing_values.csv'
     >>> # let's take a look inside the CSV file.
     >>> # they are missing label combinations: (Paris, male) and (New York, female)
     >>> with open(fname) as f:
@@ -129,7 +129,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
 
     Specify the number of axes of the output array (useful when the name of the last axis is implicit)
 
-    >>> fname = csv_dir + '/population_missing_axis_name.csv'
+    >>> fname = csv_dir / 'population_missing_axis_name.csv'
     >>> # let's take a look inside the CSV file.
     >>> # The name of the last axis is missing.
     >>> with open(fname) as f:
@@ -164,7 +164,7 @@ def read_csv(filepath_or_buffer, nb_axes=None, index_col=None, sep=',', headerse
 
     Read array saved in "narrow" format (wide=False)
 
-    >>> fname = csv_dir + '/population_narrow_format.csv'
+    >>> fname = csv_dir / 'population_narrow_format.csv'
     >>> # let's take a look inside the CSV file.
     >>> # Here, data are stored in a 'narrow' format.
     >>> with open(fname) as f:
@@ -260,46 +260,44 @@ def __init__(self, fname, overwrite_file=False, sep=','):
         self.sep = sep
         self.axes = None
         self.groups = None
-        if fname is None:
+        if self.fname is None:
             self.pattern = None
             self.directory = None
-        elif '.csv' in fname or '*' in fname or '?' in fname:
-            self.pattern = fname
-            self.directory = os.path.dirname(fname)
+        elif self.fname.suffix == '.csv' or '*' in self.fname.name or '?' in self.fname.name:
+            self.pattern = self.fname.name
+            self.directory = self.fname.parent  # use the normalized path; raw 'fname' may be a str
         else:
             # assume fname is a directory.
-            # Not testing for os.path.isdir(fname) here because when writing, the directory might not exist.
-            self.pattern = os.path.join(fname, '*.csv')
-            self.directory = fname
+            # Not testing for fname.is_dir() here because when writing, the directory might not exist.
+            self.pattern = '*.csv'
+            self.directory = self.fname
 
     def _get_original_file_name(self):
         pass
 
-    def _to_filepath(self, key: str) -> str:
+    def _to_filepath(self, key) -> Path:
         if self.directory is not None:
-            return os.path.join(self.directory, f'{key}.csv')
+            return self.directory / f'{key}.csv'
         else:
-            return key
+            return Path(key)
 
     def _open_for_read(self):
-        if self.directory and not os.path.isdir(self.directory):
+        if self.directory and not self.directory.is_dir():
             raise ValueError(f"Directory '{self.directory}' does not exist")
 
     def _open_for_write(self):
         if self.directory is not None:
             try:
                 os.makedirs(self.directory)
             except OSError:
-                if not os.path.isdir(self.directory):
+                if not self.directory.is_dir():
                     raise ValueError(f"Path {self.directory} must represent a directory")
 
     def list_items(self) -> List[Tuple[str, str]]:
-        fnames = glob(self.pattern) if self.pattern is not None else []
-        # drop directory
-        fnames = [os.path.basename(fname) for fname in fnames]
-        # strip extension from files
-        # XXX: unsure we should use sorted here
-        fnames = sorted([os.path.splitext(fname)[0] for fname in fnames])
+        fnames = self.directory.glob(self.pattern) if self.pattern is not None else []
+        # stem = filename without extension
+        # FIXME : not sure sorted is required here
+        fnames = sorted([fname.stem for fname in fnames])
         return [(name, 'Array') for name in fnames if name != '__metadata__']
 
     def _read_item(self, key, type, *args, **kwargs) -> Array:
@@ -316,7 +314,7 @@ def _dump_item(self, key, value, *args, **kwargs):
 
     def _read_metadata(self) -> Metadata:
         filepath = self._to_filepath('__metadata__')
-        if os.path.isfile(filepath):
+        if filepath.is_file():
             meta = read_csv(filepath, wide=False)
             return Metadata.from_array(meta)
         else:
 
@@ -1,5 +1,4 @@
 import warnings
-import os
 
 import numpy as np
 import pandas as pd
@@ -39,7 +38,7 @@ def read_excel(filepath, sheet=0, nb_axes=None, index_col=None, fill_value=nan,
 
     Parameters
     ----------
-    filepath : str
+    filepath : str or Path
         Path where the Excel file has to be read or use -1 to refer to the currently active workbook.
     sheet : str, Group or int, optional
         Name or index of the Excel sheet containing the array to be read.
@@ -241,8 +240,7 @@ def _open_for_read(self):
         self.handle = pd.ExcelFile(self.fname)
 
     def _open_for_write(self):
-        _, ext = os.path.splitext(self.fname)
-        engine = 'xlsxwriter' if ext == '.xlsx' and xlsxwriter is not None else None
+        engine = 'xlsxwriter' if (self.fname.suffix == '.xlsx' and xlsxwriter is not None) else None
         self.handle = pd.ExcelWriter(self.fname, engine=engine)
 
     def list_items(self) -> List[Tuple[str, str]]:
 
@@ -31,7 +31,7 @@ def read_hdf(filepath_or_buffer, key, fill_value=nan, na=nan, sort_rows=False, s
 
     Parameters
     ----------
-    filepath_or_buffer : str or pandas.HDFStore
+    filepath_or_buffer : str or Path or pandas.HDFStore
         Path and name where the HDF5 file is stored or a HDFStore object.
     key : str or Group
         Name of the scalar or axis or group or array.
@@ -133,6 +133,10 @@ class PandasHDFHandler(FileHandler):
     r"""
     Handler for HDF5 files using Pandas.
     """
+    def __init__(self, fname, overwrite_file=False):
+        assert fname is not None
+        super(PandasHDFHandler, self).__init__(fname, overwrite_file)
+
     def _open_for_read(self):
         self.handle = HDFStore(self.fname, mode='r')