Lyncs-API · alexandrosangeli · Aug 17, 2021 · Aug 17, 2021 · Aug 17, 2021 · Aug 17, 2021
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@
 [![license](https://img.shields.io/github/license/Lyncs-API/lyncs.io?logo=github&logoColor=white)](https://github.com/Lyncs-API/lyncs.io/blob/master/LICENSE)
 [![build & test](https://img.shields.io/github/workflow/status/Lyncs-API/lyncs.io/build%20&%20test?logo=github&logoColor=white)](https://github.com/Lyncs-API/lyncs.io/actions)
 [![codecov](https://img.shields.io/codecov/c/github/Lyncs-API/lyncs.io?logo=codecov&logoColor=white)](https://codecov.io/gh/Lyncs-API/lyncs.io)
-[![pylint](https://img.shields.io/badge/pylint%20score-9.6%2F10-green?logo=python&logoColor=white)](http://pylint.pycqa.org/)
+[![pylint](https://img.shields.io/badge/pylint%20score-9.5%2F10-green?logo=python&logoColor=white)](http://pylint.pycqa.org/)
 [![black](https://img.shields.io/badge/code%20style-black-000000.svg?logo=codefactor&logoColor=white)](https://github.com/ambv/black)
 
 Lyncs IO offers two high-level functions `load` and `save` (or `dump` as alias of `save`).

diff --git a/lyncs_io/convert.py b/lyncs_io/convert.py
@@ -5,20 +5,57 @@
 
 from datetime import datetime
 import numpy
-from .utils import is_dask_array
+from dask.array.core import Array as darr
+from torch import Tensor, tensor
+from .utils import (
+    is_dask_array,
+    is_sparse_matrix,
+    from_reduced,
+    in_torch_nn,
+    layer_to_tensor,
+    tensor_to_numpy,
+    check_support,
+)
 from . import __version__
 
 
-def get_attrs(data):
+def reconstruct_reduced(attrs):
+    "Rebuilds an object from a (callable, args, state) tuple given by __reduce__"
+    constructor, ctor_args, state = attrs
+    instance = constructor(*ctor_args)
+
+    # Use the object's own restore hook if it has one, else fill __dict__
+    if not hasattr(instance, "__setstate__"):
+        instance.__dict__.update(state)
+    else:
+        instance.__setstate__(state)
+    return instance
+
+
+def get_attrs(data, flag=False):
     """
     Returns the list of attributes needed for reconstructing a data object
     """
-    return {
+    _dict = {
         "_lyncs_io": __version__,
         "created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "type": repr(type(data)),
     }
 
+    data_type = type(data)
+    if flag:
+        # Callers that rebuild objects need the class itself, not its repr
+        _dict["type"] = data_type
+
+    # Test the real class: when flag is False "type" holds a repr string,
+    # which never equals a class and pushed every object down this branch.
+    if data_type not in (Tensor, numpy.ndarray, darr, type(None)):
+        # Every object inherits __reduce__, so the hasattr guard was always
+        # true and the __getstate__ fallback behind it could never run.
+        return data.__reduce__()
+
+    return _dict
+
 
 def get_array_attrs(data):
     "Returns attributes of an array"
@@ -39,6 +76,16 @@ def _to_array(data):
     "Converts data to array"
     if is_dask_array(data):
         return data
+
+    if is_sparse_matrix(data):
+        return data.toarray()
+
+    if in_torch_nn(data):
+        return tensor_to_numpy(layer_to_tensor(data))
+
+    if isinstance(data, Tensor):
+        return tensor_to_numpy(data)
+
     return numpy.array(data)
 
 
@@ -47,9 +94,14 @@ def to_array(data):
     Converts a data object to array. Returns also the list of attributes
     needed for reconstructing it.
     """
-    attrs = get_attrs(data)
+    check_support(data)
+
+    attrs = get_attrs(data, flag=True)
     data = _to_array(data)
-    attrs.update(get_array_attrs(data))
+
+    if isinstance(attrs, dict):
+        attrs.update(get_array_attrs(data))
+
     return data, attrs
 
 
@@ -80,5 +132,12 @@ def from_array(data, attrs=None):
     """
     Converts array to a data object. Undoes to_array.
     """
-    # TODO
+
+    if from_reduced(attrs):
+        return reconstruct_reduced(attrs)
+
+    if isinstance(attrs, dict):
+        if attrs["type"] == Tensor:
+            return tensor(data)
+
     return data
diff --git a/lyncs_io/traverse_reduced.py b/lyncs_io/traverse_reduced.py
@@ -0,0 +1,67 @@
+import numpy
+import torch
+from collections import OrderedDict
+from typing import Iterable
+from torch.nn import Conv1d
+from pprint import pprint
+
+
+class Dummy(list):
+    "Mutable list standing in for a tuple (see to_dummy and from_dummy)"
+
+
+def from_dummy(ds):
+    "Turns Dummy markers back into tuples; the given list is edited in place"
+    for idx, item in enumerate(ds):
+        if not isinstance(item, Dummy):
+            continue
+        ds[idx] = from_dummy(item)
+    return tuple(ds) if isinstance(ds, Dummy) else ds
+
+
+def to_dummy(ds):
+    "Swaps tuples for mutable Dummy lists; only tuple children are descended"
+    out = Dummy(ds) if isinstance(ds, tuple) else ds
+    for idx, item in enumerate(out):
+        if isinstance(item, tuple):
+            out[idx] = to_dummy(item)
+    return out
+
+
+def gen():  # endless counter: successive next() calls give 0, 1, 2, ...
+    num = 0
+    while True:
+        yield num
+        num += 1
+
+
+gen = gen()  # rebinds the name to one shared generator, hiding the function
+global_dict = {}  # "placeholder<N>" -> Parameter, filled in by fnc
+
+
+def fnc(s, reverse=False, gen=gen):
+    "Swaps Parameters for placeholder strings; reverse=True swaps them back"
+    if isinstance(s, torch.nn.Parameter):
+        # gen is the shared counter, so every placeholder name is unique
+        name = "placeholder" + str(next(gen))
+        global_dict[name] = s
+        return name
+    if isinstance(s, str):
+        return global_dict[s] if reverse and s in global_dict else s
+    if isinstance(s, dict):
+        mapped = {key: fnc(value, reverse) for key, value in s.items()}
+        return OrderedDict(mapped) if isinstance(s, OrderedDict) else mapped
+    if isinstance(s, (list, tuple)):
+        mapped = [fnc(item, reverse) for item in s]
+        return tuple(mapped) if isinstance(s, tuple) else mapped
+    return s
+
+
+c = Conv1d(4, 4, 3)  # NOTE(review): everything below runs at import time
+reduced = c.__reduce__()
+
+result = fnc(reduced)  # Parameters swapped for placeholder strings
+after = fnc(result, reverse=True)  # placeholder strings swapped back
+
+test = reduced == after and reduced != result  # round trip must restore it
+print(test)
diff --git a/lyncs_io/utils.py b/lyncs_io/utils.py
@@ -5,8 +5,23 @@
 from functools import wraps
 from pathlib import Path
 from os.path import splitext
+from inspect import getmembers
 from collections import defaultdict
+from warnings import warn
+import torch.nn
+from pandas import DataFrame
+from numpy import ndarray
+from torch import Tensor
 from lyncs_utils.io import FileLike
+from scipy.sparse import (
+    csc_matrix,
+    csr_matrix,
+    coo_matrix,
+    bsr_matrix,
+    dia_matrix,
+    dok_matrix,
+    lil_matrix,
+)
 
 
 def find_file(filename):
@@ -54,6 +69,85 @@ def is_dask_array(obj):
         return False
 
 
+""" !!!!!!!!!!! """
+
+
+def check_support(obj):
+    "Raises TypeError when the object's type is not supported"
+    plain_types = (ndarray, DataFrame, Tensor, type(None))
+    predicates = (is_sparse_matrix, is_dask_array, in_torch_nn)
+    # The predicates run lazily, in this order, and stop at the first match;
+    # the plain isinstance test (None included) is only the last resort
+    if any(check(obj) for check in predicates) or isinstance(obj, plain_types):
+        return
+    raise TypeError(f"{obj} {type(obj)} is not supported yet")
+
+
+def in_torch_nn(obj):
+    "Checks if obj is an instance of any class exposed by the torch.nn module"
+    nn_types = tuple(val for _, val in getmembers(torch.nn) if isinstance(val, type))
+    return isinstance(obj, nn_types)
+
+
+def layer_to_tensor(layer):
+    "Converts a torch layer to a tensor (its first registered parameter)"
+    _fnc, _args, state = layer.__reduce__()
+    # Only the first entry of "_parameters" is used; the others are ignored
+    entries = list(state["_parameters"].items())
+    _name, first = entries[0]
+    return first[:]
+
+
+def layers_are_equal(layer1, layer2):
+    "Compares two layers through their __reduce__() output instead of =="
+    return layer1.__reduce__() == layer2.__reduce__()  # TODO: verify for tensor state
+
+
+def tensor_to_numpy(tensor):
+    "Converts a tensor to a numpy array; .cpu() makes GPU tensors convertible"
+    return tensor.detach().cpu().numpy()
+
+
+def is_sparse_matrix(obj):
+    "Check whether an object is a sparse matrix"
+    # One entry per scipy.sparse matrix class imported at the top of the file;
+    # any other object, dense arrays included, gives False
+    sparse_types = (
+        csc_matrix,
+        csr_matrix,
+        coo_matrix,
+        bsr_matrix,
+        dia_matrix,
+        dok_matrix,
+        lil_matrix,
+    )
+    return isinstance(obj, sparse_types)
+
+
+def from_state(attrs):
+    "Returns whether attrs is a (callable, state dict) pair built from __getstate__"
+    return (
+        isinstance(attrs, tuple)
+        and len(attrs) == 2
+        and callable(attrs[0])
+        and isinstance(attrs[1], dict)  # test the state itself, not its type
+    )
+
+
+def from_reduced(attrs):
+    "Returns whether attrs has the (callable, args tuple, state dict) reduce shape"
+    # Anything that is not a 3-tuple is rejected before it gets unpacked
+    if not isinstance(attrs, tuple) or len(attrs) != 3:
+        return False
+    fnc, args, state = attrs
+    # The three slots must be a callable, its positional arguments and the
+    # state mapping, in that order
+    return callable(fnc) and isinstance(args, tuple) and isinstance(state, dict)
+
+
+""" !!!!!!!!!!! """
+
+
 def swap(fnc):
     "Returns a wrapper that swaps the first two arguments of the function"
     return wraps(fnc)(

diff --git a/test/serial/test_convert.py b/test/serial/test_convert.py
@@ -0,0 +1,80 @@
+from lyncs_io.convert import to_array, from_array
+from torch.nn import Conv1d
+from torch import Tensor
+import numpy as np
+import dask.array as da
+from pandas import DataFrame
+from scipy import sparse
+from lyncs_io.utils import layers_are_equal
+
+
+def test_to_from_array():
+    "Round-trips each supported type through to_array and from_array"
+    # TODO: [x] sparse matrices
+    # TODO: [x] ndarrays
+    # TODO: [x] built-ins
+    # TODO: [x] dask
+    # TODO: [x] torch
+    # TODO: [x] Dataframes
+
+    # ??
+    # TODO: [ ] keras
+    # TODO: [ ] tensorflow
+
+    # Test DataFrames
+    df = DataFrame({"A": [1, 2], "B": [3, 4]})
+    arr, attrs = to_array(df)
+    new_df = from_array(arr, attrs)
+
+    assert (arr == np.array(df)).all()
+    assert isinstance(new_df, type(df))
+    assert (df.all() == new_df.all()).all()
+
+    # Test sparse matrices
+    formats = ["csr", "csc", "coo", "bsr", "dia", "dok", "lil"]
+
+    for f in formats:
+        matrix = sparse.random(4, 4, format=f)
+        arr, attrs = to_array(matrix)
+        new_m = from_array(arr, attrs)
+
+        # TODO:
+
+        assert (arr == matrix.toarray()).all()
+        assert isinstance(new_m, type(matrix))
+        assert (matrix != new_m).nnz == 0
+        assert np.allclose(matrix.toarray(), new_m.toarray())
+
+        # "For dense arrays >>> np.allclose
+        # is a good way of testing equality.
+        # And if the sparse arrays aren't too large, that might be good as well"
+
+    # Test ndarrays
+    ndarr = np.random.rand(2, 2)
+    arr, attrs = to_array(ndarr)
+    new_ndarr = from_array(arr, attrs)
+    assert (arr == np.array(ndarr)).all()
+    assert (ndarr == new_ndarr).all()
+    assert isinstance(new_ndarr, type(ndarr))
+
+    # Test dask
+    darr = da.random.random((10, 10))
+    arr, attrs = to_array(darr)
+    new_darr = from_array(arr, attrs)
+    assert (arr == np.array(darr)).all()
+    assert (darr == new_darr).all()
+    assert isinstance(new_darr, type(darr))
+
+    conv1d = Conv1d(4, 4, 3)
+    arr, attrs = to_array(conv1d)
+    new_conv = from_array(arr, attrs)
+    # assert numpy array
+    assert isinstance(arr, np.ndarray)
+    assert layers_are_equal(conv1d, new_conv)
+
+    tensor = Tensor(4, 4, 3).uniform_()  # bare Tensor(...) is uninitialised (NaN)
+    arr, attrs = to_array(tensor)
+    new_tens = from_array(arr, attrs)
+    # assert numpy array
+    assert isinstance(arr, np.ndarray)
+    assert (tensor == new_tens).all()