Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
[![license](https://img.shields.io/github/license/Lyncs-API/lyncs.io?logo=github&logoColor=white)](https://github.com/Lyncs-API/lyncs.io/blob/master/LICENSE)
[![build & test](https://img.shields.io/github/workflow/status/Lyncs-API/lyncs.io/build%20&%20test?logo=github&logoColor=white)](https://github.com/Lyncs-API/lyncs.io/actions)
[![codecov](https://img.shields.io/codecov/c/github/Lyncs-API/lyncs.io?logo=codecov&logoColor=white)](https://codecov.io/gh/Lyncs-API/lyncs.io)
[![pylint](https://img.shields.io/badge/pylint%20score-9.6%2F10-green?logo=python&logoColor=white)](http://pylint.pycqa.org/)
[![pylint](https://img.shields.io/badge/pylint%20score-9.5%2F10-green?logo=python&logoColor=white)](http://pylint.pycqa.org/)
[![black](https://img.shields.io/badge/code%20style-black-000000.svg?logo=codefactor&logoColor=white)](https://github.com/ambv/black)

Lyncs IO offers two high-level functions, `load` and `save` (or `dump` as an alias of `save`).
Expand Down
71 changes: 65 additions & 6 deletions lyncs_io/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,57 @@

from datetime import datetime
import numpy
from .utils import is_dask_array
from dask.array.core import Array as darr
from torch import Tensor, tensor
from .utils import (
is_dask_array,
is_sparse_matrix,
from_reduced,
in_torch_nn,
layer_to_tensor,
tensor_to_numpy,
check_support,
)
from . import __version__


def get_attrs(data):
def reconstruct_reduced(attrs):
    """
    Reconstructs an object from the tuple returned by __reduce__.

    `attrs` is `(callable, args)` optionally followed by a state object.
    The callable is invoked with `args` to create the object; the state,
    when present and not None, is then restored onto it.
    Any further items of the tuple (list items / dict items in the pickle
    protocol) are ignored here.
    """
    fnc, args, *rest = attrs
    obj = fnc(*args)

    # The state entry is optional: __reduce__ may return only two items,
    # or may return None as its third item.
    state = rest[0] if rest else None
    if state is None:
        return obj

    if hasattr(obj, "__setstate__"):
        obj.__setstate__(state)
    else:
        # Same fallback pickle uses when no __setstate__ is defined
        obj.__dict__.update(state)

    return obj


def get_attrs(data, flag=False):
    """
    Returns the list of attributes needed for reconstructing a data object.

    For tensors, numpy arrays, dask arrays and None a dictionary is returned
    with the library version, the creation time and the type of `data`.
    The type is stored as the class itself if `flag` is True, otherwise as
    its repr string.

    For any other object the tuple produced by `data.__reduce__()` is
    returned instead of a dictionary.
    """
    data_type = type(data)

    # Test the real class rather than the stored "type" entry: that entry is
    # a repr string when flag is False and would never match the classes below.
    if data_type not in (Tensor, numpy.ndarray, darr, type(None)):
        # Every object inherits __reduce__ from `object`, so a separate
        # __getstate__ fallback could never be reached.
        return data.__reduce__()

    return {
        "_lyncs_io": __version__,
        "created": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "type": data_type if flag else repr(data_type),
    }


def get_array_attrs(data):
"Returns attributes of an array"
Expand All @@ -39,6 +76,16 @@ def _to_array(data):
"Converts data to array"
if is_dask_array(data):
return data

if is_sparse_matrix(data):
return data.toarray()

if in_torch_nn(data):
return tensor_to_numpy(layer_to_tensor(data))

if isinstance(data, Tensor):
return tensor_to_numpy(data)

return numpy.array(data)


Expand All @@ -47,9 +94,14 @@ def to_array(data):
Converts a data object to array. Returns also the list of attributes
needed for reconstructing it.
"""
attrs = get_attrs(data)
check_support(data)

attrs = get_attrs(data, flag=True)
data = _to_array(data)
attrs.update(get_array_attrs(data))

if isinstance(attrs, dict):
attrs.update(get_array_attrs(data))

return data, attrs


Expand Down Expand Up @@ -80,5 +132,12 @@ def from_array(data, attrs=None):
"""
Converts array to a data object. Undoes to_array.
"""
# TODO

if from_reduced(attrs):
return reconstruct_reduced(attrs)

if isinstance(attrs, dict):
if attrs["type"] == Tensor:
return tensor(data)

return data
67 changes: 67 additions & 0 deletions lyncs_io/traverse_reduced.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import numpy
import torch
from collections import OrderedDict
from typing import Iterable
from torch.nn import Conv1d
from pprint import pprint


class Dummy(list):
    """List subclass used by to_dummy / from_dummy as a mutable stand-in for a tuple."""


def from_dummy(ds):
    """
    Turns Dummy containers back into tuples.

    Direct Dummy elements of `ds` are converted recursively and written back
    in place; if `ds` itself is a Dummy a tuple is returned, otherwise `ds`.
    """
    for position, item in enumerate(ds):
        if not isinstance(item, Dummy):
            continue
        ds[position] = from_dummy(item)

    return tuple(ds) if isinstance(ds, Dummy) else ds


def to_dummy(ds):
    """
    Turns tuples into mutable Dummy containers.

    A tuple `ds` is copied into a Dummy; any other container is used as is.
    Direct tuple elements are converted recursively and written back in place.
    """
    container = Dummy(ds) if isinstance(ds, tuple) else ds

    for position, item in enumerate(container):
        if isinstance(item, tuple):
            container[position] = to_dummy(item)

    return container


def gen():
    """Yields the integers 0, 1, 2, ... without end."""
    counter = -1
    while True:
        counter += 1
        yield counter


# Rebind `gen` from the generator function to one shared generator instance;
# it is the default `gen` argument of fnc and supplies the placeholder numbers.
gen = gen()
# Module-level registry filled by fnc: placeholder string -> object it replaced.
global_dict = {}


def fnc(s, reverse=False, gen=gen):
    """
    Recursively swaps torch.nn.Parameter objects and placeholder strings.

    Dicts, OrderedDicts, lists and tuples are rebuilt with the same container
    type and their values/items processed recursively.

    Every torch.nn.Parameter found is stored in `global_dict` under the key
    "placeholder<N>", where N is the next value of `gen`, and that key is
    returned in its place.

    When `reverse` is True, a string that is a key of `global_dict` is
    replaced by the stored object. Anything else is returned unchanged.
    """
    if isinstance(s, dict):  # also covers OrderedDict, which subclasses dict
        # Forward `gen` so that a caller-supplied generator is also used
        # for the nested values.
        mapped = {key: fnc(value, reverse, gen) for key, value in s.items()}
        return OrderedDict(mapped) if isinstance(s, OrderedDict) else mapped

    if isinstance(s, (list, tuple)):
        mapped = [fnc(item, reverse, gen) for item in s]
        return tuple(mapped) if isinstance(s, tuple) else mapped

    if isinstance(s, torch.nn.Parameter):
        placeholder = "placeholder" + str(next(gen))
        global_dict[placeholder] = s
        return placeholder

    if reverse and isinstance(s, str) and s in global_dict:
        return global_dict[s]

    return s


# Module-level self-check: runs whenever this module is executed or imported.
# Build a small layer and take the tuple returned by its __reduce__.
c = Conv1d(4, 4, 3)
reduced = c.__reduce__()

# Forward pass swaps the Parameters for placeholder strings,
# reverse pass swaps the placeholder strings back.
result = fnc(reduced)
after = fnc(result, reverse=True)

# Expect the round trip to give back the original and the forward result to differ from it.
test = reduced == after and reduced != result
print(test)
94 changes: 94 additions & 0 deletions lyncs_io/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,23 @@
from functools import wraps
from pathlib import Path
from os.path import splitext
from inspect import getmembers
from collections import defaultdict
from warnings import warn
import torch.nn
from pandas import DataFrame
from numpy import ndarray
from torch import Tensor
from lyncs_utils.io import FileLike
from scipy.sparse import (
csc_matrix,
csr_matrix,
coo_matrix,
bsr_matrix,
dia_matrix,
dok_matrix,
lil_matrix,
)


def find_file(filename):
Expand Down Expand Up @@ -54,6 +69,85 @@ def is_dask_array(obj):
return False


""" !!!!!!!!!!! """


def check_support(obj):
    "Raises a TypeError if the type of the object is not supported"
    supported = (
        is_sparse_matrix(obj)
        or is_dask_array(obj)
        or in_torch_nn(obj)
        or isinstance(obj, (ndarray, DataFrame, Tensor, type(None)))
    )
    if supported:
        return

    raise TypeError(f"{obj} {type(obj)} is not supported yet")


def in_torch_nn(obj):
    "Returns whether the object is an instance of any class exposed by torch.nn"
    nn_classes = tuple(
        member for _, member in getmembers(torch.nn) if isinstance(member, type)
    )
    return isinstance(obj, nn_classes)


def layer_to_tensor(layer):
    "Returns a full slice of the first entry in the layer's `_parameters` state"
    # Third item of the __reduce__ tuple is the state dictionary of the layer
    state = layer.__reduce__()[2]
    parameters = list(state["_parameters"].values())
    first_parameter = parameters[0]
    return first_parameter[:]


def layers_are_equal(layer1, layer2):
    """
    Compare two layers. Using double equals is inappropriate.

    Two layers are considered equal if they have the same class, the same
    textual representation and the same tensors (compared with torch.equal)
    under every key of their `state_dict()`.

    Objects without a `state_dict` method are compared through the tuples
    returned by their `__reduce__`.
    """
    if layer1 is layer2:
        return True
    if type(layer1) is not type(layer2):
        return False
    if not hasattr(layer1, "state_dict"):
        return layer1.__reduce__() == layer2.__reduce__()

    # The repr of a layer shows its configuration but not the tensor values
    if repr(layer1) != repr(layer2):
        return False

    state1, state2 = layer1.state_dict(), layer2.state_dict()
    if state1.keys() != state2.keys():
        return False

    # `==` between tensors is element-wise and cannot be used as a boolean,
    # which is what made plain tuple comparison fail for distinct tensors.
    return all(torch.equal(state1[name], state2[name]) for name in state1)


def tensor_to_numpy(tensor):
    """
    Converts a tensor to a numpy array.

    The tensor is detached from the autograd graph and moved to the CPU
    before the conversion, so tensors that require grad or live on another
    device are accepted as well.
    """
    # .cpu() returns the tensor itself when it is already in CPU memory
    return tensor.detach().cpu().numpy()


def is_sparse_matrix(obj):
    "Returns whether the object is one of the scipy sparse matrix formats"
    sparse_formats = (
        csc_matrix,
        csr_matrix,
        coo_matrix,
        bsr_matrix,
        dia_matrix,
        dok_matrix,
        lil_matrix,
    )
    return isinstance(obj, sparse_formats)


def from_state(attrs):
    """
    Check whether an object matches the tuple's format returned by __getstate__

    The expected format is a 2-tuple made of a callable followed by a
    dictionary holding the state.
    """
    return (
        isinstance(attrs, tuple)
        and len(attrs) == 2
        and callable(attrs[0])
        # Check the state itself: `type(attrs[1])` is a class and is never a dict
        and isinstance(attrs[1], dict)
    )


def from_reduced(attrs):
    "Tells whether attrs is a 3-tuple of the form (callable, tuple, dict), as produced by __reduce__"
    if not isinstance(attrs, tuple) or len(attrs) != 3:
        return False

    constructor, args, state = attrs
    return callable(constructor) and isinstance(args, tuple) and isinstance(state, dict)


""" !!!!!!!!!!! """


def swap(fnc):
"Returns a wrapper that swaps the first two arguments of the function"
return wraps(fnc)(
Expand Down
80 changes: 80 additions & 0 deletions test/serial/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from lyncs_io.convert import to_array, from_array
from torch.nn import Conv1d
from torch import Tensor
import numpy as np
import dask.array as da
from pandas import DataFrame
from scipy import sparse
from lyncs_io.utils import layers_are_equal


def test_to_from_array():
    "Round-trips every kind of data exercised below through to_array and from_array"

    # TODO: [x] sparse matrices
    # TODO: [x] ndarrays
    # TODO: [x] built-ins
    # TODO: [x] dask
    # TODO: [x] torch
    # TODO: [x] Dataframes

    # ??
    # TODO: [ ] keras
    # TODO: [ ] tensorflow

    # Test DataFrames
    df = DataFrame({"A": [1, 2], "B": [3, 4]})
    arr, attrs = to_array(df)
    new_df = from_array(arr, attrs)

    assert (arr == np.array(df)).all()
    assert isinstance(new_df, type(df))
    assert (df.all() == new_df.all()).all()

    # Test sparse matrices
    formats = ["csr", "csc", "coo", "bsr", "dia", "dok", "lil"]

    for f in formats:
        matrix = sparse.random(4, 4, format=f)
        arr, attrs = to_array(matrix)
        new_m = from_array(arr, attrs)

        assert (arr == matrix.toarray()).all()
        assert isinstance(new_m, type(matrix))
        assert (matrix != new_m).nnz == 0
        # np.allclose on the dense form is fine here since the matrices are small.
        # toarray() is used instead of the `.A` shorthand, as in the assert above.
        assert np.allclose(matrix.toarray(), new_m.toarray())

    # Test ndarrays
    ndarr = np.random.rand(2, 2)
    arr, attrs = to_array(ndarr)
    new_ndarr = from_array(arr, attrs)
    assert (arr == np.array(ndarr)).all()
    assert (ndarr == new_ndarr).all()
    assert isinstance(new_ndarr, type(ndarr))

    # Test dask
    darr = da.random.random((10, 10))
    arr, attrs = to_array(darr)
    new_darr = from_array(arr, attrs)
    assert (arr == np.array(darr)).all()
    assert (darr == new_darr).all()
    assert isinstance(new_darr, type(darr))

    # Test torch layers
    conv1d = Conv1d(4, 4, 3)
    arr, attrs = to_array(conv1d)
    new_conv = from_array(arr, attrs)
    # assert numpy array
    assert isinstance(arr, np.ndarray)
    assert layers_are_equal(conv1d, new_conv)

    # Test torch tensors
    # Tensor(4, 4, 3) alone is uninitialised memory and may hold NaN, which
    # never compares equal to itself; fill it with defined values first.
    tensor = Tensor(4, 4, 3).uniform_()
    arr, attrs = to_array(tensor)
    new_tens = from_array(arr, attrs)
    # assert numpy array
    assert isinstance(arr, np.ndarray)
    assert (tensor == new_tens).all()
Loading