From f37dd0a0c680b07b7a96ea013a88ad97df7d90ef Mon Sep 17 00:00:00 2001 From: Simon Nick Date: Tue, 13 Mar 2018 17:24:07 +0100 Subject: [PATCH] Enhancements for network class --- pathpy/classes/network.py | 509 +++++++++++++++++++++----------------- 1 file changed, 279 insertions(+), 230 deletions(-) diff --git a/pathpy/classes/network.py b/pathpy/classes/network.py index b14bbc7..68fc206 100644 --- a/pathpy/classes/network.py +++ b/pathpy/classes/network.py @@ -43,7 +43,7 @@ def __init__(self, directed=False): Generates an empty network. """ - self.directed = directed + self._directed = directed # A dictionary containing nodes as well as node properties self.nodes = _co.defaultdict(dict) @@ -60,151 +60,117 @@ def __init__(self, directed=False): # A dictionary containing the sets of predecessors of all nodes self.predecessors = _co.defaultdict(set) - @classmethod - def read_edges(cls, filename, separator=',', weighted=False, directed=False): - """ - Reads a network from an edge list file - Reads data from a file containing multiple lines of *edges* of the - form "v,w,frequency,X" (where frequency is optional and X are - arbitrary additional columns). The default separating character ',' - can be changed. In order to calculate the statistics of paths of any length, - by default all subpaths of length 0 (i.e. single nodes) contained in an edge - will be considered. - - Parameters - ---------- - filename : str - path to edgelist file - separator : str - character separating the nodes - weighted : bool - is a weight given? if ``True`` it is the last element in the edge - (i.e. ``a,b,2``) - directed : bool - are the edges directed or undirected - - Returns - ------- - Network - a ``Network`` object obtained from the edgelist - """ - n = cls(directed) - - with open(filename, 'r') as f: - Log.add('Reading edge list ... ') - for n, line in enumerate(f): - fields = line.rstrip().split(separator) - assert len(fields) >= 2, 'Error: malformed line: {0}'.format(line) + @property + def directed(self): + """ Returns True if network is a directed network, False otherwise. """ + return self._directed - if weighted: - n.add_edge(fields[0], fields[1], weight=int(fields[2])) - else: - n.add_edge(fields[0], fields[1]) - - Log.add('finished.') + @property + def vcount(self): + """ Returns the number of nodes of the network. """ + return len(self.nodes) - return n + @property + def ecount(self): + """ Returns the number of edges of the network. """ + return len(self.edges) + @property + def weighted(self): + """ Returns True if the multiplex is a weighted network, False otherwise. """ + for e in self.edges: + if self.edges[e]['weight'] != 1.0: + return True + return False - @classmethod - def from_sqlite(cls, cursor, directed=True): - """Reads links from an SQLite cursor and returns a new instance of - the class Network. The cursor is assumed to refer to a table that - minimally has two columns - - source target + @property + def total_edge_weight(self): + """ Returns the sum of all edge weights """ + return sum([self.edges[e]['weight'] for e in self.edges]) - and where each row refers to a link. Any additional columns will be used as - edge properties + def out_edges(self, v): + """ + Returns a list of all out-going edges of a node. - Important: Since columns are accessed by name this function requires that a - row factory object is set for the SQLite connection prior to cursor creation, - i.e. you should set + @param v: The node - connection.row_factory = sqlite3.Row + @return: List of tuples (source, target) + """ + return [(v,w) for w in self.successors[v]] - Parameters - ---------- - cursor: - The SQLite cursor to fetch rows - directed: bool + def in_edges(self, w): + """ + Returns a list of all in-going edges of a node. - Returns - ------- + @param v: The node + @return: List of tuples (source, target) """ - n = cls(directed=directed) + return [(v,w) for v in self.predecessors[w]] + + def degree(self, v): + """ Returns the sum of in- and out-degree of a node """ + return self.in_degree(v) + self.out_degree(v) + + def weight(self, v): + """ Returns the sum of in- and out-weight of a node """ + return self.in_weight(v) + self.out_weight(v) - assert cursor.connection.row_factory, \ - 'Cannot access columns by name. Please set ' \ - 'connection.row_factory = sqlite3.Row before creating DB cursor.' + def in_degree(self, v): + """ Returns the in-degree of a node """ + return len(self.predecessors[v]) - Log.add('Retrieving links from database ...') + def out_degree(self, v): + """ Returns the out-degree of a node """ + return len(self.successors[v]) - for row in cursor: - n.add_edge(str(row['source']), str(row['target'])) + def in_weight(self, v): + """ Returns the in-weight of a node """ + return sum([self.edges[e]['weight'] for e in self.in_edges(v)]) - return n + def out_weight(self, v): + """ Returns the out-weight of a node """ + return sum([self.edges[e]['weight'] for e in self.out_edges(v)]) def add_node(self, v, **kwargs): """ - Adds a node to a network + Adds a node to the network """ if v not in self.nodes: self.nodes[v] = {**self.nodes[v], **kwargs} - # set default values if not set already - if 'inweight' not in self.nodes[v]: - self.nodes[v]['inweight'] = 0.0 - if 'outweight' not in self.nodes[v]: - self.nodes[v]['outweight'] = 0.0 - if self.directed: - self.nodes[v]['indegree'] = 0 - self.nodes[v]['outdegree'] = 0 - else: - self.nodes[v]['degree'] = 0 def remove_node(self, v): """ - Removes a node from the network + Removes a node from the network. """ if v in self.nodes: - # remove all incident edges and update neighbors - if not self.directed: - for w in self.successors[v]: - edge = (v, w) - self.nodes[w]['degree'] -= 1 - self.nodes[w]['inweight'] -= self.edges[edge]['weight'] - self.nodes[w]['outweight'] -= self.edges[edge]['weight'] - self.successors[w].remove(v) - self.predecessors[w].remove(v) - del self.edges[edge] - else: - for w in self.successors[v]: - self.nodes[w]['indegree'] -= 1 - self.nodes[w]['inweight'] -= self.edges[(v, w)]['weight'] - self.predecessors[w].remove(v) - del self.edges[(v, w)] - for w in self.predecessors[v]: - self.nodes[w]['outdegree'] -= 1 - self.nodes[w]['outweight'] -= self.edges[(w, v)]['weight'] - self.successors[w].remove(v) - del self.edges[(w, v)] + + # remove all edges that contain the node + for e in self.out_edges(v): + self.remove_edge(*e) + + if self.directed: + for e in self.in_edges(v): + self.remove_edge(*e) + + # remove node itself del self.nodes[v] - del self.successors[v] - del self.predecessors[v] def add_edge(self, v, w, **kwargs): """ - Adds an edge to a network + Adds an edge to the network + + @param v: Source node + @param w: Target node """ self.add_node(v) self.add_node(w) - e = (v, w) + e = (v,w) self.edges[e] = {**self.edges[e], **kwargs} @@ -218,43 +184,26 @@ def add_edge(self, v, w, **kwargs): self.successors[w].add(v) self.predecessors[v].add(w) - # update degrees and node weights - if not self.directed: - # update degree, in- and outweight - self.nodes[v]['degree'] = len(self.successors[v]) - self.nodes[w]['degree'] = len(self.successors[w]) - - S = [self.edges[e]['weight'] for w in self.successors[v]] - if S: - self.nodes[v]['outweight'] = sum(S) - self.nodes[v]['inweight'] = self.nodes[v]['outweight'] - S = [self.edges[e]['weight'] for v in self.successors[w]] - if S: - self.nodes[w]['outweight'] = sum(S) - self.nodes[w]['inweight'] = self.nodes[w]['outweight'] - else: - self.nodes[v]['outdegree'] = len(self.successors[v]) - self.nodes[v]['indegree'] = len(self.predecessors[v]) - self.nodes[w]['outdegree'] = len(self.successors[w]) - self.nodes[w]['indegree'] = len(self.predecessors[w]) - - # Note: Weights will be 0 for nodes with empty successors or predecessors. This is a - # problem for higher-order networks, where the zero weight is assumed to be a vector - # (0,0), Not updating weights in this case will ensure that we keep the initial value - # of weights - - S = [self.edges[(v, x)]['weight'] for x in self.successors[v]] - if S: - self.nodes[v]['outweight'] = sum(S) - S = [self.edges[(x, v)]['weight'] for x in self.predecessors[v]] - if S: - self.nodes[v]['inweight'] = sum(S) - S = [self.edges[(w, x)]['weight'] for x in self.successors[w]] - if S: - self.nodes[w]['outweight'] = sum(S) - S = [self.edges[(x, w)]['weight'] for x in self.predecessors[w]] - if S: - self.nodes[w]['inweight'] = sum(S) + + def remove_edge(self, v, w): + """ + Removes an edge from the network + + @param v: Source node + @param w: Target node + """ + + if (v,w) in self.edges: + + # remove nodes from successors and predecessors + self.successors[v].remove(w) + self.predecessors[w].remove(v) + if not self.directed: + self.successors[w].remove(v) + self.predecessors[v].remove(w) + + # delete edge itself + del self.edges[(v,w)] def find_nodes(self, select_node=lambda v: True): @@ -263,31 +212,65 @@ def find_nodes(self, select_node=lambda v: True): """ return [n for n in self.nodes if select_node(self.nodes[n])] + def find_edges(self, select_nodes=lambda v, w: True, select_edges=lambda e: True): """ Returns all edges that satisfy a given condition. Edges can be selected based on attributes of the adjacent nodes as well as attributes of the edge """ - return [e for e in self.edges if (select_nodes(self.nodes[e[0]], self.nodes[e[1]]) and select_edges(self.edges[e]))] + if self.directed: + return [e for e in self.edges if (select_nodes(self.nodes[e[0]], self.nodes[e[1]]) and select_edges(self.edges[e]))] + else: + return [e for e in self.edges if ((select_nodes(self.nodes[e[0]], self.nodes[e[1]]) or select_nodes(self.nodes[e[1]], self.nodes[e[0]])) and select_edges(self.edges[e]))] + + + def to_directed(self, mutual=True): + """ + Converts an undirected network to directed - def vcount(self): - """ Returns the number of nodes """ - return len(self.nodes) + @param mutual: True if mutual directed edges should be created for every undirected edge. If False, a directed edge with arbitrary direction is created. + """ + if self.directed is False: - def ecount(self): - """ Returns the number of links """ - return len(self.edges) + oldEdges = self.edges + + # reset edges and successors / predecessors list + self.edges = _co.defaultdict(dict) + self.successors = _co.defaultdict(set) + self.predecessors = _co.defaultdict(set) + + for e in oldEdges: + self.add_edge(*e, **oldEdges[e]) + if mutual: + self.add_edge(*reversed(e), **oldEdges[e]) + + self._directed = True + + + def to_undirected(self): + """ + Converts a directed multiplex to undirected. If there exists a pair of links between two nodes then only the attributes of one of these links is used for the new undirected link. + """ + if self.directed is True: + + oldEdges = self.edges + + # reset edges and successors / predecessors list + self.edges = UnorderedDict() + self.successors = _co.defaultdict(set) + self.predecessors = _co.defaultdict(set) + + for e in oldEdges: + self.add_edge(*e, **oldEdges[e]) + + self._directed = False - def total_edge_weight(self): - """ Returns the sum of all edge weights """ - if self.edges: - return _np.sum(e['weight'] for e in self.edges.values()) - return 0 def node_to_name_map(self): """Returns a dictionary that can be used to map nodes to matrix/vector indices""" return {v: idx for idx, v in enumerate(self.nodes)} + def adjacency_matrix(self, weighted=True, transposed=False): """Returns a sparse adjacency matrix of the higher-order network. By default, the entry corresponding to a directed link source -> target is stored in row s and @@ -326,7 +309,7 @@ def adjacency_matrix(self, weighted=True, transposed=False): else: data = _np.array([float(e['weight']) for e in self.edges.values()]) - shape = (self.vcount(), self.vcount()) + shape = (self.vcount, self.vcount) return _sparse.coo_matrix((data, (row, col)), shape=shape).tocsr() @@ -345,7 +328,7 @@ def transition_matrix(self): col = [] data = [] # calculate weighted out-degrees - D = {n: self.nodes[n]['outweight'] for n in self.nodes} + D = {n: self.out_weight(n) for n in self.nodes} node_to_coord = self.node_to_name_map() @@ -371,7 +354,7 @@ def transition_matrix(self): data = _np.array(data) data = data.reshape(data.size, ) - shape = self.vcount(), self.vcount() + shape = self.vcount, self.vcount return _sparse.coo_matrix((data, (row, col)), shape=shape).tocsr() @@ -387,7 +370,7 @@ def laplacian_matrix(self): """ transition_matrix = self.transition_matrix() - identity_matrix = _sparse.identity(self.vcount()) + identity_matrix = _sparse.identity(self.vcount) return identity_matrix - transition_matrix @@ -429,6 +412,137 @@ def leading_eigenvector(A, normalized=True, lanczos_vecs=15, maxiter=1000): return pi + def read_nodes(self, filename, sep=','): + """ + Reads nodes from a file. Each line must have the format node,propertyName1,property1,propertyName2,property2,... + + @param filename: The path of the file + @param sep: The seperator used to seperate the columns + """ + with open(filename, 'r') as f: + Log.add('Reading node list ... ') + for n, line in enumerate(f): + fields = line.rstrip().split(sep) + + assert len(fields) % 2 == 1, 'Error: malformed line: {0}'.format(line) + + # read additional properties of each node + prop = dict() + for i in range(1, len(fields), 2): + prop[fields[i]] = fields[i+1] + + self.add_node(fields[0], **prop) + + Log.add('finished') + + + def write_nodes(self, filename, sep=','): + """ + Writes all nodes together with their respective properties to a file. Format is node,propertyName1,property1,propertyName2,property2,... + + @param filename: The path of the file + @param sep: The seperator used to seperate the columns + """ + msg = 'Writing {0} nodes to file {1}'.format(self.vcount, filename) + Log.add(msg, Severity.INFO) + with open(filename, 'w+') as f: + for n in self.nodes: + prop = "" + for k,v in self.nodes[n].items(): + prop += sep + str(k) + sep + str(v) + f.write(str(n) + prop + '\n') + + + def read_edges(self, filename, sep=',', weighted=False): + """ + Reads edges from a file. Each line must have the format source,target,(weight,)propertyName1,property1,propertyName2,property2,... + + @param filename: The path of the file + @param sep: The seperator used to seperate the columns + @param weighted: If True the weight of each edge is read from the fourth column + """ + with open(filename, 'r') as f: + Log.add('Reading edge list ... ') + for n, line in enumerate(f): + fields = line.rstrip().split(sep) + + s = 3 if weighted else 2 + assert len(fields) >= 3 and (len(fields) - s) % 2 == 0, 'Error: malformed line: {0}'.format(line) + + # read additional properties of each edge + prop = dict() + for i in range(s, len(fields), 2): + prop[fields[i]] = fields[i+1] + + if weighted: + self.add_edge(fields[0], fields[1], weight=float(fields[2]), **prop) + else: + self.add_edge(fields[0], fields[1], **prop) + + Log.add('finished.') + + + def write_edges(self, filename, sep=','): + """ + Writes all edges together with their respective properties to a file. Format is source,target,(weight,)propertyName1,property1,propertyName2,property2,... + + @param filename: The path of the file + @param sep: The seperator used to seperate the columns + """ + msg = 'Writing {0} edges to file {1}'.format(self.ecount, filename) + Log.add(msg, Severity.INFO) + with open(filename, 'w+') as f: + for e in self.edges: + out = str(e[0]) + sep + str(e[1]) + if self.weighted: + out += sep + str(self.edges[e]['weight']) + for k,v in self.edges[e].items(): + if k is not 'weight': + out += sep + str(k) + sep + str(v) + f.write(out + '\n') + + + @classmethod + def from_sqlite(cls, cursor, directed=True): + """Reads links from an SQLite cursor and returns a new instance of + the class Network. The cursor is assumed to refer to a table that + minimally has two columns + + source target + + and where each row refers to a link. Any additional columns will be used as + edge properties + + Important: Since columns are accessed by name this function requires that a + row factory object is set for the SQLite connection prior to cursor creation, + i.e. you should set + + connection.row_factory = sqlite3.Row + + Parameters + ---------- + cursor: + The SQLite cursor to fetch rows + directed: bool + + Returns + ------- + + """ + n = cls(directed=directed) + + assert cursor.connection.row_factory, \ + 'Cannot access columns by name. Please set ' \ + 'connection.row_factory = sqlite3.Row before creating DB cursor.' + + Log.add('Retrieving links from database ...') + + for row in cursor: + n.add_edge(str(row['source']), str(row['target'])) + + return n + + def summary(self): """Returns a string containing basic summary statistics of this network instance """ @@ -442,7 +556,7 @@ def summary(self): directed_str = 'Directed' else: directed_str = 'Undirected' - summary = summary_fmt.format(directed_str=directed_str, vcount=self.vcount(), ecount=self.ecount()) + summary = summary_fmt.format(directed_str=directed_str, vcount=self.vcount, ecount=self.ecount) return summary def __str__(self): @@ -506,74 +620,6 @@ def write_html(self, filename, width=600, height=600): f.write(html) - -def network_from_networkx(graph): - """method to load a networkx graph into a pathpy.Network instance - - Parameters - ---------- - garph - - Returns - ------- - Network - """ - try: - import networkx as nx - except ImportError: - raise PathpyError("To load a network from networkx it must be installed") - - if isinstance(graph, nx.DiGraph): - directed = True - elif isinstance(graph, nx.Graph): - directed = False - else: - raise PathpyNotImplemented("At the moment only DiGraph and Graph are supported.") - - net = Network(directed=directed) - for node_id in graph.nodes: - net.add_node(node_id, **graph.node[node_id]) - - for edge in graph.edges: - net.add_edge(edge[0], edge[1], **graph.edges[edge]) - - return net - - -def network_to_networkx(network): - """method to export a pathpy Network to a networkx compatible graph - - Parameters - ---------- - network: Network - - Returns - ------- - networkx Graph or DiGraph - """ - # keys to exclude since they are handled differently in networkx - excluded_node_props = {"degree", "inweight", "outweight", "indegree", "outdegree"} - try: - import networkx as nx - except ImportError: - raise PathpyError("To export a network to networkx it must be installed") - - directed = network.directed - if directed: - graph = nx.DiGraph() - else: - graph = nx.Graph() - - for node_id, node_props in network.nodes.items(): - valid_props = {k: v for k, v in node_props.items() if k not in excluded_node_props} - graph.add_node(node_id, **valid_props) - - for edge, edge_props in network.edges.items(): - graph.add_edge(*edge, **edge_props) - - return graph - - class UnorderedDict(dict): """A dictionary that applies an arbitrary key-altering function before accessing the keys @@ -618,3 +664,6 @@ def items(self): def __keytransform__(self, key): return tuple(sorted(key)) + + def __repr__(self): + return self.store.__repr__() \ No newline at end of file