initial commit
This commit is contained in:
5
algorithms/numpy/__init__.py
Normal file
5
algorithms/numpy/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
"""
|
||||
numpy version of functions in genome
|
||||
"""
|
||||
from .distance import distance
|
||||
from .utils import *
|
||||
58
algorithms/numpy/distance.py
Normal file
58
algorithms/numpy/distance.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import numpy as np
|
||||
|
||||
from .utils import flatten_connections, set_operation_analysis
|
||||
|
||||
|
||||
def distance(nodes1, connections1, nodes2, connections2):
    """Total genome distance: node-gene distance plus connection-gene distance.

    nodes*: 2-D gene arrays whose first column holds the node keys.
    connections*: connection tensors, flattened via flatten_connections
    before being compared as genes.
    """
    d_nodes = gene_distance(nodes1, nodes2, 'node')

    # rebuild each connection tensor into flat per-pair rows keyed by node keys
    node_keys1 = nodes1[:, 0]
    node_keys2 = nodes2[:, 0]
    flat_cons1 = flatten_connections(node_keys1, connections1)
    flat_cons2 = flatten_connections(node_keys2, connections2)

    d_cons = gene_distance(flat_cons1, flat_cons2, 'connection')
    return d_nodes + d_cons
|
||||
|
||||
|
||||
def gene_distance(ar1, ar2, gene_type, compatibility_coe=0.5, disjoint_coe=1.):
    """Compatibility distance between two gene matrices of the same kind.

    ar1, ar2: 2-D arrays, one gene per row. The leading column(s) are keys
        (1 key column for 'node' genes, 2 for 'connection' genes); the
        remaining columns are gene attributes. Rows that are entirely NaN
        appear to represent absent genes (see the gene_cnt computation
        below) -- TODO confirm against callers.
    gene_type: 'node' or 'connection'; selects the key width and the
        homologous-distance function.
    compatibility_coe: weight of the attribute distance between homologous
        (same-key) gene pairs.
    disjoint_coe: weight of the count of disjoint/excess genes.
    Returns a scalar distance, normalized by the larger gene count.
    """
    if gene_type == 'node':
        keys1, keys2 = ar1[:, :1], ar2[:, :1]
    else: # connection
        keys1, keys2 = ar1[:, :2], ar2[:, :2]

    # Sort the keys of both genomes together; intersect/union masks flag,
    # for each sorted row, whether it equals its successor (a homologous
    # pair from the two genomes) or not.
    n_sorted_indices, n_intersect_mask, n_union_mask = set_operation_analysis(keys1, keys2)
    nodes = np.concatenate((ar1, ar2), axis=0)
    sorted_nodes = nodes[n_sorted_indices]
    # adjacent rows in key order; a homologous pair occupies matching slots
    fr_sorted_nodes, sr_sorted_nodes = sorted_nodes[:-1], sorted_nodes[1:]

    # genes present in exactly one genome: |union| - |intersection|
    non_homologous_cnt = np.sum(n_union_mask) - np.sum(n_intersect_mask)
    if gene_type == 'node':
        node_distance = homologous_node_distance(fr_sorted_nodes, sr_sorted_nodes)
    else: # connection
        node_distance = homologous_connection_distance(fr_sorted_nodes, sr_sorted_nodes)

    # NaN distances (arising from NaN-padded rows) contribute nothing
    node_distance = np.where(np.isnan(node_distance), 0, node_distance)
    # keep only adjacent pairs whose keys matched (true homologous pairs);
    # n_intersect_mask has one more entry than the pairwise distances
    homologous_distance = np.sum(node_distance * n_intersect_mask[:-1])

    # count real (fully non-NaN) genes in each genome
    gene_cnt1 = np.sum(np.all(~np.isnan(ar1), axis=1))
    gene_cnt2 = np.sum(np.all(~np.isnan(ar2), axis=1))

    val = non_homologous_cnt * disjoint_coe + homologous_distance * compatibility_coe
    # normalize by the larger genome size (NEAT-style)
    return val / np.where(gene_cnt1 > gene_cnt2, gene_cnt1, gene_cnt2)
|
||||
|
||||
|
||||
def homologous_node_distance(n1, n2):
    """Row-wise distance between aligned node-gene rows.

    Column 1 is bias and column 2 is response (absolute differences);
    columns 3 (activation) and 4 (unlabeled in the original -- possibly
    aggregation) each add 1 per mismatch.
    """
    bias_diff = np.abs(n1[:, 1] - n2[:, 1])
    response_diff = np.abs(n1[:, 2] - n2[:, 2])
    activation_mismatch = n1[:, 3] != n2[:, 3]
    col4_mismatch = n1[:, 4] != n2[:, 4]
    return bias_diff + response_diff + activation_mismatch + col4_mismatch
|
||||
|
||||
|
||||
def homologous_connection_distance(c1, c2):
    """Row-wise distance between aligned connection-gene rows.

    Column 2 is the weight (absolute difference); column 3 is the enabled
    flag, adding 1 per mismatch.
    """
    weight_diff = np.abs(c1[:, 2] - c2[:, 2])
    enable_mismatch = c1[:, 3] != c2[:, 3]
    return weight_diff + enable_mismatch
|
||||
55
algorithms/numpy/utils.py
Normal file
55
algorithms/numpy/utils.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import numpy as np
|
||||
|
||||
I_INT = np.iinfo(np.int32).max # infinite int
|
||||
|
||||
|
||||
def flatten_connections(keys, connections):
    """Flatten a (2, N, N) connection tensor into (N*N, 4) gene rows.

    Each output row is (in_key, out_key, attr0, attr1): the key pair comes
    from the cartesian product of keys in row-major order, and the two
    attribute channels come from the leading axis of connections.
    """
    # every ordered (from, to) key pair, row-major
    grid_from, grid_to = np.meshgrid(keys, keys, indexing='ij')
    key_pairs = np.stack((grid_from, grid_to), axis=-1).reshape(-1, 2)

    # (2, N, N) -> (N, N, 2) -> (N*N, 2): one attribute pair per key pair
    attrs = np.transpose(connections, (1, 2, 0)).reshape(-1, 2)

    return np.concatenate((key_pairs, attrs), axis=1)
|
||||
|
||||
|
||||
def unflatten_connections(N, cons):
    """Inverse of flatten_connections: (N*N, 4) rows back to a (2, N, N) tensor."""
    attrs = cons[:, 2:]  # drop the (in_key, out_key) index columns
    # (N*N, 2) -> (N, N, 2) -> (2, N, N)
    return np.moveaxis(attrs.reshape(N, N, 2), -1, 0)
|
||||
|
||||
|
||||
def set_operation_analysis(ar1, ar2):
    """Sort the stacked rows of ar1/ar2 and derive per-row set masks.

    Returns (sorted_indices, intersect_mask, union_mask). Both masks
    compare each sorted row with its successor: intersect_mask marks rows
    equal to the next one (a duplicate across the two inputs), union_mask
    marks rows that differ from the next one (the last copy of a distinct
    row). A NaN row is appended as a sentinel so the final real row has a
    successor; rows containing NaN are excluded from both masks.
    """
    stacked = np.concatenate((ar1, ar2), axis=0)
    # lexsort keys run last-to-first, so reverse the columns to make the
    # first column the primary sort key
    order = np.lexsort(stacked.T[::-1])

    sentinel = np.full((1, ar1.shape[1]), np.nan)
    padded = np.concatenate((stacked[order], sentinel), axis=0)
    has_nan = np.any(np.isnan(padded), axis=1)

    row, next_row = padded[:-1], padded[1:]
    valid = ~has_nan[:-1]
    intersect_mask = valid & np.all(row == next_row, axis=1)
    union_mask = valid & np.any(row != next_row, axis=1)
    return order, intersect_mask, union_mask
|
||||
|
||||
|
||||
def fetch_first(mask, default=I_INT):
    """Return the index of the first True in mask, or default when none is True."""
    first = np.argmax(mask)
    # argmax returns 0 for an all-False mask; check the hit is genuine
    return np.where(mask[first], first, default)
|
||||
|
||||
|
||||
def fetch_last(mask, default=I_INT):
    """Return the index of the last True in mask, or default when none is True.

    Bug fix: fetch_first signals "not found" by returning default (I_INT),
    never -1, so the original `reversed_idx == -1` test could never fire
    and the all-False case returned a garbage index. Compare against
    default instead; valid indices (< len(mask)) cannot collide with it.
    """
    reversed_idx = fetch_first(mask[::-1], default)
    return np.where(reversed_idx == default, default, mask.shape[0] - reversed_idx - 1)
|
||||
|
||||
|
||||
def fetch_random(rand_key, mask, default=I_INT):
    """Similar to fetch_first, but fetch a random True index of mask.

    Returns default when mask has no True element.

    Bug fixes:
    - The original called np.random.randint(rand_key, shape=(), minval=...,
      maxval=...), which is the JAX signature and raises TypeError under
      NumPy. rand_key now seeds a Generator, keeping the draw deterministic
      per key (default_rng accepts an int, an int array, or None --
      TODO confirm what callers actually pass).
    - A target of 0 made `cumsum >= target` all-True, always selecting
      index 0 regardless of mask; the target now lies in [1, true_cnt].
    """
    true_cnt = np.sum(mask)
    cumsum = np.cumsum(mask)
    if true_cnt > 0:
        # pick the k-th True element uniformly, k in [1, true_cnt]
        target = np.random.default_rng(rand_key).integers(1, true_cnt + 1)
    else:
        # cumsum never reaches 1, so fetch_first falls back to default
        target = 1
    return fetch_first(cumsum >= target, default)
|
||||
Reference in New Issue
Block a user