initial commit

This commit is contained in:
wls2002
2023-05-05 14:19:13 +08:00
commit 6faa07f507
43 changed files with 2517 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
"""
numpy version of functions in genome
"""
from .distance import distance
from .utils import *

View File

@@ -0,0 +1,58 @@
import numpy as np
from .utils import flatten_connections, set_operation_analysis
def distance(nodes1, connections1, nodes2, connections2):
    """
    Total distance between two genomes: node-gene distance plus
    connection-gene distance.

    :param nodes1: 2-D node array of the first genome; column 0 is used as the node key.
    :param connections1: connection tensor of the first genome
        (presumably shaped (2, N, N) as flatten_connections expects -- TODO confirm).
    :param nodes2: 2-D node array of the second genome.
    :param connections2: connection tensor of the second genome.
    :return: sum of the two gene_distance results.
    """
    total = gene_distance(nodes1, nodes2, 'node')

    # Connections are stored as dense matrices; turn each into rows of
    # (in_key, out_key, attr0, attr1) so they can be compared like node rows.
    flat1 = flatten_connections(nodes1[:, 0], connections1)
    flat2 = flatten_connections(nodes2[:, 0], connections2)

    total = total + gene_distance(flat1, flat2, 'connection')
    return total
def gene_distance(ar1, ar2, gene_type, compatibility_coe=0.5, disjoint_coe=1.):
    """
    Distance between two sets of genes of the same kind.

    Genes are matched by key (first column for nodes, first two columns for
    connections). Keys present in only one input count as non-homologous;
    keys present in both contribute their attribute difference.

    :param ar1: 2-D gene array of the first genome; rows containing NaN are
        treated as unused slots.
    :param ar2: 2-D gene array of the second genome.
    :param gene_type: 'node' selects node handling; any other value is
        handled as a connection.
    :param compatibility_coe: weight of the summed homologous attribute distance.
    :param disjoint_coe: weight of the non-homologous gene count.
    :return: weighted distance normalised by the larger count of valid genes,
        or 0.0 when neither input holds a valid gene (previously NaN from 0/0).
    """
    is_node = gene_type == 'node'
    key_width = 1 if is_node else 2
    keys1, keys2 = ar1[:, :key_width], ar2[:, :key_width]

    sorted_indices, intersect_mask, union_mask = set_operation_analysis(keys1, keys2)

    # Sort the stacked genes by key so that homologous genes become neighbours.
    genes = np.concatenate((ar1, ar2), axis=0)
    sorted_genes = genes[sorted_indices]
    fr_sorted, sr_sorted = sorted_genes[:-1], sorted_genes[1:]

    # distinct keys minus shared keys == keys that appear in exactly one input
    non_homologous_cnt = np.sum(union_mask) - np.sum(intersect_mask)

    if is_node:
        pair_distance = homologous_node_distance(fr_sorted, sr_sorted)
    else:  # connection
        pair_distance = homologous_connection_distance(fr_sorted, sr_sorted)

    # Pairs involving unused (NaN) rows contribute nothing.
    pair_distance = np.where(np.isnan(pair_distance), 0, pair_distance)
    # Only neighbouring rows that share a key are real homologous pairs.
    homologous_distance = np.sum(pair_distance * intersect_mask[:-1])

    gene_cnt1 = np.sum(np.all(~np.isnan(ar1), axis=1))
    gene_cnt2 = np.sum(np.all(~np.isnan(ar2), axis=1))
    max_cnt = np.maximum(gene_cnt1, gene_cnt2)

    # Two empty gene sets are identical; avoid 0/0 -> NaN leaking into the
    # genome distance.
    if max_cnt == 0:
        return np.float64(0.0)

    val = non_homologous_cnt * disjoint_coe + homologous_distance * compatibility_coe
    return val / max_cnt
def homologous_node_distance(n1, n2):
    """
    Row-wise attribute distance between two aligned node arrays.

    Columns 1 and 2 (bias, response) contribute their absolute difference;
    columns 3 and 4 (activation, and presumably aggregation -- TODO confirm)
    contribute 1 when they differ and 0 otherwise.

    :param n1: 2-D node array.
    :param n2: 2-D node array with the same number of rows as n1.
    :return: 1-D array with one distance per row.
    """
    bias_gap = np.abs(n1[:, 1] - n2[:, 1])
    response_gap = np.abs(n1[:, 2] - n2[:, 2])
    activation_differs = n1[:, 3] != n2[:, 3]
    col4_differs = n1[:, 4] != n2[:, 4]
    return 0 + bias_gap + response_gap + activation_differs + col4_differs
def homologous_connection_distance(c1, c2):
    """
    Row-wise attribute distance between two aligned connection arrays.

    Column 2 (weight) contributes its absolute difference; column 3 (enable)
    contributes 1 when the two rows differ and 0 otherwise.

    :param c1: 2-D connection array.
    :param c2: 2-D connection array with the same number of rows as c1.
    :return: 1-D array with one distance per row.
    """
    weight_gap = np.abs(c1[:, 2] - c2[:, 2])
    enable_differs = c1[:, 3] != c2[:, 3]
    return 0 + weight_gap + enable_differs

55
algorithms/numpy/utils.py Normal file
View File

@@ -0,0 +1,55 @@
import numpy as np
# Largest int32 value; used as the default "not found" index by the fetch_* helpers.
I_INT = np.iinfo(np.int32).max # infinite int
def flatten_connections(keys, connections):
    """
    Turn a dense connection tensor into a table with one row per key pair.

    :param keys: node keys; flattened to length N before use.
    :param connections: array whose leading axis is moved last and reshaped to
        (-1, 2), i.e. a (2, N, N) tensor of two attributes per (i, j) pair.
    :return: (N * N, 4) array with rows (keys[i], keys[j], attr0[i, j], attr1[i, j]),
        ordered with i varying slowest.
    """
    flat_keys = np.asarray(keys).reshape(-1)
    n = flat_keys.shape[0]

    # Every ordered pair (keys[i], keys[j]), row-major in (i, j).
    key_pairs = np.column_stack((np.repeat(flat_keys, n), np.tile(flat_keys, n)))

    # (2, N, N) -> (N, N, 2) -> (N * N, 2)
    attrs = np.moveaxis(connections, 0, -1).reshape(-1, 2)

    return np.concatenate((key_pairs, attrs), axis=1)
def unflatten_connections(N, cons):
    """
    Inverse of flatten_connections: rebuild the dense connection tensor.

    :param N: number of nodes.
    :param cons: (N * N, 4) table whose first two columns are key indices.
    :return: (2, N, N) array holding the two attribute planes.
    """
    # Drop the two key columns, then restore (N, N, 2) and move attributes first.
    attrs = cons[:, 2:].reshape(N, N, 2)
    return np.transpose(attrs, (2, 0, 1))
def set_operation_analysis(ar1, ar2):
    """
    Sort the stacked rows of two key arrays and flag shared / distinct keys.

    :param ar1: 2-D key array; rows containing NaN are ignored in the masks.
    :param ar2: 2-D key array with the same number of columns as ar1.
    :return: (sorted_indices, intersect_mask, union_mask) where
        sorted_indices sorts the concatenation of ar1 and ar2 lexicographically
        (first column most significant), intersect_mask[i] is True when sorted
        row i equals sorted row i + 1, and union_mask[i] is True when sorted
        row i differs from the row after it. Both masks are False for NaN rows.
    """
    stacked = np.concatenate((ar1, ar2), axis=0)
    order = np.lexsort(stacked.T[::-1])

    # Append a NaN sentinel row so the last real row has a "next" row to compare with.
    sentinel = np.full((1, ar1.shape[1]), np.nan)
    padded = np.concatenate((stacked[order], sentinel), axis=0)

    current_rows, next_rows = padded[:-1], padded[1:]
    valid = ~np.isnan(current_rows).any(axis=1)
    same_as_next = (current_rows == next_rows).all(axis=1)

    return order, same_as_next & valid, ~same_as_next & valid
def fetch_first(mask, default=I_INT):
    """
    Index of the first True entry in mask.

    :param mask: 1-D boolean array.
    :param default: value returned when mask contains no True entry.
    :return: 0-d array holding the index, or default if nothing is set.
    """
    # argmax returns 0 for an all-False mask, so confirm the hit before using it.
    candidate = np.argmax(mask)
    is_hit = mask[candidate]
    return np.where(is_hit, candidate, default)
def fetch_last(mask, default=I_INT):
    """
    Index of the last True entry in mask.

    :param mask: 1-D boolean array.
    :param default: value returned when mask contains no True entry.
    :return: 0-d array holding the index, or default if nothing is set.
    """
    reversed_idx = fetch_first(mask[::-1], default)
    # fetch_first signals "not found" with `default`, not -1, so decide from the
    # mask itself rather than comparing against a hard-coded sentinel.
    found = np.any(mask)
    return np.where(found, mask.shape[0] - reversed_idx - 1, default)
def fetch_random(rand_key, mask, default=I_INT):
    """
    similar to fetch_first, but fetch a random True index

    :param rand_key: seed material for numpy.random.default_rng (None, int,
        sequence of ints, SeedSequence, BitGenerator) or an existing Generator,
        which is used as-is and therefore advanced by this call.
    :param mask: 1-D boolean array.
    :param default: value returned when mask contains no True entry.
    :return: 0-d array holding the index of a uniformly chosen True entry,
        or default if nothing is set.
    """
    true_cnt = int(np.sum(mask))
    if true_cnt == 0:
        # Nothing to pick from; integers(1, 1) would raise on the empty range.
        return np.asarray(default)

    rng = np.random.default_rng(rand_key)
    # Pick the k-th True entry with k in [1, true_cnt]. k == 0 must be excluded:
    # `cumsum >= 0` holds at index 0 even when mask[0] is False.
    target = rng.integers(1, true_cnt + 1)
    return fetch_first(np.cumsum(mask) >= target, default)