initial commit

2023-05-05 14:19:13 +08:00
commit 6faa07f507
43 changed files with 2517 additions and 0 deletions
--- a/algorithms/numpy/__init__.py
+++ b/algorithms/numpy/__init__.py
@@ -0,0 +1,5 @@
+"""
+numpy version of functions in genome
+"""
+from .distance import distance
+from .utils import *
--- a/algorithms/numpy/distance.py
+++ b/algorithms/numpy/distance.py
@@ -0,0 +1,58 @@
+import numpy as np
+
+from .utils import flatten_connections, set_operation_analysis
+
+
+def distance(nodes1, connections1, nodes2, connections2):
+    """
+    Distance between two genomes: node-gene distance plus connection-gene distance.
+
+    Column 0 of each node array is used as the node key. The connection arrays are
+    turned into one row per (key, key) pair with flatten_connections before they
+    are compared.
+    """
+    node_part = gene_distance(nodes1, nodes2, 'node')
+
+    # connections need the node keys attached before they can be matched by key
+    flat1 = flatten_connections(nodes1[:, 0], connections1)
+    flat2 = flatten_connections(nodes2[:, 0], connections2)
+    connection_part = gene_distance(flat1, flat2, 'connection')
+
+    return node_part + connection_part
+
+
+def gene_distance(ar1, ar2, gene_type, compatibility_coe=0.5, disjoint_coe=1.):
+    """
+    Distance between two sets of genes stored as 2-D arrays (one gene per row).
+
+    Genes are matched by their leading key columns: the first column when
+    gene_type is 'node', the first two columns for any other value (connections).
+    Rows that contain NaN are not counted as genes.
+
+    The result is
+        (disjoint_coe * non_homologous_cnt + compatibility_coe * homologous_distance)
+        / max(gene_cnt1, gene_cnt2)
+    where non_homologous_cnt is the number of keys present in only one input and
+    homologous_distance is the summed attribute distance of keys present in both.
+    Returns 0 when neither input holds a valid gene (instead of dividing 0 by 0).
+    """
+    is_node = gene_type == 'node'
+    key_width = 1 if is_node else 2  # number of leading columns that identify a gene
+    keys1, keys2 = ar1[:, :key_width], ar2[:, :key_width]
+
+    sorted_indices, intersect_mask, union_mask = set_operation_analysis(keys1, keys2)
+    genes = np.concatenate((ar1, ar2), axis=0)
+    sorted_genes = genes[sorted_indices]
+    # after sorting by key, a gene shared by both inputs sits in two adjacent rows
+    fr_sorted, sr_sorted = sorted_genes[:-1], sorted_genes[1:]
+
+    non_homologous_cnt = np.sum(union_mask) - np.sum(intersect_mask)
+    if is_node:
+        pair_distance = homologous_node_distance(fr_sorted, sr_sorted)
+    else:  # connection
+        pair_distance = homologous_connection_distance(fr_sorted, sr_sorted)
+
+    # NaN rows produce NaN distances; they must not poison the sum
+    pair_distance = np.where(np.isnan(pair_distance), 0, pair_distance)
+    # only adjacent pairs flagged as intersecting are real homologous pairs
+    homologous_distance = np.sum(pair_distance * intersect_mask[:-1])
+
+    gene_cnt1 = np.sum(np.all(~np.isnan(ar1), axis=1))
+    gene_cnt2 = np.sum(np.all(~np.isnan(ar2), axis=1))
+    max_cnt = np.maximum(gene_cnt1, gene_cnt2)
+
+    val = non_homologous_cnt * disjoint_coe + homologous_distance * compatibility_coe
+    if max_cnt == 0:
+        # both inputs are empty: identical by definition, and 0 / 0 would yield NaN
+        return np.float64(0.)
+    return val / max_cnt
+
+
+def homologous_node_distance(n1, n2):
+    """
+    Row-wise distance between two equally shaped node arrays.
+
+    Columns 1 and 2 contribute their absolute difference; columns 3 and 4
+    contribute 1 for every row where the two values differ.
+    """
+    bias_gap = np.abs(n1[:, 1] - n2[:, 1])
+    response_gap = np.abs(n1[:, 2] - n2[:, 2])
+    activation_differs = n1[:, 3] != n2[:, 3]
+    # column 4 is unlabelled in the original; presumably the aggregation function
+    column4_differs = n1[:, 4] != n2[:, 4]
+    return bias_gap + response_gap + activation_differs + column4_differs
+
+
+def homologous_connection_distance(c1, c2):
+    """
+    Row-wise distance between two equally shaped flattened connection arrays.
+
+    Column 2 (weight) contributes its absolute difference; column 3 (enable)
+    contributes 1 for every row where the two values differ.
+    """
+    weight_gap = np.abs(c1[:, 2] - c2[:, 2])
+    enable_differs = c1[:, 3] != c2[:, 3]
+    return weight_gap + enable_differs
--- a/algorithms/numpy/utils.py
+++ b/algorithms/numpy/utils.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+# Largest int32 value; used as the default "nothing found" result of the fetch_* helpers.
+I_INT = np.iinfo(np.int32).max  # infinite int
+
+
+def flatten_connections(keys, connections):
+    """
+    Turn a (2, N, N) connection array into an (N * N, 4) table.
+
+    Row i * N + j holds [keys[i], keys[j], connections[0, i, j], connections[1, i, j]].
+    """
+    flat_keys = np.asanyarray(keys).ravel()
+    n = flat_keys.size
+
+    # every ordered pair of keys, first key varying slowest
+    first_key = np.repeat(flat_keys, n)
+    second_key = np.tile(flat_keys, n)
+    pairs = np.stack((first_key, second_key), axis=-1)
+
+    # (2, N, N) -> (N, N, 2) -> (N * N, 2)
+    attrs = np.moveaxis(connections, 0, -1).reshape(-1, 2)
+
+    return np.concatenate((pairs, attrs), axis=1)
+
+
+def unflatten_connections(N, cons):
+    """
+    Inverse of flatten_connections: rebuild the (2, N, N) array from an (N * N, 4) table.
+    """
+    # the first two columns are the key pair; only the attribute columns are kept
+    attrs = cons[:, 2:].reshape(N, N, 2)
+    # (N, N, 2) -> (2, N, N)
+    return np.transpose(attrs, (2, 0, 1))
+
+
+def set_operation_analysis(ar1, ar2):
+    """
+    Analyse the key rows of two 2-D arrays for set intersection and union.
+
+    The rows of ar1 and ar2 are stacked and sorted lexicographically (first column
+    most significant). Returns (sorted_indices, intersect_mask, union_mask), all
+    aligned with the sorted stack:
+      - intersect_mask[i] is True when sorted row i equals row i + 1,
+      - union_mask[i] is True when sorted row i differs from row i + 1,
+    and both are False for rows that contain NaN.
+    """
+    stacked = np.concatenate((ar1, ar2), axis=0)
+    sorted_indices = np.lexsort(stacked.T[::-1])
+
+    # append one all-NaN row so the last real row also has a successor to compare with
+    sentinel = np.full((1, ar1.shape[1]), np.nan)
+    padded = np.concatenate((stacked[sorted_indices], sentinel), axis=0)
+
+    valid = ~np.any(np.isnan(padded), axis=1)[:-1]
+    same_as_next = np.all(padded[:-1] == padded[1:], axis=1)
+
+    intersect_mask = same_as_next & valid
+    union_mask = ~same_as_next & valid
+    return sorted_indices, intersect_mask, union_mask
+
+
+def fetch_first(mask, default=I_INT):
+    """
+    Index of the first True entry of a 1-D boolean mask.
+
+    Returns default when the mask contains no True entry, including when the
+    mask is empty.
+    """
+    if np.size(mask) == 0:
+        # np.argmax raises ValueError on empty input; nothing can be found here
+        return np.asarray(default)
+    idx = np.argmax(mask)
+    # argmax yields 0 for an all-False mask, so confirm the hit before trusting it
+    return np.where(mask[idx], idx, default)
+
+
+def fetch_last(mask, default=I_INT):
+    """
+    Index of the last True entry of a 1-D boolean mask.
+
+    Returns default when the mask contains no True entry.
+    """
+    found = np.any(mask)
+    reversed_idx = fetch_first(mask[::-1], default)
+    # fetch_first reports "not found" as `default`, not -1, so test the mask itself;
+    # this also stays correct when `default` happens to be a legal index
+    return np.where(found, mask.shape[0] - reversed_idx - 1, default)
+
+
+def fetch_random(rand_key, mask, default=I_INT):
+    """
+    similar to fetch_first, but fetch a random True index
+
+    rand_key is handed to np.random.default_rng: pass an int seed for a
+    reproducible pick, an existing np.random.Generator to draw from it, or
+    None for fresh entropy.
+    Returns default when the mask contains no True entry.
+    """
+    true_cnt = np.sum(mask)
+    if true_cnt == 0:
+        # nothing to choose from; drawing from an empty range would raise
+        return np.asarray(default)
+
+    rng = np.random.default_rng(rand_key)
+    # pick the k-th True entry, k in [1, true_cnt]; k = 0 would always select
+    # index 0, even when mask[0] is False
+    target = rng.integers(1, true_cnt + 1)
+    cumsum = np.cumsum(mask)
+    return fetch_first(cumsum >= target, default)