tensorneat-mend/tensorneat/genome/operations/distance/default.py

from jax import vmap, numpy as jnp

from .base import BaseDistance
from ...utils import extract_node_attrs, extract_conn_attrs


class DefaultDistance(BaseDistance):
    """
    Distance between two genomes, computed separately for nodes and connections.

    Each part combines the number of genes present in only one genome
    (scaled by ``compatibility_disjoint``) with the summed distance of the
    genes present in both (scaled by ``compatibility_weight``), and divides
    the result by the larger of the two gene counts.

    NOTE(review): ``self.genome`` is read by the distance methods but is not
    assigned in this class; presumably it is provided by ``BaseDistance`` or
    attached externally -- confirm.
    """

    def __init__(
        self,
        compatibility_disjoint: float = 1.0,
        compatibility_weight: float = 0.4,
    ):
        # factor applied to the count of genes found in only one genome
        self.compatibility_disjoint = compatibility_disjoint
        # factor applied to the summed distance of genes found in both genomes
        self.compatibility_weight = compatibility_weight

    def __call__(self, state, nodes1, nodes2, conns1, conns2):
        """
        The distance between two genomes: node part plus connection part
        """
        node_part = self.node_distance(state, nodes1, nodes2)
        conn_part = self.conn_distance(state, conns1, conns2)
        return node_part + conn_part

    @staticmethod
    def _count_valid(genes):
        """
        Number of rows whose first column is not NaN
        """
        return jnp.sum(~jnp.isnan(genes[:, 0]))

    @staticmethod
    def _adjacent_rows(sorted_genes):
        """
        Pair every row of a sorted gene array with the row that follows it

        A single all-NaN row is appended first, so the last real row is
        paired with NaN and both returned arrays keep the input row count.
        """
        nan_row = jnp.full((1, sorted_genes.shape[1]), jnp.nan)
        padded = jnp.concatenate([sorted_genes, nan_row], axis=0)
        return padded[:-1], padded[1:]

    def _weighted_score(self, unmatched_cnt, matched_distance, larger_cnt):
        """
        Weight the two distance components and normalize by the larger count

        Yields 0 when ``larger_cnt`` is 0 (neither genome has a valid gene).
        """
        raw = (
            unmatched_cnt * self.compatibility_disjoint
            + matched_distance * self.compatibility_weight
        )
        return jnp.where(larger_cnt == 0, 0, raw / larger_cnt)

    def node_distance(self, state, nodes1, nodes2):
        """
        The distance of the nodes part for two genomes

        Rows with NaN in column 0 are not counted as nodes. Column 0 is used
        as the key that identifies the same node in both genomes.
        """
        cnt_a = self._count_valid(nodes1)
        cnt_b = self._count_valid(nodes2)
        larger_cnt = jnp.maximum(cnt_a, cnt_b)

        # stack both genomes and sort by key so that a node present in both
        # ends up in two neighbouring rows (similar to np.intersect1d)
        # NOTE(review): this relies on keys being unique inside each genome
        # -- confirm against the genome encoding.
        stacked = jnp.concatenate((nodes1, nodes2), axis=0)
        order = jnp.argsort(stacked[:, 0], axis=0)
        upper, lower = self._adjacent_rows(stacked[order])

        # True where a row and its successor carry the same non-NaN key
        # (NaN == NaN is False, the explicit check mirrors conn_distance)
        same_key = upper[:, 0] == lower[:, 0]
        matched = same_key & ~jnp.isnan(upper[:, 0])

        # every matched pair accounts for one node in each genome
        unmatched_cnt = cnt_a + cnt_b - 2 * jnp.sum(matched)

        # gene-level distance for every neighbouring pair; pairs that are not
        # matched are zeroed out by the mask below
        pair_distance = vmap(self.genome.node_gene.distance, in_axes=(None, 0, 0))(
            state,
            vmap(extract_node_attrs)(upper),
            vmap(extract_node_attrs)(lower),
        )
        # NaN results would survive multiplication by a False mask, drop them
        pair_distance = jnp.where(jnp.isnan(pair_distance), 0, pair_distance)
        matched_distance = jnp.sum(pair_distance * matched)

        return self._weighted_score(unmatched_cnt, matched_distance, larger_cnt)

    def conn_distance(self, state, conns1, conns2):
        """
        The distance of the conns part for two genomes

        Rows with NaN in column 0 are not counted as connections. Columns 0
        and 1 together form the key that identifies the same connection in
        both genomes.
        """
        cnt_a = self._count_valid(conns1)
        cnt_b = self._count_valid(conns2)
        larger_cnt = jnp.maximum(cnt_a, cnt_b)

        stacked = jnp.concatenate((conns1, conns2), axis=0)
        key_cols = stacked[:, :2]
        # lexsort treats its last key as the primary one; reversing the
        # transposed key columns makes column 0 primary and column 1 secondary
        order = jnp.lexsort(key_cols.T[::-1])
        upper, lower = self._adjacent_rows(stacked[order])

        # both genomes have this connection: the two key columns agree with
        # the following row and the key is not NaN
        same_key = jnp.all(upper[:, :2] == lower[:, :2], axis=1)
        matched = same_key & ~jnp.isnan(upper[:, 0])

        # every matched pair accounts for one connection in each genome
        unmatched_cnt = cnt_a + cnt_b - 2 * jnp.sum(matched)

        # gene-level distance for every neighbouring pair; pairs that are not
        # matched are zeroed out by the mask below
        pair_distance = vmap(self.genome.conn_gene.distance, in_axes=(None, 0, 0))(
            state,
            vmap(extract_conn_attrs)(upper),
            vmap(extract_conn_attrs)(lower),
        )
        # NaN results would survive multiplication by a False mask, drop them
        pair_distance = jnp.where(jnp.isnan(pair_distance), 0, pair_distance)
        matched_distance = jnp.sum(pair_distance * matched)

        return self._weighted_score(unmatched_cnt, matched_distance, larger_cnt)