change repo structure; modify readme

2024-03-26 21:58:27 +08:00
parent 6970e6a6d5
commit 47dbcbea80
69 changed files with 74 additions and 60 deletions
--- a/tensorneat/algorithm/__init__.py
+++ b/tensorneat/algorithm/__init__.py
@@ -0,0 +1,2 @@
+from .base import BaseAlgorithm
+from .neat import NEAT
--- a/tensorneat/algorithm/base.py
+++ b/tensorneat/algorithm/base.py
@@ -0,0 +1,45 @@
+from utils import State
+
+
+class BaseAlgorithm:
+    """
+    Interface shared by the algorithms in this package.
+
+    Every method and property below raises NotImplementedError and is meant
+    to be overridden by a concrete algorithm.
+    """
+
+    def setup(self, randkey):
+        """Initialize the state of the algorithm from the random key `randkey`."""
+
+        raise NotImplementedError
+
+    def ask(self, state: State):
+        """Request the population that has to be evaluated for the given state."""
+        raise NotImplementedError
+
+    def tell(self, state: State, fitness):
+        """Update the state of the algorithm with the evaluated `fitness`."""
+        raise NotImplementedError
+
+    def transform(self, individual):
+        """Transform the genome of one individual into a neural network."""
+        raise NotImplementedError
+
+    def forward(self, inputs, transformed):
+        """Run `inputs` through `transformed` (the value produced by `transform`)."""
+        raise NotImplementedError
+
+    @property
+    def num_inputs(self):
+        """Number of inputs of the networks handled by the algorithm."""
+        raise NotImplementedError
+
+    @property
+    def num_outputs(self):
+        """Number of outputs of the networks handled by the algorithm."""
+        raise NotImplementedError
+
+    @property
+    def pop_size(self):
+        """Population size used by the algorithm."""
+        raise NotImplementedError
+
+    def member_count(self, state: State):
+        # used to analyse the species
+        # NOTE(review): presumably the number of members per species -- confirm in the subclasses
+        raise NotImplementedError
+
+    def generation(self, state: State):
+        # used to analyse the algorithm
+        # NOTE(review): presumably the current generation counter -- confirm in the subclasses
+        raise NotImplementedError
+
--- a/tensorneat/algorithm/hyperneat/__init__.py
+++ b/tensorneat/algorithm/hyperneat/__init__.py
@@ -0,0 +1,2 @@
+from .hyperneat import HyperNEAT
+from .substrate import BaseSubstrate, DefaultSubstrate, FullSubstrate
--- a/tensorneat/algorithm/hyperneat/hyperneat.py
+++ b/tensorneat/algorithm/hyperneat/hyperneat.py
@@ -0,0 +1,116 @@
+import jax, jax.numpy as jnp
+
+from utils import State, Act, Agg
+from .. import BaseAlgorithm, NEAT
+from ..neat.gene import BaseNodeGene, BaseConnGene
+from ..neat.genome import RecurrentGenome
+from .substrate import *
+
+
+class HyperNEAT(BaseAlgorithm):
+    """
+    HyperNEAT on top of an inner NEAT algorithm.
+
+    The inner NEAT network is queried once per substrate coordinate row; the
+    query results are rescaled into connection weights of a recurrent
+    substrate network, which is the network that is finally evaluated.
+    All population bookkeeping (ask / tell / species) is delegated to `neat`.
+    """
+
+    def __init__(
+            self,
+            substrate: BaseSubstrate,
+            neat: NEAT,
+            below_threshold: float = 0.3,
+            max_weight: float = 5.,
+            activation=Act.sigmoid,
+            aggregation=Agg.sum,
+            activate_time: int = 10,
+    ):
+        """
+        :param substrate: provides the query coordinates and the node / connection layout
+        :param neat: inner NEAT algorithm; its input size must equal the width of a query row
+        :param below_threshold: query results with absolute value below it are muted to 0
+        :param max_weight: scale factor applied to the shifted query results
+        :param activation: activation used by every substrate node
+        :param aggregation: aggregation used by every substrate node
+        :param activate_time: forwarded to the recurrent substrate genome
+        """
+        assert substrate.query_coors.shape[1] == neat.num_inputs, \
+            "Substrate input size should be equal to NEAT input size"
+
+        self.neat = neat
+        self.substrate = substrate
+        self.max_weight = max_weight
+        self.below_threshold = below_threshold
+
+        # the substrate network itself: fixed topology, weights come from the queries
+        self.hyper_genome = RecurrentGenome(
+            num_inputs=substrate.num_inputs,
+            num_outputs=substrate.num_outputs,
+            max_nodes=substrate.nodes_cnt,
+            max_conns=substrate.conns_cnt,
+            node_gene=HyperNodeGene(activation, aggregation),
+            conn_gene=HyperNEATConnGene(),
+            activate_time=activate_time,
+        )
+
+    def setup(self, randkey):
+        """Create the state; it only wraps the state of the inner NEAT."""
+        inner_state = self.neat.setup(randkey)
+        return State(neat_state=inner_state)
+
+    def ask(self, state: State):
+        """Delegate to the inner NEAT."""
+        inner_state = state.neat_state
+        return self.neat.ask(inner_state)
+
+    def tell(self, state: State, fitness):
+        """Let the inner NEAT consume `fitness` and store its new state."""
+        inner_state = self.neat.tell(state.neat_state, fitness)
+        return state.update(neat_state=inner_state)
+
+    def transform(self, individual):
+        """Turn one individual of the inner NEAT into a transformed substrate network."""
+        cppn = self.neat.transform(individual)
+
+        # one query of the inner network per row of query coordinates
+        query_all = jax.vmap(self.neat.forward, in_axes=(0, None))
+        weights = query_all(self.substrate.query_coors, cppn)
+
+        threshold = self.below_threshold
+
+        # mute the connections whose weight lies strictly inside (-threshold, threshold)
+        muted = (-threshold < weights) & (weights < threshold)
+        weights = jnp.where(muted, 0., weights)
+
+        # shift the surviving values towards zero by the threshold, then rescale
+        # (yields [-max_weight, max_weight] assuming the queries lie in [-1, 1] -- TODO confirm)
+        weights = jnp.where(weights > 0, weights - threshold, weights)
+        weights = jnp.where(weights < 0, weights + threshold, weights)
+        weights = weights / (1 - threshold) * self.max_weight
+
+        h_nodes = self.substrate.make_nodes(weights)
+        h_conns = self.substrate.make_conn(weights)
+        return self.hyper_genome.transform(h_nodes, h_conns)
+
+    def forward(self, inputs, transformed):
+        """Append a constant bias input of 1 and run the substrate network."""
+        bias = jnp.array([1])
+        inputs_with_bias = jnp.concatenate([inputs, bias])
+        return self.hyper_genome.forward(inputs_with_bias, transformed)
+
+    @property
+    def num_inputs(self):
+        # the last substrate input is the bias appended in forward()
+        return self.substrate.num_inputs - 1
+
+    @property
+    def num_outputs(self):
+        return self.substrate.num_outputs
+
+    @property
+    def pop_size(self):
+        return self.neat.pop_size
+
+    def member_count(self, state: State):
+        """Delegate to the inner NEAT."""
+        inner_state = state.neat_state
+        return self.neat.member_count(inner_state)
+
+    def generation(self, state: State):
+        """Delegate to the inner NEAT."""
+        inner_state = state.neat_state
+        return self.neat.generation(inner_state)
+
+
+class HyperNodeGene(BaseNodeGene):
+    """Node gene of the substrate network: one fixed aggregation followed by one fixed activation."""
+
+    def __init__(self,
+                 activation=Act.sigmoid,
+                 aggregation=Agg.sum,
+                 ):
+        super().__init__()
+        self.aggregation = aggregation
+        self.activation = activation
+
+    def forward(self, attrs, inputs):
+        """Aggregate `inputs`, then activate the result; `attrs` is not used."""
+        aggregated = self.aggregation(inputs)
+        return self.activation(aggregated)
+
+
+class HyperNEATConnGene(BaseConnGene):
+    """Connection gene of the substrate network; its only custom attribute is the weight."""
+    custom_attrs = ['weight']
+
+    def forward(self, attrs, inputs):
+        """Scale `inputs` by the weight stored in `attrs[0]`."""
+        return inputs * attrs[0]
--- a/tensorneat/algorithm/hyperneat/substrate/__init__.py
+++ b/tensorneat/algorithm/hyperneat/substrate/__init__.py
@@ -0,0 +1,3 @@
+from .base import BaseSubstrate
+from .default import DefaultSubstrate
+from .full import FullSubstrate
--- a/tensorneat/algorithm/hyperneat/substrate/base.py
+++ b/tensorneat/algorithm/hyperneat/substrate/base.py
@@ -0,0 +1,27 @@
+class BaseSubstrate:
+    """
+    Interface of a HyperNEAT substrate.
+
+    Every method and property raises NotImplementedError and has to be
+    overridden by a concrete substrate.
+    """
+
+    def make_nodes(self, query_res):
+        """Build the node array of the substrate network from the query results."""
+        raise NotImplementedError
+
+    def make_conn(self, query_res):
+        """Build the connection array of the substrate network from the query results."""
+        raise NotImplementedError
+
+    @property
+    def query_coors(self):
+        """Coordinates that are fed, row by row, into the querying network."""
+        raise NotImplementedError
+
+    @property
+    def num_inputs(self):
+        """Number of input nodes of the substrate network."""
+        raise NotImplementedError
+
+    @property
+    def num_outputs(self):
+        """Number of output nodes of the substrate network."""
+        raise NotImplementedError
+
+    @property
+    def nodes_cnt(self):
+        """Number of nodes of the substrate network."""
+        raise NotImplementedError
+
+    @property
+    def conns_cnt(self):
+        """Number of connections of the substrate network."""
+        raise NotImplementedError
--- a/tensorneat/algorithm/hyperneat/substrate/default.py
+++ b/tensorneat/algorithm/hyperneat/substrate/default.py
@@ -0,0 +1,38 @@
+import jax.numpy as jnp
+from . import BaseSubstrate
+
+
+class DefaultSubstrate(BaseSubstrate):
+    """
+    Substrate with a fixed, explicitly given layout.
+
+    The node array is returned unchanged; the connection array keeps its
+    first three columns and gets columns 3 and onwards overwritten by the query results.
+    """
+
+    def __init__(self, num_inputs, num_outputs, coors, nodes, conns):
+        # the three layout tables are converted to jax arrays once, up front
+        self.coors = jnp.array(coors)
+        self.nodes = jnp.array(nodes)
+        self.conns = jnp.array(conns)
+        self.inputs = num_inputs
+        self.outputs = num_outputs
+
+    def make_nodes(self, query_res):
+        """Nodes do not depend on the query results."""
+        return self.nodes
+
+    def make_conn(self, query_res):
+        """Return a copy of the connections whose weight columns hold `query_res`."""
+        weight_columns = self.conns.at[:, 3:]
+        return weight_columns.set(query_res)
+
+    @property
+    def query_coors(self):
+        return self.coors
+
+    @property
+    def num_inputs(self):
+        return self.inputs
+
+    @property
+    def num_outputs(self):
+        return self.outputs
+
+    @property
+    def nodes_cnt(self):
+        node_rows, *_ = self.nodes.shape
+        return node_rows
+
+    @property
+    def conns_cnt(self):
+        conn_rows, *_ = self.conns.shape
+        return conn_rows
--- a/tensorneat/algorithm/hyperneat/substrate/full.py
+++ b/tensorneat/algorithm/hyperneat/substrate/full.py
@@ -0,0 +1,76 @@
+import numpy as np
+from .default import DefaultSubstrate
+
+
+class FullSubstrate(DefaultSubstrate):
+    """
+    Substrate whose layers are fully connected: input -> hidden,
+    hidden -> hidden and hidden -> output.
+    """
+
+    def __init__(self,
+                 input_coors=((-1, -1), (0, -1), (1, -1)),
+                 hidden_coors=((-1, 0), (0, 0), (1, 0)),
+                 output_coors=((0, 1),),
+                 ):
+        # note the argument order of the helper: input, output, hidden
+        layout = analysis_substrate(input_coors, output_coors, hidden_coors)
+        query_coors, nodes, conns = layout
+
+        num_inputs, num_outputs = len(input_coors), len(output_coors)
+        super().__init__(num_inputs, num_outputs, query_coors, nodes, conns)
+
+
+def analysis_substrate(input_coors, output_coors, hidden_coors):
+    """
+    Build the tables of a fully connected substrate.
+
+    Node keys are assigned in the order input, output, hidden. Connections are
+    generated in the order input->hidden, hidden->hidden, hidden->output.
+
+    :return: (query_coors, nodes, conns)
+        query_coors: one row per connection, source coordinate followed by target coordinate
+        nodes: column vector with all node keys
+        conns: float32 rows of (input_idx, output_idx, enabled, weight); enabled is 1, weight is 0
+    """
+    input_coors, output_coors, hidden_coors = (
+        np.array(coors) for coors in (input_coors, output_coors, hidden_coors)
+    )
+
+    coor_dim = input_coors.shape[1]  # coordinate dimensions
+    n_in = input_coors.shape[0]
+    n_out = output_coors.shape[0]
+    n_hid = hidden_coors.shape[0]
+
+    input_idx = np.arange(n_in)
+    output_idx = np.arange(n_in, n_in + n_out)
+    hidden_idx = np.arange(n_in + n_out, n_in + n_out + n_hid)
+
+    total_conns = n_in * n_hid + n_hid * n_hid + n_hid * n_out
+    query_coors = np.zeros((total_conns, coor_dim * 2))
+    correspond_keys = np.zeros((total_conns, 2))
+
+    # (source keys, target keys, source coordinates, target coordinates) per layer pair
+    layer_pairs = (
+        (input_idx, hidden_idx, input_coors, hidden_coors),
+        (hidden_idx, hidden_idx, hidden_coors, hidden_coors),
+        (hidden_idx, output_idx, hidden_coors, output_coors),
+    )
+
+    # fill the pre-allocated tables section by section
+    start = 0
+    for src_idx, dst_idx, src_coors, dst_coors in layer_pairs:
+        aux_coors, aux_keys = cartesian_product(src_idx, dst_idx, src_coors, dst_coors)
+        stop = start + aux_keys.shape[0]
+        query_coors[start:stop, :] = aux_coors
+        correspond_keys[start:stop, :] = aux_keys
+        start = stop
+
+    nodes = np.concatenate((input_idx, output_idx, hidden_idx))[..., np.newaxis]
+
+    # input_idx, output_idx, enabled, weight
+    conns = np.zeros((total_conns, 4), dtype=np.float32)
+    conns[:, 0:2] = correspond_keys
+    conns[:, 2] = 1  # enabled is True
+
+    return query_coors, nodes, conns
+
+
+def cartesian_product(keys1, keys2, coors1, coors2):
+    """
+    Pair every element of the first group with every element of the second.
+
+    The first group varies slowest: its elements are repeated, the elements
+    of the second group are tiled.
+
+    :return: (new_coors, correspond_keys)
+        new_coors: rows of coors1 joined with rows of coors2 along axis 1
+        correspond_keys: two columns, key of the first group and key of the second
+    """
+    count1, count2 = keys1.shape[0], keys2.shape[0]
+
+    # second group: cycle through it once per element of the first group
+    right_keys = np.tile(keys2, count1)
+    right_coors = np.tile(coors2, (count1, 1))
+
+    # first group: hold each element for a full cycle of the second group
+    left_keys = np.repeat(keys1, count2)
+    left_coors = np.repeat(coors1, count2, axis=0)
+
+    correspond_keys = np.column_stack((left_keys, right_keys))
+    new_coors = np.concatenate((left_coors, right_coors), axis=1)
+
+    return new_coors, correspond_keys
--- a/tensorneat/algorithm/neat/__init__.py
+++ b/tensorneat/algorithm/neat/__init__.py
@@ -0,0 +1,5 @@
+from .gene import *
+from .genome import *
+from .species import *
+from .neat import NEAT
+
--- a/tensorneat/algorithm/neat/ga/__init__.py
+++ b/tensorneat/algorithm/neat/ga/__init__.py
@@ -0,0 +1,2 @@
+from .crossover import BaseCrossover, DefaultCrossover
+from .mutation import BaseMutation, DefaultMutation
--- a/tensorneat/algorithm/neat/ga/crossover/__init__.py
+++ b/tensorneat/algorithm/neat/ga/crossover/__init__.py
@@ -0,0 +1,2 @@
+from .base import BaseCrossover
+from .default import DefaultCrossover
--- a/tensorneat/algorithm/neat/ga/crossover/base.py
+++ b/tensorneat/algorithm/neat/ga/crossover/base.py
@@ -0,0 +1,3 @@
+class BaseCrossover:
+    """Interface of a crossover operator; instances are called like functions."""
+
+    def __call__(self, randkey, genome, nodes1, conns1, nodes2, conns2):
+        """
+        Combine two parent genomes into a new one.
+
+        The parameter order (nodes and conns of parent 1, then nodes and conns
+        of parent 2) matches DefaultCrossover, so that a positional call means
+        the same thing for every implementation.
+
+        :param randkey: random key
+        :param genome: genome definition the arrays belong to
+        :param nodes1: nodes of the first parent
+        :param conns1: connections of the first parent
+        :param nodes2: nodes of the second parent
+        :param conns2: connections of the second parent
+        :raises NotImplementedError: always; subclasses provide the operator
+        """
+        raise NotImplementedError
--- a/tensorneat/algorithm/neat/ga/crossover/default.py
+++ b/tensorneat/algorithm/neat/ga/crossover/default.py
@@ -0,0 +1,67 @@
+import jax, jax.numpy as jnp
+
+from .base import BaseCrossover
+
+class DefaultCrossover(BaseCrossover):
+    """
+    Crossover that keeps the structure of the first parent and mixes the
+    values of the genes both parents share.
+    """
+
+    def __call__(self, randkey, genome, nodes1, conns1, nodes2, conns2):
+        """
+        use genome1 and genome2 to generate a new genome
+        notice that genome1 should have higher fitness than genome2 (genome1 is winner!)
+        :param randkey: random key, split into one key for nodes and one for connections
+        :param genome: not used by this implementation
+        :param nodes1: nodes of the winner
+        :param conns1: connections of the winner
+        :param nodes2: nodes of the other parent
+        :param conns2: connections of the other parent
+        :return: (new_nodes, new_conns)
+        """
+        # NOTE(review): the third key `key` is never used; it is left in place because
+        # changing the number of splits would change the generated random streams
+        randkey_1, randkey_2, key = jax.random.split(randkey, 3)
+
+        # crossover nodes
+        keys1, keys2 = nodes1[:, 0], nodes2[:, 0]
+        # make the homologous genes of nodes2 sit at the same rows as in nodes1
+        nodes2 = self.align_array(keys1, keys2, nodes2, False)
+
+        # For not homologous genes, use the value of nodes1(winner)
+        # For homologous genes, use the crossover result between nodes1 and nodes2
+        new_nodes = jnp.where(jnp.isnan(nodes1) | jnp.isnan(nodes2), nodes1, self.crossover_gene(randkey_1, nodes1, nodes2))
+
+        # crossover connections; a connection is identified by its first two columns
+        con_keys1, con_keys2 = conns1[:, :2], conns2[:, :2]
+        conns2 = self.align_array(con_keys1, con_keys2, conns2, True)
+
+        new_conns = jnp.where(jnp.isnan(conns1) | jnp.isnan(conns2), conns1, self.crossover_gene(randkey_2, conns1, conns2))
+
+        return new_nodes, new_conns
+
+    def align_array(self, seq1, seq2, ar2, is_conn: bool):
+        """
+        Make ar2 align with ar1.
+        The original author marked this as the most difficult part of the code
+        and asked that it never be changed.
+        :param seq1: keys of ar1 (one value per row, or two columns when is_conn is True)
+        :param seq2: keys of ar2, in the same layout as seq1
+        :param ar2: the array to be re-ordered
+        :param is_conn: True when a key consists of two columns (connections)
+        :return: the re-ordered ar2
+        align means that the intersecting part of ar2 will be at the same position as in ar1,
+        the non-intersecting part of ar2 will be set to NaN
+        """
+        # broadcast to compare every key of seq1 (axis 0) with every key of seq2 (axis 1)
+        seq1, seq2 = seq1[:, jnp.newaxis], seq2[jnp.newaxis, :]
+        mask = (seq1 == seq2) & (~jnp.isnan(seq1))
+
+        if is_conn:
+            # a connection matches only when both key columns match
+            mask = jnp.all(mask, axis=2)
+
+        # rows of ar1 that have a counterpart in ar2
+        intersect_mask = mask.any(axis=1)
+        idx = jnp.arange(0, len(seq1))
+        # per row of ar1: sum of the matching positions in ar2
+        # (this is the position itself when keys are unique -- assumed, TODO confirm;
+        # it also assumes both parents have the same number of rows)
+        idx_fixed = jnp.dot(mask, idx)
+
+        refactor_ar2 = jnp.where(intersect_mask[:, jnp.newaxis], ar2[idx_fixed], jnp.nan)
+
+        return refactor_ar2
+
+    def crossover_gene(self, rand_key, g1, g2):
+        """
+        crossover two genes
+        :param rand_key: random key
+        :param g1: values of the first parent
+        :param g2: values of the second parent, same shape as g1
+        :return: element-wise random choice between g1 and g2
+        only genes with the same key are crossed over, thus the key never changes
+        """
+        r = jax.random.uniform(rand_key, shape=g1.shape)
+        return jnp.where(r > 0.5, g1, g2)
--- a/tensorneat/algorithm/neat/ga/mutation/__init__.py
+++ b/tensorneat/algorithm/neat/ga/mutation/__init__.py
@@ -0,0 +1,2 @@
+from .base import BaseMutation
+from .default import DefaultMutation
--- a/tensorneat/algorithm/neat/ga/mutation/base.py
+++ b/tensorneat/algorithm/neat/ga/mutation/base.py
@@ -0,0 +1,3 @@
+class BaseMutation:
+    """Interface of a mutation operator; instances are called like functions."""
+
+    def __call__(self, key, genome, nodes, conns, new_node_key):
+        """
+        Mutate one genome given as its `nodes` and `conns` arrays.
+        Always raises NotImplementedError; subclasses provide the operator.
+        NOTE(review): DefaultMutation names the first parameter `randkey` instead of `key`.
+        """
+        raise NotImplementedError
--- a/tensorneat/algorithm/neat/ga/mutation/default.py
+++ b/tensorneat/algorithm/neat/ga/mutation/default.py
@@ -0,0 +1,202 @@
+import jax, jax.numpy as jnp
+from . import BaseMutation
+from utils import fetch_first, fetch_random, I_INT, unflatten_conns, check_cycles
+
+
+class DefaultMutation(BaseMutation):
+    """
+    Default mutation: first a structural mutation (add / delete node,
+    add / delete connection), then a mutation of the gene values.
+    """
+
+    def __init__(
+            self,
+            conn_add: float = 0.4,
+            conn_delete: float = 0,
+            node_add: float = 0.2,
+            node_delete: float = 0,
+    ):
+        """
+        Each argument is the probability with which the corresponding
+        structural mutation is attempted in one call.
+        """
+        self.conn_add = conn_add
+        self.conn_delete = conn_delete
+        self.node_add = node_add
+        self.node_delete = node_delete
+
+    def __call__(self, randkey, genome, nodes, conns, new_node_key):
+        """
+        Mutate one genome.
+        :param randkey: random key
+        :param genome: genome definition (gene types, input / output keys, network type)
+        :param nodes: node array of the genome
+        :param conns: connection array of the genome
+        :param new_node_key: key given to the node created by an add-node mutation
+        :return: (nodes, conns) after mutation
+        """
+        k1, k2 = jax.random.split(randkey)
+
+        nodes, conns = self.mutate_structure(k1, genome, nodes, conns, new_node_key)
+        nodes, conns = self.mutate_values(k2, genome, nodes, conns)
+
+        return nodes, conns
+
+    def mutate_structure(self, randkey, genome, nodes, conns, new_node_key):
+        """
+        Apply the four structural mutations one after another, each gated by
+        its own probability.
+        :return: (nodes, conns)
+        """
+
+        def mutate_add_node(key_, nodes_, conns_):
+            # split a randomly chosen connection by a new node
+            i_key, o_key, idx = self.choice_connection_key(key_, conns_)
+
+            def successful_add_node():
+                # disable the connection
+                new_conns = conns_.at[idx, 2].set(False)
+
+                # add a new node
+                new_nodes = genome.add_node(nodes_, new_node_key, genome.node_gene.new_custom_attrs())
+
+                # add two new connections
+                new_conns = genome.add_conn(new_conns, i_key, new_node_key, True, genome.conn_gene.new_custom_attrs())
+                new_conns = genome.add_conn(new_conns, new_node_key, o_key, True, genome.conn_gene.new_custom_attrs())
+
+                return new_nodes, new_conns
+
+            return jax.lax.cond(
+                idx == I_INT,
+                lambda: (nodes_, conns_),  # no connection could be chosen: do nothing
+                successful_add_node
+            )
+
+        def mutate_delete_node(key_, nodes_, conns_):
+
+            # randomly choose a node that is neither input nor output
+            key, idx = self.choice_node_key(key_, nodes_, genome.input_idx, genome.output_idx,
+                                            allow_input_keys=False, allow_output_keys=False)
+
+            def successful_delete_node():
+                # delete the node
+                new_nodes = genome.delete_node_by_pos(nodes_, idx)
+
+                # delete all connections that start or end at the node
+                new_conns = jnp.where(
+                    ((conns_[:, 0] == key) | (conns_[:, 1] == key))[:, None],
+                    jnp.nan,
+                    conns_
+                )
+
+                return new_nodes, new_conns
+
+            return jax.lax.cond(
+                idx == I_INT,
+                lambda: (nodes_, conns_),  # no node could be chosen: do nothing
+                successful_delete_node
+            )
+
+        def mutate_add_conn(key_, nodes_, conns_):
+            # randomly choose two nodes
+            k1_, k2_ = jax.random.split(key_, num=2)
+
+            # input node of the connection can be any node
+            i_key, from_idx = self.choice_node_key(k1_, nodes_, genome.input_idx, genome.output_idx,
+                                                   allow_input_keys=True, allow_output_keys=True)
+
+            # output node of the connection can be any node except input node
+            o_key, to_idx = self.choice_node_key(k2_, nodes_, genome.input_idx, genome.output_idx,
+                                                 allow_input_keys=False, allow_output_keys=True)
+
+            conn_pos = fetch_first((conns_[:, 0] == i_key) & (conns_[:, 1] == o_key))
+            is_already_exist = conn_pos != I_INT
+
+            def nothing():
+                return nodes_, conns_
+
+            def successful():
+                return nodes_, genome.add_conn(conns_, i_key, o_key, True, genome.conn_gene.new_custom_attrs())
+
+            def already_exist():
+                # the connection is already there: only (re-)enable it
+                return nodes_, conns_.at[conn_pos, 2].set(True)
+
+            # network_type is a plain Python attribute, so this branch is chosen at trace time
+            if genome.network_type == 'feedforward':
+                u_cons = unflatten_conns(nodes_, conns_)
+                cons_exist = ~jnp.isnan(u_cons[0, :, :])
+                is_cycle = check_cycles(nodes_, cons_exist, from_idx, to_idx)
+
+                # a feedforward network must not get a connection that closes a cycle
+                return jax.lax.cond(
+                    is_already_exist,
+                    already_exist,
+                    lambda:
+                        jax.lax.cond(
+                            is_cycle,
+                            nothing,
+                            successful
+                        )
+                )
+
+            elif genome.network_type == 'recurrent':
+                return jax.lax.cond(
+                    is_already_exist,
+                    already_exist,
+                    successful
+                )
+
+            else:
+                raise ValueError(f"Invalid network type: {genome.network_type}")
+
+        def mutate_delete_conn(key_, nodes_, conns_):
+            # randomly choose a connection
+            i_key, o_key, idx = self.choice_connection_key(key_, conns_)
+
+            def successfully_delete_connection():
+                return nodes_, genome.delete_conn_by_pos(conns_, idx)
+
+            return jax.lax.cond(
+                idx == I_INT,
+                lambda: (nodes_, conns_),  # nothing
+                successfully_delete_connection
+            )
+
+        # One dedicated key for the four gate draws plus one key per mutation.
+        # A JAX key must not be consumed twice: using the same key for the gate
+        # draws and for a mutation would correlate the two random streams.
+        k_gate, k1, k2, k3, k4 = jax.random.split(randkey, num=5)
+        r1, r2, r3, r4 = jax.random.uniform(k_gate, shape=(4,))
+
+        def no(key_, nodes_, conns_):
+            return nodes_, conns_
+
+        nodes, conns = jax.lax.cond(r1 < self.node_add, mutate_add_node, no, k1, nodes, conns)
+        nodes, conns = jax.lax.cond(r2 < self.node_delete, mutate_delete_node, no, k2, nodes, conns)
+        nodes, conns = jax.lax.cond(r3 < self.conn_add, mutate_add_conn, no, k3, nodes, conns)
+        nodes, conns = jax.lax.cond(r4 < self.conn_delete, mutate_delete_conn, no, k4, nodes, conns)
+
+        return nodes, conns
+
+    def mutate_values(self, randkey, genome, nodes, conns):
+        """
+        Mutate the values of every node and connection with the `mutate`
+        method of the genome's gene types, one random key per row.
+        :return: (new_nodes, new_conns)
+        """
+        k1, k2 = jax.random.split(randkey, num=2)
+        nodes_keys = jax.random.split(k1, num=nodes.shape[0])
+        conns_keys = jax.random.split(k2, num=conns.shape[0])
+
+        new_nodes = jax.vmap(genome.node_gene.mutate, in_axes=(0, 0))(nodes_keys, nodes)
+        new_conns = jax.vmap(genome.conn_gene.mutate, in_axes=(0, 0))(conns_keys, conns)
+
+        # entries that were NaN before the mutation stay NaN
+        new_nodes = jnp.where(jnp.isnan(nodes), jnp.nan, new_nodes)
+        new_conns = jnp.where(jnp.isnan(conns), jnp.nan, new_conns)
+
+        return new_nodes, new_conns
+
+    def choice_node_key(self, rand_key, nodes, input_idx, output_idx,
+                        allow_input_keys: bool = False, allow_output_keys: bool = False):
+        """
+        Randomly choose a node key from the given nodes.
+        NaN rows are never chosen. Input nodes are excluded unless
+        allow_input_keys is True, output nodes unless allow_output_keys is True.
+        :param rand_key: random key
+        :param nodes: node array, keys in column 0
+        :param input_idx: keys of the input nodes
+        :param output_idx: keys of the output nodes
+        :param allow_input_keys: whether an input node may be chosen
+        :param allow_output_keys: whether an output node may be chosen
+        :return: return its key and position(idx); the key is NaN when idx equals I_INT
+        """
+
+        node_keys = nodes[:, 0]
+        mask = ~jnp.isnan(node_keys)
+
+        if not allow_input_keys:
+            mask = jnp.logical_and(mask, ~jnp.isin(node_keys, input_idx))
+
+        if not allow_output_keys:
+            mask = jnp.logical_and(mask, ~jnp.isin(node_keys, output_idx))
+
+        idx = fetch_random(rand_key, mask)
+        key = jnp.where(idx != I_INT, nodes[idx, 0], jnp.nan)
+        return key, idx
+
+    def choice_connection_key(self, rand_key, conns):
+        """
+        Randomly choose a connection key from the given connections.
+        Rows whose first column is NaN are never chosen.
+        :return: i_key, o_key, idx; both keys are NaN when idx equals I_INT
+        """
+
+        idx = fetch_random(rand_key, ~jnp.isnan(conns[:, 0]))
+        i_key = jnp.where(idx != I_INT, conns[idx, 0], jnp.nan)
+        o_key = jnp.where(idx != I_INT, conns[idx, 1], jnp.nan)
+
+        return i_key, o_key, idx
--- a/tensorneat/algorithm/neat/gene/__init__.py
+++ b/tensorneat/algorithm/neat/gene/__init__.py
@@ -0,0 +1,3 @@
+from .base import BaseGene
+from .conn import *
+from .node import *
--- a/tensorneat/algorithm/neat/gene/base.py
+++ b/tensorneat/algorithm/neat/gene/base.py
@@ -0,0 +1,23 @@
+class BaseGene:
+    "Base class for node genes or connection genes; the operations below must be provided by subclasses."
+    # names of the attributes every gene of this kind carries
+    fixed_attrs = []
+    # names of the additional attributes a concrete gene defines
+    custom_attrs = []
+
+    def __init__(self):
+        pass
+
+    def new_custom_attrs(self):
+        # values of the custom attributes for a newly created gene
+        raise NotImplementedError
+
+    def mutate(self, randkey, gene):
+        # return the mutated version of `gene`
+        raise NotImplementedError
+
+    def distance(self, gene1, gene2):
+        # distance between two genes of this type
+        raise NotImplementedError
+
+    def forward(self, attrs, inputs):
+        # compute the output of the gene for `inputs`
+        raise NotImplementedError
+
+    @property
+    def length(self):
+        # total number of attributes: fixed ones plus custom ones
+        return len(self.fixed_attrs) + len(self.custom_attrs)
--- a/tensorneat/algorithm/neat/gene/conn/__init__.py
+++ b/tensorneat/algorithm/neat/gene/conn/__init__.py
@@ -0,0 +1,2 @@
+from .base import BaseConnGene
+from .default import DefaultConnGene
--- a/tensorneat/algorithm/neat/gene/conn/base.py
+++ b/tensorneat/algorithm/neat/gene/conn/base.py
@@ -0,0 +1,12 @@
+from .. import BaseGene
+
+
+class BaseConnGene(BaseGene):
+    "Base class for connection genes; fixes the three leading attributes of every connection."
+    # the first three columns of a connection row, in this order
+    fixed_attrs = ['input_index', 'output_index', 'enabled']
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, attrs, inputs):
+        # to be provided by the concrete connection gene
+        raise NotImplementedError
--- a/tensorneat/algorithm/neat/gene/conn/default.py
+++ b/tensorneat/algorithm/neat/gene/conn/default.py
@@ -0,0 +1,50 @@
+import jax.numpy as jnp
+
+from utils import mutate_float
+from . import BaseConnGene
+
+
+class DefaultConnGene(BaseConnGene):
+    "Default connection gene, with the same behavior as in NEAT-python."
+
+    custom_attrs = ['weight']
+
+    def __init__(
+            self,
+            weight_init_mean: float = 0.0,
+            weight_init_std: float = 1.0,
+            weight_mutate_power: float = 0.5,
+            weight_mutate_rate: float = 0.8,
+            weight_replace_rate: float = 0.1,
+    ):
+        """Store the settings that are handed to `mutate_float` when the weight is mutated."""
+        super().__init__()
+        self.weight_init_mean = weight_init_mean
+        self.weight_init_std = weight_init_std
+        self.weight_mutate_power = weight_mutate_power
+        self.weight_mutate_rate = weight_mutate_rate
+        self.weight_replace_rate = weight_replace_rate
+
+    def new_custom_attrs(self):
+        """A new connection starts with the initial mean as its weight."""
+        initial_weight = self.weight_init_mean
+        return jnp.array([initial_weight])
+
+    def mutate(self, key, conn):
+        """Mutate the weight (column 3) of `conn`; the first three columns are kept."""
+        src, dst, enabled, old_weight = conn[0], conn[1], conn[2], conn[3]
+
+        new_weight = mutate_float(
+            key,
+            old_weight,
+            self.weight_init_mean,
+            self.weight_init_std,
+            self.weight_mutate_power,
+            self.weight_mutate_rate,
+            self.weight_replace_rate,
+        )
+
+        return jnp.array([src, dst, enabled, new_weight])
+
+    def distance(self, attrs1, attrs2):
+        """Distance of two connection rows: differing enabled flag plus absolute weight difference."""
+        enabled_differs = attrs1[2] != attrs2[2]
+        weight_gap = jnp.abs(attrs1[3] - attrs2[3])
+        return enabled_differs + weight_gap
+
+    def forward(self, attrs, inputs):
+        """Scale `inputs` by the weight stored in `attrs[0]`."""
+        return inputs * attrs[0]
--- a/tensorneat/algorithm/neat/gene/node/__init__.py
+++ b/tensorneat/algorithm/neat/gene/node/__init__.py
@@ -0,0 +1,2 @@
+from .base import BaseNodeGene
+from .default import DefaultNodeGene
--- a/tensorneat/algorithm/neat/gene/node/base.py
+++ b/tensorneat/algorithm/neat/gene/node/base.py
@@ -0,0 +1,12 @@
+from .. import BaseGene
+
+
+class BaseNodeGene(BaseGene):
+    "Base class for node genes; fixes the single leading attribute of every node."
+    # the first column of a node row
+    fixed_attrs = ["index"]
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, attrs, inputs):
+        # to be provided by the concrete node gene
+        raise NotImplementedError
--- a/tensorneat/algorithm/neat/gene/node/default.py
+++ b/tensorneat/algorithm/neat/gene/node/default.py
@@ -0,0 +1,95 @@
+from typing import Tuple
+
+import jax, jax.numpy as jnp
+
+from utils import Act, Agg, act, agg, mutate_int, mutate_float
+from . import BaseNodeGene
+
+
+class DefaultNodeGene(BaseNodeGene):
+    "Default node gene, with the same behavior as in NEAT-python."
+
+    # Order of the custom attributes inside a node row (after the index).
+    # It has to match new_custom_attrs(), mutate() and forward(), which all
+    # store the activation before the aggregation.
+    custom_attrs = ['bias', 'response', 'activation', 'aggregation']
+
+    def __init__(
+            self,
+            bias_init_mean: float = 0.0,
+            bias_init_std: float = 1.0,
+            bias_mutate_power: float = 0.5,
+            bias_mutate_rate: float = 0.7,
+            bias_replace_rate: float = 0.1,
+
+            response_init_mean: float = 1.0,
+            response_init_std: float = 0.0,
+            response_mutate_power: float = 0.5,
+            response_mutate_rate: float = 0.7,
+            response_replace_rate: float = 0.1,
+
+            activation_default: callable = Act.sigmoid,
+            activation_options: Tuple = (Act.sigmoid,),
+            activation_replace_rate: float = 0.1,
+
+            aggregation_default: callable = Agg.sum,
+            aggregation_options: Tuple = (Agg.sum,),
+            aggregation_replace_rate: float = 0.1,
+    ):
+        """
+        Store the mutation settings of bias, response, activation and aggregation.
+
+        The activation and the aggregation are stored inside a node as the
+        position of the function within its options tuple.
+        :raises ValueError: if a default function is not part of its options
+        """
+        super().__init__()
+        self.bias_init_mean = bias_init_mean
+        self.bias_init_std = bias_init_std
+        self.bias_mutate_power = bias_mutate_power
+        self.bias_mutate_rate = bias_mutate_rate
+        self.bias_replace_rate = bias_replace_rate
+
+        self.response_init_mean = response_init_mean
+        self.response_init_std = response_init_std
+        self.response_mutate_power = response_mutate_power
+        self.response_mutate_rate = response_mutate_rate
+        self.response_replace_rate = response_replace_rate
+
+        # from here on the default is the index into the options, not the function
+        self.activation_default = activation_options.index(activation_default)
+        self.activation_options = activation_options
+        self.activation_indices = jnp.arange(len(activation_options))
+        self.activation_replace_rate = activation_replace_rate
+
+        self.aggregation_default = aggregation_options.index(aggregation_default)
+        self.aggregation_options = aggregation_options
+        self.aggregation_indices = jnp.arange(len(aggregation_options))
+        self.aggregation_replace_rate = aggregation_replace_rate
+
+    def new_custom_attrs(self):
+        """Custom attributes of a new node: initial means and the default function indices."""
+        return jnp.array(
+            [self.bias_init_mean, self.response_init_mean, self.activation_default, self.aggregation_default]
+        )
+
+    def mutate(self, key, node):
+        """
+        Mutate one node row (index, bias, response, activation, aggregation).
+        The index is kept; every other attribute gets its own random key.
+        """
+        k1, k2, k3, k4 = jax.random.split(key, num=4)
+        index = node[0]
+
+        bias = mutate_float(k1, node[1], self.bias_init_mean, self.bias_init_std,
+                            self.bias_mutate_power, self.bias_mutate_rate, self.bias_replace_rate)
+
+        res = mutate_float(k2, node[2], self.response_init_mean, self.response_init_std,
+                           self.response_mutate_power, self.response_mutate_rate, self.response_replace_rate)
+
+        # named *_idx so that the imported helpers `act` and `agg` are not shadowed
+        act_idx = mutate_int(k3, node[3], self.activation_indices, self.activation_replace_rate)
+
+        agg_idx = mutate_int(k4, node[4], self.aggregation_indices, self.aggregation_replace_rate)
+
+        return jnp.array([index, bias, res, act_idx, agg_idx])
+
+    def distance(self, node1, node2):
+        """
+        Distance of two node rows: absolute differences of bias and response
+        plus 1 for each of activation / aggregation that differs.
+        """
+        return (
+                jnp.abs(node1[1] - node2[1]) +
+                jnp.abs(node1[2] - node2[2]) +
+                (node1[3] != node2[3]) +
+                (node1[4] != node2[4])
+        )
+
+    def forward(self, attrs, inputs):
+        """
+        Compute the node output: activation(bias + response * aggregation(inputs)).
+        :param attrs: the four custom attributes of the node (without the index)
+        :param inputs: the values arriving at the node
+        """
+        bias, res, act_idx, agg_idx = attrs
+
+        z = agg(agg_idx, inputs, self.aggregation_options)
+        z = bias + res * z
+        z = act(act_idx, z, self.activation_options)
+
+        return z
--- a/tensorneat/algorithm/neat/genome/__init__.py
+++ b/tensorneat/algorithm/neat/genome/__init__.py
@@ -0,0 +1,3 @@
+from .base import BaseGenome
+from .default import DefaultGenome
+from .recurrent import RecurrentGenome
--- a/tensorneat/algorithm/neat/genome/base.py
+++ b/tensorneat/algorithm/neat/genome/base.py
@@ -0,0 +1,65 @@
+import jax.numpy as jnp
+from ..gene import BaseNodeGene, BaseConnGene, DefaultNodeGene, DefaultConnGene
+from utils import fetch_first
+
+
+class BaseGenome:
+    """
+    Base class of a genome definition.
+
+    It holds the sizes and gene types of a genome and offers the helpers that
+    add or delete rows in the node / connection arrays. Unused rows of these
+    arrays are marked by NaN.
+    """
+    network_type = None
+
+    def __init__(
+            self,
+            num_inputs: int,
+            num_outputs: int,
+            max_nodes: int,
+            max_conns: int,
+            node_gene: BaseNodeGene = None,
+            conn_gene: BaseConnGene = None,
+    ):
+        """
+        :param num_inputs: number of input nodes; they get the keys 0 .. num_inputs - 1
+        :param num_outputs: number of output nodes; their keys follow the input keys
+        :param max_nodes: stored as the node capacity of the genome
+        :param max_conns: stored as the connection capacity of the genome
+        :param node_gene: node gene type; a DefaultNodeGene is created when omitted
+        :param conn_gene: connection gene type; a DefaultConnGene is created when omitted
+        """
+        # The default genes are created here rather than in the signature:
+        # a default written in the signature is evaluated once when the class
+        # is defined, which would build the gene (and its jax arrays) at import
+        # time and share that single instance between all genomes.
+        if node_gene is None:
+            node_gene = DefaultNodeGene()
+        if conn_gene is None:
+            conn_gene = DefaultConnGene()
+
+        self.num_inputs = num_inputs
+        self.num_outputs = num_outputs
+        self.input_idx = jnp.arange(num_inputs)
+        self.output_idx = jnp.arange(num_inputs, num_inputs + num_outputs)
+        self.max_nodes = max_nodes
+        self.max_conns = max_conns
+        self.node_gene = node_gene
+        self.conn_gene = conn_gene
+
+    def transform(self, nodes, conns):
+        """Turn the arrays of a genome into the form used by `forward`; provided by subclasses."""
+        raise NotImplementedError
+
+    def forward(self, inputs, transformed):
+        """Evaluate a transformed genome on `inputs`; provided by subclasses."""
+        raise NotImplementedError
+
+    def add_node(self, nodes, new_key: int, attrs):
+        """
+        Add a new node to the genome.
+        The new node will place at the first NaN row.
+        NOTE(review): the behavior when no NaN row is left depends on what
+        `fetch_first` returns in that case -- confirm.
+        """
+        exist_keys = nodes[:, 0]
+        pos = fetch_first(jnp.isnan(exist_keys))
+        new_nodes = nodes.at[pos, 0].set(new_key)
+        return new_nodes.at[pos, 1:].set(attrs)
+
+    def delete_node_by_pos(self, nodes, pos):
+        """
+        Delete a node from the genome.
+        Delete the node by its pos in nodes: the whole row is set to NaN.
+        """
+        return nodes.at[pos].set(jnp.nan)
+
+    def add_conn(self, conns, i_key, o_key, enable: bool, attrs):
+        """
+        Add a new connection to the genome.
+        The new connection will place at the first NaN row.
+        Columns 0-2 receive (i_key, o_key, enable), the remaining columns `attrs`.
+        """
+        con_keys = conns[:, 0]
+        pos = fetch_first(jnp.isnan(con_keys))
+        new_conns = conns.at[pos, 0:3].set(jnp.array([i_key, o_key, enable]))
+        return new_conns.at[pos, 3:].set(attrs)
+
+    def delete_conn_by_pos(self, conns, pos):
+        """
+        Delete a connection from the genome.
+        Delete the connection by its idx: the whole row is set to NaN.
+        """
+        return conns.at[pos].set(jnp.nan)
--- a/tensorneat/algorithm/neat/genome/default.py
+++ b/tensorneat/algorithm/neat/genome/default.py
@@ -0,0 +1,90 @@
+from typing import Callable
+
+import jax, jax.numpy as jnp
+from utils import unflatten_conns, topological_sort, I_INT
+
+from . import BaseGenome
+from ..gene import BaseNodeGene, BaseConnGene, DefaultNodeGene, DefaultConnGene
+
+
+class DefaultGenome(BaseGenome):
+    """
+    Default genome class, with the same behavior as the NEAT-Python.
+    Nodes are evaluated once each, following the order produced by `topological_sort`.
+    """
+
+    network_type = 'feedforward'
+
+    def __init__(self,
+                 num_inputs: int,
+                 num_outputs: int,
+                 max_nodes=5,
+                 max_conns=4,
+                 node_gene: BaseNodeGene = DefaultNodeGene(),
+                 conn_gene: BaseConnGene = DefaultConnGene(),
+                 output_transform: Callable = None
+                 ):
+        """
+        num_inputs / num_outputs: number of input and output nodes
+        max_nodes / max_conns: number of rows reserved for node and connection genes
+        node_gene / conn_gene: gene objects used to evaluate nodes and connections
+        output_transform: optional callable applied to the output vector returned by `forward`
+
+        Raises ValueError when `output_transform` fails on a zero vector of length `num_outputs`.
+        """
+        super().__init__(num_inputs, num_outputs, max_nodes, max_conns, node_gene, conn_gene)
+
+        if output_transform is not None:
+            # probe the callable once so that a broken transform is reported at construction time
+            try:
+                output_transform(jnp.zeros(num_outputs))
+            except Exception as e:
+                # keep the original exception as the cause so its traceback is not lost
+                raise ValueError(f"Output transform function failed: {e}") from e
+        self.output_transform = output_transform
+
+    def transform(self, nodes, conns):
+        """
+        Prepare a genome for `forward`.
+        Returns (seqs, nodes, u_conns): the calculation order from `topological_sort`, the untouched
+        nodes, and the unflattened connection attributes with non-enabled connections set to NaN.
+        """
+        u_conns = unflatten_conns(nodes, conns)
+
+        # a connection is active when and only when its enable flag equals 1;
+        # missing (NaN) entries and disabled entries both compare False here
+        conn_enable = u_conns[0] == 1
+
+        # remove enable attr
+        u_conns = jnp.where(conn_enable, u_conns[1:, :], jnp.nan)
+        seqs = topological_sort(nodes, conn_enable)
+
+        return seqs, nodes, u_conns
+
+    def forward(self, inputs, transformed):
+        """
+        Evaluate the network on `inputs`.
+        `transformed` is the (cal_seqs, nodes, conns) tuple returned by `transform`.
+        Returns the values at `self.output_idx`, passed through `output_transform` when one is set.
+        """
+        cal_seqs, nodes, conns = transformed
+
+        N = nodes.shape[0]
+        # every value starts as NaN; only the input slots are known up front
+        ini_vals = jnp.full((N,), jnp.nan)
+        ini_vals = ini_vals.at[self.input_idx].set(inputs)
+        nodes_attrs = nodes[:, 1:]
+
+        def cond_fun(carry):
+            # stop after N entries or at the first I_INT entry of the calculation sequence
+            values, idx = carry
+            return (idx < N) & (cal_seqs[idx] != I_INT)
+
+        def body_func(carry):
+            values, idx = carry
+            i = cal_seqs[idx]
+
+            def hit():
+                # conns[:, :, i] holds the attributes of every connection that ends in node i
+                ins = jax.vmap(self.conn_gene.forward, in_axes=(1, 0))(conns[:, :, i], values)
+                z = self.node_gene.forward(nodes_attrs[i], ins)
+
+                new_values = values.at[i].set(z)
+                return new_values
+
+            def miss():
+                return values
+
+            # the val of input nodes is obtained by the task, not by calculation
+            values = jax.lax.cond(jnp.isin(i, self.input_idx), miss, hit)
+
+            return values, idx + 1
+
+        vals, _ = jax.lax.while_loop(cond_fun, body_func, (ini_vals, 0))
+
+        if self.output_transform is None:
+            return vals[self.output_idx]
+        else:
+            return self.output_transform(vals[self.output_idx])
--- a/tensorneat/algorithm/neat/genome/recurrent.py
+++ b/tensorneat/algorithm/neat/genome/recurrent.py
@@ -0,0 +1,60 @@
+import jax, jax.numpy as jnp
+from utils import unflatten_conns
+
+from . import BaseGenome
+from ..gene import BaseNodeGene, BaseConnGene, DefaultNodeGene, DefaultConnGene
+
+
+class RecurrentGenome(BaseGenome):
+    """Recurrent genome: all nodes are re-evaluated together for `activate_time` steps"""
+
+    network_type = 'recurrent'
+
+    def __init__(self,
+                 num_inputs: int,
+                 num_outputs: int,
+                 max_nodes: int,
+                 max_conns: int,
+                 node_gene: BaseNodeGene = DefaultNodeGene(),
+                 conn_gene: BaseConnGene = DefaultConnGene(),
+                 activate_time: int = 10,
+                 ):
+        """
+        Same arguments as the base genome, plus
+        activate_time: number of update steps performed by `forward`
+        """
+        super().__init__(num_inputs, num_outputs, max_nodes, max_conns, node_gene, conn_gene)
+        self.activate_time = activate_time
+
+    def transform(self, nodes, conns):
+        """
+        Return (nodes, u_conns) where u_conns holds the unflattened connection attributes
+        without the enable flag; connections whose flag is not 1 become NaN.
+        """
+        unflattened = unflatten_conns(nodes, conns)
+
+        enabled = unflattened[0] == 1
+        attrs_only = unflattened[1:, :]
+
+        return nodes, jnp.where(enabled, attrs_only, jnp.nan)
+
+    def forward(self, inputs, transformed):
+        """
+        Run `activate_time` update steps starting from all-NaN node values and
+        return the values found at `self.output_idx` afterwards.
+        """
+        nodes, conns = transformed
+
+        node_count = nodes.shape[0]
+        nodes_attrs = nodes[:, 1:]
+
+        # inner map walks the last connection axis with a fixed source value,
+        # outer map pairs the middle connection axis with the node values
+        across_last_axis = jax.vmap(self.conn_gene.forward, in_axes=(1, None))
+        across_values = jax.vmap(across_last_axis, in_axes=(1, 0))
+        all_nodes_forward = jax.vmap(self.node_gene.forward)
+
+        def step(_, values):
+            # inputs are re-written at every step so they are never overwritten by node outputs
+            values = values.at[self.input_idx].set(inputs)
+            node_ins = across_values(conns, values)
+            return all_nodes_forward(nodes_attrs, node_ins.T)
+
+        start = jnp.full((node_count,), jnp.nan)
+        final = jax.lax.fori_loop(0, self.activate_time, step, start)
+
+        return final[self.output_idx]
--- a/tensorneat/algorithm/neat/neat.py
+++ b/tensorneat/algorithm/neat/neat.py
@@ -0,0 +1,113 @@
+import jax, jax.numpy as jnp
+from utils import State
+from .. import BaseAlgorithm
+from .species import *
+from .ga import *
+
+
+class NEAT(BaseAlgorithm):
+    """
+    NEAT algorithm: population bookkeeping is delegated to `species`, and each new
+    generation is produced by `crossover` followed by `mutation`.
+    """
+
+    def __init__(
+            self,
+            species: BaseSpecies,
+            mutation: BaseMutation = DefaultMutation(),
+            crossover: BaseCrossover = DefaultCrossover(),
+    ):
+        # the genome definition is taken from the species object
+        self.species = species
+        self.genome = species.genome
+        self.crossover = crossover
+        self.mutation = mutation
+
+    def setup(self, randkey):
+        """initialize the state of the algorithm"""
+        own_key, species_key = jax.random.split(randkey, 2)
+
+        # keys in use at start: inputs nodes, output nodes, 1 hidden node
+        largest_io_key = max(*self.genome.input_idx, *self.genome.output_idx)
+
+        return State(
+            randkey=own_key,
+            generation=jnp.array(0.),
+            next_node_key=jnp.array(largest_io_key + 2, dtype=jnp.float32),
+            species=self.species.setup(species_key),
+        )
+
+    def ask(self, state: State):
+        """return the population held by the species state"""
+        return self.species.ask(state.species)
+
+    def tell(self, state: State, fitness):
+        """advance one generation: update species, breed the next population, speciate again"""
+        # the first of the three sub-keys is not used
+        _, breed_key, carried_key = jax.random.split(state.randkey, 3)
+
+        state = state.update(
+            generation=state.generation + 1,
+            randkey=carried_key
+        )
+
+        updated_species, winner, loser, elite_mask = self.species.update_species(
+            state.species, fitness, state.generation
+        )
+        state = state.update(species=updated_species)
+
+        state = self.create_next_generation(breed_key, state, winner, loser, elite_mask)
+
+        return state.update(species=self.species.speciate(state.species, state.generation))
+
+    def transform(self, individual):
+        """transform the genome into a neural network"""
+        nodes, conns = individual
+        return self.genome.transform(nodes, conns)
+
+    def forward(self, inputs, transformed):
+        """evaluate a transformed genome on `inputs`"""
+        return self.genome.forward(inputs, transformed)
+
+    @property
+    def num_inputs(self):
+        return self.genome.num_inputs
+
+    @property
+    def num_outputs(self):
+        return self.genome.num_outputs
+
+    @property
+    def pop_size(self):
+        return self.species.pop_size
+
+    def create_next_generation(self, randkey, state, winner, loser, elite_mask):
+        """
+        Build the next population from the (winner, loser) parent indices.
+        Individuals flagged in `elite_mask` keep the crossover result, all others receive the mutated one.
+        Returns the state with the new population and the refreshed `next_node_key`.
+        """
+        pop_size = self.species.pop_size
+        species_state = state.species
+
+        # one distinct node key per individual, handed to the mutation
+        new_node_keys = jnp.arange(pop_size) + state.next_node_key
+
+        crossover_seed, mutation_seed = jax.random.split(randkey, 2)
+        crossover_rand_keys = jax.random.split(crossover_seed, pop_size)
+        mutate_rand_keys = jax.random.split(mutation_seed, pop_size)
+
+        # gather both parents of every child
+        winner_nodes, winner_conns = species_state.pop_nodes[winner], species_state.pop_conns[winner]
+        loser_nodes, loser_conns = species_state.pop_nodes[loser], species_state.pop_conns[loser]
+
+        # batch crossover
+        batched_crossover = jax.vmap(self.crossover, in_axes=(0, None, 0, 0, 0, 0))
+        child_nodes, child_conns = batched_crossover(
+            crossover_rand_keys, self.genome, winner_nodes, winner_conns, loser_nodes, loser_conns
+        )
+
+        # batch mutation
+        batched_mutation = jax.vmap(self.mutation, in_axes=(0, None, 0, 0, 0))
+        mutated_nodes, mutated_conns = batched_mutation(
+            mutate_rand_keys, self.genome, child_nodes, child_conns, new_node_keys
+        )
+
+        # elitism don't mutate
+        keep_unmutated = elite_mask[:, None, None]
+        pop_nodes = jnp.where(keep_unmutated, child_nodes, mutated_nodes)
+        pop_conns = jnp.where(keep_unmutated, child_conns, mutated_conns)
+
+        # next node key = largest key present in the new population + 1 (NaN rows are ignored)
+        node_keys = pop_nodes[:, :, 0]
+        largest_key = jnp.max(jnp.where(jnp.isnan(node_keys), -jnp.inf, node_keys))
+
+        return state.update(
+            species=species_state.update(
+                pop_nodes=pop_nodes,
+                pop_conns=pop_conns,
+            ),
+            next_node_key=largest_key + 1,
+        )
+
+    def member_count(self, state: State):
+        # to analysis the species
+        return state.species.member_count
+
+    def generation(self, state: State):
+        # to analysis the algorithm
+        return state.generation
--- a/tensorneat/algorithm/neat/species/init.py
+++ b/tensorneat/algorithm/neat/species/init.py
@@ -0,0 +1,2 @@
+from .base import BaseSpecies
+from .default import DefaultSpecies
--- a/tensorneat/algorithm/neat/species/base.py
+++ b/tensorneat/algorithm/neat/species/base.py
@@ -0,0 +1,14 @@
+from utils import State
+
+class BaseSpecies:
+    # Interface of a species manager; every method here only raises NotImplementedError
+    # and has to be provided by a subclass.
+    def setup(self, randkey):
+        """create the initial species state"""
+        raise NotImplementedError
+
+    def ask(self, state: State):
+        """return the population that has to be evaluated"""
+        raise NotImplementedError
+
+    def update_species(self, state, fitness, generation):
+        """update the species with the fitness of the current population"""
+        raise NotImplementedError
+
+    def speciate(self, state, generation):
+        """assign the individuals of the population to species"""
+        raise NotImplementedError
--- a/tensorneat/algorithm/neat/species/default.py
+++ b/tensorneat/algorithm/neat/species/default.py
@@ -0,0 +1,519 @@
+import numpy as np
+import jax, jax.numpy as jnp
+from utils import State, rank_elements, argmin_with_mask, fetch_first
+from ..genome import BaseGenome
+from .base import BaseSpecies
+
+
+class DefaultSpecies(BaseSpecies):
+
+    def __init__(self,
+                 genome: BaseGenome,
+                 pop_size,
+                 species_size,
+                 compatibility_disjoint: float = 1.0,
+                 compatibility_weight: float = 0.4,
+                 max_stagnation: int = 15,
+                 species_elitism: int = 2,
+                 spawn_number_change_rate: float = 0.5,
+                 genome_elitism: int = 2,
+                 survival_threshold: float = 0.2,
+                 min_species_size: int = 1,
+                 compatibility_threshold: float = 3.
+                 ):
+        """
+        genome: genome definition shared by the whole population
+        pop_size: number of individuals
+        species_size: number of species slots
+        compatibility_disjoint: weight of non-homologous genes in the genome distance
+        compatibility_weight: weight of the homologous gene distance in the genome distance
+        max_stagnation: generations without improvement after which a species is removed
+        species_elitism: number of best-ranked species that are never removed for stagnation
+        spawn_number_change_rate: how far the size of a species moves towards its target size
+        genome_elitism: number of best members of a species that are flagged as elite
+        survival_threshold: fraction of a species that may be picked as parents
+        min_species_size: stored only; not read by the methods visible in this class
+        compatibility_threshold: maximal distance to a species center for joining that species
+        """
+        # population layout
+        self.genome = genome
+        self.pop_size = pop_size
+        self.species_size = species_size
+        self.species_arange = jnp.arange(species_size)
+
+        # genome distance
+        self.compatibility_disjoint = compatibility_disjoint
+        self.compatibility_weight = compatibility_weight
+        self.compatibility_threshold = compatibility_threshold
+
+        # stagnation
+        self.max_stagnation = max_stagnation
+        self.species_elitism = species_elitism
+
+        # reproduction
+        self.spawn_number_change_rate = spawn_number_change_rate
+        self.genome_elitism = genome_elitism
+        self.survival_threshold = survival_threshold
+        self.min_species_size = min_species_size
+
+    def setup(self, randkey):
+        """
+        Create the initial species state.
+        The whole population starts inside one species stored in slot 0 (key 0);
+        every other species slot is filled with NaN.
+        """
+        pop_nodes, pop_conns = initialize_population(self.pop_size, self.genome)
+        genome, n_species = self.genome, self.species_size
+
+        def empty(*shape):
+            # NaN-filled array
+            return jnp.full(shape, jnp.nan)
+
+        # per-species records, slot 0 describes the initial species
+        species_keys = empty(n_species).at[0].set(0)  # the unique index (primary key) for each species
+        best_fitness = empty(n_species).at[0].set(-jnp.inf)  # the best fitness of each species
+        last_improved = empty(n_species).at[0].set(0)  # the last generation that the species improved
+        member_count = empty(n_species).at[0].set(self.pop_size)  # the number of members of each species
+
+        # center genome of each species; the first individual is the center of the initial species
+        center_nodes = empty(n_species, genome.max_nodes, genome.node_gene.length).at[0].set(pop_nodes[0])
+        center_conns = empty(n_species, genome.max_conns, genome.conn_gene.length).at[0].set(pop_conns[0])
+
+        # the species key of each individual: everybody belongs to species 0
+        idx2species = jnp.zeros(self.pop_size)
+
+        pop_nodes, pop_conns = jax.device_put((pop_nodes, pop_conns))
+
+        return State(
+            randkey=randkey,
+            pop_nodes=pop_nodes,
+            pop_conns=pop_conns,
+            species_keys=species_keys,
+            best_fitness=best_fitness,
+            last_improved=last_improved,
+            member_count=member_count,
+            idx2species=idx2species,
+            center_nodes=center_nodes,
+            center_conns=center_conns,
+            next_species_key=jnp.array(1),  # 0 is reserved for the first species
+        )
+
+    def ask(self, state):
+        """Return the population stored in `state` as a (pop_nodes, pop_conns) pair."""
+        population = (state.pop_nodes, state.pop_conns)
+        return population
+
+    def update_species(self, state, fitness, generation):
+        """
+        Rank the species by fitness and choose the parents of the next generation.
+        Returns (state, winner, loser, elite_mask); the returned state has stagnated species
+        removed, the per-species records sorted by species fitness and a fresh randkey.
+        """
+        # fitness of each species, then drop the stagnated ones
+        species_fitness = self.update_species_fitness(state, fitness)
+        state, species_fitness = self.stagnation(state, generation, species_fitness)
+
+        # best species first; slots without a species carry -inf and therefore end up last
+        order = jnp.argsort(species_fitness)[::-1]
+        per_species_fields = (
+            'species_keys',
+            'best_fitness',
+            'last_improved',
+            'member_count',
+            'center_nodes',
+            'center_conns',
+        )
+        state = state.update(**{name: getattr(state, name)[order] for name in per_species_fields})
+
+        # decide the number of members of each species by their fitness
+        spawn_number = self.cal_spawn_numbers(state)
+
+        pair_key, next_key = jax.random.split(state.randkey)
+        winner, loser, elite_mask = self.create_crossover_pair(state, pair_key, spawn_number, fitness)
+
+        return state.update(randkey=next_key), winner, loser, elite_mask
+
+    def update_species_fitness(self, state, fitness):
+        """
+        Fitness of every species slot, defined as the best fitness among its members.
+        A slot without any member gets -inf.
+        """
+
+        def best_member_fitness(slot):
+            belongs = state.idx2species == state.species_keys[slot]
+            return jnp.max(jnp.where(belongs, fitness, -jnp.inf))
+
+        return jax.vmap(best_member_fitness)(self.species_arange)
+
+    def stagnation(self, state, generation, species_fitness):
+        """
+        Remove stagnated species.
+        A species is stagnated when its fitness is not better than its recorded best fitness and
+        its last improvement is more than `max_stagnation` generations ago.
+        Species whose rank is below `species_elitism` are never removed.
+
+        generation: the current generation
+        species_fitness: fitness of every species slot
+
+        Returns (state, species_fitness): all records of a removed species are set to NaN in the
+        state and its entry in species_fitness becomes -inf; surviving species get their
+        best_fitness / last_improved refreshed.
+        """
+
+        def check_stagnation(idx):
+            # determine whether the species stagnation
+            st = (
+                    (species_fitness[idx] <= state.best_fitness[
+                        idx]) &  # not better than the best fitness of the species
+                    (generation - state.last_improved[idx] > self.max_stagnation)  # for a long time
+            )
+
+            # update last_improved and best_fitness
+            li, bf = jax.lax.cond(
+                species_fitness[idx] > state.best_fitness[idx],
+                lambda: (generation, species_fitness[idx]),  # update
+                lambda: (state.last_improved[idx], state.best_fitness[idx])  # not update
+            )
+
+            # note the order: (stagnated flag, best fitness, last improved)
+            return st, bf, li
+
+        spe_st, best_fitness, last_improved = jax.vmap(check_stagnation)(self.species_arange)
+
+        # elite species will not be stagnation
+        # NOTE(review): presumably rank_elements gives rank 0 to the highest fitness - confirm in utils
+        species_rank = rank_elements(species_fitness)
+        spe_st = jnp.where(species_rank < self.species_elitism, False, spe_st)  # elitism never stagnation
+
+        # set stagnation species to nan
+        def update_func(idx):
+            # both branches return the same 7-tuple layout, see the unpacking below
+            return jax.lax.cond(
+                spe_st[idx],
+                lambda: (
+                    jnp.nan,  # species_key
+                    jnp.nan,  # best_fitness
+                    jnp.nan,  # last_improved
+                    jnp.nan,  # member_count
+                    -jnp.inf,  # species_fitness
+                    jnp.full_like(state.center_nodes[idx], jnp.nan),  # center_nodes
+                    jnp.full_like(state.center_conns[idx], jnp.nan),  # center_conns
+                ),  # stagnation species
+                lambda: (
+                    state.species_keys[idx],
+                    best_fitness[idx],
+                    last_improved[idx],
+                    state.member_count[idx],
+                    species_fitness[idx],
+                    state.center_nodes[idx],
+                    state.center_conns[idx]
+                )  # not stagnation species
+            )
+
+        (
+            species_keys,
+            best_fitness,
+            last_improved,
+            member_count,
+            species_fitness,
+            center_nodes,
+            center_conns
+        ) = (
+            jax.vmap(update_func)(self.species_arange))
+
+        return state.update(
+            species_keys=species_keys,
+            best_fitness=best_fitness,
+            last_improved=last_improved,
+            member_count=member_count,
+            center_nodes=center_nodes,
+            center_conns=center_conns,
+        ), species_fitness
+
+    def cal_spawn_numbers(self, state):
+        """
+        decide the number of members of each species by their fitness rank.
+        the species with higher fitness will have more members
+        Linear ranking selection
+            e.g. N = 3, P=10 -> probability = [0.5, 0.33, 0.17], spawn_number = [5, 3, 2]
+
+        The species records in `state` are expected to be sorted best-first already.
+        Returns an int32 array with one entry per species slot that sums to pop_size;
+        slots without a species (NaN key) always receive 0.
+        """
+
+        species_keys = state.species_keys
+
+        is_species_valid = ~jnp.isnan(species_keys)
+        valid_species_num = jnp.sum(is_species_valid)
+        denominator = (valid_species_num + 1) * valid_species_num / 2  # obtain 3 + 2 + 1 = 6
+
+        rank_score = valid_species_num - self.species_arange  # obtain [3, 2, 1]
+        spawn_number_rate = rank_score / denominator  # obtain [0.5, 0.33, 0.17]
+        spawn_number_rate = jnp.where(is_species_valid, spawn_number_rate, 0)  # set invalid species to 0
+
+        target_spawn_number = jnp.floor(spawn_number_rate * self.pop_size)  # calculate member
+
+        # Avoid too much variation of numbers for a species
+        previous_size = state.member_count
+        spawn_number = previous_size + (target_spawn_number - previous_size) * self.spawn_number_change_rate
+
+        # member_count is NaN for empty slots, which makes spawn_number NaN there as well;
+        # zero those entries explicitly instead of relying on how NaN is cast to int32
+        spawn_number = jnp.where(is_species_valid, spawn_number, 0)
+        spawn_number = spawn_number.astype(jnp.int32)
+
+        # must control the sum of spawn_number to be equal to pop_size
+        error = self.pop_size - jnp.sum(spawn_number)
+
+        # add error to the first species to control the sum of spawn_number
+        spawn_number = spawn_number.at[0].add(error)
+
+        return spawn_number
+
+    def create_crossover_pair(self, state, randkey, spawn_number, fitness):
+        """
+        Choose the two parents of every individual of the next generation.
+
+        spawn_number: number of children of each species slot (sums to pop_size)
+        fitness: fitness of every individual of the current population
+
+        Returns (winner, loser, elite_mask), each of length pop_size: the indices of the fitter and
+        the weaker parent, and a flag for children that are copies of the best members of their species.
+        """
+        s_idx = self.species_arange
+        p_idx = jnp.arange(self.pop_size)
+
+        def pick_parents(key, idx):
+            # candidate parents of one species, for every possible child position
+            members = state.idx2species == state.species_keys[idx]
+            members_num = jnp.sum(members)
+
+            # members first, ordered from best to worst; non-members carry -inf
+            members_fitness = jnp.where(members, fitness, -jnp.inf)
+            sorted_member_indices = jnp.argsort(members_fitness)[::-1]
+
+            survive_size = jnp.floor(self.survival_threshold * members_num).astype(jnp.int32)
+            # small species would get survive_size == 0 and turn the probabilities below into 0/0 = NaN;
+            # always keep at least the best member as a possible parent
+            survive_size = jnp.maximum(survive_size, 1)
+
+            # uniform choice among the `survive_size` best members
+            select_pro = (p_idx < survive_size) / survive_size
+            fa, ma = jax.random.choice(key, sorted_member_indices, shape=(2, self.pop_size), replace=True, p=select_pro)
+
+            # elite: the first `genome_elitism` positions use the same genome for both parents
+            fa = jnp.where(p_idx < self.genome_elitism, sorted_member_indices, fa)
+            ma = jnp.where(p_idx < self.genome_elitism, sorted_member_indices, ma)
+            elite = jnp.where(p_idx < self.genome_elitism, True, False)
+            return fa, ma, elite
+
+        fas, mas, elites = jax.vmap(pick_parents)(jax.random.split(randkey, self.species_size), s_idx)
+
+        spawn_number_cum = jnp.cumsum(spawn_number)
+
+        def locate_child(idx):
+            # species slot whose cumulative spawn range contains this child
+            loc = jnp.argmax(idx < spawn_number_cum)
+
+            # elite genomes are at the beginning of the species
+            idx_in_species = jnp.where(loc > 0, idx - spawn_number_cum[loc - 1], idx)
+            return fas[loc, idx_in_species], mas[loc, idx_in_species], elites[loc, idx_in_species]
+
+        part1, part2, elite_mask = jax.vmap(locate_child)(p_idx)
+
+        # order every pair so that the fitter parent comes first
+        is_part1_win = fitness[part1] >= fitness[part2]
+        winner = jnp.where(is_part1_win, part1, part2)
+        loser = jnp.where(is_part1_win, part2, part1)
+
+        return winner, loser, elite_mask
+
+    def speciate(self, state, generation):
+        """
+        Assign every individual of the population to a species.
+
+        step 1: every existing species picks, among the still unassigned individuals, the one
+                closest to its old center as its new center.
+        step 2: slot by slot, an existing species absorbs the individuals that are within
+                `compatibility_threshold` of its center; for an empty slot a new species is created
+                around the first unassigned individual, as long as unassigned individuals remain.
+        Individuals that are still unassigned afterwards join the species stored in the last slot.
+
+        generation: the current generation, recorded as `last_improved` of newly created species
+
+        Returns the state with species_keys, best_fitness, last_improved, member_count,
+        idx2species, center_nodes, center_conns and next_species_key updated.
+        """
+        # prepare distance functions
+        o2p_distance_func = jax.vmap(self.distance, in_axes=(None, None, 0, 0))  # one to population
+
+        # idx to specie key
+        idx2species = jnp.full((self.pop_size,), jnp.nan)  # NaN means not assigned to any species
+
+        # the distance between genomes to its center genomes
+        o2c_distances = jnp.full((self.pop_size,), jnp.inf)
+
+        # step 1: find new centers
+        def cond_func(carry):
+            # i, idx2species, center_nodes, center_conns, o2c_distances
+            i, i2s, cns, ccs, o2c = carry
+
+            # the loop ends at the first empty slot (NaN key) or after the last slot
+            return (
+                    (i < self.species_size) &
+                    (~jnp.isnan(state.species_keys[i]))
+            )  # current species is existing
+
+        def body_func(carry):
+            i, i2s, cns, ccs, o2c = carry
+
+            # distance from the old center of species i to every individual
+            distances = o2p_distance_func(cns[i], ccs[i], state.pop_nodes, state.pop_conns)
+
+            # find the closest one
+            # NOTE(review): presumably only individuals with mask == True (not yet assigned) can be picked - confirm in utils
+            closest_idx = argmin_with_mask(distances, mask=jnp.isnan(i2s))
+
+            i2s = i2s.at[closest_idx].set(state.species_keys[i])
+            cns = cns.at[i].set(state.pop_nodes[closest_idx])
+            ccs = ccs.at[i].set(state.pop_conns[closest_idx])
+
+            # the genome with closest_idx will become the new center, thus its distance to center is 0.
+            o2c = o2c.at[closest_idx].set(0)
+
+            return i + 1, i2s, cns, ccs, o2c
+
+        _, idx2species, center_nodes, center_conns, o2c_distances = \
+            jax.lax.while_loop(cond_func, body_func,
+                               (0, idx2species, state.center_nodes, state.center_conns, o2c_distances))
+
+        state = state.update(
+            idx2species=idx2species,
+            center_nodes=center_nodes,
+            center_conns=center_conns,
+        )
+
+        # part 2: assign members to each species
+        def cond_func(carry):
+            # i, idx2species, center_nodes, center_conns, species_keys, o2c_distances, next_species_key
+            i, i2s, cns, ccs, sk, o2c, nsk = carry
+
+            # keep going while slots remain and either the slot holds a species
+            # or some individuals still need a (new) species
+            current_species_existed = ~jnp.isnan(sk[i])
+            not_all_assigned = jnp.any(jnp.isnan(i2s))
+            not_reach_species_upper_bounds = i < self.species_size
+            return not_reach_species_upper_bounds & (current_species_existed | not_all_assigned)
+
+        def body_func(carry):
+            i, i2s, cns, ccs, sk, o2c, nsk = carry
+
+            # the index returned by the branches is ignored; the slot index advances here
+            _, i2s, cns, ccs, sk, o2c, nsk = jax.lax.cond(
+                jnp.isnan(sk[i]),  # whether the current species is existing or not
+                create_new_species,  # if not existing, create a new specie
+                update_exist_specie,  # if existing, update the specie
+                (i, i2s, cns, ccs, sk, o2c, nsk)
+            )
+
+            return i + 1, i2s, cns, ccs, sk, o2c, nsk
+
+        def create_new_species(carry):
+            i, i2s, cns, ccs, sk, o2c, nsk = carry
+
+            # pick the first one who has not been assigned to any species
+            idx = fetch_first(jnp.isnan(i2s))
+
+            # assign it to the new species
+            # [key, best score, last update generation, member_count]
+            sk = sk.at[i].set(nsk)  # nsk -> next species key
+            i2s = i2s.at[idx].set(nsk)
+            o2c = o2c.at[idx].set(0)
+
+            # update center genomes
+            cns = cns.at[i].set(state.pop_nodes[idx])
+            ccs = ccs.at[i].set(state.pop_conns[idx])
+
+            # find the members for the new species
+            i2s, o2c = speciate_by_threshold(i, i2s, cns, ccs, sk, o2c)
+
+            return i, i2s, cns, ccs, sk, o2c, nsk + 1  # change to next new speciate key
+
+        def update_exist_specie(carry):
+            i, i2s, cns, ccs, sk, o2c, nsk = carry
+
+            i2s, o2c = speciate_by_threshold(i, i2s, cns, ccs, sk, o2c)
+
+            # turn to next species
+            return i + 1, i2s, cns, ccs, sk, o2c, nsk
+
+        def speciate_by_threshold(i, i2s, cns, ccs, sk, o2c):
+            # distance between the center genome of species i and all pop genomes
+            o2p_distance = o2p_distance_func(cns[i], ccs[i], state.pop_nodes, state.pop_conns)
+
+            close_enough_mask = o2p_distance < self.compatibility_threshold
+            # a genome can be taken when it is not assigned yet, or when this center is closer than its current center
+            catchable_mask = jnp.isnan(i2s) | (o2p_distance < o2c)
+
+            mask = close_enough_mask & catchable_mask
+
+            # update species info
+            i2s = jnp.where(mask, sk[i], i2s)
+
+            # update distance between centers
+            o2c = jnp.where(mask, o2p_distance, o2c)
+
+            return i2s, o2c
+
+        # update idx2species
+        _, idx2species, center_nodes, center_conns, species_keys, _, next_species_key = jax.lax.while_loop(
+            cond_func,
+            body_func,
+            (0, state.idx2species, center_nodes, center_conns, state.species_keys, o2c_distances,
+             state.next_species_key)
+        )
+
+        # if there are still some pop genomes not assigned to any species, add them to the last species
+        # this condition can only happen when the number of species is reached species upper bounds
+        idx2species = jnp.where(jnp.isnan(idx2species), species_keys[-1], idx2species)
+
+        # complete info of species which is created in this generation
+        # (a slot that has a key but no best_fitness yet was filled by create_new_species above)
+        new_created_mask = (~jnp.isnan(species_keys)) & jnp.isnan(state.best_fitness)
+        best_fitness = jnp.where(new_created_mask, -jnp.inf, state.best_fitness)
+        last_improved = jnp.where(new_created_mask, generation, state.last_improved)
+
+        # update members count
+        def count_members(idx):
+            return jax.lax.cond(
+                jnp.isnan(species_keys[idx]),  # if the species is not existing
+                lambda: jnp.nan,  # nan
+                lambda: jnp.sum(idx2species == species_keys[idx], dtype=jnp.float32)  # count members
+            )
+
+        member_count = jax.vmap(count_members)(self.species_arange)
+
+        return state.update(
+            species_keys=species_keys,
+            best_fitness=best_fitness,
+            last_improved=last_improved,
+            member_count=member_count,
+            idx2species=idx2species,
+            center_nodes=center_nodes,
+            center_conns=center_conns,
+            next_species_key=next_species_key
+        )
+
+    def distance(self, nodes1, conns1, nodes2, conns2):
+        """
+        Distance between two genomes: the node distance plus the connection distance.
+        """
+        node_part = self.node_distance(nodes1, nodes2)
+        conn_part = self.conn_distance(conns1, conns2)
+        return node_part + conn_part
+
+    def node_distance(self, nodes1, nodes2):
+        """
+        Node part of the distance between two genomes.
+        Nodes with the same key are homologous and contribute their gene distance (weighted by
+        compatibility_weight); every other node contributes compatibility_disjoint.
+        The sum is divided by the node count of the larger genome; two empty genomes give 0.
+        """
+        count1 = jnp.sum(~jnp.isnan(nodes1[:, 0]))
+        count2 = jnp.sum(~jnp.isnan(nodes2[:, 0]))
+        larger = jnp.maximum(count1, count2)
+
+        # sort the nodes of both genomes by key so that homologous nodes become neighbours
+        # (this process is similar to np.intersect1d)
+        stacked = jnp.concatenate((nodes1, nodes2), axis=0)
+        stacked = stacked[jnp.argsort(stacked[:, 0], axis=0)]
+
+        # a trailing NaN row gives every row a successor to be compared with
+        nan_row = jnp.full((1, stacked.shape[1]), jnp.nan)
+        padded = jnp.concatenate([stacked, nan_row], axis=0)
+        upper, lower = padded[:-1], padded[1:]
+
+        # a row is homologous when its successor has the same (non NaN) key
+        shared = (upper[:, 0] == lower[:, 0]) & ~jnp.isnan(upper[:, 0])
+        unmatched = count1 + count2 - 2 * jnp.sum(shared)
+
+        # gene distance of each neighbouring pair, only the homologous pairs are kept
+        pair_dist = jax.vmap(self.genome.node_gene.distance, in_axes=(0, 0))(upper, lower)
+        pair_dist = jnp.where(jnp.isnan(pair_dist), 0, pair_dist)
+        matched_total = jnp.sum(pair_dist * shared)
+
+        raw = unmatched * self.compatibility_disjoint + matched_total * self.compatibility_weight
+
+        return jnp.where(larger == 0, 0, raw / larger)  # avoid zero division
+
+    def conn_distance(self, conns1, conns2):
+        """
+        Connection part of the distance between two genomes.
+        Connections with the same (in key, out key) pair are homologous and contribute their gene
+        distance (weighted by compatibility_weight); every other connection contributes
+        compatibility_disjoint. The sum is divided by the connection count of the larger genome;
+        two empty genomes give 0.
+        """
+        count1 = jnp.sum(~jnp.isnan(conns1[:, 0]))
+        count2 = jnp.sum(~jnp.isnan(conns2[:, 0]))
+        larger = jnp.maximum(count1, count2)
+
+        # sort by in key first, out key second, so that homologous connections become neighbours
+        stacked = jnp.concatenate((conns1, conns2), axis=0)
+        order = jnp.lexsort((stacked[:, 1], stacked[:, 0]))
+        stacked = stacked[order]
+
+        # a trailing NaN row gives every row a successor to be compared with
+        nan_row = jnp.full((1, stacked.shape[1]), jnp.nan)
+        padded = jnp.concatenate([stacked, nan_row], axis=0)
+        upper, lower = padded[:-1], padded[1:]
+
+        # both genome has such connection
+        same_endpoints = jnp.all(upper[:, :2] == lower[:, :2], axis=1)
+        shared = same_endpoints & ~jnp.isnan(upper[:, 0])
+        unmatched = count1 + count2 - 2 * jnp.sum(shared)
+
+        # gene distance of each neighbouring pair, only the homologous pairs are kept
+        pair_dist = jax.vmap(self.genome.conn_gene.distance, in_axes=(0, 0))(upper, lower)
+        pair_dist = jnp.where(jnp.isnan(pair_dist), 0, pair_dist)
+        matched_total = jnp.sum(pair_dist * shared)
+
+        raw = unmatched * self.compatibility_disjoint + matched_total * self.compatibility_weight
+
+        return jnp.where(larger == 0, 0, raw / larger)
+
+
+def initialize_population(pop_size, genome):
+    """
+    Build the initial population: `pop_size` copies of one minimal genome.
+    The genome has the input nodes, the output nodes and one hidden node; every input is
+    connected to the hidden node and the hidden node is connected to every output.
+    Returns (pop_nodes, pop_conns) as numpy arrays; unused rows are NaN.
+    """
+    in_keys = np.asarray(genome.input_idx)
+    out_keys = np.asarray(genome.output_idx)
+    io_keys = np.concatenate([in_keys, out_keys])
+    hidden_key = int(io_keys.max()) + 1  # the hidden node takes the next free key
+
+    # nodes: the row index equals the node key
+    template_nodes = np.full((genome.max_nodes, genome.node_gene.length), np.nan)
+    template_nodes[io_keys, 0] = io_keys
+    template_nodes[hidden_key, 0] = hidden_key
+    template_nodes[io_keys, 1:] = genome.node_gene.new_custom_attrs()
+    template_nodes[hidden_key, 1:] = genome.node_gene.new_custom_attrs()
+
+    template_conns = np.full((genome.max_conns, genome.conn_gene.length), np.nan)
+
+    # input nodes to hidden, stored in the rows numbered like the input keys
+    template_conns[in_keys, 0:2] = np.column_stack((in_keys, np.full_like(in_keys, hidden_key)))
+    template_conns[in_keys, 2] = True  # enabled
+    template_conns[in_keys, 3:] = genome.conn_gene.new_custom_attrs()
+
+    # hidden to output nodes, stored in the rows numbered like the output keys
+    template_conns[out_keys, 0:2] = np.column_stack((np.full_like(out_keys, hidden_key), out_keys))
+    template_conns[out_keys, 2] = True  # enabled
+    template_conns[out_keys, 3:] = genome.conn_gene.new_custom_attrs()
+
+    # repeat origin genome for P times to create population
+    pop_nodes = np.repeat(template_nodes[None], pop_size, axis=0)
+    pop_conns = np.repeat(template_conns[None], pop_size, axis=0)
+
+    return pop_nodes, pop_conns