Use EvoX to solve RL tasks with a distributed Gym environment.

Three simple tasks from Gym[classic_control] are tested.
2023-07-04 15:44:08 +08:00
parent c4d34e877b
commit 7bf46575f4
18 changed files with 547 additions and 43 deletions
--- a/examples/evox_/init.py
+++ b/examples/evox_/init.py
--- a/examples/evox_/acrobot.ini
+++ b/examples/evox_/acrobot.ini
@@ -0,0 +1,22 @@
+[basic]
+num_inputs = 6
+num_outputs = 3
+maximum_nodes = 50
+maximum_connections = 50
+maximum_species = 10
+forward_way = "single"
+random_seed = 42
+
+[population]
+pop_size = 100
+
+[gene-activation]
+activation_default = "sigmoid"
+activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
+activation_replace_rate = 0.1
+
+[gene-aggregation]
+aggregation_default = "sum"
+aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
+aggregation_replace_rate = 0.1
+
--- a/examples/evox_/acrobot.py
+++ b/examples/evox_/acrobot.py
@@ -0,0 +1,62 @@
+import evox
+import jax
+from jax import jit, vmap, numpy as jnp
+
+from configs import Configer
+from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
+from evox_adaptor import NEAT, Gym
+
+if __name__ == '__main__':
+    batch_policy = True
+    key = jax.random.PRNGKey(42)
+
+    monitor = evox.monitors.StdSOMonitor()
+    neat_config = Configer.load_config('acrobot.ini')
+    origin_forward_func = create_forward_function(neat_config)
+
+
+    def neat_transform(pop):
+        P = neat_config['pop_size']
+        N = neat_config['maximum_nodes']
+        C = neat_config['maximum_connections']
+
+        pop_nodes = pop[:P * N * 5].reshape((P, N, 5))
+        pop_cons = pop[P * N * 5:].reshape((P, C, 4))
+
+        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
+        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
+        return pop_seqs, pop_nodes, u_pop_cons
+
+    # special policy for acrobot: pick the discrete action with the largest output
+    def neat_forward(genome, x):
+        res = origin_forward_func(x, *genome)
+        out = jnp.argmax(res)  # {0, 1, 2}
+        return out
+
+
+    forward_func = lambda pop, x: origin_forward_func(x, *pop)
+
+    problem = Gym(
+        policy=jit(vmap(neat_forward)),
+        env_name="Acrobot-v1",
+        pop_size=100,
+    )
+
+    # create a pipeline
+    pipeline = evox.pipelines.StdPipeline(
+        algorithm=NEAT(neat_config),
+        problem=problem,
+        pop_transform=jit(neat_transform),
+        fitness_transform=monitor.record_fit,
+    )
+    # init the pipeline
+    state = pipeline.init(key)
+
+    # run the pipeline for 30 steps
+    for i in range(30):
+        state = pipeline.step(state)
+        print(i, monitor.get_min_fitness())
+
+    # obtain -62.0
+    min_fitness = monitor.get_min_fitness()
+    print(min_fitness)
--- a/examples/evox_/bipedalwalker.ini
+++ b/examples/evox_/bipedalwalker.ini
@@ -0,0 +1,22 @@
+[basic]
+num_inputs = 24
+num_outputs = 4
+maximum_nodes = 100
+maximum_connections = 200
+maximum_species = 10
+forward_way = "single"
+random_seed = 42
+
+[population]
+pop_size = 100
+
+[gene-activation]
+activation_default = "sigmoid"
+activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
+activation_replace_rate = 0.1
+
+[gene-aggregation]
+aggregation_default = "sum"
+aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
+aggregation_replace_rate = 0.1
+
--- a/examples/evox_/bipedalwalker.py
+++ b/examples/evox_/bipedalwalker.py
@@ -0,0 +1,62 @@
+import evox
+import jax
+from jax import jit, vmap, numpy as jnp
+
+from configs import Configer
+from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
+from evox_adaptor import NEAT, Gym
+
+if __name__ == '__main__':
+    batch_policy = True
+    key = jax.random.PRNGKey(42)
+
+    monitor = evox.monitors.StdSOMonitor()
+    neat_config = Configer.load_config('bipedalwalker.ini')
+    origin_forward_func = create_forward_function(neat_config)
+
+
+    def neat_transform(pop):
+        P = neat_config['pop_size']
+        N = neat_config['maximum_nodes']
+        C = neat_config['maximum_connections']
+
+        pop_nodes = pop[:P * N * 5].reshape((P, N, 5))
+        pop_cons = pop[P * N * 5:].reshape((P, C, 4))
+
+        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
+        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
+        return pop_seqs, pop_nodes, u_pop_cons
+
+    # special policy for bipedal walker: squash the outputs into (-1, 1) with tanh
+    def neat_forward(genome, x):
+        res = origin_forward_func(x, *genome)
+        out = jnp.tanh(res)  # (-1, 1)
+        return out
+
+
+    forward_func = lambda pop, x: origin_forward_func(x, *pop)
+
+    problem = Gym(
+        policy=jit(vmap(neat_forward)),
+        env_name="BipedalWalker-v3",
+        pop_size=100,
+    )
+
+    # create a pipeline
+    pipeline = evox.pipelines.StdPipeline(
+        algorithm=NEAT(neat_config),
+        problem=problem,
+        pop_transform=jit(neat_transform),
+        fitness_transform=monitor.record_fit,
+    )
+    # init the pipeline
+    state = pipeline.init(key)
+
+    # run the pipeline for 30 steps
+    for i in range(30):
+        state = pipeline.step(state)
+        print(i, monitor.get_min_fitness())
+
+    # obtain 98.91529684268514 (NOTE(review): identical to the value quoted in mountain_car.py -- confirm for BipedalWalker)
+    min_fitness = monitor.get_min_fitness()
+    print(min_fitness)
--- a/examples/evox_/cartpole.ini
+++ b/examples/evox_/cartpole.ini
@@ -0,0 +1,11 @@
+[basic]
+num_inputs = 4
+num_outputs = 1
+maximum_nodes = 50
+maximum_connections = 50
+maximum_species = 10
+forward_way = "single"
+random_seed = 42
+
+[population]
+pop_size = 40
--- a/examples/evox_/cartpole.py
+++ b/examples/evox_/cartpole.py
@@ -0,0 +1,62 @@
+import evox
+import jax
+from jax import jit, vmap, numpy as jnp
+
+from configs import Configer
+from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
+from evox_adaptor import NEAT, Gym
+
+if __name__ == '__main__':
+    batch_policy = True
+    key = jax.random.PRNGKey(42)
+
+    monitor = evox.monitors.StdSOMonitor()
+    neat_config = Configer.load_config('cartpole.ini')
+    origin_forward_func = create_forward_function(neat_config)
+
+
+    def neat_transform(pop):
+        P = neat_config['pop_size']
+        N = neat_config['maximum_nodes']
+        C = neat_config['maximum_connections']
+
+        pop_nodes = pop[:P * N * 5].reshape((P, N, 5))
+        pop_cons = pop[P * N * 5:].reshape((P, C, 4))
+
+        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
+        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
+        return pop_seqs, pop_nodes, u_pop_cons
+
+    # special policy for cartpole
+    def neat_forward(genome, x):
+        res = origin_forward_func(x, *genome)[0]
+        out = jnp.where(res > 0.5, 1, 0)
+        return out
+
+
+    forward_func = lambda pop, x: origin_forward_func(x, *pop)
+
+    problem = Gym(
+        policy=jit(vmap(neat_forward)),
+        env_name="CartPole-v1",
+        pop_size=40,
+    )
+
+    # create a pipeline
+    pipeline = evox.pipelines.StdPipeline(
+        algorithm=NEAT(neat_config),
+        problem=problem,
+        pop_transform=jit(neat_transform),
+        fitness_transform=monitor.record_fit,
+    )
+    # init the pipeline
+    state = pipeline.init(key)
+
+    # run the pipeline for 10 steps
+    for i in range(10):
+        state = pipeline.step(state)
+        print(monitor.get_min_fitness())
+
+    # obtain 500
+    min_fitness = monitor.get_min_fitness()
+    print(min_fitness)
--- a/examples/evox_/mountain_car.ini
+++ b/examples/evox_/mountain_car.ini
@@ -0,0 +1,22 @@
+[basic]
+num_inputs = 2
+num_outputs = 1
+maximum_nodes = 50
+maximum_connections = 50
+maximum_species = 10
+forward_way = "single"
+random_seed = 42
+
+[population]
+pop_size = 100
+
+[gene-activation]
+activation_default = "sigmoid"
+activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
+activation_replace_rate = 0.1
+
+[gene-aggregation]
+aggregation_default = "sum"
+aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
+aggregation_replace_rate = 0.1
+
--- a/examples/evox_/mountain_car.py
+++ b/examples/evox_/mountain_car.py
@@ -0,0 +1,62 @@
+import evox
+import jax
+from jax import jit, vmap, numpy as jnp
+
+from configs import Configer
+from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
+from evox_adaptor import NEAT, Gym
+
+if __name__ == '__main__':
+    batch_policy = True
+    key = jax.random.PRNGKey(42)
+
+    monitor = evox.monitors.StdSOMonitor()
+    neat_config = Configer.load_config('mountain_car.ini')
+    origin_forward_func = create_forward_function(neat_config)
+
+
+    def neat_transform(pop):
+        P = neat_config['pop_size']
+        N = neat_config['maximum_nodes']
+        C = neat_config['maximum_connections']
+
+        pop_nodes = pop[:P * N * 5].reshape((P, N, 5))
+        pop_cons = pop[P * N * 5:].reshape((P, C, 4))
+
+        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
+        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
+        return pop_seqs, pop_nodes, u_pop_cons
+
+    # special policy for mountain car
+    def neat_forward(genome, x):
+        res = origin_forward_func(x, *genome)
+        out = jnp.tanh(res)  # (-1, 1)
+        return out
+
+
+    forward_func = lambda pop, x: origin_forward_func(x, *pop)
+
+    problem = Gym(
+        policy=jit(vmap(neat_forward)),
+        env_name="MountainCarContinuous-v0",
+        pop_size=100,
+    )
+
+    # create a pipeline
+    pipeline = evox.pipelines.StdPipeline(
+        algorithm=NEAT(neat_config),
+        problem=problem,
+        pop_transform=jit(neat_transform),
+        fitness_transform=monitor.record_fit,
+    )
+    # init the pipeline
+    state = pipeline.init(key)
+
+    # run the pipeline for 30 steps
+    for i in range(30):
+        state = pipeline.step(state)
+        print(i, monitor.get_min_fitness())
+
+    # obtain 98.91529684268514
+    min_fitness = monitor.get_min_fitness()
+    print(min_fitness)
--- a/examples/xor3d.ini
+++ b/examples/xor3d.ini
@@ -12,7 +12,7 @@ random_seed = 42
 fitness_threshold = 8
 generation_limit = 1000
 fitness_criterion = "max"
-pop_size = 100000
+pop_size = 10000

 [genome]
 compatibility_disjoint = 1.0