Use EvoX to solve RL tasks with a distributed Gym environment.

Three simple tasks from Gym's classic-control suite are tested.
This commit is contained in:
wls2002
2023-07-04 15:44:08 +08:00
parent c4d34e877b
commit 7bf46575f4
18 changed files with 547 additions and 43 deletions

View File

View File

@@ -0,0 +1,22 @@
[basic]
num_inputs = 6
num_outputs = 3
maximum_nodes = 50
maximum_connections = 50
maximum_species = 10
forward_way = "single"
random_seed = 42
[population]
pop_size = 100
[gene-activation]
activation_default = "sigmoid"
activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
activation_replace_rate = 0.1
[gene-aggregation]
aggregation_default = "sum"
aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
aggregation_replace_rate = 0.1

62
examples/evox_/acrobot.py Normal file
View File

@@ -0,0 +1,62 @@
import evox
import jax
from jax import jit, vmap, numpy as jnp
from configs import Configer
from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
from evox_adaptor import NEAT, Gym
if __name__ == '__main__':
    # Evolve a NEAT policy for Gym's Acrobot-v1 through an EvoX pipeline.
    key = jax.random.PRNGKey(42)

    monitor = evox.monitors.StdSOMonitor()
    neat_config = Configer.load_config('acrobot.ini')
    origin_forward_func = create_forward_function(neat_config)

    def neat_transform(pop):
        """Split the flat population array into per-genome NEAT arrays.

        The first ``P * N * 5`` entries of ``pop`` are viewed as node data of
        shape ``(P, N, 5)``; the remaining entries are viewed as connection
        data of shape ``(P, C, 4)``, where P, N and C come from the config.

        Returns:
            A ``(pop_seqs, pop_nodes, u_pop_cons)`` tuple; these are the
            arguments unpacked into the forward function by ``neat_forward``.
        """
        P = neat_config['pop_size']
        N = neat_config['maximum_nodes']
        C = neat_config['maximum_connections']

        split = P * N * 5
        pop_nodes = pop[:split].reshape((P, N, 5))
        pop_cons = pop[split:].reshape((P, C, 4))

        # Per-genome preprocessing, mapped over the population axis.
        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
        return pop_seqs, pop_nodes, u_pop_cons

    def neat_forward(genome, x):
        """Acrobot policy: choose the action whose network output is largest."""
        res = origin_forward_func(x, *genome)
        # Index of the largest output, used as the discrete action {0, 1, 2}.
        return jnp.argmax(res)

    problem = Gym(
        policy=jit(vmap(neat_forward)),
        env_name="Acrobot-v1",
        # Taken from the config so it cannot drift from the population size
        # that neat_transform reshapes by.
        pop_size=neat_config['pop_size'],
    )

    # create a pipeline
    pipeline = evox.pipelines.StdPipeline(
        algorithm=NEAT(neat_config),
        problem=problem,
        pop_transform=jit(neat_transform),
        fitness_transform=monitor.record_fit,
    )

    # init the pipeline
    state = pipeline.init(key)

    # run the pipeline for 30 steps, reporting the best fitness so far
    for i in range(30):
        state = pipeline.step(state)
        print(i, monitor.get_min_fitness())

    # the original author reported obtaining -62.0
    min_fitness = monitor.get_min_fitness()
    print(min_fitness)

View File

@@ -0,0 +1,22 @@
[basic]
num_inputs = 24
num_outputs = 4
maximum_nodes = 100
maximum_connections = 200
maximum_species = 10
forward_way = "single"
random_seed = 42
[population]
pop_size = 100
[gene-activation]
activation_default = "sigmoid"
activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
activation_replace_rate = 0.1
[gene-aggregation]
aggregation_default = "sum"
aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
aggregation_replace_rate = 0.1

View File

@@ -0,0 +1,62 @@
import evox
import jax
from jax import jit, vmap, numpy as jnp
from configs import Configer
from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
from evox_adaptor import NEAT, Gym
if __name__ == '__main__':
    # Evolve a NEAT policy for Gym's BipedalWalker-v3 through an EvoX pipeline.
    key = jax.random.PRNGKey(42)

    monitor = evox.monitors.StdSOMonitor()
    neat_config = Configer.load_config('bipedalwalker.ini')
    origin_forward_func = create_forward_function(neat_config)

    def neat_transform(pop):
        """Split the flat population array into per-genome NEAT arrays.

        The first ``P * N * 5`` entries of ``pop`` are viewed as node data of
        shape ``(P, N, 5)``; the remaining entries are viewed as connection
        data of shape ``(P, C, 4)``, where P, N and C come from the config.

        Returns:
            A ``(pop_seqs, pop_nodes, u_pop_cons)`` tuple; these are the
            arguments unpacked into the forward function by ``neat_forward``.
        """
        P = neat_config['pop_size']
        N = neat_config['maximum_nodes']
        C = neat_config['maximum_connections']

        split = P * N * 5
        pop_nodes = pop[:split].reshape((P, N, 5))
        pop_cons = pop[split:].reshape((P, C, 4))

        # Per-genome preprocessing, mapped over the population axis.
        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
        return pop_seqs, pop_nodes, u_pop_cons

    def neat_forward(genome, x):
        """BipedalWalker policy: squash the network outputs into (-1, 1)."""
        res = origin_forward_func(x, *genome)
        # tanh bounds every output component to the open interval (-1, 1).
        return jnp.tanh(res)

    problem = Gym(
        policy=jit(vmap(neat_forward)),
        env_name="BipedalWalker-v3",
        # Taken from the config so it cannot drift from the population size
        # that neat_transform reshapes by.
        pop_size=neat_config['pop_size'],
    )

    # create a pipeline
    pipeline = evox.pipelines.StdPipeline(
        algorithm=NEAT(neat_config),
        problem=problem,
        pop_transform=jit(neat_transform),
        fitness_transform=monitor.record_fit,
    )

    # init the pipeline
    state = pipeline.init(key)

    # run the pipeline for 30 steps, reporting the best fitness so far
    for i in range(30):
        state = pipeline.step(state)
        print(i, monitor.get_min_fitness())

    # the original comment here read "obtain 98.91529684268514"
    # NOTE(review): that figure is identical to the one quoted in the
    # mountain-car example -- confirm it was actually measured on this task.
    min_fitness = monitor.get_min_fitness()
    print(min_fitness)

View File

@@ -0,0 +1,11 @@
[basic]
num_inputs = 4
num_outputs = 1
maximum_nodes = 50
maximum_connections = 50
maximum_species = 10
forward_way = "single"
random_seed = 42
[population]
pop_size = 40

View File

@@ -0,0 +1,62 @@
import evox
import jax
from jax import jit, vmap, numpy as jnp
from configs import Configer
from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
from evox_adaptor import NEAT, Gym
if __name__ == '__main__':
    # Evolve a NEAT policy for Gym's CartPole-v1 through an EvoX pipeline.
    key = jax.random.PRNGKey(42)

    monitor = evox.monitors.StdSOMonitor()
    neat_config = Configer.load_config('cartpole.ini')
    origin_forward_func = create_forward_function(neat_config)

    def neat_transform(pop):
        """Split the flat population array into per-genome NEAT arrays.

        The first ``P * N * 5`` entries of ``pop`` are viewed as node data of
        shape ``(P, N, 5)``; the remaining entries are viewed as connection
        data of shape ``(P, C, 4)``, where P, N and C come from the config.

        Returns:
            A ``(pop_seqs, pop_nodes, u_pop_cons)`` tuple; these are the
            arguments unpacked into the forward function by ``neat_forward``.
        """
        P = neat_config['pop_size']
        N = neat_config['maximum_nodes']
        C = neat_config['maximum_connections']

        split = P * N * 5
        pop_nodes = pop[:split].reshape((P, N, 5))
        pop_cons = pop[split:].reshape((P, C, 4))

        # Per-genome preprocessing, mapped over the population axis.
        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
        return pop_seqs, pop_nodes, u_pop_cons

    def neat_forward(genome, x):
        """CartPole policy: threshold the first network output at 0.5."""
        res = origin_forward_func(x, *genome)[0]
        # Binary action: 1 when the output exceeds 0.5, otherwise 0.
        return jnp.where(res > 0.5, 1, 0)

    problem = Gym(
        policy=jit(vmap(neat_forward)),
        env_name="CartPole-v1",
        # Taken from the config so it cannot drift from the population size
        # that neat_transform reshapes by.
        pop_size=neat_config['pop_size'],
    )

    # create a pipeline
    pipeline = evox.pipelines.StdPipeline(
        algorithm=NEAT(neat_config),
        problem=problem,
        pop_transform=jit(neat_transform),
        fitness_transform=monitor.record_fit,
    )

    # init the pipeline
    state = pipeline.init(key)

    # run the pipeline for 10 steps, reporting the best fitness so far
    for i in range(10):
        state = pipeline.step(state)
        print(monitor.get_min_fitness())

    # the original author reported obtaining 500
    min_fitness = monitor.get_min_fitness()
    print(min_fitness)

View File

@@ -0,0 +1,22 @@
[basic]
num_inputs = 2
num_outputs = 1
maximum_nodes = 50
maximum_connections = 50
maximum_species = 10
forward_way = "single"
random_seed = 42
[population]
pop_size = 100
[gene-activation]
activation_default = "sigmoid"
activation_option_names = ['sigmoid', 'tanh', 'sin', 'gauss', 'relu', 'identity', 'inv', 'log', 'exp', 'abs', 'hat', 'square']
activation_replace_rate = 0.1
[gene-aggregation]
aggregation_default = "sum"
aggregation_option_names = ['sum', 'product', 'max', 'min', 'maxabs', 'median', 'mean']
aggregation_replace_rate = 0.1

View File

@@ -0,0 +1,62 @@
import evox
import jax
from jax import jit, vmap, numpy as jnp
from configs import Configer
from algorithms.neat import create_forward_function, topological_sort, unflatten_connections
from evox_adaptor import NEAT, Gym
if __name__ == '__main__':
    # Evolve a NEAT policy for Gym's MountainCarContinuous-v0 through an
    # EvoX pipeline.
    key = jax.random.PRNGKey(42)

    monitor = evox.monitors.StdSOMonitor()
    neat_config = Configer.load_config('mountain_car.ini')
    origin_forward_func = create_forward_function(neat_config)

    def neat_transform(pop):
        """Split the flat population array into per-genome NEAT arrays.

        The first ``P * N * 5`` entries of ``pop`` are viewed as node data of
        shape ``(P, N, 5)``; the remaining entries are viewed as connection
        data of shape ``(P, C, 4)``, where P, N and C come from the config.

        Returns:
            A ``(pop_seqs, pop_nodes, u_pop_cons)`` tuple; these are the
            arguments unpacked into the forward function by ``neat_forward``.
        """
        P = neat_config['pop_size']
        N = neat_config['maximum_nodes']
        C = neat_config['maximum_connections']

        split = P * N * 5
        pop_nodes = pop[:split].reshape((P, N, 5))
        pop_cons = pop[split:].reshape((P, C, 4))

        # Per-genome preprocessing, mapped over the population axis.
        u_pop_cons = vmap(unflatten_connections)(pop_nodes, pop_cons)
        pop_seqs = vmap(topological_sort)(pop_nodes, u_pop_cons)
        return pop_seqs, pop_nodes, u_pop_cons

    def neat_forward(genome, x):
        """Mountain-car policy: squash the network output into (-1, 1)."""
        res = origin_forward_func(x, *genome)
        # tanh bounds the output to the open interval (-1, 1).
        return jnp.tanh(res)

    problem = Gym(
        policy=jit(vmap(neat_forward)),
        env_name="MountainCarContinuous-v0",
        # Taken from the config so it cannot drift from the population size
        # that neat_transform reshapes by.
        pop_size=neat_config['pop_size'],
    )

    # create a pipeline
    pipeline = evox.pipelines.StdPipeline(
        algorithm=NEAT(neat_config),
        problem=problem,
        pop_transform=jit(neat_transform),
        fitness_transform=monitor.record_fit,
    )

    # init the pipeline
    state = pipeline.init(key)

    # run the pipeline for 30 steps, reporting the best fitness so far
    for i in range(30):
        state = pipeline.step(state)
        print(i, monitor.get_min_fitness())

    # the original author reported obtaining 98.91529684268514
    min_fitness = monitor.get_min_fitness()
    print(min_fitness)

View File

@@ -12,7 +12,7 @@ random_seed = 42
fitness_threshold = 8
generation_limit = 1000
fitness_criterion = "max"
pop_size = 100000
pop_size = 10000
[genome]
compatibility_disjoint = 1.0