add brax env

2023-10-17 20:20:03 +08:00
parent f217d87ac6
commit 7f042e07c2
9 changed files with 201 additions and 6 deletions
--- a/examples/brax/ant.py
+++ b/examples/brax/ant.py
@@ -0,0 +1,37 @@
+import jax.numpy as jnp
+
+from config import *
+from pipeline import Pipeline
+from algorithm import NEAT
+from algorithm.neat.gene import NormalGene, NormalGeneConfig
+from problem.rl_env import BraxEnv, BraxConfig
+
+
+def example_conf():
+    """Return the Config for evolving a NEAT controller on the Brax ant task."""
+    return Config(
+        basic=BasicConfig(seed=42, fitness_target=10000, pop_size=100),
+        neat=NeatConfig(
+            # Presumably the ant observation / action sizes -- TODO confirm.
+            inputs=27,
+            outputs=8,
+        ),
+        gene=NormalGeneConfig(
+            # tanh is the default activation and the only option offered.
+            activation_default=Act.tanh,
+            activation_options=(Act.tanh,),
+        ),
+        # Name the environment explicitly, like the other Brax examples,
+        # rather than relying on whatever BraxConfig defaults to.
+        problem=BraxConfig(env_name="ant"),
+    )
+
+
+if __name__ == '__main__':
+    cfg = example_conf()
+    # NEAT is built from the config and the NormalGene gene type.
+    pipe = Pipeline(cfg, NEAT(cfg, NormalGene), BraxEnv)
+    # Create the initial state, pre-compile, then start the run.
+    run_state = pipe.setup()
+    pipe.pre_compile(run_state)
+    run_state, best = pipe.auto_run(run_state)
--- a/examples/brax/half_cheetah.py
+++ b/examples/brax/half_cheetah.py
@@ -0,0 +1,41 @@
+import jax.numpy as jnp
+
+from config import *
+from pipeline import Pipeline
+from algorithm import NEAT
+from algorithm.neat.gene import NormalGene, NormalGeneConfig
+from problem.rl_env import BraxEnv, BraxConfig
+
+
+# Brax environment names usable as env_name: ['ant', 'halfcheetah', 'hopper', 'humanoid', 'humanoidstandup', 'inverted_pendulum', 'inverted_double_pendulum', 'pusher', 'reacher', 'walker2d']
+
+
+def example_conf():
+    """Return the Config for evolving a NEAT controller on Brax halfcheetah."""
+    # Run-level settings: seed, target fitness and population size.
+    basic = BasicConfig(seed=42, fitness_target=10000, pop_size=10000)
+
+    # Network input/output counts (presumably the env's observation and
+    # action sizes -- TODO confirm against the Brax environment).
+    neat = NeatConfig(inputs=17, outputs=6)
+
+    # tanh is the default activation and the only option offered.
+    gene = NormalGeneConfig(
+        activation_default=Act.tanh,
+        activation_options=(Act.tanh,),
+    )
+
+    # Environment name handed to BraxConfig.
+    problem = BraxConfig(env_name="halfcheetah")
+
+    return Config(basic=basic, neat=neat, gene=gene, problem=problem)
+
+
+if __name__ == '__main__':
+    cfg = example_conf()
+    # NEAT is built from the config and the NormalGene gene type.
+    pipe = Pipeline(cfg, NEAT(cfg, NormalGene), BraxEnv)
+    # Create the initial state, pre-compile, then start the run.
+    run_state = pipe.setup()
+    pipe.pre_compile(run_state)
+    run_state, best = pipe.auto_run(run_state)
--- a/examples/brax/reacher.py
+++ b/examples/brax/reacher.py
@@ -0,0 +1,38 @@
+import jax.numpy as jnp
+
+from config import *
+from pipeline import Pipeline
+from algorithm import NEAT
+from algorithm.neat.gene import NormalGene, NormalGeneConfig
+from problem.rl_env import BraxEnv, BraxConfig
+
+
+def example_conf():
+    """Return the Config for evolving a NEAT controller on Brax reacher."""
+    # Run-level settings: seed, target fitness and population size.
+    basic = BasicConfig(seed=42, fitness_target=10000, pop_size=10000)
+
+    # Network input/output counts (presumably the env's observation and
+    # action sizes -- TODO confirm against the Brax environment).
+    neat = NeatConfig(inputs=11, outputs=2)
+
+    # tanh is the default activation and the only option offered.
+    gene = NormalGeneConfig(
+        activation_default=Act.tanh,
+        activation_options=(Act.tanh,),
+    )
+
+    # Environment name handed to BraxConfig.
+    problem = BraxConfig(env_name="reacher")
+
+    return Config(basic=basic, neat=neat, gene=gene, problem=problem)
+
+
+if __name__ == '__main__':
+    cfg = example_conf()
+    # NEAT is built from the config and the NormalGene gene type.
+    pipe = Pipeline(cfg, NEAT(cfg, NormalGene), BraxEnv)
+    # Create the initial state, pre-compile, then start the run.
+    run_state = pipe.setup()
+    pipe.pre_compile(run_state)
+    run_state, best = pipe.auto_run(run_state)
--- a/examples/brax_env.py
+++ b/examples/brax_env.py
@@ -0,0 +1,36 @@
+import jax
+
+import brax
+from brax import envs
+
+
+def inference_func(key, *args):  # random policy; *args (e.g. obs) is ignored
+    return jax.random.normal(key, (env.action_size,))  # N(0, 1) per action dim
+
+
+env_name = "ant"
+backend = "generalized"
+# Create the Brax environment named above on the chosen backend.
+env = envs.create(env_name=env_name, backend=backend)
+# Wrap reset, step and the random policy with jax.jit.
+jit_env_reset = jax.jit(env.reset)
+jit_env_step = jax.jit(env.step)
+jit_inference_fn = jax.jit(inference_func)
+
+# Roll the environment forward for 100 steps with random actions.
+rollout = []  # collects state.pipeline_state before each step
+rng = jax.random.PRNGKey(seed=1)
+ori_state = jit_env_reset(rng=rng)  # initial state; not read again below
+state = ori_state
+
+for _ in range(100):
+    rollout.append(state.pipeline_state)
+    act_rng, rng = jax.random.split(rng)  # fresh key for this step's action
+    act = jit_inference_fn(act_rng, state.obs)  # obs is ignored by inference_func
+    state = jit_env_step(state, act)
+    reward = state.reward  # assigned but unused (print below is commented out)
+    # print(reward)
+
+a = 1  # NOTE(review): no-op assignment; looks like a leftover debugger anchor
+
+
--- a/examples/general_xor.py
+++ b/examples/general_xor.py
@@ -4,11 +4,6 @@ from algorithm import NEAT
 from algorithm.neat.gene import NormalGene, NormalGeneConfig
 from problem.func_fit import XOR, FuncFitConfig

-def evaluate():
-    pass
-
-
-
 if __name__ == '__main__':
    config = Config(
        basic=BasicConfig(