add brax env
This commit is contained in:
37
examples/brax/ant.py
Normal file
37
examples/brax/ant.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import jax.numpy as jnp
|
||||||
|
|
||||||
|
from config import *
|
||||||
|
from pipeline import Pipeline
|
||||||
|
from algorithm import NEAT
|
||||||
|
from algorithm.neat.gene import NormalGene, NormalGeneConfig
|
||||||
|
from problem.rl_env import BraxEnv, BraxConfig
|
||||||
|
|
||||||
|
|
||||||
|
def example_conf():
    """Build the configuration for the Brax ``ant`` example.

    Returns:
        A ``Config`` combining the basic run settings (seed 42, fitness
        target 10000, population of 100), a NEAT network with 27 inputs
        and 8 outputs, tanh-only gene activations, and the Brax ``ant``
        problem.
    """
    return Config(
        basic=BasicConfig(
            seed=42,
            fitness_target=10000,
            pop_size=100,
        ),
        neat=NeatConfig(
            inputs=27,
            outputs=8,
        ),
        gene=NormalGeneConfig(
            activation_default=Act.tanh,
            activation_options=(Act.tanh,),
        ),
        # Name the environment explicitly instead of relying on the
        # BraxConfig default, so the network sizes above stay visibly tied
        # to the environment they were chosen for.
        problem=BraxConfig(
            env_name="ant",
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Assemble the run: configuration -> NEAT algorithm -> pipeline on Brax.
    run_config = example_conf()
    neat = NEAT(run_config, NormalGene)
    runner = Pipeline(run_config, neat, BraxEnv)

    # Initialise the pipeline state, run the pre-compile step on it, then
    # hand control to the pipeline's automatic run loop.
    run_state = runner.setup()
    runner.pre_compile(run_state)
    run_state, best = runner.auto_run(run_state)
|
||||||
41
examples/brax/half_cheetah.py
Normal file
41
examples/brax/half_cheetah.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
import jax.numpy as jnp
|
||||||
|
|
||||||
|
from config import *
|
||||||
|
from pipeline import Pipeline
|
||||||
|
from algorithm import NEAT
|
||||||
|
from algorithm.neat.gene import NormalGene, NormalGeneConfig
|
||||||
|
from problem.rl_env import BraxEnv, BraxConfig
|
||||||
|
|
||||||
|
|
||||||
|
# ['ant', 'halfcheetah', 'hopper', 'humanoid', 'humanoidstandup', 'inverted_pendulum', 'inverted_double_pendulum', 'pusher', 'reacher', 'walker2d']
|
||||||
|
|
||||||
|
|
||||||
|
def example_conf():
    """Build the configuration for the Brax ``halfcheetah`` example.

    Returns:
        A ``Config`` with seed 42, fitness target 10000, a population of
        10000, a NEAT network with 17 inputs and 6 outputs, tanh-only gene
        activations, and the Brax ``halfcheetah`` problem.
    """
    # Run-level settings.
    basic_conf = BasicConfig(
        seed=42,
        fitness_target=10000,
        pop_size=10000,
    )
    # Network interface sizes used for this environment.
    neat_conf = NeatConfig(
        inputs=17,
        outputs=6,
    )
    # tanh is both the default and the only selectable activation.
    gene_conf = NormalGeneConfig(
        activation_default=Act.tanh,
        activation_options=(Act.tanh,),
    )
    problem_conf = BraxConfig(
        env_name="halfcheetah",
    )
    return Config(
        basic=basic_conf,
        neat=neat_conf,
        gene=gene_conf,
        problem=problem_conf,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Assemble the run: configuration -> NEAT algorithm -> pipeline on Brax.
    run_config = example_conf()
    neat = NEAT(run_config, NormalGene)
    runner = Pipeline(run_config, neat, BraxEnv)

    # Initialise the pipeline state, run the pre-compile step on it, then
    # hand control to the pipeline's automatic run loop.
    run_state = runner.setup()
    runner.pre_compile(run_state)
    run_state, best = runner.auto_run(run_state)
|
||||||
38
examples/brax/reacher.py
Normal file
38
examples/brax/reacher.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import jax.numpy as jnp
|
||||||
|
|
||||||
|
from config import *
|
||||||
|
from pipeline import Pipeline
|
||||||
|
from algorithm import NEAT
|
||||||
|
from algorithm.neat.gene import NormalGene, NormalGeneConfig
|
||||||
|
from problem.rl_env import BraxEnv, BraxConfig
|
||||||
|
|
||||||
|
|
||||||
|
def example_conf():
    """Build the configuration for the Brax ``reacher`` example.

    Returns:
        A ``Config`` with seed 42, fitness target 10000, a population of
        10000, a NEAT network with 11 inputs and 2 outputs, tanh-only gene
        activations, and the Brax ``reacher`` problem.
    """
    # Run-level settings.
    basic_conf = BasicConfig(
        seed=42,
        fitness_target=10000,
        pop_size=10000,
    )
    # Network interface sizes used for this environment.
    neat_conf = NeatConfig(
        inputs=11,
        outputs=2,
    )
    # tanh is both the default and the only selectable activation.
    gene_conf = NormalGeneConfig(
        activation_default=Act.tanh,
        activation_options=(Act.tanh,),
    )
    problem_conf = BraxConfig(
        env_name="reacher",
    )
    return Config(
        basic=basic_conf,
        neat=neat_conf,
        gene=gene_conf,
        problem=problem_conf,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Assemble the run: configuration -> NEAT algorithm -> pipeline on Brax.
    run_config = example_conf()
    neat = NEAT(run_config, NormalGene)
    runner = Pipeline(run_config, neat, BraxEnv)

    # Initialise the pipeline state, run the pre-compile step on it, then
    # hand control to the pipeline's automatic run loop.
    run_state = runner.setup()
    runner.pre_compile(run_state)
    run_state, best = runner.auto_run(run_state)
|
||||||
36
examples/brax_env.py
Normal file
36
examples/brax_env.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
import jax
|
||||||
|
|
||||||
|
import brax
|
||||||
|
from brax import envs
|
||||||
|
|
||||||
|
|
||||||
|
def inference_func(key, *args):
    """Sample a random action from a standard normal distribution.

    Args:
        key: JAX PRNG key used for sampling.
        *args: Accepted and ignored, so the function can be called with
            extra arguments such as an observation.

    Returns:
        An array of shape ``(env.action_size,)``, where ``env`` is the
        module-level environment.
    """
    action_shape = (env.action_size,)
    return jax.random.normal(key, shape=action_shape)
|
||||||
|
|
||||||
|
|
||||||
|
# Environment selection for this smoke-test rollout.
env_name = "ant"
backend = "generalized"

env = envs.create(env_name=env_name, backend=backend)

# JIT-wrap the environment entry points and the random policy once, up front.
jit_env_reset = jax.jit(env.reset)
jit_env_step = jax.jit(env.step)
jit_inference_fn = jax.jit(inference_func)

# Collected pipeline states, one per step taken below.
rollout = []
rng = jax.random.PRNGKey(seed=1)
ori_state = jit_env_reset(rng=rng)
state = ori_state

for _ in range(100):
    rollout.append(state.pipeline_state)
    # Split so each step samples its action with a fresh key.
    act_rng, rng = jax.random.split(rng)
    act = jit_inference_fn(act_rng, state.obs)
    state = jit_env_step(state, act)
    # Reward of the most recent step, kept for inspection.
    reward = state.reward
|
||||||
|
|
||||||
|
|
||||||
@@ -4,11 +4,6 @@ from algorithm import NEAT
|
|||||||
from algorithm.neat.gene import NormalGene, NormalGeneConfig
|
from algorithm.neat.gene import NormalGene, NormalGeneConfig
|
||||||
from problem.func_fit import XOR, FuncFitConfig
|
from problem.func_fit import XOR, FuncFitConfig
|
||||||
|
|
||||||
def evaluate():
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
config = Config(
|
config = Config(
|
||||||
basic=BasicConfig(
|
basic=BasicConfig(
|
||||||
|
|||||||
@@ -24,6 +24,8 @@ class Pipeline:
|
|||||||
self.algorithm = algorithm
|
self.algorithm = algorithm
|
||||||
self.problem = problem_type(config.problem)
|
self.problem = problem_type(config.problem)
|
||||||
|
|
||||||
|
print(self.problem.input_shape, self.problem.output_shape)
|
||||||
|
|
||||||
if isinstance(algorithm, NEAT):
|
if isinstance(algorithm, NEAT):
|
||||||
assert config.neat.inputs == self.problem.input_shape[-1], f"problem input shape {self.problem.input_shape}"
|
assert config.neat.inputs == self.problem.input_shape[-1], f"problem input shape {self.problem.input_shape}"
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1,2 @@
|
|||||||
from .gymnax_env import GymNaxEnv, GymNaxConfig
|
from .gymnax_env import GymNaxEnv, GymNaxConfig
|
||||||
|
from .brax_env import BraxEnv, BraxConfig
|
||||||
|
|||||||
45
problem/rl_env/brax_env.py
Normal file
45
problem/rl_env/brax_env.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
import jax.numpy as jnp
|
||||||
|
from brax import envs
|
||||||
|
from core import State
|
||||||
|
from .rl_jit import RLEnv, RLEnvConfig
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class BraxConfig(RLEnvConfig):
    """Immutable settings selecting which Brax environment to build."""

    # Environment name; defaults to "ant".
    env_name: str = "ant"
    # Brax backend name; defaults to "generalized".
    backend: str = "generalized"

    def __post_init__(self):
        """Validation hook; currently performs no checks."""
        # TODO: check that env_name is an environment Brax knows about.
        # The check sketched here originally looked the name up in
        # gymnax.registered_envs, which does not apply to Brax.
        # NOTE(review): if RLEnvConfig defines its own __post_init__, this
        # override does not call it; confirm that is intended.
|
||||||
|
|
||||||
|
|
||||||
|
class BraxEnv(RLEnv):
    """RL problem backed by a Brax environment.

    The environment is created once in ``__init__`` from the config's
    ``env_name`` and ``backend``; stepping and resetting delegate to it.
    """

    def __init__(self, config: BraxConfig = BraxConfig()):
        """Create the Brax environment described by ``config``.

        Args:
            config: Environment name and backend to pass to
                ``envs.create``. Defaults to a ``BraxConfig`` with its
                default field values.
        """
        super().__init__(config)
        self.config = config
        self.env = envs.create(env_name=config.env_name, backend=config.backend)

    def env_step(self, randkey, env_state, action):
        """Advance the environment by one step.

        Args:
            randkey: Unused here; the Brax step call takes no key.
            env_state: Current Brax state.
            action: Action to apply.

        Returns:
            A tuple ``(obs, state, reward, done, info)`` where ``state`` is
            the new Brax state and ``done`` is cast to a boolean array.
        """
        state = self.env.step(env_state, action)
        # Cast done to bool before returning it to the caller.
        done = state.done.astype(jnp.bool_)
        return state.obs, state, state.reward, done, state.info

    def env_reset(self, randkey):
        """Reset the environment.

        Args:
            randkey: PRNG key forwarded to the Brax ``reset`` call.

        Returns:
            A tuple ``(obs, state)`` with the initial observation and the
            initial Brax state.
        """
        init_state = self.env.reset(randkey)
        return init_state.obs, init_state

    @property
    def input_shape(self):
        """One-element tuple holding the environment's observation size."""
        return (self.env.observation_size,)

    @property
    def output_shape(self):
        """One-element tuple holding the environment's action size."""
        return (self.env.action_size,)

    def show(self, randkey, state: State, act_func: Callable, params):
        """Visualise a policy in the environment (not implemented yet).

        Raises:
            NotImplementedError: Always.
        """
        # TODO: implement visualisation.
        raise NotImplementedError("im busy! to be done!")
|
||||||
@@ -29,10 +29,10 @@ class RLEnv(Problem):
|
|||||||
def cond_func(carry):
|
def cond_func(carry):
|
||||||
_, _, _, done, _ = carry
|
_, _, _, done, _ = carry
|
||||||
return ~done
|
return ~done
|
||||||
|
|
||||||
def body_func(carry):
|
def body_func(carry):
|
||||||
obs, env_state, rng, _, tr = carry # total reward
|
obs, env_state, rng, _, tr = carry # total reward
|
||||||
net_out = act_func(state, obs, params)
|
net_out = act_func(state, obs, params)
|
||||||
|
|
||||||
action = self.config.output_transform(net_out)
|
action = self.config.output_transform(net_out)
|
||||||
next_obs, next_env_state, reward, done, _ = self.step(rng, env_state, action)
|
next_obs, next_env_state, reward, done, _ = self.step(rng, env_state, action)
|
||||||
next_rng, _ = jax.random.split(rng)
|
next_rng, _ = jax.random.split(rng)
|
||||||
|
|||||||
Reference in New Issue
Block a user