update "show" in brax

This commit is contained in:
wls2002
2024-06-16 22:28:28 +08:00
parent fb2ae5d2fa
commit 907314bc80
2 changed files with 44 additions and 20 deletions

View File

@@ -0,0 +1,19 @@
import jax
from problem.rl_env import BraxEnv
def random_policy(randkey, forward_func, obs, *, action_dim=6):
    """Sample a uniformly random action, ignoring the network and observation.

    Args:
        randkey: JAX PRNG key used to draw the action.
        forward_func: Unused; accepted so the signature matches the
            ``action_policy(key, forward_func, obs)`` call convention.
        obs: Unused; see ``forward_func``.
        action_dim: Number of action components to sample. Defaults to 6,
            the size this example uses for the "walker2d" environment.

    Returns:
        A 1-D array of ``action_dim`` values drawn uniformly from [-1, 1).
    """
    return jax.random.uniform(randkey, (action_dim,), minval=-1, maxval=1)
if __name__ == "__main__":
    # Roll out walker2d with purely random actions and save the rendering.
    problem = BraxEnv(env_name="walker2d", max_step=1000, action_policy=random_policy)
    state = problem.setup()
    randkey = jax.random.key(0)
    problem.show(
        state,
        randkey,
        # Placeholder: random_policy never calls the forward function it is given.
        act_func=lambda state, params, obs: obs,
        params=None,
        # Explicit ".gif" suffix so imageio can infer the output format from
        # the filename when the frames are written.
        save_path="walker2d_random_policy.gif",
    )

View File

@@ -9,6 +9,7 @@ class BraxEnv(RLEnv):
self, env_name: str = "ant", backend: str = "generalized", *args, **kwargs self, env_name: str = "ant", backend: str = "generalized", *args, **kwargs
): ):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self.env_name = env_name
self.env = envs.create(env_name=env_name, backend=backend) self.env = envs.create(env_name=env_name, backend=backend)
def env_step(self, randkey, env_state, action): def env_step(self, randkey, env_state, action):
@@ -34,45 +35,49 @@ class BraxEnv(RLEnv):
act_func, act_func,
params, params,
save_path=None, save_path=None,
height=512, height=480,
width=512, width=480,
duration=0.1,
*args, *args,
**kwargs **kwargs,
): ):
import jax import jax
import imageio import imageio
import numpy as np
from brax.io import image from brax.io import image
from tqdm import tqdm
obs, env_state = self.reset(randkey) obs, env_state = self.reset(randkey)
reward, done = 0.0, False reward, done = 0.0, False
state_histories = [] state_histories = [env_state.pipeline_state]
def step(key, env_state, obs): def step(key, env_state, obs):
key, _ = jax.random.split(key) key, _ = jax.random.split(key)
action = act_func(params, obs)
if self.action_policy is not None:
forward_func = lambda obs: act_func(state, params, obs)
action = self.action_policy(key, forward_func, obs)
else:
action = act_func(state, params, obs)
obs, env_state, r, done, _ = self.step(randkey, env_state, action) obs, env_state, r, done, _ = self.step(randkey, env_state, action)
return key, env_state, obs, r, done return key, env_state, obs, r, done
while not done: jit_step = jax.jit(step)
for _ in range(self.max_step):
key, env_state, obs, r, done = jit_step(randkey, env_state, obs)
state_histories.append(env_state.pipeline_state) state_histories.append(env_state.pipeline_state)
key, env_state, obs, r, done = jax.jit(step)(randkey, env_state, obs)
reward += r reward += r
if done:
break
imgs = [ imgs = image.render_array(
image.render_array(sys=self.env.sys, state=s, width=width, height=height) sys=self.env.sys, trajectory=state_histories, height=height, width=width
for s in tqdm(state_histories, desc="Rendering") )
]
def create_gif(image_list, gif_name, duration): if save_path is None:
with imageio.get_writer(gif_name, mode="I", duration=duration) as writer: save_path = f"{self.env_name}.gif"
for image in image_list:
formatted_image = np.array(image, dtype=np.uint8) imageio.mimsave(save_path, imgs, *args, **kwargs)
writer.append_data(formatted_image)
create_gif(imgs, save_path, duration=0.1)
print("Gif saved to: ", save_path) print("Gif saved to: ", save_path)
print("Total reward: ", reward) print("Total reward: ", reward)