diff --git a/tensorneat/examples/func_fit/xor_kan.py b/tensorneat/examples/func_fit/xor_kan.py index 5bdd1ed..60c5feb 100644 --- a/tensorneat/examples/func_fit/xor_kan.py +++ b/tensorneat/examples/func_fit/xor_kan.py @@ -16,7 +16,7 @@ if __name__ == "__main__": max_nodes=50, max_conns=100, node_gene=KANNode(), - conn_gene=BSplineConn(grid_cnt=10), + conn_gene=BSplineConn(grid_cnt=6), output_transform=Act.sigmoid, # the activation function for output node mutation=DefaultMutation( node_add=0.1, diff --git a/tensorneat/examples/gymnax/cartpole.py b/tensorneat/examples/gymnax/cartpole.py index 16fcbe5..1368ee8 100644 --- a/tensorneat/examples/gymnax/cartpole.py +++ b/tensorneat/examples/gymnax/cartpole.py @@ -5,6 +5,11 @@ from algorithm.neat import * from problem.rl_env import GymNaxEnv + +def action_policy(forward_func, obs): + return jnp.argmax(forward_func(obs)) + + if __name__ == "__main__": pipeline = Pipeline( algorithm=NEAT( @@ -14,18 +19,15 @@ if __name__ == "__main__": num_outputs=2, max_nodes=50, max_conns=100, - output_transform=lambda out: jnp.argmax( - out - ), # the action of cartpole is {0, 1} + # output_transform=lambda out: jnp.argmax( + # out + # ), # the action of cartpole is {0, 1} ), pop_size=10000, species_size=10, ), ), - problem=GymNaxEnv( - env_name="CartPole-v1", - repeat_times=5 - ), + problem=GymNaxEnv(env_name="CartPole-v1", repeat_times=5, action_policy=action_policy), generation_limit=10000, fitness_target=500, ) diff --git a/tensorneat/examples/jumanji/2048.py b/tensorneat/examples/jumanji/2048.py deleted file mode 100644 index 39ecd00..0000000 --- a/tensorneat/examples/jumanji/2048.py +++ /dev/null @@ -1,46 +0,0 @@ -import jax.numpy as jnp - -from pipeline import Pipeline -from algorithm.neat import * - -from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048 -from utils import Act, Agg - -if __name__ == "__main__": - pipeline = Pipeline( - algorithm=NEAT( - species=DefaultSpecies( - genome=DefaultGenome( - num_inputs=16, - num_outputs=4, - max_nodes=100, - max_conns=1000, - node_gene=DefaultNodeGene( - activation_default=Act.sigmoid, - activation_options=(Act.sigmoid, Act.relu, Act.tanh, Act.identity, Act.inv), - aggregation_default=Agg.sum, - aggregation_options=(Agg.sum, Agg.mean, Agg.max, Agg.product), - ), - mutation=DefaultMutation( - node_add=0.03, - conn_add=0.03, - ) - ), - pop_size=10000, - species_size=100, - survival_threshold=0.01, - ), - ), - problem=Jumanji_2048( - max_step=10000, - repeat_times=5 - ), - generation_limit=10000, - fitness_target=13000, - ) - - # initialize state - state = pipeline.setup() - # print(state) - # run until terminate - state, best = pipeline.auto_run(state) diff --git a/tensorneat/examples/jumanji/2048_random_policy.py b/tensorneat/examples/jumanji/2048_random_policy.py new file mode 100644 index 0000000..6f7172b --- /dev/null +++ b/tensorneat/examples/jumanji/2048_random_policy.py @@ -0,0 +1,25 @@ +import jax, jax.numpy as jnp +import jax.random +from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048 + + +def random_policy(state, params, obs): + # key = jax.random.key(obs.sum()) + # actions = jax.random.normal(key, (4,)) + # actions = actions.at[2:].set(-9999) + return jnp.array([4, 4, 0, 1]) + # return jnp.array([1, 2, 3, 4]) + return actions + + +if __name__ == "__main__": + problem = Jumanji_2048( + max_step=10000, repeat_times=1000, guarantee_invalid_action=True + ) + state = problem.setup() + jit_evaluate = jax.jit( + lambda state, randkey: problem.evaluate(state, randkey, random_policy, None) + ) + randkey = jax.random.PRNGKey(0) + reward = jit_evaluate(state, randkey) + print(reward) diff --git a/tensorneat/examples/jumanji/2048_test.ipynb b/tensorneat/examples/jumanji/2048_test.ipynb index e779bfd..aec52d5 100644 --- a/tensorneat/examples/jumanji/2048_test.ipynb +++ b/tensorneat/examples/jumanji/2048_test.ipynb @@ -2,13 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2024-06-05T05:07:22.736605400Z", - "start_time": "2024-06-05T05:06:39.100164300Z" + "end_time": "2024-06-05T07:40:13.841629100Z", + "start_time": "2024-06-05T07:40:13.076164500Z" } }, "outputs": [ @@ -17,26 +17,7 @@ "output_type": "stream", "text": [ "initializing\n", - "initializing finished\n", - "start compile\n", - "compile finished, cost time: 18.307454s\n", - "Generation: 1.0, Cost time: 4551.03ms\n", - " \tnode counts: max: 21, min: 21, mean: 21.00\n", - " \tconn counts: max: 20, min: 20, mean: 20.00\n", - " \tspecies: 1, [10000]\n", - " \tfitness: valid cnt: 10000, max: 10124.0000, min: 44.0000, mean: 1758.1263, std: 1212.6823\n", - "Generation: 2.0, Cost time: 4636.33ms\n", - " \tnode counts: max: 22, min: 21, mean: 21.03\n", - " \tconn counts: max: 22, min: 20, mean: 20.05\n", - " \tspecies: 1, [10000]\n", - " \tfitness: valid cnt: 10000, max: 11000.0000, min: 48.0000, mean: 1870.1300, std: 1263.3086\n", - "Generation: 3.0, Cost time: 6271.12ms\n", - " \tnode counts: max: 23, min: 21, mean: 21.03\n", - " \tconn counts: max: 22, min: 20, mean: 20.05\n", - " \tspecies: 1, [10000]\n", - " \tfitness: valid cnt: 10000, max: 14624.0000, min: 28.0000, mean: 1943.9924, std: 1293.7146\n", - "\n", - "Fitness limit reached!\n" + "initializing finished\n" ] } ], @@ -45,84 +26,100 @@ "\n", "from pipeline import Pipeline\n", "from algorithm.neat import *\n", + "from algorithm.neat.gene.node.default_without_response import NodeGeneWithoutResponse\n", "\n", "from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048\n", "from utils import Act, Agg\n", "\n", - "if __name__ == \"__main__\":\n", - " pipeline = Pipeline(\n", - " algorithm=NEAT(\n", - " species=DefaultSpecies(\n", - " genome=DefaultGenome(\n", - " num_inputs=16,\n", - " num_outputs=4,\n", - " max_nodes=100,\n", - " max_conns=1000,\n", - " node_gene=DefaultNodeGene(\n", - " activation_default=Act.sigmoid,\n", - " activation_options=(Act.sigmoid, Act.relu, Act.tanh, Act.identity, Act.inv),\n", - " aggregation_default=Agg.sum,\n", - " aggregation_options=(Agg.sum, Agg.mean, Agg.max, Agg.product),\n", + "pipeline = Pipeline(\n", + " algorithm=NEAT(\n", + " species=DefaultSpecies(\n", + " genome=DefaultGenome(\n", + " num_inputs=16,\n", + " num_outputs=4,\n", + " max_nodes=100,\n", + " max_conns=1000,\n", + " node_gene=NodeGeneWithoutResponse(\n", + " activation_default=Act.sigmoid,\n", + " activation_options=(\n", + " Act.sigmoid,\n", + " Act.relu,\n", + " Act.tanh,\n", + " Act.identity,\n", " ),\n", - " mutation=DefaultMutation(\n", - " node_add=0.03,\n", - " conn_add=0.03,\n", - " )\n", + " aggregation_default=Agg.sum,\n", + " aggregation_options=(Agg.sum,),\n", + " activation_replace_rate=0.02,\n", + " aggregation_replace_rate=0.02,\n", + " bias_mutate_rate=0.03,\n", + " bias_init_std=0.5,\n", + " bias_mutate_power=0.2,\n", + " bias_replace_rate=0.01,\n", + " ),\n", + " conn_gene=DefaultConnGene(\n", + " weight_mutate_rate=0.015,\n", + " weight_replace_rate=0.003,\n", + " weight_mutate_power=0.5,\n", + " ),\n", + " mutation=DefaultMutation(\n", + " node_add=0.1, conn_add=0.2, conn_delete=0.2\n", " ),\n", - " pop_size=10000,\n", - " species_size=100,\n", - " survival_threshold=0.01,\n", " ),\n", + " pop_size=1000,\n", + " species_size=5,\n", + " survival_threshold=0.1,\n", + " max_stagnation=7,\n", + " genome_elitism=3,\n", + " compatibility_threshold=1.2,\n", " ),\n", - " problem=Jumanji_2048(\n", - " max_step=1000,\n", - " ),\n", - " generation_limit=10000,\n", - " fitness_target=13000,\n", - " )\n", - "\n", - " # initialize state\n", - " state = pipeline.setup()\n", - " # print(state)\n", - " # run until terminate\n", - " state, best = pipeline.auto_run(state)" + " ),\n", + " problem=Jumanji_2048(max_step=10000, repeat_times=5),\n", + " generation_limit=100,\n", + " fitness_target=13000,\n", + " save_path=\"2048.pkl\",\n", + ")\n", + "state = pipeline.setup()" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "outputs": [], "source": [ - "genome = pipeline.algorithm.genome" + "import numpy as np\n", + "\n", + "data = np.load('2048.npz')\n", + "nodes, conns = data['nodes'], data['conns']" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-06-05T05:08:14.332101Z", - "start_time": "2024-06-05T05:08:14.324101300Z" + "end_time": "2024-06-05T07:40:13.932015100Z", + "start_time": "2024-06-05T07:40:13.876631500Z" } }, "id": "a0915ecf8179f347" }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "outputs": [], "source": [ - "transformed = genome.transform(state, *best)" + "genome = pipeline.algorithm.species.genome\n", + "transformed = genome.transform(state, nodes, conns)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-06-05T05:08:49.132030500Z", - "start_time": "2024-06-05T05:08:48.495809200Z" + "end_time": "2024-06-05T07:40:14.585804800Z", + "start_time": "2024-06-05T07:40:14.568805Z" } }, "id": "cd1fa65e8a9d6e13" }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "outputs": [], "source": [ "def policy(board):\n", @@ -132,1089 +129,1229 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-06-05T05:09:32.355055100Z", - "start_time": "2024-06-05T05:09:32.350057Z" + "end_time": "2024-06-05T07:40:15.124383600Z", + "start_time": "2024-06-05T07:40:15.118384200Z" } }, "id": "61bc1895af304651" }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 0, 1, 0],\n", + " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 2],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 2],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 3],\n", - " [0, 0, 0, 0],\n", - " [2, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 3],\n", + " [1, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 3],\n", - " [0, 0, 0, 1],\n", - " [0, 0, 0, 0],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 3],\n", - " [0, 0, 0, 1],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 3],\n", - " [0, 0, 2, 1],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 3],\n", - " [1, 0, 2, 1],\n", - " [0, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 3],\n", - " [0, 1, 2, 1],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 3],\n", - " [0, 2, 2, 1],\n", + " [1, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", " [1, 0, 0, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", - " [0, 2, 2, 1],\n", " [0, 0, 0, 0],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", - " [0, 2, 2, 1],\n", - " [0, 0, 1, 0],\n", - " [0, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", - " [0, 0, 3, 1],\n", - " [0, 0, 1, 1],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", - " [0, 0, 3, 2],\n", - " [0, 0, 1, 2],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 3],\n", - " [0, 0, 3, 3],\n", - " [0, 1, 1, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [0, 1, 3, 1],\n", - " [0, 0, 1, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [0, 1, 3, 1],\n", - " [0, 0, 0, 1],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [0, 2, 3, 2],\n", + " [1, 1, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 0, 0],\n", " [0, 0, 0, 0],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [0, 2, 3, 2],\n", + " [2, 1, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 2, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 1, 2],\n", " [0, 0, 0, 1],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 2, 3, 2],\n", - " [0, 0, 0, 1],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 2, 3, 2],\n", - " [0, 0, 0, 2],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 2, 3, 3],\n", - " [0, 0, 0, 1],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 2, 3, 3],\n", - " [0, 1, 0, 1],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 2, 3, 3],\n", - " [0, 2, 1, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", - " [1, 3, 3, 3],\n", - " [0, 0, 1, 1],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", - " [1, 0, 3, 3],\n", - " [0, 1, 1, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", - " [1, 1, 3, 3],\n", - " [0, 0, 1, 2],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", - " [1, 1, 3, 3],\n", - " [0, 1, 2, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", - " [1, 2, 3, 3],\n", - " [0, 1, 2, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", - " [1, 1, 2, 4],\n", - " [0, 0, 1, 3],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 1, 2, 3],\n", - " [0, 1, 1, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 2, 2, 3],\n", - " [0, 0, 1, 0],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 2, 2, 3],\n", - " [0, 1, 1, 0],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 2, 2, 3],\n", - " [0, 1, 2, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 2, 3, 3],\n", - " [0, 1, 1, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 1, 2, 4],\n", - " [0, 0, 1, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [0, 2, 2, 4],\n", - " [0, 0, 1, 2],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [1, 2, 2, 4],\n", - " [0, 0, 1, 2],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", - " [2, 2, 2, 4],\n", - " [1, 0, 1, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [1, 2, 2, 4],\n", - " [0, 0, 1, 2],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [1, 2, 2, 4],\n", - " [0, 1, 1, 2],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [1, 2, 2, 4],\n", - " [0, 1, 2, 2],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [2, 2, 3, 4],\n", - " [1, 1, 0, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [0, 3, 3, 4],\n", - " [0, 0, 2, 2],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [1, 3, 3, 4],\n", - " [0, 0, 2, 2],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [0, 1, 4, 4],\n", - " [0, 1, 0, 3],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [0, 2, 4, 4],\n", - " [0, 1, 0, 3],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", - " [0, 0, 2, 5],\n", - " [1, 0, 1, 3],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 0, 2, 3],\n", - " [0, 1, 1, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 1, 2, 3],\n", - " [0, 0, 1, 1],\n", - " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 1, 2, 3],\n", - " [2, 0, 1, 1],\n", - " [0, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 1, 2, 3],\n", - " [2, 2, 1, 1],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 1, 2, 3],\n", - " [2, 2, 2, 1],\n", - " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 1, 3, 3],\n", - " [2, 2, 2, 1],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [0, 1, 2, 4],\n", - " [0, 2, 3, 1],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 4, 0],\n", - " [2, 3, 1, 1],\n", - " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 4, 1],\n", - " [3, 3, 1, 0],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 4, 2],\n", - " [3, 3, 1, 0],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 4, 2],\n", - " [3, 3, 2, 0],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 4, 2],\n", - " [0, 0, 4, 2],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 5, 3],\n", - " [0, 1, 1, 1],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 5, 3],\n", - " [0, 0, 1, 2],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [1, 2, 5, 3],\n", - " [1, 2, 1, 0],\n", - " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [2, 3, 5, 3],\n", - " [1, 1, 1, 0],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [2, 3, 5, 3],\n", - " [0, 0, 1, 2],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [2, 3, 5, 3],\n", - " [0, 1, 1, 2],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", - " [2, 3, 5, 3],\n", - " [1, 2, 1, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 0],\n", - " [3, 4, 1, 6],\n", - " [2, 3, 5, 3],\n", - " [1, 2, 1, 2]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", - " [2, 4, 5, 3],\n", - " [1, 3, 1, 2],\n", - " [0, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", - " [2, 4, 5, 3],\n", - " [1, 3, 1, 2],\n", - " [1, 0, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", - " [2, 4, 5, 3],\n", - " [2, 3, 1, 2],\n", - " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", - " [3, 4, 5, 3],\n", - " [1, 3, 1, 2],\n", - " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 3],\n", - " [0, 3, 1, 2],\n", - " [2, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 3],\n", - " [2, 3, 1, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 3],\n", - " [2, 3, 1, 2],\n", - " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 3],\n", - " [3, 3, 2, 2],\n", - " [1, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 3],\n", - " [0, 1, 4, 3],\n", - " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [1, 4, 5, 4],\n", - " [1, 2, 4, 1],\n", - " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 5, 4],\n", - " [0, 2, 4, 1],\n", - " [0, 0, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 5, 4],\n", - " [0, 2, 4, 2],\n", - " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 5, 4],\n", - " [2, 4, 2, 0],\n", - " [2, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 5, 4],\n", - " [2, 1, 2, 0],\n", - " [0, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 5, 4],\n", - " [2, 2, 2, 0],\n", - " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 5, 4],\n", - " [2, 2, 2, 1],\n", - " [0, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 5, 4],\n", - " [2, 3, 2, 1],\n", - " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [0, 3, 6, 4],\n", - " [2, 3, 2, 1],\n", - " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [1, 1, 2, 1],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [0, 2, 2, 1],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [1, 0, 3, 1],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [0, 1, 3, 1],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [0, 2, 3, 1],\n", - " [0, 2, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [0, 3, 3, 1],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [1, 3, 3, 1],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [2, 4, 6, 4],\n", - " [2, 1, 4, 1],\n", - " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 4, 6, 4],\n", - " [1, 1, 4, 1],\n", - " [0, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 4, 6, 4],\n", - " [0, 2, 4, 1],\n", - " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 4, 6, 4],\n", - " [2, 4, 1, 0],\n", - " [1, 3, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [2, 3, 1, 1],\n", - " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [2, 3, 1, 2],\n", - " [1, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [2, 3, 1, 2],\n", - " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [2, 3, 1, 3],\n", - " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [2, 3, 1, 3],\n", - " [2, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [3, 5, 6, 4],\n", - " [3, 3, 2, 3],\n", - " [0, 1, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", - " [4, 5, 6, 4],\n", - " [1, 3, 3, 3],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", - " [1, 5, 6, 4],\n", - " [0, 3, 3, 3],\n", - " [2, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", - " [1, 5, 6, 4],\n", - " [2, 3, 3, 3],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", - " [1, 5, 6, 4],\n", - " [0, 2, 3, 4],\n", - " [1, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", - " [2, 5, 6, 5],\n", - " [1, 2, 3, 2],\n", - " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + " [0, 0, 3, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", - " [5, 2, 1, 6],\n", - " [2, 5, 6, 5],\n", - " [1, 2, 3, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 2, 6],\n", - " [2, 5, 6, 5],\n", - " [1, 2, 3, 2],\n", - " [0, 1, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 5, 3, 6],\n", - " [2, 5, 6, 5],\n", - " [1, 2, 3, 2],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", - " [1, 2, 6, 5],\n", - " [0, 1, 3, 2],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", - " [1, 2, 6, 5],\n", - " [0, 2, 3, 2],\n", - " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", - " [1, 3, 6, 5],\n", - " [0, 0, 3, 2],\n", - " [1, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [1, 0, 3, 2],\n", - " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [1, 3, 6, 5],\n", - " [0, 0, 3, 2],\n", - " [1, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [0, 1, 3, 2],\n", - " [0, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [0, 1, 3, 2],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [1, 1, 3, 3],\n", - " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [0, 0, 2, 4],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [1, 1, 2, 4],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [0, 2, 2, 4],\n", - " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [1, 2, 2, 4],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [2, 3, 6, 5],\n", - " [2, 1, 3, 4],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", - " [3, 3, 6, 5],\n", - " [0, 1, 3, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [0, 3, 6, 5],\n", - " [0, 1, 3, 4],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [0, 3, 6, 5],\n", - " [0, 2, 3, 4],\n", - " [0, 2, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [0, 3, 6, 5],\n", - " [0, 3, 3, 4],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [1, 4, 6, 5],\n", - " [0, 2, 3, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [1, 4, 6, 5],\n", - " [2, 3, 4, 0],\n", - " [1, 2, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [1, 4, 6, 5],\n", - " [0, 2, 3, 4],\n", - " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [2, 4, 6, 5],\n", - " [1, 2, 3, 4],\n", - " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [2, 4, 6, 5],\n", - " [1, 2, 3, 4],\n", - " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [2, 4, 6, 5],\n", - " [2, 2, 3, 4],\n", - " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 5],\n", - " [1, 2, 3, 4],\n", - " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 5],\n", - " [2, 2, 3, 4],\n", - " [0, 3, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 5],\n", - " [0, 3, 3, 4],\n", - " [1, 0, 3, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 5],\n", - " [1, 3, 4, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 5],\n", - " [0, 1, 3, 5],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", - " [3, 4, 6, 6],\n", - " [0, 2, 3, 2],\n", - " [2, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 2, 3, 0],\n", - " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 2, 3, 3],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 0, 2, 4],\n", - " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 1, 2, 4],\n", - " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 2, 2, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 0, 3, 4],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 1, 3, 4],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 1, 3, 4],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 1, 3, 4],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 1, 3, 4],\n", - " [0, 1, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 2, 3, 4],\n", - " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 3, 3, 4],\n", - " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 1, 4, 4],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 2, 4, 4],\n", - " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 2, 4, 4],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 1, 2, 5],\n", - " [0, 0, 2, 3]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 1, 3, 5],\n", - " [1, 0, 0, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 1, 3, 5],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 2, 3, 5],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 3, 3, 5],\n", - " [0, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 1, 4, 5],\n", - " [0, 2, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [0, 1, 4, 5],\n", - " [1, 0, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 1, 4, 5],\n", - " [0, 1, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 2, 4, 5],\n", - " [0, 1, 3, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 2, 4, 5],\n", - " [0, 1, 1, 4]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 2, 4, 5],\n", - " [0, 2, 2, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [1, 3, 4, 5],\n", - " [1, 0, 2, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 3, 4, 5],\n", - " [0, 1, 2, 4]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 3, 4, 5],\n", - " [1, 2, 4, 1]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 3, 5, 5],\n", - " [1, 2, 1, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 6, 2],\n", - " [2, 3, 6, 1],\n", - " [1, 2, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 2],\n", - " [2, 3, 2, 1],\n", - " [1, 2, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 2],\n", - " [2, 3, 3, 1],\n", - " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 2],\n", - " [2, 3, 3, 2],\n", - " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [2, 3, 3, 1],\n", - " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [2, 3, 3, 2],\n", - " [1, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [0, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [1, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [2, 2, 4, 2],\n", - " [2, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [3, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", - " [4, 4, 7, 3],\n", - " [1, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [2, 4, 7, 3],\n", - " [0, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [2, 4, 7, 3],\n", - " [1, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [2, 4, 7, 3],\n", - " [2, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [1, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [2, 2, 4, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [3, 4, 7, 3],\n", - " [3, 4, 2, 0],\n", - " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(48., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [2, 2, 2, 1],\n", - " [1, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 2, 3, 1],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 2, 3, 1],\n", - " [2, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 2, 3, 2],\n", - " [2, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 2, 3, 2],\n", - " [1, 0, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [2, 2, 3, 2],\n", - " [1, 0, 2, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [0, 3, 3, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 3, 3, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [2, 3, 3, 2],\n", - " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [2, 4, 2, 1],\n", - " [2, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [3, 4, 2, 1],\n", - " [0, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [3, 4, 2, 2],\n", - " [0, 2, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 3],\n", - " [1, 3, 4, 3],\n", - " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [1, 3, 4, 2],\n", - " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [1, 3, 4, 2],\n", - " [2, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [1, 3, 4, 2],\n", - " [1, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [2, 3, 4, 3],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 4],\n", - " [2, 3, 4, 4],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 2],\n", - " [0, 1, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 2],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 3],\n", - " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 3],\n", - " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [2, 3, 4, 4],\n", - " [1, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 5],\n", - " [1, 2, 3, 5],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [1, 2, 3, 2],\n", - " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [1, 2, 3, 2],\n", - " [1, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 2, 3, 3],\n", - " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 0, 3, 4],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 1, 3, 4],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 2, 3, 4],\n", - " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 2, 3, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [1, 2, 0, 0]], dtype=int32), action_mask=Array([False, True, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [1, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [1, 1, 2, 3]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 4, 1],\n", - " [2, 2, 3, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [3, 3, 4, 2],\n", - " [1, 2, 3, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 4, 4, 2],\n", - " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [1, 4, 4, 2],\n", - " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 4, 4, 2],\n", - " [1, 1, 2, 3]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 5, 2, 0],\n", - " [2, 2, 3, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(72., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", - " [4, 6, 7, 6],\n", - " [3, 2, 2, 1],\n", - " [1, 0, 3, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", + " [0, 0, 0, 2],\n", + " [0, 0, 1, 1],\n", + " [0, 0, 3, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 2],\n", + " [0, 1, 2, 1],\n", + " [0, 0, 3, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 0, 0, 2],\n", + " [0, 0, 2, 1],\n", + " [0, 1, 3, 2]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [2, 0, 0, 0],\n", + " [2, 1, 0, 0],\n", + " [1, 3, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 2, 0, 0],\n", + " [1, 3, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 0, 0, 0],\n", + " [3, 2, 0, 0],\n", + " [1, 3, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 0, 1],\n", + " [3, 2, 0, 0],\n", + " [1, 3, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 1, 0, 0],\n", + " [3, 2, 0, 0],\n", + " [1, 3, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 1, 0, 0],\n", + " [3, 2, 1, 0],\n", + " [1, 3, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 1, 0, 0],\n", + " [3, 2, 1, 1],\n", + " [1, 3, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 2, 0, 0],\n", + " [3, 2, 1, 1],\n", + " [1, 3, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 3, 1, 2],\n", + " [1, 3, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 0],\n", + " [3, 0, 1, 1],\n", + " [1, 4, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 0],\n", " [3, 0, 2, 1],\n", - " [1, 0, 3, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 3, 2, 1],\n", - " [0, 1, 3, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 3, 2, 1],\n", - " [1, 3, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 4, 3, 2],\n", - " [1, 1, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 4, 3, 2],\n", - " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 4, 3, 3],\n", - " [1, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 2, 7, 6],\n", - " [2, 2, 4, 4],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 3, 7, 6],\n", - " [2, 0, 4, 4],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 3, 7, 6],\n", - " [0, 0, 2, 5],\n", - " [0, 2, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 3, 7, 6],\n", - " [0, 2, 3, 5],\n", - " [0, 2, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 3, 7, 6],\n", - " [0, 3, 3, 5],\n", - " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 4, 7, 6],\n", + " [1, 4, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 0, 1, 1],\n", + " [1, 4, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 0, 0, 0],\n", + " [3, 0, 1, 2],\n", + " [1, 4, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 0],\n", + " [3, 0, 1, 3],\n", + " [1, 4, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 0, 2, 0],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 0, 2, 1],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 0, 0, 0],\n", + " [3, 0, 2, 2],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 2],\n", + " [3, 0, 2, 2],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 1, 0],\n", + " [3, 0, 2, 3],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 0, 1, 0],\n", + " [3, 1, 2, 3],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 2, 0],\n", + " [3, 1, 2, 3],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 0, 1],\n", + " [3, 1, 3, 3],\n", + " [1, 4, 3, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 1],\n", + " [3, 2, 0, 3],\n", + " [1, 4, 4, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [1, 0, 0, 1],\n", + " [3, 2, 1, 3],\n", + " [1, 4, 4, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 0, 0, 1],\n", + " [3, 2, 1, 3],\n", + " [1, 4, 4, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [2, 1, 0, 2],\n", + " [3, 2, 1, 3],\n", + " [1, 4, 4, 4]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 2, 1, 2],\n", + " [3, 2, 1, 3],\n", + " [0, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 0, 2],\n", + " [1, 3, 2, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 1, 2],\n", + " [1, 3, 2, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 0, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 2, 2],\n", + " [2, 3, 2, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 1, 0, 2],\n", + " [2, 3, 3, 3],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 1],\n", + " [0, 0, 2, 2],\n", + " [0, 2, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [0, 0, 2, 2],\n", + " [0, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 2, 2],\n", + " [1, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 0, 2, 2],\n", + " [2, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [0, 1, 2, 2],\n", + " [2, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [1, 1, 2, 2],\n", + " [2, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [1, 2, 2, 2],\n", + " [2, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [2, 2, 2, 2],\n", + " [2, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [1, 2, 2, 2],\n", + " [3, 3, 3, 4],\n", + " [3, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 1],\n", + " [0, 2, 2, 2],\n", + " [1, 3, 3, 4],\n", + " [4, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [3, 2, 0, 2],\n", + " [1, 4, 4, 0],\n", + " [4, 1, 4, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [3, 2, 1, 0],\n", + " [1, 4, 0, 2],\n", + " [4, 1, 5, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [3, 2, 0, 0],\n", + " [1, 4, 1, 2],\n", + " [4, 1, 5, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [3, 2, 1, 1],\n", + " [1, 4, 1, 2],\n", + " [4, 1, 5, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 0],\n", + " [3, 2, 0, 1],\n", + " [1, 4, 2, 2],\n", + " [4, 1, 5, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 2, 0],\n", + " [3, 2, 1, 1],\n", + " [1, 4, 2, 2],\n", + " [4, 1, 5, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(76., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 2],\n", + " [1, 3, 2, 2],\n", + " [0, 1, 4, 3],\n", + " [0, 4, 1, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 1, 0],\n", + " [0, 3, 2, 3],\n", + " [0, 1, 4, 3],\n", + " [1, 4, 1, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 0],\n", + " [0, 3, 2, 0],\n", + " [0, 1, 4, 4],\n", + " [2, 4, 1, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [0, 3, 2, 2],\n", + " [0, 1, 4, 4],\n", + " [3, 4, 1, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 1],\n", + " [1, 0, 3, 3],\n", + " [0, 0, 1, 5],\n", + " [3, 4, 1, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 1, 2, 3],\n", + " [1, 0, 3, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 2, 2, 3],\n", " [1, 1, 3, 5],\n", - " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [0, 5, 7, 6],\n", - " [0, 2, 3, 5],\n", - " [0, 1, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [0, 5, 7, 6],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 0, 3, 3],\n", " [1, 2, 3, 5],\n", - " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [1, 5, 7, 6],\n", - " [0, 2, 3, 5],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [2, 5, 7, 6],\n", - " [0, 2, 3, 5],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [2, 5, 7, 6],\n", - " [1, 2, 3, 5],\n", - " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [2, 5, 7, 6],\n", - " [2, 2, 3, 5],\n", - " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [3, 5, 7, 6],\n", - " [0, 2, 3, 5],\n", - " [1, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [3, 5, 7, 6],\n", - " [1, 2, 3, 5],\n", - " [1, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [3, 5, 7, 6],\n", - " [2, 2, 3, 5],\n", - " [1, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [3, 5, 7, 6],\n", - " [3, 3, 5, 1],\n", - " [2, 1, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 5, 1],\n", - " [1, 1, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 5, 1],\n", - " [1, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 5, 1],\n", - " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [2, 3, 5, 1],\n", - " [2, 0, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [3, 3, 5, 1],\n", - " [1, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [0, 4, 5, 1],\n", - " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "TimeStep(step_type=Array(2, dtype=int8), reward=Array(0., dtype=float32), discount=Array(0., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", - " [4, 5, 7, 6],\n", - " [1, 4, 5, 1],\n", - " [2, 1, 2, 3]], dtype=int32), action_mask=Array([False, False, False, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", - "3004.0\n" + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [0, 0, 0, 3],\n", + " [1, 2, 4, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 1],\n", + " [0, 0, 1, 3],\n", + " [1, 2, 4, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [1, 3, 0, 1],\n", + " [1, 2, 4, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [2, 3, 0, 1],\n", + " [2, 2, 4, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [1, 3, 0, 1],\n", + " [3, 2, 4, 5],\n", + " [3, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 3, 1, 1],\n", + " [1, 2, 4, 5],\n", + " [4, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 3, 1, 1],\n", + " [2, 2, 4, 5],\n", + " [4, 4, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(44., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 1, 3, 2],\n", + " [0, 3, 4, 5],\n", + " [0, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 1, 3, 2],\n", + " [0, 3, 4, 5],\n", + " [1, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 2, 3, 2],\n", + " [0, 3, 4, 5],\n", + " [1, 5, 2, 6]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [0, 2, 3, 2],\n", + " [0, 3, 4, 5],\n", + " [2, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [2, 2, 3, 2],\n", + " [0, 3, 4, 5],\n", + " [2, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 2, 3, 3],\n", + " [1, 3, 4, 5],\n", + " [3, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 2, 4],\n", + " [1, 3, 4, 5],\n", + " [3, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 0, 2, 4],\n", + " [2, 3, 4, 5],\n", + " [3, 5, 2, 6]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [2, 4, 0, 0],\n", + " [2, 3, 4, 5],\n", + " [3, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 4, 1, 0],\n", + " [3, 3, 4, 5],\n", + " [3, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 4, 1, 0],\n", + " [1, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 4, 1, 0],\n", + " [2, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [0, 4, 1, 1],\n", + " [2, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 0],\n", + " [1, 4, 1, 1],\n", + " [2, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 0],\n", + " [1, 4, 2, 1],\n", + " [2, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 1],\n", + " [2, 4, 2, 1],\n", + " [2, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 0],\n", + " [0, 4, 2, 2],\n", + " [3, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 2, 0],\n", + " [1, 4, 2, 2],\n", + " [3, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [1, 4, 3, 2],\n", + " [3, 3, 4, 5],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [1, 4, 3, 2],\n", + " [4, 4, 5, 2],\n", + " [4, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(72., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [2, 1, 3, 0],\n", + " [1, 5, 5, 3],\n", + " [5, 5, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [2, 0, 3, 0],\n", + " [1, 1, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [2, 0, 3, 1],\n", + " [1, 1, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [2, 2, 3, 1],\n", + " [1, 1, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [2, 2, 3, 2],\n", + " [1, 1, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 3, 3, 2],\n", + " [0, 2, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [0, 3, 3, 3],\n", + " [1, 2, 5, 3],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [0, 3, 3, 0],\n", + " [1, 2, 5, 4],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [0, 0, 1, 4],\n", + " [1, 2, 5, 4],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 0, 1, 2],\n", + " [1, 2, 5, 5],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [0, 1, 1, 2],\n", + " [1, 2, 5, 5],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [2, 1, 1, 2],\n", + " [2, 2, 5, 5],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 1, 2],\n", + " [3, 2, 5, 5],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 2, 2],\n", + " [1, 3, 2, 6],\n", + " [5, 6, 2, 6]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(136., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 1, 0, 0],\n", + " [1, 3, 2, 2],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 1, 0, 1],\n", + " [1, 3, 2, 2],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 0, 2],\n", + " [1, 3, 2, 2],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 0, 1],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [0, 0, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 0, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 1, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 2, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [1, 2, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [2, 2, 1, 2],\n", + " [2, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 2, 2, 2],\n", + " [3, 3, 2, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [0, 2, 0, 2],\n", + " [3, 3, 3, 3],\n", + " [5, 6, 3, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 1],\n", + " [1, 2, 0, 2],\n", + " [3, 3, 0, 3],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [1, 2, 2, 2],\n", + " [3, 3, 1, 3],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 2],\n", + " [0, 1, 2, 3],\n", + " [0, 4, 1, 3],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 0],\n", + " [0, 2, 2, 2],\n", + " [0, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 2, 3, 2],\n", + " [1, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [2, 3, 2, 0],\n", + " [1, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 2, 3, 2],\n", + " [1, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 2, 3, 2],\n", + " [1, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 2, 3, 3],\n", + " [2, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [0, 1, 2, 4],\n", + " [2, 4, 1, 4],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 2, 1],\n", + " [2, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 2, 2, 1],\n", + " [2, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 2, 3, 1],\n", + " [2, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 2, 3, 2],\n", + " [2, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 0],\n", + " [2, 3, 2, 0],\n", + " [2, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 0],\n", + " [1, 3, 2, 0],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 0],\n", + " [2, 3, 2, 0],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 2],\n", + " [0, 2, 3, 2],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 2, 3, 3],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [1, 2, 4, 0],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [2, 2, 4, 0],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 0],\n", + " [3, 4, 0, 1],\n", + " [3, 4, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(48., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [2, 0, 1, 1],\n", + " [4, 5, 1, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [2, 0, 0, 1],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [2, 0, 1, 1],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 2, 2],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 1],\n", + " [1, 0, 0, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [1, 2, 0, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 1],\n", + " [2, 2, 0, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [3, 2, 0, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [3, 2, 1, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 2],\n", + " [3, 2, 1, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [3, 2, 2, 3],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [3, 3, 3, 0],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", + " [3, 3, 1, 0],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 4, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 0],\n", + " [3, 3, 0, 0],\n", + " [4, 5, 1, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 0],\n", + " [3, 3, 0, 0],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [3, 3, 1, 0],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 0],\n", + " [3, 3, 1, 1],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [3, 3, 2, 1],\n", + " [4, 5, 2, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 0],\n", + " [3, 3, 0, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [3, 3, 1, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 2],\n", + " [1, 4, 1, 2],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 4, 2, 3],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 1],\n", + " [1, 4, 2, 3],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 2, 0],\n", + " [1, 4, 2, 3],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 0],\n", + " [1, 4, 3, 3],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [1, 4, 1, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 2],\n", + " [1, 4, 1, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 0],\n", + " [1, 4, 1, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 0],\n", + " [2, 4, 2, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 3, 1],\n", + " [2, 4, 2, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 1],\n", + " [2, 4, 2, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 3, 2],\n", + " [2, 4, 2, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 2, 1],\n", + " [2, 4, 2, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 0, 1],\n", + " [3, 4, 3, 3],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 3, 1],\n", + " [0, 3, 4, 4],\n", + " [4, 5, 4, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 1],\n", + " [1, 3, 3, 4],\n", + " [4, 5, 5, 5],\n", + " [5, 6, 5, 7]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 1],\n", + " [1, 3, 1, 4],\n", + " [4, 5, 3, 5],\n", + " [5, 6, 6, 7]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 1, 0],\n", + " [1, 3, 1, 4],\n", + " [4, 5, 3, 5],\n", + " [5, 7, 7, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 0, 0],\n", + " [2, 3, 2, 4],\n", + " [4, 5, 3, 5],\n", + " [5, 7, 7, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [3, 3, 2, 4],\n", + " [4, 5, 3, 5],\n", + " [5, 7, 7, 1]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(272., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 2, 0],\n", + " [4, 2, 4, 0],\n", + " [4, 5, 3, 5],\n", + " [5, 8, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 0],\n", + " [2, 2, 4, 0],\n", + " [5, 5, 3, 1],\n", + " [5, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 1],\n", + " [0, 2, 4, 0],\n", + " [2, 5, 3, 1],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 1],\n", + " [0, 2, 4, 0],\n", + " [2, 5, 3, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 0],\n", + " [1, 2, 4, 1],\n", + " [2, 5, 3, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 1],\n", + " [1, 2, 4, 1],\n", + " [2, 5, 3, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [2, 3, 4, 2],\n", + " [2, 5, 3, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 1],\n", + " [0, 3, 4, 0],\n", + " [3, 5, 3, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [0, 3, 4, 1],\n", + " [3, 5, 3, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 0],\n", + " [3, 4, 1, 0],\n", + " [3, 5, 4, 1],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 4, 1, 0],\n", + " [4, 5, 4, 1],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 4, 1, 0],\n", + " [4, 5, 4, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [2, 4, 1, 1],\n", + " [4, 5, 4, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [0, 2, 4, 2],\n", + " [4, 5, 4, 2],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 2, 0, 1],\n", + " [4, 5, 5, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 1, 2, 1],\n", + " [1, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 0],\n", + " [0, 1, 2, 2],\n", + " [1, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 3, 2],\n", + " [1, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [0, 1, 3, 2],\n", + " [2, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 1, 3, 2],\n", + " [2, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [2, 3, 2, 1],\n", + " [2, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 0],\n", + " [1, 3, 2, 1],\n", + " [3, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 3, 3, 1],\n", + " [3, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [1, 4, 1, 0],\n", + " [3, 4, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [2, 0, 1, 1],\n", + " [3, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 0, 2, 1],\n", + " [3, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 0, 2, 2],\n", + " [3, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 0],\n", + " [3, 2, 0, 0],\n", + " [3, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 2, 1, 0],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 2, 1, 2],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [1, 2, 1, 2],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [2, 2, 1, 2],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 0],\n", + " [3, 1, 2, 0],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [3, 1, 2, 0],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [3, 2, 2, 1],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [3, 3, 1, 0],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 0],\n", + " [3, 3, 1, 1],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [3, 3, 2, 1],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [3, 3, 2, 2],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 0],\n", + " [4, 3, 0, 0],\n", + " [4, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [2, 3, 1, 0],\n", + " [5, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 0],\n", + " [2, 3, 2, 0],\n", + " [5, 5, 6, 3],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 0],\n", + " [2, 3, 2, 0],\n", + " [6, 6, 3, 1],\n", + " [6, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(136., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [0, 3, 2, 1],\n", + " [3, 6, 3, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [0, 3, 2, 0],\n", + " [3, 6, 3, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 1],\n", + " [3, 2, 0, 0],\n", + " [3, 6, 3, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [2, 2, 0, 1],\n", + " [4, 6, 3, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [2, 2, 0, 2],\n", + " [4, 6, 3, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [2, 2, 1, 0],\n", + " [4, 6, 3, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 3, 1],\n", + " [0, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 3, 2],\n", + " [1, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [1, 3, 2, 0],\n", + " [1, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [1, 3, 2, 0],\n", + " [2, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [2, 3, 2, 1],\n", + " [2, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 3, 2, 1],\n", + " [3, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [0, 3, 2, 2],\n", + " [3, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 0],\n", + " [1, 3, 2, 2],\n", + " [3, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 2],\n", + " [0, 1, 3, 3],\n", + " [3, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 0],\n", + " [1, 4, 1, 0],\n", + " [3, 4, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [2, 2, 1, 0],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [0, 0, 3, 1],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 0, 3, 2],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 0],\n", + " [1, 3, 2, 0],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 1],\n", + " [2, 3, 2, 0],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 0],\n", + " [2, 3, 2, 1],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 2],\n", + " [2, 3, 2, 1],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 0, 1],\n", + " [2, 3, 2, 1],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 1],\n", + " [3, 3, 2, 2],\n", + " [3, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 1],\n", + " [0, 3, 2, 2],\n", + " [4, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 0, 0],\n", + " [3, 3, 1, 0],\n", + " [4, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 3],\n", + " [0, 0, 4, 1],\n", + " [4, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 0, 0],\n", + " [4, 1, 1, 0],\n", + " [4, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 0],\n", + " [1, 1, 1, 0],\n", + " [5, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 0, 1],\n", + " [1, 1, 2, 0],\n", + " [5, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 0, 1],\n", + " [1, 1, 2, 1],\n", + " [5, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 0, 0],\n", + " [1, 1, 2, 2],\n", + " [5, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 0, 1],\n", + " [2, 1, 2, 2],\n", + " [5, 5, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(72., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 3, 1],\n", + " [0, 2, 1, 3],\n", + " [0, 6, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 3, 1],\n", + " [2, 2, 1, 3],\n", + " [1, 6, 6, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(136., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 1, 0, 1],\n", + " [3, 1, 3, 0],\n", + " [1, 7, 4, 0],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [4, 2, 3, 0],\n", + " [1, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [0, 4, 2, 3],\n", + " [1, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 1],\n", + " [0, 4, 2, 3],\n", + " [2, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [1, 4, 2, 3],\n", + " [2, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 2],\n", + " [1, 4, 2, 3],\n", + " [2, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 2],\n", + " [1, 4, 2, 3],\n", + " [2, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 0, 2],\n", + " [2, 4, 2, 3],\n", + " [2, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 1, 2],\n", + " [1, 4, 2, 3],\n", + " [3, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 2, 1],\n", + " [1, 4, 2, 3],\n", + " [3, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 1],\n", + " [1, 4, 3, 3],\n", + " [3, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 1],\n", + " [1, 4, 4, 0],\n", + " [3, 7, 4, 1],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 2, 0],\n", + " [1, 4, 1, 0],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 0],\n", + " [1, 4, 1, 0],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 0, 0],\n", + " [1, 4, 2, 1],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 3, 2],\n", + " [1, 4, 2, 1],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 3, 2],\n", + " [2, 4, 2, 1],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 2, 1],\n", + " [2, 4, 2, 1],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 0],\n", + " [2, 4, 3, 2],\n", + " [3, 7, 5, 2],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 1],\n", + " [2, 4, 3, 0],\n", + " [3, 7, 5, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 1],\n", + " [2, 4, 3, 1],\n", + " [3, 7, 5, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 1],\n", + " [2, 4, 3, 2],\n", + " [3, 7, 5, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 3, 2],\n", + " [2, 4, 3, 2],\n", + " [3, 7, 5, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [2, 4, 4, 3],\n", + " [3, 7, 5, 3],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [2, 4, 4, 1],\n", + " [3, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 1],\n", + " [2, 4, 4, 2],\n", + " [3, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 0, 1],\n", + " [2, 5, 2, 0],\n", + " [3, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [3, 5, 2, 1],\n", + " [3, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 0],\n", + " [0, 5, 2, 2],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 0, 0],\n", + " [5, 3, 0, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 5, 3, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 0],\n", + " [1, 5, 3, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 0],\n", + " [1, 5, 3, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 0],\n", + " [1, 5, 3, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 1, 0, 1],\n", + " [1, 5, 3, 1],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 1, 1, 0],\n", + " [1, 5, 3, 2],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 0],\n", + " [1, 5, 3, 2],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, True, False, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 2, 1],\n", + " [1, 5, 3, 2],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "TimeStep(step_type=Array(2, dtype=int8), reward=Array(4., dtype=float32), discount=Array(0., dtype=float32), observation=Observation(board=Array([[1, 3, 2, 1],\n", + " [2, 5, 3, 2],\n", + " [4, 7, 5, 4],\n", + " [7, 8, 1, 5]], dtype=int32), action_mask=Array([False, False, False, False], dtype=bool)), extras={'highest_tile': Array(256, dtype=int32)})\n", + "3716.0\n" ] } ], @@ -1222,7 +1359,7 @@ "import jax, jumanji\n", "\n", "env = jumanji.make(\"Game2048-v1\")\n", - "key = jax.random.PRNGKey(48)\n", + "key = jax.random.PRNGKey(0)\n", "jit_reset = jax.jit(env.reset)\n", "jit_step = jax.jit(env.step)\n", "state, timestep = jax.jit(env.reset)(key)\n", @@ -1244,12 +1381,464 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-06-05T05:15:43.041491500Z", - "start_time": "2024-06-05T05:15:37.325953600Z" + "end_time": "2024-06-05T07:41:33.703431900Z", + "start_time": "2024-06-05T07:41:26.102578200Z" } }, "id": "f166e09c5be1a8fb" }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "import jax.random\n", + "from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048\n", + "\n", + "\n", + "def random_policy(state, params, obs):\n", + " key = jax.random.key(obs.sum())\n", + " actions = jax.random.normal(key, (4,))\n", + " return actions\n", + "\n", + "problem = Jumanji_2048(max_step=10000, repeat_times=10, guarantee_invalid_action=True)\n", + "state = problem.setup()\n", + "jit_evaluate = jax.jit(lambda state, randkey: problem.evaluate(state, randkey, random_policy, None))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-05T08:06:59.491563700Z", + "start_time": "2024-06-05T08:06:59.465404900Z" + } + }, + "id": "187326d08ac1eeb4" + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1193.2001\n" + ] + } + ], + "source": [ + "\n", + "reward = jit_evaluate(state, randkey)\n", + "print(reward)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-05T08:07:21.630420300Z", + "start_time": "2024-06-05T08:07:21.107419400Z" + } + }, + "id": "4b3506db87568d81" + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 1],\n", + " [1, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [1, 1, 1, 1]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [2, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [3, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [3, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [1, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [3, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [2, 0, 1, 0],\n", + " [3, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [2, 1, 0, 0],\n", + " [3, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 0, 0],\n", + " [0, 0, 0, 0],\n", + " [2, 1, 0, 0],\n", + " [3, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 0],\n", + " [2, 2, 0, 0],\n", + " [3, 0, 0, 1],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 1, 1],\n", + " [2, 2, 0, 0],\n", + " [3, 0, 0, 0],\n", + " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [1, 1, 0, 0],\n", + " [2, 2, 0, 0],\n", + " [3, 1, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 1, 1],\n", + " [2, 2, 0, 0],\n", + " [3, 1, 0, 2],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 0, 0, 0],\n", + " [2, 3, 1, 1],\n", + " [3, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 0, 0],\n", + " [2, 3, 0, 1],\n", + " [3, 1, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 0, 2, 1],\n", + " [0, 2, 3, 1],\n", + " [0, 3, 1, 3]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 2, 1],\n", + " [0, 2, 3, 2],\n", + " [1, 3, 1, 3]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 2, 1],\n", + " [0, 3, 3, 2],\n", + " [1, 0, 1, 3],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 2, 1],\n", + " [1, 2, 3, 2],\n", + " [2, 3, 1, 3]], dtype=int32), action_mask=Array([ True, False, False, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [2, 1, 1, 0],\n", + " [1, 2, 3, 2],\n", + " [2, 3, 1, 3]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 0, 2, 2],\n", + " [1, 2, 3, 2],\n", + " [2, 3, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 2, 1],\n", + " [2, 3, 3, 3],\n", + " [1, 0, 1, 3],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 2, 1],\n", + " [2, 3, 3, 4],\n", + " [1, 0, 1, 0],\n", + " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 3, 1],\n", + " [0, 2, 4, 4],\n", + " [0, 0, 0, 2],\n", + " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [1, 0, 0, 4],\n", + " [0, 1, 3, 2],\n", + " [1, 2, 4, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 1, 0],\n", + " [1, 4, 0, 0],\n", + " [1, 3, 2, 0],\n", + " [1, 2, 4, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 2],\n", + " [0, 0, 1, 4],\n", + " [0, 1, 3, 2],\n", + " [1, 2, 4, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 2, 2],\n", + " [0, 2, 3, 4],\n", + " [0, 0, 4, 2],\n", + " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", + " [1, 0, 2, 4],\n", + " [0, 1, 3, 2],\n", + " [2, 2, 4, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 2, 2],\n", + " [2, 2, 3, 4],\n", + " [0, 1, 4, 2],\n", + " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 2, 3],\n", + " [0, 3, 3, 4],\n", + " [1, 1, 4, 2],\n", + " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 2, 3],\n", + " [0, 1, 3, 4],\n", + " [0, 0, 4, 2],\n", + " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 2, 3],\n", + " [1, 1, 3, 4],\n", + " [0, 0, 4, 2],\n", + " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 2, 3],\n", + " [0, 1, 3, 4],\n", + " [0, 1, 4, 3],\n", + " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 2, 3],\n", + " [0, 2, 3, 4],\n", + " [0, 0, 4, 3],\n", + " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 2, 3],\n", + " [0, 2, 3, 4],\n", + " [2, 0, 4, 3],\n", + " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 3, 2, 3],\n", + " [0, 2, 3, 4],\n", + " [0, 0, 4, 3],\n", + " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 3, 2, 3],\n", + " [0, 2, 3, 4],\n", + " [0, 1, 4, 3],\n", + " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [1, 4, 3, 0],\n", + " [2, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, False], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [0, 2, 3, 4],\n", + " [0, 1, 4, 3],\n", + " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [1, 4, 3, 0],\n", + " [1, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [2, 4, 3, 1],\n", + " [0, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [2, 4, 3, 1],\n", + " [2, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [2, 4, 3, 1],\n", + " [2, 2, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [0, 2, 3, 4],\n", + " [2, 4, 3, 1],\n", + " [0, 1, 2, 3]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [2, 3, 4, 0],\n", + " [2, 4, 3, 1],\n", + " [1, 2, 3, 1]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 3, 1],\n", + " [1, 2, 3, 4],\n", + " [2, 4, 3, 1],\n", + " [1, 2, 3, 1]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(44., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 0, 0, 1],\n", + " [1, 3, 0, 1],\n", + " [2, 4, 4, 4],\n", + " [1, 2, 4, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 1, 0, 0],\n", + " [1, 3, 1, 0],\n", + " [2, 5, 4, 1],\n", + " [1, 2, 4, 2]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 1, 1, 1],\n", + " [1, 3, 5, 2],\n", + " [2, 5, 0, 1],\n", + " [1, 2, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 4, 1, 2],\n", + " [1, 3, 5, 2],\n", + " [1, 2, 5, 1],\n", + " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 1, 2, 0],\n", + " [1, 3, 5, 2],\n", + " [1, 2, 5, 1],\n", + " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(80., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 1, 2, 2],\n", + " [2, 3, 6, 2],\n", + " [1, 3, 0, 0],\n", + " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 4, 1, 3],\n", + " [2, 3, 6, 2],\n", + " [0, 0, 1, 3],\n", + " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 3],\n", + " [2, 3, 6, 2],\n", + " [0, 0, 1, 3],\n", + " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 3],\n", + " [0, 3, 6, 2],\n", + " [0, 0, 1, 3],\n", + " [1, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 3],\n", + " [1, 3, 6, 2],\n", + " [0, 0, 1, 3],\n", + " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 3],\n", + " [1, 3, 6, 2],\n", + " [1, 3, 1, 0],\n", + " [1, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 3],\n", + " [2, 4, 6, 2],\n", + " [1, 2, 1, 0],\n", + " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 5, 1, 3],\n", + " [2, 2, 6, 2],\n", + " [2, 0, 1, 0],\n", + " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 1, 1, 0],\n", + " [3, 5, 6, 3],\n", + " [3, 2, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [1, 1, 1, 0],\n", + " [0, 5, 6, 3],\n", + " [4, 2, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 0],\n", + " [0, 0, 1, 2],\n", + " [1, 5, 6, 3],\n", + " [0, 4, 2, 3]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", + " [1, 2, 0, 0],\n", + " [1, 5, 6, 3],\n", + " [4, 2, 3, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 1],\n", + " [0, 2, 1, 0],\n", + " [2, 5, 6, 0],\n", + " [4, 2, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 1],\n", + " [4, 5, 6, 3],\n", + " [0, 2, 3, 0],\n", + " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 0, 0],\n", + " [4, 5, 6, 3],\n", + " [2, 3, 1, 0],\n", + " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [4, 5, 1, 0],\n", + " [2, 3, 1, 0],\n", + " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [4, 5, 2, 0],\n", + " [2, 3, 0, 0],\n", + " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [4, 5, 2, 0],\n", + " [2, 3, 1, 0],\n", + " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [1, 4, 5, 2],\n", + " [0, 2, 3, 1],\n", + " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [1, 4, 5, 2],\n", + " [2, 3, 1, 1],\n", + " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [1, 4, 5, 2],\n", + " [1, 2, 3, 2],\n", + " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 6, 3],\n", + " [1, 4, 5, 2],\n", + " [1, 2, 3, 2],\n", + " [2, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 6, 0],\n", + " [3, 2, 5, 1],\n", + " [2, 4, 3, 3],\n", + " [2, 2, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 6, 0],\n", + " [0, 2, 5, 1],\n", + " [3, 4, 3, 1],\n", + " [3, 2, 1, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 6],\n", + " [1, 2, 5, 1],\n", + " [3, 4, 3, 1],\n", + " [3, 2, 1, 4]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[6, 0, 0, 1],\n", + " [1, 2, 5, 1],\n", + " [3, 4, 3, 1],\n", + " [3, 2, 1, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 0, 0],\n", + " [6, 2, 5, 1],\n", + " [1, 4, 3, 2],\n", + " [4, 2, 1, 4]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 0, 2, 0],\n", + " [6, 2, 5, 1],\n", + " [1, 4, 3, 2],\n", + " [4, 2, 1, 4]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 2, 1],\n", + " [6, 4, 5, 2],\n", + " [1, 2, 3, 4],\n", + " [4, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 3, 1],\n", + " [6, 4, 5, 2],\n", + " [1, 2, 3, 4],\n", + " [0, 0, 4, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 3, 1],\n", + " [6, 4, 5, 2],\n", + " [1, 2, 3, 4],\n", + " [0, 1, 4, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 1],\n", + " [6, 4, 5, 2],\n", + " [1, 2, 3, 4],\n", + " [1, 4, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 0],\n", + " [2, 4, 5, 1],\n", + " [6, 2, 3, 2],\n", + " [2, 4, 2, 4]], dtype=int32), action_mask=Array([ True, True, False, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 3, 1],\n", + " [2, 4, 5, 1],\n", + " [6, 2, 3, 2],\n", + " [2, 4, 2, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 3, 1],\n", + " [2, 4, 5, 1],\n", + " [6, 2, 3, 2],\n", + " [2, 4, 2, 4]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 2, 3, 1],\n", + " [2, 4, 5, 2],\n", + " [6, 2, 3, 2],\n", + " [2, 4, 2, 4]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "TimeStep(step_type=Array(2, dtype=int8), reward=Array(8., dtype=float32), discount=Array(0., dtype=float32), observation=Observation(board=Array([[1, 2, 3, 1],\n", + " [2, 4, 5, 3],\n", + " [6, 2, 3, 4],\n", + " [2, 4, 2, 1]], dtype=int32), action_mask=Array([False, False, False, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", + "636.0\n" + ] + } + ], + "source": [ + "randkey = jax.random.PRNGKey(14)\n", + "jit_policy = jax.jit(random_policy)\n", + "total_reward = 0\n", + "state, timestep = jax.jit(env.reset)(randkey )\n", + "while True:\n", + " board, action_mask = timestep[\"observation\"]\n", + " action = jit_policy(None, None, timestep[\"observation\"][0].reshape(-1))\n", + " score_with_mask = jnp.where(action_mask, action, -jnp.inf)\n", + " action = jnp.argmax(score_with_mask)\n", + " state, timestep = jit_step(state, action)\n", + " done = jnp.all(~timestep[\"observation\"][1])\n", + " print(timestep)\n", + " total_reward += timestep[\"reward\"]\n", + " if done:\n", + " break\n", + "print(total_reward)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-05T08:09:58.242414600Z", + "start_time": "2024-06-05T08:09:56.452642800Z" + } + }, + "id": "8bb888fb742b6b06" + }, { "cell_type": "code", "execution_count": null, @@ -1258,7 +1847,7 @@ "metadata": { "collapsed": false }, - "id": "187326d08ac1eeb4" + "id": "3d1b5c8c646d4f07" } ], "metadata": { diff --git a/tensorneat/examples/jumanji/train_2048.py b/tensorneat/examples/jumanji/train_2048.py new file mode 100644 index 0000000..336f07c --- /dev/null +++ b/tensorneat/examples/jumanji/train_2048.py @@ -0,0 +1,119 @@ +import jax, jax.numpy as jnp + +from pipeline import Pipeline +from algorithm.neat import * +from algorithm.neat.gene.node.default_without_response import NodeGeneWithoutResponse +from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048 +from utils import Act, Agg + + +def rot_li(li): + return li[1:] + [li[0]] + + +def rot_boards(board): + def rot(a, _): + a = jnp.rot90(a) + return a, a # carry, y + + # carry, np.stack(ys) + _, boards = jax.lax.scan(rot, board, jnp.arange(4, dtype=jnp.int32)) + return boards + + +direction = ["up", "right", "down", "left"] +lr_flip_direction = ["up", "left", "down", "right"] + +directions = [] +lr_flip_directions = [] +for _ in range(4): + direction = rot_li(direction) + lr_flip_direction = rot_li(lr_flip_direction) + directions.append(direction.copy()) + lr_flip_directions.append(lr_flip_direction.copy()) + +full_directions = directions + lr_flip_directions + + +def action_policy(forward_func, obs): + board = obs.reshape(4, 4) + lr_flip_board = jnp.fliplr(board) + + boards = rot_boards(board) + lr_flip_boards = rot_boards(lr_flip_board) + # stack + full_boards = jnp.concatenate([boards, lr_flip_boards], axis=0) + scores = jax.vmap(forward_func)(full_boards.reshape(8, -1)) + total_score = {"up": 0, "right": 0, "down": 0, "left": 0} + for i in range(8): + dire = full_directions[i] + for j in range(4): + total_score[dire[j]] += scores[i, j] + + return jnp.array( + [ + total_score["up"], + total_score["right"], + total_score["down"], + total_score["left"], + ] + ) + + +if __name__ == "__main__": + pipeline = Pipeline( + algorithm=NEAT( + species=DefaultSpecies( + genome=DefaultGenome( + num_inputs=16, + num_outputs=4, + max_nodes=100, + max_conns=1000, + node_gene=NodeGeneWithoutResponse( + activation_default=Act.sigmoid, + activation_options=( + Act.sigmoid, + Act.relu, + Act.tanh, + Act.identity, + ), + aggregation_default=Agg.sum, + aggregation_options=(Agg.sum,), + activation_replace_rate=0.02, + aggregation_replace_rate=0.02, + bias_mutate_rate=0.03, + bias_init_std=0.5, + bias_mutate_power=0.2, + bias_replace_rate=0.01, + ), + conn_gene=DefaultConnGene( + weight_mutate_rate=0.015, + weight_replace_rate=0.003, + weight_mutate_power=0.5, + ), + mutation=DefaultMutation(node_add=0.001, conn_add=0.002), + ), + pop_size=1000, + species_size=5, + survival_threshold=0.1, + max_stagnation=7, + genome_elitism=3, + compatibility_threshold=1.2, + ), + ), + problem=Jumanji_2048( + max_step=10000, + repeat_times=10, + guarantee_invalid_action=True, + action_policy=action_policy, + ), + generation_limit=1000, + fitness_target=13000, + save_path="2048.npz", + ) + + # initialize state + state = pipeline.setup() + # print(state) + # run until terminate + state, best = pipeline.auto_run(state) diff --git a/tensorneat/pipeline.py b/tensorneat/pipeline.py index edaead7..a31808b 100644 --- a/tensorneat/pipeline.py +++ b/tensorneat/pipeline.py @@ -19,6 +19,7 @@ class Pipeline: generation_limit: int = 1000, pre_update: bool = False, update_batch_size: int = 10000, + save_path=None, ): assert problem.jitable, "Currently, problem must be jitable" @@ -55,6 +56,7 @@ class Pipeline: assert not problem.record_episode, "record_episode must be False" elif isinstance(problem, FuncFit): assert not problem.return_data, "return_data must be False" + self.save_path = save_path def setup(self, state=State()): print("initializing") @@ -181,6 +183,17 @@ class Pipeline: self.best_fitness = fitnesses[max_idx] self.best_genome = pop[0][max_idx], pop[1][max_idx] + # save best if save path is not None + if self.save_path is not None: + best_genome = jax.device_get(self.best_genome) + with open(self.save_path, "wb") as f: + np.savez( + f, + nodes=best_genome[0], + conns=best_genome[1], + fitness=self.best_fitness, + ) + member_count = jax.device_get(self.algorithm.member_count(state)) species_sizes = [int(i) for i in member_count if i > 0] diff --git a/tensorneat/problem/rl_env/brax_env.py b/tensorneat/problem/rl_env/brax_env.py index 7df8040..f3adb15 100644 --- a/tensorneat/problem/rl_env/brax_env.py +++ b/tensorneat/problem/rl_env/brax_env.py @@ -5,8 +5,10 @@ from .rl_jit import RLEnv class BraxEnv(RLEnv): - def __init__(self, max_step=1000, repeat_times=1, record_episode=False, env_name: str = "ant", backend: str = "generalized"): - super().__init__(max_step, repeat_times, record_episode) + def __init__( + self, env_name: str = "ant", backend: str = "generalized", *args, **kwargs + ): + super().__init__(*args, **kwargs) self.env = envs.create(env_name=env_name, backend=backend) def env_step(self, randkey, env_state, action): diff --git a/tensorneat/problem/rl_env/gymnax_env.py b/tensorneat/problem/rl_env/gymnax_env.py index af75d60..da15122 100644 --- a/tensorneat/problem/rl_env/gymnax_env.py +++ b/tensorneat/problem/rl_env/gymnax_env.py @@ -4,8 +4,8 @@ from .rl_jit import RLEnv class GymNaxEnv(RLEnv): - def __init__(self, env_name, max_step=1000, repeat_times=1, record_episode=False): - super().__init__(max_step, repeat_times, record_episode) + def __init__(self, env_name, *args, **kwargs): + super().__init__(*args, **kwargs) assert env_name in gymnax.registered_envs, f"Env {env_name} not registered" self.env, self.env_params = gymnax.make(env_name) diff --git a/tensorneat/problem/rl_env/jumanji/jumanji_2048.py b/tensorneat/problem/rl_env/jumanji/jumanji_2048.py index e9b7274..8d23fca 100644 --- a/tensorneat/problem/rl_env/jumanji/jumanji_2048.py +++ b/tensorneat/problem/rl_env/jumanji/jumanji_2048.py @@ -7,14 +7,21 @@ from ..rl_jit import RLEnv class Jumanji_2048(RLEnv): def __init__( - self, max_step=1000, repeat_times=1, record_episode=False, guarantee_invalid_action=True + self, guarantee_invalid_action=True, *args, **kwargs ): - super().__init__(max_step, repeat_times, record_episode) + super().__init__(*args, **kwargs) self.guarantee_invalid_action = guarantee_invalid_action self.env = jumanji.make("Game2048-v1") def env_step(self, randkey, env_state, action): action_mask = env_state["action_mask"] + + ################################################################### + + action = jnp.concatenate([action, jnp.full((4 - action.shape[0], ), -99999)]) + action = (action - 1) / 15 + + ################################################################### if self.guarantee_invalid_action: score_with_mask = jnp.where(action_mask, action, -jnp.inf) action = jnp.argmax(score_with_mask) diff --git a/tensorneat/problem/rl_env/rl_jit.py b/tensorneat/problem/rl_env/rl_jit.py index 285a9a6..c5d54c8 100644 --- a/tensorneat/problem/rl_env/rl_jit.py +++ b/tensorneat/problem/rl_env/rl_jit.py @@ -11,11 +11,18 @@ from .. import BaseProblem class RLEnv(BaseProblem): jitable = True - def __init__(self, max_step=1000, repeat_times=1, record_episode=False): + def __init__( + self, + max_step=1000, + repeat_times=1, + record_episode=False, + action_policy: Callable = None, + ): super().__init__() self.max_step = max_step self.record_episode = record_episode self.repeat_times = repeat_times + self.action_policy = action_policy def evaluate(self, state: State, randkey, act_func: Callable, params): keys = jax.random.split(randkey, self.repeat_times) @@ -63,7 +70,11 @@ class RLEnv(BaseProblem): def body_func(carry): obs, env_state, rng, done, tr, count, epis = carry # tr -> total reward - action = act_func(state, params, obs) + if self.action_policy is not None: + forward_func = lambda obs: act_func(state, params, obs) + action = self.action_policy(forward_func, obs) + else: + action = act_func(state, params, obs) next_obs, next_env_state, reward, done, _ = self.step( rng, env_state, action ) diff --git a/tensorneat/test/test_efficient_b_spline.ipynb b/tensorneat/test/test_efficient_b_spline.ipynb new file mode 100644 index 0000000..5c62a9c --- /dev/null +++ b/tensorneat/test/test_efficient_b_spline.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2024-06-03T03:34:30.787475200Z", + "start_time": "2024-06-03T03:34:28.159120700Z" + } + }, + "outputs": [], + "source": [ + "from algorithm.neat.gene.conn.bspline import BSplineConn\n", + "from algorithm.neat.gene.conn.cache_bspline import CacheBSplineConn\n", + "import jax, jax.numpy as jnp" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "normal_gene = BSplineConn(grid_cnt=6, spline_order=3, grid_init_range=[-1, 1])\n", + "cache_gene = CacheBSplineConn(grid_cnt=6, spline_order=3, grid_range=[-1, 1], cache_num=1000000)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:41:35.807924900Z", + "start_time": "2024-06-03T03:41:35.793415500Z" + } + }, + "id": "a09d6ccf956606b3" + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [ + { + "data": { + "text/plain": "State ({'kan_initial_grids': Array([-1. , -0.6 , -0.20000002, 0.20000005, 0.6 ,\n 1. ], dtype=float32), 'bspline_cache': Array([[0.16666666, 0.6666666 , 0.16666667, ..., 0. , 0. ,\n 0. ],\n [0.16666412, 0.6666665 , 0.1666692 , ..., 0. , 0. ,\n 0. ],\n [0.16666159, 0.66666657, 0.16667175, ..., 0. , 0. ,\n 0. ],\n ...,\n [0. , 0. , 0. , ..., 0.1666717 , 0.6666666 ,\n 0.16666172],\n [0. , 0. , 0. , ..., 0.16666915, 0.6666666 ,\n 0.16666426],\n [0. , 0. , 0. , ..., 0.16666675, 0.6666665 ,\n 0.16666663]], dtype=float32)})" + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "state = normal_gene.setup()\n", + "state = cache_gene.setup(state)\n", + "state" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:41:37.462678200Z", + "start_time": "2024-06-03T03:41:36.459771900Z" + } + }, + "id": "57fbeab2e4d4c511" + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": "Array([ 0.08086783, -0.38624713, -0.37565565, 1.6689739 , -1.2758198 ,\n 2.1192005 , -0.85821223, 1.1305932 ], dtype=float32)" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "key = jax.random.PRNGKey(0)\n", + "normal_attrs = normal_gene.new_random_attrs(state, key)\n", + "normal_attrs\n", + "weights = normal_attrs[normal_gene.grid_cnt:]\n", + "weights" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:41:38.867812Z", + "start_time": "2024-06-03T03:41:38.789154200Z" + } + }, + "id": "9d9cacf5af5f38c3" + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [ + { + "data": { + "text/plain": "(Array(-0.0304966, dtype=float32), Array(-0.03049916, dtype=float32))" + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t = 0.99999\n", + "normal_res = normal_gene.forward(state, normal_attrs, t)\n", + "cache_res = cache_gene.forward(state, weights, t)\n", + "normal_res, cache_res" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:43:23.083384100Z", + "start_time": "2024-06-03T03:43:23.002384100Z" + } + }, + "id": "9177b012b7ab25cd" + }, + { + "cell_type": "code", + "execution_count": 48, + "outputs": [], + "source": [ + "batch = 100000\n", + "t = jnp.linspace(-1, 1, batch)\n", + "batch_normal_forward = jax.jit(jax.vmap(normal_gene.forward, in_axes=(None, None, 0)))\n", + "batch_cache_forward = jax.jit(jax.vmap(cache_gene.forward, in_axes=(None, None, 0)))" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:50:05.806318700Z", + "start_time": "2024-06-03T03:50:05.785312900Z" + } + }, + "id": "878f5eda35df17f7" + }, + { + "cell_type": "code", + "execution_count": 54, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.34 ms ± 99.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%timeit batch_normal_forward(state, normal_attrs, t).block_until_ready()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:52:23.070679300Z", + "start_time": "2024-06-03T03:52:21.768451Z" + } + }, + "id": "2f649d81e1945757" + }, + { + "cell_type": "code", + "execution_count": 56, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "134 µs ± 28.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%timeit batch_cache_forward(state, weights, t).block_until_ready()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:52:43.066619300Z", + "start_time": "2024-06-03T03:52:42.599706100Z" + } + }, + "id": "cac106d6792ea53b" + }, + { + "cell_type": "code", + "execution_count": 57, + "outputs": [], + "source": [ + "normal_res = batch_normal_forward(state, normal_attrs, t).block_until_ready()\n", + "cache_res = batch_cache_forward(state, weights, t).block_until_ready()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:53:15.153532800Z", + "start_time": "2024-06-03T03:53:15.145531800Z" + } + }, + "id": "27bc7e5abf9b65d4" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "1b25ee5a2fe153c4" + }, + { + "cell_type": "code", + "execution_count": 58, + "outputs": [ + { + "data": { + "text/plain": "[]" + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "text/plain": "
", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAGdCAYAAADaPpOnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAA9hAAAPYQGoP6dpAABr50lEQVR4nO3deVxU9f7H8dfMsCubsiuCCKi4gCvikprkklm22qqZ1W1fvHXL9lvdrF+2X8uyLLs306w0KzPLpVzIBUVcAEVBcAFFlF22Ob8/TnKjUAGZ+c4Mn+fjMY97G87MvMcReHs+33OOQdM0DSGEEEIIO2FUHUAIIYQQoimkvAghhBDCrkh5EUIIIYRdkfIihBBCCLsi5UUIIYQQdkXKixBCCCHsipQXIYQQQtgVKS9CCCGEsCtOqgO0NLPZzJEjR/D09MRgMKiOI4QQQohG0DSNkpISQkJCMBrPvW/F4crLkSNHCA0NVR1DCCGEEM2Qm5tLx44dz7mNw5UXT09PQH/zXl5eitMIIYQQojGKi4sJDQ2t+z1+Lg5XXs6Miry8vKS8CCGEEHamMUs+ZMGuEEIIIeyKlBchhBBC2BUpL0IIIYSwK1JehBBCCGFXpLwIIYQQwq5IeRFCCCGEXZHyIoQQQgi7IuVFCCGEEHZFyosQQggh7IqUFyGEEELYFSkvQgghhLArUl6EEEIIYVcc7sKMQgghWifNrLHzgyROfrsejh9H8/HFa/Qget93EU5u8uvOkcinKYQQwu5tfX45vv96hN5VafW/8BMcfjyU/bc8x9APp2I0nf+KxcL2SXkRQghht2ora1iX8A9GbH8DgBLasqvDWCqDwnAuOEK3nJV0qM2lwyfTSF62kM6bFtIusp3i1OJCyZoXIYQQdqnmdA2butxYV1x+6fMQWs4hEg4tZsTWWQzJXkCbwkP8ctmrlONOv8KfOBWTwJGkg4qTiwsl5UUIIYTd0cwav/W+k8GHF1OFMxunL2b4tjfwCvWut52bjxvDv32EQ19t5rCpExHVe6kankh+ar6i5KIlSHkRQghhd365bjZD931MDSa2/WMRg1+75pzbR1/VE0PSRnKdwgmvzqRw8Hgqi05bKa1oaVJehBBC2JU987cw+KvpAGyYOItBr1zZqMeFDOiAecVPnDC0p3tZMr/FP2DJmMKCpLwIIYSwG5UlVTj/bSouVJMUcjUXffVgkx4fNiqSrBcXYMbA8Iy5JD240EJJhSVJeRFCCGE3kia+QlTlbgoM/nT75X0MxqYf+tz/idH8MuxpAKLfuY9ju461dExhYVJehBBC2IWDq/eTsPpFAPbe+xa+ke2b/VxDfniKDLdY2msn2D/uvpaKKKxEyosQQgi7cOjWJ3GliuR2l5Dw1vUX9FwubZzhY33Bb8KhxaS8tqqFUgprkPIihBDC5u2Zv4UhuYswY8Dr/VebNS76s67X92F9r3sAaPPkQ9Scrrng5xTWYdHy8uuvvzJhwgRCQkIwGAwsXbr0vI9Zu3Ytffv2xdXVlcjISD755BNLRhRCCGEHKh9+HICNETcTdU1siz1v7JLnOGnwJapyFxtv+7DFnldYlkXLS1lZGbGxscyePbtR22dlZTF+/HhGjhxJSkoKDz30ELfffjs//vijJWMKIYSwYSnvbqTPydVU4UzY/Bda9Ll9u7Qj9ap/AhC96HlOn6xo0ecXlmHQNE2zygsZDCxZsoSJEyeedZvHHnuM77//nl27dtXdd/3113Pq1ClWrFjRqNcpLi7G29uboqIivLy8LjS2EEIIxTYHTmDgse9Y33UaQ9Nbfu9IZUkVx32j6Fibwy8T32D4koda/DXE+TXl97dNrXlJSkoiMTGx3n1jxowhKSnprI+prKykuLi43k0IIYRj2Pf1TgYe+w4zBjq+/Q+LvIarpwsHbngKgO7LXqbseLlFXke0HJsqL3l5eQQGBta7LzAwkOLiYioqGt6VN3PmTLy9vetuoaGh1ogqhBDCCo4/8goAv3W8hvDR0RZ7nYT3byXXKZwAcz7Jd8yx2OuIlmFT5aU5ZsyYQVFRUd0tNzdXdSQhhBAtIG/7UQZkLQLA+1+PWfS1nD2cOTDpCQC6fPcmNRXVFn09cWFsqrwEBQWRn1//Sp/5+fl4eXnh7u7e4GNcXV3x8vKqdxNCCGH/0v8+F2dqSPUcTI/J/Sz+egPfuYXjhgA61Oay+R9fWvz1RPPZVHlJSEhg1ar6Jwr66aefSEhIUJRICCGECjUV1XT75X0ASm651yqv6e7rxq4R+tl2fT+ahWa2yvEsohksWl5KS0tJSUkhJSUF0A+FTklJIScnB9BHPpMnT67b/q677uLAgQP84x//ID09nXfffZcvvviChx9+2JIxhRBC2Jhtz31DkPkIxwwB9J95tdVet/d7d1OOO90rtrH9zV+s9rqiaSxaXrZu3UqfPn3o06cPANOnT6dPnz4888wzABw9erSuyAB07tyZ77//np9++onY2Fhee+01PvzwQ8aMGWPJmEIIIWyM84f6otmdg+7A1cvVaq/bvqsfW2OmAFD12ttWe13RNFY7z4u1yHlehBDCvuWsO0jHizpjRCPnlyw6XRRu1dfft3Q3UVf2pAYTBck5BPUNserrt1Z2e54XIYQQIvPZ/2BEI8V3pNWLC0DUxB7s8BqKE7WkPTrP6q8vzk/KixBCCJuhmTUi1s8HoOy6W5XlKLvpbwBE//IBNZW1ynKIhkl5EUIIYTN2fbCR8OpMSmhL3PPWW6j7Z/1mXkOhoR0danPZ+mLjLk8jrEfKixBCCJtR9NYnAKR0uYY2AW2U5XD1dmNX/6kAGD+QM+7aGikvQgghbEJl0Wl6p38BgMc9t6oNA3T4550A9Du2nOM78xSnEX8k5UUIIYRN2DbzR7wo5oipI3H3D1Mdhy7jotnRdjAmzOx5aoHqOOIPpLwIIYSwCbWf63tdMuOuxeRsG7+eTl12CwBBKz9VnET8kW387RBCCNGqlZ+oIDZnGQD+916nOM3/9H7xOipxoevpHWQsTlUdR/xOyosQQgjlUv9vBZ6UcsjUiW5T4lXHqePbpR3bQy4D4Mgr/1GcRpwh5UUIIYRy5oW/j4z6XIfBaFCcpj7jrfo1+Lpv/0zO+WIjpLwIIYRQ6nRhOb1zvgWg/d22MzI6o88T4zhhaE+Q+Sipb6xSHUcg5UUIIYRiO19ZTlvKyDGF02NKf9Vx/sK5jQs7u+ulqvzjRYrTCJDyIoQQQrHKxd8AsD/2aowm2xoZneE1TS8vPfYtobqsSnEaIeVFCCGEMlXlNfTI/h6A9rddoTjN2fW+dxjHjIH4aifZ8bqMjlST8iKEEEKZnXM24Kud5IShPT3vSFAd56ycXE2kxVwDQMWnXyhOI6S8CCGEUKbkv/rIKK3LZRhdnBSnOTefO/XRUe/MJVSWyOhIJSkvQgghlNDMGp136iemc77qcsVpzq/XXUPIMwbjTREpr/6kOk6rJuVFCCGEEge+TyOsZj+VuNDj4dGq45yX0dnE3l5XA1D12WLFaVo3KS9CCCGUOPSuvtdlh98o2ga1VZymcbxv/310lLVUjjpSSMqLEEIIJdpv0MtLeaLtHmX0Zz3vHMwxQwDeWhG7Zv+iOk6rJeVFCCGE1RWmHyOm5DcAIh+6THGaxjO5mEiPmgD8b7GxsD4pL0IIIawu7a2VGNFIc+tDx/gOquM0idskfU9Rlz3L0Mya4jStk5QXIYQQVmf+YQUA+X3HKk7SdL0eSqQMDzrU5pL2+XbVcVolKS9CCCGsqrbaTLeclQD43ThGcZqmc2/nzq4QPXfeHBkdqSDlRQghhFWlLdiOv3acEtrSbartnlX3XGov00dHIVulvKhg26czFEI0iabBkQ1ZHFm7l5rCYtw7tKPj+Fj8uvmpjiZEnWP/+RGAtOBRDPRwUZymeWIeHU/tB0a6nd5B1ppsOo8MVx2pVZHyIoQDOJl5gu13vU+XXz4irOYA9ZY/PgK72sRz6uZ7GfTWjTi5mlTFFAKAdlv08lI50v5GRmf4RPqR6jOU3qd+JfvtZXQe+YDqSK2KjI2EsGNarZm1178H0VFcvOpJwmoOUI0Te916scNrKFnOUQD0LNvE0Pcns893AGkLZIGhUKf4UDE9ijcCEH6n/ZYXgJPD9dGR9y8yOrI2KS9C2KnC/SfZGjyBEYvuwVc7SYZbb9b/7VOq8k4SXZFKbNE6Olft5diOo6y95F+cMvjQvWI74TcNZv2dn6qOL1qptNmrcaaGLOcoQodHqI5zQcLu18tL75O/UHzwpOI0rYuUFyHs0OGkHIq7D2TA8eVU4MYvV79F5Klkhs65hTaB9U+zHtA7iBErn6B6ZwZbAsbjzmmGzp3CL9e8oyi9aM2qlumHSOd0s++9LgDho7qwzyUGJ2pJe1su1GhNUl6EsDMH1xzAPOwiwqszyTWFkbtwI8O/fACT67mXsPn3CKDf4WX8MuARAIZ/9QC/Tv7QGpGF0Gka4Xv19S5uE+3v/C4Nye05DoCaZcsVJ2ldpLwIYUeO78rHOHoUobUHyXKJxrRxPdGT+jT68UYnIxf99n+sGfQ4AAn/uZuUN9daKK0Q9eWs2kdoTbZ+Fel7R6iO0yK8Jl0KQNSBFZhrzIrTtB5SXoSwE+UnKsiLv4LQmmyynSLx2LSWkIEdm/w8BqOBERteYkPYDThTQ8fp15K/85gFEgtRX/aH+mhll89Q2ga2UZymZfS6eygltCXAnE/GIlkMby1SXoSwA5pZY8eA2+lVvomTBl8My78nMC642c9nMBrou+0j9rr1xk8rIGP0fWhyiRZhYU7r1gBQMmCU4iQtx9XThd1BiQDkzftBcZrWQ8qLEHbg16kfk5C1gBpMZM36mrBLoi/4Od3bueP030+owcRFeYtZeefiFkgqRMNqq810O6qXl4BJIxWnaVlVl+ijo/abZd2LtUh5EcLGHfg+jf6f3g/A+rEv0nf6iBZ77oir+5A8+gkA+n50L8d2H2+x5xbijzIWp9JOK6SEtkTf2F91nBYVdb++aLdn6W8UZJxQnKZ1kPIihA2rrayh6rqbaUM529onctG3/2jx1+i/9Cky3Xvirx0nc8JDLf78QgDkL9T3umT4D8PJ3VlxmpYVPKAje916YURjz5srVcdpFSxeXmbPnk14eDhubm7Ex8ezefPmc27/5ptv0rVrV9zd3QkNDeXhhx/m9OnTlo4phE1ad9UbdCvfxkl8CfrxU4xOLf8ta3J3ofK9jzFjYHDWAnbMPff3qBDN4bFpNQBl8RcrTmIZR+P0vS/GFTI6sgaLlpdFixYxffp0nn32WbZt20ZsbCxjxozh2LGGj2xYsGABjz/+OM8++yxpaWl89NFHLFq0iCeeeMKSMYWwSdmr9jNw+bMA7Jr6GiH9mr9A93x6TOlPUtRkALRHHkEzy+pd0XJqTtfQ7divAATe4Jjlxft6fd1Lt4MrMFfXKk7j+CxaXl5//XXuuOMOpk6dSkxMDHPmzMHDw4N58+Y1uP3GjRsZMmQIN954I+Hh4YwePZobbrjhvHtrhHA4mkbh9XfjQQXb213M0A9vtfhLRnz2IhW4EVe8juRnl1n89UTrkb5gG94Uc8rgQ/S1sarjWETM7YMpwgs/rYB9X8gh05ZmsfJSVVVFcnIyiYmJ/3sxo5HExESSkpIafMzgwYNJTk6uKysHDhxg+fLlXHrppWd9ncrKSoqLi+vdhLB3W575lr4FP1GJCz6fz8FgNFj8NYMHdGRj/MMAeM16Rk64JVrM8cX6epf0oBEYnR3zquYubZxJC9SPosr7r1wqwNIsVl4KCgqora0lMDCw3v2BgYHk5eU1+Jgbb7yR559/nqFDh+Ls7EyXLl0YMWLEOcdGM2fOxNvbu+4WGhraou9DCGurLKnC7xX9FP4b4x+m8+goq712n/8+QjGeRJ9O5benvrPa6wrH5rlFX+9SmeBYh0j/2elhlwDg/Zss2rU0mzraaO3atbz00ku8++67bNu2ja+//prvv/+eF1544ayPmTFjBkVFRXW33NxcKyYWouVtuOU9Olfv47gxgP5fWXe9V7vIdqQMvhcAn3dekLUv4oJVlVbR/cR6AIJvcsz1Lmd0un00ADGnNlB+vExxGsdmsfLi5+eHyWQiPz+/3v35+fkEBQU1+Jinn36aW265hdtvv51evXpx5ZVX8tJLLzFz5kzM5oZ3Ybu6uuLl5VXvJoS9Kj54kj7L/gnA3ptfwLOD9f8+d//gYcpxJ6Z8K6mv/mj11xeOJf3TzbShnOMGfyKv6KE6jkV1viSSXFMYLlSzZ86vquM4NIuVFxcXF/r168eqVavq7jObzaxatYqEhIQGH1NeXo7RWD+SyaTPRzU5d7loBbbf8jq+2kn2uvYk/oNpSjL49wjgt95/A8D8f7OUZBCOo/Arfb3Lvg4jMZosv3ZLJYPRQFakPjoqXSrrXizJomOj6dOnM3fuXObPn09aWhp33303ZWVlTJ06FYDJkyczY8aMuu0nTJjAe++9x8KFC8nKyuKnn37i6aefZsKECXUlRghHdWLvCfquexOAgvufx8lV3d/5yH8/RC1G+hSuIv3LXcpyCPvnvU1f71I11LFHRmc4X6qPjjrslnUvluRkySefNGkSx48f55lnniEvL4+4uDhWrFhRt4g3Jyen3p6Wp556CoPBwFNPPcXhw4fx9/dnwoQJ/Otf/7JkTCFsQsotsxhFKenufRj08kSlWToNC2NTxyuJP/QV+U++TbdrPlCaR9in06dO0/2UfnRp6GTHXqx7Rrd7Lsb8hoGoyt0cTT5CcL8Q1ZEckkFzsHlMcXEx3t7eFBUVyfoXYTcK0o7jFtOZtpSR/Owy+j03QXUkdr23jp73XEQFblRk5NIu2k91JGFnUt7+lbgHh5NnDCaw+rBVDvm3BXvaDiCmbCvrbp/PsLmTVcexG035/W1TRxsJ0Vrtuf012lLGbo/+9H3mMtVxAOjxt6GkuffBndPsemiu6jjCDp1ctg6AAx2GtZriAnA8Vl/3ws+y7sVSpLwIoVhxbhFxG98DoOiBZ2zmh7zBaODY9Q8CEPHT+3LSOtFknil6eamOH6Y4iXX5TtLXvXQ9+BPmWocabtgMKS9CKLbtrg/wophMlxgGvTBedZx6+v/fdZzCh441B0l+5WfVcYQdqamsJfrERgCCrhmqOI11db8tgTI8CNDyyfhyp+o4DknKixAKnS6uInrFWwDk3fyIRa4afSHa+LmzM/ZmAKreldGRaLy9X6biRQlFeBF5ZS/VcazKua0raf7DATj2mYyOLMG2flIK0cokPfg5IebD5BuDGfjmjarjNCj4mTsAGHDkG47vbviK8EL82bGv9JHRXv8hmFxa36kuSgbro6O2SVJeLEHKixCKaGaNjp+/CsDecQ/i4umqOFHDIq/qza42A/Wzhj7+qeo4wk64btbLS1mf1rXe5Yzgm0YB0K1gHdVlVYrTOB4pL0IosuOVFURV7qaEtvSe/TfVcc6p4IrbAei08kO53pE4L82sEXFUv55Ru8tb13qXM6Kv7MFxgz9tKCfjv1tUx3E4Ul6EUKT2jbcB2BJ7B95hPmrDnEfszOspw4POVRmk/3er6jjCxmWv2k+gOY9KXOh68wDVcZQwOhnZFzICgBOLV6sN44CkvAihwMFVmfQ7vgIzBsJfvVd1nPPy7eRJSqcrAMh77b+K0whbd+hzfWSU7jUQV283xWnUqR6mXxLBK3mN4iSOR8qLEAoceGwOAMl+Y4m4pIviNI3jdrt+1FHPnZ9TWVqtOI2waev08nKyR+tc73LGmUsidD+1kYqTpxWncSxSXoSwspL8cuKS5wFguPcexWkaL+7RSygw+uOvHSdllpzzRZxdx4P6epe2Y1vnepczOo+J5qgxBDcqSZuXpDqOQ5HyIoSVJf9jEb6c5JBTOH2fHKc6TqOZ3JzZ0/t6AGo+kdGRaFj+jjw6V+/DjIGoKYNVx1HKYDRwoJO+96Voiax7aUlSXoSwJk0j4MvZAGSNuQujs32d/6Ld/froqM/BJZTnlyhOI2zR/k/1vS773Hrb/EJ0a9BG6ute2qdKeWlJUl6EsKL0/2whpjyZ07gS89o01XGarMetA8hyisKDCna+sFR1HGGDqlbp613yolr3epczOt/2+7qXks2UHC1VnMZxSHkRwopOzHwfgC1h19K+q5/iNE1nMBo4EH8DAMavFitOI2xR4F69vLhc3LrXu5zRYWhncp3CcaaGtLnrVcdxGFJehLCS0rxSYtMXAeDx0J2K0zRfx4evBaB33o8U5RYrTiNsSVFuMdEVOwDoPFn2vJxxMELf+1L6rRwy3VKkvAhhJdufWExbysh2jqTP/fb7r9LoK3twwKUrrlSx48VvVccRNmTfp0mYMHPQKYKgviGq49gMU6K+7iVwt6x7aSlSXoSwkrZffgzAwYtvw2gyKE7TfAajgcMJ+t4Xp2++VJxG2JKSHzcCkNtpiOIktqXL7fqel24V2zix/5TaMA5CyosQVrD3u730KVlHLUZiXp6sOs4F6/DgNQD0zf+BokNy1JHQee3Wz2VSO7B1HyL9ZwF9OpDlEo0JM3s//FV1HIcg5UUIK8h98RMAtgeOxT+ug9owLSBiYm+ynaNwo5Jdr3yvOo6wAbVVtUQV/gZA4MQExWlsz6FIfe9L5QpZ99ISpLwIYWE1lbXEbJkPgHnKbYrTtBCDgez++t4X0xIZHQnY/+0evCihhLZETuypOo7NMV2ir3sJTpd1Ly1ByosQFrbj1ZUEm49QYPAj7ukJquO0GP97fj/q6PByTp8oU5xGqJa3RB8Z7fMdiJOrfZ180Roibx8BQNfTqZzad1xtGAcg5UUIC6v5QL+O0c5eN+HS1kVxmpYTc2Mc2aYueFDBrlkrVMcRihk26eWlOEZGRg0J6BnAPpcYAPbOk/O9XCgpL0JYUHHOKeJylwHg9+hUxWlalsFo4ECvKwCoXLxMcRqhWoccvby0SZTycjaHuwwH4PSPvyhOYv+kvAhhQanPfoUrVex16UnPm2JVx2lxvlMuB6Dbge+pOV2jOI1QpXDfCSKqMgDoctMgxWlsl/OoiwAISJcjji6UlBchLMhj6WcAHB5xEwb7PbXLWfW6awiFhna0106QOmej6jhCkf2f6UcZZTlH0y6qveI0titiqr7nJboihaKDp9SGsXNSXoSwkEObDhN3ai0A0c/eoDaMhTi5OZEeMR6Ak5/K6Ki1KvtZHxkd6iTndzmX4L7BZDlHYURj78cbVMexa1JehLCQ9OcWYkQj1XsoHQaHqY5jMU5X66OjLju/QTNritMIFbz26OVFGyTrXc4nt7M+OipbLuteLoSUFyEsJGStPjIqufwmxUksK+ahMVTiQnhNJgeWp6uOI6ystqqWqJObAQi6UsrL+RhH6KMjvz2y7uVCSHkRwgL2LUsj5vR2qnGi53PXqo5jUW2DPUn100/AdehdGR21NplLd+FJKcV40mVCjOo4Ni98sr7npVvZVkqOlipOY7+kvAhhAYf/T9/rsi1wHN4Rjr+AsexifXTUboOUl9Ymf+nvJ6drF4/JRU5Odz4dh4RxyBSGE7VkfJKkOo7dkvIiRAvTzBpdNi0AoObaGxWnsY6Ih/Ty0qM4iVN7jylOI6zJtEk/ykxOTtd4B8P0vS8l38m6l+aS8iJEC9v76W+E1mRRQltin75cdRyr6JTQgTS3OIxopL/1o+o4woo65Op7D9peIuWlsbSL9HUvvrtk3UtzSXkRooUd+/ciALZ3uoK2AR6K01jP0T6XAmD+/gfFSYS1nEg/Tnh1JgBRt8jJ6Rqr0836npfuxZsoK6hQnMY+SXkRogVptWYiU/SrLDvdcJ3iNNbld/M4ALrl/EhNZa3iNMIa9i/QT06336UbPp19FaexH6EjI8k3BuNKFWnzN6uOY5ekvAjRgnZ99BvBtYcpxpM+j41WHceqYm4bxCmDD+20QnZ/LD+QW4PyVfrI6EiYjIyawmA0kBWq730pWibrXprD4uVl9uzZhIeH4+bmRnx8PJs3n/uH2qlTp7j33nsJDg7G1dWV6Oholi9fbumYQrSIE+8tBmBn+OW4+7opTmNdTm5OpIfqha3gPzI6ag28605OJ2fWbaraofq6F+9UWffSHBYtL4sWLWL69Ok8++yzbNu2jdjYWMaMGcOxYw0fjVBVVcUll1xCdnY2X375JRkZGcydO5cOHTpYMqYQLaK22kx0qj4ycrnJsc/tcjbaWH10FLRdyoujqzldQ/Qp/R+jwVfJnpem6nijvucl5tRGKoqqFKexPwZN0yx2Pu/4+HgGDBjAv//9bwDMZjOhoaHcf//9PP7443/Zfs6cObz66qukp6fj7OzcrNcsLi7G29uboqIivLy8Lii/EE2x7d3f6HtvAsV44lZ0DBev1rXnBaBgVx5+vYIBOJaaR0CvQMWJhKVkLNxO1xv6cgpvvKoLMTrJKoSm0Mwahc4BtDcXsO3fG+l7rxTApvz+ttjftqqqKpKTk0lMTPzfixmNJCYmkpTU8Il5li1bRkJCAvfeey+BgYH07NmTl156idrasy/+q6yspLi4uN5NCBVOzdVHRrsjJrTK4gLg1zOINI++AOx9Rw6ZdmT5yzYBcKDdACkuzWAwGtgfMgyAk0tl3UtTWexvXEFBAbW1tQQG1v+XV2BgIHl5eQ0+5sCBA3z55ZfU1tayfPlynn76aV577TVefPHFs77OzJkz8fb2rruFhoa26PsQojHMtRrRO/WRkfMNrXNkdEZ+H310ZPpR1qo5MsMWvbwUd49XnMR+VQ3S17203S7rXprKpuqy2WwmICCADz74gH79+jFp0iSefPJJ5syZc9bHzJgxg6Kiorpbbm6uFRMLodvzyWY61uZQQlt6/n2M6jhK+d74+yHTuSsxV9UoTiMsJThXX+/iMVLKS3MFXff7+V5OrKe6Qr5XmsJi5cXPzw+TyUR+fn69+/Pz8wkKCmrwMcHBwURHR2My/e/6GN27dycvL4+qqoYXNLm6uuLl5VXvJoS1nZijj4xSO03AzdddcRq1YqbGcxJffLWTZPxHDpl2RMWHiomoTAOg83UDFKexXxETe1Nk8MaLEjK+2KE6jl2xWHlxcXGhX79+rFq1qu4+s9nMqlWrSEhoeGHSkCFDyMzMxGw21923d+9egoODcXFxsVRUIS6IZtbo8vuJ6QzXte6REYCzuxO7O1wCwPHPVipOIyxh/6KtGNE4ZOqEf6+G/zEqzs/obGJvwFAAjn8p616awqJjo+nTpzN37lzmz59PWload999N2VlZUydOhWAyZMnM2PGjLrt7777bgoLC3nwwQfZu3cv33//PS+99BL33nuvJWMKcUEyP99Cx5qDlNCW3v8YqzqOTagZoZcX3+SfFCcRllD0k75HLTdYRkYXqry/vu7Ffause2kKJ0s++aRJkzh+/DjPPPMMeXl5xMXFsWLFirpFvDk5ORiN/+tPoaGh/Pjjjzz88MP07t2bDh068OCDD/LYY49ZMqYQFyTv3a+JAlI6jGeYf+seGZ3R5a5L4DP92i3FuUV4hXqrjiRakFuqXl6q4gYqTmL//K66CL6H6Px1mGvMcuRWI1n0PC8qyHlehLVluXajc1UGv96zkItmT1Idx2ZkuXSlc/VeNj2+hPiZE1XHES3oqKkDweYj7Hj7F2Lvv0h1HLtWXV5NZRtf2lLGvq9Sibqql+pIytjEeV6EaA2yfkinc1UGVTjT6x/jVMexKbnd9NHR6W9ldORIjm49TLD5CLUYiZzUT3Ucu+fs4UxGO/3yCnmLZN1LY0l5EeICHHxrKQA72o/CN0z29P2RxxV6eemUsRLH2r/buh1crI+MMt160iagjeI0jqEoTl/34pwk5aWxpLwIcQHar18KQMXYK9UGsUHd7xlJDSY612SStSZbdRzRQk7/op+cLj9cFuu2FJ/L9dFbxOF1aGZp+o0h5UWIZjq85Qi9yjZhxkD3xy5XHcfmtAn2Is17EAA5H8noyFF4Z+h7XgwDZbFuS+l2ywBO40qAOZ9Da/apjmMXpLwI0UwZry4DYI/nIDnXxVmc7KePjlx/kfO9OILaqloiT20BIOhyKS8txaOdG2me+p6s3M/kkOnGkPIiRDO1+XkpACeHT1Saw5a1u340AN2OrKK26uwXWBX2IeuHdDwppZQ2REzooTqOQyns+ftRW+ukvDSGlBchmqEwq4g+J1cDEDF9otowNqzbLQM4hTe+2kn2fp6sOo64QEe/+X2xrnc/TC6m82wtmqLtpXp56ZQt5aUxpLwI0Qy7X12OC9Xsd+1Oh5HRquPYLCc3J9KDRgJw/HNZ92LvtE36Yt2iaBkZtbSutyZQg4mONQc5tuWg6jg2T8qLEM1g+nYpAIf6TVSawx5UDNNHR96bpbzYO/8sfc+LyzA50qil+XRsS5qHft6crE/XKU5j+6S8CNFElcWV9Dq0HAD/OyaqDWMHOt6aCEC3k0lUnixXnEY0V0VhBVEVqQB0ukb2vFhCfld9dFSzRkZH5yPlRYgm2v3Oajwp5agxhG4391cdx+ZFjo3ksLEjrlSRPm+j6jiimTK/2IYTteQbgwiJD1UdxyG5XaKXl5BMKS/nI+VFiCYqX7AUgIxuV8hF1BrBYDRwIOxiAE59vVpxGtFcJ1boI6PsgIEYjAbFaRxT1NShmDHQuTKDor35quPYNPnJK0QTaLVmotO/AcBt0kS1YezJxXp5aZ8q5cVeuWzXy0tFTxkZWUpgN18yXPQLM+7/RNa9nIuUFyGaYO/CZALM+RTjSeyDI1THsRsR0/QjjrqXbqE4t0hxGtEcHY/oRxp5JspiXUs60kUfHVWslNHRuUh5EaIJ8j78HoBdwaNx93ZRnMZ+dEjoRLZzJCbMpM2Vf1HamxPpx+lUkwVA5PWyzsuSjCP18hKQJuXlXKS8CNEEfpv1o4xqx1yqOIn9yYnUR0cV38noyN4cWKRfEmC/Sze8w3zUhnFwEZOHAdClPJWKIycVp7FdUl6EaKSjKfn0KNd/iHd7eJziNPbHZYxeXoLSpLzYm7LV+sjoaEdZ72JpnQYGsd8UjRGNzPkbVMexWVJehGik9DdX6P/bpi/+vYMVp7E/0XeOAKDb6R3k7y5QG0Y0Sds9+mLd2n5SXizNYIDsMH10VPK9jI7ORsqLEI1kWqmPjE4MlJFRc7TrHsg+t54A7P1grdowotE0s0bECb28+I2XxbrWoA3Vy4vvTikvZyPlRYhGKCuqoffRHwEIniblpbnyYvTRUc1KGR3Zi5w1+2mnFVKJC5FX9VYdp1XoeKNeXiKLk6k5Vao4jW2S8iJEI2z790Z8KKLQ2J7Ok2TXeXN5XKofMt1pv5QXe3F4ib7XZV+bPrh6yhF21hB9SRi5xk44U8P+z35THccmSXkRohFKFukjowNRYzE4mRSnsV9Rtw/HjIEu1Rkc2XJYdRzRCNUb9MW6BZEyMrIWoxEyg/W9L4VLZXTUECkvQpxHbS2E79HLi/vVMjK6EF5hvmR49AUga94axWlEY7TP1Pe8OCXIHkdrqhqklxfP7VJeGiLlRYjzSFmWQ0ztTmoxEn3/GNVx7F5+T33di7ZKRke2rrqsisjS7QB0vFr2vFhT0HW/r3s58RvmikrFaWyPlBchzuPQ3B8AyGw/COeg9orT2L82l+nlpXPWKtA0xWnEuWR+nYoblZw0+BJ2cRfVcVqVmInRHCMANyo5+OUW1XFsjpQXIc7De6M+MiofISOjltB12lCqcaJDTQ5HNmSpjiPO4fj3+shofzu5krS1ObsYSAvQ974c+1JGR38m5UWIcziadZoBRT8DEHbPeMVpHINXSFv2tNVHEAc/ltGRLTNu1ctLSYyMjFQo76eXF7fNUl7+TMqLEOew451faUM5x52DaTcyVnUch3Gi53D9//zyi9og4pyCc/UjjdqMlMW6KrS/Ui8vXfI3oFXXKE5jW6S8CHEONcv0kVFuz0v183aLFtH2shEAhGX/IutebFRRThFdqtIBCL9OyosKPa/vyUl8aKuVcmR5iuo4NkXKixBnUVUF3Q98D4Dn9TIyakndpw2mGidCanM5vCFbdRzRgDNXks5x6kxAD3/FaVonD08Tu32GAnBk0TrFaWyLlBchzmLbon100TKpwpkuf0tUHceheAa1Ib1tfwCy58voyBYV/ayXl0PBAxQnad2KYvXRkdNGWffyR1JehDiLvI/1kdG+4IswensqTuN4CnqOAEBbu1ZpDtEwt536Yt3KWFmsq5L3BL28hOeuA7NZcRrbIeVFiLPw36yPjKoT5RBpS/Acry/aDcuWPS+2KCxfLy/txsp6F5V63NKXMjzwNZ+gYF2a6jg2Q8qLEA3I3lVK/zL9l2rEfVJeLKHrbUOowURoTTaHNuaojiP+4OjWwwSbj1CDichr+6iO06r5BjiT2nYwADn/ldHRGVJehGjA7ndW40oVh90i8BrQVXUch+QZ4klGm9+vc/SJ7H2xJTlf6ntdMt160iagjeI04kSMPjrSfpXycoaUFyEaYPxBHxnl9ZFDpC1J1r3Ypopf9fJyLEwW69qCNuP08hKa9aucWuB3Ul6E+JPyMo1eufpi3fa3yMjIktpeqq976ZQle15siVe6Xl4YIOtdbEG3yQOpxIWA6iMUpxxQHccmWKW8zJ49m/DwcNzc3IiPj2fz5s2NetzChQsxGAxMnDjRsgGF+IPk+bvoyCEqDO6ETRmhOo5D6zptKLUYCa/ZT+5vh1XHEYC5xkyXk1sBCLhMyostCI5wZ6eb/lkcmC+jI7BCeVm0aBHTp0/n2WefZdu2bcTGxjJmzBiOHTt2zsdlZ2fzyCOPMGzYMEtHFKKeE//R97pkhl6MwcNdcRrH1raDN3vb6AtCD3wse19sQfbKvXhTTDnudLm8h+o44nf5XfXRUfUqKS9ghfLy+uuvc8cddzB16lRiYmKYM2cOHh4ezJs376yPqa2t5aabbuKf//wnERERlo4oRB1Ng5Dt+noX7VIZGVlDQYw+OtLWrFUbRABwZKm+Z3yfVz+c3Z0UpxFnuF6il5fgfVJewMLlpaqqiuTkZBIT/3d2UqPRSGJiIklJSWd93PPPP09AQADTpk0772tUVlZSXFxc7yZEc+3ddJK+lRsBiLxfyos1tPl93UuorHuxCebf9PJyMlJGRrak69TB1GKkY+UBStMPqY6jnEXLS0FBAbW1tQQGBta7PzAwkLy8vAYfs379ej766CPmzp3bqNeYOXMm3t7edbfQ0NALzi1ar72zf8KJWg62icEjJlx1nFYhetowzBjoUrOX3M1HVcdp9dof0MuL81ApL7YkNMaTXS76qQX2fyLXObKpo41KSkq45ZZbmDt3Ln5+fo16zIwZMygqKqq75ebmWjilcGQuP+sjo4J42etiLW1Dfcn0iAXgwCeyS1ylyuJKospSAAi9Ug6TtjVHI/XR0emV8n1i0YGmn58fJpOJ/Pz8evfn5+cTFBT0l+33799PdnY2EyZMqLvP/Pu1HJycnMjIyKBLly71HuPq6oqrq6sF0ovWpviUmT55PwAQdJtcRdqajnUfTnRyCrWr1wKTVMdptfYvSSWGak4Y2hN6UWfVccSfOI+6CPa8TkC6lBeL7nlxcXGhX79+rFq1qu4+s9nMqlWrSEhI+Mv23bp1Y+fOnaSkpNTdLr/8ckaOHElKSoqMhIRFbX0/mQCOU2r0pMN1Q1THaVU8xv2+7uWArHtR6fj3+sjoQPuBGIxyckZb02XKUAA6V+yh/OBxxWnUsvjYaPr06cydO5f58+eTlpbG3XffTVlZGVOnTgVg8uTJzJgxAwA3Nzd69uxZ7+bj44Onpyc9e/bExcXF0nFFK1ayUB8Z7Y8YDc7OitO0LtHT9FMiRFWnkZt87tMoCMsxJevlpTRG1rvYorC+7Ul36glA5vz1itOoZfHyMmnSJGbNmsUzzzxDXFwcKSkprFixom4Rb05ODkePyiI9oZbZDJ126+d3cbpCRkbW1jbcj33uvQDYL+telAk+pJeXNiOlvNgigwFyO+vrXsqWt+7vE4OmOdaFEoqLi/H29qaoqAgvLy/VcYSdSP0pn96j9XVYldlHcQ3765osYVkb+t3PkG3/ZlX3exm159+q47Q6RTlFeIf5AHB89zH8Y/zVBhIN+vmORSR+eD0ZbfvStSRZdZwW1ZTf3zZ1tJEQqmTPWQFApnc/KS6KuI/R1710zJR1Lyoc+EK/JECOU2cpLjYs/BZ9xBpZmsLp/CLFadSR8iIE4LFWHxkVD5VDpFWJmqbvDu9avYvDOwoUp2l9in7eAsChIDlE2pZ1GRbCAVMkJszsm79RdRxlpLyIVq8gr4b+hT8C0PFOKS+qeHYJ4IBbDACZH8tJuKzNLVVf71IVJ+tdbJnBANmd9KJf/F3rXfci5UW0ettnb8SHIgpNfgSMl391qpTXVR8dVf+8Vm2QVigsXy8vPqOlvNg6baheXrxTpbwI0WpVfK2PjA52Gwsmk+I0rZvr7+teOuyTdS/WlJd8mGDzYWoxEnldX9VxxHmE3qSXl+iiLVSdKlecRg0pL6JVq62FLhl6eXG7SkZGqkXeppeXrlWpHNl9UnGa1uPgl/p6l0y3nrQNbKM4jTifqEvCOWzsiAvV7PvvJtVxlJDyIlq1Hd/m0KN2p/4vznvHqI7T6nl3DSLbtStGNPbNk3Uv1lLxqz4yyu8kIyN7YDAa2B+i730pXNo6R0dSXkSrdvhD/VpG+9on4BzYTnEaAXA0Wt/7UrVyrdogrYhXul5etAFSXuxFdYJeXjy3S3kRotXx2qCPjMqGy8jIVriO1stLsKx7sQpzjZkuhfo5XgIvkwXr9iLk+t/XvRQmUVNepTiN9Ul5Ea3WsZzT9D/1MwCd7pZLAtiKLr+ve+lemUJeRus9CZe1ZP+0D2+KKMedLpf3UB1HNFL05d0oMPjhQQV7P3esM+02hpQX0Wql/vtX2lDOMecQ/Ef1Vh1H/M47pgO5Ll0wYSbjo9Z98TlrOPKNPjLa59kXZw+5IKm9MDkZ2Buo730p+Lr1jY6kvIhWq/qb3w+R7nGpfuYnYTOOROl7XypXyujI0mo36uXlZKSsd7E3p+P18uK+VcqLEK1CbS1EZ34PQJtrZWRka5wT9fISmCHlxdL8DujlxXmIlBd7E3jt7+tejq2ntqpWcRrrkvIiWqUdX+6jizmTKpyJvnuU6jjiTyKm6uWlx+lk8jNLFKdxXFWlVUSWpQDQ8SopL/am6zW9KcILb4rZ+0WK6jhWJeVFtEpH5+kjo4yAi3Dy9VScRvyZT2wYR5zDcKKW9Hmt9+Jzlpb5dSquVHHC0J5OwzurjiOayMnVREagXvTzP1+tOI11SXkRrVL7JH1kdHqUjIxs1aFI/YdyxQoZHVnK8eX6yOhA+wEYjLLuyx5VJFwMQJvNUl6EcGj5+0vpU6L/Qux8r5zfxVY5XayXl4B0KS+WYtqql5fS7jIyslchN+vlpXvBOqpKW8/5XqS8iFZn9zurcaWKQy4R+A2OVh1HnEXnW/Xy0rNiC8eyW+fF5ywtOFcvL21GSnmxV12u6Mlxgz9tKSPt0y2q41iNlBfR6pi/1UdGubHj5RBpG+bbL4J85w64UE3avCTVcRxOcW4RnavSAQi/Vs6sa6+MTkb2dRwJQOGXrWd0JOVFtCo11Rrds/TFut7Xy8jIphkM5ESMAKDsBxkdtbT9XyRjRCPXFE5AzwDVccQFqBmmj458t61SnMR6pLyIViV1wS46aIcox52ufxuhOo44D9NIfXTkv0fKS0s79bM+YsgNlpGRvet06+/rXoqSKC9oHSNWKS+iVTn+iT4ySgsZhamNm+I04nzCf1/30qt8E8dzTytO41jcd2wCoDpORkb2LmxUJEdMHXGlqtWcWkDKi2hV/LfoI6OaS2RkZA/aDYziuFMQblSy5+NNquM4DM2sEZ7/GwC+4wYpTiMulMFoYH+4frLN4qWtY92LlBfRauSlnaR3mf6vki73S3mxCwYDOZ31vS+l38voqKUc3nSIIPNRajAReV1f1XFES7hYHx35pUp5EcKhpL+9Eidq2e/eA79+YarjiEYyDNfLi99uKS8tJecLfa/LPo9YPPw8FKcRLSHiNv2Io+5lWyjKKVKcxvKkvIjW4wd9ZHQ0Tva62JOwyb+veylLouBI6zkJlyVV/aqXl2MRMjJyFB0GhZLlHIUJMxlzHf8q01JeRKtQU2WmR84PAPjeLJcEsCfth3bnhMkfDyrY9UnrOQmXJfnu09cPmYZIeXEkOZH66Kjie8cfHUl5Ea3Czo+34q8dp9jgRbfbBquOI5rCYCAn7CIASr+T0dGFqiqtIrokGYCOV8UrTiNaktMYfdFuUJqUFyEcwon/6COjtI6jMbk5K04jmuz3dS/tdkp5uVD7vkrFndOcNPgSlhilOo5oQdF3jACg6+lUCvYcUxvGwqS8iFYhaJteXszjZGRkjzrdopeX3qUbKMyvVpzGvhV8p693yfQbJFeSdjD+Mf5kuPUGYO/ctWrDWJiUF+Hwjqbk07NCXysR/cBYxWlEc7Qf3pNTpna0pYyd87epjmPXTFv19S5lPWW9iyM62k1f91Kz4mfFSSxLyotweHvfXgFAWpt+tO8RpDiNaBajkezQYQAUfyujowvR8bC+56XtKFnv4ojaTLwEgIh9K9HMmuI0liPlRTg855X6JQGO9ZeRkT3ThumjI59UKS/NdSKjgPDqTAAib5RrGjmimLuHU4kLHWsPkrVyn+o4FiPlRTi06vJqehz+EQD/W6W82LPQm39f91K8npMFtYrT2KcDn+sjo/0u3fDp7Ks4jbCENgFt2O07FIDcD39UnMZypLwIh7bng/V4U8xxQwDdbu6vOo64AH6jYik2euNNMamfpqiOY5fKVuvl5UiorHdxZEWDxgDgvm6l4iSWI+VFOLSiBfrIKL3zOIxO8tfdrplMZHfU/0V56hsZHTWH5259vYs2UNa7OLKQKaMBiDm2hqpSxzwrtfw0Fw6tY6peXgyXycjIEdQO0UdH3jukvDSVucZMZKG+5yXwCtnz4siiru7NMWMgbSljz9wNquNYhFXKy+zZswkPD8fNzY34+Hg2b9581m3nzp3LsGHD8PX1xdfXl8TExHNuL8TZHFm3n4jKdKpxIuah0arjiBbQ8abf170UreNUoVlxGvuStSIDb4opw4MuV/RUHUdYkNHJSEaY/jPv1GLHHB1ZvLwsWrSI6dOn8+yzz7Jt2zZiY2MZM2YMx441fPa/tWvXcsMNN7BmzRqSkpIIDQ1l9OjRHD582NJRhYM58I6+1yXVexjtOnsrTiNagv+YvpQa2tKOk6T+N1V1HLty5Gt9ZLTXewBObk6K0whLM4zWy0tQimMu2rV4eXn99de54447mDp1KjExMcyZMwcPDw/mzZvX4PafffYZ99xzD3FxcXTr1o0PP/wQs9nMqlWrLB1VOBiPNXp5KRoiIyOH4eREdochABQuldFRU2i/6eWlqKusd2kNou7Rz/fSrWI7BbvzFadpeRYtL1VVVSQnJ5OYmPi/FzQaSUxMJCkpqVHPUV5eTnV1Ne3atWvw65WVlRQXF9e7CVF+rJQeBWsB6Pg3KS+OpGawPjry2i7lpSkCs/Ty4jZC1ru0BoG9A0l3iwNg77uOd7Zdi5aXgoICamtrCQwMrHd/YGAgeXl5jXqOxx57jJCQkHoF6I9mzpyJt7d33S00NPSCcwv7t+edVbhSxUGnCKIu66o6jmhBHW78fd3LqV8pOinrXhqjNK+UyNO7AAifJHteWoujvfVDprUfHW90ZNNHG7388sssXLiQJUuW4Obm1uA2M2bMoKioqO6Wm5tr5ZTCFlV8pY+MsrqPl4vPORj/cf2pMLjjxwlSFuxRHccuZC7cigkzh0ydCOobojqOsBKva/XyEpW1Eq3WsYq+RcuLn58fJpOJ/Pz687b8/HyCgs59jZlZs2bx8ssvs3LlSnr37n3W7VxdXfHy8qp3E62bZtaIytDLi8d1lylOI1qciwtZwYMBKFwio6PGOPWjPjLKCZa9Lq1JjzsGU4YHAeZ89i/dqTpOi7JoeXFxcaFfv371FtueWXybkJBw1sf93//9Hy+88AIrVqygf385K6pomn2LUwgyH6GUNvS6b7jqOMICqhP0z9Vzm5SXxnBL0ctLdV8pL62Jm7cru/1HAnDoI8caHVl8bDR9+nTmzp3L/PnzSUtL4+6776asrIypU6cCMHnyZGbMmFG3/SuvvMLTTz/NvHnzCA8PJy8vj7y8PEpLSy0dVTiIox/qe112BSbi7uOqOI2whODr9fLS6+QvFBc57pVzW4Jm1uiSvxEAv8uHKE4jrK1yuH7ItNfGHxQnaVkWLy+TJk1i1qxZPPPMM8TFxZGSksKKFSvqFvHm5ORw9OjRuu3fe+89qqqquOaaawgODq67zZo1y9JRhYNo/5teXioT5SgjRxVw2UBOG9wI5BgpizJUx7Fp2T/tw187zmlcibquj+o4wso636f/HOxVtJ5T2afUhmlBBk3THOqfLcXFxXh7e1NUVCTrX1qhE+nH8e0eiBGNI5sPETKgg+pIwkLSg0fSLW8tX1/yHletvEt1HJu1btonDJs3lR1eQ4ktWqc6jlDggGt3IqrS2fDAQoa8NUl1nLNqyu9vmz7aSIimSntzBUY00t3ipLg4uNMJ+izfK3mN4iS2TdugX9vmZHcZGbVWOXETANCWfac4ScuR8iIcimm5/s15tJ8cZeTogm68GIDehWsoKXKsw0BbUocsvby4jxqsOIlQxfdm/edhzMHl1FbWKE7TMqS8CIdRXV5NTK6+ot5viqx3cXRBlw+k3OBBAMdJ/Xy36jg26eT+QrpUpQEQOVnKS2vV447BnDT40k4rZM+831THaRFSXoTD2PXBRrwposDgR8yUAarjCEtzcWF/8DAACr9crTiMbcr8j34ZlgPOXWnf1U9xGqGKk5sTuzuNA6Dw028Vp2kZUl6EwyhaoB9llN55HCYXk+I0whoqh+qjI59tUl4aUv6TPjI6HCZ7XVo7bby+7qXjdsdY9yLlRTiMjjv08mKaICOj1iLkpt/XvZxcS+kpx5jltyTv3fr5XbTBsli3tYt5eAw1mOhSuYcj6w+ojnPBpLwIh3Dwl2wiq/ZQg4mYh8eojiOsJGR8H4qMPnhTzK5Pt6mOY1Oqy6uJLtoMQMg1sueltWsf6Uuqlz5mPfC2/e99kfIiHEL228sA2O09BO8wH7VhhPWYTGR2GAFA0RIZHf1R5uLteFBBoaEdEePkyuoCTg7Rjzpqs9r+171IeREOwWvNNwAUDrtCcRJhbdXD9NGR73YpL390/Bt9ZLTPfzBGJ/lRLyD8fn3dS48Tv1CUW6w4zYWRv9HC7hVln6TXSf0CfZ0fkvLS2nScrJeXnkXrKSusVJzGdrhs0RfrlsfKyEjouoyLJts5Cheq2TnLvi/UKOVF2L3ds37AiVr2ufYgfFQX1XGElXW4JIbjxgA8qCDtk02q49gGTaPzEb28+IyXxbrif3L76v/AM3z9leIkF0bKi7B7hmX6yOhQX9nr0hoZjAYyO+l7X4qXyugI4EjSQQLNR6nGieib5JxH4n/877oagN6Hvqfi5GnFaZpPyouwa1UllfTI1S/17jdNyktrVXuRXl7a7ZDyApDzub7XJcOjL2383BWnEbak6y0DOWLqiCelpM5aqTpOs0l5EXZt5ztr8aKEPGMwPab0Vx1HKBI6RS8vMcW/UX68THEa9ap/0cvL8WgZGYn6DCYj+3pdBUD1IvsdHUl5EXat7HN9ZJQRfbkcUdGKdRoRwSFTJ1yoZu/HG1THUS5gn36kkevFUl7EX3lP1UdHPQ8so7qsSnGa5pGf9sJuaWaNqDT9/C7uky5XnEaoZDAaOBCm730pauXrXk4eLCby9E5ALsYoGtbrriEcNwTgo51i1ztrVMdpFikvwm5lLEgmuPYwpbSh14MXq44jVBul/x3wS23d5WXvxxswYeagcxcCYoNVxxE2yORiYk/XKwEonW+foyMpL8Ju5X+gj4x2hozF3ddNcRqhWtQdIwHoVpbMyaxTasMoVL5CP+dRbueLFCcRtqztFH101C1jKdUV9nddMCkvwm4Fb9HLS814OcpIQPCAjmS5RGPCzJ45v6iOo4zf7l8BMAwfrjiJsGWxD47ghKE9/tpxtr9uf6MjKS/CLh1al0X06Z36hRgflatIC92hbokAVH33k+IkapQdL6db6RYAwifLnhdxdk7uzuzpcR0AlfM+U5ym6aS8CLu0/w19oe5O72G0j2qnOI2wFe6XjwYgfK/9nr/iQmR8koQzNRwxdaTDkHDVcYSN87nnRgBiD3xN+YkKxWmaRsqLsEtnLsR4aoSMjMT/dLt7JDWY6Fyzj4Nrs1THsbri7/SRUVbocDAYFKcRtq7nnYM5ZArDixJ2/Os71XGaRMqLsDunDhTS65T+QzpCLsQo/qBtiBd7vAYBkDW39Y2OfHboa31qh8jISJyfwWRk34AbADAuXKA4TdNIeRF2Z+cry3Gilr2uvQgb0Vl1HGFjTvbXR0eua1vX6KiyuJJuRb8B0PEmWawrGifkkZsAiDu6nKKDJxWnaTwpL8LuOC/Tz0twNH6i2iDCJvndqJeX7kdWUVNZqziN9WT8dwtuVHLcEEDnMdGq4wg7EX1VTzJce+FKFTue/lJ1nEaT8iLsSsnRUmLzVgAQ8sA1itMIW9Tt5v6cMvjgwynS/7tVdRyrOfmNPjLKDLkIg1HWu4jGMRggb9TNAPgs+URtmCaQ8iLsys5XluPOabKdI4m8spfqOMIGmVydSAsZBUDBgtYzOmq7TV8HVjlIRkaiaWJm3kINJnqXbiTz2zTVcRpFyouwK4av9N2a2f2uln9dirOqHqGPjny3to7yUltZQ9cC/YKUQdfJYl3RNP69g0kO0s+Xdfj5jxSnaRwpL8JulJ+ooNeh5QAE3C0jI3F24XdcAkCP4iRKDhcrTmN5exduoy1lnDT4EnVlT9VxhD2adjsAPZI/tYsrTUt5EXYj9dUfaUsZh0xhdL+5n+o4woZ1Gt6ZLOconKgl7V37O/V5U+Uv1kdGewOHYXKWH+ui6fo+OY48YzB+2nG2P/+t6jjnJX/Lhd2oXaSPjDJjZWQkzu9gtD46Ov2t44+O2mzSr6RdGS/rXUTzOLs7sWfArQAYPrb90ZGUF2EXKosr6Zmt/2ug3R1XK04j7IHb75cK6JTu2OWlqrSK7gX6npfgm0cpTiPsWZd/3QZAv+MryF59QHGac5PyIuxC6us/400xR40h9Lx9kOo4wg70uHcE1TgRXp1J1irb/kF8IdI+3UJbyigw+NFlohyBJ5ovbFQkW/3GYEQj+5F/q45zTlJehF2oXKCfmC6jx1UYneSvrTg/zw5e7PEZDED2ez8oTmM5J79cBcC+DiPle0NcuPsfAKDP9o8oPVqiOMzZyd90YfNqKqrpkbkUAO+pMjISjVc0RD/8s80v3ytOYjk+2/TyUn2RjIzEhev7xFgOOEfjTTHbH5qvOs5ZSXkRNi/ljTX4aic5bvCn1z3DVMcRdqTDnXp56VWwhrLj5YrTtLzygnJiipIA6HTrxYrTCEdgdDJy8PL7AQhd8jbmGrPiRA2T8iJsXsXHCwFI6341Tq4mxWmEPYm4LIZDpjDcOc2ud1arjtPi0j9cjwvVHDaFEjYqUnUc4SD6vTWFIrwIr97Hpqe/Ux2nQVYpL7NnzyY8PBw3Nzfi4+PZvHnzObdfvHgx3bp1w83NjV69erF8+XJrxBQ2qLK4kl6ZXwPgfdcNitMIe2MwGtjfXd/7UvmV442OSr7RR0YHwkfJ6QNEi/Hq4ElKwt36/3/7BTSzpjjRX1m8vCxatIjp06fz7LPPsm3bNmJjYxkzZgzHjh1rcPuNGzdyww03MG3aNLZv387EiROZOHEiu3btsnRUYYNSXl6BD0UcNXag191DVccRdsj9ar28dMn43iZ/CF8I/52/7026WEZGomX1+HA65bjTo3wr22b+qDrOXxg0TbPod3N8fDwDBgzg3//WD7sym82EhoZy//338/jjj/9l+0mTJlFWVsZ33/1vV9WgQYOIi4tjzpw553294uJivL29KSoqwsvLq+XeiFBiY9j1DM5ZxNp+0xmx9TXVcYQdKi8ox+DfHndOs39JqsMcTlx08CSe4e0xonF482E6DAhRHUk4mLV9pzNi+xvs9BpMz5PrLb53rym/vy2656Wqqork5GQSExP/94JGI4mJiSQlJTX4mKSkpHrbA4wZM+as21dWVlJcXFzvJhxD2bEyYnP0E9MF3H+94jTCXnn4ebDTX98zcfgDxxkdpb+3FiMa+527SXERFtH1w0c5jSu9ijey9eWfVcepx6LlpaCggNraWgIDA+vdHxgYSF5eXoOPycvLa9L2M2fOxNvbu+4WGhraMuGFcjteWEYbyjno1IXut/RXHUfYsbLh+ujIZ6PjlJeK7/T1Loe7yshIWEZw32A297kLAK8XHqG2qlZxov+x+6ONZsyYQVFRUd0tNzdXdSTRQpy+/ByArPjrZTGiuCCd79PLS4+ijRRnFypOc+E0DcIy9MseuE9IPM/WQjRfry+e5pTBh66nU/ntrk9Ux6lj0fLi5+eHyWQiPz+/3v35+fkEBQU1+JigoKAmbe/q6oqXl1e9m7B/RdknictbAUCHR+QoI3FhwoeHsdelBybM7HnD9hYfNtXB1fvpXLOPapzofp+cnE5Yjm9ke7Zf9gwAkfOfoviwbZx116LlxcXFhX79+rFq1aq6+8xmM6tWrSIhIaHBxyQkJNTbHuCnn3466/bCMe16/mtcqGava0+iJvZQHUc4gMOx+t6X2mX2PzrKfl8vYLu9h9A2RP7BJiwr4b/3ctC5C4HmPJIvfUp1HMAKY6Pp06czd+5c5s+fT1paGnfffTdlZWVMnToVgMmTJzNjxoy67R988EFWrFjBa6+9Rnp6Os899xxbt27lvvvus3RUYUPcl+ojo8PDZK+LaBl+U/Ty0v3gD1Sftp3ZfXO4/6rvlTw1aKziJKI1cPNyofCFdwEYnvoOu+asV5zICuVl0qRJzJo1i2eeeYa4uDhSUlJYsWJF3aLcnJwcjh49Wrf94MGDWbBgAR988AGxsbF8+eWXLF26lJ49e1o6qrARRzYfIu6kfv6KqKflKCPRMmJuH8xJgy/ttEJ2zFb/w7e5qkoq6ZGvf38ET5XyIqyjz2Oj+TXyNoxotHngNkrzy5Tmsfh5XqxNzvNi/1aP+z8uXvEYqd5D6X1qneo4woFsjJrC4MxPWRP7ICNT3lQdp1m2v7aaPo+MIt8YhH/VEYwmWcwurKPwwCmqonoQZD7CuqjbGLb3oxZ9fps5z4sQTaWZNTqu+Q8AJVfcojiNcDTO110JQNSuJXZ7tt3iL/SR0d7wMVJchFW1i/Ah77XPqMWIU3kx1eXVyrJIeRE2ZfeCHURX7qISF3o9f63qOMLB9Jw+mjI86FibQ/rn21XHaZbgVL28GMbJyEhYX9xDI8iYv4lBOV/g7OGsLIeUF2FTjs2aD8CO0Al4hfkqTiMcjXt7D3aG6L/08+csUZym6fK3HSb69E7MGOh63yWq44hWKmZyf+Xn3pLyImxGVWkVsan6yMjpjqmK0whHVTNBHx113GJ/5SX9Lf0Q6T1tBuLfrb3iNEKoI+VF2Iztz39Le+0EecZgYv8xRnUc4aB6/GM81TgRWbmbnFX7VMdpEpeV+rW+CuPHKU4ihFpSXoTNcJqvr1zfM/BWTK5OitMIR+Ub4csO35EAZL1uP3tfKgor6J2nXxIg6M7LFacRQi0pL8Im5CcfIu6Yvks89GkZGQnLKh+jj47a/WI/5WXnW6tpQzmHTaFEXRunOo4QSkl5ETYh7YlPMWFmu9dFRF0apTqOcHAxT0zEjIFeZb9xcH2O6jiNUvnFNwBkxlyufLGkEKpJeRHKmWs1Oq+eB0DptbcpTiNaA79ewaT6DAdg/0tfKE5zfuYaM9F79fUuba6XkZEQUl6EctvfXkdYzX6K8aTvS9eojiNaiZLx+qUngtYuVJzk/DI+20qgOY9iPOl133DVcYRQTsqLUK7srQ8BSO1+PW0C2ihOI1qLmKevpgYTMRXJZP9k20cdHftwGQC7OozF1ctVcRoh1JPyIpQqSC9g4EF9t33AE7crTiNak/Zd/UhpnwhA9iuLFKc5t+AtenmpHS8jIyFAyotQbPcjH+NGJXs8+hN980DVcUQrU3GFPjrqsN52R0f7f9hLdOVOqnGi9+OXqo4jhE2Q8iKU0WrNRKx8D4Dj19ytOI1ojXo9PZFKXIiq3E3m0l2q4zQo57XFAKT6jcK7czvFaYSwDVJehDLbX/6R0OosTuJD3/+7XnUc0Qr5hPuwPUg/W23OKwsUp2lYyAZ9rFox4TrFSYSwHVJehDI177wLwPbYqXgGeihOI1orw403AtBt83+orapVnKa+rBUZdD2dSjVO9Hhyouo4QtgMKS9CiZxfs+mf/z0A4S/fpTiNaM3inrmcUwYfQsyH2P76GtVx6jkzMkrxS8S3i4yMhDhDyotQYu+jH2BEY1v7RCLGRquOI1oxV283UnvcAMDp9z9RG+ZPQtb/PjK6TEZGQvyRlBdhdWUFFcRtmQuA9jdZqCvU83tEv55W3+yvKc4tUpxGd+CHDKJO76QKZxkZCfEnUl6E1SU/9B/8tAJynDrT57krVMcRgu639CfTJQYPKtjx1GLVcQDI/tdnAOzwv4T2kb6K0whhW6S8CKvSas10XPw6AAcuexCjs0lxIiHAYDSQO+pWALyXfKw2DPq1jKJ++xSAmhtuUZxGCNsj5UVYVcrMH4ioyqAIL+LeloswCtsR89LN1GCid8lG9n6t9pwvqbPXEVp7UP8+eVb2TgrxZ1JehHW9ru91Se57Jz6hnorDCPE/gXHBbO2gF4Wjz7ynNEvp7PkA7Ii+Dvd27kqzCGGLpLwIq0n7PIU+J1dTg4nod+5XHUeIv3B9+F4A+u7+lJIjJUoyVBSUEbtPX3fjdd9kJRmEsHVSXoTVHH/iDQC2hF1Lx8GdFKcR4q/iHh7JAZeueFLK9un/UZIh+akleFLKQacIet8zVEkGIWydlBdhFTnrc0jI1k+/3v7FhxWnEaJhBqOBnPH3ABCy9F00s2b1DJ4L3gcg+6IpGE0Gq7++EPZAyouwiux7/g9natje7mK5erSwaXFvTKEMDyIrd5Py1i9Wfe29X+0ktmQ9NZjoNut2q762EPZEyouwuBO78xi480MAtBlPKk4jxLn5hHmzrYe+1qR65iyrvnbec/pC4a0dJhLYJ8Sqry2EPZHyIixu97TXcaOSHW0S6DN9pOo4QpxXp7f+Ti1GBh7/nr1f7bTKa5blldBnl77OxuWhe6zymkLYKykvwqJO7D1B30361aMrpj+JwSgzfGH7wkZFsin0GgCOP/p/VnnNrQ98iielHHDuStzDUvKFOBcpL8KiUm57m7aUke4eR/xzl6qOI0Sj+bz0GADxWZ9zaH22RV+r5nQNnZe8BsDhiffKQl0hzkPKi7CYwgOn6LvhbQBK7ntC9roIuxJzc1+2+l6CE7Xsv+Nli77Wlse+pFNNFgUGP/q9O82iryWEI5DyIixmxy2z8OUUmW496P/SVarjCNFkLi88DcDg9I/I/jnTIq+hmTXazX0FgJ3D78fDz8MiryOEI5HyIiyiMC2fARvfBKDgwRcxOMkFGIX96X3vMLb4X4ozNRy+/RmLvMaWF3+ka0UKZXjQa869FnkNIRyNlBdhEbtunklbytjlPoCB/5ILywn75fnOSwAMOfg5GQu3t+hzm2vMeL6snz5ga7+78OvavkWfXwhHJeVFtLhDG3OI36afr6L8yX/J4kNh17pNimV92I0AnP7bAy161t1Njy6me8U2ivGk538fb7HnFcLRWay8FBYWctNNN+Hl5YWPjw/Tpk2jtLT0nNvff//9dO3aFXd3dzp16sQDDzxAUVGRpSIKC8mc8gKuVLHdZwQDZiSqjiPEBYtY9DKltCG2eD0b7vq0RZ6zsriS4NlPAZA84hHad/NvkecVojWwWHm56aab2L17Nz/99BPfffcdv/76K3feeedZtz9y5AhHjhxh1qxZ7Nq1i08++YQVK1YwbZqsvLcnGV/tYljmPADcZv1LjjASDiEkPpQt454FoNuHj1C478QFP2fS1bMIr87kmDGQfp9Nv+DnE6I1MWia1uJXHktLSyMmJoYtW7bQv39/AFasWMGll17KoUOHCAlp3GmvFy9ezM0330xZWRlOTk6NekxxcTHe3t4UFRXh5eXV7PcgmkHTSA4YS7+ClWzqcBXxh75SnUiIFlNVVs3B9n2IqtxNUodrGJTzRbPLee7a/fiN7Ik7p9lwz2cMmX1jC6cVwv405fe3Rfa8JCUl4ePjU1dcABITEzEajWzatKnRz3PmDZyruFRWVlJcXFzvJtTY/tIP9CtYSSUuBM63zllJhbAWlzbO1Mz9hGqcSDj8JRtu/7hZz1NbVcuJK27DndMk+4xi8Ds3tHBSIRyfRcpLXl4eAQEB9e5zcnKiXbt25OXlNeo5CgoKeOGFF845agKYOXMm3t7edbfQ0NBm5xbNV11ejffz+q7v9X0fJHxUF8WJhGh53W/pz4YxLwDQ9+P72DN/S5OfY/3YF4kr/pUS2uL/1RwZrQrRDE0qL48//jgGg+Gct/T09AsOVVxczPjx44mJieG5554757YzZsygqKio7pabm3vBry+abt1Nc4ioyuC4wZ9+X8uVo4XjGrbsUbb6j8WDCtrfdnmTLh3w2+NLGbrmeQBS7nyPThdHWiilEI6tcQtJfvf3v/+dW2+99ZzbREREEBQUxLFjx+rdX1NTQ2FhIUFBQed8fElJCWPHjsXT05MlS5bg7Ox8zu1dXV1xdXVtVH5hGcd2HaPPUn0x496bnmdImLfiREJYjsnFRPS2ReyNHEp05U4OjRhO9g8/E35J1Dkfl/zqamJfuQETZtbF/I2hc262UmIhHE+Tyou/vz/+/uc/nC8hIYFTp06RnJxMv379AFi9ejVms5n4+PizPq64uJgxY8bg6urKsmXLcHNza0o8oci+iY8whJNkuMeR8NHtquMIYXFeHb0oW7ecrCGj6Fy9l8Ixg9g8Y16DJ2TUzBrrp80j/pO7caGazQGXkZD8bwwyLRKi2SxytBHAuHHjyM/PZ86cOVRXVzN16lT69+/PggULADh8+DCjRo3i008/ZeDAgRQXFzN69GjKy8tZsmQJbdq0qXsuf39/TKbGnV5ejjayrtQ3V9P74VGYMbD7w9/oNW2g6khCWM3x3cc4NvAyepTra1+2+o3FfOddhF2rfx9kL9qE0/v/pt/JVQAkdbiGvrv/g6u3/MNMiD9ryu9vi5WXwsJC7rvvPr799luMRiNXX301b7/9Nm3btgUgOzubzp07s2bNGkaMGMHatWsZOXJkg8+VlZVFeHh4o15Xyov1nC6q5Kh/bzpX72V1zL1cvPvfqiMJYXVVpVVsHPkkQ7e+gRO1DW5TiQtJY//JsG8exeQi1/kSoiE2UV5UkfJiPauGP8+oX58l3xiE64F0fGSti2jFDq7eT/bf3yF813d0rMkCINc5guxeE4h66346DO2sOKEQtk3Ki5QXi9u7eAfh1w3AhWo2TV9I/GuTVEcSwmbUnK7BYDTIXhYhmqApv7+btGBXCIDqsiqYMkUvLsETiZ91nepIQtgUJzf50SqEJclVpUWTbRz3AtEVOzhhaE/YD3OQwyaEEEJYk5QX0SSpH21hyLqZAKQ/+B5BsYGKEwkhhGhtpLyIRis5XIzXXTfgRC2/hU1iyBvXqo4khBCiFZLyIhpFM2vsHvo3wmv2c8jUiZi176qOJIQQopWS8iIa5ddbP2JQ9kJqMFHwzkK8wtupjiSEEKKVkvIizmv3f7cz4D8PALB+7L+IuztBcSIhhBCtmZQXcU4n0o7hfetEPKggOXAcw797VHUkIYQQrZyUF3FWVaVVHEq4ho61OWQ5RxG5aQEGk/yVEUIIoZb8JhIN0swaG/veR2zROorwovrLZXiH+aiOJYQQQkh5EQ1bm/giI/bNxYyB/f/8jOjLu6mOJIQQQgBSXkQD1t/6ISPXPAPAxhv+Td9nLlOcSAghhPgfKS+invUPfUnC/L8BsCrhSYYuuEdxIiGEEKI+KS+izuZHFzPoresxYWZDt9u4eP0LqiMJIYQQfyHlRQCw4eHF9J2ln/p/fcRkElI/wGCUCy4KIYSwPVJeBL/e9D6D3rweJ2r5pfMUBu2Zh9HZpDqWEEII0SApL02w5Z/LqSqtUh2jxWhmjV8veoqLFtz1+6hoGkPTP8LJVYqLEEII2yXlpZF2f7yZAc+N50i7Hmx6chmaWVMd6YKUHStjQ/hNXLTuXwCsGvosg3fPxeQixUUIIYRtk/LSSOU5BeQbgwivziT+pSvY5j+afUt2qY7VLAd/3seRsEEMzf2cGkysmzyXUeuekzUuQggh7IKUl0Ya8OyleOTuZe2gx6nEhX6FPxNxVSy/xtzF0S2HVMdrFM2sse5v/8Xnkv5End5FvjGIPe+sZtj821VHE0IIIRpNyksTeIZ4MiJpJvlr0vgt5CpMmLko7X3aDezCL3EPcGzHUdURzypvRz5bQq9k2Ae34E0xqV5Dqdm0jd73XaQ6mhBCCNEkUl6aodOICAYd/oodb/9CivdFuFLF8B3v4BkXwS+xD3Bw9X7VEevUnK5h7XXv4hbXjYFHvqEKZ9Ze8i96HFtDh/7BquMJIYQQTSbl5QLE3n8RsYVr2fbqKlI9B+POaYanvkPoqCh+C7mSHW//omxhr7nGzMaHvyDXqwcjFt+LD6dI9+jD/oVbGbHyCUyuTkpyCSGEEBfKoGmafR828yfFxcV4e3tTVFSEl5eX1V5XM2tsn7WK2lmvM+D4D3X3H3COJmf4ZKKev4UOCZ0snqP8RAXJ0z8jaNFbRFXqC4pPGNqz+9p/MuQ/d8nRREIIIWxSU35/S3mxgP3fpXHo0bfon/4f2lAOgBkDu7wGUzhkAqH3XE7Epd1a7Ogec42Zne+t59TcL+i5ayHttRMAFOPJtpGP0PfTh/DqqObPQgghhGgMKS+Ky8sZpUdLSHn6K9p8NZ8+p9bW+9phUygHOwyheuAQ/C4dSMdRXfHu5N2o5z196jQ5P2WQt2wzpnVriMpdTYA5v+7ruU7h7B97H3HvTMMn3KcF35EQQghhGVJebKS8/NHh33LJfONbPFZ/S++C1bjy1zP1HjMGku/RmQqP9lS1bU+tmwcGsxnQcC49hXvJMbzLj9Cpej8mzPUeW4Q3O7tMxHXyJPo+PlrGQ0IIIeyKlBcbLC9/VJpXyt7/bqZo+QY8UzfQ8WQqQeamHWZ9Ch+yvWM5FTscnytH0v22BFy9XC2UWAghhLAsKS82Xl4aUpxbxKHVeylJO0TV0RPUHDuBobISDAYwGDD4eOPUIQCP8EBCRnYloHeQnBFXCCGEw2jK7285XtZGeIV6EzNlADBAdRQhhBDCpsl5XoQQQghhV6S8CCGEEMKuSHkRQgghhF2R8iKEEEIIuyLlRQghhBB2RcqLEEIIIeyKxcpLYWEhN910E15eXvj4+DBt2jRKS0sb9VhN0xg3bhwGg4GlS5daKqIQQggh7JDFystNN93E7t27+emnn/juu+/49ddfufPOOxv12DfffBODQU7AJoQQQoi/sshJ6tLS0lixYgVbtmyhf//+ALzzzjtceumlzJo1i5CQkLM+NiUlhddee42tW7cSHBxsiXhCCCGEsGMW2fOSlJSEj49PXXEBSExMxGg0smnTprM+rry8nBtvvJHZs2cTFBTUqNeqrKykuLi43k0IIYQQjssi5SUvL4+AgIB69zk5OdGuXTvy8vLO+riHH36YwYMHc8UVVzT6tWbOnIm3t3fdLTQ0tNm5hRBCCGH7mlReHn/8cQwGwzlv6enpzQqybNkyVq9ezZtvvtmkx82YMYOioqK6W25ubrNeXwghhBD2oUlrXv7+979z6623nnObiIgIgoKCOHbsWL37a2pqKCwsPOs4aPXq1ezfvx8fH59691999dUMGzaMtWvXNvg4V1dXXF1dG/sWhBBCCGHnmlRe/P398ff3P+92CQkJnDp1iuTkZPr16wfo5cRsNhMfH9/gYx5//HFuv/32evf16tWLN954gwkTJjQ6o6ZpALL2RQghhLAjZ35vn/k9fk6ahYwdO1br06ePtmnTJm39+vVaVFSUdsMNN9R9/dChQ1rXrl21TZs2nfU5AG3JkiVNet3c3FwNkJvc5CY3uclNbnZ4y83NPe/veoscKg3w2Wefcd999zFq1CiMRiNXX301b7/9dt3Xq6urycjIoLy8vEVfNyQkhNzcXDw9PVv8XDHFxcWEhoaSm5uLl5dXiz63LZD3Z/8c/T06+vsDx3+P8v7sn6Xeo6ZplJSUnPN0KmdYrLy0a9eOBQsWnPXr4eHh5901dL6vN8RoNNKxY8cmP64pvLy8HPYvJcj7cwSO/h4d/f2B479HeX/2zxLv0dvbu1HbybWNhBBCCGFXpLwIIYQQwq5IeWkCV1dXnn32WYc9NFven/1z9Pfo6O8PHP89yvuzf7bwHg1acxaWCCGEEEIoIntehBBCCGFXpLwIIYQQwq5IeRFCCCGEXZHyIoQQQgi7IuXlD/71r38xePBgPDw8/nKByLPRNI1nnnmG4OBg3N3dSUxMZN++ffW2KSws5KabbsLLywsfHx+mTZtGaWmpBd7BuTU1R3Z29lmvHr548eK67Rr6+sKFC63xlv6iOX/WI0aM+Ev+u+66q942OTk5jB8/Hg8PDwICAnj00Uepqamx5FtpUFPfX2FhIffffz9du3bF3d2dTp068cADD1BUVFRvO5Wf4ezZswkPD8fNzY34+Hg2b958zu0XL15Mt27dcHNzo1evXixfvrze1xvzPWlNTXl/c+fOZdiwYfj6+uLr60tiYuJftr/11lv/8lmNHTvW0m/jnJryHj/55JO/5Hdzc6u3jT1/hg39PDEYDIwfP75uG1v6DH/99VcmTJhASEgIBoOBpUuXnvcxa9eupW/fvri6uhIZGcknn3zyl22a+n3dZE26cJCDe+aZZ7TXX39dmz59uubt7d2ox7z88suat7e3tnTpUm3Hjh3a5ZdfrnXu3FmrqKio22bs2LFabGys9ttvv2nr1q3TIiMj613nyVqamqOmpkY7evRovds///lPrW3btlpJSUnddoD28ccf19vuj+/fmprzZz18+HDtjjvuqJe/qKio7us1NTVaz549tcTERG379u3a8uXLNT8/P23GjBmWfjt/0dT3t3PnTu2qq67Sli1bpmVmZmqrVq3SoqKitKuvvrredqo+w4ULF2ouLi7avHnztN27d2t33HGH5uPjo+Xn5ze4/YYNGzSTyaT93//9n7Znzx7tqaee0pydnbWdO3fWbdOY70lraer7u/HGG7XZs2dr27dv19LS0rRbb71V8/b21g4dOlS3zZQpU7SxY8fW+6wKCwut9Zb+oqnv8eOPP9a8vLzq5c/Ly6u3jT1/hidOnKj33nbt2qWZTCbt448/rtvGlj7D5cuXa08++aT29ddfa3D+6wkeOHBA8/Dw0KZPn67t2bNHe+eddzSTyaStWLGibpum/pk1h5SXBnz88ceNKi9ms1kLCgrSXn311br7Tp06pbm6umqff/65pmmatmfPHg3QtmzZUrfNDz/8oBkMBu3w4cMtnv1sWipHXFycdtttt9W7rzF/4a2hue9x+PDh2oMPPnjWry9fvlwzGo31fsC+9957mpeXl1ZZWdki2RujpT7DL774QnNxcdGqq6vr7lP1GQ4cOFC799576/67trZWCwkJ0WbOnNng9tddd502fvz4evfFx8drf/vb3zRNa9z3pDU19f39WU1Njebp6anNnz+/7r4pU6ZoV1xxRUtHbbamvsfz/Xx1tM/wjTfe0Dw9PbXS0tK6+2ztMzyjMT8H/vGPf2g9evSod9+kSZO0MWPG1P33hf6ZNYaMjS5AVlYWeXl5JCYm1t3n7e1NfHw8SUlJACQlJeHj40P//v3rtklMTMRoNLJp0yarZW2JHMnJyaSkpDBt2rS/fO3ee+/Fz8+PgQMHMm/evGZdl+pCXch7/Oyzz/Dz86Nnz57MmDGj3gVDk5KS6NWrF4GBgXX3jRkzhuLiYnbv3t3yb+QsWurvUlFREV5eXjg51b+0mbU/w6qqKpKTk+t9/xiNRhITE+u+f/4sKSmp3vagfxZntm/M96S1NOf9/Vl5eTnV1dW0a9eu3v1r164lICCArl27cvfdd3PixIkWzd5YzX2PpaWlhIWFERoayhVXXFHv+8jRPsOPPvqI66+/njZt2tS731Y+w6Y63/dgS/yZNYbFLszYGuTl5QHU+6V25r/PfC0vL4+AgIB6X3dycqJdu3Z121hDS+T46KOP6N69O4MHD653//PPP8/FF1+Mh4cHK1eu5J577qG0tJQHHnigxfI3RnPf44033khYWBghISGkpqby2GOPkZGRwddff133vA19xme+Zi0t8RkWFBTwwgsvcOedd9a7X8VnWFBQQG1tbYN/tunp6Q0+5myfxR+/387cd7ZtrKU57+/PHnvsMUJCQur9Ihg7dixXXXUVnTt3Zv/+/TzxxBOMGzeOpKQkTCZTi76H82nOe+zatSvz5s2jd+/eFBUVMWvWLAYPHszu3bvp2LGjQ32GmzdvZteuXXz00Uf17relz7CpzvY9WFxcTEVFBSdPnrzgv/eN4fDl5fHHH+eVV1455zZpaWl069bNSolaVmPf34WqqKhgwYIFPP3003/52h/v69OnD2VlZbz66qst9ovP0u/xj7/Ie/XqRXBwMKNGjWL//v106dKl2c/bWNb6DIuLixk/fjwxMTE899xz9b5m6c9QNN3LL7/MwoULWbt2bb0Frddff33d/+/Vqxe9e/emS5curF27llGjRqmI2iQJCQkkJCTU/ffgwYPp3r0777//Pi+88ILCZC3vo48+olevXgwcOLDe/fb+GdoChy8vf//737n11lvPuU1ERESznjsoKAiA/Px8goOD6+7Pz88nLi6ubptjx47Ve1xNTQ2FhYV1j78QjX1/F5rjyy+/pLy8nMmTJ5932/j4eF544QUqKytb5NoX1nqPZ8THxwOQmZlJly5dCAoK+stK+fz8fAC7+QxLSkoYO3Ysnp6eLFmyBGdn53Nu39KfYUP8/PwwmUx1f5Zn5Ofnn/X9BAUFnXP7xnxPWktz3t8Zs2bN4uWXX+bnn3+md+/e59w2IiICPz8/MjMzrf6L70Le4xnOzs706dOHzMxMwHE+w7KyMhYuXMjzzz9/3tdR+Rk21dm+B728vHB3d8dkMl3w34lGabHVMw6kqQt2Z82aVXdfUVFRgwt2t27dWrfNjz/+qGzBbnNzDB8+/C9HqJzNiy++qPn6+jY7a3O11J/1+vXrNUDbsWOHpmn/W7D7x5Xy77//vubl5aWdPn265d7AeTT3/RUVFWmDBg3Shg8frpWVlTXqtaz1GQ4cOFC777776v67trZW69ChwzkX7F522WX17ktISPjLgt1zfU9aU1Pfn6Zp2iuvvKJ5eXlpSUlJjXqN3NxczWAwaN98880F522O5rzHP6qpqdG6du2qPfzww5qmOcZnqGn67xFXV1etoKDgvK+h+jM8g0Yu2O3Zs2e9+2644Ya/LNi9kL8TjcraYs/kAA4ePKht37697nDg7du3a9u3b693WHDXrl21r7/+uu6/X375Zc3Hx0f75ptvtNTUVO2KK65o8FDpPn36aJs2bdLWr1+vRUVFKTtU+lw5Dh06pHXt2lXbtGlTvcft27dPMxgM2g8//PCX51y2bJk2d+5cbefOndq+ffu0d999V/Pw8NCeeeYZi7+fhjT1PWZmZmrPP/+8tnXrVi0rK0v75ptvtIiICO2iiy6qe8yZQ6VHjx6tpaSkaCtWrND8/f2VHSrdlPdXVFSkxcfHa7169dIyMzPrHZpZU1OjaZraz3DhwoWaq6ur9sknn2h79uzR7rzzTs3Hx6fuyK5bbrlFe/zxx+u237Bhg+bk5KTNmjVLS0tL05599tkGD5U+3/ektTT1/b388suai4uL9uWXX9b7rM78DCopKdEeeeQRLSkpScvKytJ+/vlnrW/fvlpUVJRVi/SFvMd//vOf2o8//qjt379fS05O1q6//nrNzc1N2717d9029vwZnjF06FBt0qRJf7nf1j7DkpKSut91gPb6669r27dv1w4ePKhpmqY9/vjj2i233FK3/ZlDpR999FEtLS1Nmz17doOHSp/rz6wlSHn5gylTpmjAX25r1qyp24bfz4dxhtls1p5++mktMDBQc3V11UaNGqVlZGTUe94TJ05oN9xwg9a2bVvNy8tLmzp1ar1CZC3ny5GVlfWX96tpmjZjxgwtNDRUq62t/ctz/vDDD1pcXJzWtm1brU2bNlpsbKw2Z86cBre1hqa+x5ycHO2iiy7S2rVrp7m6umqRkZHao48+Wu88L5qmadnZ2dq4ceM0d3d3zc/PT/v73/9e71Bja2nq+1uzZk2Df6cBLSsrS9M09Z/hO++8o3Xq1ElzcXHRBg4cqP322291Xxs+fLg2ZcqUett/8cUXWnR0tObi4qL16NFD+/777+t9vTHfk9bUlPcXFhbW4Gf17LPPapqmaeXl5dro0aM1f39/zdnZWQsLC9PuuOOOFv2l0BxNeY8PPfRQ3baBgYHapZdeqm3btq3e89nzZ6hpmpaenq4B2sqVK//yXLb2GZ7tZ8SZ9zRlyhRt+PDhf3lMXFyc5uLiokVERNT7nXjGuf7MWoJB0xQc0yqEEEII0UxynhchhBBC2BUpL0IIIYSwK1JehBBCCGFXpLwIIYQQwq5IeRFCCCGEXZHyIoQQQgi7IuVFCCGEEHZFyosQQggh7IqUFyGEEELYFSkvQgghhLArUl6EEEIIYVekvAghhBDCrvw/kENWvzcBaGkAAAAASUVORK5CYII=" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "plt.plot(t, normal_res, color='blue')\n", + "plt.plot(t, cache_res, color='red')" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-03T03:53:58.803332700Z", + "start_time": "2024-06-03T03:53:57.867105200Z" + } + }, + "id": "97ace2f9183fef16" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "4edc26eea7760479" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tensorneat/tmp.ipynb b/tensorneat/tmp.ipynb new file mode 100644 index 0000000..60f38da --- /dev/null +++ b/tensorneat/tmp.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 22, + "id": "initial_id", + "metadata": { + "collapsed": true, + "ExecuteTime": { + "end_time": "2024-06-06T11:55:39.434327400Z", + "start_time": "2024-06-06T11:55:39.361327400Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "Array([[[2, 4],\n [1, 3]],\n\n [[4, 3],\n [2, 1]],\n\n [[3, 1],\n [4, 2]],\n\n [[1, 2],\n [3, 4]],\n\n [[2, 4],\n [1, 3]],\n\n [[4, 3],\n [2, 1]],\n\n [[3, 1],\n [4, 2]],\n\n [[1, 2],\n [3, 4]]], dtype=int32)" + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import jax, jax.numpy as jnp\n", + "a = jnp.array([\n", + " [1, 2],\n", + " [3, 4]\n", + "])\n", + "def rot_boards(board):\n", + " def rot(a, _):\n", + " a = jnp.rot90(a)\n", + " return a, a # carry, y\n", + " \n", + " _, boards = jax.lax.scan(rot, board, jnp.arange(4, dtype=jnp.int32))\n", + " return boards\n", + "a1 = rot_boards(a)\n", + "a2 = rot_boards(a)\n", + "\n", + "a = jnp.concatenate([a1, a2], axis=0)\n", + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [ + { + "data": { + "text/plain": "Array([[2, 4, 1, 3],\n [4, 3, 2, 1],\n [3, 1, 4, 2],\n [1, 2, 3, 4],\n [2, 4, 1, 3],\n [4, 3, 2, 1],\n [3, 1, 4, 2],\n [1, 2, 3, 4]], dtype=int32)" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = a.reshape(8, -1)\n", + "a" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-06T11:55:31.121054800Z", + "start_time": "2024-06-06T11:55:31.075517200Z" + } + }, + "id": "639cdecea840351d" + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [], + "source": [ + "action = [\"up\", \"right\", \"down\", \"left\"]\n", + "lr_flip_action = [\"up\", \"left\", \"down\", \"right\"]\n", + "def action_rot90(li):\n", + " first = li[0]\n", + " return li[1:] + [first]\n", + "\n", + "a = a\n", + "rl_flip_a = jnp.fliplr(a)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-06T11:22:36.417287600Z", + "start_time": "2024-06-06T11:22:36.414285500Z" + } + }, + "id": "92b75cd0e870a28c" + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 2]\n", + " [3 4]] ['up', 'right', 'down', 'left']\n", + "[[2 1]\n", + " [4 3]] ['up', 'left', 'down', 'right']\n", + "[[2 4]\n", + " [1 3]] ['right', 'down', 'left', 'up']\n", + "[[1 3]\n", + " [2 4]] ['left', 'down', 'right', 'up']\n", + "[[4 3]\n", + " [2 1]] ['down', 'left', 'up', 'right']\n", + "[[3 4]\n", + " [1 2]] ['down', 'right', 'up', 'left']\n", + "[[3 1]\n", + " [4 2]] ['left', 'up', 'right', 'down']\n", + "[[4 2]\n", + " [3 1]] ['right', 'up', 'left', 'down']\n" + ] + } + ], + "source": [ + "for i in range(4):\n", + " print(a, action)\n", + " print(rl_flip_a, lr_flip_action)\n", + " a = jnp.rot90(a)\n", + " rl_flip_a = jnp.rot90(rl_flip_a)\n", + " action = action_rot90(action)\n", + " lr_flip_action = action_rot90(lr_flip_action)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-06T11:22:36.919614600Z", + "start_time": "2024-06-06T11:22:36.860704600Z" + } + }, + "id": "55e802e0dbcc9c7f" + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "Array([[4, 3],\n [2, 1]], dtype=int32)" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "jnp.rot90(a, k=2)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-06T11:12:48.186719Z", + "start_time": "2024-06-06T11:12:48.151161900Z" + } + }, + "id": "16f8de3cadaa257a" + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": "Array([[2, 1],\n [4, 3]], dtype=int32)" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# flip left-right\n", + "jnp.fliplr(a)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-06-06T11:14:28.668195300Z", + "start_time": "2024-06-06T11:14:28.631570500Z" + } + }, + "id": "1fffa4e597ab5732" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "ca53c916dcff12ae" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}