{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-06-05T05:07:22.736605400Z", "start_time": "2024-06-05T05:06:39.100164300Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "initializing\n", "initializing finished\n", "start compile\n", "compile finished, cost time: 18.307454s\n", "Generation: 1.0, Cost time: 4551.03ms\n", " \tnode counts: max: 21, min: 21, mean: 21.00\n", " \tconn counts: max: 20, min: 20, mean: 20.00\n", " \tspecies: 1, [10000]\n", " \tfitness: valid cnt: 10000, max: 10124.0000, min: 44.0000, mean: 1758.1263, std: 1212.6823\n", "Generation: 2.0, Cost time: 4636.33ms\n", " \tnode counts: max: 22, min: 21, mean: 21.03\n", " \tconn counts: max: 22, min: 20, mean: 20.05\n", " \tspecies: 1, [10000]\n", " \tfitness: valid cnt: 10000, max: 11000.0000, min: 48.0000, mean: 1870.1300, std: 1263.3086\n", "Generation: 3.0, Cost time: 6271.12ms\n", " \tnode counts: max: 23, min: 21, mean: 21.03\n", " \tconn counts: max: 22, min: 20, mean: 20.05\n", " \tspecies: 1, [10000]\n", " \tfitness: valid cnt: 10000, max: 14624.0000, min: 28.0000, mean: 1943.9924, std: 1293.7146\n", "\n", "Fitness limit reached!\n" ] } ], "source": [ "import jax.numpy as jnp\n", "\n", "from pipeline import Pipeline\n", "from algorithm.neat import *\n", "\n", "from problem.rl_env.jumanji.jumanji_2048 import Jumanji_2048\n", "from utils import Act, Agg\n", "\n", "if __name__ == \"__main__\":\n", " pipeline = Pipeline(\n", " algorithm=NEAT(\n", " species=DefaultSpecies(\n", " genome=DefaultGenome(\n", " num_inputs=16,\n", " num_outputs=4,\n", " max_nodes=100,\n", " max_conns=1000,\n", " node_gene=DefaultNodeGene(\n", " activation_default=Act.sigmoid,\n", " activation_options=(Act.sigmoid, Act.relu, Act.tanh, Act.identity, Act.inv),\n", " aggregation_default=Agg.sum,\n", " aggregation_options=(Agg.sum, Agg.mean, Agg.max, Agg.product),\n", " ),\n", " mutation=DefaultMutation(\n", " node_add=0.03,\n", " conn_add=0.03,\n", " )\n", " ),\n", " pop_size=10000,\n", " species_size=100,\n", " survival_threshold=0.01,\n", " ),\n", " ),\n", " problem=Jumanji_2048(\n", " max_step=1000,\n", " ),\n", " generation_limit=10000,\n", " fitness_target=13000,\n", " )\n", "\n", " # initialize state\n", " state = pipeline.setup()\n", " # print(state)\n", " # run until terminate\n", " state, best = pipeline.auto_run(state)" ] }, { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [ "genome = pipeline.algorithm.genome" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-06-05T05:08:14.332101Z", "start_time": "2024-06-05T05:08:14.324101300Z" } }, "id": "a0915ecf8179f347" }, { "cell_type": "code", "execution_count": 4, "outputs": [], "source": [ "transformed = genome.transform(state, *best)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-06-05T05:08:49.132030500Z", "start_time": "2024-06-05T05:08:48.495809200Z" } }, "id": "cd1fa65e8a9d6e13" }, { "cell_type": "code", "execution_count": 5, "outputs": [], "source": [ "def policy(board):\n", " action_scores = genome.forward(state, transformed, board)\n", " return action_scores" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-06-05T05:09:32.355055100Z", "start_time": "2024-06-05T05:09:32.350057Z" } }, "id": "61bc1895af304651" }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 1, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(2, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 0, 2],\n", " [0, 1, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 1, 2],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 1, 2, 2],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(4, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 3],\n", " [0, 0, 0, 0],\n", " [2, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 3],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 0, 1, 3],\n", " [0, 0, 0, 1],\n", " [0, 0, 0, 0],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 3],\n", " [0, 0, 0, 1],\n", " [0, 0, 0, 0],\n", " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 1, 1, 3],\n", " [0, 0, 2, 1],\n", " [0, 1, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 2, 1, 3],\n", " [1, 0, 2, 1],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 3],\n", " [0, 1, 2, 1],\n", " [0, 1, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 3, 1, 3],\n", " [0, 2, 2, 1],\n", " [1, 0, 0, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", " [0, 2, 2, 1],\n", " [0, 0, 0, 0],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", " [0, 2, 2, 1],\n", " [0, 0, 1, 0],\n", " [0, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", " [0, 0, 3, 1],\n", " [0, 0, 1, 1],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[1, 3, 1, 3],\n", " [0, 0, 3, 2],\n", " [0, 0, 1, 2],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 3],\n", " [0, 0, 3, 3],\n", " [0, 1, 1, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(8, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [0, 1, 3, 1],\n", " [0, 0, 1, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [0, 1, 3, 1],\n", " [0, 0, 0, 1],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [0, 2, 3, 2],\n", " [0, 0, 0, 0],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [0, 2, 3, 2],\n", " [0, 0, 0, 1],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 2, 3, 2],\n", " [0, 0, 0, 1],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 2, 3, 2],\n", " [0, 0, 0, 2],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 2, 3, 3],\n", " [0, 0, 0, 1],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 2, 3, 3],\n", " [0, 1, 0, 1],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 2, 3, 3],\n", " [0, 2, 1, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 3, 1, 4],\n", " [1, 3, 3, 3],\n", " [0, 0, 1, 1],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", " [1, 0, 3, 3],\n", " [0, 1, 1, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", " [1, 1, 3, 3],\n", " [0, 0, 1, 2],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", " [1, 1, 3, 3],\n", " [0, 1, 2, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", " [1, 2, 3, 3],\n", " [0, 1, 2, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 4],\n", " [1, 1, 2, 4],\n", " [0, 0, 1, 3],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(16, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 1, 2, 3],\n", " [0, 1, 1, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 2, 2, 3],\n", " [0, 0, 1, 0],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 2, 2, 3],\n", " [0, 1, 1, 0],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 2, 2, 3],\n", " [0, 1, 2, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 2, 3, 3],\n", " [0, 1, 1, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 1, 2, 4],\n", " [0, 0, 1, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [0, 2, 2, 4],\n", " [0, 0, 1, 2],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [1, 2, 2, 4],\n", " [0, 0, 1, 2],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 4, 1, 5],\n", " [2, 2, 2, 4],\n", " [1, 0, 1, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [1, 2, 2, 4],\n", " [0, 0, 1, 2],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [1, 2, 2, 4],\n", " [0, 1, 1, 2],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [1, 2, 2, 4],\n", " [0, 1, 2, 2],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [2, 2, 3, 4],\n", " [1, 1, 0, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [0, 3, 3, 4],\n", " [0, 0, 2, 2],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [1, 3, 3, 4],\n", " [0, 0, 2, 2],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [0, 1, 4, 4],\n", " [0, 1, 0, 3],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [0, 2, 4, 4],\n", " [0, 1, 0, 3],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 5],\n", " [0, 0, 2, 5],\n", " [1, 0, 1, 3],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(32, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 0, 2, 3],\n", " [0, 1, 1, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 1, 2, 3],\n", " [0, 0, 1, 1],\n", " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 1, 2, 3],\n", " [2, 0, 1, 1],\n", " [0, 2, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 1, 2, 3],\n", " [2, 2, 1, 1],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 1, 2, 3],\n", " [2, 2, 2, 1],\n", " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 1, 3, 3],\n", " [2, 2, 2, 1],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [0, 1, 2, 4],\n", " [0, 2, 3, 1],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 4, 0],\n", " [2, 3, 1, 1],\n", " [2, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 4, 1],\n", " [3, 3, 1, 0],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 4, 2],\n", " [3, 3, 1, 0],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 4, 2],\n", " [3, 3, 2, 0],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 4, 2],\n", " [0, 0, 4, 2],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 5, 3],\n", " [0, 1, 1, 1],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 5, 3],\n", " [0, 0, 1, 2],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [1, 2, 5, 3],\n", " [1, 2, 1, 0],\n", " [1, 0, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [2, 3, 5, 3],\n", " [1, 1, 1, 0],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [2, 3, 5, 3],\n", " [0, 0, 1, 2],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [2, 3, 5, 3],\n", " [0, 1, 1, 2],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 4, 1, 6],\n", " [2, 3, 5, 3],\n", " [1, 2, 1, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 2, 0, 0],\n", " [3, 4, 1, 6],\n", " [2, 3, 5, 3],\n", " [1, 2, 1, 2]], dtype=int32), action_mask=Array([ True, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", " [2, 4, 5, 3],\n", " [1, 3, 1, 2],\n", " [0, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", " [2, 4, 5, 3],\n", " [1, 3, 1, 2],\n", " [1, 0, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", " [2, 4, 5, 3],\n", " [2, 3, 1, 2],\n", " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 2, 1, 6],\n", " [3, 4, 5, 3],\n", " [1, 3, 1, 2],\n", " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 3],\n", " [0, 3, 1, 2],\n", " [2, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 3],\n", " [2, 3, 1, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 3],\n", " [2, 3, 1, 2],\n", " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 3],\n", " [3, 3, 2, 2],\n", " [1, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 3],\n", " [0, 1, 4, 3],\n", " [0, 1, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [1, 4, 5, 4],\n", " [1, 2, 4, 1],\n", " [0, 0, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 5, 4],\n", " [0, 2, 4, 1],\n", " [0, 0, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 5, 4],\n", " [0, 2, 4, 2],\n", " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 5, 4],\n", " [2, 4, 2, 0],\n", " [2, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(40., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 5, 4],\n", " [2, 1, 2, 0],\n", " [0, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 5, 4],\n", " [2, 2, 2, 0],\n", " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 5, 4],\n", " [2, 2, 2, 1],\n", " [0, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 5, 4],\n", " [2, 3, 2, 1],\n", " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [0, 3, 6, 4],\n", " [2, 3, 2, 1],\n", " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [1, 1, 2, 1],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [0, 2, 2, 1],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [1, 0, 3, 1],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [0, 1, 3, 1],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [0, 2, 3, 1],\n", " [0, 2, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [0, 3, 3, 1],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [1, 3, 3, 1],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [2, 4, 6, 4],\n", " [2, 1, 4, 1],\n", " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 4, 6, 4],\n", " [1, 1, 4, 1],\n", " [0, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 4, 6, 4],\n", " [0, 2, 4, 1],\n", " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 4, 6, 4],\n", " [2, 4, 1, 0],\n", " [1, 3, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [2, 3, 1, 1],\n", " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [2, 3, 1, 2],\n", " [1, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [2, 3, 1, 2],\n", " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [2, 3, 1, 3],\n", " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [2, 3, 1, 3],\n", " [2, 1, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [3, 5, 6, 4],\n", " [3, 3, 2, 3],\n", " [0, 1, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 2, 1, 6],\n", " [4, 5, 6, 4],\n", " [1, 3, 3, 3],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", " [1, 5, 6, 4],\n", " [0, 3, 3, 3],\n", " [2, 1, 0, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", " [1, 5, 6, 4],\n", " [2, 3, 3, 3],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", " [1, 5, 6, 4],\n", " [0, 2, 3, 4],\n", " [1, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 1, 6],\n", " [2, 5, 6, 5],\n", " [1, 2, 3, 2],\n", " [0, 0, 0, 0]], dtype=int32), action_mask=Array([False, False, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 0, 1, 0],\n", " [5, 2, 1, 6],\n", " [2, 5, 6, 5],\n", " [1, 2, 3, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 2, 2, 6],\n", " [2, 5, 6, 5],\n", " [1, 2, 3, 2],\n", " [0, 1, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[0, 5, 3, 6],\n", " [2, 5, 6, 5],\n", " [1, 2, 3, 2],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", " [1, 2, 6, 5],\n", " [0, 1, 3, 2],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", " [1, 2, 6, 5],\n", " [0, 2, 3, 2],\n", " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", " [1, 3, 6, 5],\n", " [0, 0, 3, 2],\n", " [1, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[2, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [1, 0, 3, 2],\n", " [0, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [1, 3, 6, 5],\n", " [0, 0, 3, 2],\n", " [1, 0, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [0, 1, 3, 2],\n", " [0, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [0, 1, 3, 2],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [1, 1, 3, 3],\n", " [0, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [0, 0, 2, 4],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [1, 1, 2, 4],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [0, 2, 2, 4],\n", " [1, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [1, 2, 2, 4],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [2, 3, 6, 5],\n", " [2, 1, 3, 4],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[3, 6, 3, 6],\n", " [3, 3, 6, 5],\n", " [0, 1, 3, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [0, 3, 6, 5],\n", " [0, 1, 3, 4],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [0, 3, 6, 5],\n", " [0, 2, 3, 4],\n", " [0, 2, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [0, 3, 6, 5],\n", " [0, 3, 3, 4],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [1, 4, 6, 5],\n", " [0, 2, 3, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [1, 4, 6, 5],\n", " [2, 3, 4, 0],\n", " [1, 2, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [1, 4, 6, 5],\n", " [0, 2, 3, 4],\n", " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [2, 4, 6, 5],\n", " [1, 2, 3, 4],\n", " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [2, 4, 6, 5],\n", " [1, 2, 3, 4],\n", " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [2, 4, 6, 5],\n", " [2, 2, 3, 4],\n", " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 5],\n", " [1, 2, 3, 4],\n", " [1, 3, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, False], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 5],\n", " [2, 2, 3, 4],\n", " [0, 3, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 5],\n", " [0, 3, 3, 4],\n", " [1, 0, 3, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 5],\n", " [1, 3, 4, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 5],\n", " [0, 1, 3, 5],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 6],\n", " [3, 4, 6, 6],\n", " [0, 2, 3, 2],\n", " [2, 0, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(64, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 2, 3, 0],\n", " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 2, 3, 3],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 0, 2, 4],\n", " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 1, 2, 4],\n", " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 2, 2, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 0, 3, 4],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 1, 3, 4],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 1, 3, 4],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 1, 3, 4],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 1, 3, 4],\n", " [0, 1, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 2, 3, 4],\n", " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 3, 3, 4],\n", " [0, 0, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 1, 4, 4],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 2, 4, 4],\n", " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 2, 4, 4],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 1, 2, 5],\n", " [0, 0, 2, 3]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 1, 3, 5],\n", " [1, 0, 0, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 1, 3, 5],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 2, 3, 5],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 3, 3, 5],\n", " [0, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 1, 4, 5],\n", " [0, 2, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [0, 1, 4, 5],\n", " [1, 0, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 1, 4, 5],\n", " [0, 1, 3, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 2, 4, 5],\n", " [0, 1, 3, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 2, 4, 5],\n", " [0, 1, 1, 4]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 2, 4, 5],\n", " [0, 2, 2, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [1, 3, 4, 5],\n", " [1, 0, 2, 4]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 3, 4, 5],\n", " [0, 1, 2, 4]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 3, 4, 5],\n", " [1, 2, 4, 1]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 3, 5, 5],\n", " [1, 2, 1, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(68., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 6, 2],\n", " [2, 3, 6, 1],\n", " [1, 2, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 2],\n", " [2, 3, 2, 1],\n", " [1, 2, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 2],\n", " [2, 3, 3, 1],\n", " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 2],\n", " [2, 3, 3, 2],\n", " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [2, 3, 3, 1],\n", " [1, 2, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [2, 3, 3, 2],\n", " [1, 2, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [0, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [1, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [2, 2, 4, 2],\n", " [2, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [3, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[4, 6, 3, 7],\n", " [4, 4, 7, 3],\n", " [1, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [2, 4, 7, 3],\n", " [0, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [2, 4, 7, 3],\n", " [1, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [2, 4, 7, 3],\n", " [2, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [1, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [2, 2, 4, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [3, 4, 7, 3],\n", " [3, 4, 2, 0],\n", " [2, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(48., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [2, 2, 2, 1],\n", " [1, 0, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 2, 3, 1],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 2, 3, 1],\n", " [2, 0, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 2, 3, 2],\n", " [2, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 2, 3, 2],\n", " [1, 0, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [2, 2, 3, 2],\n", " [1, 0, 2, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [0, 3, 3, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 3, 3, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [2, 3, 3, 2],\n", " [1, 1, 2, 1]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(20., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [2, 4, 2, 1],\n", " [2, 2, 1, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [3, 4, 2, 1],\n", " [0, 2, 1, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [3, 4, 2, 2],\n", " [0, 2, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 3],\n", " [1, 3, 4, 3],\n", " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [1, 3, 4, 2],\n", " [0, 0, 2, 1]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [1, 3, 4, 2],\n", " [2, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [1, 3, 4, 2],\n", " [1, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [2, 3, 4, 3],\n", " [1, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [2, 3, 4, 3],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [2, 3, 4, 3],\n", " [1, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [2, 3, 4, 3],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 4],\n", " [2, 3, 4, 4],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(32., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 2],\n", " [0, 1, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 2],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 3],\n", " [1, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 3],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 3],\n", " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 3],\n", " [1, 0, 1, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [2, 3, 4, 4],\n", " [1, 0, 1, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 5],\n", " [1, 2, 3, 5],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(64., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [1, 2, 3, 2],\n", " [0, 1, 1, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [1, 2, 3, 2],\n", " [1, 0, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 2, 3, 3],\n", " [0, 0, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(24., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 0, 3, 4],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 1, 3, 4],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 2, 3, 4],\n", " [0, 1, 0, 1]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 2, 3, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [1, 2, 0, 0]], dtype=int32), action_mask=Array([False, True, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [0, 1, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [1, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [1, 1, 2, 3]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 4, 1],\n", " [2, 2, 3, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [3, 3, 4, 2],\n", " [1, 2, 3, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 4, 4, 2],\n", " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [1, 4, 4, 2],\n", " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 4, 4, 2],\n", " [1, 1, 2, 3]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 5, 2, 0],\n", " [2, 2, 3, 1]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(72., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 6, 3, 7],\n", " [4, 6, 7, 6],\n", " [3, 2, 2, 1],\n", " [1, 0, 3, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(128., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [3, 0, 2, 1],\n", " [1, 0, 3, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 3, 2, 1],\n", " [0, 1, 3, 2]], dtype=int32), action_mask=Array([False, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 3, 2, 1],\n", " [1, 3, 2, 1]], dtype=int32), action_mask=Array([ True, False, True, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(28., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 4, 3, 2],\n", " [1, 1, 0, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 4, 3, 2],\n", " [0, 0, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 4, 3, 3],\n", " [1, 0, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 2, 7, 6],\n", " [2, 2, 4, 4],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 3, 7, 6],\n", " [2, 0, 4, 4],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 3, 7, 6],\n", " [0, 0, 2, 5],\n", " [0, 2, 2, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 3, 7, 6],\n", " [0, 2, 3, 5],\n", " [0, 2, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 3, 7, 6],\n", " [0, 3, 3, 5],\n", " [0, 1, 0, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 4, 7, 6],\n", " [1, 1, 3, 5],\n", " [0, 0, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(36., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [0, 5, 7, 6],\n", " [0, 2, 3, 5],\n", " [0, 1, 0, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [0, 5, 7, 6],\n", " [1, 2, 3, 5],\n", " [0, 0, 1, 2]], dtype=int32), action_mask=Array([ True, False, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [1, 5, 7, 6],\n", " [0, 2, 3, 5],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [2, 5, 7, 6],\n", " [0, 2, 3, 5],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [2, 5, 7, 6],\n", " [1, 2, 3, 5],\n", " [1, 0, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [2, 5, 7, 6],\n", " [2, 2, 3, 5],\n", " [0, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [3, 5, 7, 6],\n", " [0, 2, 3, 5],\n", " [1, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(0., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [3, 5, 7, 6],\n", " [1, 2, 3, 5],\n", " [1, 1, 1, 2]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [3, 5, 7, 6],\n", " [2, 2, 3, 5],\n", " [1, 1, 1, 2]], dtype=int32), action_mask=Array([False, True, False, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(12., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [3, 5, 7, 6],\n", " [3, 3, 5, 1],\n", " [2, 1, 2, 0]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 5, 1],\n", " [1, 1, 2, 0]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 5, 1],\n", " [1, 0, 2, 2]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 5, 1],\n", " [0, 1, 1, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(4., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [2, 3, 5, 1],\n", " [2, 0, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(8., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [3, 3, 5, 1],\n", " [1, 0, 2, 3]], dtype=int32), action_mask=Array([False, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(1, dtype=int8), reward=Array(16., dtype=float32), discount=Array(1., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [0, 4, 5, 1],\n", " [1, 1, 2, 3]], dtype=int32), action_mask=Array([ True, True, True, True], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "TimeStep(step_type=Array(2, dtype=int8), reward=Array(0., dtype=float32), discount=Array(0., dtype=float32), observation=Observation(board=Array([[5, 7, 3, 7],\n", " [4, 5, 7, 6],\n", " [1, 4, 5, 1],\n", " [2, 1, 2, 3]], dtype=int32), action_mask=Array([False, False, False, False], dtype=bool)), extras={'highest_tile': Array(128, dtype=int32)})\n", "3004.0\n" ] } ], "source": [ "import jax, jumanji\n", "\n", "env = jumanji.make(\"Game2048-v1\")\n", "key = jax.random.PRNGKey(48)\n", "jit_reset = jax.jit(env.reset)\n", "jit_step = jax.jit(env.step)\n", "state, timestep = jax.jit(env.reset)(key)\n", "jit_policy = jax.jit(policy)\n", "total_reward = 0\n", "while True:\n", " board, action_mask = timestep[\"observation\"]\n", " action = jit_policy(timestep[\"observation\"][0].reshape(-1))\n", " score_with_mask = jnp.where(action_mask, action, -jnp.inf)\n", " action = jnp.argmax(score_with_mask)\n", " state, timestep = jit_step(state, action)\n", " done = jnp.all(~timestep[\"observation\"][1])\n", " print(timestep)\n", " total_reward += timestep[\"reward\"]\n", " if done:\n", " break\n", "print(total_reward)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-06-05T05:15:43.041491500Z", "start_time": "2024-06-05T05:15:37.325953600Z" } }, "id": "f166e09c5be1a8fb" }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false }, "id": "187326d08ac1eeb4" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }