FrozenLake 4x4 (Action History)
[61]:
import gymnasium as gym
[62]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
[63]:
from gymcts.logger import log
[64]:
# Verbosity of the gymcts logger, given as a numeric level:
#   20 -> INFO
#   10 -> DEBUG (shows more detailed information)
LOG_LEVEL = 20
log.setLevel(LOG_LEVEL)
[65]:
if __name__ == '__main__':
    # 0. create the environment (4x4 map, non-slippery, text rendering)
    env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
    env.reset()

    # 1. wrap the environment with the action-history MCTS wrapper
    env = ActionHistoryMCTSGymEnvWrapper(env)

    # 2. create the agent: 50 simulations per step, keep the search tree
    #    between steps, and render the (visited part of the) tree after each step
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        number_of_simulations_per_step=50,
        exclude_unvisited_nodes_from_render=True
    )

    # 3. solve the environment; `actions` is the sequence of selected actions
    actions = agent.solve()

    # 4. render the environment solution in the terminal
    print(env.render())
    # Pre-bind `info` so the report below cannot hit a NameError when
    # `actions` is empty and the loop body never runs.
    info = {}
    for a in actions:
        obs, rew, term, trun, info = env.step(a)
        print(env.render())

    # 5. print the solution
    # The episode statistics are read from the `info` dict of the last step.
    # NOTE(review): the "episode" entry ("l" = length, "r" = return) presumably
    # comes from a RecordEpisodeStatistics wrapper applied inside
    # ActionHistoryMCTSGymEnvWrapper -- confirm against the wrapper's source.
    episode_stats = info.get("episode")
    if episode_stats is None:
        # No statistics were reported: either no action was replayed or the
        # replayed actions did not finish the episode.
        print(f"No episode statistics available after replaying {len(actions)} actions.")
    else:
        episode_length = episode_stats["l"]
        episode_return = episode_stats["r"]

        if episode_return == 1.0:
            print(f"Environment solved in {episode_length} steps.")
        else:
            print(f"Environment not solved in {episode_length} steps.")
(N=50, Q_v=0.00, best=0.00)
├── (a=0, N=13, Q_v=0.00, best=0.00, ubc=0.39)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
[16:45:51] INFO selected action 0 after 50 simulations.
INFO current action list: [0]
(a=0, N=63, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.37)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.37)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0]
(a=0, N=66, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0]
(a=0, N=67, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0]
(a=0, N=67, Q_v=0.07, best=1.00, ubc=0.26)
├── (a=0, N=12, Q_v=0.00, best=0.00, ubc=0.42)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.42)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.42)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=30, Q_v=0.17, best=1.00, ubc=0.43)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=16, Q_v=0.31, best=1.00, ubc=0.64)
│ ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.84)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.01)
│ ├── (a=2, N=5, Q_v=0.40, best=1.00, ubc=0.93)
│ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.01)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 3 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3]
(a=3, N=80, Q_v=0.11, best=1.00, ubc=0.28)
├── (a=0, N=12, Q_v=0.00, best=0.00, ubc=0.43)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=11, Q_v=0.00, best=0.00, ubc=0.45)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=30, Q_v=0.17, best=1.00, ubc=0.44)
│ ├── (a=0, N=7, Q_v=0.14, best=1.00, ubc=0.64)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ ├── (a=1, N=7, Q_v=0.14, best=1.00, ubc=0.64)
│ ├── (a=2, N=9, Q_v=0.22, best=1.00, ubc=0.66)
│ │ ├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=0.94)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.05)
│ └── (a=3, N=6, Q_v=0.17, best=1.00, ubc=0.70)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
└── (a=3, N=26, Q_v=0.15, best=1.00, ubc=0.44)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=13, Q_v=0.31, best=1.00, ubc=0.66)
│ ├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=0.99)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.99)
│ ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=0.99)
│ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=0.99)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2]
(a=2, N=80, Q_v=0.11, best=1.00, ubc=0.29)
├── (a=0, N=16, Q_v=0.06, best=1.00, ubc=0.43)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.07, best=1.00, ubc=0.45)
├── (a=2, N=33, Q_v=0.18, best=1.00, ubc=0.44)
│ ├── (a=0, N=17, Q_v=0.29, best=1.00, ubc=0.61)
│ │ ├── (a=0, N=10, Q_v=0.40, best=1.00, ubc=0.78)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.84)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.84)
│ │ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.84)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=15, Q_v=0.07, best=1.00, ubc=0.45)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2]
(a=2, N=83, Q_v=0.12, best=1.00, ubc=0.29)
├── (a=0, N=40, Q_v=0.23, best=1.00, ubc=0.46)
│ ├── (a=0, N=16, Q_v=0.25, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.84)
│ │ ├── (a=1, N=4, Q_v=0.25, best=1.00, ubc=0.84)
│ │ ├── (a=2, N=4, Q_v=0.25, best=1.00, ubc=0.84)
│ │ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.01)
│ ├── (a=1, N=6, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=2, N=6, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ └── (a=3, N=11, Q_v=0.36, best=1.00, ubc=0.77)
│ ├── (a=0, N=7, Q_v=0.57, best=1.00, ubc=0.99)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=1.09)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=1.09)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.09)
├── (a=1, N=14, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0]
(a=0, N=90, Q_v=0.10, best=1.00, ubc=0.26)
├── (a=0, N=31, Q_v=0.13, best=1.00, ubc=0.40)
│ ├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.64)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ └── (a=3, N=7, Q_v=0.14, best=1.00, ubc=0.64)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
├── (a=1, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=30, Q_v=0.13, best=1.00, ubc=0.41)
├── (a=0, N=14, Q_v=0.29, best=1.00, ubc=0.63)
│ ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
INFO selected action 3 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3]
(a=3, N=80, Q_v=0.10, best=1.00, ubc=0.28)
├── (a=0, N=40, Q_v=0.20, best=1.00, ubc=0.43)
│ ├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.61)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.61)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.66)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ └── (a=3, N=16, Q_v=0.31, best=1.00, ubc=0.65)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=9, Q_v=0.44, best=1.00, ubc=0.84)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=13, Q_v=0.00, best=0.00, ubc=0.41)
├── (a=2, N=13, Q_v=0.00, best=0.00, ubc=0.41)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=13, Q_v=0.00, best=0.00, ubc=0.41)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0]
(a=0, N=90, Q_v=0.13, best=1.00, ubc=0.30)
├── (a=0, N=16, Q_v=0.06, best=1.00, ubc=0.44)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.07, best=1.00, ubc=0.45)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=29, Q_v=0.17, best=1.00, ubc=0.45)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=2, N=14, Q_v=0.29, best=1.00, ubc=0.63)
│ │ ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
│ │ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ │ ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ │ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=29, Q_v=0.17, best=1.00, ubc=0.45)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=14, Q_v=0.29, best=1.00, ubc=0.63)
│ ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
│ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2]
(a=2, N=79, Q_v=0.06, best=1.00, ubc=0.24)
├── (a=0, N=15, Q_v=0.00, best=0.00, ubc=0.38)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.00, best=0.00, ubc=0.38)
├── (a=2, N=33, Q_v=0.12, best=1.00, ubc=0.38)
│ ├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=2, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ └── (a=3, N=8, Q_v=0.13, best=1.00, ubc=0.59)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
└── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.38)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2]
(a=2, N=83, Q_v=0.05, best=1.00, ubc=0.22)
├── (a=0, N=21, Q_v=0.05, best=1.00, ubc=0.37)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=21, Q_v=0.05, best=1.00, ubc=0.37)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=20, Q_v=0.05, best=1.00, ubc=0.38)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=20, Q_v=0.05, best=1.00, ubc=0.38)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2]
(a=2, N=70, Q_v=0.01, best=1.00, ubc=0.20)
├── (a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=2, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0]
(a=0, N=68, Q_v=0.10, best=1.00, ubc=0.29)
├── (a=0, N=11, Q_v=0.00, best=0.00, ubc=0.44)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=11, Q_v=0.00, best=0.00, ubc=0.44)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=11, Q_v=0.00, best=0.00, ubc=0.44)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ │ └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=34, Q_v=0.21, best=1.00, ubc=0.45)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=20, Q_v=0.35, best=1.00, ubc=0.65)
│ ├── (a=0, N=5, Q_v=0.40, best=1.00, ubc=0.95)
│ ├── (a=1, N=5, Q_v=0.40, best=1.00, ubc=0.95)
│ ├── (a=2, N=5, Q_v=0.40, best=1.00, ubc=0.95)
│ └── (a=3, N=4, Q_v=0.25, best=1.00, ubc=0.86)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 3 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3]
(a=3, N=84, Q_v=0.08, best=1.00, ubc=0.25)
├── (a=0, N=14, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=41, Q_v=0.17, best=1.00, ubc=0.40)
│ ├── (a=0, N=11, Q_v=0.18, best=1.00, ubc=0.59)
│ ├── (a=1, N=11, Q_v=0.18, best=1.00, ubc=0.59)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ │ ├── (a=1, N=4, Q_v=0.25, best=1.00, ubc=0.80)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ │ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
│ ├── (a=2, N=10, Q_v=0.20, best=1.00, ubc=0.63)
│ └── (a=3, N=8, Q_v=0.13, best=1.00, ubc=0.61)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 1 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1]
(a=1, N=91, Q_v=0.08, best=1.00, ubc=0.24)
├── (a=0, N=24, Q_v=0.08, best=1.00, ubc=0.39)
├── (a=1, N=24, Q_v=0.08, best=1.00, ubc=0.39)
│ ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.56)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.57)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
│ ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.56)
│ └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.56)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=23, Q_v=0.09, best=1.00, ubc=0.40)
└── (a=3, N=19, Q_v=0.05, best=1.00, ubc=0.40)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.54)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.54)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.61)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1, 2]
INFO Final action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1, 2]
(Right)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Up)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Up)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Up)
SFFF
FHFH
FFFH
HFFG
(Down)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
Environment solved in 36 steps.