FrozenLake 4x4 (Action History)

[61]:
import gymnasium as gym
[62]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
[63]:
from gymcts.logger import log
[64]:
# Verbosity of the gymcts logger, given as a numeric level:
#   20 = INFO  (used here)
#   10 = DEBUG (switch to this to see more detailed information)
log.setLevel(20)
[65]:
if __name__ == '__main__':
    # Example script: plan a FrozenLake 4x4 episode with MCTS, replay the
    # returned actions while rendering each step, then report the outcome.

    # 0. create the environment
    #    (non-slippery 4x4 map, rendered as text via render_mode="ansi")
    env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="ansi")
    env.reset()

    # 1. wrap the environment with the action-history wrapper so the MCTS agent can use it
    env = ActionHistoryMCTSGymEnvWrapper(env)

    # 2. create the agent
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        number_of_simulations_per_step=50,
        exclude_unvisited_nodes_from_render=True
    )

    # 3. solve the environment; `actions` is the sequence of selected actions
    actions = agent.solve()

    # 4. render the environment solution in the terminal
    # `info` is pre-bound so that an empty action list does not leave it
    # undefined for the reporting step below.
    info = {}
    print(env.render())
    for a in actions:
        obs, rew, term, trun, info = env.step(a)
        print(env.render())

    # 5. print the solution
    # Read the episode statistics from the `info` dict of the last step.
    # NOTE(review): the "episode" entry ("l" = length, "r" = return) is expected
    # to be provided by a RecordEpisodeStatistics wrapper, presumably applied
    # inside ActionHistoryMCTSGymEnvWrapper -- confirm against the wrapper.
    episode_stats = info.get("episode")

    if episode_stats is None:
        # No step was replayed, or the last step did not report episode
        # statistics; report this instead of failing on a missing key.
        print("No episode statistics available: the replayed actions did not finish an episode.")
    else:
        episode_length = episode_stats["l"]
        episode_return = episode_stats["r"]

        if episode_return == 1.0:
            print(f"Environment solved in {episode_length} steps.")
        else:
            print(f"Environment not solved in {episode_length} steps.")
(N=50, Q_v=0.00, best=0.00)
├── (a=0, N=13, Q_v=0.00, best=0.00, ubc=0.39)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│       ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│       └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│       └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│       └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.40)
    ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
[16:45:51] INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0]
(a=0, N=63, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.37)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.37)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0]
(a=0, N=66, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=16, Q_v=0.00, best=0.00, ubc=0.36)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0]
(a=0, N=67, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=16, Q_v=0.00, best=0.00, ubc=0.36)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=16, Q_v=0.00, best=0.00, ubc=0.36)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0]
(a=0, N=67, Q_v=0.07, best=1.00, ubc=0.26)
├── (a=0, N=12, Q_v=0.00, best=0.00, ubc=0.42)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.42)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
    └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.42)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=30, Q_v=0.17, best=1.00, ubc=0.43)
    ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=16, Q_v=0.31, best=1.00, ubc=0.64)
    ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.84)
    ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.01)
    ├── (a=2, N=5, Q_v=0.40, best=1.00, ubc=0.93)
    └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.01)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 3 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3]
(a=3, N=80, Q_v=0.11, best=1.00, ubc=0.28)
├── (a=0, N=12, Q_v=0.00, best=0.00, ubc=0.43)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=11, Q_v=0.00, best=0.00, ubc=0.45)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=30, Q_v=0.17, best=1.00, ubc=0.44)
├── (a=0, N=7, Q_v=0.14, best=1.00, ubc=0.64)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
├── (a=1, N=7, Q_v=0.14, best=1.00, ubc=0.64)
├── (a=2, N=9, Q_v=0.22, best=1.00, ubc=0.66)
├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=0.94)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.05)
└── (a=3, N=6, Q_v=0.17, best=1.00, ubc=0.70)
    ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.95)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
└── (a=3, N=26, Q_v=0.15, best=1.00, ubc=0.44)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=13, Q_v=0.31, best=1.00, ubc=0.66)
    ├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=0.99)
    ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.99)
    ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=0.99)
    └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=0.99)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.64)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2]
(a=2, N=80, Q_v=0.11, best=1.00, ubc=0.29)
├── (a=0, N=16, Q_v=0.06, best=1.00, ubc=0.43)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.07, best=1.00, ubc=0.45)
├── (a=2, N=33, Q_v=0.18, best=1.00, ubc=0.44)
├── (a=0, N=17, Q_v=0.29, best=1.00, ubc=0.61)
├── (a=0, N=10, Q_v=0.40, best=1.00, ubc=0.78)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.84)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.84)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.84)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=15, Q_v=0.07, best=1.00, ubc=0.45)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2]
(a=2, N=83, Q_v=0.12, best=1.00, ubc=0.29)
├── (a=0, N=40, Q_v=0.23, best=1.00, ubc=0.46)
├── (a=0, N=16, Q_v=0.25, best=1.00, ubc=0.59)
├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.84)
├── (a=1, N=4, Q_v=0.25, best=1.00, ubc=0.84)
├── (a=2, N=4, Q_v=0.25, best=1.00, ubc=0.84)
└── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.01)
├── (a=1, N=6, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=2, N=6, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.95)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
└── (a=3, N=11, Q_v=0.36, best=1.00, ubc=0.77)
    ├── (a=0, N=7, Q_v=0.57, best=1.00, ubc=0.99)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=1.09)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=1.09)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.09)
├── (a=1, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=14, Q_v=0.00, best=0.00, ubc=0.40)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0]
(a=0, N=90, Q_v=0.10, best=1.00, ubc=0.26)
├── (a=0, N=31, Q_v=0.13, best=1.00, ubc=0.40)
├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.64)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=7, Q_v=0.14, best=1.00, ubc=0.64)
    ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
    ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
├── (a=1, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=30, Q_v=0.13, best=1.00, ubc=0.41)
    ├── (a=0, N=14, Q_v=0.29, best=1.00, ubc=0.63)
    ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
    ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.58)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
        ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
           INFO     selected action 3 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3]
(a=3, N=80, Q_v=0.10, best=1.00, ubc=0.28)
├── (a=0, N=40, Q_v=0.20, best=1.00, ubc=0.43)
├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.61)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.61)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.66)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.70)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=16, Q_v=0.31, best=1.00, ubc=0.65)
    ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=9, Q_v=0.44, best=1.00, ubc=0.84)
    ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=13, Q_v=0.00, best=0.00, ubc=0.41)
├── (a=2, N=13, Q_v=0.00, best=0.00, ubc=0.41)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=13, Q_v=0.00, best=0.00, ubc=0.41)
    ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0]
(a=0, N=90, Q_v=0.13, best=1.00, ubc=0.30)
├── (a=0, N=16, Q_v=0.06, best=1.00, ubc=0.44)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.07, best=1.00, ubc=0.45)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=29, Q_v=0.17, best=1.00, ubc=0.45)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=2, N=14, Q_v=0.29, best=1.00, ubc=0.63)
├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
└── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=29, Q_v=0.17, best=1.00, ubc=0.45)
    ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=14, Q_v=0.29, best=1.00, ubc=0.63)
    ├── (a=0, N=4, Q_v=0.25, best=1.00, ubc=0.82)
    ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    ├── (a=2, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=1.00)
    ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.65)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2]
(a=2, N=79, Q_v=0.06, best=1.00, ubc=0.24)
├── (a=0, N=15, Q_v=0.00, best=0.00, ubc=0.38)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.00, best=0.00, ubc=0.38)
├── (a=2, N=33, Q_v=0.12, best=1.00, ubc=0.38)
├── (a=0, N=8, Q_v=0.13, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=8, Q_v=0.13, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
└── (a=3, N=8, Q_v=0.13, best=1.00, ubc=0.59)
    ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
└── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.38)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2]
(a=2, N=83, Q_v=0.05, best=1.00, ubc=0.22)
├── (a=0, N=21, Q_v=0.05, best=1.00, ubc=0.37)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.55)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=21, Q_v=0.05, best=1.00, ubc=0.37)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
└── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.55)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=20, Q_v=0.05, best=1.00, ubc=0.38)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=20, Q_v=0.05, best=1.00, ubc=0.38)
    ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.55)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.55)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.55)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2]
(a=2, N=70, Q_v=0.01, best=1.00, ubc=0.20)
├── (a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.54)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=2, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=17, Q_v=0.00, best=0.00, ubc=0.35)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0]
(a=0, N=68, Q_v=0.10, best=1.00, ubc=0.29)
├── (a=0, N=11, Q_v=0.00, best=0.00, ubc=0.44)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=11, Q_v=0.00, best=0.00, ubc=0.44)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=11, Q_v=0.00, best=0.00, ubc=0.44)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.63)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
└── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=34, Q_v=0.21, best=1.00, ubc=0.45)
    ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.59)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=20, Q_v=0.35, best=1.00, ubc=0.65)
    ├── (a=0, N=5, Q_v=0.40, best=1.00, ubc=0.95)
    ├── (a=1, N=5, Q_v=0.40, best=1.00, ubc=0.95)
    ├── (a=2, N=5, Q_v=0.40, best=1.00, ubc=0.95)
    └── (a=3, N=4, Q_v=0.25, best=1.00, ubc=0.86)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.66)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 3 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3]
(a=3, N=84, Q_v=0.08, best=1.00, ubc=0.25)
├── (a=0, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=41, Q_v=0.17, best=1.00, ubc=0.40)
├── (a=0, N=11, Q_v=0.18, best=1.00, ubc=0.59)
├── (a=1, N=11, Q_v=0.18, best=1.00, ubc=0.59)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.77)
├── (a=1, N=4, Q_v=0.25, best=1.00, ubc=0.80)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.77)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.77)
├── (a=2, N=10, Q_v=0.20, best=1.00, ubc=0.63)
└── (a=3, N=8, Q_v=0.13, best=1.00, ubc=0.61)
    ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=14, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=14, Q_v=0.00, best=0.00, ubc=0.40)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.57)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.66)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
    └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.66)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
        └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
           INFO     selected action 1 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1]
(a=1, N=91, Q_v=0.08, best=1.00, ubc=0.24)
├── (a=0, N=24, Q_v=0.08, best=1.00, ubc=0.39)
├── (a=1, N=24, Q_v=0.08, best=1.00, ubc=0.39)
├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.56)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=1, N=8, Q_v=0.13, best=1.00, ubc=0.57)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.72)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.72)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.02)
├── (a=2, N=5, Q_v=0.00, best=0.00, ubc=0.56)
└── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.56)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=23, Q_v=0.09, best=1.00, ubc=0.40)
└── (a=3, N=19, Q_v=0.05, best=1.00, ubc=0.40)
    ├── (a=0, N=5, Q_v=0.00, best=0.00, ubc=0.54)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.54)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.61)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.61)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1, 2]
           INFO     Final action list: [0, 0, 0, 0, 3, 2, 2, 0, 3, 0, 2, 2, 2, 0, 3, 1, 2]
  (Right)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Up)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Up)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
FHFH
FFFH
HFFG

  (Up)
SFFF
FHFH
FFFH
HFFG

  (Down)
SFFF
FHFH
FFFH
HFFG

  (Right)
SFFF
FHFH
FFFH
HFFG

Environment solved in 36 steps.