graph-jsp-env (Deepcopy Wrapper)

[1]:
from graph_jsp_env.disjunctive_graph_jsp_env import DisjunctiveGraphJspEnv
from jsp_instance_utils.instances import ft06, ft06_makespan


     0100100100100000001111
     0000110011001000000100     ▐███▌         ▟███▛▟███████▛▐███▌
     1111011011000111100101     ▐███▌        ▟███▛▟███████▛ ▐███▌
     10▐███▐██▙▟██00▟█▙0100     ▐███▌ ▟███  ▟███▛    ▟███▛  ▐███▌
     101▐█1▐█1▜▛▐█0▟█▀█▙001     ▐███▌▟████ ▟███▛    ▟███▛   ▐███▌
     00▐███▐█000▐█▟█▛▀▀█▙01     ▐██████▛▐█████▛    ▟████████▐█████████▛
     0010010010000000111100     ▐█████▛ ▐████▛    ▟█████████▐████████▛


     ▐█▀▜▙█▙   ███████▐█ ▐█    ▟█▙   ▟█▙  ▟███▐█ ▐█▐█▀▀▐█▙ █
     ▐█▄▟▛▜█▙▟▙██ ▐█  ▐████   ▟▛ ▜▙ ▟▛ ▜▙ █▍  ▐████▐█▀▀▐██▙█
     ▐█ ▜▙ ▜█▛▜██ ▐█  ▐█ ▐█  ▟█▛▀▜█▙█▛▀▜█▙▜███▐█ ▐█▐█▆▆▐█ ▜█
           ▐█  ▐█▐█▙ █▐███▜█▙ ▟███▀▀▐█▀▜▙▟█▀▜█▐███▐█████▙ ▟▛
           ▐█  ▐█▐██▙█ ▐█  ▜█▄█▛▐█▀▀▐█▄▟▛▜█▆▆▄ ▐█  ▐█  ▜█▄▛
            ▜███▛▐█ ▜█▐███  ▜█▛ ▐███▐█ ▜▙▐█▆▆▛▐███ ▐█   ██

    
    Disjunctive Graph Job Shop Problem Environment
    

    Version:    0.3.3
[2]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
from gymnasium.wrappers import TransformReward
from gymcts.logger import log
[3]:
import gymnasium as gym
import numpy as np
[4]:
# Example script: solve the ft06 job shop instance with the gymcts MCTS agent,
# using the deep-copy wrapper to snapshot/restore environment state.
if __name__ == '__main__':
    # 20 is the numeric value of INFO in Python's standard logging levels;
    # presumably `log` follows that convention (the "INFO" lines in the
    # output suggest so).
    log.setLevel(20)


    # Keyword arguments forwarded verbatim to DisjunctiveGraphJspEnv.
    # NOTE(review): "jps_instance" looks like a misspelling of "jsp", but the
    # key has to match the constructor's keyword name -- verify against the
    # graph-jsp-env API before renaming it.
    env_kwargs = {
        "jps_instance": ft06,
        "default_visualisations": ["gantt_console", "graph_console"],
        "reward_function_parameters": {
            "scaling_divisor": ft06_makespan
        }
    }

    env = DisjunctiveGraphJspEnv(**env_kwargs)
    # Reward shaping: every non-zero reward r becomes r / ft06_makespan + 2,
    # a reward of exactly 0 stays 0.0.
    # Original intent (as stated by the author): map the reward to (-inf, 1];
    # ideally the reward lies in [-1, 1] for the UCB score (shown as "ubc" in
    # the rendered tree).
    # NOTE(review): the env is already configured with
    # scaling_divisor=ft06_makespan above. If the env divides its reward by
    # that value itself, dividing again here scales twice and squeezes all
    # values just below 2 rather than into the range stated above -- confirm
    # whether `r + 2` was intended.
    env = TransformReward(env, lambda r: r / ft06_makespan + 2 if r != 0 else 0.0)
    env.reset()



    def mask_fn(env: gym.Env) -> np.ndarray:
        """Return the action mask of the innermost (unwrapped) environment."""
        # Any function producing a mask for the current env state works here.
        # This example relies on the base env exposing `valid_action_mask()`;
        # `unwrapped` is used to reach it through the wrapper layers.
        return env.unwrapped.valid_action_mask()

    # Wrap the env for MCTS; the mask function is handed to the wrapper so it
    # can query which actions are valid.
    env = DeepCopyMCTSGymEnvWrapper(
        env,
        action_mask_fn=mask_fn
    )


    # MCTS agent running 50 simulations per step; the search tree is rendered
    # after each step, with unvisited nodes excluded from the rendering.
    agent = GymctsAgent(
        env=env,
        render_tree_after_step=True,
        exclude_unvisited_nodes_from_render=True,
        number_of_simulations_per_step=50,
    )

    # Handle on the root of the search tree; not used again within this block.
    root = agent.search_root_node.get_root()

    # Run the search, then apply the returned actions to `env` one by one.
    # The per-step results (obs, rew, term, trun, info) are not inspected.
    actions = agent.solve(render_tree_after_step=True)
    for a in actions:
        obs, rew, term, trun, info = env.step(a)

    # Show the final state and report the makespan taken from the base env.
    env.render()
    makespan = env.unwrapped.get_makespan()
    print(f"makespan: {makespan}")
(N=50, Q_v=1.98, best=1.98)
├── (a=0, N=8, Q_v=1.98, best=1.98, ubc=2.47)
│   ├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=6, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=24, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   └── (a=30, N=2, Q_v=1.98, best=1.98, ubc=2.70)
│       └── (a=6, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=6, N=8, Q_v=1.98, best=1.98, ubc=2.47)
│   ├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   └── (a=30, N=2, Q_v=1.98, best=1.98, ubc=2.70)
│       └── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
│   ├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=6, N=2, Q_v=1.98, best=1.98, ubc=2.70)
│   └── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.56)
│   ├── (a=13, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   └── (a=30, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=8, Q_v=1.97, best=1.98, ubc=2.47)
│   ├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=6, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
│   └── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.56)
│   ├── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   └── (a=30, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=24, N=8, Q_v=1.98, best=1.98, ubc=2.47)
│   ├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   ├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
│   ├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
│   └── (a=30, N=2, Q_v=1.98, best=1.98, ubc=2.70)
│       └── (a=6, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=30, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=24, N=1, Q_v=1.97, best=1.97, ubc=3.02)
    └── (a=31, N=2, Q_v=1.98, best=1.98, ubc=2.72)
        └── (a=6, N=1, Q_v=1.98, best=1.98, ubc=2.56)
[16:50:50] INFO     selected action 30 after 50 simulations.
           INFO     current action list: [30]
(N=50, Q_v=1.98, best=1.98)
├── (a=0, N=8, Q_v=1.97, best=1.97, ubc=2.47)
├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=2, Q_v=1.97, best=1.97, ubc=2.70)
└── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=24, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=6, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=13, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=24, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=24, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=6, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=3.02)
└── (a=31, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=6, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=32, N=1, Q_v=1.97, best=1.97, ubc=2.56)
    ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=24, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    └── (a=32, N=1, Q_v=1.97, best=1.97, ubc=2.99)
[16:50:51] INFO     selected action 24 after 50 simulations.
           INFO     current action list: [30, 24]
(N=50, Q_v=1.98, best=1.98)
├── (a=0, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=6, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=0, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=7, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=3.02)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.02)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=0, N=1, Q_v=1.98, best=1.98, ubc=2.56)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=8, Q_v=1.97, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=31, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=2.99)
    ├── (a=6, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=0, N=1, Q_v=1.98, best=1.98, ubc=2.56)
    └── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
           INFO     selected action 6 after 50 simulations.
           INFO     current action list: [30, 24, 6]
(N=50, Q_v=1.98, best=1.98)
├── (a=0, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=13, N=1, Q_v=1.97, best=1.97, ubc=3.02)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=0, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=18, N=8, Q_v=1.97, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=31, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=25, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=0, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=0, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=7, N=1, Q_v=1.98, best=1.98, ubc=2.56)
    ├── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=32, N=1, Q_v=1.97, best=1.97, ubc=2.99)
[16:50:52] INFO     selected action 0 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.97, best=1.98, ubc=2.69)
└── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=1, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=31, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=31, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=31, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=1, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=32, N=1, Q_v=1.97, best=1.97, ubc=3.02)
[16:50:53] INFO     selected action 31 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=2, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=32, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=7, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=12, N=2, Q_v=1.97, best=1.98, ubc=2.71)
└── (a=13, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=32, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=25, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.56)
└── (a=32, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=7, N=1, Q_v=1.98, best=1.98, ubc=2.99)
    ├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=33, N=2, Q_v=1.98, best=1.98, ubc=2.70)
        └── (a=7, N=1, Q_v=1.97, best=1.97, ubc=2.56)
           INFO     selected action 7 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=25, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.56)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.99)
├── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=32, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=25, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.56)
    └── (a=33, N=2, Q_v=1.98, best=1.98, ubc=2.72)
        └── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
[16:50:54] INFO     selected action 32 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=12, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=25, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=25, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=25, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=9, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=1, Q_v=1.97, best=1.97, ubc=3.02)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.02)
└── (a=33, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=34, N=1, Q_v=1.97, best=1.97, ubc=2.56)
    ├── (a=25, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
           INFO     selected action 25 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=12, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=26, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=8, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.02)
└── (a=33, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=1, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=1, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=12, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=34, N=1, Q_v=1.97, best=1.97, ubc=2.56)
    ├── (a=12, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.99)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=34, N=1, Q_v=1.97, best=1.97, ubc=2.99)
[16:50:55] INFO     selected action 12 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12]
(N=50, Q_v=1.98, best=1.98)
├── (a=1, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=2, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=8, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=3.02)
├── (a=8, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=9, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=8, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=1, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=1, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=1, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=8, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
           INFO     selected action 1 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1]
(N=50, Q_v=1.98, best=1.98)
├── (a=2, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=13, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=8, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=8, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=13, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=27, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=8, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.99)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=8, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    └── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
[16:50:56] INFO     selected action 8 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8]
(N=50, Q_v=1.98, best=1.98)
├── (a=2, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=33, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=9, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=33, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=33, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=2, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    └── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
           INFO     selected action 33 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33]
(N=50, Q_v=1.98, best=1.98)
├── (a=2, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=13, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=18, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=2, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=13, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=9, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=9, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.99)
[16:50:57] INFO     selected action 9 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9]
(N=50, Q_v=1.98, best=1.98)
├── (a=2, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.56)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=2, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.97, best=1.97, ubc=2.99)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=13, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.97, best=1.97, ubc=2.99)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=13, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=13, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.97, best=1.97, ubc=2.99)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.00)
           INFO     selected action 13 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13]
(N=50, Q_v=1.98, best=1.98)
├── (a=2, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=10, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=2, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.70)
        └── (a=2, N=1, Q_v=1.98, best=1.98, ubc=2.57)
[16:50:58] INFO     selected action 2 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=4, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=11, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=10, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.03)
           INFO     selected action 10 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=4, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=18, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=18, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.00)
[16:50:59] INFO     selected action 18 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=4, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=11, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=14, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=15, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=26, N=8, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.70)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=11, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.00)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.47)
    ├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=11, N=2, Q_v=1.98, best=1.98, ubc=2.70)
    └── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.00)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.00)
           INFO     selected action 11 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=26, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=19, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=19, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.97, best=1.97, ubc=2.56)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=10, Q_v=1.98, best=1.98, ubc=2.42)
    ├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=19, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.05)
[16:51:00] INFO     selected action 19 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=20, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.97, best=1.97, ubc=2.56)
├── (a=14, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=21, N=1, Q_v=1.98, best=1.98, ubc=3.02)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=14, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=10, Q_v=1.98, best=1.98, ubc=2.42)
    ├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=14, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.05)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.74)
        └── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
           INFO     selected action 14 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14]
(N=50, Q_v=1.98, best=1.98)
├── (a=3, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=15, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=16, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=22, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=26, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=3, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=3, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    ├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.72)
        └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
[16:51:01] INFO     selected action 3 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3]
(N=50, Q_v=1.98, best=1.98)
├── (a=4, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=4, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=9, Q_v=1.98, best=1.98, ubc=2.44)
    ├── (a=4, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.02)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.03)
           INFO     selected action 4 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4]
(N=50, Q_v=1.98, best=1.98)
├── (a=5, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=15, N=3, Q_v=1.98, best=1.98, ubc=2.60)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=16, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=20, N=9, Q_v=1.98, best=1.98, ubc=2.44)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=26, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=3.05)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=15, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=10, Q_v=1.98, best=1.98, ubc=2.42)
    ├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=15, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=3.05)
[16:51:02] INFO     selected action 15 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15]
(N=50, Q_v=1.98, best=1.98)
├── (a=5, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.60)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=16, N=9, Q_v=1.98, best=1.98, ubc=2.45)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=17, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=5, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=1, Q_v=1.98, best=1.98, ubc=3.03)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=3.03)
├── (a=20, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=16, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=21, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=10, Q_v=1.98, best=1.98, ubc=2.42)
├── (a=5, N=1, Q_v=1.98, best=1.98, ubc=3.05)
├── (a=16, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.74)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=10, Q_v=1.98, best=1.98, ubc=2.42)
    ├── (a=5, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=16, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=3.05)
    ├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.74)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.74)
        └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
           INFO     selected action 5 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5]
(N=50, Q_v=1.98, best=1.98)
├── (a=16, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=17, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=26, N=2, Q_v=1.98, best=1.98, ubc=2.77)
└── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.77)
└── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=26, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=26, N=13, Q_v=1.98, best=1.98, ubc=2.37)
├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.63)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=12, Q_v=1.98, best=1.98, ubc=2.38)
    ├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    ├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=26, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    ├── (a=26, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.77)
        └── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.57)
           INFO     selected action 26 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26]
(N=50, Q_v=1.98, best=1.98)
├── (a=16, N=13, Q_v=1.98, best=1.98, ubc=2.37)
├── (a=17, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.63)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.63)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.77)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=27, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=28, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.77)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=12, Q_v=1.98, best=1.98, ubc=2.38)
    ├── (a=16, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.77)
    └── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.57)
    ├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=3, Q_v=1.98, best=1.98, ubc=2.62)
        ├── (a=16, N=1, Q_v=1.98, best=1.98, ubc=2.72)
        └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
[16:51:03] INFO     selected action 16 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16]
(N=50, Q_v=1.98, best=1.98)
├── (a=17, N=13, Q_v=1.98, best=1.98, ubc=2.37)
├── (a=20, N=4, Q_v=1.98, best=1.98, ubc=2.55)
├── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.81)
├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.81)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.81)
├── (a=27, N=4, Q_v=1.98, best=1.98, ubc=2.55)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.81)
├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.81)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.81)
└── (a=34, N=4, Q_v=1.98, best=1.98, ubc=2.55)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.81)
    ├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.81)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.81)
├── (a=20, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=17, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=21, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.77)
    └── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=27, N=12, Q_v=1.98, best=1.98, ubc=2.38)
├── (a=17, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.72)
├── (a=28, N=3, Q_v=1.98, best=1.98, ubc=2.62)
├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.72)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.77)
    └── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=12, Q_v=1.98, best=1.98, ubc=2.38)
    ├── (a=17, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=20, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    ├── (a=20, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    ├── (a=27, N=3, Q_v=1.98, best=1.98, ubc=2.62)
    ├── (a=17, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.72)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.77)
        └── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.57)
           INFO     selected action 17 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17]
(N=50, Q_v=1.98, best=1.98)
├── (a=20, N=16, Q_v=1.98, best=1.98, ubc=2.33)
├── (a=21, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=22, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=27, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    ├── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=27, N=17, Q_v=1.98, best=1.98, ubc=2.32)
├── (a=20, N=6, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.65)
├── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.65)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.93)
├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=16, Q_v=1.98, best=1.98, ubc=2.33)
    ├── (a=20, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=27, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    ├── (a=27, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.51)
        ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
        └── (a=27, N=2, Q_v=1.98, best=1.98, ubc=2.61)
           INFO     selected action 27 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27]
(N=50, Q_v=1.98, best=1.98)
├── (a=20, N=17, Q_v=1.98, best=1.98, ubc=2.32)
├── (a=21, N=6, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.65)
├── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.65)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.93)
├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=28, N=16, Q_v=1.98, best=1.98, ubc=2.33)
├── (a=20, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=29, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=16, Q_v=1.98, best=1.98, ubc=2.33)
    ├── (a=20, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    ├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.51)
        ├── (a=20, N=2, Q_v=1.98, best=1.98, ubc=2.61)
        └── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.61)
[16:51:04] INFO     selected action 20 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20]
(N=50, Q_v=1.98, best=1.98)
├── (a=21, N=17, Q_v=1.98, best=1.98, ubc=2.32)
├── (a=22, N=6, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.65)
├── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.65)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.93)
├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=28, N=16, Q_v=1.98, best=1.98, ubc=2.33)
├── (a=21, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=29, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=16, Q_v=1.98, best=1.98, ubc=2.33)
    ├── (a=21, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    ├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.51)
        ├── (a=21, N=2, Q_v=1.98, best=1.98, ubc=2.61)
        └── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.61)
           INFO     selected action 21 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21]
(N=50, Q_v=1.98, best=1.98)
├── (a=22, N=17, Q_v=1.98, best=1.98, ubc=2.32)
├── (a=23, N=6, Q_v=1.98, best=1.98, ubc=2.47)
├── (a=28, N=3, Q_v=1.98, best=1.98, ubc=2.53)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.65)
├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=28, N=16, Q_v=1.98, best=1.98, ubc=2.33)
├── (a=22, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=1, Q_v=1.98, best=1.98, ubc=2.88)
├── (a=29, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=16, Q_v=1.98, best=1.98, ubc=2.33)
    ├── (a=22, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    ├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.51)
        ├── (a=22, N=2, Q_v=1.98, best=1.98, ubc=2.61)
        └── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.61)
           INFO     selected action 22 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22]
(N=50, Q_v=1.98, best=1.98)
├── (a=23, N=17, Q_v=1.98, best=1.98, ubc=2.32)
├── (a=28, N=8, Q_v=1.98, best=1.98, ubc=2.40)
├── (a=29, N=4, Q_v=1.98, best=1.98, ubc=2.49)
└── (a=34, N=3, Q_v=1.98, best=1.98, ubc=2.57)
└── (a=34, N=8, Q_v=1.98, best=1.98, ubc=2.40)
    ├── (a=28, N=4, Q_v=1.98, best=1.98, ubc=2.49)
    └── (a=35, N=3, Q_v=1.98, best=1.98, ubc=2.57)
├── (a=28, N=16, Q_v=1.98, best=1.98, ubc=2.33)
├── (a=23, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=29, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.61)
├── (a=29, N=5, Q_v=1.98, best=1.98, ubc=2.51)
├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=2, Q_v=1.98, best=1.98, ubc=2.61)
└── (a=34, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
└── (a=34, N=16, Q_v=1.98, best=1.98, ubc=2.33)
    ├── (a=23, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    └── (a=35, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=28, N=5, Q_v=1.98, best=1.98, ubc=2.51)
    ├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
    ├── (a=29, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=1, Q_v=1.98, best=1.98, ubc=2.88)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.51)
        ├── (a=23, N=2, Q_v=1.98, best=1.98, ubc=2.61)
        └── (a=28, N=2, Q_v=1.98, best=1.98, ubc=2.61)
           INFO     selected action 23 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23]
(N=50, Q_v=1.98, best=1.98)
├── (a=28, N=25, Q_v=1.98, best=1.98, ubc=2.26)
├── (a=29, N=12, Q_v=1.98, best=1.98, ubc=2.35)
└── (a=34, N=11, Q_v=1.98, best=1.98, ubc=2.32)
└── (a=34, N=12, Q_v=1.98, best=1.98, ubc=2.35)
    ├── (a=29, N=6, Q_v=1.98, best=1.98, ubc=2.43)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.48)
└── (a=34, N=24, Q_v=1.98, best=1.98, ubc=2.26)
    ├── (a=28, N=12, Q_v=1.98, best=1.98, ubc=2.34)
    ├── (a=29, N=6, Q_v=1.98, best=1.98, ubc=2.43)
    └── (a=35, N=5, Q_v=1.98, best=1.98, ubc=2.48)
    └── (a=35, N=11, Q_v=1.98, best=1.98, ubc=2.36)
        └── (a=28, N=10, Q_v=1.98, best=1.98, ubc=2.33)
           INFO     selected action 28 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23, 28]
(N=50, Q_v=1.98, best=1.98)
├── (a=29, N=25, Q_v=1.98, best=1.98, ubc=2.26)
└── (a=34, N=24, Q_v=1.98, best=1.98, ubc=2.24)
    └── (a=35, N=23, Q_v=1.98, best=1.98, ubc=2.24)
└── (a=34, N=24, Q_v=1.98, best=1.98, ubc=2.26)
    ├── (a=29, N=12, Q_v=1.98, best=1.98, ubc=2.34)
    └── (a=35, N=11, Q_v=1.98, best=1.98, ubc=2.32)
    └── (a=35, N=11, Q_v=1.98, best=1.98, ubc=2.36)
        └── (a=29, N=10, Q_v=1.98, best=1.98, ubc=2.33)
           INFO     selected action 29 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23, 28, 29]
(N=50, Q_v=1.98, best=1.98)
└── (a=34, N=49, Q_v=1.98, best=1.98, ubc=2.18)
    └── (a=35, N=48, Q_v=1.98, best=1.98, ubc=2.18)
[16:51:05] INFO     selected action 34 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23, 28, 29, 34]
(N=50, Q_v=1.98, best=1.98)
└── (a=35, N=49, Q_v=1.98, best=1.98, ubc=2.18)
           INFO     selected action 35 after 50 simulations.
           INFO     current action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14,
                    3, 4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23, 28, 29, 34, 35]
           INFO     Final action list: [30, 24, 6, 0, 31, 7, 32, 25, 12, 1, 8, 33, 9, 13, 2, 10, 18, 11, 19, 14, 3,
                    4, 15, 5, 26, 16, 17, 27, 20, 21, 22, 23, 28, 29, 34, 35]
Job 0     -----     Machine 0 
Job 1     -----     Machine 1 
Job 2     -----     Machine 2 
Job 3     -----     Machine 3 
Job 4     -----     Machine 4 
Job 5     -----     Machine 5 
         ╔═══════════════════════════════════════════════════════╗
Job 0    ║        ║ Machine 0   
Job 1    ║   ║ Machine 1   
Job 2    ║              ║ Machine 2   
Job 3    ║                  ║ Machine 3   
Job 4    ║║ Machine 4   
Job 5    ║║ Machine 5   
         ╚╦════╤════╤════╤════╤════╦════╤════╤════╤════╤════╦════╝
          0.0                      29.1                     58.2
makespan: 63
[4]: