minimal-jsp-env (Action History Wrapper)

[1]:
from collections import namedtuple
from copy import deepcopy, copy
[2]:
from gymnasium.spaces import Discrete
from jsp_instance_utils.instances import ft06
from jsp_vis.console import gantt_chart_console
[3]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
from gymnasium.wrappers import TransformReward, NormalizeReward
from gymcts.logger import log
[4]:
import gymnasium as gym
[5]:
import pandas as pd
import numpy as np
[6]:
# A single operation of a job. Fields, as filled in by `jsp_instance_adapter`:
#   job_id        index of the job the operation belongs to
#   op_id         position of the operation inside its job
#   unique_op_id  running counter that is unique across all jobs of an instance
#   machine_type  index of the machine the operation has to run on
#   duration      processing time of the operation
Operation = namedtuple("Operation", ["job_id", "op_id", "unique_op_id", "machine_type", "duration"])
[7]:
import copy
import random
[8]:
class JSPInstance:
    """Container describing one job shop scheduling (JSP) instance.

    Attributes:
        jobs: list of jobs, each job being a list of operations.
        num_jobs: number of jobs, i.e. ``len(jobs)``.
        num_ops_per_job: operations per job; inferred as the length of the
            longest job when not given (``None`` if ``jobs`` is empty).
        max_op_time: longest operation duration; inferred from the
            ``duration`` attribute of the operations when not given
            (``None`` if there are no operations).
        num_machines: number of machines; falls back to ``num_ops_per_job``
            when not given (or given as a falsy value).
        id, opt_time, spt_time, intra_instance_op_entropy: optional metadata,
            stored unchanged.
    """

    def __init__(
            self,
            jobs: list,
            num_ops_per_job: int = None,
            max_op_time: int = None,
            num_machines: int = None,
            id: str = None,
            opt_time: float = None,
            spt_time: float = None,
            intra_instance_op_entropy=None
    ):
        self.jobs = jobs
        self.num_jobs = len(jobs)

        # Explicit arguments always win; inference only fills in missing values.
        if num_ops_per_job is None:
            num_ops_per_job = max((len(job) for job in jobs), default=None)
        if max_op_time is None:
            max_op_time = max((op.duration for job in jobs for op in job), default=None)

        self.num_ops_per_job = num_ops_per_job
        self.max_op_time = max_op_time
        self.num_machines = num_machines if num_machines else num_ops_per_job
        self.id = id
        self.spt_time = spt_time
        self.opt_time = opt_time
        self.intra_instance_op_entropy = intra_instance_op_entropy
[9]:
def jsp_instance_adapter(jsp_instance):
    """Convert an array-encoded JSP instance into a ``JSPInstance``.

    Args:
        jsp_instance: array-like with three dimensions. ``jsp_instance[0][i, j]``
            is read as the machine of the j-th operation of job i and
            ``jsp_instance[1][i, j]`` as its processing time.

    Returns:
        A ``JSPInstance`` whose jobs are lists of ``Operation`` tuples. The
        size of the last axis is used both as the number of operations per
        job and as the number of machines.

    Raises:
        ValueError: if ``jsp_instance`` is not three-dimensional.
    """
    # asarray lets nested lists be passed as well as numpy arrays.
    jsp_instance = np.asarray(jsp_instance)
    if jsp_instance.ndim != 3:
        raise ValueError(
            f"expected a 3-dimensional instance, got {jsp_instance.ndim} dimension(s)"
        )

    _, n_jobs, n_machines = jsp_instance.shape
    machine_order = jsp_instance[0]
    processing_times = jsp_instance[1]
    max_op_duration = np.max(processing_times)

    jobs = []
    unique_op_id = 0  # running id, unique across all jobs
    for job_id in range(n_jobs):
        operations = []
        for op_id in range(n_machines):
            operations.append(
                Operation(
                    job_id,
                    op_id,
                    unique_op_id,
                    machine_order[job_id, op_id],
                    processing_times[job_id, op_id],
                )
            )
            unique_op_id += 1
        jobs.append(operations)

    return JSPInstance(jobs, num_ops_per_job=n_machines, num_machines=n_machines,
                       max_op_time=max_op_duration)
[10]:
class JobShopModel():
    """Transition model of a job shop scheduling problem.

    All methods are static and operate on a state dictionary with the keys

    * ``'remaining_operations'``: one list per job holding the operations that
      still have to be scheduled, in processing order,
    * ``'schedule'``: one list per machine holding ``(op, start, end)`` tuples,
    * ``'last_job_ops'``: per job the end time of its latest scheduled
      operation, or ``-1`` if nothing has been scheduled for that job yet.

    An action is the index of a job; taking it schedules that job's next
    operation. Note that the lists inside the state are modified in place.
    """

    def __init__(self, **kwargs):
        pass

    @staticmethod
    def random_problem(num_jobs, num_machines, max_duration=10):
        """Create an initial state with random operations.

        Every job gets ``num_machines`` operations, each with a machine drawn
        from ``0..num_machines - 1`` and a duration drawn from
        ``0..max_duration - 1`` (both via ``random.randint``).
        """
        remaining_operations = []
        op_id = 0
        for j in range(num_jobs):
            job = []
            for m in range(num_machines):
                # Keep the draw order (machine first, then duration) so seeded
                # runs stay reproducible.
                job.append(
                    Operation(j, m, op_id, random.randint(0, num_machines - 1), random.randint(0, max_duration - 1)))
                op_id += 1
            remaining_operations.append(job)

        schedule = [[] for _ in range(num_machines)]
        last_job_ops = [-1 for _ in range(num_jobs)]
        return {'remaining_operations': remaining_operations, 'schedule': schedule, 'last_job_ops': last_job_ops}

    @staticmethod
    def _schedule_op(job_id, remaining_operations, schedule, last_job_ops):
        """Schedule the next operation of ``job_id`` on its machine.

        The operation is placed into the first sufficiently large gap of the
        machine schedule that starts no earlier than the end of the job's
        previous operation; if there is no such gap it is appended after the
        machine's last entry.

        Returns:
            ``(remaining_operations, schedule, last_job_ops, possible)`` where
            ``possible`` is ``False`` when the job has no operation left (in
            which case nothing is changed).
        """
        if not remaining_operations[job_id]:
            return remaining_operations, schedule, last_job_ops, False

        op = remaining_operations[job_id].pop(0)
        machine_schedule = schedule[op.machine_type]
        earliest_start = JobShopModel._last_op_end(last_job_ops, op)

        if not machine_schedule:
            # Idle machine: the job precedence constraint alone fixes the start.
            machine_schedule.append((op, earliest_start, earliest_start + op.duration))
            last_job_ops[op.job_id] = earliest_start + op.duration
            return remaining_operations, schedule, last_job_ops, True

        left_shift, left_shift_time, insertion_index = JobShopModel._left_shift_possible(
            earliest_start, machine_schedule, op.duration)

        if left_shift:
            end_time = left_shift_time + op.duration
            machine_schedule.insert(insertion_index, (op, left_shift_time, end_time))
            # Argument order keeps the stored value on ties.
            last_job_ops[op.job_id] = max(last_job_ops[op.job_id], end_time)
        else:
            _, _, machine_free_at = machine_schedule[-1]
            # Start once both the job's previous op and the machine are done.
            start_time = max(earliest_start, machine_free_at)
            machine_schedule.append((op, start_time, start_time + op.duration))
            last_job_ops[op.job_id] = start_time + op.duration

        return remaining_operations, schedule, last_job_ops, True

    @staticmethod
    def _left_shift_possible(earliest_start, machine_schedule, op_duration):
        """Look for an idle gap in front of an existing machine entry.

        Args:
            earliest_start: earliest allowed start time (negative values are
                treated as 0).
            machine_schedule: ``(op, start, end)`` tuples of one machine.
            op_duration: duration of the operation to place.

        Returns:
            ``(True, start, index)`` for the first fitting gap, where ``index``
            is the insertion position in ``machine_schedule``; otherwise
            ``(False, -1, -1)``. The open interval after the last entry is not
            considered here.
        """
        if earliest_start < 0:
            earliest_start = 0

        last_end = earliest_start
        for index, (op, start_time, end_time) in enumerate(machine_schedule):
            # Entries that finish before the candidate start cannot bound a gap.
            if end_time < last_end:
                continue

            if (start_time - last_end) >= op_duration:
                return True, last_end, index

            last_end = end_time

        return False, -1, -1

    @staticmethod
    def _last_op_end(last_job_ops, op: "Operation"):
        """Return the end time of the job's previous operation (at least 0)."""
        # ``-1`` marks "nothing scheduled yet" and maps to time 0.
        return max(0, last_job_ops[op.job_id])

    @staticmethod
    def _is_done(remaining_operations):
        """Return ``True`` when no job has an operation left to schedule."""
        return all(len(job) == 0 for job in remaining_operations)

    @staticmethod
    def _makespan(schedule):
        """Return the largest end time among the last entries of all machines.

        Returns 0 for a schedule without any entries.
        """
        last_ends = [machine_schedule[-1][2] for machine_schedule in schedule if len(machine_schedule) > 0]
        return max([0, *last_ends])

    @staticmethod
    def step(state, action):
        """Apply ``action`` (a job index) to ``state``.

        Returns:
            ``(next_state, reward, done)``. The reward is the negative makespan
            once every operation is scheduled, ``-1`` if the chosen job had no
            operation left, and ``0`` otherwise. ``next_state`` is a new dict
            that references the same (mutated) lists as ``state``.
        """
        remaining_ops, schedule, last_job_ops, possible = JobShopModel._schedule_op(
            action,
            state['remaining_operations'],
            state['schedule'],
            state['last_job_ops'])

        reward = 0 if possible else -1
        done = JobShopModel._is_done(remaining_ops)
        if done:
            reward = - JobShopModel._makespan(schedule)
        return {'remaining_operations': remaining_ops, 'schedule': schedule, 'last_job_ops': last_job_ops}, reward, done

    @staticmethod
    def legal_actions(state):
        """Return the indices of all jobs that still have operations left."""
        return [job_id for job_id, job in enumerate(state['remaining_operations']) if len(job) > 0]
[11]:
class JobShopEnv(gym.Env):
    """Gymnasium environment around ``JobShopModel``.

    An action is the index of a job; stepping schedules that job's next
    operation. The observation is the model's raw state dictionary with the
    keys ``'remaining_operations'``, ``'schedule'`` and ``'last_job_ops'``.
    """

    def __init__(self, jsp_instance: JSPInstance, **kwargs):
        """Build the environment for ``jsp_instance`` and reset it.

        Extra keyword arguments are accepted and ignored.
        """
        self.model = JobShopModel()
        self._jsp_instance = jsp_instance
        self._initial_jsp_instance = copy.copy(jsp_instance)

        self.set_instance(instance=self._initial_jsp_instance)

        int_max = np.iinfo(np.int32).max
        num_jobs = self._jsp_instance.num_jobs
        num_machines = self._jsp_instance.num_machines

        # Define the space for an Operation:
        # (job_id, op_id, unique_op_id, machine_type, duration)
        operation_space = gym.spaces.Tuple(tuple(
            gym.spaces.Discrete(int_max) for _ in range(5)
        ))

        # Define the space for a ScheduledOperation
        scheduled_operation_space = gym.spaces.Tuple((
            operation_space,  # Operation
            gym.spaces.Discrete(int_max),  # start_time
            gym.spaces.Discrete(int_max)  # end_time
        ))
        observation_space = gym.spaces.Dict({
            'remaining_operations': gym.spaces.Tuple([
                operation_space for _ in range(num_jobs * num_machines)
            ]),
            # One entry per job; values are -1 (nothing scheduled yet) or an
            # end time, hence the shifted start instead of a binary space.
            'last_job_ops': gym.spaces.Tuple([
                gym.spaces.Discrete(int_max, start=-1) for _ in range(num_jobs)
            ]),
            'schedule': gym.spaces.Tuple([
                gym.spaces.Tuple([scheduled_operation_space for _ in range(num_machines)]) for _ in
                range(num_jobs)
            ])
        })
        self.observation_space = observation_space
        # One action per job rather than a fixed instance size.
        self.action_space = Discrete(num_jobs)

        self.reset()

    def set_instance(self, instance):
        """Load ``instance`` and return the freshly initialised state.

        The jobs are deep-copied, so stepping never modifies the instance.
        """
        self.done = False
        self.steps = 0
        self.instance = instance
        self.ops_per_job = self.instance.num_ops_per_job
        # The schedule is indexed by machine, so it needs one list per machine.
        self.num_machines = self.instance.num_machines
        self.max_op_duration = self.instance.max_op_time
        self.num_jobs = self.instance.num_jobs

        self.state = {
            'remaining_operations': deepcopy(self.instance.jobs),
            'schedule': [[] for _ in range(self.num_machines)],
            'last_job_ops': [-1 for _ in range(self.num_jobs)],
        }
        return self.state

    def reset(self, *, seed=None, options=None, **kwargs):
        """Reset to the initial instance and return ``(state, info)``.

        ``seed`` is forwarded to ``gym.Env.reset``; ``options`` and any other
        keyword arguments are accepted and ignored.
        """
        super().reset(seed=seed)
        self.set_instance(self._initial_jsp_instance)  # also clears done/steps

        return self.state, {}

    def set_state(self, state):
        """Replace the current state and recompute the ``done`` flag."""
        self.state = state
        # 'remaining_operations' holds one list per job, so its own length
        # says nothing about completion; ask the model instead.
        self.done = self.model._is_done(state['remaining_operations'])

    def step(self, action):
        """Schedule the next operation of job ``action``.

        Returns:
            ``(state, reward, terminated, truncated, info)`` where
            ``truncated`` is always ``False`` and ``info`` is empty.
        """
        self.state, reward, self.done = self.model.step(self.state, action)
        self.steps += 1

        return self.state, reward, self.done, False, {}

    def render(self):
        """Print the current schedule as a console Gantt chart plus makespan."""
        allocation = []
        latest_finish_time = 0
        for machine_ops in self.state['schedule']:
            if not machine_ops:
                continue
            for op, start_time, finish_time in machine_ops:
                allocation.append({
                    'Task': f'Job {op.job_id}',
                    'Start': start_time,
                    'Finish': finish_time,
                    'Resource': f'Machine {op.machine_type}'
                })
                latest_finish_time = max(finish_time, latest_finish_time)

        df = pd.DataFrame(allocation)
        num_of_machines = self._jsp_instance.num_machines
        gantt_chart_console(df, num_of_machines)
        print(f'Makespan: {latest_finish_time}')

    def raw_state(self):
        """Return the current state dictionary (not a copy)."""
        return self.state

    def current_instance(self):
        """Return the instance that is currently loaded."""
        return self.instance

    def max_num_actions(self):
        """Return the number of jobs, i.e. the number of distinct actions."""
        return len(self.state['remaining_operations'])

    def current_num_steps(self) -> int:
        """Return the number of steps taken since the last reset."""
        return self.steps

    def get_legal_action_mask(self) -> list[bool]:
        """Return a mask that is ``True`` for jobs with operations left."""
        legal_action_mask = [False] * int(self.action_space.n)
        for action in self.model.legal_actions(self.state):
            legal_action_mask[action] = True
        return legal_action_mask
[12]:
if __name__ == '__main__':
    # Demo: solve the ft06 instance with the MCTS agent and render the result.

    # 20 is the numeric value of logging.INFO in the standard library
    # (assumes `log` behaves like a stdlib logger - TODO confirm).
    log.setLevel(20)

    # Convert the array-encoded benchmark instance into a JSPInstance.
    mk_jsp_instance = jsp_instance_adapter(ft06)

    env = JobShopEnv(
        jsp_instance=mk_jsp_instance,
    )

    env.reset()
    # Reward shaping: normalise first, then divide by a constant.
    # NOTE(review): 36 is hard-coded; presumably tied to the size of ft06 -
    # confirm before reusing this script with another instance.
    env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
    env = TransformReward(env, lambda r: r / 36)


    def mask_fn(env: gym.Env) -> np.ndarray:
        # Return the legal-action mask of the innermost (unwrapped) env.
        # NOTE(review): JobShopEnv.get_legal_action_mask returns list[bool],
        # not the np.ndarray this annotation promises.
        return env.unwrapped.get_legal_action_mask()


    # Outermost wrapper; it is given mask_fn to query legal actions.
    env = ActionHistoryMCTSGymEnvWrapper(
        env,
        action_mask_fn=mask_fn
    )

    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        exclude_unvisited_nodes_from_render=True,
        number_of_simulations_per_step=125,
    )

    # NOTE(review): `root` is not used afterwards in this script.
    root = agent.search_root_node.get_root()

    # Run the search; the result is iterated below as a sequence of actions.
    actions = agent.solve(render_tree_after_step=True)

    # Replay the found action sequence on a fresh episode so the inner env
    # holds the final schedule, then print it.
    env.reset()
    for a in actions:
        obs, rew, term, trun, info = env.step(a)

    env.unwrapped.render()
(N=125, Q_v=-0.13, best=-0.06)
├── (a=0, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│   ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│   ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│   ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│   ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│   ├── (a=4, N=3, Q_v=-0.14, best=-0.09, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│   └── (a=5, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│       ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│       └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
├── (a=1, N=21, Q_v=-0.13, best=-0.06, ubc=0.21)
│   ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│   ├── (a=1, N=4, Q_v=-0.13, best=-0.06, ubc=0.49)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│   ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
│   ├── (a=3, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
│   └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   ├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.16, best=-0.16, ubc=0.58)
│   └── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│       ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│       └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
├── (a=2, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│   ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│   ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│   └── (a=2, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.63)
│   ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│   ├── (a=4, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=4, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│   └── (a=5, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│       ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│       └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
├── (a=3, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│   ├── (a=0, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│   ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│   ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│   ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.58)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│   ├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│   └── (a=5, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│       ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│       ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│       └── (a=2, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
├── (a=4, N=20, Q_v=-0.14, best=-0.07, ubc=0.21)
│   ├── (a=0, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│   ├── (a=1, N=3, Q_v=-0.13, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│   ├── (a=2, N=4, Q_v=-0.14, best=-0.08, ubc=0.47)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│   └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│   ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│   ├── (a=4, N=3, Q_v=-0.15, best=-0.10, ubc=0.55)
│   ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│   └── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│       ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│       └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
└── (a=5, N=20, Q_v=-0.13, best=-0.07, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.47)
    ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
    ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
    ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
    └── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
    ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
    ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
    └── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
    ├── (a=3, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
    ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
    └── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
    ├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
    ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
    └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
    └── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
        ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
        └── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
[17:05:06] INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1]
(a=1, N=146, Q_v=-0.15, best=-0.06, ubc=-0.01)
├── (a=0, N=24, Q_v=-0.15, best=-0.08, ubc=0.18)
├── (a=0, N=3, Q_v=-0.16, best=-0.15, ubc=0.57)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
└── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
├── (a=1, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=3, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=4, N=4, Q_v=-0.13, best=-0.12, ubc=0.50)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=25, Q_v=-0.14, best=-0.06, ubc=0.17)
├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=2, N=4, Q_v=-0.13, best=-0.08, ubc=0.51)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=4, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
└── (a=5, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
├── (a=2, N=24, Q_v=-0.15, best=-0.08, ubc=0.18)
├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=3, N=4, Q_v=-0.13, best=-0.12, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
└── (a=4, N=1, Q_v=-0.17, best=-0.17, ubc=0.57)
└── (a=5, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=3, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
├── (a=0, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=2, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
└── (a=4, N=1, Q_v=-0.17, best=-0.17, ubc=0.58)
├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
├── (a=4, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=4, Q_v=-0.15, best=-0.12, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=4, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
├── (a=0, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=2, N=4, Q_v=-0.15, best=-0.12, ubc=0.48)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=3, N=4, Q_v=-0.16, best=-0.15, ubc=0.47)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=4, N=1, Q_v=-0.16, best=-0.16, ubc=0.67)
├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
└── (a=1, N=1, Q_v=-0.17, best=-0.17, ubc=0.57)
└── (a=5, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
    ├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
    ├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
    ├── (a=3, N=4, Q_v=-0.16, best=-0.15, ubc=0.47)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
    ├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.57)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
    └── (a=3, N=1, Q_v=-0.16, best=-0.16, ubc=0.58)
    └── (a=5, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
        ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
        ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
        └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
           INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1, 1]
(a=1, N=150, Q_v=-0.14, best=-0.06, ubc=-0.01)
├── (a=0, N=25, Q_v=-0.14, best=-0.11, ubc=0.17)
├── (a=0, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=3, N=4, Q_v=-0.14, best=-0.11, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=4, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=1, N=25, Q_v=-0.14, best=-0.10, ubc=0.18)
├── (a=0, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.14, best=-0.11, ubc=0.50)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=3, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.13, best=-0.10, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=25, Q_v=-0.14, best=-0.08, ubc=0.18)
├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=4, Q_v=-0.13, best=-0.10, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=4, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
└── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=3, N=25, Q_v=-0.14, best=-0.10, ubc=0.17)
├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=3, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=4, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
└── (a=5, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=4, N=25, Q_v=-0.14, best=-0.10, ubc=0.17)
├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=4, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
└── (a=5, N=24, Q_v=-0.14, best=-0.11, ubc=0.18)
    ├── (a=0, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=2, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
    ├── (a=3, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=4, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
    └── (a=5, N=3, Q_v=-0.15, best=-0.15, ubc=0.58)
        ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
        └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2]
(a=2, N=150, Q_v=-0.12, best=-0.08, ubc=0.02)
├── (a=0, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.12, best=-0.10, ubc=0.20)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.12, best=-0.09, ubc=0.20)
├── (a=0, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=3, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
├── (a=0, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=24, Q_v=-0.12, best=-0.10, ubc=0.20)
├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=3, Q_v=-0.13, best=-0.11, ubc=0.60)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
└── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.62)
└── (a=5, N=4, Q_v=-0.13, best=-0.11, ubc=0.50)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
    ├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=3, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
        ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
           INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1]
(a=1, N=150, Q_v=-0.11, best=-0.09, ubc=0.03)
├── (a=0, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=24, Q_v=-0.11, best=-0.10, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=3, Q_v=-0.12, best=-0.11, ubc=0.61)
├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.61)
└── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.64)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
    ├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
        ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
        └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:05:07] INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0]
(a=0, N=150, Q_v=-0.10, best=-0.09, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=24, Q_v=-0.11, best=-0.10, ubc=0.22)
├── (a=0, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
└── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
           INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0]
(a=0, N=150, Q_v=-0.10, best=-0.09, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=3, Q_v=-0.10, best=-0.10, ubc=0.62)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
           INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1]
(a=1, N=150, Q_v=-0.10, best=-0.09, ubc=0.03)
├── (a=0, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
    └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
        ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5]
(a=5, N=150, Q_v=-0.10, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=1, N=25, Q_v=-0.10, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.63)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
           INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1]
(a=1, N=150, Q_v=-0.10, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=24, Q_v=-0.10, best=-0.08, ubc=0.22)
├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=5, Q_v=-0.11, best=-0.10, ubc=0.46)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=3, N=5, Q_v=-0.10, best=-0.08, ubc=0.47)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=25, Q_v=-0.10, best=-0.08, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.10, best=-0.08, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
    ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
        ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
        └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
[17:05:08] INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4]
(a=4, N=150, Q_v=-0.11, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=5, Q_v=-0.11, best=-0.10, ubc=0.45)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=5, Q_v=-0.11, best=-0.10, ubc=0.45)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=5, Q_v=-0.11, best=-0.09, ubc=0.46)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.11, best=-0.08, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=24, Q_v=-0.11, best=-0.09, ubc=0.21)
    ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
    ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
    ├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=3, Q_v=-0.12, best=-0.11, ubc=0.61)
    ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
    └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
    └── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
        ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
        ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
        └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2]
(a=2, N=150, Q_v=-0.10, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=5, Q_v=-0.10, best=-0.10, ubc=0.46)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
    ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
        ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
           INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=3, Q_v=-0.10, best=-0.09, ubc=0.63)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
        ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
        └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2]
(a=2, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=3, Q_v=-0.09, best=-0.09, ubc=0.64)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
        ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
        └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
[17:05:09] INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.64)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
        ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
        ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
        └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5]
(a=5, N=150, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=5, Q_v=-0.09, best=-0.08, ubc=0.47)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.09, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5]
(a=5, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=24, Q_v=-0.09, best=-0.09, ubc=0.23)
    ├── (a=0, N=3, Q_v=-0.10, best=-0.09, ubc=0.63)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
        ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
        └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
           INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.10, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
        └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
[17:05:10] INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4]
(a=4, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=5, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=3, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
    ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
    └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
    └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
        ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
        └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
           INFO     selected action 1 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1]
(a=1, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=30, Q_v=-0.09, best=-0.09, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=29, Q_v=-0.09, best=-0.08, ubc=0.21)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
        ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
        └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
           INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0]
(a=0, N=155, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.09, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
    └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2]
(a=2, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=31, Q_v=-0.10, best=-0.09, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
    └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
[17:05:11] INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0]
(a=0, N=156, Q_v=-0.10, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.10, ubc=0.43)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.10, best=-0.10, ubc=0.43)
├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.57)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
    ├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
        ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
        ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5]
(a=5, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=2, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
        ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2]
(a=2, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=7, Q_v=-0.10, best=-0.08, ubc=0.40)
├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.60)
├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.61)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
├── (a=3, N=7, Q_v=-0.09, best=-0.08, ubc=0.40)
├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.60)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.61)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
├── (a=4, N=8, Q_v=-0.09, best=-0.08, ubc=0.37)
├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.62)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.92)
└── (a=5, N=8, Q_v=-0.09, best=-0.08, ubc=0.37)
    ├── (a=0, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
    ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.92)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
        ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
        ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
        └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
[17:05:12] INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4]
(a=4, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=7, Q_v=-0.09, best=-0.09, ubc=0.40)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.60)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.60)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=4, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
└── (a=5, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
    ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
    ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
        └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
           INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3]
(a=3, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=2, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=4, N=8, Q_v=-0.09, best=-0.09, ubc=0.38)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
└── (a=5, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
    ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
    ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
    ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
        ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
           INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4]
(a=4, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
├── (a=4, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
    ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        └── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
           INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0]
(a=0, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.08, best=-0.08, ubc=0.20)
├── (a=2, N=7, Q_v=-0.08, best=-0.08, ubc=0.41)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=3, N=7, Q_v=-0.08, best=-0.08, ubc=0.41)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=4, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
└── (a=5, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
    ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.51)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=3, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=4, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
└── (a=5, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
    ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
    └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
    ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
    ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
    └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
    └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
        ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
        ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
        └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
[17:05:13] INFO     selected action 0 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0]
(a=0, N=156, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=38, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=3, N=12, Q_v=-0.08, best=-0.08, ubc=0.31)
├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.56)
├── (a=4, N=12, Q_v=-0.08, best=-0.08, ubc=0.31)
├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.56)
└── (a=5, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
    ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
    ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
    └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=3, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=3, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
└── (a=5, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
└── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
└── (a=5, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
    ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
    ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    ├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    ├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
    └── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
        ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
        ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
        └── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
           INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4]
(a=4, N=164, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=41, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=3, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
└── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=4, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
└── (a=5, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
    ├── (a=3, N=5, Q_v=-0.08, best=-0.08, ubc=0.43)
    ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
    └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
├── (a=3, N=41, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
└── (a=5, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
├── (a=4, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=2, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
├── (a=3, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
├── (a=2, N=5, Q_v=-0.08, best=-0.08, ubc=0.43)
├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
└── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
└── (a=5, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
    ├── (a=2, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
    ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
    └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
└── (a=5, N=40, Q_v=-0.08, best=-0.08, ubc=0.17)
    ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    ├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
    ├── (a=4, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
    └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    └── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
        ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
        ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
        └── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
           INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3]
(a=3, N=166, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=3, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=4, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
├── (a=3, N=7, Q_v=-0.09, best=-0.08, ubc=0.35)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
    └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=3, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=2, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=4, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
    ├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.43)
    ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
    └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
├── (a=4, N=42, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=2, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=3, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
├── (a=2, N=7, Q_v=-0.09, best=-0.08, ubc=0.35)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
└── (a=5, N=14, Q_v=-0.09, best=-0.08, ubc=0.28)
    ├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.43)
    ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
    └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
└── (a=5, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
    ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    ├── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
    └── (a=5, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
    ├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
    ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
    ├── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
    └── (a=5, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
    ├── (a=4, N=10, Q_v=-0.09, best=-0.08, ubc=0.35)
    ├── (a=2, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
    ├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
    └── (a=5, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
    └── (a=5, N=10, Q_v=-0.09, best=-0.08, ubc=0.35)
        ├── (a=2, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
        ├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
        └── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
[17:05:14] INFO     selected action 4 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3, 4]
(a=4, N=167, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=2, N=55, Q_v=-0.09, best=-0.08, ubc=0.13)
├── (a=3, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
└── (a=5, N=26, Q_v=-0.09, best=-0.09, ubc=0.17)
└── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
    ├── (a=3, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
    └── (a=5, N=13, Q_v=-0.09, best=-0.08, ubc=0.27)
├── (a=3, N=56, Q_v=-0.09, best=-0.08, ubc=0.13)
├── (a=2, N=28, Q_v=-0.09, best=-0.08, ubc=0.18)
└── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.16)
└── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
    ├── (a=2, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
    └── (a=5, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
└── (a=5, N=55, Q_v=-0.09, best=-0.08, ubc=0.13)
    ├── (a=2, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
    ├── (a=3, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
    └── (a=5, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
    ├── (a=3, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
    ├── (a=2, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
    └── (a=5, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
    └── (a=5, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
        ├── (a=2, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
        └── (a=3, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
           INFO     selected action 3 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3, 4, 3]
(a=3, N=181, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=2, N=90, Q_v=-0.09, best=-0.08, ubc=0.08)
└── (a=5, N=89, Q_v=-0.09, best=-0.08, ubc=0.07)
    └── (a=5, N=88, Q_v=-0.09, best=-0.09, ubc=0.07)
└── (a=5, N=90, Q_v=-0.09, best=-0.08, ubc=0.08)
    ├── (a=2, N=44, Q_v=-0.09, best=-0.09, ubc=0.14)
    └── (a=5, N=43, Q_v=-0.09, best=-0.09, ubc=0.12)
    └── (a=5, N=45, Q_v=-0.09, best=-0.09, ubc=0.14)
        └── (a=2, N=44, Q_v=-0.09, best=-0.09, ubc=0.12)
           INFO     selected action 2 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3, 4, 3, 2]
(a=2, N=215, Q_v=-0.09, best=-0.08, ubc=0.03)
└── (a=5, N=214, Q_v=-0.09, best=-0.08, ubc=0.03)
    └── (a=5, N=213, Q_v=-0.09, best=-0.09, ubc=0.03)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3, 4, 3, 2, 5]
(a=5, N=339, Q_v=-0.09, best=-0.08, ubc=0.01)
└── (a=5, N=338, Q_v=-0.09, best=-0.09, ubc=0.01)
           INFO     selected action 5 after 125 simulations.
           INFO     current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2,
                    4, 3, 4, 0, 0, 4, 3, 4, 3, 2, 5, 5]
           INFO     Final action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4,
                    3, 4, 0, 0, 4, 3, 4, 3, 2, 5, 5]
         ╔═══════════════════════════════════════════════════════╗
Job 0    ║     ║ Machine 0   
Job 1    ║║ Machine 1   
Job 2    ║║ Machine 2   
Job 3    ║          ║ Machine 3   
Job 4    ║            ║ Machine 4   
Job 5    ║        ║ Machine 5   
         ╚╦════╤════╤════╤════╤════╦════╤════╤════╤════╤════╦════╝
          0.0                      27.7                     55.5
Makespan: 60