minimal-jsp-env (Action History Wrapper)
[1]:
from collections import namedtuple
from copy import deepcopy, copy
[2]:
from gymnasium.spaces import Discrete
from jsp_instance_utils.instances import ft06
from jsp_vis.console import gantt_chart_console
[3]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
from gymnasium.wrappers import TransformReward, NormalizeReward
from gymcts.logger import log
[4]:
import gymnasium as gym
[5]:
import pandas as pd
import numpy as np
[6]:
# Immutable record describing a single operation of a job.
#   job_id        - index of the job the operation belongs to
#   op_id         - position of the operation inside its job
#   unique_op_id  - running counter that is unique across all jobs of an instance
#   machine_type  - index of the machine the operation runs on (used to index the schedule)
#   duration      - processing time of the operation
Operation = namedtuple("Operation", ["job_id", "op_id", "unique_op_id", "machine_type", "duration"])
[7]:
import copy
import random
[8]:
class JSPInstance:
    """Plain container for one job-shop scheduling problem instance.

    Attributes mirror the constructor arguments; ``num_jobs`` is derived from
    ``jobs``. Values that are not supplied are inferred from ``jobs`` where
    that is possible and are left as ``None`` otherwise.
    """

    def __init__(
            self,
            jobs: list,
            num_ops_per_job: int = None,
            max_op_time: int = None,
            num_machines: int = None,
            id: str = None,
            opt_time: float = None,
            spt_time: float = None,
            intra_instance_op_entropy=None
    ):
        """Store the instance data and fill in missing size information.

        Args:
            jobs: One sequence of operations per job.
            num_ops_per_job: Operations per job. When omitted, the length of
                the longest job is used (``None`` if ``jobs`` is empty).
            max_op_time: Longest operation duration. When omitted, the largest
                ``duration`` attribute found on the operations is used
                (``None`` if no operation exposes a ``duration``).
            num_machines: Number of machines. Falls back to
                ``num_ops_per_job`` when not given (or falsy).
            id: Optional identifier of the instance.
            opt_time: Optional reference value, stored unchanged.
            spt_time: Optional reference value, stored unchanged.
            intra_instance_op_entropy: Optional metadata, stored unchanged.
        """
        self.jobs = jobs
        self.num_jobs = len(jobs)

        if num_ops_per_job is None:
            # Infer from the data instead of silently keeping None.
            num_ops_per_job = max((len(job) for job in jobs), default=None)
        if max_op_time is None:
            # Only consider elements that actually carry a duration so that
            # callers storing other objects in `jobs` keep working.
            max_op_time = max(
                (op.duration for job in jobs for op in job if hasattr(op, "duration")),
                default=None,
            )

        self.num_ops_per_job = num_ops_per_job
        self.max_op_time = max_op_time
        self.num_machines = num_machines if num_machines else num_ops_per_job
        self.id = id
        self.spt_time = spt_time
        self.opt_time = opt_time
        self.intra_instance_op_entropy = intra_instance_op_entropy
[9]:
def jsp_instance_adapter(jsp_instance):
    """Convert a stacked job-shop array into a :class:`JSPInstance`.

    Args:
        jsp_instance: Array-like of shape ``(k, n_jobs, n_machines)`` with
            ``k >= 2``. ``jsp_instance[0]`` holds, per job, the machine each
            operation runs on; ``jsp_instance[1]`` holds the matching
            processing times.

    Returns:
        A ``JSPInstance`` whose jobs are lists of ``Operation`` tuples in the
        order given by the array. ``unique_op_id`` counts operations across
        all jobs, starting at 0.

    Raises:
        ValueError: If the input is not three-dimensional.
    """
    # Accept nested lists as well as ndarrays; a no-op for ndarray input.
    jsp_instance = np.asarray(jsp_instance)
    _, n_jobs, n_machines = jsp_instance.shape
    machine_order = jsp_instance[0]
    processing_times = jsp_instance[1]
    max_op_duration = np.max(processing_times)

    jobs = []
    unique_op_id = 0
    for i in range(n_jobs):
        operations = []
        for j in range(n_machines):
            operations.append(
                Operation(i, j, unique_op_id, machine_order[i, j], processing_times[i, j])
            )
            unique_op_id += 1
        jobs.append(operations)

    return JSPInstance(jobs, num_ops_per_job=n_machines, num_machines=n_machines,
                       max_op_time=max_op_duration)
[10]:
class JobShopModel():
    """Transition model for job-shop scheduling on plain dict states.

    A state is a dict with three entries:

    * ``'remaining_operations'``: per job, the list of operations that still
      have to be scheduled (the head of the list is scheduled next),
    * ``'schedule'``: per machine, a list of ``(op, start_time, end_time)``
      tuples kept in start-time order,
    * ``'last_job_ops'``: per job, the end time of its most recently scheduled
      operation, or ``-1`` if nothing was scheduled yet.

    All methods are static. The scheduling methods mutate the lists that are
    passed in and return them again.
    """

    def __init__(self, **kwargs):
        pass

    @staticmethod
    def random_problem(num_jobs, num_machines, max_duration=10):
        """Create a random initial state.

        Every job receives ``num_machines`` operations whose machine is drawn
        from ``[0, num_machines - 1]`` and whose duration is drawn from
        ``[0, max_duration - 1]``.
        """
        remaining_operations = []
        op_id = 0
        for j in range(num_jobs):
            job = []
            for m in range(num_machines):
                job.append(
                    Operation(j, m, op_id, random.randint(0, num_machines - 1),
                              random.randint(0, max_duration - 1)))
                op_id += 1
            remaining_operations.append(job)
        schedule = [[] for _ in range(num_machines)]
        last_job_ops = [-1 for _ in range(num_jobs)]
        return {'remaining_operations': remaining_operations, 'schedule': schedule,
                'last_job_ops': last_job_ops}

    @staticmethod
    def _schedule_op(job_id, remaining_operations, schedule, last_job_ops):
        """Schedule the next operation of ``job_id`` on its machine.

        The operation is placed into the first idle gap on its machine that
        starts no earlier than the end of the job's previous operation and is
        long enough; if there is no such gap it is appended after the last
        operation on the machine.

        Returns:
            ``(remaining_operations, schedule, last_job_ops, possible)`` where
            ``possible`` is ``False`` when the job has no operation left (in
            which case nothing is modified).
        """
        if not remaining_operations[job_id]:
            return remaining_operations, schedule, last_job_ops, False

        op = remaining_operations[job_id].pop(0)
        machine_schedule = schedule[op.machine_type]
        earliest_start = JobShopModel._last_op_end(last_job_ops, op)

        if machine_schedule:
            fits, gap_start, insert_at = JobShopModel._left_shift_possible(
                earliest_start, machine_schedule, op.duration)
            if fits:
                gap_end = gap_start + op.duration
                machine_schedule.insert(insert_at, (op, gap_start, gap_end))
                last_job_ops[op.job_id] = max(gap_end, last_job_ops[op.job_id])
                return remaining_operations, schedule, last_job_ops, True
            # No usable gap: wait until the machine has finished its last operation.
            earliest_start = max(earliest_start, machine_schedule[-1][2])

        end_time = earliest_start + op.duration
        machine_schedule.append((op, earliest_start, end_time))
        last_job_ops[op.job_id] = end_time
        return remaining_operations, schedule, last_job_ops, True

    @staticmethod
    def _left_shift_possible(earliest_start, machine_schedule, op_duration):
        """Look for an idle gap in front of an already scheduled operation.

        Returns:
            ``(True, start_time, insertion_index)`` for the first gap that
            begins at or after ``earliest_start`` (clamped to 0) and lasts at
            least ``op_duration``; ``(False, -1, -1)`` if there is none.
        """
        if earliest_start < 0:
            earliest_start = 0
        last_end = earliest_start
        for index, (op, start_time, end_time) in enumerate(machine_schedule):
            if end_time < last_end:
                # Entry finishes before the candidate window opens.
                continue
            if (start_time - last_end) >= op_duration:
                return True, last_end, index
            last_end = end_time
        return False, -1, -1

    @staticmethod
    def _last_op_end(last_job_ops, op: "Operation"):
        """Return the end time of the job's previous operation, or 0 if none."""
        start_time = 0
        if last_job_ops[op.job_id] > 0:
            start_time = last_job_ops[op.job_id]
        return start_time

    @staticmethod
    def _is_done(remaining_operations):
        """Return ``True`` when no job has an operation left."""
        return all(len(job_ops) == 0 for job_ops in remaining_operations)

    @staticmethod
    def _makespan(schedule):
        """Return the largest end time among the last entries of all machines."""
        makespan = 0
        for machine_schedule in schedule:
            if len(machine_schedule) > 0:
                _, _, end_time = machine_schedule[-1]
                if end_time > makespan:
                    makespan = end_time
        return makespan

    @staticmethod
    def step(state, action):
        """Apply ``action`` (a job index) to ``state``.

        Returns:
            ``(next_state, reward, done)``. The reward is ``-1`` for an action
            on a job without remaining operations, ``0`` for a regular step
            and ``-makespan`` once every operation has been scheduled (this
            terminal reward overrides the other two).
        """
        remaining_ops, schedule, last_job_ops, possible = JobShopModel._schedule_op(
            action,
            state['remaining_operations'],
            state['schedule'],
            state['last_job_ops'])
        reward = 0
        if not possible:
            reward = -1
        done = JobShopModel._is_done(remaining_ops)
        if done:
            reward = - JobShopModel._makespan(schedule)
        return {'remaining_operations': remaining_ops, 'schedule': schedule,
                'last_job_ops': last_job_ops}, reward, done

    @staticmethod
    def legal_actions(state):
        """Return the indices of all jobs that still have operations left."""
        return [job_id for job_id in range(len(state['remaining_operations'])) if
                len(state['remaining_operations'][job_id]) > 0]
[11]:
class JobShopEnv(gym.Env):
    """Gymnasium environment around :class:`JobShopModel`.

    An action is the index of a job; taking it schedules the next operation
    of that job. The observation is the raw state dict of the model
    (``'remaining_operations'``, ``'schedule'``, ``'last_job_ops'``).

    The action and observation spaces are built once in ``__init__`` from the
    instance passed to the constructor; they are not rebuilt by
    ``set_instance``.
    """

    def __init__(self, jsp_instance: JSPInstance, **kwargs):
        """Create the environment for ``jsp_instance`` and reset it."""
        self.model = JobShopModel()
        self._jsp_instance = jsp_instance
        # Shallow copy: the job lists themselves are deep-copied in set_instance.
        self._initial_jsp_instance = copy.copy(jsp_instance)
        self.set_instance(instance=self._initial_jsp_instance)

        int32_max = np.iinfo(np.int32).max
        # Define the space for an Operation
        operation_space = gym.spaces.Tuple((
            gym.spaces.Discrete(int32_max),  # job_id
            gym.spaces.Discrete(int32_max),  # op_id
            gym.spaces.Discrete(int32_max),  # unique_op_id
            gym.spaces.Discrete(int32_max),  # machine_type
            gym.spaces.Discrete(int32_max)  # duration
        ))
        # Define the space for a ScheduledOperation
        scheduled_operation_space = gym.spaces.Tuple((
            operation_space,  # Operation
            gym.spaces.Discrete(int32_max),  # start_time
            gym.spaces.Discrete(int32_max)  # end_time
        ))
        total_ops = sum(len(job_ops) for job_ops in self.instance.jobs)
        observation_space = gym.spaces.Dict({
            'remaining_operations': gym.spaces.Tuple([
                operation_space for _ in range(total_ops)
            ]),
            # One entry per job; values are end times, or -1 before the first operation.
            'last_job_ops': gym.spaces.Tuple([
                gym.spaces.Discrete(int32_max, start=-1) for _ in range(self.num_jobs)
            ]),
            # The schedule is indexed by machine first (see set_instance).
            'schedule': gym.spaces.Tuple([
                gym.spaces.Tuple([scheduled_operation_space for _ in range(self.num_jobs)]) for _ in
                range(self.num_machines)
            ])
        })
        self.observation_space = observation_space
        # One action per job instead of a hard-coded 6.
        self.action_space = Discrete(self.num_jobs)
        self.reset()

    def set_instance(self, instance):
        """Load ``instance`` and return a fresh, empty-schedule state."""
        self.done = False
        self.steps = 0
        self.instance = instance
        self.ops_per_job = self.instance.num_ops_per_job
        self.num_machines = self.instance.num_machines
        self.max_op_duration = self.instance.max_op_time
        self.num_jobs = self.instance.num_jobs
        schedule = [[] for _ in range(self.num_machines)]
        last_job_ops = [-1 for _ in range(self.num_jobs)]
        # Deep copy because the model pops operations off these lists.
        s_ = {'remaining_operations': deepcopy(self.instance.jobs), 'schedule': schedule,
              'last_job_ops': last_job_ops}
        self.state = s_
        return self.state

    def reset(self, *, seed=None, options=None, **kwargs):
        """Restore the initial instance and return ``(state, info)``.

        ``seed`` is forwarded to ``gym.Env.reset``; ``options`` and any extra
        keyword arguments are accepted for API compatibility and ignored.
        """
        super().reset(seed=seed)
        self.set_instance(self._initial_jsp_instance)
        return self.state, {}

    def set_state(self, state):
        """Replace the current state and recompute the ``done`` flag from it."""
        self.state = state
        self.done = self.model._is_done(state['remaining_operations'])

    def step(self, action):
        """Schedule the next operation of job ``action``.

        Returns the gymnasium 5-tuple ``(state, reward, terminated, truncated,
        info)``; ``truncated`` is always ``False`` and ``info`` is empty.
        """
        self.state, reward, self.done = self.model.step(self.state, action)
        self.steps += 1
        return self.state, reward, self.done, False, {}

    def render(self):
        """Print the current schedule as a console Gantt chart plus its makespan."""
        allocation = []
        latest_finish_time = 0
        for mache_ops in self.state['schedule']:
            if not mache_ops:
                continue
            for ops_elem, start_time, finish_time in mache_ops:
                entry = {
                    'Task': f'Job {ops_elem.job_id}',
                    'Start': start_time,
                    'Finish': finish_time,
                    'Resource': f'Machine {ops_elem.machine_type}'
                }
                latest_finish_time = max(finish_time, latest_finish_time)
                allocation.append(entry)
        df = pd.DataFrame(allocation)
        num_of_machines = self._jsp_instance.num_machines
        gantt_chart_console(df, num_of_machines)
        print(f'Makespan: {latest_finish_time}')

    def raw_state(self):
        """Return the current state dict (not a copy)."""
        return self.state

    def current_instance(self):
        """Return the instance that is currently loaded."""
        return self.instance

    def max_num_actions(self):
        """Return the number of jobs in the current state."""
        return len(self.state['remaining_operations'])

    def current_num_steps(self) -> int:
        """Return how many steps were taken since the last reset."""
        return self.steps

    def get_legal_action_mask(self) -> list[bool]:
        """Return a mask over the action space; ``True`` marks jobs with operations left."""
        legal_action = self.model.legal_actions(self.state)
        legal_action_mask = [False for _ in range(self.action_space.n)]
        for action in legal_action:
            legal_action_mask[action] = True
        return legal_action_mask
[12]:
# Script entry point: build the ft06 environment, let a GymctsAgent search for
# an action sequence, then replay that sequence and print the resulting schedule.
if __name__ == '__main__':
    # 20 is the numeric value of logging.INFO, assuming `log` is a standard logger.
    log.setLevel(20)
    mk_jsp_instance = jsp_instance_adapter(ft06)
    env = JobShopEnv(
        jsp_instance=mk_jsp_instance,
    )
    env.reset()
    # Reward shaping: gymnasium's reward normalisation followed by a constant
    # division. NOTE(review): 36 presumably is the operation count of ft06
    # (6 jobs x 6 machines) - confirm.
    env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
    env = TransformReward(env, lambda r: r / 36)
    # NOTE(review): annotated as np.ndarray, but get_legal_action_mask returns
    # a list[bool].
    def mask_fn(env: gym.Env) -> np.ndarray:
        # Do whatever you'd like in this function to return the action mask
        # for the current env. In this example, we assume the env has a
        # helpful method we can rely on.
        return env.unwrapped.get_legal_action_mask()
    env = ActionHistoryMCTSGymEnvWrapper(
        env,
        action_mask_fn=mask_fn
    )
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        exclude_unvisited_nodes_from_render=True,
        number_of_simulations_per_step=125,
    )
    # `root` is not used again in this script.
    root = agent.search_root_node.get_root()
    actions = agent.solve(render_tree_after_step=True)
    # Replay the action sequence found by the agent from a fresh episode.
    env.reset()
    for a in actions:
        obs, rew, term, trun, info = env.step(a)
    # NOTE(review): the export lost indentation; rendering once after the
    # replay loop is assumed here - confirm against the notebook.
    env.unwrapped.render()
(N=125, Q_v=-0.13, best=-0.06)
├── (a=0, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│ ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│ ├── (a=4, N=3, Q_v=-0.14, best=-0.09, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│ └── (a=5, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
├── (a=1, N=21, Q_v=-0.13, best=-0.06, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=4, Q_v=-0.13, best=-0.06, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
│ ├── (a=3, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
│ │ └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ ├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.16, best=-0.16, ubc=0.58)
│ └── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
├── (a=2, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.63)
│ ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ ├── (a=4, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=4, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ └── (a=5, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
├── (a=3, N=21, Q_v=-0.13, best=-0.07, ubc=0.21)
│ ├── (a=0, N=3, Q_v=-0.13, best=-0.08, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ ├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│ └── (a=5, N=4, Q_v=-0.14, best=-0.08, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ └── (a=2, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
├── (a=4, N=20, Q_v=-0.14, best=-0.07, ubc=0.21)
│ ├── (a=0, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│ ├── (a=1, N=3, Q_v=-0.13, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ ├── (a=2, N=4, Q_v=-0.14, best=-0.08, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=3, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
│ ├── (a=4, N=3, Q_v=-0.15, best=-0.10, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ │ └── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
└── (a=5, N=20, Q_v=-0.13, best=-0.07, ubc=0.22)
├── (a=0, N=4, Q_v=-0.14, best=-0.08, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ ├── (a=1, N=1, Q_v=-0.18, best=-0.18, ubc=0.65)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
├── (a=2, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
├── (a=3, N=3, Q_v=-0.13, best=-0.08, ubc=0.58)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
├── (a=4, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
│ ├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
│ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
└── (a=5, N=3, Q_v=-0.14, best=-0.08, ubc=0.57)
├── (a=0, N=1, Q_v=-0.18, best=-0.18, ubc=0.56)
└── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.60)
[17:05:06] INFO selected action 1 after 125 simulations.
INFO current action list: [1]
(a=1, N=146, Q_v=-0.15, best=-0.06, ubc=-0.01)
├── (a=0, N=24, Q_v=-0.15, best=-0.08, ubc=0.18)
│ ├── (a=0, N=3, Q_v=-0.16, best=-0.15, ubc=0.57)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ │ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
│ ├── (a=1, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ ├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=3, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=4, N=4, Q_v=-0.13, best=-0.12, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=25, Q_v=-0.14, best=-0.06, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=2, N=4, Q_v=-0.13, best=-0.08, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=4, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
├── (a=2, N=24, Q_v=-0.15, best=-0.08, ubc=0.18)
│ ├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ ├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=3, N=4, Q_v=-0.13, best=-0.12, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ │ └── (a=4, N=1, Q_v=-0.17, best=-0.17, ubc=0.57)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=3, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ ├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=2, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ │ └── (a=4, N=1, Q_v=-0.17, best=-0.17, ubc=0.58)
│ ├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
│ ├── (a=4, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.12, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=4, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=2, N=4, Q_v=-0.15, best=-0.12, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=3, N=4, Q_v=-0.16, best=-0.15, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ │ └── (a=4, N=1, Q_v=-0.16, best=-0.16, ubc=0.67)
│ ├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.56)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ │ └── (a=1, N=1, Q_v=-0.17, best=-0.17, ubc=0.57)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.15, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=5, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=5, N=24, Q_v=-0.15, best=-0.08, ubc=0.17)
├── (a=0, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=2, N=4, Q_v=-0.15, best=-0.13, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=3, N=4, Q_v=-0.16, best=-0.15, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
├── (a=4, N=3, Q_v=-0.16, best=-0.15, ubc=0.57)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.60)
│ └── (a=3, N=1, Q_v=-0.16, best=-0.16, ubc=0.58)
└── (a=5, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.69)
INFO selected action 1 after 125 simulations.
INFO current action list: [1, 1]
(a=1, N=150, Q_v=-0.14, best=-0.06, ubc=-0.01)
├── (a=0, N=25, Q_v=-0.14, best=-0.11, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=1, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=2, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=3, N=4, Q_v=-0.14, best=-0.11, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=4, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=1, N=25, Q_v=-0.14, best=-0.10, ubc=0.18)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.14, best=-0.11, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=3, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.13, best=-0.10, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=25, Q_v=-0.14, best=-0.08, ubc=0.18)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=2, N=4, Q_v=-0.13, best=-0.10, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.15, best=-0.14, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ ├── (a=4, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ └── (a=5, N=4, Q_v=-0.14, best=-0.13, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=3, N=25, Q_v=-0.14, best=-0.10, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=3, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=4, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ └── (a=5, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=4, N=25, Q_v=-0.14, best=-0.10, ubc=0.17)
│ ├── (a=0, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=2, N=4, Q_v=-0.14, best=-0.10, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=4, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.15, best=-0.13, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
└── (a=5, N=24, Q_v=-0.14, best=-0.11, ubc=0.18)
├── (a=0, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=2, N=4, Q_v=-0.15, best=-0.14, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.70)
├── (a=3, N=4, Q_v=-0.14, best=-0.11, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
├── (a=4, N=4, Q_v=-0.14, best=-0.12, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.68)
└── (a=5, N=3, Q_v=-0.15, best=-0.15, ubc=0.58)
├── (a=0, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
└── (a=2, N=1, Q_v=-0.15, best=-0.15, ubc=0.59)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2]
(a=2, N=150, Q_v=-0.12, best=-0.08, ubc=0.02)
├── (a=0, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
│ ├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.12, best=-0.10, ubc=0.20)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.12, best=-0.09, ubc=0.20)
│ ├── (a=0, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=3, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
│ ├── (a=0, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.13, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=3, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=24, Q_v=-0.12, best=-0.10, ubc=0.20)
│ ├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=3, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=3, Q_v=-0.13, best=-0.11, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│ │ └── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.62)
│ └── (a=5, N=4, Q_v=-0.13, best=-0.11, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=25, Q_v=-0.12, best=-0.10, ubc=0.19)
├── (a=0, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
INFO selected action 1 after 125 simulations.
INFO current action list: [1, 1, 2, 1]
(a=1, N=150, Q_v=-0.11, best=-0.09, ubc=0.03)
├── (a=0, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=2, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=24, Q_v=-0.11, best=-0.10, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=2, N=3, Q_v=-0.12, best=-0.11, ubc=0.61)
│ │ ├── (a=0, N=1, Q_v=-0.14, best=-0.14, ubc=0.61)
│ │ └── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.64)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=25, Q_v=-0.11, best=-0.10, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:05:07] INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0]
(a=0, N=150, Q_v=-0.10, best=-0.09, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=24, Q_v=-0.11, best=-0.10, ubc=0.22)
│ ├── (a=0, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ │ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.61)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0]
(a=0, N=150, Q_v=-0.10, best=-0.09, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=3, Q_v=-0.10, best=-0.10, ubc=0.62)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
INFO selected action 1 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1]
(a=1, N=150, Q_v=-0.10, best=-0.09, ubc=0.03)
├── (a=0, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.63)
│ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
├── (a=1, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5]
(a=5, N=150, Q_v=-0.10, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
├── (a=1, N=25, Q_v=-0.10, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=3, Q_v=-0.11, best=-0.10, ubc=0.62)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.63)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
INFO selected action 1 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1]
(a=1, N=150, Q_v=-0.10, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=24, Q_v=-0.10, best=-0.08, ubc=0.22)
│ ├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ ├── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=5, Q_v=-0.11, best=-0.10, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=3, N=5, Q_v=-0.10, best=-0.08, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=25, Q_v=-0.10, best=-0.08, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.08, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
[17:05:08] INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4]
(a=4, N=150, Q_v=-0.11, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.11, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=5, Q_v=-0.11, best=-0.10, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=5, Q_v=-0.11, best=-0.10, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=3, N=5, Q_v=-0.11, best=-0.09, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.11, best=-0.08, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=25, Q_v=-0.11, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=5, N=24, Q_v=-0.11, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=4, Q_v=-0.12, best=-0.11, ubc=0.51)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=3, N=4, Q_v=-0.11, best=-0.09, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=3, Q_v=-0.12, best=-0.11, ubc=0.61)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.62)
└── (a=5, N=4, Q_v=-0.12, best=-0.10, ubc=0.51)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2]
(a=2, N=150, Q_v=-0.10, best=-0.08, ubc=0.03)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=2, N=5, Q_v=-0.10, best=-0.10, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.46)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=2, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=25, Q_v=-0.10, best=-0.09, ubc=0.21)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
├── (a=1, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=4, Q_v=-0.11, best=-0.10, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.11, best=-0.10, ubc=0.52)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=3, Q_v=-0.10, best=-0.09, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2]
(a=2, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=5, Q_v=-0.08, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=3, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
[17:05:09] INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
│ ├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5]
(a=5, N=150, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=5, Q_v=-0.09, best=-0.08, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=2, N=25, Q_v=-0.09, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5]
(a=5, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.09, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=24, Q_v=-0.09, best=-0.09, ubc=0.23)
├── (a=0, N=3, Q_v=-0.10, best=-0.09, ubc=0.63)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.64)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3]
(a=3, N=150, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=25, Q_v=-0.10, best=-0.09, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=24, Q_v=-0.10, best=-0.09, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.53)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.47)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ │ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=5, Q_v=-0.10, best=-0.09, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.10, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.22)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
[17:05:10] INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4]
(a=4, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=5, Q_v=-0.08, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.48)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ └── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=3, N=24, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=4, N=3, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=4, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
│ ├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.55)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=25, Q_v=-0.09, best=-0.08, ubc=0.23)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=4, Q_v=-0.09, best=-0.08, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
└── (a=5, N=4, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
INFO selected action 1 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1]
(a=1, N=150, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=30, Q_v=-0.09, best=-0.09, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=29, Q_v=-0.09, best=-0.08, ubc=0.21)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ │ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0]
(a=0, N=155, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.09, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2]
(a=2, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=31, Q_v=-0.10, best=-0.09, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
[17:05:11] INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0]
(a=0, N=156, Q_v=-0.10, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.10, best=-0.10, ubc=0.43)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.10, best=-0.10, ubc=0.43)
│ │ ├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.57)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=31, Q_v=-0.10, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5]
(a=5, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2]
(a=2, N=156, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=7, Q_v=-0.10, best=-0.08, ubc=0.40)
│ │ ├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.60)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.61)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ ├── (a=3, N=7, Q_v=-0.09, best=-0.08, ubc=0.40)
│ │ ├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.60)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.61)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ ├── (a=4, N=8, Q_v=-0.09, best=-0.08, ubc=0.37)
│ │ ├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.62)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.92)
│ └── (a=5, N=8, Q_v=-0.09, best=-0.08, ubc=0.37)
│ ├── (a=0, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.63)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.92)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.19)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=6, Q_v=-0.10, best=-0.08, ubc=0.44)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
[17:05:12] INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4]
(a=4, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=7, Q_v=-0.09, best=-0.09, ubc=0.40)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.60)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.60)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ ├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ ├── (a=4, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ └── (a=5, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ │ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3]
(a=3, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=2, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ ├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ ├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.87)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ ├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ ├── (a=4, N=8, Q_v=-0.09, best=-0.09, ubc=0.38)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ └── (a=5, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.89)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4]
(a=4, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ ├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ │ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ ├── (a=4, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
│ │ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ └── (a=5, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
│ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0]
(a=0, N=156, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=0, N=31, Q_v=-0.08, best=-0.08, ubc=0.20)
│ ├── (a=2, N=7, Q_v=-0.08, best=-0.08, ubc=0.41)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ ├── (a=3, N=7, Q_v=-0.08, best=-0.08, ubc=0.41)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ ├── (a=4, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ └── (a=5, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=2, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.51)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ ├── (a=3, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ │ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ ├── (a=4, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
│ ├── (a=4, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
│ └── (a=5, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
│ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
│ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.61)
├── (a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
│ ├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ │ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ │ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ │ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=31, Q_v=-0.09, best=-0.08, ubc=0.20)
├── (a=0, N=6, Q_v=-0.09, best=-0.09, ubc=0.45)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.58)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
[17:05:13] INFO selected action 0 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0]
(a=0, N=156, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=38, Q_v=-0.08, best=-0.08, ubc=0.17)
│ ├── (a=3, N=12, Q_v=-0.08, best=-0.08, ubc=0.31)
│ │ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
│ │ ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.56)
│ ├── (a=4, N=12, Q_v=-0.08, best=-0.08, ubc=0.31)
│ │ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
│ │ ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.47)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.56)
│ └── (a=5, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
│ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
├── (a=3, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
│ ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ ├── (a=3, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
│ ├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ └── (a=5, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
│ ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ ├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ │ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ ├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
│ │ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
│ └── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
└── (a=5, N=39, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.34)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.68)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=4, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
│ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
└── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
└── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4]
(a=4, N=164, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=41, Q_v=-0.08, best=-0.08, ubc=0.17)
│ ├── (a=3, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
│ │ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ │ ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ │ └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ ├── (a=4, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
│ │ ├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
│ └── (a=5, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
│ ├── (a=3, N=5, Q_v=-0.08, best=-0.08, ubc=0.43)
│ ├── (a=4, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
│ └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
├── (a=3, N=41, Q_v=-0.08, best=-0.08, ubc=0.17)
│ ├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ │ ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ │ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ ├── (a=4, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ │ ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ │ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ │ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ └── (a=5, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
├── (a=4, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
│ ├── (a=2, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
│ │ ├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.38)
│ ├── (a=3, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
│ │ ├── (a=2, N=5, Q_v=-0.08, best=-0.08, ubc=0.43)
│ │ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
│ │ └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.49)
│ └── (a=5, N=13, Q_v=-0.08, best=-0.08, ubc=0.29)
│ ├── (a=2, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ ├── (a=3, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
│ └── (a=5, N=4, Q_v=-0.08, best=-0.08, ubc=0.48)
└── (a=5, N=40, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
│ ├── (a=4, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ └── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.67)
├── (a=4, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.54)
│ └── (a=5, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
└── (a=5, N=9, Q_v=-0.08, best=-0.08, ubc=0.37)
├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.52)
└── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.66)
INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3]
(a=3, N=166, Q_v=-0.08, best=-0.08, ubc=0.05)
├── (a=2, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
│ ├── (a=3, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
│ │ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=4, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
│ │ ├── (a=3, N=7, Q_v=-0.09, best=-0.08, ubc=0.35)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ └── (a=5, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.48)
├── (a=3, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
│ ├── (a=2, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
│ │ ├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=4, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
│ │ ├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ └── (a=5, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.43)
│ ├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
├── (a=4, N=42, Q_v=-0.08, best=-0.08, ubc=0.16)
│ ├── (a=2, N=13, Q_v=-0.09, best=-0.08, ubc=0.29)
│ │ ├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=3, N=14, Q_v=-0.08, best=-0.08, ubc=0.28)
│ │ ├── (a=2, N=7, Q_v=-0.09, best=-0.08, ubc=0.35)
│ │ └── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.38)
│ └── (a=5, N=14, Q_v=-0.09, best=-0.08, ubc=0.28)
│ ├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.43)
│ ├── (a=3, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
│ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.49)
└── (a=5, N=41, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=2, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
│ └── (a=5, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
├── (a=3, N=10, Q_v=-0.08, best=-0.08, ubc=0.35)
│ ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
│ └── (a=5, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
├── (a=4, N=10, Q_v=-0.09, best=-0.08, ubc=0.35)
│ ├── (a=2, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
│ ├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
│ └── (a=5, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
└── (a=5, N=10, Q_v=-0.09, best=-0.08, ubc=0.35)
├── (a=2, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
├── (a=3, N=3, Q_v=-0.09, best=-0.09, ubc=0.53)
└── (a=4, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
[17:05:14] INFO selected action 4 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4]
(a=4, N=167, Q_v=-0.09, best=-0.08, ubc=0.05)
├── (a=2, N=55, Q_v=-0.09, best=-0.08, ubc=0.13)
│ ├── (a=3, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
│ │ └── (a=5, N=26, Q_v=-0.09, best=-0.09, ubc=0.17)
│ └── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=3, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
│ └── (a=5, N=13, Q_v=-0.09, best=-0.08, ubc=0.27)
├── (a=3, N=56, Q_v=-0.09, best=-0.08, ubc=0.13)
│ ├── (a=2, N=28, Q_v=-0.09, best=-0.08, ubc=0.18)
│ │ └── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.16)
│ └── (a=5, N=27, Q_v=-0.09, best=-0.08, ubc=0.19)
│ ├── (a=2, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
│ └── (a=5, N=13, Q_v=-0.09, best=-0.09, ubc=0.27)
└── (a=5, N=55, Q_v=-0.09, best=-0.08, ubc=0.13)
├── (a=2, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
│ ├── (a=3, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
│ └── (a=5, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
├── (a=3, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
│ ├── (a=2, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
│ └── (a=5, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
└── (a=5, N=18, Q_v=-0.09, best=-0.08, ubc=0.25)
├── (a=2, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
└── (a=3, N=9, Q_v=-0.09, best=-0.09, ubc=0.32)
INFO selected action 3 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4, 3]
(a=3, N=181, Q_v=-0.09, best=-0.08, ubc=0.04)
├── (a=2, N=90, Q_v=-0.09, best=-0.08, ubc=0.08)
│ └── (a=5, N=89, Q_v=-0.09, best=-0.08, ubc=0.07)
│ └── (a=5, N=88, Q_v=-0.09, best=-0.09, ubc=0.07)
└── (a=5, N=90, Q_v=-0.09, best=-0.08, ubc=0.08)
├── (a=2, N=44, Q_v=-0.09, best=-0.09, ubc=0.14)
│ └── (a=5, N=43, Q_v=-0.09, best=-0.09, ubc=0.12)
└── (a=5, N=45, Q_v=-0.09, best=-0.09, ubc=0.14)
└── (a=2, N=44, Q_v=-0.09, best=-0.09, ubc=0.12)
INFO selected action 2 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4, 3, 2]
(a=2, N=215, Q_v=-0.09, best=-0.08, ubc=0.03)
└── (a=5, N=214, Q_v=-0.09, best=-0.08, ubc=0.03)
└── (a=5, N=213, Q_v=-0.09, best=-0.09, ubc=0.03)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4, 3, 2, 5]
(a=5, N=339, Q_v=-0.09, best=-0.08, ubc=0.01)
└── (a=5, N=338, Q_v=-0.09, best=-0.09, ubc=0.01)
INFO selected action 5 after 125 simulations.
INFO current action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4, 3, 2, 5, 5]
INFO Final action list: [1, 1, 2, 1, 0, 0, 1, 5, 1, 4, 2, 3, 2, 3, 5, 5, 3, 4, 1, 0, 2, 0, 5, 2, 4, 3, 4, 0, 0, 4, 3, 4, 3, 2, 5, 5]
╔═══════════════════════════════════════════════════════╗
Job 0 ║ ████ ████████████ ████████ ║ Machine 0 █
Job 1 ║███████████████████████████████████████████ ║ Machine 1 █
Job 2 ║████████████████ █████████ ██████ ║ Machine 2 █
Job 3 ║ █████████ █████ ██████████ ████████ ║ Machine 3 █
Job 4 ║ ████████████████ ████ ████ ║ Machine 4 █
Job 5 ║ █████ ████████ █████████ █████║ Machine 5 █
╚╦════╤════╤════╤════╤════╦════╤════╤════╤════╤════╦════╝
0.0 27.7 55.5
Makespan: 60