gnn_jsp_env (Action History Wrapper)
[1]:
from collections import namedtuple
from copy import deepcopy, copy
[2]:
from gymnasium.spaces import Box, Discrete
from jsp_instance_utils.instances import ft06, ft06_makespan
from jsp_vis.console import gantt_chart_console
[3]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
from gymnasium.wrappers import TransformReward, NormalizeReward
from gymcts.logger import log
[4]:
import gymnasium as gym
[5]:
from gymnasium.core import ActType, ObsType
from typing import Any, SupportsFloat
[6]:
import random
[7]:
import pandas as pd
import numpy as np
[8]:
# Immutable record describing a single operation of a job shop instance:
#   job_id        - index of the job the operation belongs to
#   op_id         - position of the operation inside its job
#   unique_op_id  - running index of the operation over all jobs (job after job)
#   machine_type  - index of the machine the operation has to be processed on
#   duration      - processing time of the operation
Operation = namedtuple("Operation", ["job_id", "op_id", "unique_op_id", "machine_type", "duration"])
[9]:
def get_legal_pos(op_dur, job_ready_time, possible_pos, mch_infos):
    """
    Determines in front of which already scheduled operations of a machine an
    operation of the given duration fits.

    :param op_dur: duration of the operation that should be placed
    :param job_ready_time: time at which the job of the operation is ready
    :param possible_pos: indices (into the machine's arrays) of the scheduled operations
        in front of which the operation might be placed
    :param mch_infos: machine dictionary providing the 'start_times' and 'end_times' arrays
    :returns: tuple of (indices into possible_pos that are wide enough, the corresponding
        machine positions, earliest feasible start time in front of every candidate position)
    """
    slot_starts = mch_infos['start_times']
    slot_ends = mch_infos['end_times']

    # The first gap opens when both the job and the slot preceding the first candidate are done.
    # For candidate position 0 the index -1 wraps around to the last slot of the machine.
    first_gap_open = max(job_ready_time, slot_ends[possible_pos[0] - 1])

    # Every later gap opens when the previous candidate finishes.
    possible_pos_end_times = np.append(first_gap_open, slot_ends[possible_pos])[:-1]

    gap_lengths = slot_starts[possible_pos] - possible_pos_end_times
    legal_pos_idx = np.flatnonzero(gap_lengths >= op_dur)
    legal_pos = np.take(possible_pos, legal_pos_idx)
    return legal_pos_idx, legal_pos, possible_pos_end_times
[10]:
def put_in_the_end(op, job_ready_time, mch_ready_time, mch_infos):
    """
    Appends an operation behind all operations already scheduled on a machine.

    The operation is written into the first free slot (marked by a start time of -1)
    of the machine's arrays, which are modified in place.

    :param op: the operation to schedule (its unique_op_id and duration are used)
    :param job_ready_time: time at which the job of the operation is ready
    :param mch_ready_time: time at which the machine becomes free
    :param mch_infos: machine dictionary with the 'op_ids', 'start_times' and 'end_times' arrays
    :returns: the start time assigned to the operation
    """
    free_slot = np.flatnonzero(mch_infos['start_times'] == -1)[0]
    op_start_time = max(job_ready_time, mch_ready_time)
    new_entries = (
        ('op_ids', op.unique_op_id),
        ('start_times', op_start_time),
        ('end_times', op_start_time + op.duration),
    )
    for field, value in new_entries:
        mch_infos[field][free_slot] = value
    return op_start_time
[11]:
def put_in_between(op, legal_pos_idx, legal_pos, possible_pos_end_times, mch_infos):
    """
    Inserts an operation into the earliest fitting gap between scheduled operations.

    The machine's arrays are updated in place: the new entry is inserted at the earliest
    legal position and the last slot of each array is dropped to keep its length.

    :param op: the operation to schedule (its unique_op_id and duration are used)
    :param legal_pos_idx: indices into possible_pos_end_times of the fitting gaps
    :param legal_pos: machine positions of the fitting gaps
    :param possible_pos_end_times: earliest feasible start time in front of every candidate
    :param mch_infos: machine dictionary with the 'op_ids', 'start_times' and 'end_times' arrays
    :returns: the start time assigned to the operation
    """
    insert_at = legal_pos[0]
    start_time = possible_pos_end_times[legal_pos_idx[0]]
    new_entries = (
        ('op_ids', op.unique_op_id),
        ('start_times', start_time),
        ('end_times', start_time + op.duration),
    )
    for field, value in new_entries:
        column = mch_infos[field]
        # Shift everything from insert_at one slot to the right and cut the overflow.
        column[:] = np.insert(column, insert_at, value)[:-1]
    return start_time
[12]:
def get_end_time_lbs(jobs, machine_infos):
    """
    Calculates the end time lower bounds for all operations

    For an operation that is already scheduled the lower bound is its actual end time.
    For an unscheduled operation it is the lower bound of its predecessor in the job plus
    its own duration (or just its duration if it is the first operation of the job).

    :param jobs: array of jobs, where each job is an array of operations
    :param machine_infos: dictionary where the keys are machine indices and the values contain
    the ids of the operations scheduled on the machine (in the scheduled order), and the
    corresponding start and end times
    :returns: np array of shape (number of jobs, operations per job) containing the end time
    lower bounds of all operations
    """
    # Build the id -> end time lookup once instead of searching flat lists for every operation.
    # setdefault keeps the first occurrence, which matches a linear search from the front.
    end_time_by_op = {}
    for machine in machine_infos.values():
        for op_id, end_time in zip(machine['op_ids'], machine['end_times']):
            end_time_by_op.setdefault(int(op_id), end_time)

    ops_per_job = len(jobs[0]) if len(jobs) > 0 else 0
    lbs = np.full((len(jobs), ops_per_job), -1.0)
    for i, job in enumerate(jobs):
        for j, op in enumerate(job):
            if op.unique_op_id in end_time_by_op:
                lbs[i][j] = end_time_by_op[op.unique_op_id]
            elif j > 0:
                lbs[i][j] = lbs[i][j - 1] + op.duration
            else:
                lbs[i][j] = op.duration
    return lbs
[13]:
def get_op_nbghs(op, machine_infos):
    """
    Finds a given operation's predecessor and successor on the machine where the operation is carried out

    :param op: the (already scheduled) operation whose neighbours are requested
    :param machine_infos: dictionary mapping machine indices to dictionaries holding the
        'op_ids' array of the operations scheduled on the machine (in the scheduled order)
    :returns: tuple (pred, succ) of unique operation ids. If the operation is the first one on
        its machine, pred is the operation's own id. If it is the last one (or the next slot is
        still unused, i.e. negative), succ is the operation's own id.
    :raises ValueError: if the operation is not scheduled on any machine
    """
    for infos in machine_infos.values():
        matches = np.flatnonzero(infos['op_ids'] == op.unique_op_id)
        if matches.size > 0:
            op_ids = infos['op_ids']
            pos = int(matches[0])
            break
    else:
        # Without this guard the lookup below would fail with an unrelated unbound-variable error.
        raise ValueError("The operation's unique id was not found in the machine informations")

    pred = op_ids[pos - 1] if pos > 0 else op_ids[pos]

    succ_temp = op_ids[pos + 1] if pos + 1 < op_ids.shape[-1] else op_ids[pos]
    # A negative id marks an unused slot, so there is no real successor yet.
    succ = op.unique_op_id if succ_temp < 0 else succ_temp
    return pred, succ
[14]:
def get_first_ops(state):
    """
    Collects the unique operation ids of the first operation of every job.

    The ids are derived from the number of feature rows (one per operation) and the
    number of jobs, assuming every job has the same number of operations.

    :param state: state dictionary providing the 'features' and 'jobs' entries
    :returns: np array with one unique operation id per job
    """
    total_ops = len(state['features'])
    job_count = len(state['jobs'])
    # One row per job; the first column then holds the id of each job's first operation.
    op_id_grid = np.arange(total_ops).reshape(job_count, -1)
    return op_id_grid[:, 0]
[15]:
class GNNJobShopModel():
def __init__(self, **kwargs):
pass
@staticmethod
def random_problem(num_jobs, num_ops_per_job, num_machines, max_duration=10):
remaining_operations = []
unique_op_id = 0
for i in range(num_jobs):
job = []
for j in range(num_ops_per_job):
job.append(Operation(i, j, unique_op_id, random.randint(0, num_machines - 1),
random.randint(0, max_duration - 1)))
unique_op_id += 1
remaining_operations.append(job)
schedule = [[] for _ in range(num_machines)]
num_ops = num_jobs * num_machines
# Number of operations scheduled on each machine
ops_per_machine = [len([op for job in remaining_operations for op in job if op.machine_type == m]) for m in
range(num_machines)]
# Information for each machine: the ids of the operations scheduled on it (in the scheduled order), and the
# corresponding start and end times
machine_infos = {m: {'op_ids': -1 * np.ones(ops_per_machine[m], dtype=np.int32),
'start_times': -1 * np.ones(ops_per_machine[m], dtype=np.int32),
'end_times': -1 * np.ones(ops_per_machine[m], dtype=np.int32)} for m in
range(num_machines)}
# Time at which the last scheduled operation ends for each job
last_job_ops = [-1 for _ in range(num_jobs)]
# Time at which the last scheduled operation ends on each machine
last_machine_ops = [-1 for _ in range(num_machines)]
jobs = deepcopy(remaining_operations)
adj_matrix = GNNJobShopModel.init_adj_matrix(num_ops, num_jobs)
features = GNNJobShopModel.init_features(jobs)
node_states = np.array([1 if i % num_ops_per_job == 0 else 0 for i in range(num_ops)],
dtype=np.single)
return {'remaining_ops': remaining_operations, 'schedule': schedule, 'machine_infos': machine_infos,
'last_job_ops': last_job_ops, 'last_mch_ops': last_machine_ops, 'adj_matrix': adj_matrix,
'features': features, 'node_states': node_states, 'jobs': jobs}
@staticmethod
def _schedule_op(job_id, state):
possible = False
if len(state['remaining_ops'][job_id]) > 0:
op = state['remaining_ops'][job_id].pop(0)
start_time, flag = GNNJobShopModel._determine_start_time(op, state['last_job_ops'],
state['last_mch_ops'], state['machine_infos'])
# Insert the operation at the correct position so that the entries remain sorted according to start_time
state['schedule'][op.machine_type].append((op, start_time, start_time + op.duration))
state['schedule'][op.machine_type] = sorted(state['schedule'][op.machine_type], key=lambda x: x[1])
# Update state
if state['last_job_ops'][op.job_id] < start_time + op.duration:
state['last_job_ops'][op.job_id] = start_time + op.duration
if state['last_mch_ops'][op.machine_type] < start_time + op.duration:
state['last_mch_ops'][op.machine_type] = start_time + op.duration
GNNJobShopModel._update_adj_matrix(state, op, flag)
GNNJobShopModel._update_features(state, op)
GNNJobShopModel._update_node_states(state, op)
possible = True
return state, possible
@staticmethod
def _update_adj_matrix(state, op, flag):
# Update the adjacency matrix after a new operation has been scheduled
pred, succ = get_op_nbghs(op, state['machine_infos'])
state['adj_matrix'][op.unique_op_id] = 0
state['adj_matrix'][op.unique_op_id, op.unique_op_id] = 1
state['adj_matrix'][op.unique_op_id, pred] = 1
state['adj_matrix'][succ, op.unique_op_id] = 1
if op.unique_op_id not in get_first_ops(state):
state['adj_matrix'][op.unique_op_id, op.unique_op_id - 1] = 1
# Remove the old arc when a new operation inserts between two operations
if flag and pred != op.unique_op_id and succ != op.unique_op_id:
state['adj_matrix'][succ, pred] = 0
@staticmethod
def _update_features(state, op):
# Update the operations' features after a new operation has been scheduled
lower_bounds = get_end_time_lbs(state['jobs'], state['machine_infos']) # recalculate lower bounds
finished = np.array([f[1] if i != op.unique_op_id
else 1 for i, f in enumerate(state['features'])]) # set op as finished
assert norm_coeff > 0, "The normalization coefficient has not been initialized"
state['features'] = np.concatenate((lower_bounds.reshape(-1, 1) / norm_coeff,
finished.reshape(-1, 1)), axis=1)
@staticmethod
def _update_node_states(state, op):
succ = op.unique_op_id + 1 if ((op.unique_op_id + 1) % len(state['jobs'][0]) != 0) else op.unique_op_id
if succ != op.unique_op_id:
state['node_states'][op.unique_op_id] = 0 # TODO node_states type changes -> fix
state['node_states'][succ] = 1 # TODO add -1 condition?
@staticmethod
def _determine_start_time(op: Operation, last_job_ops, last_mch_ops, machine_infos):
job_ready_time = last_job_ops[op.job_id] if last_job_ops[op.job_id] != -1 else 0
mch_ready_time = last_mch_ops[op.machine_type] if last_mch_ops[op.machine_type] != -1 else 0
# Whether the operation is scheduled between already scheduled operations (True) or in the end (False)
flag = False
# Positions between already scheduled operations on the machine required by the operation
possible_pos = np.where(job_ready_time < machine_infos[op.machine_type]['start_times'])[0]
if len(possible_pos) == 0:
# Not possible to schedule the operation between other operations -> put in the end
op_start_time = put_in_the_end(op, job_ready_time, mch_ready_time, machine_infos[op.machine_type])
else:
# Positions which fit the length of the operation (there is enough time before the next operation)
legal_pos_idx, legal_pos, possible_pos_end_times = get_legal_pos(op.duration, job_ready_time,
possible_pos,
machine_infos[op.machine_type])
if len(legal_pos) == 0:
# No position which can fit the operation -> put in the end
op_start_time = put_in_the_end(op, job_ready_time, mch_ready_time, machine_infos[op.machine_type])
else:
# Schedule the operation between other operations
op_start_time = put_in_between(op, legal_pos_idx, legal_pos, possible_pos_end_times,
machine_infos[op.machine_type])
flag = True
return op_start_time, flag
@staticmethod
def _is_done(remaining_ops):
for j in remaining_ops:
if len(j) > 0:
return False
return True
@staticmethod
def _makespan(schedule):
makespan = 0
for machine, machine_schedule in enumerate(schedule):
if len(machine_schedule) > 0:
_, _, end_time = machine_schedule[-1]
if end_time > makespan:
makespan = end_time
return makespan
@staticmethod
def _get_norm_coeff(max_duration, num_ops_per_job, num_jobs):
i = 10
while i < max_duration * num_ops_per_job * num_jobs:
i *= 10
return i
@staticmethod
def step(state, action):
new_state, possible = GNNJobShopModel._schedule_op(action, deepcopy(state))
reward = 0
if not possible:
reward = -1
done = GNNJobShopModel._is_done(new_state['remaining_ops'])
if done:
reward = - GNNJobShopModel._makespan(new_state['schedule'])
return new_state, reward, done
@staticmethod
def legal_actions(state):
return [job_id for job_id in range(len(state['remaining_ops'])) if
len(state['remaining_ops'][job_id]) > 0]
@staticmethod
def init_adj_matrix(num_ops, num_jobs):
# task ids for first column (array containing the first tasks for each job)
first_col = np.arange(start=0, stop=num_ops, step=1).reshape(num_jobs, -1)[:, 0]
# task ids for last column (array containing the last tasks for each job)
last_col = np.arange(start=0, stop=num_ops, step=1).reshape(num_jobs, -1)[:, -1]
# conjunctive arcs showing precedence relations between tasks of the same job
# np array with 1s on the row above the main diagonal and 0s everywhere else
conj_nei_up_stream = np.eye(num_ops, k=-1, dtype=np.single)
# np array with 1s on the row below the main diagonal and 0s everywhere else
conj_nei_low_stream = np.eye(num_ops, k=1, dtype=np.single)
# first column does not have upper stream conj_nei
conj_nei_up_stream[first_col] = 0
# last column does not have lower stream conj_nei
conj_nei_low_stream[last_col] = 0
# self edges for all nodes
# np array with 1s on the main diagonal and 0s everywhere else
self_as_nei = np.eye(num_ops, dtype=np.single)
adj = self_as_nei + conj_nei_up_stream
return adj
@staticmethod
def init_features(jobs):
durations = np.array([[op.duration for op in job] for job in jobs])
lower_bounds = np.cumsum(durations, axis=1, dtype=np.single) # lower bounds of operations' completion times
machine_types = np.array([[op.machine_type for op in job] for job in jobs])
finished_mark = np.zeros_like(machine_types, dtype=np.single) # 0 for unfinished, 1 for finished
global norm_coeff
norm_coeff = GNNJobShopModel._get_norm_coeff(max(durations.flatten()), len(jobs[0]), len(jobs))
# node features: normalized end time lower bounds and binary indicator of whether the action has been scheduled
features = np.concatenate((lower_bounds.reshape(-1, 1) / norm_coeff, # normalize the lower bounds
finished_mark.reshape(-1, 1)), axis=1) # 1 if scheduled, 0 otherwise
return features
[16]:
class GNNJobShopModelEnv(gym.Env):
    """
    Gymnasium environment around GNNJobShopModel for a fixed job shop instance.

    An action is the index of the job whose next operation should be scheduled. The
    observation is the state dictionary of the model.
    """

    def _jsp_instance_adapter(self, jsp_instance):
        """
        Converts a job shop instance into the initial state dictionary of the model.

        :param jsp_instance: array of shape (2, n_jobs, n_machines); index 0 holds the machine
            of every operation, index 1 the processing time of every operation
        :returns: the initial state dictionary
        """
        _, n_jobs, n_machines = jsp_instance.shape
        machine_order = jsp_instance[0]
        processing_times = jsp_instance[1]
        # Every job has one operation per machine
        num_ops_per_job = n_machines
        remaining_operations = []
        unique_op_id = 0
        for i in range(n_jobs):
            job = []
            for j in range(num_ops_per_job):
                job.append(
                    Operation(
                        i, j,
                        unique_op_id,
                        machine_order[i][j],  # machine_type
                        processing_times[i][j]  # duration
                    )
                )
                unique_op_id += 1
            remaining_operations.append(job)
        schedule = [[] for _ in range(n_machines)]
        num_ops = n_jobs * n_machines
        # Number of operations scheduled on each machine
        ops_per_machine = [len([op for job in remaining_operations for op in job if op.machine_type == m]) for m in
                           range(n_machines)]
        # Information for each machine: the ids of the operations scheduled on it (in the scheduled order), and the
        # corresponding start and end times
        machine_infos = {m: {'op_ids': -1 * np.ones(ops_per_machine[m], dtype=np.int32),
                             'start_times': -1 * np.ones(ops_per_machine[m], dtype=np.int32),
                             'end_times': -1 * np.ones(ops_per_machine[m], dtype=np.int32)} for m in
                         range(n_machines)}
        # Time at which the last scheduled operation ends for each job
        last_job_ops = [-1 for _ in range(n_jobs)]
        # Time at which the last scheduled operation ends on each machine
        last_machine_ops = [-1 for _ in range(n_machines)]
        jobs = deepcopy(remaining_operations)
        adj_matrix = GNNJobShopModel.init_adj_matrix(num_ops, n_jobs)
        features = GNNJobShopModel.init_features(jobs)
        node_states = np.array([1 if i % num_ops_per_job == 0 else 0 for i in range(num_ops)],
                               dtype=np.single)
        return {'remaining_ops': remaining_operations, 'schedule': schedule, 'machine_infos': machine_infos,
                'last_job_ops': last_job_ops, 'last_mch_ops': last_machine_ops, 'adj_matrix': adj_matrix,
                'features': features, 'node_states': node_states, 'jobs': jobs}

    def __init__(self, jsp_instance: np.ndarray, **kwargs):
        """
        :param jsp_instance: array of shape (2, n_jobs, n_machines), see _jsp_instance_adapter
        :param kwargs: accepted for compatibility and ignored
        """
        self.model = GNNJobShopModel()
        _, n_jobs, n_machines = jsp_instance.shape
        self.n_jobs = n_jobs
        self.n_machines = n_machines
        self.state = self._jsp_instance_adapter(jsp_instance)
        self._initial_state = copy(self.state)
        # Define the space for an Operation
        operation_space = gym.spaces.Tuple((
            gym.spaces.Discrete(np.iinfo(np.int32).max),  # job_id
            gym.spaces.Discrete(np.iinfo(np.int32).max),  # op_id
            gym.spaces.Discrete(np.iinfo(np.int32).max),  # unique_op_id
            gym.spaces.Discrete(np.iinfo(np.int32).max),  # machine_type
            gym.spaces.Discrete(np.iinfo(np.int32).max)  # duration
        ))
        # Define the space for a ScheduledOperation
        scheduled_operation_space = gym.spaces.Tuple((
            operation_space,  # Operation
            gym.spaces.Discrete(np.iinfo(np.int32).max),  # start_time
            gym.spaces.Discrete(np.iinfo(np.int32).max)  # end_time
        ))
        # Shapes are taken from the state that was just built instead of being hard-coded.
        # NOTE(review): the bounds/dtypes of the first four entries are nominal and the
        # state contains entries ('remaining_ops', 'node_states', 'jobs') that are not part
        # of this space - confirm whether the observation space has to be exact.
        observation_space = gym.spaces.Dict({
            'adj_matrix': Box(low=-1.0, high=1.0, shape=self.state['adj_matrix'].shape, dtype=np.float32),
            'features': Box(low=-1.0, high=1.0, shape=self.state['features'].shape, dtype=np.float32),
            'last_job_ops': Box(low=-1.0, high=1.0, shape=(self.n_jobs,), dtype=np.float32),
            'last_mch_ops': Box(low=-1.0, high=1.0, shape=(self.n_machines,), dtype=np.float32),
            'machine_infos': gym.spaces.Dict({
                key_idx: gym.spaces.Dict({
                    'end_times': Box(low=-1, high=np.iinfo(np.int32).max, shape=infos['end_times'].shape,
                                     dtype=np.int32),
                    'op_ids': Box(low=-1, high=np.iinfo(np.int32).max, shape=infos['op_ids'].shape,
                                  dtype=np.int32),
                    'start_times': Box(low=-1, high=np.iinfo(np.int32).max, shape=infos['start_times'].shape,
                                       dtype=np.int32),
                }) for key_idx, infos in self.state['machine_infos'].items()
            }),
            # 'remaining_ops': ,
            'schedule': gym.spaces.Tuple([
                gym.spaces.Tuple([scheduled_operation_space for _ in range(self.n_machines)]) for _ in
                range(self.n_jobs)
            ])
        })
        self.observation_space = observation_space
        # One action per job (the model's legal actions are job indices)
        self.action_space = Discrete(self.n_jobs)
        self.done = False

    def set_state(self, state: dict):
        """Replaces the current state and derives the done flag from it."""
        self.state = state
        # 'remaining_ops' holds one list per job, so its length says nothing about
        # termination; the episode is done when all of these lists are empty.
        self.done = self.model._is_done(state['remaining_ops'])

    def step(self, action: ActType) -> tuple[ObsType, SupportsFloat, bool, bool, dict[str, Any]]:
        """
        Schedules the next operation of the job with index `action`.

        :returns: tuple (state, reward, terminated, truncated, info); truncated is always False
        """
        self.state, reward, self.done = self.model.step(self.state, action)
        # NOTE(review): the info value is the negated reward, which equals the makespan
        # only on the terminal step.
        return self.state, reward, self.done, False, {'makespan': - reward}

    def render(self) -> None:
        """Prints the current schedule as a Gantt chart followed by its latest finish time."""
        allocation = []
        latest_finish_time = 0
        for machine_ops in self.state['schedule']:
            if not machine_ops:
                continue
            for ops_elem, start_time, finish_time in machine_ops:
                allocation.append({
                    'Task': f'Job {ops_elem.job_id}',
                    'Start': start_time,
                    'Finish': finish_time,
                    'Resource': f'Machine {ops_elem.machine_type}'
                })
                latest_finish_time = max(finish_time, latest_finish_time)
        df = pd.DataFrame(allocation)
        gantt_chart_console(df, self.n_machines)
        print(f'Makespan: {latest_finish_time}')

    def get_state(self) -> dict:
        """Returns the current state dictionary (not a copy)."""
        return self.state

    def reset(
            self,
            *,
            seed: int | None = None,
            options: dict[str, Any] | None = None,
    ) -> tuple[ObsType, dict[str, Any]]:
        """Restores the initial state of the instance and returns it with an empty info dict."""
        # Let the base class handle the seeding of the environment's random generator
        super().reset(seed=seed)
        self.set_state(self._initial_state)
        return self.state, {}

    def get_legal_action_mask(self) -> list[bool]:
        """Returns one boolean per action, True for jobs which still have operations left."""
        legal_action_mask = [False] * self.action_space.n
        for action in self.model.legal_actions(self.state):
            legal_action_mask[action] = True
        return legal_action_mask
[17]:
# Demo: solve the ft06 instance with the MCTS agent and replay the found action sequence.
if __name__ == '__main__':
    # 20 is the numeric value of logging.INFO (assuming `log` is a standard logging logger)
    log.setLevel(20)
    # model = GNNJobShopModel()
    # jsp_state = model.random_problem(6, 6, 6)
    # print(pprint.pformat(jsp_state))
    env = GNNJobShopModelEnv(jsp_instance=ft06)
    env.reset()
    # Reward shaping wrappers; they wrap the environment before the MCTS wrapper is applied.
    env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
    # NOTE(review): 36 presumably is the number of operations of ft06 (6 jobs x 6 machines) - confirm
    env = TransformReward(env, lambda r: r / 36)
    def mask_fn(env: gym.Env) -> np.ndarray:
        # Do whatever you'd like in this function to return the action mask
        # for the current env. In this example, we assume the env has a
        # helpful method we can rely on.
        # NOTE(review): get_legal_action_mask returns a list of booleans, not an ndarray.
        return env.unwrapped.get_legal_action_mask()
    env = ActionHistoryMCTSGymEnvWrapper(
        env,
        action_mask_fn=mask_fn
    )
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        exclude_unvisited_nodes_from_render=True,
        number_of_simulations_per_step=25,
    )
    # NOTE(review): `root` is not used afterwards.
    root = agent.search_root_node.get_root()
    actions = agent.solve(render_tree_after_step=True)
    # Replay the found actions from the initial state and show the resulting schedule.
    env.reset()
    for a in actions:
        obs, rew, term, trun, info = env.step(a)
    env.unwrapped.render()
(N=25, Q_v=-0.09, best=-0.06)
├── (a=0, N=4, Q_v=-0.09, best=-0.06, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=1, N=4, Q_v=-0.08, best=-0.06, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.06, best=-0.06, ubc=0.77)
├── (a=2, N=4, Q_v=-0.09, best=-0.06, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.76)
├── (a=3, N=4, Q_v=-0.09, best=-0.06, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=4, N=4, Q_v=-0.09, best=-0.06, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=4, Q_v=-0.10, best=-0.06, ubc=0.54)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
└── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
[17:03:15] INFO selected action 1 after 25 simulations.
INFO current action list: [1]
(a=1, N=29, Q_v=-0.11, best=-0.06, ubc=0.15)
├── (a=0, N=4, Q_v=-0.11, best=-0.11, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
├── (a=1, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=5, Q_v=-0.10, best=-0.06, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
└── (a=5, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
└── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2]
(a=2, N=30, Q_v=-0.12, best=-0.06, ubc=0.14)
├── (a=0, N=5, Q_v=-0.12, best=-0.11, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=5, Q_v=-0.12, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=5, Q_v=-0.12, best=-0.11, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=3, N=5, Q_v=-0.12, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
├── (a=4, N=4, Q_v=-0.12, best=-0.11, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
│ └── (a=5, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
└── (a=5, N=5, Q_v=-0.12, best=-0.12, ubc=0.46)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
[17:03:16] INFO selected action 1 after 25 simulations.
INFO current action list: [1, 2, 1]
(a=1, N=30, Q_v=-0.13, best=-0.11, ubc=0.13)
├── (a=0, N=5, Q_v=-0.13, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=1, N=4, Q_v=-0.13, best=-0.12, ubc=0.52)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
│ └── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=2, N=5, Q_v=-0.13, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=3, N=5, Q_v=-0.12, best=-0.11, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=4, N=5, Q_v=-0.13, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
└── (a=5, N=5, Q_v=-0.13, best=-0.12, ubc=0.46)
├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
└── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
[17:03:17] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3]
(a=3, N=30, Q_v=-0.12, best=-0.11, ubc=0.14)
├── (a=0, N=5, Q_v=-0.12, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=5, Q_v=-0.12, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
├── (a=2, N=5, Q_v=-0.12, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=3, N=4, Q_v=-0.12, best=-0.12, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=4, N=5, Q_v=-0.12, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=5, N=5, Q_v=-0.12, best=-0.12, ubc=0.47)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
[17:03:18] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0]
(a=0, N=30, Q_v=-0.12, best=-0.11, ubc=0.14)
├── (a=0, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=1, N=5, Q_v=-0.12, best=-0.12, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=4, Q_v=-0.12, best=-0.12, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=4, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=5, N=5, Q_v=-0.12, best=-0.12, ubc=0.47)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4]
(a=4, N=30, Q_v=-0.11, best=-0.10, ubc=0.14)
├── (a=0, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.77)
├── (a=1, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ └── (a=5, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=3, N=4, Q_v=-0.12, best=-0.11, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=3, N=1, Q_v=-0.14, best=-0.14, ubc=0.69)
├── (a=4, N=5, Q_v=-0.11, best=-0.10, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
└── (a=5, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
[17:03:19] INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4]
(a=4, N=30, Q_v=-0.11, best=-0.10, ubc=0.14)
├── (a=0, N=5, Q_v=-0.11, best=-0.10, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=5, Q_v=-0.11, best=-0.10, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
├── (a=2, N=4, Q_v=-0.12, best=-0.11, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.71)
├── (a=3, N=5, Q_v=-0.11, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=4, N=5, Q_v=-0.12, best=-0.11, ubc=0.47)
│ ├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ ├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
│ └── (a=4, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=5, N=5, Q_v=-0.11, best=-0.10, ubc=0.47)
├── (a=0, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=1, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
├── (a=2, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
└── (a=3, N=1, Q_v=-0.12, best=-0.12, ubc=0.78)
[17:03:20] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0]
(a=0, N=30, Q_v=-0.12, best=-0.10, ubc=0.14)
├── (a=0, N=5, Q_v=-0.12, best=-0.11, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=1, N=5, Q_v=-0.12, best=-0.10, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ └── (a=3, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=2, N=4, Q_v=-0.12, best=-0.12, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.71)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.70)
├── (a=3, N=5, Q_v=-0.12, best=-0.11, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.78)
├── (a=4, N=5, Q_v=-0.12, best=-0.12, ubc=0.46)
│ ├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ ├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
│ └── (a=4, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
└── (a=5, N=5, Q_v=-0.12, best=-0.10, ubc=0.47)
├── (a=0, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=1, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
├── (a=2, N=1, Q_v=-0.13, best=-0.13, ubc=0.77)
└── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
[17:03:21] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5]
(a=5, N=30, Q_v=-0.10, best=-0.09, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=4, Q_v=-0.11, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
├── (a=2, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=3, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
└── (a=5, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
[17:03:23] INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4]
(a=4, N=30, Q_v=-0.11, best=-0.09, ubc=0.15)
├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=1, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=5, Q_v=-0.11, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=3, N=5, Q_v=-0.11, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=4, N=4, Q_v=-0.11, best=-0.11, ubc=0.54)
│ ├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.72)
└── (a=5, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
├── (a=0, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
[17:03:24] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5]
(a=5, N=30, Q_v=-0.09, best=-0.09, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=4, Q_v=-0.10, best=-0.09, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
[17:03:25] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3]
(a=3, N=30, Q_v=-0.09, best=-0.09, ubc=0.17)
├── (a=0, N=4, Q_v=-0.10, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
[17:03:27] INFO selected action 1 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1]
(a=1, N=30, Q_v=-0.09, best=-0.09, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=4, Q_v=-0.09, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=1, Q_v=-0.11, best=-0.11, ubc=0.79)
[17:03:28] INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4]
(a=4, N=30, Q_v=-0.09, best=-0.09, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=4, Q_v=-0.09, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.11, best=-0.11, ubc=0.73)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.79)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
[17:03:30] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0]
(a=0, N=30, Q_v=-0.09, best=-0.09, ubc=0.16)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
[17:03:31] INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2]
(a=2, N=30, Q_v=-0.09, best=-0.09, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=4, Q_v=-0.10, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
[17:03:33] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5]
(a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=4, Q_v=-0.10, best=-0.09, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.74)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=4, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=5, N=5, Q_v=-0.09, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.80)
[17:03:35] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0]
(a=0, N=30, Q_v=-0.10, best=-0.08, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=4, Q_v=-0.10, best=-0.09, ubc=0.55)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:03:36] INFO selected action 1 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1]
(a=1, N=30, Q_v=-0.10, best=-0.08, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=4, N=5, Q_v=-0.10, best=-0.09, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=4, Q_v=-0.10, best=-0.10, ubc=0.55)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:03:38] INFO selected action 1 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1]
(a=1, N=30, Q_v=-0.10, best=-0.08, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=4, Q_v=-0.10, best=-0.10, ubc=0.55)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=3, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=4, N=5, Q_v=-0.10, best=-0.10, ubc=0.48)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
[17:03:40] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5]
(a=5, N=30, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=1, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.56)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:03:42] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0]
(a=0, N=30, Q_v=-0.10, best=-0.08, ubc=0.16)
├── (a=0, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=3, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=4, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=4, Q_v=-0.10, best=-0.08, ubc=0.56)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
[17:03:44] INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2]
(a=2, N=30, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=1, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=3, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=4, Q_v=-0.09, best=-0.08, ubc=0.56)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
└── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
[17:03:47] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3]
(a=3, N=30, Q_v=-0.09, best=-0.08, ubc=0.16)
├── (a=0, N=4, Q_v=-0.10, best=-0.08, ubc=0.56)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=5, Q_v=-0.10, best=-0.08, ubc=0.49)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
│ └── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.49)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=1, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
[17:03:49] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3]
(a=3, N=30, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.56)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.75)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
├── (a=1, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=1, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
└── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
[17:03:51] INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4]
(a=4, N=30, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=4, Q_v=-0.09, best=-0.08, ubc=0.56)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.73)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.75)
├── (a=1, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.82)
│ └── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=3, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.81)
├── (a=4, N=5, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=5, N=5, Q_v=-0.09, best=-0.08, ubc=0.50)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=1, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.81)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
[17:03:54] INFO selected action 1 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1]
(a=1, N=30, Q_v=-0.09, best=-0.08, ubc=0.16)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=2, Q_v=-0.09, best=-0.08, ubc=0.58)
│ │ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.44)
│ ├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
├── (a=4, N=6, Q_v=-0.10, best=-0.09, ubc=0.44)
│ ├── (a=0, N=2, Q_v=-0.10, best=-0.10, ubc=0.57)
│ │ └── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.49)
│ ├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ ├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
│ └── (a=5, N=1, Q_v=-0.10, best=-0.10, ubc=0.85)
└── (a=5, N=5, Q_v=-0.10, best=-0.09, ubc=0.49)
├── (a=0, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=2, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
├── (a=3, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
└── (a=4, N=1, Q_v=-0.10, best=-0.10, ubc=0.80)
[17:03:56] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3]
(a=3, N=31, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ │ └── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.51)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.51)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=6, Q_v=-0.09, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
└── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
[17:03:58] INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2]
(a=2, N=31, Q_v=-0.08, best=-0.08, ubc=0.17)
├── (a=0, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
├── (a=3, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.58)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
│ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ ├── (a=4, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.59)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.51)
│ ├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
│ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=6, Q_v=-0.08, best=-0.08, ubc=0.45)
├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
├── (a=4, N=1, Q_v=-0.08, best=-0.08, ubc=0.86)
└── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.86)
[17:04:01] INFO selected action 4 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4]
(a=4, N=31, Q_v=-0.09, best=-0.08, ubc=0.17)
├── (a=0, N=7, Q_v=-0.09, best=-0.09, ubc=0.41)
│ ├── (a=2, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=2, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.51)
│ ├── (a=2, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.51)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
├── (a=3, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ ├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
│ ├── (a=2, N=3, Q_v=-0.08, best=-0.08, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.66)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
└── (a=5, N=7, Q_v=-0.09, best=-0.08, ubc=0.41)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.90)
└── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.61)
└── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
[17:04:04] INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2]
(a=2, N=33, Q_v=-0.09, best=-0.08, ubc=0.16)
├── (a=0, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=2, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=3, N=3, Q_v=-0.09, best=-0.08, ubc=0.50)
│ │ ├── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ └── (a=2, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=2, N=8, Q_v=-0.08, best=-0.08, ubc=0.38)
│ ├── (a=0, N=3, Q_v=-0.08, best=-0.08, ubc=0.50)
│ │ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.66)
│ ├── (a=3, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ │ └── (a=5, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
├── (a=3, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
│ ├── (a=0, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ │ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
│ ├── (a=2, N=3, Q_v=-0.09, best=-0.08, ubc=0.50)
│ │ ├── (a=0, N=1, Q_v=-0.08, best=-0.08, ubc=0.66)
│ │ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.65)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ └── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
└── (a=5, N=8, Q_v=-0.09, best=-0.08, ubc=0.38)
├── (a=0, N=2, Q_v=-0.09, best=-0.09, ubc=0.63)
│ └── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.50)
├── (a=2, N=2, Q_v=-0.09, best=-0.08, ubc=0.64)
│ └── (a=3, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.93)
└── (a=5, N=2, Q_v=-0.08, best=-0.08, ubc=0.64)
└── (a=2, N=1, Q_v=-0.08, best=-0.08, ubc=0.50)
[17:04:07] INFO selected action 2 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2]
(a=2, N=33, Q_v=-0.08, best=-0.08, ubc=0.16)
├── (a=0, N=11, Q_v=-0.08, best=-0.08, ubc=0.31)
│ ├── (a=3, N=5, Q_v=-0.09, best=-0.08, ubc=0.40)
│ │ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.36)
│ └── (a=5, N=5, Q_v=-0.08, best=-0.08, ubc=0.40)
│ ├── (a=3, N=2, Q_v=-0.08, best=-0.08, ubc=0.55)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.55)
├── (a=3, N=11, Q_v=-0.09, best=-0.08, ubc=0.31)
│ ├── (a=0, N=5, Q_v=-0.08, best=-0.08, ubc=0.40)
│ │ └── (a=5, N=4, Q_v=-0.09, best=-0.08, ubc=0.36)
│ └── (a=5, N=5, Q_v=-0.08, best=-0.08, ubc=0.40)
│ ├── (a=0, N=2, Q_v=-0.08, best=-0.08, ubc=0.55)
│ └── (a=5, N=2, Q_v=-0.09, best=-0.09, ubc=0.55)
└── (a=5, N=10, Q_v=-0.09, best=-0.08, ubc=0.33)
├── (a=0, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
├── (a=3, N=3, Q_v=-0.08, best=-0.08, ubc=0.53)
│ ├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
│ └── (a=5, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
└── (a=5, N=3, Q_v=-0.09, best=-0.08, ubc=0.53)
├── (a=0, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
└── (a=3, N=1, Q_v=-0.09, best=-0.09, ubc=0.66)
[17:04:09] INFO selected action 0 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2, 0]
(a=0, N=36, Q_v=-0.09, best=-0.08, ubc=0.15)
├── (a=3, N=17, Q_v=-0.09, best=-0.08, ubc=0.24)
│ └── (a=5, N=16, Q_v=-0.09, best=-0.08, ubc=0.21)
│ └── (a=5, N=15, Q_v=-0.09, best=-0.09, ubc=0.22)
└── (a=5, N=18, Q_v=-0.08, best=-0.08, ubc=0.23)
├── (a=3, N=9, Q_v=-0.09, best=-0.08, ubc=0.32)
│ └── (a=5, N=8, Q_v=-0.09, best=-0.09, ubc=0.29)
└── (a=5, N=8, Q_v=-0.09, best=-0.09, ubc=0.34)
└── (a=3, N=7, Q_v=-0.09, best=-0.09, ubc=0.30)
[17:04:11] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2, 0, 5]
(a=5, N=43, Q_v=-0.09, best=-0.08, ubc=0.13)
├── (a=3, N=21, Q_v=-0.09, best=-0.08, ubc=0.21)
│ └── (a=5, N=20, Q_v=-0.09, best=-0.09, ubc=0.19)
└── (a=5, N=21, Q_v=-0.09, best=-0.09, ubc=0.21)
└── (a=3, N=20, Q_v=-0.09, best=-0.09, ubc=0.19)
[17:04:12] INFO selected action 3 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2, 0, 5, 3]
(a=3, N=46, Q_v=-0.09, best=-0.08, ubc=0.13)
└── (a=5, N=45, Q_v=-0.09, best=-0.09, ubc=0.12)
[17:04:13] INFO selected action 5 after 25 simulations.
INFO current action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2, 0, 5, 3, 5]
INFO Final action list: [1, 2, 1, 3, 0, 4, 4, 0, 5, 4, 5, 3, 1, 4, 0, 2, 5, 0, 1, 1, 5, 0, 2, 3, 3, 4, 1, 3, 2, 4, 2, 2, 0, 5, 3, 5]
╔═══════════════════════════════════════════════════════╗
Job 0 ║ ████ █████████████ ████████ ║ Machine 0 █
Job 1 ║██████████████████████ ███████████████████████║ Machine 1 █
Job 2 ║████████████████ ██████████ ██████ ║ Machine 2 █
Job 3 ║ █████████ █████ ██████████ █████████ ║ Machine 3 █
Job 4 ║ ███████████████████ ████ ║ Machine 4 █
Job 5 ║ ███████████████████████ █████║ Machine 5 █
╚╦════╤════╤════╤════╤════╦════╤════╤════╤════╤════╦════╝
0.0 26.8 53.6
Makespan: 58