JSSEnv (Deepcopy Wrapper)
[1]:
from jsp_vis.console import gantt_chart_console
[2]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
from gymnasium.wrappers import TransformReward, NormalizeReward
from gymcts.logger import log
[3]:
import gymnasium as gym
from typing import Any
[4]:
import bisect
import datetime
import random
[5]:
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
from pathlib import Path
[6]:
class JssEnv(gym.Env):
    """Job-shop scheduling modelled as a single-agent Gymnasium environment.

    Actions ``0 .. jobs - 1`` allocate the next operation of the given job on
    the machine it needs; action ``jobs`` is the no-op ("wait") action.
    Observations are dicts with the keys ``"real_obs"`` (a ``(jobs, 7)`` float
    matrix) and ``"action_mask"`` (a boolean vector of length ``jobs + 1``).
    """

    def __init__(self, env_config=None):
        """
        This environment models the job shop scheduling problem as a single agent problem:
        -The actions correspond to a job allocation + one action for no allocation at this time step (NOPE action)
        -We keep a time with next possible time steps
        -Each time we allocate a job, the end of the job is added to the stack of time steps
        -If we don't have a legal action (i.e. we can't allocate a job),
        we automatically go to the next time step until we have a legal action

        The instance file is expected to hold ``<jobs> <machines>`` on its first
        line, followed by one line per job made of ``machine duration`` pairs.

        :param env_config: Ray dictionary of config parameter; only the key
            ``"instance_path"`` is read. Defaults to the ``instances/ta80``
            file located next to this module.
        """
        if env_config is None:
            env_config = {
                "instance_path": Path(__file__).parent.absolute() / "instances" / "ta80"
            }
        instance_path = env_config["instance_path"]

        # initial values for variables used for instance
        self.jobs = 0
        self.machines = 0
        self.instance_matrix = None
        self.jobs_length = None
        self.max_time_op = 0
        self.max_time_jobs = 0
        self.nb_legal_actions = 0
        self.nb_machine_legal = 0

        # initial values for variables used for solving (to reinitialize when reset() is called)
        self.solution = None
        self.last_solution = None
        self.last_time_step = float("inf")
        self.current_time_step = float("inf")
        self.next_time_step = list()
        self.next_jobs = list()
        self.legal_actions = None
        self.time_until_available_machine = None
        self.time_until_finish_current_op_jobs = None
        self.todo_time_step_job = None
        self.total_perform_op_time_jobs = None
        self.needed_machine_jobs = None
        self.total_idle_time_jobs = None
        self.idle_time_jobs_last_op = None
        self.state = None
        self.illegal_actions = None
        self.action_illegal_no_op = None
        self.machine_legal = None

        # initial values for variables used for representation
        self.start_timestamp = datetime.datetime.now().timestamp()
        self.sum_op = 0

        with open(instance_path, "r") as instance_file:
            for line_cnt, line_str in enumerate(instance_file, start=1):
                split_data = list(map(int, line_str.split()))
                if line_cnt == 1:
                    # header line: number of jobs and number of machines
                    self.jobs, self.machines = split_data
                    # each cell stores a (machine, duration) pair
                    self.instance_matrix = np.zeros(
                        (self.jobs, self.machines), dtype=(int, 2)
                    )
                    # accumulated duration of all operations of each job
                    self.jobs_length = np.zeros(self.jobs, dtype=int)
                else:
                    # every job line holds exactly one (machine, duration) pair per machine
                    assert len(split_data) % 2 == 0 and len(split_data) // 2 == self.machines
                    job_nb = line_cnt - 2
                    for i in range(0, len(split_data), 2):
                        machine, duration = split_data[i], split_data[i + 1]
                        self.instance_matrix[job_nb][i // 2] = (machine, duration)
                        self.max_time_op = max(self.max_time_op, duration)
                        self.jobs_length[job_nb] += duration
                        self.sum_op += duration
        self.max_time_jobs = max(self.jobs_length)

        # check the parsed data are correct
        assert self.max_time_op > 0
        assert self.max_time_jobs > 0
        assert self.jobs > 0
        assert self.machines > 1, "We need at least 2 machines"
        assert self.instance_matrix is not None

        # allocate a job + one to wait
        self.action_space = gym.spaces.Discrete(self.jobs + 1)
        # used for plotting
        self.colors = [
            tuple([random.random() for _ in range(3)]) for _ in range(self.machines)
        ]
        # "real_obs" columns, as written by this class for each job:
        #   0: legal-action flag of the job
        #   1: time left on the current operation / max_time_op
        #   2: index of the next operation / machines
        #   3: total performed operation time / max_time_jobs
        #   4: time until the needed machine is free / max_time_op (1.0 once the job is over)
        #   5: idle time since the last operation / sum_op
        #   6: total idle time / sum_op
        self.observation_space = gym.spaces.Dict(
            {
                "action_mask": gym.spaces.Box(0, 1, shape=(self.jobs + 1,)),
                "real_obs": gym.spaces.Box(
                    low=0.0, high=1.0, shape=(self.jobs, 7), dtype=float
                ),
            }
        )

    def _get_current_state_representation(self):
        """Copy the per-job legality flags into column 0 and build the observation dict.

        The returned dict references ``self.state`` and ``self.legal_actions``
        directly (no copies are made).
        """
        self.state[:, 0] = self.legal_actions[:-1]
        return {
            "real_obs": self.state,
            "action_mask": self.legal_actions,
        }

    def get_legal_actions(self):
        """Return the current boolean action mask (length ``jobs + 1``)."""
        return self.legal_actions

    def reset(self, seed=None, options=None):
        """Reinitialise all solving state and return ``(observation, info)``.

        :param seed: forwarded to ``gym.Env.reset`` so ``self.np_random`` is seeded
        :param options: accepted for API compatibility; not used
        :return: the initial observation dict and an empty info dict
        """
        # Gymnasium expects subclasses to forward the seed to the base class.
        super().reset(seed=seed)
        self.current_time_step = 0
        self.next_time_step = list()
        self.next_jobs = list()
        self.nb_legal_actions = self.jobs
        self.nb_machine_legal = 0
        # represent all the legal actions
        self.legal_actions = np.ones(self.jobs + 1, dtype=bool)
        self.legal_actions[self.jobs] = False
        # used to represent the solution
        self.solution = np.full((self.jobs, self.machines), -1, dtype=int)
        self.time_until_available_machine = np.zeros(self.machines, dtype=int)
        self.time_until_finish_current_op_jobs = np.zeros(self.jobs, dtype=int)
        self.todo_time_step_job = np.zeros(self.jobs, dtype=int)
        self.total_perform_op_time_jobs = np.zeros(self.jobs, dtype=int)
        self.needed_machine_jobs = np.zeros(self.jobs, dtype=int)
        self.total_idle_time_jobs = np.zeros(self.jobs, dtype=int)
        self.idle_time_jobs_last_op = np.zeros(self.jobs, dtype=int)
        self.illegal_actions = np.zeros((self.machines, self.jobs), dtype=bool)
        self.action_illegal_no_op = np.zeros(self.jobs, dtype=bool)
        self.machine_legal = np.zeros(self.machines, dtype=bool)
        for job in range(self.jobs):
            # machine required by the first operation of the job
            needed_machine = self.instance_matrix[job][0][0]
            self.needed_machine_jobs[job] = needed_machine
            if not self.machine_legal[needed_machine]:
                self.machine_legal[needed_machine] = True
                self.nb_machine_legal += 1
        self.state = np.zeros((self.jobs, 7), dtype=float)
        return self._get_current_state_representation(), {}

    def _prioritization_non_final(self):
        """Mask final-operation jobs that are longer than a competing non-final one.

        For every legal machine, legal jobs waiting for it are split into jobs
        on their last operation and jobs with operations left. A non-final job
        only counts when the machine of its following operation is currently
        free. If at least one such job exists, every final job whose operation
        takes longer than the shortest of them is made illegal.
        """
        if self.nb_machine_legal >= 1:
            for machine in range(self.machines):
                if self.machine_legal[machine]:
                    final_job = list()
                    non_final_job = list()
                    min_non_final = float("inf")
                    for job in range(self.jobs):
                        if (
                            self.needed_machine_jobs[job] == machine
                            and self.legal_actions[job]
                        ):
                            if self.todo_time_step_job[job] == (self.machines - 1):
                                final_job.append(job)
                            else:
                                current_time_step_non_final = self.todo_time_step_job[
                                    job
                                ]
                                time_needed_legal = self.instance_matrix[job][
                                    current_time_step_non_final
                                ][1]
                                machine_needed_nextstep = self.instance_matrix[job][
                                    current_time_step_non_final + 1
                                ][0]
                                if (
                                    self.time_until_available_machine[
                                        machine_needed_nextstep
                                    ]
                                    == 0
                                ):
                                    min_non_final = min(
                                        min_non_final, time_needed_legal
                                    )
                                    non_final_job.append(job)
                    if len(non_final_job) > 0:
                        for job in final_job:
                            current_time_step_final = self.todo_time_step_job[job]
                            time_needed_legal = self.instance_matrix[job][
                                current_time_step_final
                            ][1]
                            if time_needed_legal > min_non_final:
                                self.legal_actions[job] = False
                                self.nb_legal_actions -= 1

    def _check_no_op(self):
        """Decide whether the no-op action (index ``jobs``) is legal right now.

        The no-op starts out illegal. It is only considered when a future time
        step is queued, at most 3 machines are legal and at most 4 actions are
        legal. It is enabled as soon as the look-ahead over the currently
        illegal jobs has collected as many distinct legal machines as there
        are legal machines.
        """
        self.legal_actions[self.jobs] = False
        if (
            len(self.next_time_step) > 0
            and self.nb_machine_legal <= 3
            and self.nb_legal_actions <= 4
        ):
            machine_next = set()
            next_time_step = self.next_time_step[0]
            max_horizon = self.current_time_step
            max_horizon_machine = [
                self.current_time_step + self.max_time_op for _ in range(self.machines)
            ]
            for job in range(self.jobs):
                if self.legal_actions[job]:
                    time_step = self.todo_time_step_job[job]
                    machine_needed = self.instance_matrix[job][time_step][0]
                    time_needed = self.instance_matrix[job][time_step][1]
                    end_job = self.current_time_step + time_needed
                    # a legal operation would finish before the next queued
                    # time step: leave the no-op illegal
                    if end_job < next_time_step:
                        return
                    max_horizon_machine[machine_needed] = min(
                        max_horizon_machine[machine_needed], end_job
                    )
                    max_horizon = max(max_horizon, max_horizon_machine[machine_needed])
            for job in range(self.jobs):
                if not self.legal_actions[job]:
                    if (
                        self.time_until_finish_current_op_jobs[job] > 0
                        and self.todo_time_step_job[job] + 1 < self.machines
                    ):
                        # job is currently running: look ahead from its next operation
                        time_step = self.todo_time_step_job[job] + 1
                        time_needed = (
                            self.current_time_step
                            + self.time_until_finish_current_op_jobs[job]
                        )
                        while (
                            time_step < self.machines - 1 and max_horizon > time_needed
                        ):
                            machine_needed = self.instance_matrix[job][time_step][0]
                            if (
                                max_horizon_machine[machine_needed] > time_needed
                                and self.machine_legal[machine_needed]
                            ):
                                machine_next.add(machine_needed)
                                if len(machine_next) == self.nb_machine_legal:
                                    self.legal_actions[self.jobs] = True
                                    return
                            time_needed += self.instance_matrix[job][time_step][1]
                            time_step += 1
                    elif (
                        not self.action_illegal_no_op[job]
                        and self.todo_time_step_job[job] < self.machines
                    ):
                        # job is waiting for a machine: look ahead from its current operation
                        time_step = self.todo_time_step_job[job]
                        machine_needed = self.instance_matrix[job][time_step][0]
                        time_needed = (
                            self.current_time_step
                            + self.time_until_available_machine[machine_needed]
                        )
                        while (
                            time_step < self.machines - 1 and max_horizon > time_needed
                        ):
                            machine_needed = self.instance_matrix[job][time_step][0]
                            if (
                                max_horizon_machine[machine_needed] > time_needed
                                and self.machine_legal[machine_needed]
                            ):
                                machine_next.add(machine_needed)
                                if len(machine_next) == self.nb_machine_legal:
                                    self.legal_actions[self.jobs] = True
                                    return
                            time_needed += self.instance_matrix[job][time_step][1]
                            time_step += 1

    def step(self, action: int):
        """Apply ``action`` and advance time while no machine is legal.

        :param action: job index to allocate, or ``self.jobs`` for the no-op
        :return: ``(observation, scaled_reward, terminated, truncated, info)``;
            ``truncated`` is always ``False`` and ``info`` is always empty
        """
        reward = 0.0
        if action == self.jobs:
            # no-op: forbid every currently legal job until its machine is used
            self.nb_machine_legal = 0
            self.nb_legal_actions = 0
            for job in range(self.jobs):
                if self.legal_actions[job]:
                    self.legal_actions[job] = False
                    needed_machine = self.needed_machine_jobs[job]
                    self.machine_legal[needed_machine] = False
                    self.illegal_actions[needed_machine][job] = True
                    self.action_illegal_no_op[job] = True
            # Same guard as the allocation branch below: without it,
            # increase_time_step() pops from an empty list (IndexError) when
            # the queue runs dry before any machine becomes legal again.
            while self.nb_machine_legal == 0 and len(self.next_time_step) > 0:
                reward -= self.increase_time_step()
            scaled_reward = self._reward_scaler(reward)
            self._prioritization_non_final()
            self._check_no_op()
            return (
                self._get_current_state_representation(),
                scaled_reward,
                self._is_done(),
                False,
                {},
            )
        else:
            current_time_step_job = self.todo_time_step_job[action]
            machine_needed = self.needed_machine_jobs[action]
            time_needed = self.instance_matrix[action][current_time_step_job][1]
            reward += time_needed
            self.time_until_available_machine[machine_needed] = time_needed
            self.time_until_finish_current_op_jobs[action] = time_needed
            self.state[action][1] = time_needed / self.max_time_op
            # keep next_time_step sorted and free of duplicates
            to_add_time_step = self.current_time_step + time_needed
            if to_add_time_step not in self.next_time_step:
                index = bisect.bisect_left(self.next_time_step, to_add_time_step)
                self.next_time_step.insert(index, to_add_time_step)
                self.next_jobs.insert(index, action)
            self.solution[action][current_time_step_job] = self.current_time_step
            # the machine is now busy: nobody else may be scheduled on it
            for job in range(self.jobs):
                if (
                    self.needed_machine_jobs[job] == machine_needed
                    and self.legal_actions[job]
                ):
                    self.legal_actions[job] = False
                    self.nb_legal_actions -= 1
            self.nb_machine_legal -= 1
            self.machine_legal[machine_needed] = False
            # lift the restrictions a previous no-op put on this machine
            for job in range(self.jobs):
                if self.illegal_actions[machine_needed][job]:
                    self.action_illegal_no_op[job] = False
                    self.illegal_actions[machine_needed][job] = False
            # if we can't allocate new job in the current timestep, we pass to the next one
            while self.nb_machine_legal == 0 and len(self.next_time_step) > 0:
                reward -= self.increase_time_step()
            self._prioritization_non_final()
            self._check_no_op()
            # we then need to scale the reward
            scaled_reward = self._reward_scaler(reward)
            return (
                self._get_current_state_representation(),
                scaled_reward,
                self._is_done(),
                False,
                {},
            )

    def _reward_scaler(self, reward):
        """Scale a raw reward by the longest single operation time."""
        return reward / self.max_time_op

    def increase_time_step(self):
        """
        The heart of the logic is here, we need to increase every counter when we have a nope action called
        and return the time elapsed

        Pops the earliest queued time step, moves the clock to it and updates
        the per-job and per-machine counters as well as the state matrix.

        :return: the machine idle time accumulated over the elapsed interval,
            summed over all machines
        """
        hole_planning = 0
        next_time_step_to_pick = self.next_time_step.pop(0)
        self.next_jobs.pop(0)
        difference = next_time_step_to_pick - self.current_time_step
        self.current_time_step = next_time_step_to_pick
        for job in range(self.jobs):
            was_left_time = self.time_until_finish_current_op_jobs[job]
            if was_left_time > 0:
                # the job was running during (part of) the elapsed interval
                performed_op_job = min(difference, was_left_time)
                self.time_until_finish_current_op_jobs[job] = max(
                    0, self.time_until_finish_current_op_jobs[job] - difference
                )
                self.state[job][1] = (
                    self.time_until_finish_current_op_jobs[job] / self.max_time_op
                )
                self.total_perform_op_time_jobs[job] += performed_op_job
                self.state[job][3] = (
                    self.total_perform_op_time_jobs[job] / self.max_time_jobs
                )
                if self.time_until_finish_current_op_jobs[job] == 0:
                    # the operation finished: move the job to its next operation
                    self.total_idle_time_jobs[job] += difference - was_left_time
                    self.state[job][6] = self.total_idle_time_jobs[job] / self.sum_op
                    self.idle_time_jobs_last_op[job] = difference - was_left_time
                    self.state[job][5] = self.idle_time_jobs_last_op[job] / self.sum_op
                    self.todo_time_step_job[job] += 1
                    self.state[job][2] = self.todo_time_step_job[job] / self.machines
                    if self.todo_time_step_job[job] < self.machines:
                        self.needed_machine_jobs[job] = self.instance_matrix[job][
                            self.todo_time_step_job[job]
                        ][0]
                        self.state[job][4] = (
                            max(
                                0,
                                self.time_until_available_machine[
                                    self.needed_machine_jobs[job]
                                ]
                                - difference,
                            )
                            / self.max_time_op
                        )
                    else:
                        self.needed_machine_jobs[job] = -1
                        # this allows to have 1 if the job is over (not 0 because 0 strongly indicates
                        # that the job is a good candidate)
                        self.state[job][4] = 1.0
                        if self.legal_actions[job]:
                            self.legal_actions[job] = False
                            self.nb_legal_actions -= 1
            elif self.todo_time_step_job[job] < self.machines:
                # unfinished job that was not running: the whole interval was idle
                self.total_idle_time_jobs[job] += difference
                self.idle_time_jobs_last_op[job] += difference
                self.state[job][5] = self.idle_time_jobs_last_op[job] / self.sum_op
                self.state[job][6] = self.total_idle_time_jobs[job] / self.sum_op
        for machine in range(self.machines):
            if self.time_until_available_machine[machine] < difference:
                empty = difference - self.time_until_available_machine[machine]
                hole_planning += empty
            self.time_until_available_machine[machine] = max(
                0, self.time_until_available_machine[machine] - difference
            )
            if self.time_until_available_machine[machine] == 0:
                # machine is free: re-enable jobs waiting for it unless a
                # no-op explicitly forbade them
                for job in range(self.jobs):
                    if (
                        self.needed_machine_jobs[job] == machine
                        and not self.legal_actions[job]
                        and not self.illegal_actions[machine][job]
                    ):
                        self.legal_actions[job] = True
                        self.nb_legal_actions += 1
                        if not self.machine_legal[machine]:
                            self.machine_legal[machine] = True
                            self.nb_machine_legal += 1
        return hole_planning

    def _is_done(self):
        """Return ``True`` when no action is legal any more.

        On termination the current time step and the solution matrix are
        stored in ``last_time_step`` and ``last_solution``.
        """
        if self.nb_legal_actions == 0:
            self.last_time_step = self.current_time_step
            self.last_solution = self.solution
            return True
        return False

    def render(self, mode="human"):
        """Build a Gantt chart of the operations scheduled so far.

        :param mode: accepted for API compatibility; not used
        :return: the plotly figure, or ``None`` when nothing is scheduled yet
        """
        df = []
        for job in range(self.jobs):
            i = 0
            # operations are scheduled in order, so stop at the first unscheduled one
            while i < self.machines and self.solution[job][i] != -1:
                dict_op = dict()
                dict_op["Task"] = "Job {}".format(job)
                start_sec = self.solution[job][i]
                finish_sec = start_sec + self.instance_matrix[job][i][1]
                dict_op["Start"] = start_sec
                dict_op["Finish"] = finish_sec
                dict_op["Resource"] = "Machine {}".format(
                    self.instance_matrix[job][i][0]
                )
                df.append(dict_op)
                i += 1
        fig = None
        if len(df) > 0:
            df = pd.DataFrame(df)
            fig = ff.create_gantt(
                df,
                index_col="Resource",
                colors=self.colors,
                show_colorbar=True,
                group_tasks=True,
            )
            fig.update_yaxes(
                autorange="reversed"
            )  # otherwise tasks are listed from the bottom up
            # NOTE(review): kept inside the non-empty branch so the console
            # chart always receives a DataFrame — confirm against the notebook.
            gantt_chart_console(df, n_machines=self.machines)
        return fig
[7]:
class JSSEnvRewardWrapper(gym.Wrapper):
    """Replace the wrapped environment's reward with a terminal makespan-based reward.

    Every step yields ``0.0`` until the underlying environment reports that it
    is done; the final step yields ``-last_time_step / lower_bound + 2``.
    """

    def __init__(self, env: gym.Env, lower_bound):
        """
        :param env: environment to wrap; its unwrapped env must expose
            ``last_time_step`` and ``_is_done()``
        :param lower_bound: value the final ``last_time_step`` is divided by
        """
        super().__init__(env)
        self.lower_bound = lower_bound

    def step(self, action: Any) -> tuple[Any, float, bool, bool, dict]:
        """Step the wrapped environment and substitute the reward.

        :param action: action forwarded unchanged to the wrapped environment
        :return: ``(observation, reward, done, truncated, info)`` where only
            ``reward`` differs from what the wrapped environment returned
        """
        observation, reward, done, truncated, info = self.env.step(action)
        # Use the wrapped environment (self.env), not a module-level ``env``
        # global that only exists when the demo script is running.
        base_env = self.env.unwrapped
        if base_env._is_done():
            reward = -base_env.last_time_step / self.lower_bound + 2
        else:
            reward = 0.0
        return observation, reward, done, truncated, info
[8]:
if __name__ == '__main__':
    # Demo: solve one job-shop instance with MCTS and replay the found actions.
    log.setLevel(20)  # 20 is the numeric value of logging.INFO

    jsp_std_path = "ft06.txt"
    gym.envs.registration.register(
        id="jss-v1",
        entry_point="JSSEnv.envs:JssEnv",
    )
    # The environment is built directly from the class defined above.
    env = JssEnv(env_config={'instance_path': jsp_std_path})
    env.reset()

    # Reward shaping applied on top of the raw environment reward.
    env = NormalizeReward(env, gamma=0.99, epsilon=1e-8)
    env = TransformReward(env, lambda r: r / 36)
    # env = JSSEnvRewardWrapper(env, lower_bound=55.0)

    def mask_fn(env: gym.Env) -> np.ndarray:
        """Return the boolean action mask kept by the unwrapped JssEnv."""
        return env.unwrapped.legal_actions

    env = DeepCopyMCTSGymEnvWrapper(
        env,
        action_mask_fn=mask_fn
    )

    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        exclude_unvisited_nodes_from_render=True,
        number_of_simulations_per_step=125,
    )

    # NOTE(review): ``root`` is not used afterwards; the call is kept in case
    # get_root() has side effects — confirm and drop if it is a pure getter.
    root = agent.search_root_node.get_root()

    actions = agent.solve(render_tree_after_step=True)

    # Replay the action sequence on a fresh episode, then show the schedule.
    env.reset()
    for a in actions:
        obs, rew, term, trun, info = env.step(a)

    env.unwrapped.render(mode="human")
    print(f"makespan: {env.unwrapped.last_time_step}")
(N=125, Q_v=1.65, best=2.00)
├── (a=0, N=2, Q_v=0.44, best=0.46, ubc=1.54)
│ └── (a=3, N=1, Q_v=0.46, best=0.46, ubc=1.05)
├── (a=1, N=31, Q_v=1.65, best=1.83, ubc=1.93)
│ ├── (a=0, N=10, Q_v=1.66, best=1.79, ubc=2.07)
│ │ ├── (a=0, N=3, Q_v=1.64, best=1.74, ubc=2.26)
│ │ ├── (a=2, N=3, Q_v=1.75, best=1.79, ubc=2.36)
│ │ └── (a=4, N=3, Q_v=1.63, best=1.66, ubc=2.24)
│ ├── (a=2, N=10, Q_v=1.67, best=1.83, ubc=2.09)
│ │ ├── (a=0, N=3, Q_v=1.69, best=1.83, ubc=2.31)
│ │ ├── (a=2, N=3, Q_v=1.69, best=1.81, ubc=2.31)
│ │ └── (a=4, N=3, Q_v=1.64, best=1.71, ubc=2.26)
│ └── (a=4, N=10, Q_v=1.64, best=1.78, ubc=2.05)
│ ├── (a=3, N=3, Q_v=1.58, best=1.72, ubc=2.19)
│ ├── (a=5, N=3, Q_v=1.70, best=1.78, ubc=2.32)
│ └── (a=6, N=3, Q_v=1.69, best=1.77, ubc=2.31)
├── (a=2, N=6, Q_v=1.27, best=1.36, ubc=1.90)
│ ├── (a=1, N=2, Q_v=1.23, best=1.36, ubc=1.90)
│ │ └── (a=0, N=1, Q_v=1.10, best=1.10, ubc=1.69)
│ ├── (a=3, N=1, Q_v=1.19, best=1.19, ubc=2.13)
│ └── (a=5, N=2, Q_v=1.33, best=1.33, ubc=2.00)
│ └── (a=5, N=1, Q_v=1.33, best=1.33, ubc=1.92)
├── (a=3, N=5, Q_v=1.21, best=1.29, ubc=1.91)
│ ├── (a=0, N=1, Q_v=1.16, best=1.16, ubc=2.05)
│ ├── (a=2, N=1, Q_v=1.29, best=1.29, ubc=2.19)
│ └── (a=4, N=2, Q_v=1.25, best=1.29, ubc=1.88)
│ └── (a=1, N=1, Q_v=1.20, best=1.20, ubc=1.79)
├── (a=4, N=78, Q_v=1.77, best=2.00, ubc=1.94)
│ ├── (a=1, N=24, Q_v=1.75, best=1.92, ubc=2.06)
│ │ ├── (a=3, N=7, Q_v=1.75, best=1.92, ubc=2.23)
│ │ ├── (a=5, N=10, Q_v=1.79, best=1.90, ubc=2.19)
│ │ └── (a=6, N=6, Q_v=1.72, best=1.81, ubc=2.23)
│ ├── (a=3, N=21, Q_v=1.74, best=1.96, ubc=2.06)
│ │ ├── (a=1, N=5, Q_v=1.67, best=1.72, ubc=2.22)
│ │ ├── (a=3, N=8, Q_v=1.77, best=1.96, ubc=2.21)
│ │ └── (a=5, N=7, Q_v=1.76, best=1.87, ubc=2.23)
│ └── (a=5, N=32, Q_v=1.80, best=2.00, ubc=2.06)
│ ├── (a=1, N=8, Q_v=1.72, best=1.91, ubc=2.18)
│ ├── (a=3, N=10, Q_v=1.80, best=1.95, ubc=2.22)
│ └── (a=5, N=13, Q_v=1.84, best=2.00, ubc=2.20)
└── (a=5, N=2, Q_v=0.71, best=0.73, ubc=1.81)
└── (a=4, N=1, Q_v=0.73, best=0.73, ubc=1.32)
[17:00:10] INFO selected action 4 after 125 simulations.
INFO current action list: [4]
(a=4, N=203, Q_v=1.80, best=2.00, ubc=1.92)
├── (a=1, N=55, Q_v=1.77, best=1.95, ubc=1.99)
│ ├── (a=3, N=17, Q_v=1.75, best=1.92, ubc=2.10)
│ │ ├── (a=0, N=5, Q_v=1.70, best=1.83, ubc=2.24)
│ │ ├── (a=1, N=4, Q_v=1.65, best=1.70, ubc=2.24)
│ │ └── (a=2, N=7, Q_v=1.87, best=1.92, ubc=2.32)
│ ├── (a=5, N=23, Q_v=1.82, best=1.95, ubc=2.12)
│ │ ├── (a=0, N=6, Q_v=1.76, best=1.86, ubc=2.27)
│ │ ├── (a=1, N=8, Q_v=1.86, best=1.95, ubc=2.30)
│ │ └── (a=2, N=8, Q_v=1.84, best=1.94, ubc=2.28)
│ └── (a=6, N=14, Q_v=1.73, best=1.88, ubc=2.11)
│ ├── (a=0, N=3, Q_v=1.76, best=1.83, ubc=2.42)
│ ├── (a=1, N=4, Q_v=1.72, best=1.85, ubc=2.30)
│ ├── (a=2, N=3, Q_v=1.71, best=1.88, ubc=2.37)
│ └── (a=4, N=3, Q_v=1.72, best=1.81, ubc=2.39)
├── (a=3, N=49, Q_v=1.76, best=1.96, ubc=2.00)
│ ├── (a=1, N=11, Q_v=1.70, best=1.84, ubc=2.12)
│ │ └── (a=3, N=10, Q_v=1.70, best=1.84, ubc=2.05)
│ ├── (a=3, N=17, Q_v=1.77, best=1.96, ubc=2.11)
│ │ ├── (a=1, N=5, Q_v=1.67, best=1.86, ubc=2.20)
│ │ └── (a=5, N=11, Q_v=1.83, best=1.96, ubc=2.19)
│ └── (a=5, N=20, Q_v=1.80, best=1.92, ubc=2.11)
│ └── (a=3, N=19, Q_v=1.80, best=1.92, ubc=2.08)
└── (a=5, N=98, Q_v=1.83, best=2.00, ubc=2.00)
├── (a=1, N=28, Q_v=1.81, best=1.97, ubc=2.10)
│ └── (a=5, N=27, Q_v=1.82, best=1.97, ubc=2.07)
├── (a=3, N=33, Q_v=1.83, best=1.95, ubc=2.10)
│ └── (a=5, N=32, Q_v=1.84, best=1.95, ubc=2.07)
└── (a=5, N=36, Q_v=1.85, best=2.00, ubc=2.10)
├── (a=1, N=20, Q_v=1.87, best=2.00, ubc=2.17)
└── (a=3, N=15, Q_v=1.81, best=1.98, ubc=2.16)
[17:00:11] INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5]
(a=5, N=223, Q_v=1.84, best=2.01, ubc=1.96)
├── (a=1, N=64, Q_v=1.83, best=2.00, ubc=2.03)
│ └── (a=5, N=63, Q_v=1.83, best=2.00, ubc=2.01)
│ └── (a=5, N=62, Q_v=1.83, best=2.00, ubc=2.02)
├── (a=3, N=76, Q_v=1.84, best=1.99, ubc=2.03)
│ └── (a=5, N=75, Q_v=1.85, best=1.99, ubc=2.02)
│ └── (a=5, N=74, Q_v=1.84, best=1.99, ubc=2.02)
└── (a=5, N=82, Q_v=1.85, best=2.01, ubc=2.03)
├── (a=1, N=46, Q_v=1.87, best=2.01, ubc=2.09)
│ └── (a=5, N=45, Q_v=1.87, best=2.01, ubc=2.08)
└── (a=3, N=35, Q_v=1.84, best=1.98, ubc=2.09)
└── (a=5, N=34, Q_v=1.83, best=1.98, ubc=2.06)
INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5, 5]
(a=5, N=207, Q_v=1.85, best=2.01, ubc=1.97)
├── (a=1, N=115, Q_v=1.86, best=2.01, ubc=2.01)
│ └── (a=5, N=114, Q_v=1.86, best=2.01, ubc=2.00)
│ ├── (a=0, N=76, Q_v=1.88, best=2.01, ubc=2.06)
│ └── (a=2, N=37, Q_v=1.80, best=2.00, ubc=2.06)
└── (a=3, N=91, Q_v=1.84, best=1.99, ubc=2.01)
└── (a=5, N=90, Q_v=1.84, best=1.99, ubc=2.00)
├── (a=1, N=31, Q_v=1.84, best=1.98, ubc=2.11)
├── (a=3, N=27, Q_v=1.82, best=1.99, ubc=2.11)
└── (a=6, N=31, Q_v=1.85, best=1.98, ubc=2.12)
INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1]
(a=1, N=240, Q_v=1.87, best=2.02, ubc=1.98)
└── (a=5, N=239, Q_v=1.87, best=2.02, ubc=1.98)
├── (a=0, N=166, Q_v=1.89, best=2.02, ubc=2.02)
│ ├── (a=0, N=70, Q_v=1.88, best=2.01, ubc=2.07)
│ └── (a=2, N=95, Q_v=1.90, best=2.02, ubc=2.07)
└── (a=2, N=72, Q_v=1.82, best=2.00, ubc=2.02)
├── (a=3, N=29, Q_v=1.79, best=2.00, ubc=2.07)
└── (a=4, N=42, Q_v=1.85, best=1.99, ubc=2.07)
[17:00:12] INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5]
(a=5, N=364, Q_v=1.87, best=2.02, ubc=1.96)
├── (a=0, N=265, Q_v=1.89, best=2.02, ubc=2.00)
│ ├── (a=0, N=111, Q_v=1.88, best=2.01, ubc=2.04)
│ │ ├── (a=2, N=57, Q_v=1.88, best=2.01, ubc=2.08)
│ │ └── (a=6, N=53, Q_v=1.87, best=2.01, ubc=2.09)
│ └── (a=2, N=153, Q_v=1.90, best=2.02, ubc=2.04)
│ └── (a=0, N=152, Q_v=1.90, best=2.02, ubc=2.03)
└── (a=2, N=98, Q_v=1.82, best=2.00, ubc=1.99)
├── (a=3, N=38, Q_v=1.80, best=2.00, ubc=2.04)
│ ├── (a=0, N=12, Q_v=1.77, best=1.89, ubc=2.16)
│ ├── (a=1, N=13, Q_v=1.81, best=1.97, ubc=2.19)
│ └── (a=2, N=12, Q_v=1.81, best=2.00, ubc=2.19)
└── (a=4, N=59, Q_v=1.84, best=1.99, ubc=2.04)
├── (a=0, N=10, Q_v=1.81, best=1.97, ubc=2.26)
├── (a=1, N=15, Q_v=1.88, best=1.98, ubc=2.24)
├── (a=2, N=10, Q_v=1.81, best=1.93, ubc=2.27)
├── (a=3, N=10, Q_v=1.82, best=1.99, ubc=2.27)
└── (a=4, N=13, Q_v=1.86, best=1.96, ubc=2.26)
INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0]
(a=0, N=390, Q_v=1.89, best=2.02, ubc=1.98)
├── (a=0, N=171, Q_v=1.88, best=2.02, ubc=2.01)
│ ├── (a=2, N=95, Q_v=1.89, best=2.02, ubc=2.05)
│ │ ├── (a=3, N=29, Q_v=1.88, best=2.01, ubc=2.16)
│ │ ├── (a=4, N=41, Q_v=1.93, best=2.02, ubc=2.16)
│ │ └── (a=6, N=24, Q_v=1.85, best=1.99, ubc=2.15)
│ └── (a=6, N=75, Q_v=1.87, best=2.01, ubc=2.05)
│ ├── (a=1, N=20, Q_v=1.84, best=2.00, ubc=2.17)
│ ├── (a=3, N=26, Q_v=1.86, best=1.98, ubc=2.15)
│ └── (a=4, N=28, Q_v=1.89, best=2.01, ubc=2.17)
└── (a=2, N=218, Q_v=1.90, best=2.02, ubc=2.01)
└── (a=0, N=217, Q_v=1.90, best=2.02, ubc=2.01)
├── (a=3, N=68, Q_v=1.89, best=2.01, ubc=2.09)
├── (a=4, N=99, Q_v=1.92, best=2.02, ubc=2.09)
└── (a=6, N=49, Q_v=1.85, best=1.97, ubc=2.09)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2]
(a=2, N=343, Q_v=1.90, best=2.02, ubc=1.99)
└── (a=0, N=342, Q_v=1.90, best=2.02, ubc=1.99)
├── (a=3, N=98, Q_v=1.88, best=2.01, ubc=2.06)
│ ├── (a=1, N=32, Q_v=1.88, best=2.01, ubc=2.15)
│ ├── (a=2, N=37, Q_v=1.90, best=2.01, ubc=2.15)
│ └── (a=5, N=28, Q_v=1.87, best=1.99, ubc=2.15)
├── (a=4, N=174, Q_v=1.93, best=2.02, ubc=2.06)
│ ├── (a=0, N=61, Q_v=1.93, best=2.02, ubc=2.14)
│ ├── (a=3, N=60, Q_v=1.93, best=2.02, ubc=2.14)
│ └── (a=4, N=52, Q_v=1.92, best=2.02, ubc=2.14)
└── (a=6, N=69, Q_v=1.85, best=1.97, ubc=2.06)
└── (a=0, N=68, Q_v=1.85, best=1.97, ubc=2.03)
INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0]
(a=0, N=467, Q_v=1.90, best=2.03, ubc=1.98)
├── (a=3, N=133, Q_v=1.89, best=2.02, ubc=2.04)
│ ├── (a=1, N=43, Q_v=1.89, best=2.01, ubc=2.12)
│ │ ├── (a=2, N=18, Q_v=1.86, best=1.97, ubc=2.18)
│ │ └── (a=5, N=24, Q_v=1.90, best=2.00, ubc=2.18)
│ ├── (a=2, N=49, Q_v=1.90, best=2.02, ubc=2.12)
│ │ ├── (a=1, N=23, Q_v=1.89, best=2.02, ubc=2.18)
│ │ └── (a=5, N=25, Q_v=1.91, best=2.00, ubc=2.19)
│ └── (a=5, N=40, Q_v=1.88, best=1.99, ubc=2.13)
│ ├── (a=1, N=18, Q_v=1.86, best=1.99, ubc=2.18)
│ └── (a=2, N=21, Q_v=1.89, best=1.99, ubc=2.19)
├── (a=4, N=249, Q_v=1.93, best=2.03, ubc=2.04)
│ ├── (a=0, N=86, Q_v=1.93, best=2.02, ubc=2.11)
│ │ └── (a=4, N=85, Q_v=1.93, best=2.02, ubc=2.10)
│ ├── (a=3, N=83, Q_v=1.93, best=2.02, ubc=2.11)
│ │ └── (a=4, N=82, Q_v=1.93, best=2.02, ubc=2.09)
│ └── (a=4, N=79, Q_v=1.92, best=2.03, ubc=2.11)
│ ├── (a=0, N=44, Q_v=1.94, best=2.03, ubc=2.16)
│ └── (a=3, N=34, Q_v=1.91, best=2.02, ubc=2.16)
└── (a=6, N=84, Q_v=1.85, best=1.97, ubc=2.04)
└── (a=0, N=83, Q_v=1.85, best=1.97, ubc=2.01)
├── (a=1, N=27, Q_v=1.85, best=1.97, ubc=2.14)
├── (a=2, N=26, Q_v=1.84, best=1.97, ubc=2.13)
└── (a=5, N=29, Q_v=1.86, best=1.97, ubc=2.13)
[17:00:13] INFO selected action 4 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4]
(a=4, N=374, Q_v=1.93, best=2.03, ubc=2.02)
├── (a=0, N=138, Q_v=1.94, best=2.02, ubc=2.09)
│ └── (a=4, N=137, Q_v=1.94, best=2.02, ubc=2.07)
│ ├── (a=1, N=41, Q_v=1.93, best=2.02, ubc=2.17)
│ ├── (a=2, N=42, Q_v=1.93, best=2.02, ubc=2.17)
│ └── (a=5, N=53, Q_v=1.96, best=2.02, ubc=2.17)
├── (a=3, N=119, Q_v=1.93, best=2.02, ubc=2.09)
│ └── (a=4, N=118, Q_v=1.93, best=2.02, ubc=2.07)
│ ├── (a=1, N=40, Q_v=1.93, best=2.02, ubc=2.17)
│ ├── (a=2, N=35, Q_v=1.91, best=2.02, ubc=2.17)
│ └── (a=5, N=42, Q_v=1.94, best=2.01, ubc=2.18)
└── (a=4, N=116, Q_v=1.93, best=2.03, ubc=2.09)
├── (a=0, N=66, Q_v=1.94, best=2.03, ubc=2.13)
│ ├── (a=1, N=20, Q_v=1.93, best=2.02, ubc=2.25)
│ ├── (a=2, N=24, Q_v=1.96, best=2.02, ubc=2.25)
│ └── (a=5, N=21, Q_v=1.93, best=2.03, ubc=2.25)
└── (a=3, N=49, Q_v=1.91, best=2.02, ubc=2.13)
├── (a=1, N=14, Q_v=1.89, best=2.02, ubc=2.26)
├── (a=2, N=17, Q_v=1.91, best=2.02, ubc=2.25)
└── (a=5, N=17, Q_v=1.92, best=2.01, ubc=2.26)
INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0]
(a=0, N=263, Q_v=1.94, best=2.02, ubc=2.05)
└── (a=4, N=262, Q_v=1.94, best=2.02, ubc=2.05)
├── (a=1, N=80, Q_v=1.94, best=2.02, ubc=2.12)
│ ├── (a=2, N=33, Q_v=1.91, best=2.02, ubc=2.17)
│ └── (a=5, N=46, Q_v=1.95, best=2.02, ubc=2.17)
├── (a=2, N=85, Q_v=1.94, best=2.02, ubc=2.12)
│ ├── (a=1, N=38, Q_v=1.93, best=2.02, ubc=2.17)
│ └── (a=5, N=46, Q_v=1.95, best=2.02, ubc=2.17)
└── (a=5, N=96, Q_v=1.95, best=2.02, ubc=2.12)
├── (a=1, N=48, Q_v=1.95, best=2.02, ubc=2.17)
└── (a=2, N=47, Q_v=1.95, best=2.02, ubc=2.17)
INFO selected action 4 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4]
(a=4, N=387, Q_v=1.95, best=2.03, ubc=2.03)
├── (a=1, N=120, Q_v=1.94, best=2.03, ubc=2.10)
│ ├── (a=2, N=49, Q_v=1.92, best=2.02, ubc=2.14)
│ │ ├── (a=5, N=33, Q_v=1.95, best=2.02, ubc=2.20)
│ │ └── (a=6, N=15, Q_v=1.84, best=1.94, ubc=2.20)
│ └── (a=5, N=70, Q_v=1.95, best=2.03, ubc=2.14)
│ └── (a=2, N=69, Q_v=1.96, best=2.03, ubc=2.13)
├── (a=2, N=119, Q_v=1.94, best=2.02, ubc=2.10)
│ ├── (a=1, N=53, Q_v=1.93, best=2.02, ubc=2.14)
│ │ ├── (a=5, N=32, Q_v=1.96, best=2.02, ubc=2.21)
│ │ └── (a=6, N=20, Q_v=1.88, best=1.95, ubc=2.20)
│ └── (a=5, N=65, Q_v=1.95, best=2.02, ubc=2.14)
│ └── (a=1, N=64, Q_v=1.95, best=2.02, ubc=2.13)
└── (a=5, N=147, Q_v=1.96, best=2.02, ubc=2.10)
├── (a=1, N=74, Q_v=1.96, best=2.02, ubc=2.14)
│ └── (a=2, N=73, Q_v=1.96, best=2.02, ubc=2.13)
└── (a=2, N=72, Q_v=1.95, best=2.02, ubc=2.14)
└── (a=1, N=71, Q_v=1.96, best=2.02, ubc=2.13)
INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5]
(a=5, N=272, Q_v=1.96, best=2.02, ubc=2.06)
├── (a=1, N=141, Q_v=1.96, best=2.02, ubc=2.10)
│ └── (a=2, N=140, Q_v=1.96, best=2.02, ubc=2.09)
│ ├── (a=2, N=64, Q_v=1.95, best=2.02, ubc=2.15)
│ └── (a=4, N=75, Q_v=1.97, best=2.02, ubc=2.15)
└── (a=2, N=130, Q_v=1.95, best=2.02, ubc=2.10)
└── (a=1, N=129, Q_v=1.95, best=2.02, ubc=2.09)
├── (a=2, N=59, Q_v=1.95, best=2.02, ubc=2.15)
└── (a=4, N=69, Q_v=1.96, best=2.02, ubc=2.15)
[17:00:14] INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1]
(a=1, N=266, Q_v=1.96, best=2.02, ubc=2.06)
└── (a=2, N=265, Q_v=1.96, best=2.02, ubc=2.06)
├── (a=2, N=128, Q_v=1.96, best=2.02, ubc=2.10)
│ ├── (a=0, N=43, Q_v=1.96, best=2.02, ubc=2.20)
│ ├── (a=1, N=44, Q_v=1.96, best=2.02, ubc=2.20)
│ └── (a=3, N=40, Q_v=1.94, best=2.02, ubc=2.19)
└── (a=4, N=136, Q_v=1.96, best=2.02, ubc=2.10)
├── (a=0, N=44, Q_v=1.96, best=2.02, ubc=2.20)
├── (a=1, N=48, Q_v=1.97, best=2.02, ubc=2.19)
└── (a=3, N=43, Q_v=1.96, best=2.02, ubc=2.20)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2]
(a=2, N=390, Q_v=1.96, best=2.02, ubc=2.05)
├── (a=2, N=184, Q_v=1.96, best=2.02, ubc=2.08)
│ ├── (a=0, N=58, Q_v=1.95, best=2.02, ubc=2.16)
│ │ ├── (a=1, N=30, Q_v=1.96, best=2.02, ubc=2.22)
│ │ └── (a=3, N=27, Q_v=1.94, best=2.01, ubc=2.21)
│ ├── (a=1, N=66, Q_v=1.96, best=2.02, ubc=2.16)
│ │ ├── (a=0, N=33, Q_v=1.96, best=2.02, ubc=2.22)
│ │ └── (a=3, N=32, Q_v=1.96, best=2.02, ubc=2.22)
│ └── (a=3, N=59, Q_v=1.95, best=2.02, ubc=2.16)
│ ├── (a=0, N=26, Q_v=1.94, best=2.02, ubc=2.22)
│ └── (a=1, N=32, Q_v=1.97, best=2.02, ubc=2.22)
└── (a=4, N=205, Q_v=1.96, best=2.02, ubc=2.08)
├── (a=0, N=67, Q_v=1.96, best=2.02, ubc=2.16)
│ ├── (a=1, N=37, Q_v=1.98, best=2.02, ubc=2.21)
│ └── (a=3, N=29, Q_v=1.94, best=2.02, ubc=2.21)
├── (a=1, N=71, Q_v=1.97, best=2.02, ubc=2.16)
│ ├── (a=0, N=35, Q_v=1.97, best=2.02, ubc=2.22)
│ └── (a=3, N=35, Q_v=1.97, best=2.02, ubc=2.21)
└── (a=3, N=66, Q_v=1.96, best=2.02, ubc=2.16)
├── (a=0, N=30, Q_v=1.95, best=2.02, ubc=2.21)
└── (a=1, N=35, Q_v=1.97, best=2.02, ubc=2.21)
INFO selected action 4 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4]
(a=4, N=330, Q_v=1.97, best=2.02, ubc=2.06)
├── (a=0, N=106, Q_v=1.96, best=2.02, ubc=2.13)
│ ├── (a=1, N=59, Q_v=1.98, best=2.02, ubc=2.17)
│ │ └── (a=3, N=58, Q_v=1.98, best=2.02, ubc=2.16)
│ └── (a=3, N=46, Q_v=1.95, best=2.02, ubc=2.17)
│ ├── (a=1, N=26, Q_v=1.97, best=2.02, ubc=2.24)
│ └── (a=6, N=19, Q_v=1.92, best=1.96, ubc=2.23)
├── (a=1, N=121, Q_v=1.97, best=2.02, ubc=2.13)
│ ├── (a=0, N=60, Q_v=1.97, best=2.02, ubc=2.17)
│ │ └── (a=3, N=59, Q_v=1.97, best=2.02, ubc=2.16)
│ └── (a=3, N=60, Q_v=1.97, best=2.02, ubc=2.17)
│ └── (a=0, N=59, Q_v=1.97, best=2.02, ubc=2.16)
└── (a=3, N=102, Q_v=1.96, best=2.02, ubc=2.13)
├── (a=0, N=46, Q_v=1.95, best=2.02, ubc=2.17)
│ ├── (a=1, N=26, Q_v=1.97, best=2.02, ubc=2.24)
│ └── (a=6, N=19, Q_v=1.92, best=1.96, ubc=2.24)
└── (a=1, N=55, Q_v=1.97, best=2.02, ubc=2.17)
└── (a=0, N=54, Q_v=1.97, best=2.02, ubc=2.16)
INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1]
(a=1, N=246, Q_v=1.97, best=2.02, ubc=2.09)
├── (a=0, N=124, Q_v=1.98, best=2.02, ubc=2.12)
│ └── (a=3, N=123, Q_v=1.98, best=2.02, ubc=2.12)
│ ├── (a=2, N=75, Q_v=1.99, best=2.02, ubc=2.17)
│ └── (a=6, N=47, Q_v=1.95, best=1.97, ubc=2.17)
└── (a=3, N=121, Q_v=1.97, best=2.02, ubc=2.12)
└── (a=0, N=120, Q_v=1.97, best=2.02, ubc=2.11)
├── (a=2, N=72, Q_v=1.99, best=2.02, ubc=2.17)
└── (a=6, N=47, Q_v=1.95, best=1.97, ubc=2.17)
INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0]
(a=0, N=249, Q_v=1.98, best=2.02, ubc=2.09)
└── (a=3, N=248, Q_v=1.98, best=2.02, ubc=2.08)
├── (a=2, N=158, Q_v=1.99, best=2.02, ubc=2.13)
│ ├── (a=3, N=92, Q_v=2.01, best=2.02, ubc=2.17)
│ └── (a=4, N=65, Q_v=1.97, best=2.01, ubc=2.17)
└── (a=6, N=89, Q_v=1.95, best=1.97, ubc=2.12)
├── (a=3, N=44, Q_v=1.95, best=1.97, ubc=2.17)
└── (a=4, N=44, Q_v=1.95, best=1.97, ubc=2.17)
[17:00:15] INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3]
(a=3, N=373, Q_v=1.98, best=2.02, ubc=2.07)
├── (a=2, N=250, Q_v=1.99, best=2.02, ubc=2.10)
│ ├── (a=3, N=148, Q_v=2.01, best=2.02, ubc=2.14)
│ │ ├── (a=3, N=49, Q_v=2.01, best=2.02, ubc=2.23)
│ │ ├── (a=4, N=49, Q_v=2.01, best=2.02, ubc=2.23)
│ │ └── (a=5, N=49, Q_v=2.01, best=2.02, ubc=2.23)
│ └── (a=4, N=101, Q_v=1.98, best=2.01, ubc=2.14)
│ ├── (a=3, N=60, Q_v=2.00, best=2.01, ubc=2.19)
│ └── (a=4, N=40, Q_v=1.95, best=2.01, ubc=2.19)
└── (a=6, N=122, Q_v=1.95, best=1.97, ubc=2.10)
├── (a=3, N=61, Q_v=1.95, best=1.97, ubc=2.15)
│ └── (a=0, N=60, Q_v=1.95, best=1.97, ubc=2.13)
└── (a=4, N=60, Q_v=1.95, best=1.97, ubc=2.15)
└── (a=0, N=59, Q_v=1.95, best=1.97, ubc=2.13)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2]
(a=2, N=375, Q_v=2.00, best=2.02, ubc=2.09)
├── (a=3, N=224, Q_v=2.01, best=2.02, ubc=2.12)
│ ├── (a=3, N=75, Q_v=2.01, best=2.02, ubc=2.20)
│ │ ├── (a=4, N=36, Q_v=2.00, best=2.02, ubc=2.25)
│ │ └── (a=5, N=38, Q_v=2.01, best=2.02, ubc=2.25)
│ ├── (a=4, N=74, Q_v=2.01, best=2.02, ubc=2.20)
│ │ ├── (a=3, N=37, Q_v=2.01, best=2.02, ubc=2.25)
│ │ └── (a=5, N=36, Q_v=2.00, best=2.02, ubc=2.25)
│ └── (a=5, N=74, Q_v=2.01, best=2.02, ubc=2.20)
│ ├── (a=3, N=37, Q_v=2.01, best=2.02, ubc=2.25)
│ └── (a=4, N=36, Q_v=2.00, best=2.02, ubc=2.25)
└── (a=4, N=150, Q_v=1.98, best=2.01, ubc=2.12)
├── (a=3, N=90, Q_v=2.00, best=2.01, ubc=2.16)
│ └── (a=4, N=89, Q_v=2.00, best=2.01, ubc=2.15)
└── (a=4, N=59, Q_v=1.96, best=2.01, ubc=2.16)
├── (a=3, N=41, Q_v=1.99, best=2.01, ubc=2.22)
└── (a=6, N=17, Q_v=1.87, best=1.91, ubc=2.22)
INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3]
(a=3, N=349, Q_v=2.01, best=2.02, ubc=2.10)
├── (a=3, N=116, Q_v=2.01, best=2.02, ubc=2.17)
│ ├── (a=4, N=57, Q_v=2.01, best=2.02, ubc=2.21)
│ │ └── (a=5, N=56, Q_v=2.01, best=2.02, ubc=2.19)
│ └── (a=5, N=58, Q_v=2.01, best=2.02, ubc=2.21)
│ ├── (a=4, N=28, Q_v=2.01, best=2.02, ubc=2.27)
│ └── (a=6, N=29, Q_v=2.01, best=2.02, ubc=2.27)
├── (a=4, N=115, Q_v=2.00, best=2.02, ubc=2.16)
│ ├── (a=3, N=57, Q_v=2.01, best=2.02, ubc=2.21)
│ │ └── (a=5, N=56, Q_v=2.00, best=2.02, ubc=2.19)
│ └── (a=5, N=57, Q_v=2.00, best=2.02, ubc=2.21)
│ └── (a=3, N=56, Q_v=2.00, best=2.02, ubc=2.19)
└── (a=5, N=117, Q_v=2.01, best=2.02, ubc=2.16)
├── (a=3, N=59, Q_v=2.01, best=2.02, ubc=2.21)
│ ├── (a=4, N=29, Q_v=2.01, best=2.02, ubc=2.27)
│ └── (a=6, N=29, Q_v=2.01, best=2.02, ubc=2.27)
└── (a=4, N=57, Q_v=2.01, best=2.02, ubc=2.21)
└── (a=3, N=56, Q_v=2.01, best=2.02, ubc=2.20)
INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5]
(a=5, N=242, Q_v=2.01, best=2.02, ubc=2.12)
├── (a=3, N=122, Q_v=2.01, best=2.02, ubc=2.16)
│ ├── (a=4, N=59, Q_v=2.01, best=2.02, ubc=2.21)
│ │ ├── (a=0, N=31, Q_v=2.01, best=2.02, ubc=2.27)
│ │ └── (a=1, N=27, Q_v=1.99, best=2.01, ubc=2.27)
│ └── (a=6, N=62, Q_v=2.01, best=2.02, ubc=2.21)
│ ├── (a=0, N=21, Q_v=2.01, best=2.02, ubc=2.33)
│ ├── (a=1, N=20, Q_v=2.01, best=2.01, ubc=2.33)
│ └── (a=2, N=20, Q_v=2.01, best=2.02, ubc=2.33)
└── (a=4, N=119, Q_v=2.01, best=2.02, ubc=2.16)
└── (a=3, N=118, Q_v=2.01, best=2.02, ubc=2.15)
├── (a=0, N=65, Q_v=2.01, best=2.02, ubc=2.21)
└── (a=1, N=52, Q_v=1.99, best=2.01, ubc=2.21)
INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3]
(a=3, N=247, Q_v=2.01, best=2.02, ubc=2.12)
├── (a=4, N=120, Q_v=2.01, best=2.02, ubc=2.16)
│ ├── (a=0, N=65, Q_v=2.01, best=2.02, ubc=2.21)
│ │ ├── (a=2, N=32, Q_v=2.01, best=2.02, ubc=2.27)
│ │ └── (a=4, N=32, Q_v=2.01, best=2.02, ubc=2.27)
│ └── (a=1, N=54, Q_v=2.00, best=2.01, ubc=2.21)
│ ├── (a=2, N=29, Q_v=2.01, best=2.01, ubc=2.27)
│ └── (a=4, N=24, Q_v=1.98, best=2.01, ubc=2.27)
└── (a=6, N=126, Q_v=2.01, best=2.02, ubc=2.16)
├── (a=0, N=43, Q_v=2.01, best=2.02, ubc=2.25)
│ └── (a=2, N=42, Q_v=2.01, best=2.02, ubc=2.22)
├── (a=1, N=40, Q_v=2.01, best=2.01, ubc=2.25)
│ └── (a=2, N=39, Q_v=2.01, best=2.01, ubc=2.22)
└── (a=2, N=42, Q_v=2.01, best=2.02, ubc=2.25)
├── (a=0, N=21, Q_v=2.01, best=2.02, ubc=2.31)
└── (a=1, N=20, Q_v=2.01, best=2.01, ubc=2.31)
INFO selected action 6 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6]
(a=6, N=251, Q_v=2.01, best=2.02, ubc=2.12)
├── (a=0, N=87, Q_v=2.01, best=2.02, ubc=2.19)
│ └── (a=2, N=86, Q_v=2.01, best=2.02, ubc=2.17)
│ ├── (a=0, N=43, Q_v=2.02, best=2.02, ubc=2.24)
│ └── (a=1, N=42, Q_v=2.01, best=2.02, ubc=2.24)
├── (a=1, N=80, Q_v=2.01, best=2.01, ubc=2.19)
│ └── (a=2, N=79, Q_v=2.01, best=2.01, ubc=2.17)
│ ├── (a=3, N=39, Q_v=2.01, best=2.01, ubc=2.24)
│ └── (a=5, N=39, Q_v=2.01, best=2.01, ubc=2.24)
└── (a=2, N=83, Q_v=2.01, best=2.02, ubc=2.19)
├── (a=0, N=42, Q_v=2.01, best=2.02, ubc=2.24)
│ ├── (a=0, N=21, Q_v=2.02, best=2.02, ubc=2.31)
│ └── (a=1, N=20, Q_v=2.01, best=2.02, ubc=2.32)
└── (a=1, N=40, Q_v=2.01, best=2.01, ubc=2.24)
├── (a=3, N=20, Q_v=2.01, best=2.01, ubc=2.31)
└── (a=5, N=19, Q_v=2.01, best=2.01, ubc=2.32)
INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0]
(a=0, N=212, Q_v=2.01, best=2.02, ubc=2.13)
└── (a=2, N=211, Q_v=2.01, best=2.02, ubc=2.13)
├── (a=0, N=108, Q_v=2.02, best=2.02, ubc=2.17)
│ └── (a=1, N=107, Q_v=2.02, best=2.02, ubc=2.16)
└── (a=1, N=102, Q_v=2.01, best=2.02, ubc=2.17)
├── (a=0, N=53, Q_v=2.02, best=2.02, ubc=2.22)
└── (a=6, N=48, Q_v=2.01, best=2.01, ubc=2.23)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2]
(a=2, N=336, Q_v=2.01, best=2.02, ubc=2.11)
├── (a=0, N=173, Q_v=2.02, best=2.02, ubc=2.15)
│ └── (a=1, N=172, Q_v=2.02, best=2.02, ubc=2.14)
│ ├── (a=3, N=86, Q_v=2.02, best=2.02, ubc=2.19)
│ └── (a=5, N=85, Q_v=2.02, best=2.02, ubc=2.19)
└── (a=1, N=162, Q_v=2.01, best=2.02, ubc=2.14)
├── (a=0, N=85, Q_v=2.02, best=2.02, ubc=2.19)
│ ├── (a=3, N=42, Q_v=2.02, best=2.02, ubc=2.25)
│ └── (a=5, N=42, Q_v=2.02, best=2.02, ubc=2.25)
└── (a=6, N=76, Q_v=2.01, best=2.01, ubc=2.19)
├── (a=3, N=38, Q_v=2.01, best=2.01, ubc=2.24)
└── (a=5, N=37, Q_v=2.01, best=2.01, ubc=2.25)
[17:00:16] INFO selected action 0 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0]
(a=0, N=298, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=1, N=297, Q_v=2.02, best=2.02, ubc=2.11)
├── (a=3, N=148, Q_v=2.02, best=2.02, ubc=2.15)
│ └── (a=5, N=147, Q_v=2.02, best=2.02, ubc=2.15)
└── (a=5, N=148, Q_v=2.02, best=2.02, ubc=2.15)
└── (a=3, N=147, Q_v=2.02, best=2.02, ubc=2.15)
INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1]
(a=1, N=422, Q_v=2.02, best=2.02, ubc=2.10)
├── (a=3, N=211, Q_v=2.02, best=2.02, ubc=2.14)
│ └── (a=5, N=210, Q_v=2.02, best=2.02, ubc=2.13)
│ ├── (a=2, N=69, Q_v=2.02, best=2.02, ubc=2.21)
│ ├── (a=3, N=70, Q_v=2.02, best=2.02, ubc=2.21)
│ └── (a=4, N=70, Q_v=2.02, best=2.02, ubc=2.21)
└── (a=5, N=210, Q_v=2.02, best=2.02, ubc=2.14)
└── (a=3, N=209, Q_v=2.02, best=2.02, ubc=2.13)
├── (a=2, N=69, Q_v=2.02, best=2.02, ubc=2.21)
├── (a=3, N=70, Q_v=2.02, best=2.02, ubc=2.21)
└── (a=4, N=69, Q_v=2.02, best=2.02, ubc=2.21)
INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3]
(a=3, N=336, Q_v=2.02, best=2.02, ubc=2.11)
└── (a=5, N=335, Q_v=2.02, best=2.02, ubc=2.11)
├── (a=2, N=111, Q_v=2.02, best=2.02, ubc=2.18)
│ ├── (a=3, N=55, Q_v=2.02, best=2.02, ubc=2.22)
│ └── (a=4, N=55, Q_v=2.02, best=2.02, ubc=2.22)
├── (a=3, N=112, Q_v=2.02, best=2.02, ubc=2.18)
│ ├── (a=2, N=55, Q_v=2.02, best=2.02, ubc=2.22)
│ └── (a=4, N=56, Q_v=2.02, best=2.02, ubc=2.22)
└── (a=4, N=111, Q_v=2.02, best=2.02, ubc=2.18)
├── (a=2, N=55, Q_v=2.02, best=2.02, ubc=2.22)
└── (a=3, N=55, Q_v=2.02, best=2.02, ubc=2.22)
INFO selected action 5 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5]
(a=5, N=460, Q_v=2.02, best=2.02, ubc=2.10)
├── (a=2, N=152, Q_v=2.02, best=2.02, ubc=2.16)
│ ├── (a=3, N=76, Q_v=2.02, best=2.02, ubc=2.20)
│ │ └── (a=4, N=75, Q_v=2.02, best=2.02, ubc=2.19)
│ └── (a=4, N=75, Q_v=2.02, best=2.02, ubc=2.20)
│ └── (a=3, N=74, Q_v=2.02, best=2.02, ubc=2.19)
├── (a=3, N=154, Q_v=2.02, best=2.02, ubc=2.16)
│ ├── (a=2, N=76, Q_v=2.02, best=2.02, ubc=2.20)
│ │ └── (a=4, N=75, Q_v=2.02, best=2.02, ubc=2.19)
│ └── (a=4, N=77, Q_v=2.02, best=2.02, ubc=2.20)
│ └── (a=2, N=76, Q_v=2.02, best=2.02, ubc=2.18)
└── (a=4, N=153, Q_v=2.02, best=2.02, ubc=2.16)
├── (a=2, N=76, Q_v=2.02, best=2.02, ubc=2.20)
│ └── (a=3, N=75, Q_v=2.02, best=2.02, ubc=2.19)
└── (a=3, N=76, Q_v=2.02, best=2.02, ubc=2.20)
└── (a=2, N=75, Q_v=2.02, best=2.02, ubc=2.19)
INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3]
(a=3, N=279, Q_v=2.02, best=2.02, ubc=2.12)
├── (a=2, N=139, Q_v=2.02, best=2.02, ubc=2.16)
│ └── (a=4, N=138, Q_v=2.02, best=2.02, ubc=2.15)
│ └── (a=4, N=137, Q_v=2.02, best=2.02, ubc=2.15)
└── (a=4, N=139, Q_v=2.02, best=2.02, ubc=2.16)
└── (a=2, N=138, Q_v=2.02, best=2.02, ubc=2.15)
└── (a=4, N=137, Q_v=2.02, best=2.02, ubc=2.15)
INFO selected action 4 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4]
(a=4, N=264, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=2, N=263, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=4, N=262, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=1, N=261, Q_v=2.02, best=2.02, ubc=2.12)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2]
(a=2, N=388, Q_v=2.02, best=2.02, ubc=2.10)
└── (a=4, N=387, Q_v=2.02, best=2.02, ubc=2.10)
└── (a=1, N=386, Q_v=2.02, best=2.02, ubc=2.10)
├── (a=2, N=192, Q_v=2.02, best=2.02, ubc=2.14)
└── (a=3, N=193, Q_v=2.02, best=2.02, ubc=2.14)
INFO selected action 4 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4]
(a=4, N=512, Q_v=2.02, best=2.02, ubc=2.09)
└── (a=1, N=511, Q_v=2.02, best=2.02, ubc=2.09)
├── (a=2, N=255, Q_v=2.02, best=2.02, ubc=2.13)
│ └── (a=3, N=254, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=3, N=255, Q_v=2.02, best=2.02, ubc=2.13)
└── (a=2, N=254, Q_v=2.02, best=2.02, ubc=2.12)
INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4, 1]
(a=1, N=636, Q_v=2.02, best=2.02, ubc=2.09)
├── (a=2, N=317, Q_v=2.02, best=2.02, ubc=2.12)
│ └── (a=3, N=316, Q_v=2.02, best=2.02, ubc=2.11)
│ └── (a=1, N=315, Q_v=2.02, best=2.02, ubc=2.11)
└── (a=3, N=318, Q_v=2.02, best=2.02, ubc=2.12)
└── (a=2, N=317, Q_v=2.02, best=2.02, ubc=2.11)
└── (a=1, N=316, Q_v=2.02, best=2.02, ubc=2.11)
INFO selected action 3 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4, 1, 3]
(a=3, N=443, Q_v=2.02, best=2.02, ubc=2.10)
└── (a=2, N=442, Q_v=2.02, best=2.02, ubc=2.10)
└── (a=1, N=441, Q_v=2.02, best=2.02, ubc=2.10)
INFO selected action 2 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4, 1, 3, 2]
(a=2, N=567, Q_v=2.02, best=2.02, ubc=2.09)
└── (a=1, N=566, Q_v=2.02, best=2.02, ubc=2.09)
INFO selected action 1 after 125 simulations.
INFO current action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4, 1, 3, 2, 1]
INFO Final action list: [4, 5, 5, 1, 5, 0, 2, 0, 4, 0, 4, 5, 1, 2, 4, 1, 0, 3, 2, 3, 5, 3, 6, 0, 2, 0, 1, 3, 5, 3, 4, 2, 4, 1, 3, 2, 1]
╔═══════════════════════════════════════════════════════╗
Job 0 ║ ████ ████████████ █████████ ║ Machine 0 █
Job 1 ║ ████████ ██████████████ ███████████████████████║ Machine 1 █
Job 2 ║ ████████ █████████████████ ███████ ║ Machine 2 █
Job 3 ║ █████████████████ ████████████████ ║ Machine 3 █
Job 4 ║█████████ ███████████ ████ ║ Machine 4 █
Job 5 ║████████████████████████ ████ █ ║ Machine 5 █
╚╦════╤════╤════╤════╤════╦════╤════╤════╤════╤════╦════╝
0.0 26.8 53.6
makespan: 58