FrozenLake 4x4
[6]:
import gymnasium as gym
[7]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
[8]:
from gymcts.logger import log
[9]:
# Configure the verbosity of the gymcts logger.
# Level 20 is INFO; switch to 10 (DEBUG) to see more detailed information.
log.setLevel(20)
[10]:
if __name__ == '__main__':
    # Demo script: let the gymcts MCTS agent search for an action sequence on
    # FrozenLake 4x4, replay that sequence step by step while rendering the
    # board, and finally report the episode statistics.

    # 0. create the environment
    env = gym.make(
        'FrozenLake-v1',
        desc=None,
        map_name="4x4",
        is_slippery=True,
        render_mode="ansi",
    )
    env.reset()

    # 1. wrap the environment with the deep-copy wrapper (or a custom gymcts wrapper)
    env = DeepCopyMCTSGymEnvWrapper(env)

    # 2. create the agent
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        number_of_simulations_per_step=50,
        exclude_unvisited_nodes_from_render=True,
    )

    # 3. solve the environment
    actions = agent.solve()

    # 4. render the environment solution in the terminal
    # NOTE(review): with is_slippery=True the transitions are stochastic, so
    # replaying the action list may not retrace the trajectory found during the
    # search -- confirm how the wrapper/agent handles this.
    print(env.render())
    # Start from an empty info dict so the statistics lookup below stays
    # well-defined even when the agent returns no actions.
    info = {}
    for a in actions:
        _obs, _rew, _term, _trun, info = env.step(a)
        print(env.render())

    # 5. print the solution
    # The "episode" entry has the layout produced by gymnasium's
    # RecordEpisodeStatistics wrapper ("l" = length, "r" = return); presumably
    # the gymcts wrapper applies it internally -- verify. It is only present on
    # the step that ends an episode, so its absence is handled explicitly
    # instead of failing with a KeyError/NameError.
    episode_stats = info.get("episode")
    if episode_stats is None:
        print("No episode statistics available: the replayed actions did not finish an episode.")
    else:
        episode_length = episode_stats["l"]
        episode_return = episode_stats["r"]
        if episode_return == 1.0:
            print(f"Environment solved in {episode_length} steps.")
        else:
            print(f"Environment not solved in {episode_length} steps.")
(N=50, Q_v=0.00, best=0.00)
├── (a=0, N=13, Q_v=0.00, best=0.00, ubc=0.39)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.40)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.40)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
[16:43:09] INFO selected action 0 after 50 simulations.
INFO current action list: [0]
(a=0, N=63, Q_v=0.00, best=0.00, ubc=0.19)
├── (a=0, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.36)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.59)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.68)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.37)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.37)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.58)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0]
(a=0, N=66, Q_v=0.09, best=1.00, ubc=0.28)
├── (a=0, N=22, Q_v=0.14, best=1.00, ubc=0.44)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.62)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=10, Q_v=0.30, best=1.00, ubc=0.69)
│ │ ├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=0.95)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=1.07)
│ │ ├── (a=2, N=4, Q_v=0.50, best=1.00, ubc=1.04)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.07)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.62)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.72)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=15, Q_v=0.07, best=1.00, ubc=0.44)
│ ├── (a=0, N=5, Q_v=0.20, best=1.00, ubc=0.72)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=1.00, best=1.00, ubc=1.90)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=15, Q_v=0.07, best=1.00, ubc=0.44)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=5, Q_v=0.20, best=1.00, ubc=0.72)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ ├── (a=1, N=1, Q_v=1.00, best=1.00, ubc=1.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=13, Q_v=0.08, best=1.00, ubc=0.48)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.65)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=5, Q_v=0.20, best=1.00, ubc=0.71)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=1.00, best=1.00, ubc=1.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.80)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.80)
└── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0]
(a=0, N=72, Q_v=0.08, best=1.00, ubc=0.26)
├── (a=0, N=12, Q_v=0.00, best=0.00, ubc=0.42)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=35, Q_v=0.17, best=1.00, ubc=0.42)
│ ├── (a=0, N=10, Q_v=0.20, best=1.00, ubc=0.62)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.76)
│ │ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.76)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.76)
│ │ └── (a=3, N=3, Q_v=0.33, best=1.00, ubc=0.95)
│ ├── (a=1, N=5, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=2, N=14, Q_v=0.29, best=1.00, ubc=0.64)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.81)
│ │ ├── (a=1, N=4, Q_v=0.25, best=1.00, ubc=0.82)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.81)
│ │ └── (a=3, N=5, Q_v=0.40, best=1.00, ubc=0.91)
│ └── (a=3, N=5, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.42)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.42)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
INFO selected action 1 after 50 simulations.
INFO current action list: [0, 0, 0, 1]
(a=1, N=85, Q_v=0.27, best=1.00, ubc=0.44)
├── (a=0, N=17, Q_v=0.12, best=1.00, ubc=0.48)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.69)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.69)
│ └── (a=3, N=6, Q_v=0.17, best=1.00, ubc=0.65)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.67)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
├── (a=1, N=10, Q_v=0.00, best=0.00, ubc=0.47)
├── (a=2, N=47, Q_v=0.45, best=1.00, ubc=0.66)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.69)
│ ├── (a=1, N=9, Q_v=0.22, best=1.00, ubc=0.68)
│ │ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ │ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.94)
│ │ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.05)
│ ├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.67)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ │ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.90)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ └── (a=3, N=26, Q_v=0.65, best=1.00, ubc=0.93)
│ ├── (a=0, N=19, Q_v=0.84, best=1.00, ubc=1.13)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.90)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.90)
└── (a=3, N=10, Q_v=0.00, best=0.00, ubc=0.47)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.62)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=0.76)
│ └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.76)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.76)
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 1, 2]
(a=2, N=97, Q_v=0.69, best=1.00, ubc=0.85)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.76)
├── (a=1, N=9, Q_v=0.22, best=1.00, ubc=0.73)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.94)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=2, N=1, Q_v=1.00, best=1.00, ubc=1.74)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.05)
├── (a=2, N=7, Q_v=0.14, best=1.00, ubc=0.71)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ ├── (a=1, N=3, Q_v=0.33, best=1.00, ubc=0.90)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.99)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.99)
└── (a=3, N=76, Q_v=0.83, best=1.00, ubc=1.00)
├── (a=0, N=69, Q_v=0.90, best=1.00, ubc=1.08)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=1.03)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=1.03)
│ ├── (a=2, N=6, Q_v=0.50, best=1.00, ubc=1.09)
│ └── (a=3, N=58, Q_v=1.00, best=1.00, ubc=1.19)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=1.04)
├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=1.04)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=1.04)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.59)
INFO selected action 3 after 50 simulations.
INFO current action list: [0, 0, 0, 1, 2, 3]
(a=3, N=126, Q_v=0.87, best=1.00, ubc=1.01)
├── (a=0, N=116, Q_v=0.94, best=1.00, ubc=1.08)
│ ├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=1.09)
│ │ └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=1.09)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ ├── (a=2, N=6, Q_v=0.50, best=1.00, ubc=1.13)
│ │ ├── (a=0, N=2, Q_v=0.50, best=1.00, ubc=1.17)
│ │ ├── (a=1, N=1, Q_v=1.00, best=1.00, ubc=1.95)
│ │ ├── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.95)
│ └── (a=3, N=105, Q_v=1.00, best=1.00, ubc=1.15)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.90)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=3, N=3, Q_v=0.00, best=0.00, ubc=0.90)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.74)
[16:43:10] INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 1, 2, 3, 0]
(a=0, N=166, Q_v=0.95, best=1.00, ubc=1.07)
├── (a=0, N=2, Q_v=0.00, best=0.00, ubc=1.13)
│ └── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=1, N=2, Q_v=0.00, best=0.00, ubc=1.13)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
├── (a=2, N=28, Q_v=0.82, best=1.00, ubc=1.12)
│ ├── (a=0, N=3, Q_v=0.33, best=1.00, ubc=1.08)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ │ └── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.74)
│ ├── (a=1, N=21, Q_v=1.00, best=1.00, ubc=1.28)
│ ├── (a=2, N=2, Q_v=0.00, best=0.00, ubc=0.91)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.59)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=1.29)
└── (a=3, N=133, Q_v=1.00, best=1.00, ubc=1.14)
INFO selected action 3 after 50 simulations.
INFO current action list: [0, 0, 0, 1, 2, 3, 0, 3]
INFO Final action list: [0, 0, 0, 1, 2, 3, 0, 3]
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Down)
SFFF
FHFH
FFFH
HFFG
(Right)
SFFF
FHFH
FFFH
HFFG
(Up)
SFFF
FHFH
FFFH
HFFG
(Left)
SFFF
FHFH
FFFH
HFFG
(Up)
SFFF
FHFH
FFFH
HFFG
Environment solved in 8 steps.