MCTS Tree Visualisation
[6]:
import gymnasium as gym
[7]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
[8]:
from gymcts.logger import log
[9]:
# Verbosity of the gymcts logger, given as a numeric level:
#   20 -> INFO  (used here)
#   10 -> DEBUG (switch to this to see more detailed information)
LOG_LEVEL = 20
log.setLevel(LOG_LEVEL)
[10]:
if __name__ == '__main__':
    # Build the 4x4 FrozenLake environment (is_slippery=False, ANSI rendering)
    # and reset it before it is handed to the MCTS wrapper.
    base_env = gym.make(
        'FrozenLake-v1',
        desc=None,
        map_name="4x4",
        is_slippery=False,
        render_mode="ansi",
    )
    base_env.reset()

    # The agent needs a gymcts wrapper around the environment; either the naive
    # wrapper or a custom one works. Here the action-history wrapper is used.
    env = ActionHistoryMCTSGymEnvWrapper(base_env)

    # Configure the agent. The tree is kept between steps and is not rendered
    # automatically; it is rendered explicitly further down.
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=False,
        number_of_simulations_per_step=50,
        # whether to exclude unvisited nodes from the render
        exclude_unvisited_nodes_from_render=True,
        # the maximum depth of the tree to render
        render_tree_max_depth=2,
    )

    # Run the search until the environment is solved.
    actions = agent.solve()

    # 1) Tree below the current search root.
    # `search_root_node` is the node that corresponds to the current state of
    # the environment in the search process. Because agent.solve() has already
    # finished, this is the node reached at the end of the search.
    final_node = agent.search_root_node
    log.info(f"MCTS Tree starting at the final state of the environment (actions: {final_node.state})")
    agent.show_mcts_tree(start_node=final_node)

    # 2) Tree below the parent of that final node, i.e. the search root node
    # of the previous step in the MCTS solving process.
    pre_final_node = agent.search_root_node.parent
    log.info(
        f"MCTS Tree starting at the pre-final state of the environment (actions: {pre_final_node.state})")
    agent.show_mcts_tree(start_node=pre_final_node)

    # 3) Tree below the overall root of the search tree.
    tree_root = agent.search_root_node.get_root()
    log.info(f"MCTS Tree starting at the root state (actions: {tree_root.state})")
    agent.show_mcts_tree(
        start_node=tree_root,
        # the rendered depth can be limited to any number
        tree_max_depth=1,
    )
[16:46:49] INFO selected action 0 after 50 simulations.
INFO current action list: [0]
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0]
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0]
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0]
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 0]
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 0, 2]
INFO selected action 2 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 0, 2, 2]
INFO selected action 1 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 0, 2, 2, 1]
INFO selected action 0 after 50 simulations.
INFO current action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]
INFO Final action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]
INFO MCTS Tree starting at the final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, 1, 0])
(a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
INFO MCTS Tree starting at the pre-final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, 1])
(a=1, N=70, Q_v=0.01, best=1.00, ubc=0.20)
├── (a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│ ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ │ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ │ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=17, Q_v=0.00, best=0.00, ubc=0.35)
└── (a=3, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│ ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│ └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
INFO MCTS Tree starting at the root state (actions: [])
(N=450, Q_v=0.01, best=1.00)
├── (a=0, N=413, Q_v=0.01, best=1.00, ubc=0.10)
│ ├── (a=0, N=366, Q_v=0.01, best=1.00, ubc=0.10)
│ ├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.43)
│ ├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.45)
│ └── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.45)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│ ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│ └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.50)
├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
└── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)