MCTS Tree Visualisation

[6]:
import gymnasium as gym
[7]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper
[8]:
from gymcts.logger import log
[9]:
# Choose how verbose the gymcts logger is (numeric level passed to setLevel):
#   20 (INFO)  - the level used in this notebook
#   10 (DEBUG) - switch to this value to see more detailed information
log.setLevel(20)
[10]:
if __name__ == '__main__':
    # --- environment -------------------------------------------------------
    # Non-slippery 4x4 FrozenLake with the text ("ansi") render mode.
    # The raw environment is reset once before it is handed to the wrapper.
    frozen_lake = gym.make(
        'FrozenLake-v1',
        desc=None,
        map_name="4x4",
        is_slippery=False,
        render_mode="ansi",
    )
    frozen_lake.reset()

    # The agent works on a wrapped environment; here the action-history
    # wrapper is used (a custom gymcts wrapper could be used instead).
    env = ActionHistoryMCTSGymEnvWrapper(frozen_lake)

    # --- agent -------------------------------------------------------------
    agent_options = dict(
        env=env,
        # keep the tree between steps so it can be inspected after solving
        clear_mcts_tree_after_step=False,
        # no automatic rendering; the trees are rendered explicitly below
        render_tree_after_step=False,
        number_of_simulations_per_step=50,
        # whether to exclude unvisited nodes from the render
        exclude_unvisited_nodes_from_render=True,
        # the maximum depth of the tree to render
        render_tree_max_depth=2,
    )
    agent = GymctsAgent(**agent_options)

    # --- solve -------------------------------------------------------------
    actions = agent.solve()

    # --- tree 1: final search node ------------------------------------------
    # search_root_node is the node that corresponds to the current state of
    # the environment in the search process. Because agent.solve() has
    # finished, this is the node at the end of the search process.
    log.info(f"MCTS Tree starting at the final state of the environment (actions: {agent.search_root_node.state})")
    agent.show_mcts_tree(start_node=agent.search_root_node)

    # --- tree 2: one step before the end -------------------------------------
    # The parent of the final node is the search root node of the previous
    # step in the MCTS solving process.
    pre_final_message = (
        f"MCTS Tree starting at the pre-final state of the environment (actions: {agent.search_root_node.parent.state})"
    )
    log.info(pre_final_message)
    agent.show_mcts_tree(start_node=agent.search_root_node.parent)

    # --- tree 3: root of the whole tree --------------------------------------
    log.info(f"MCTS Tree starting at the root state (actions: {agent.search_root_node.get_root().state})")
    # tree_max_depth limits the depth of the rendered tree for this call;
    # it can be set to any number.
    agent.show_mcts_tree(start_node=agent.search_root_node.get_root(), tree_max_depth=1)
[16:46:49] INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0]
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0]
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0]
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0]
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 0]
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 0, 2]
           INFO     selected action 2 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 0, 2, 2]
           INFO     selected action 1 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 0, 2, 2, 1]
           INFO     selected action 0 after 50 simulations.
           INFO     current action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]
           INFO     Final action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]
           INFO     MCTS Tree starting at the final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, 1, 0])
(a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
           INFO     MCTS Tree starting at the pre-final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2,
                    1])
(a=1, N=70, Q_v=0.01, best=1.00, ubc=0.20)
├── (a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
└── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
└── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=17, Q_v=0.00, best=0.00, ubc=0.35)
└── (a=3, N=17, Q_v=0.00, best=0.00, ubc=0.35)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
           INFO     MCTS Tree starting at the root state (actions: [])
(N=450, Q_v=0.01, best=1.00)
├── (a=0, N=413, Q_v=0.01, best=1.00, ubc=0.10)
│   ├── (a=0, N=366, Q_v=0.01, best=1.00, ubc=0.10)
│   ├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.43)
│   ├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.45)
│   └── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.45)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.50)
    ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)