MCTS Tree Visualisation

[6]:

import logging

import gymnasium as gym

[7]:

from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper

[8]:

from gymcts.logger import log

[9]:

# Configure the verbosity of the gymcts logger.
# logging.INFO (numeric value 20) is used here;
# switch to logging.DEBUG (numeric value 10) to see more detailed information.
log.setLevel(logging.INFO)

[10]:

if __name__ == '__main__':
    # Set up the FrozenLake environment (4x4 map, non-slippery, text rendering)
    # and reset it once before it gets wrapped.
    env = gym.make(
        'FrozenLake-v1',
        desc=None,
        map_name="4x4",
        is_slippery=False,
        render_mode="ansi",
    )
    env.reset()

    # Wrap the environment for gymcts; this notebook uses the action-history wrapper.
    env = ActionHistoryMCTSGymEnvWrapper(env)

    # Build the MCTS agent. The tree is kept between steps and is not rendered
    # automatically, so it can be inspected explicitly further down.
    agent = GymctsAgent(
        env=env,
        number_of_simulations_per_step=50,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=False,
        render_tree_max_depth=2,  # the maximum depth of the tree to render
        exclude_unvisited_nodes_from_render=True,  # whether to exclude unvisited nodes from the render
    )

    # Run the search until the environment is solved.
    actions = agent.solve()

    # 1) Render the tree starting at the current search root node.
    # search_root_node is the node that corresponds to the current state of the
    # environment in the search process; because agent.solve() has already run,
    # that is the node at the end of the search process.
    log.info(f"MCTS Tree starting at the final state of the environment (actions: {agent.search_root_node.state})")
    agent.show_mcts_tree(start_node=agent.search_root_node)

    # 2) Render the tree starting one step earlier: the parent of the terminal
    # node is the search root node of the previous step in the MCTS solving process.
    log.info(
        f"MCTS Tree starting at the pre-final state of the environment (actions: {agent.search_root_node.parent.state})"
    )
    agent.show_mcts_tree(start_node=agent.search_root_node.parent)

    # 3) Render the tree starting at the root of the whole tree.
    # tree_max_depth can limit the rendered depth to any number.
    log.info(f"MCTS Tree starting at the root state (actions: {agent.search_root_node.get_root().state})")
    agent.show_mcts_tree(start_node=agent.search_root_node.get_root(), tree_max_depth=1)

[16:46:49] INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0]

           INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0, 0]

           INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0, 0, 0]

           INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0]

           INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0, 0]

           INFO     selected action 2 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0, 0, 2]

           INFO     selected action 2 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0, 0, 2, 2]

           INFO     selected action 1 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0, 0, 2, 2, 1]

           INFO     selected action 0 after 50 simulations.

           INFO     current action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]

           INFO     Final action list: [0, 0, 0, 0, 0, 2, 2, 1, 0]

           INFO     MCTS Tree starting at the final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, 1, 0])

(a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)

           INFO     MCTS Tree starting at the pre-final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2,
                    1])

(a=1, N=70, Q_v=0.01, best=1.00, ubc=0.20)
├── (a=0, N=18, Q_v=0.00, best=0.00, ubc=0.34)
├── (a=1, N=17, Q_v=0.00, best=0.00, ubc=0.35)
│   ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│   │   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   │   ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   │   └── (a=2, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│   │   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   │   ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   │   └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│   ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│   └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
│       ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│       ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
│       └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
├── (a=2, N=17, Q_v=0.00, best=0.00, ubc=0.35)
└── (a=3, N=17, Q_v=0.00, best=0.00, ubc=0.35)
    ├── (a=0, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    │   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=1, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    │   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    ├── (a=2, N=4, Q_v=0.00, best=0.00, ubc=0.60)
    │   ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    │   └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)
    └── (a=3, N=4, Q_v=0.00, best=0.00, ubc=0.60)
        ├── (a=0, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        ├── (a=1, N=1, Q_v=0.00, best=0.00, ubc=0.83)
        └── (a=3, N=1, Q_v=0.00, best=0.00, ubc=0.83)

           INFO     MCTS Tree starting at the root state (actions: [])

(N=450, Q_v=0.01, best=1.00)
├── (a=0, N=413, Q_v=0.01, best=1.00, ubc=0.10)
│   ├── (a=0, N=366, Q_v=0.01, best=1.00, ubc=0.10)
│   ├── (a=1, N=16, Q_v=0.00, best=0.00, ubc=0.43)
│   ├── (a=2, N=15, Q_v=0.00, best=0.00, ubc=0.45)
│   └── (a=3, N=15, Q_v=0.00, best=0.00, ubc=0.45)
├── (a=1, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
├── (a=2, N=12, Q_v=0.00, best=0.00, ubc=0.50)
│   ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
│   └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)
└── (a=3, N=12, Q_v=0.00, best=0.00, ubc=0.50)
    ├── (a=0, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=1, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    ├── (a=2, N=3, Q_v=0.00, best=0.00, ubc=0.64)
    └── (a=3, N=2, Q_v=0.00, best=0.00, ubc=0.79)