{ "cells": [ { "cell_type": "markdown", "id": "2f7b7f4e", "metadata": {}, "source": [ "# MCTS Tree Visualisation" ] }, { "cell_type": "code", "execution_count": 6, "id": "6e9b847e", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:46:49.560667Z", "start_time": "2025-03-04T15:46:49.558216Z" } }, "outputs": [], "source": [ "import gymnasium as gym" ] }, { "cell_type": "code", "execution_count": 7, "id": "c27bdda7", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:46:49.563541Z", "start_time": "2025-03-04T15:46:49.561642Z" } }, "outputs": [], "source": [ "from gymcts.gymcts_agent import GymctsAgent\n", "from gymcts.gymcts_action_history_wrapper import ActionHistoryMCTSGymEnvWrapper" ] }, { "cell_type": "code", "execution_count": 8, "id": "674bc17e", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:46:49.567009Z", "start_time": "2025-03-04T15:46:49.564439Z" } }, "outputs": [], "source": [ "from gymcts.logger import log" ] }, { "cell_type": "code", "execution_count": 9, "id": "603b77b8", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:46:49.571891Z", "start_time": "2025-03-04T15:46:49.568981Z" } }, "outputs": [], "source": [ "# set log level to 20 (INFO)\n", "# set log level to 10 (DEBUG) to see more detailed information\n", "log.setLevel(20)" ] }, { "cell_type": "code", "execution_count": 10, "id": "2a97e0c7", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:46:49.705612Z", "start_time": "2025-03-04T15:46:49.574239Z" } }, "outputs": [ { "data": { "text/html": [ "
[16:46:49] INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m[16:46:49]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 2 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0, 0, 2] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 2 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0, 0, 2, 2] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 1 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0, 0, 2, 2, 1] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO selected action 0 after 50 simulations. \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m50\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO current action list: [0, 0, 0, 0, 0, 2, 2, 1, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO Final action list: [0, 0, 0, 0, 0, 2, 2, 1, 0] \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Final action list: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
INFO MCTS Tree starting at the final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, 1, 0])\n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m MCTS Tree starting at the final state of the environment \u001B[1m(\u001B[0mactions: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m\u001B[1m)\u001B[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m18\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.34\u001B[0m)\n" ] }, { "data": { "text/html": [ "
INFO MCTS Tree starting at the pre-final state of the environment (actions: [0, 0, 0, 0, 0, 2, 2, \n", " 1]) \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m MCTS Tree starting at the pre-final state of the environment \u001B[1m(\u001B[0mactions: \u001B[1m[\u001B[0m\u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m2\u001B[0m, \n", "\u001B[2;36m \u001B[0m \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m\u001B[1m)\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m70\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;252;101;128m0.01\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[96m1.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.20\u001B[0m)\n", "\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m18\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.34\u001B[0m)\n", "\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m17\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.35\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;127;0;255m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m│ \u001B[0m\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m17\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.35\u001B[0m)\n", "\u001B[38;2;0;180;235m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m17\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.35\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;127;0;255m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;0;180;235m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;128;254;179m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;128;254;179m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m│ \u001B[0m\u001B[38;2;128;254;179m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m4\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.60\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m \u001B[0m\u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n", "\u001B[38;2;0;180;235m \u001B[0m\u001B[38;2;255;178;96m \u001B[0m\u001B[38;2;255;178;96m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m1\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.83\u001B[0m)\n" ] }, { "data": { "text/html": [ "
INFO MCTS Tree starting at the root state (actions: []) \n", "\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m MCTS Tree starting at the root state \u001B[1m(\u001B[0mactions: \u001B[1m[\u001B[0m\u001B[1m]\u001B[0m\u001B[1m)\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "(\u001B[33mN\u001B[0m=450, \u001B[33mQ_v\u001B[0m=0.01, \u001B[33mbest\u001B[0m=1.00)\n", "├── (\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m413\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;253;100;128m0.01\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[96m1.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.10\u001B[0m)\n", "│ \u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m366\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;253;100;128m0.01\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[96m1.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.10\u001B[0m)\n", "│ \u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m16\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.43\u001B[0m)\n", "│ \u001B[38;2;127;0;255m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m15\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.45\u001B[0m)\n", "│ \u001B[38;2;127;0;255m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m15\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.45\u001B[0m)\n", "├── (\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m12\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.50\u001B[0m)\n", "│ \u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;0;180;235m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;0;180;235m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m2\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.79\u001B[0m)\n", "├── (\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m12\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.50\u001B[0m)\n", "│ \u001B[38;2;128;254;179m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;128;254;179m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;128;254;179m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", "│ \u001B[38;2;128;254;179m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m2\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.79\u001B[0m)\n", "└── (\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m12\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.50\u001B[0m)\n", " \u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;127;0;255m0\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", " \u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;0;180;235m1\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", " \u001B[38;2;255;178;96m├── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;128;254;179m2\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m3\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.64\u001B[0m)\n", " \u001B[38;2;255;178;96m└── \u001B[0m(\u001B[33ma\u001B[0m=\u001B[38;2;255;178;96m3\u001B[0m, \u001B[33mN\u001B[0m=\u001B[96m2\u001B[0m, \u001B[33mQ_v\u001B[0m=\u001B[38;2;255;100;128m0.00\u001B[0m, \u001B[33mbest\u001B[0m=\u001B[31m0.00\u001B[0m, \u001B[33mubc\u001B[0m=\u001B[96m0.79\u001B[0m)\n" ] } ], "source": [ "if __name__ == '__main__':\n", " # create the environment\n", " env = gym.make('FrozenLake-v1', desc=None, map_name=\"4x4\", is_slippery=False, render_mode=\"ansi\")\n", " env.reset()\n", "\n", " # wrap the environment with the naive wrapper or a custom gymcts wrapper\n", " env = ActionHistoryMCTSGymEnvWrapper(env)\n", "\n", " # create the agent\n", " agent = GymctsAgent(\n", " env=env,\n", " clear_mcts_tree_after_step=False,\n", " render_tree_after_step=False,\n", " number_of_simulations_per_step=50,\n", " exclude_unvisited_nodes_from_render=True, # weather to exclude unvisited nodes from the render\n", " render_tree_max_depth=2 # the maximum depth of the tree to render\n", " )\n", "\n", " # solve the environment\n", " actions = agent.solve()\n", "\n", " # render the MCTS tree from the root\n", " # search_root_node is the node that corresponds to the current state of the environment in the search process\n", " # since we called agent.solve() we are at the end of the search process\n", " log.info(f\"MCTS Tree starting at the final state of the environment (actions: {agent.search_root_node.state})\")\n", " agent.show_mcts_tree(\n", " start_node=agent.search_root_node,\n", " )\n", "\n", " # the parent of the terminal node (which we are rendering below) is the search root node of the previous step in the\n", " # MCTS solving process\n", " log.info(\n", " f\"MCTS Tree starting at the pre-final state of the environment (actions: {agent.search_root_node.parent.state})\")\n", " agent.show_mcts_tree(\n", " start_node=agent.search_root_node.parent,\n", " )\n", "\n", " # render the MCTS tree from the root\n", " log.info(f\"MCTS Tree starting at the root state (actions: {agent.search_root_node.get_root().state})\")\n", " agent.show_mcts_tree(\n", " start_node=agent.search_root_node.get_root(),\n", " # you can limit the depth of the tree to render to any number\n", " tree_max_depth=1\n", " )" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }