{ "cells": [ { "cell_type": "markdown", "id": "cdf95c4d", "metadata": {}, "source": [ "# FrozenLake 6x6" ] }, { "cell_type": "code", "execution_count": 1, "id": "7c1861a0", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:47:16.929681Z", "start_time": "2025-03-04T15:47:16.819817Z" } }, "outputs": [], "source": [ "import gymnasium as gym" ] }, { "cell_type": "code", "execution_count": 2, "id": "5e2b1e8c", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:47:16.960091Z", "start_time": "2025-03-04T15:47:16.930773Z" } }, "outputs": [], "source": [ "from gymcts.gymcts_agent import GymctsAgent\n", "from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper" ] }, { "cell_type": "code", "execution_count": 3, "id": "939b8006", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:47:16.962718Z", "start_time": "2025-03-04T15:47:16.960880Z" } }, "outputs": [], "source": [ "from gymcts.logger import log" ] }, { "cell_type": "code", "execution_count": 4, "id": "1139a220", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:47:16.965129Z", "start_time": "2025-03-04T15:47:16.963303Z" } }, "outputs": [], "source": [ "log.setLevel(20)" ] }, { "cell_type": "code", "execution_count": 5, "id": "503eb755", "metadata": { "ExecuteTime": { "end_time": "2025-03-04T15:47:31.112138Z", "start_time": "2025-03-04T15:47:16.966014Z" } }, "outputs": [ { "data": { "text/html": [ "
[16:47:17] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:17]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1]                                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:18] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:18]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1]                                                                    \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:19] INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:19]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0]                                                                 \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0]                                                              \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:20] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:20]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1]                                                           \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:21] INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:21]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2]                                                        \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0]                                                     \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:22] INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:22]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2]                                                  \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:23] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:23]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1]                                               \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:24] INFO     selected action 3 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:24]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m3\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3]                                            \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1]                                         \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:25] INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:25]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0]                                      \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:26] INFO     selected action 3 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:26]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m3\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3]                                   \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2]                                \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:27] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:27]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1]                             \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:28] INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:28]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2]                          \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1]                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:29] INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:29]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0]                    \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:30] INFO     selected action 3 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:30]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m3\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3]                 \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2]              \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1]           \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 0 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m0\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0]        \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     selected action 2 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m2\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2]     \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
[16:47:31] INFO     selected action 1 after 200 simulations.                                                       \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m[16:47:31]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m selected action \u001B[1;36m1\u001B[0m after \u001B[1;36m200\u001B[0m simulations. \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2, 1]  \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m current action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
           INFO     Final action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2, 1]    \n",
       "
\n" ], "text/plain": [ "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Final action list: \u001B[1m[\u001B[0m\u001B[1;36m1\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m3\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m, \u001B[1;36m0\u001B[0m, \u001B[1;36m2\u001B[0m, \u001B[1;36m1\u001B[0m\u001B[1m]\u001B[0m \n" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "S\u001B[41mF\u001B[0mFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Right)\n", "S\u001B[41mF\u001B[0mFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Right)\n", "S\u001B[41mF\u001B[0mFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "F\u001B[41mF\u001B[0mFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Up)\n", "SFFFFF\n", "\u001B[41mF\u001B[0mFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "\u001B[41mF\u001B[0mFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "SFFFFF\n", "\u001B[41mF\u001B[0mFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Up)\n", "\u001B[41mS\u001B[0mFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Right)\n", "SFFFFF\n", "\u001B[41mF\u001B[0mFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "\u001B[41mF\u001B[0mFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Right)\n", "SFFFFF\n", "F\u001B[41mF\u001B[0mFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "FFFFFF\n", "F\u001B[41mF\u001B[0mFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "F\u001B[41mF\u001B[0mFFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Up)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FF\u001B[41mF\u001B[0mFFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Right)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFF\u001B[41mF\u001B[0mFH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFF\u001B[41mF\u001B[0mH\n", "FHFFFF\n", "FFFFFG\n", "\n", " (Left)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFF\u001B[41mF\u001B[0mF\n", "FFFFFG\n", "\n", " (Right)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFF\u001B[41mF\u001B[0m\n", "FFFFFG\n", "\n", " (Down)\n", "SFFFFF\n", "FFFFFF\n", "FFFHFF\n", "FFFFFH\n", "FHFFFF\n", "FFFFF\u001B[41mG\u001B[0m\n" ] } ], "source": [ "if __name__ == '__main__':\n", " log.debug(\"Starting example\")\n", "\n", " # 0. create the environment\n", " custom_map = [\n", " \"SFFFFF\",\n", " \"FFFFFF\",\n", " \"FFFHFF\",\n", " \"FFFFFH\",\n", " \"FHFFFF\",\n", " \"FFFFFG\"\n", " ]\n", " env = gym.make(\n", " 'FrozenLake-v1',\n", " desc=custom_map,\n", " map_name=None,\n", " is_slippery=True,\n", " render_mode=\"ansi\"\n", " )\n", " env.reset()\n", "\n", " # 1. wrap the environment with the naive wrapper or a custom gymcts wrapper\n", " env = DeepCopyMCTSGymEnvWrapper(env)\n", "\n", " # 2. create the agent\n", " agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False)\n", "\n", " # 3. solve the environment\n", " actions = agent.solve(num_simulations_per_step=200)\n", "\n", " # 4. render the environment solution in the terminal\n", " print(env.render())\n", " for a in actions:\n", " obs, rew, term, trun, info = env.step(a)\n", " print(env.render())" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }