FrozenLake 6x6
[1]:
import gymnasium as gym
[2]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper
[3]:
from gymcts.logger import log
[4]:
# Set the gymcts logger threshold to numeric level 20.
# NOTE(review): 20 is the value of logging.INFO if `log` is a standard-library
# logger — confirm, and consider using the named constant instead.
log.setLevel(20)
[5]:
if __name__ == '__main__':
    log.debug("Starting example")

    # Step 0: build the environment from a hand-written 6x6 layout.
    # The row strings use Gymnasium's FrozenLake map alphabet
    # (S = start, F = frozen, H = hole, G = goal).
    lake_rows = [
        "SFFFFF",
        "FFFFFF",
        "FFFHFF",
        "FFFFFH",
        "FHFFFF",
        "FFFFFG",
    ]
    env = gym.make(
        'FrozenLake-v1',
        desc=lake_rows,
        map_name=None,  # the explicit layout above is passed via `desc`
        is_slippery=True,
        render_mode="ansi",  # render() output is printed as text below
    )
    env.reset()

    # Step 1: wrap the environment so the MCTS agent can use it
    # (the deep-copy wrapper here; a custom gymcts wrapper also works).
    env = DeepCopyMCTSGymEnvWrapper(env)

    # Step 2: create the search agent on top of the wrapped environment.
    agent = GymctsAgent(env=env, clear_mcts_tree_after_step=False)

    # Step 3: run the search with 200 simulations per selected action;
    # the result is iterated below as a sequence of actions.
    actions = agent.solve(num_simulations_per_step=200)

    # Step 4: print the rendering once before stepping, then apply each
    # returned action to the wrapped environment and print after every step.
    print(env.render())
    for action in actions:
        # The step result is unpacked into five values but not used further.
        _obs, _reward, _terminated, _truncated, _info = env.step(action)
        print(env.render())
[16:47:17] INFO selected action 1 after 200 simulations.
INFO current action list: [1]
[16:47:18] INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1]
[16:47:19] INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0]
INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0, 0]
[16:47:20] INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1]
[16:47:21] INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2]
INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0]
[16:47:22] INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2]
[16:47:23] INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1]
[16:47:24] INFO selected action 3 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3]
INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1]
[16:47:25] INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0]
[16:47:26] INFO selected action 3 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3]
INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2]
[16:47:27] INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1]
[16:47:28] INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2]
INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1]
[16:47:29] INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0]
[16:47:30] INFO selected action 3 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3]
INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2]
INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1]
INFO selected action 0 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0]
INFO selected action 2 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2]
[16:47:31] INFO selected action 1 after 200 simulations.
INFO current action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2, 1]
INFO Final action list: [1, 1, 0, 0, 1, 2, 0, 2, 1, 3, 1, 0, 3, 2, 1, 2, 1, 0, 3, 2, 1, 0, 2, 1]
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Up)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Up)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Up)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Left)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Right)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG
(Down)
SFFFFF
FFFFFF
FFFHFF
FFFFFH
FHFFFF
FFFFFG