Files
evotraders/tuner/frozen_lake/_utils.py
2026-01-19 12:25:13 +08:00

210 lines
5.7 KiB
Python

# -*- coding: utf-8 -*-
"""
Utils for the FrozenLake environment.
Modified from rllm
"""
from typing import Literal, Optional, Tuple
import numpy as np
from pydantic import BaseModel, Field
# Integer code assigned to each byte tile symbol of the gym map.
# The code of a symbol is its position in the tuple below.
MAP_LOOKUP = {
    symbol: code for code, symbol in enumerate((b"P", b"F", b"H", b"G"))
}

# Text cell rendered for each integer tile code when the environment is
# turned into a textual observation. The tile code is the tuple index.
GRID_LOOKUP = dict(
    enumerate(
        (
            " P \t",  # 0: player
            " _ \t",  # 1: frozen
            " O \t",  # 2: hole
            " G \t",  # 3: goal
            " X \t",  # 4: player fall into hole
            # NOTE(review): unlike the other cells this one carries no
            # visible glyph, only the tab separator - confirm intended.
            "\t",  # 5: player on goal
        ),
    ),
)

# Display name of each integer action id (the id is the tuple index).
ACTION_LOOKUP = dict(enumerate(("None", "Left", "Down", "Right", "Up")))
# System prompt given to the model playing FrozenLake. It describes the
# board symbols, the rules, the four valid actions, the rewards, and asks
# for the final action to be wrapped in triple backticks.
# The trailing backslash on the first line is a line continuation inside
# the string, so the first two source lines form a single prompt line.
# Prompting format inspired by the RAGEN project.
SYSTEM_PROMPT = """You are Qwen, created by Alibaba Cloud. \
You are a helpful assistant. You are walking on a frozen lake.
FrozenLake Quick Guide
Goal: Reach the goal (G). Player (P) and Goal (G) must overlap.
Symbols:
_ Frozen | O Hole | G Goal | P Player
Rules:
1. Avoid falling into holes (O).
2. Frozen tiles are slippery, you may move perpendicular to
your intended direction.
Valid Action (separated by | ):
Up | Down | Left | Right
Rewards:
Fall into hole: 0
Reach goal: +1.0
You will be provided the current observation, please decide on
the next Action.
You should show your thought process and then input the final
action in ``` ```.
You should only output the NEXT ACTION at each iteration in
the ``` ```. For example, if you want to move up, you should
output ```Up```.
You should plan ahead and need to achieve it in minimum number
of steps.
You should be aware that frozen tiles can be slippery, but the
chance is small and you should not overthink it.
Please show your thinking process and put the final action in
``` ```. In every turn, the final action MUST be one of Up,
Down, Left, Right.
"""
class FrozenLakeAction(BaseModel):
    """Pydantic model holding a single FrozenLake move.

    The only field, ``action``, is restricted by its ``Literal`` type to
    the four direction names ``Up``, ``Down``, ``Left`` and ``Right``.
    """

    action: Literal["Up", "Down", "Left", "Right"] = Field(
        description="The action to take in the FrozenLake environment, "
        "must be one of Up, Down, Left, Right",
    )
def is_valid(board: list[list[str]], max_size: int, max_steps: int) -> bool:
    """Check whether the goal is reachable from the start tile.

    The board is explored breadth-first, one distance level at a time, so
    every tile is first visited at its shortest distance from the start.
    A depth-first search that shares one visited set can reach a tile via
    a detour, use up the step budget there, and then wrongly report a
    reachable goal as unreachable; the level-wise search avoids that.

    Args:
        board: Grid indexed as ``board[row][col]``. ``"S"`` marks the
            start, ``"G"`` the goal and ``"H"`` a hole; every other tile
            is walkable.
        max_size: Side length of the board. Coordinates outside
            ``[0, max_size)`` are treated as off-board.
        max_steps: Largest distance (in moves) from the start at which a
            tile is still expanded. The goal counts as reachable when it
            is adjacent to such a tile, i.e. when it lies at most
            ``max_steps + 1`` moves from the start.

    Returns:
        True if the goal can be reached under the step limit, False
        otherwise (including when the board has no start tile).
    """
    # Locate the first "S" in row-major order.
    start = next(
        (
            (row_idx, col_idx)
            for row_idx, row in enumerate(board)
            for col_idx, tile in enumerate(row)
            if tile == "S"
        ),
        None,
    )
    if start is None:
        return False

    directions = ((1, 0), (0, 1), (-1, 0), (0, -1))
    discovered = {start}
    level = [start]  # all tiles exactly `depth` moves away from the start
    depth = 0
    while level and depth <= max_steps:
        next_level = []
        for r, c in level:
            for d_row, d_col in directions:
                r_new, c_new = r + d_row, c + d_col
                if not (0 <= r_new < max_size and 0 <= c_new < max_size):
                    continue
                tile = board[r_new][c_new]
                if tile == "G":
                    return True
                # Holes are impassable; known tiles were already reached
                # by a path that is at least as short.
                if tile != "H" and (r_new, c_new) not in discovered:
                    discovered.add((r_new, c_new))
                    next_level.append((r_new, c_new))
        level = next_level
        depth += 1
    return False
def generate_random_map(
    size: int = 8,
    p: float = 0.8,
    seed: int = 0,
    max_steps: int = 5,
) -> Tuple[list[str], Tuple[int, int]]:
    """Generate a random map that has a path from start to goal.

    Boards are sampled repeatedly until ``is_valid`` accepts one. Each
    attempt draws the frozen/hole tiles first and then the start and goal
    coordinates, all from the same seeded generator, so the result is
    reproducible for a given set of arguments.

    Args:
        size: Size of each side of the grid; must be at least 2 so that
            start and goal can occupy different tiles.
        p: Probability that a tile is frozen. Values above 1 are
            clipped to 1.
        seed: Seed to ensure the generation of reproducible maps.
        max_steps: Step limit passed to ``is_valid``; must be
            non-negative.

    Returns:
        A tuple ``(rows, (goal_row, goal_col))`` where ``rows`` holds one
        string per board row.

    Raises:
        ValueError: If ``size`` is smaller than 2 or ``max_steps`` is
            negative (either would otherwise make the sampling loop run
            forever).
        ImportError: If gymnasium is not installed.
    """
    # Validate first: these inputs can never yield a valid board.
    if size < 2:
        raise ValueError(
            f"size must be at least 2 to place distinct start and goal "
            f"tiles, got {size}",
        )
    if max_steps < 0:
        raise ValueError(f"max_steps must be non-negative, got {max_steps}")

    # gymnasium is imported lazily so the module loads without it.
    try:
        from gymnasium.utils import seeding
    except ImportError as exc:
        raise ImportError(
            "Gymnasium is not installed. "
            "Please install gymnasium first before "
            "running the frozen_lake workflow.",
        ) from exc
    np_random, _ = seeding.np_random(seed)

    p = min(1, p)  # loop-invariant, so clamp once
    while True:
        board: list[list[str]] = np_random.choice(
            ["F", "H"],
            (size, size),
            p=[p, 1 - p],
        ).tolist()
        # Draw start and goal until they land on different tiles.
        while True:
            start_r = int(np_random.integers(0, size))
            start_c = int(np_random.integers(0, size))
            goal_r = int(np_random.integers(0, size))
            goal_c = int(np_random.integers(0, size))
            if (start_r, start_c) != (goal_r, goal_c):
                break
        board[start_r][start_c] = "S"
        board[goal_r][goal_c] = "G"
        if is_valid(board, size, max_steps):
            return ["".join(row) for row in board], (goal_r, goal_c)
def get_goal_position(
    random_map: np.ndarray,
) -> Optional[Tuple[int, int]]:
    """Get the goal position from a random map.

    Args:
        random_map: The map as a numpy array whose goal tile compares
            equal to the byte string ``b"G"``.

    Returns:
        The index of the first goal tile in row-major order as a tuple of
        plain Python ints (``(row, col)`` for a 2-D map), or None if the
        map contains no goal tile.
    """
    positions = np.argwhere(random_map == b"G")
    if positions.size == 0:
        return None  # G not found
    # tolist() turns the NumPy integer scalars into built-in ints so the
    # result matches the annotation and can be serialized as-is.
    return tuple(positions[0].tolist())
# Names exported by ``from <module> import *``. ``is_valid`` and the
# lookup tables are not listed here.
__all__ = [
    "SYSTEM_PROMPT",
    "FrozenLakeAction",
    "generate_random_map",
    "get_goal_position",
]