Quick Start Guide¶
This guide will walk you through creating your first RL agent with EldenGym.
Basic Setup¶
import eldengym
# Create environment (use eldengym.make() for registered environments)
env = eldengym.make("Margit-v0", launch_game=False)
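Before writing any agent code, it's worth inspecting the spaces the environment exposes. A quick check, using the standard Gymnasium attributes:

print(env.observation_space)  # Dict space: 'frame' plus the polled memory attributes
print(env.action_space)       # MultiBinary vector over the configured keys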
Simple Random Agent¶
# Reset environment
observation, info = env.reset()

# observation is a dict with 'frame' and memory attributes
print(f"Observation keys: {observation.keys()}")
print(f"Frame shape: {observation['frame'].shape}")

# Run for 100 steps
for step in range(100):
    # Sample random action (MultiBinary action space)
    action = env.action_space.sample()

    # Take action
    observation, reward, terminated, truncated, info = env.step(action)

    # Info contains normalized HP values
    hero_hp_pct = info.get('normalized_hero_hp', 0) * 100
    print(f"Step {step}: Reward={reward:.2f}, HP={hero_hp_pct:.1f}%")

    # Reset if episode ends
    if terminated or truncated:
        observation, info = env.reset()
        print("Episode ended - resetting")

env.close()
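To get per-episode summaries instead of per-step prints, accumulate the reward between resets. A small variation on the loop above, using only the API already shown:

env = eldengym.make("Margit-v0", launch_game=False)
observation, info = env.reset()
episode_return, episode_length = 0.0, 0

for step in range(100):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    episode_return += reward
    episode_length += 1

    if terminated or truncated:
        print(f"Episode done: return={episode_return:.2f}, length={episode_length}")
        observation, info = env.reset()
        episode_return, episode_length = 0.0, 0

env.close()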
With Stable-Baselines3¶
Note: SB3 needs either a flattened observation space or its MultiInputPolicy to handle the Dict observations; this example applies preprocessing wrappers and uses MultiInputPolicy:
import eldengym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
# Create environment with wrappers
def make_env():
    env = eldengym.make("Margit-v0", launch_game=False)
    # Apply preprocessing wrappers
    env = eldengym.DictResizeFrame(env, width=84, height=84)
    env = eldengym.DictGrayscaleFrame(env)
    env = eldengym.DictFrameStack(env, num_stack=4)
    env = eldengym.NormalizeMemoryAttributes(env)
    # TODO: Add FlattenDictObservation wrapper for SB3 compatibility
    return env
# Create vectorized environment
env = DummyVecEnv([make_env])
# Initialize PPO agent
model = PPO(
    "MultiInputPolicy",  # Use MultiInputPolicy for Dict observations
    env,
    verbose=1,
    learning_rate=3e-4,
    n_steps=2048,
)
# Train the agent
model.learn(total_timesteps=100_000)
# Save the model
model.save("margit_ppo")
# Test the trained agent
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
env.close()
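For a quick quantitative check of the trained policy, SB3's evaluate_policy helper runs a fixed number of evaluation episodes and reports the mean and standard deviation of the return:

from stable_baselines3.common.evaluation import evaluate_policy

eval_env = DummyVecEnv([make_env])
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=5)
print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")
eval_env.close()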
Custom Reward Function¶
import eldengym
from eldengym.rewards import RewardFunction
class AggressiveReward(RewardFunction):
    """Reward function that encourages aggressive play."""

    def calculate_reward(self, obs, info, prev_info):
        reward = 0.0
        if prev_info is None:
            return 0.0

        # Current normalized HP values
        hero_hp = info.get('normalized_hero_hp', 0)
        npc_hp = info.get('normalized_npc_hp', 1)

        # Previous normalized HP values
        prev_hero_hp = prev_info.get('normalized_hero_hp', hero_hp)
        prev_npc_hp = prev_info.get('normalized_npc_hp', npc_hp)

        # Reward for damaging the boss (previous HP minus current HP
        # is positive when the boss has taken damage)
        npc_damage = prev_npc_hp - npc_hp
        if npc_damage > 0:
            reward += npc_damage * 100.0

        # Penalty for taking damage
        hero_damage = prev_hero_hp - hero_hp
        if hero_damage > 0:
            reward -= hero_damage * 50.0

        return reward

    def check_termination(self, obs, info):
        """End episode when player or boss dies."""
        hero_hp = info.get('normalized_hero_hp', 1)
        npc_hp = info.get('normalized_npc_hp', 1)
        return hero_hp <= 0 or npc_hp <= 0
# Use custom reward
env = eldengym.make(
    "Margit-v0",
    launch_game=False,
    reward_function=AggressiveReward()
)
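The same hooks support further shaping. As an illustrative sketch (the 0.01 per-step penalty and 10.0 win bonus are arbitrary values, not EldenGym defaults), a variant that also discourages stalling:

class AggressiveTimedReward(AggressiveReward):
    """Aggressive reward plus a time penalty and a win bonus."""

    def calculate_reward(self, obs, info, prev_info):
        reward = super().calculate_reward(obs, info, prev_info)
        reward -= 0.01  # constant per-step penalty to discourage stalling
        if info.get('normalized_npc_hp', 1) <= 0:
            reward += 10.0  # one-time bonus on the step the boss dies
        return reward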
Action Space (MultiBinary)¶
The environment uses a MultiBinary action space, where each element represents a key:
env = eldengym.make("Margit-v0", launch_game=False)
# Check the action keys
print(f"Action keys: {env.action_keys}")
# ['W', 'A', 'S', 'D', 'SPACE', 'E', 'Q', 'R']
# Create action (each element is 0 or 1)
action = [1, 0, 1, 0, 1, 0, 0, 0] # Press W + S + SPACE simultaneously
# Keys are toggled intelligently - only changed when state differs
obs, reward, terminated, truncated, info = env.step(action)
Keys are configured in keybinds.json and can be customized per environment.
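Hard-coded index lists are easy to get wrong. A hypothetical helper (not part of EldenGym) that builds the action vector from key names via env.action_keys:

import numpy as np

def make_action(env, *keys):
    """Hypothetical helper: build a MultiBinary action from key names."""
    action = np.zeros(len(env.action_keys), dtype=np.int8)
    for key in keys:
        action[env.action_keys.index(key)] = 1
    return action

action = make_action(env, "W", "SPACE")  # press W and SPACE together
obs, reward, terminated, truncated, info = env.step(action)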
Environment Options¶
env = eldengym.make(
    "Margit-v0",                  # Registered environment
    launch_game=False,            # Don't launch if game already running
    memory_attributes=[           # Memory values to poll
        "HeroHp", "HeroMaxHp",
        "NpcHp", "NpcMaxHp",
        "HeroAnimId", "NpcAnimId"
    ],
    frame_format="jpeg",          # Frame format ('jpeg' or 'raw')
    frame_quality=85,             # JPEG quality (1-100)
    max_steps=1000,               # Max steps per episode
    save_file_name="margit.sl2",  # Backup save to copy on reset
    save_file_dir=r"C:\...\EldenRing\...",  # Save file directory
    reward_function=eldengym.ScoreDeltaReward(),  # Reward calculator
)
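The exact keys that appear in info depend on the memory_attributes you request. If you're unsure what your configuration exposes, step once and print them:

obs, info = env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
print(sorted(info.keys()))  # e.g. the normalized HP values used earlier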
Monitoring Training¶
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
import eldengym
# Save model checkpoints
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path="./checkpoints/",
    name_prefix="margit_model"
)
# Create evaluation environment with the same wrappers as the training env
eval_env = eldengym.make("Margit-v0", launch_game=False)
eval_env = eldengym.DictResizeFrame(eval_env, width=84, height=84)
eval_env = eldengym.DictGrayscaleFrame(eval_env)
eval_env = eldengym.DictFrameStack(eval_env, num_stack=4)
eval_env = eldengym.NormalizeMemoryAttributes(eval_env)

eval_callback = EvalCallback(
    eval_env,
    best_model_save_path="./best_model/",
    log_path="./logs/",
    eval_freq=5000,
)
# Train with callbacks
model.learn(
    total_timesteps=500_000,
    callback=[checkpoint_callback, eval_callback]
)
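To watch learning curves live, you can also enable SB3's built-in TensorBoard logging when constructing the model (the ./tb_logs/ path is just an example):

model = PPO(
    "MultiInputPolicy",
    env,
    verbose=1,
    tensorboard_log="./tb_logs/",
)
# Then, in a shell: tensorboard --logdir ./tb_logs/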
Next Steps¶
- Explore Action Spaces
- Learn about Observations
- See Examples