Example runs
Operate the robot with the arrow keys without reinforcement learning.
import gym
import gym.spaces
import gym_foodhunting
import pybullet as p


def getAction():
    # Map PyBullet keyboard events to the discrete actions:
    # UP arrow -> 0, LEFT arrow -> 1, RIGHT arrow -> 2, no key pressed -> 0.
    keys = p.getKeyboardEvents()
    if p.B3G_UP_ARROW in keys and keys[p.B3G_UP_ARROW] & p.KEY_IS_DOWN:
        return 0
    elif p.B3G_LEFT_ARROW in keys and keys[p.B3G_LEFT_ARROW] & p.KEY_IS_DOWN:
        return 1
    elif p.B3G_RIGHT_ARROW in keys and keys[p.B3G_RIGHT_ARROW] & p.KEY_IS_DOWN:
        return 2
    else:
        return 0


def main():
    env = gym.make('FoodHuntingDiscreteGUI-v0')
    # env = gym.make('FoodHuntingDiscrete-v0')
    print(env.observation_space, env.action_space)
    obs = env.reset()
    while True:  # runs until interrupted, e.g. with Ctrl-C
        action = getAction()
        # action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        # print(action, obs, reward, done, info)
        if done:
            obs = env.reset()
    env.close()


if __name__ == '__main__':
    main()
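The commented-out lines hint at a headless variant: FoodHuntingDiscrete-v0 without the GUI, driven by random actions from env.action_space.sample(). Below is a minimal sketch along those lines that runs a few episodes and prints each episode's total reward (the function name and episode count are arbitrary choices, not part of the repository):

import gym
import gym_foodhunting


def random_rollout(n_episodes=3):
    # Headless environment with a random policy; a quick sanity check before training anything.
    env = gym.make('FoodHuntingDiscrete-v0')
    for episode in range(n_episodes):
        obs = env.reset()
        done, total_reward = False, 0.0
        while not done:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            total_reward += reward
        print(episode, total_reward)
    env.close()


if __name__ == '__main__':
    random_rollout()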
PPO is used as the RL algorithm and a CNN as the neural network model.
import argparse
import gym
import gym_foodhunting
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2


def learn(env_name, save_file, total_timesteps):
    # Train a PPO2 agent with a CNN policy and save the model.
    env = DummyVecEnv([lambda: gym.make(env_name)])
    model = PPO2(CnnPolicy, env, verbose=1)
    model.learn(total_timesteps=total_timesteps)
    model.save(save_file)
    del model
    env.close()


def play(env_name, load_file, total_timesteps):
    # Load a trained model and run it in the environment.
    env = DummyVecEnv([lambda: gym.make(env_name)])
    model = PPO2.load(load_file, verbose=1)
    obs = env.reset()
    for i in range(total_timesteps):
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        # env.render() # dummy
        if done:
            print(info)
    del model
    env.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--play', action='store_true', help='play or learn.')
    parser.add_argument('--env_name', type=str, default='FoodHuntingDiscreteGUI-v0', help='environment name.')
    parser.add_argument('--filename', type=str, default='saved_model', help='filename to save/load model.')
    parser.add_argument('--total_timesteps', type=int, default=10000, help='total timesteps.')
    args = parser.parse_args()
    if args.play:
        play(args.env_name, args.filename, args.total_timesteps)
    else:
        learn(args.env_name, args.filename, args.total_timesteps)
Learning:
# Learn
python examples/example_rl.py --env_name="FoodHuntingDiscrete-v0" --total_timesteps=10000 --filename="saved_model"
Evaluation:
# Play without GUI
python examples/example_rl.py --env_name="FoodHuntingDiscrete-v0" --total_timesteps=10000 --filename="saved_model" --play
# Play with GUI
python examples/example_rl.py --env_name="FoodHuntingDiscreteGUI-v0" --total_timesteps=10000 --filename="saved_model" --play
Multiple environments are executed in parallel to reduce computation time (the n_cpu parameter). Training is monitored, and the model file is saved whenever the best mean episode reward so far is updated. Learning is terminated once the mean episode reward exceeds the specified threshold (the reward_threshold parameter).
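The actual implementation is agents/ppo_agent.py (referenced below); the following is only a rough sketch of that mechanism built from standard stable_baselines pieces. It assumes a Monitor wrapper writing episode rewards to a local monitor_log directory; the directory name, constants, and best_model.pkl filename are placeholders, not the names used by ppo_agent.py:

import os
import gym
import gym_foodhunting
import numpy as np
from stable_baselines.bench import Monitor
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.results_plotter import load_results, ts2xy
from stable_baselines import PPO2

LOG_DIR = 'monitor_log'    # placeholder directory for Monitor files
REWARD_THRESHOLD = 3.0     # plays the role of --reward_threshold
N_CPU = 8                  # plays the role of --n_cpu
best_mean_reward = -np.inf


def make_env(rank):
    def _init():
        env = gym.make('FoodHuntingHSRDiscrete-v1')
        # Monitor records per-episode rewards so the callback can read them.
        return Monitor(env, os.path.join(LOG_DIR, str(rank)))
    return _init


def callback(_locals, _globals):
    # Called periodically by model.learn(); returning False stops training.
    global best_mean_reward
    x, y = ts2xy(load_results(LOG_DIR), 'timesteps')
    if len(y) > 0:
        mean_reward = np.mean(y[-100:])
        if mean_reward > best_mean_reward:
            best_mean_reward = mean_reward
            _locals['self'].save('best_model.pkl')  # save on a new best mean reward
        if mean_reward >= REWARD_THRESHOLD:
            return False  # reward threshold reached: terminate learning
    return True


if __name__ == '__main__':
    os.makedirs(LOG_DIR, exist_ok=True)
    # SubprocVecEnv runs N_CPU environments in parallel processes.
    env = SubprocVecEnv([make_env(i) for i in range(N_CPU)])
    model = PPO2(CnnPolicy, env, verbose=1, tensorboard_log='tblog')
    model.learn(total_timesteps=500000, callback=callback)
    env.close()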
Source code:
- PyLIS/gym-foodhunting/agents/ppo_agent.py
cd gym-foodhunting
# Run this to enable SubprocVecEnv on Mac OS X.
# export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
# See https://github.com/rtomayko/shotgun/issues/69#issuecomment-338401331
# See available options.
python agents/ppo_agent.py --help
# Learn
# This may take a few hours.
time python agents/ppo_agent.py --env_name="FoodHuntingHSRDiscrete-v1" --total_timesteps=500000 --n_cpu=8 --reward_threshold=3.0 --tensorboard_log="tblog"
# Monitor
tensorboard --logdir tblog
# Open web browser and access http://localhost:6006/
# Play with GUI
# This will open PyBullet window.
time python agents/ppo_agent.py --env_name="FoodHuntingHSRDiscrete-v1" --load_file="FoodHuntingHSR-v1_best.pkl" --total_timesteps=500000 --n_cpu=8 --play