import gym from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv from baselines import deepq import datetime def callback(lcl, glb): # stop training if reward exceeds 199 total = sum(lcl['episode_rewards'][-101:-1]) / 100 totalt = lcl['t'] is_solved = totalt > 2000 and total >= -50 return is_solved def main(): env = RacecarZEDGymEnv(renders=False) model = deepq.models.mlp([64]) act = deepq.learn( env, q_func=model, lr=1e-3, max_timesteps=10000, buffer_size=50000, exploration_fraction=0.1, exploration_final_eps=0.02, print_freq=10, callback=callback ) print("Saving model to racecar_model.pkl") act.save("racecar_model.pkl") if __name__ == '__main__': main()