Merge pull request #1203 from erwincoumans/master

add kukaCamGymEnv.py with camera observations (preliminary)
erwincoumans committed on 2017-06-22 10:58:51 -07:00 (committed via GitHub)
19 changed files with 297 additions and 14 deletions

@@ -1296,8 +1296,10 @@ void OpenGLExampleBrowser::update(float deltaTime)
 float pitch = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPitch();
 float yaw = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraYaw();
 float camTarget[3];
+float camPos[3];
+s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPosition(camPos);
 s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraTargetPosition(camTarget);
-sprintf(msg,"dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
+sprintf(msg,"camPos=%f,%f,%f, dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camPos[0],camPos[1],camPos[2],camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
 gui2->setStatusBarMessage(msg, true);
 }

@@ -6,7 +6,7 @@
#include "../ImportObjDemo/Wavefront2GLInstanceGraphicsShape.h" #include "../ImportObjDemo/Wavefront2GLInstanceGraphicsShape.h"
#include "../../Utils/b3ResourcePath.h" #include "../../Utils/b3ResourcePath.h"
#include "Bullet3Common/b3FileUtils.h" #include "Bullet3Common/b3FileUtils.h"
#include "../../ThirdPartyLibs/stb_image/stb_image.h" #include "stb_image/stb_image.h"
#include "../ImportObjDemo/LoadMeshFromObj.h" #include "../ImportObjDemo/LoadMeshFromObj.h"
bool b3ImportMeshUtility::loadAndRegisterMeshFromFileInternal(const std::string& fileName, b3ImportMeshData& meshData) bool b3ImportMeshUtility::loadAndRegisterMeshFromFileInternal(const std::string& fileName, b3ImportMeshData& meshData)
{ {

@@ -1,6 +1,7 @@
 INCLUDE_DIRECTORIES(
 	..
+	../ThirdPartyLibs
 	../../src
 )

@@ -1406,7 +1406,7 @@ void GLInstancingRenderer::updateCamera(int upAxis)
 //#define STB_IMAGE_WRITE_IMPLEMENTATION
-#include "stb_image_write.h"
+#include "stb_image/stb_image_write.h"
 void writeTextureToPng(int textureWidth, int textureHeight, const char* fileName, int numComponents)
 {

@@ -919,7 +919,7 @@ void SimpleOpenGL3App::getScreenPixels(unsigned char* rgbaBuffer, int bufferSize
 }
 //#define STB_IMAGE_WRITE_IMPLEMENTATION
-#include "stb_image_write.h"
+#include "stb_image/stb_image_write.h"
 static void writeTextureToFile(int textureWidth, int textureHeight, const char* fileName, FILE* ffmpegVideo)
 {
 	int numComponents = 4;

@@ -37,7 +37,7 @@
 #define STB_TRUETYPE_IMPLEMENTATION
 #define STBTT_malloc(x,u) malloc(x)
 #define STBTT_free(x,u) free(x)
-#include "stb_truetype.h"
+#include "stb_image/stb_truetype.h"
 #define HASH_LUT_SIZE 256

@@ -10,7 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #define STB_IMAGE_WRITE_IMPLEMENTATION
-#include "stb_image_write.h"
+#include "stb_image/stb_image_write.h"
 static unsigned int s_indexData[INDEX_COUNT];

@@ -9,7 +9,7 @@
 initGlew()
 includedirs {
+	"../ThirdPartyLibs",
 	"../../src",
 }

@@ -34,7 +34,7 @@ subject to the following restrictions:
#include "../Importers/ImportURDFDemo/UrdfParser.h" #include "../Importers/ImportURDFDemo/UrdfParser.h"
#include "../SharedMemory/SharedMemoryPublic.h"//for b3VisualShapeData #include "../SharedMemory/SharedMemoryPublic.h"//for b3VisualShapeData
#include "../TinyRenderer/model.h" #include "../TinyRenderer/model.h"
#include "../ThirdPartyLibs/stb_image/stb_image.h" #include "stb_image/stb_image.h"
struct MyTexture2 struct MyTexture2
{ {

@@ -124,7 +124,7 @@ struct Shader : public IShader {
 	m_projectionLightViewMat = m_projectionMat*m_lightModelView;
 }
 virtual Vec4f vertex(int iface, int nthvert) {
-	B3_PROFILE("vertex");
+	//B3_PROFILE("vertex");
 	Vec2f uv = m_model->uv(iface, nthvert);
 	varying_uv.set_col(nthvert, uv);
 	varying_nrm.set_col(nthvert, proj<3>(m_invModelMat*embed<4>(m_model->normal(iface, nthvert), 0.f)));
@@ -142,7 +142,7 @@ struct Shader : public IShader {
 }
 virtual bool fragment(Vec3f bar, TGAColor &color) {
-	B3_PROFILE("fragment");
+	//B3_PROFILE("fragment");
 	Vec4f p = m_viewportMat*(varying_tri_light_view*bar);
 	float depth = p[2];
 	p = p/p[3];
@@ -485,6 +485,7 @@ static bool clipTriangleAgainstNearplane(const mat<4,3,float>& triangleIn, b3Ali
 void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 {
+	B3_PROFILE("renderObject");
 	int width = renderData.m_rgbColorBuffer.get_width();
 	int height = renderData.m_rgbColorBuffer.get_height();
@@ -517,10 +518,11 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 	Shader shader(model, light_dir_local, light_color, modelViewMatrix, lightModelViewMatrix, renderData.m_projectionMatrix,renderData.m_modelMatrix, renderData.m_viewportMatrix, localScaling, model->getColorRGBA(), width, height, shadowBufferPtr, renderData.m_lightAmbientCoeff, renderData.m_lightDiffuseCoeff, renderData.m_lightSpecularCoeff);
+	{
+		B3_PROFILE("face");
 	for (int i=0; i<model->nfaces(); i++)
 	{
-		B3_PROFILE("face");
 		for (int j=0; j<3; j++) {
 			shader.vertex(i, j);
 		}
@@ -553,6 +555,7 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 		}
 	}
 	}
+	}
 }

@@ -43,3 +43,10 @@ register(
     timestep_limit=1000,
     reward_threshold=5.0,
 )
+
+register(
+    id='KukaCamBulletEnv-v0',
+    entry_point='envs.bullet:KukaCamGymEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
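
Once this register() call has run (it executes when the package that contains it is imported), the camera variant can be created through the Gym registry just like the other Bullet environments. A minimal sketch, assuming the gym API of this era and that the registering package is importable as `envs` (an assumption, matching the entry_point above):

    import gym
    import envs  # assumption: importing this package executes the register() calls above

    env = gym.make('KukaCamBulletEnv-v0')
    obs = env.reset()  # RGBA camera image, shape (height, width, 4)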

@@ -4,3 +4,4 @@ from envs.bullet.racecarGymEnv import RacecarGymEnv
 from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 from envs.bullet.humanoidGymEnv import HumanoidGymEnv
 from envs.bullet.kukaGymEnv import KukaGymEnv
+from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

@@ -0,0 +1,191 @@
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import pybullet as p
from . import kuka
import random


class KukaCamGymEnv(gym.Env):
  metadata = {
      'render.modes': ['human', 'rgb_array'],
      'video.frames_per_second': 50
  }

  def __init__(self,
               urdfRoot="",
               actionRepeat=1,
               isEnableSelfCollision=True,
               renders=True):
    print("init")
    self._timeStep = 1./240.
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._isEnableSelfCollision = isEnableSelfCollision
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._width = 341
    self._height = 256
    self.terminated = 0
    self._p = p
    if self._renders:
      p.connect(p.GUI)
      p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
    else:
      p.connect(p.DIRECT)
    #timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
    self._seed()
    self.reset()
    observationDim = len(self.getExtendedObservation())
    #print("observationDim")
    #print(observationDim)

    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
    self.action_space = spaces.Discrete(7)
    self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
    self.viewer = None

  def _reset(self):
    print("reset")
    self.terminated = 0
    p.resetSimulation()
    p.setPhysicsEngineParameter(numSolverIterations=150)
    p.setTimeStep(self._timeStep)
    p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1])

    p.loadURDF("table/table.urdf", 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)

    xpos = 0.5 +0.05*random.random()
    ypos = 0 +0.05*random.random()
    ang = 3.1415925438*random.random()
    orn = p.getQuaternionFromEuler([0,0,ang])
    self.blockUid =p.loadURDF("block.urdf", xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])

    p.setGravity(0,0,-10)
    self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
    self._envStepCounter = 0
    p.stepSimulation()
    self._observation = self.getExtendedObservation()
    return np.array(self._observation)

  def __del__(self):
    p.disconnect()

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def getExtendedObservation(self):
    #camEyePos = [0.03,0.236,0.54]
    #distance = 1.06
    #pitch=-56
    #yaw = 258
    #roll=0
    #upAxisIndex = 2
    #camInfo = p.getDebugVisualizerCamera()
    #print("width,height")
    #print(camInfo[0])
    #print(camInfo[1])
    #print("viewMatrix")
    #print(camInfo[2])
    #print("projectionMatrix")
    #print(camInfo[3])
    #viewMat = camInfo[2]
    #viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
    viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
    #projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
    projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]

    img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
    rgb=img_arr[2]
    np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
    self._observation = np_img_arr
    return self._observation

  def _step(self, action):
    dv = 0.01
    dx = [0,-dv,dv,0,0,0,0][action]
    dy = [0,0,0,-dv,dv,0,0][action]
    da = [0,0,0,0,0,-0.1,0.1][action]
    f = 0.3
    realAction = [dx,dy,-0.002,da,f]
    return self.step2(realAction)

  def step2(self, action):
    self._kuka.applyAction(action)
    for i in range(self._actionRepeat):
      p.stepSimulation()
      if self._renders:
        time.sleep(self._timeStep)
      self._observation = self.getExtendedObservation()
      if self._termination():
        break
      self._envStepCounter += 1
    #print("self._envStepCounter")
    #print(self._envStepCounter)
    done = self._termination()
    reward = self._reward()
    #print("len=%r" % len(self._observation))
    return np.array(self._observation), reward, done, {}

  def _render(self, mode='human', close=False):
    return

  def _termination(self):
    #print (self._kuka.endEffectorPos[2])
    state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
    actualEndEffectorPos = state[0]

    #print("self._envStepCounter")
    #print(self._envStepCounter)
    if (self.terminated or self._envStepCounter>1000):
      self._observation = self.getExtendedObservation()
      return True

    if (actualEndEffectorPos[2] <= 0.10):
      self.terminated = 1

      #print("closing gripper, attempting grasp")
      #start grasp and terminate
      fingerAngle = 0.3
      for i in range(1000):
        graspAction = [0,0,0.001,0,fingerAngle]
        self._kuka.applyAction(graspAction)
        p.stepSimulation()
        fingerAngle = fingerAngle-(0.3/100.)
        if (fingerAngle<0):
          fingerAngle=0

      self._observation = self.getExtendedObservation()
      return True
    return False

  def _reward(self):
    #rewards is height of target object
    blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
    reward = -1000
    numPt = len(closestPoints)
    #print(numPt)
    if (numPt>0):
      #print("reward:")
      reward = -closestPoints[0][8]*10
    if (blockPos[2] >0.2):
      print("grasped a block!!!")
      print("self._envStepCounter")
      print(self._envStepCounter)
      reward = reward+1000

    #print("reward")
    #print(reward)
    return reward
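
A minimal interaction sketch for the new environment (assuming pybullet and its URDF data files are installed, and using the gym-0.9-style API this class targets); it drives the arm with random discrete actions and checks the shape of the RGBA camera observation:

    import numpy as np
    from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

    env = KukaCamGymEnv(renders=False)      # DIRECT mode, no GUI window
    obs = env.reset()
    print(obs.shape)                        # expected (256, 341, 4): RGBA pixels from getCameraImage
    for _ in range(10):
        action = env.action_space.sample()  # one of 7 discrete end-effector/gripper moves
        obs, reward, done, info = env.step(action)
        if done:
            break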

@@ -0,0 +1,44 @@
import gym
from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

from baselines import deepq

import datetime


def callback(lcl, glb):
  # stop training if reward exceeds 199
  total = sum(lcl['episode_rewards'][-101:-1]) / 100
  totalt = lcl['t']
  #print("totalt")
  #print(totalt)
  is_solved = totalt > 2000 and total >= 10
  return is_solved


def main():
  env = KukaCamGymEnv(renders=True)
  model = deepq.models.cnn_to_mlp(
    convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
    hiddens=[256],
    dueling=False
  )
  act = deepq.learn(
    env,
    q_func=model,
    lr=1e-3,
    max_timesteps=10000000,
    buffer_size=50000,
    exploration_fraction=0.1,
    exploration_final_eps=0.02,
    print_freq=10,
    callback=callback
  )
  print("Saving model to kuka_cam_model.pkl")
  act.save("kuka_cam_model.pkl")


if __name__ == '__main__':
  main()
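
The saved pickle can later be reloaded for evaluation; a rough sketch, assuming this vintage of baselines exposes deepq.load (an assumption about the library API, not part of this commit):

    from baselines import deepq
    from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

    env = KukaCamGymEnv(renders=True)
    act = deepq.load("kuka_cam_model.pkl")   # assumption: deepq.load is available in this baselines version
    obs, done = env.reset(), False
    while not done:
        obs, rew, done, _ = env.step(act(obs[None])[0])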

@@ -0,0 +1,33 @@
import gym
from envs.bullet.cartpole_bullet import CartPoleBulletEnv

from baselines import deepq


def callback(lcl, glb):
  # stop training if reward exceeds 199
  is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
  return is_solved


def main():
  env = gym.make('CartPoleBulletEnv-v0')
  model = deepq.models.mlp([64])
  act = deepq.learn(
    env,
    q_func=model,
    lr=1e-3,
    max_timesteps=100000,
    buffer_size=50000,
    exploration_fraction=0.1,
    exploration_final_eps=0.02,
    print_freq=10,
    callback=callback
  )
  print("Saving model to cartpole_model.pkl")
  act.save("cartpole_model.pkl")


if __name__ == '__main__':
  main()

@@ -37,6 +37,7 @@ INCLUDE_DIRECTORIES(
 	../../src
 	../gtest-1.7.0/include
 	../../Extras/InverseDynamics
+	../../examples/ThirdPartyLibs
 )

@@ -85,8 +85,8 @@
"../../examples/InverseDynamics", "../../examples/InverseDynamics",
"../../examples/ThirdPartyLibs", "../../examples/ThirdPartyLibs",
"../../Extras/InverseDynamics", "../../Extras/InverseDynamics",
"../gtest-1.7.0/include" "../gtest-1.7.0/include",
"../../examples/ThirdPartyLibs",
} }