From 9213f944f19080908acb3e1ac3609acebbd68ce1 Mon Sep 17 00:00:00 2001
From: Erwin Coumans
Date: Wed, 21 Jun 2017 09:33:46 -0700
Subject: [PATCH] add kukaCamGymEnv.py with camera observations (preliminary)
 show camera position in example browser
 disable per-vertex and per-fragment profile timings

---
 .../ExampleBrowser/OpenGLExampleBrowser.cpp   |   4 +-
 examples/TinyRenderer/TinyRenderer.cpp        |  11 +-
 examples/pybullet/gym/envs/__init__.py        |   7 +
 examples/pybullet/gym/envs/bullet/__init__.py |   1 +
 .../pybullet/gym/envs/bullet/kukaCamGymEnv.py | 191 ++++++++++++++++++
 .../pybullet/gym/train_kuka_cam_grasping.py   |  44 ++++
 .../pybullet/gym/train_pybullet_cartpole.py   |  33 +++
 7 files changed, 286 insertions(+), 5 deletions(-)
 create mode 100644 examples/pybullet/gym/envs/bullet/kukaCamGymEnv.py
 create mode 100644 examples/pybullet/gym/train_kuka_cam_grasping.py
 create mode 100644 examples/pybullet/gym/train_pybullet_cartpole.py

diff --git a/examples/ExampleBrowser/OpenGLExampleBrowser.cpp b/examples/ExampleBrowser/OpenGLExampleBrowser.cpp
index b4e7ea3d6..7459729af 100644
--- a/examples/ExampleBrowser/OpenGLExampleBrowser.cpp
+++ b/examples/ExampleBrowser/OpenGLExampleBrowser.cpp
@@ -1296,8 +1296,10 @@ void OpenGLExampleBrowser::update(float deltaTime)
 			float pitch = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPitch();
 			float yaw = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraYaw();
 			float camTarget[3];
+			float camPos[3];
+			s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPosition(camPos);
 			s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraTargetPosition(camTarget);
-			sprintf(msg,"dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
+			sprintf(msg,"camPos=%f,%f,%f, dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camPos[0],camPos[1],camPos[2],camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
 			gui2->setStatusBarMessage(msg, true);
 		}
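The status-bar change above is C++-side only; the same camera state is also visible from pybullet scripts. A minimal sketch of reading it back through the debug visualizer API (assuming a GUI connection; the camera placement matches the one used by the new Kuka camera environment later in this patch):

import pybullet as p

# A GUI connection is needed for a debug visualizer camera to exist.
p.connect(p.GUI)

# Same placement as kukaCamGymEnv.py below: distance, yaw, pitch, target.
p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])

# The first four fields are image width, image height, view matrix and
# projection matrix, which is what the commented-out camInfo[0..3] code in
# kukaCamGymEnv.py reads as well.
camInfo = p.getDebugVisualizerCamera()
width, height, viewMat, projMat = camInfo[0], camInfo[1], camInfo[2], camInfo[3]
print("width,height:", width, height)
print("viewMatrix:", viewMat)
print("projectionMatrix:", projMat)

p.disconnect()
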
diff --git a/examples/TinyRenderer/TinyRenderer.cpp b/examples/TinyRenderer/TinyRenderer.cpp
index 23f9fda10..a2ea07307 100644
--- a/examples/TinyRenderer/TinyRenderer.cpp
+++ b/examples/TinyRenderer/TinyRenderer.cpp
@@ -124,7 +124,7 @@ struct Shader : public IShader {
         m_projectionLightViewMat = m_projectionMat*m_lightModelView;
     }
     virtual Vec4f vertex(int iface, int nthvert) {
-        B3_PROFILE("vertex");
+        //B3_PROFILE("vertex");
         Vec2f uv = m_model->uv(iface, nthvert);
         varying_uv.set_col(nthvert, uv);
         varying_nrm.set_col(nthvert, proj<3>(m_invModelMat*embed<4>(m_model->normal(iface, nthvert), 0.f)));
@@ -142,7 +142,7 @@ struct Shader : public IShader {
     }
 
     virtual bool fragment(Vec3f bar, TGAColor &color) {
-        B3_PROFILE("fragment");
+        //B3_PROFILE("fragment");
         Vec4f p = m_viewportMat*(varying_tri_light_view*bar);
         float depth = p[2];
         p = p/p[3];
@@ -485,6 +485,7 @@ static bool clipTriangleAgainstNearplane(const mat<4,3,float>& triangleIn, b3Ali
 
 void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 {
+    B3_PROFILE("renderObject");
     int width = renderData.m_rgbColorBuffer.get_width();
     int height = renderData.m_rgbColorBuffer.get_height();
 
@@ -517,10 +518,11 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 
     Shader shader(model, light_dir_local, light_color, modelViewMatrix, lightModelViewMatrix, renderData.m_projectionMatrix,renderData.m_modelMatrix, renderData.m_viewportMatrix, localScaling, model->getColorRGBA(), width, height, shadowBufferPtr, renderData.m_lightAmbientCoeff, renderData.m_lightDiffuseCoeff, renderData.m_lightSpecularCoeff);
-
+    {
+        B3_PROFILE("face");
+
     for (int i=0; i<model->nfaces(); i++)
     {
-        B3_PROFILE("face");
         for (int j=0; j<3; j++) {
             shader.vertex(i, j);
         }
@@ -552,6 +554,7 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
             triangle(shader.varying_tri, shader, frame, &zbuffer[0], segmentationMaskBufferPtr, renderData.m_viewportMatrix, renderData.m_objectIndex);
         }
     }
+    }
 }
 
 }
diff --git a/examples/pybullet/gym/envs/__init__.py b/examples/pybullet/gym/envs/__init__.py
index d8b096113..ae0c1c81f 100644
--- a/examples/pybullet/gym/envs/__init__.py
+++ b/examples/pybullet/gym/envs/__init__.py
@@ -42,4 +42,11 @@ register(
     entry_point='envs.bullet:KukaGymEnv',
     timestep_limit=1000,
     reward_threshold=5.0,
+)
+
+register(
+    id='KukaCamBulletEnv-v0',
+    entry_point='envs.bullet:KukaCamGymEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
 )
\ No newline at end of file
diff --git a/examples/pybullet/gym/envs/bullet/__init__.py b/examples/pybullet/gym/envs/bullet/__init__.py
index 708ae4e53..f86a691b1 100644
--- a/examples/pybullet/gym/envs/bullet/__init__.py
+++ b/examples/pybullet/gym/envs/bullet/__init__.py
@@ -4,3 +4,4 @@ from envs.bullet.racecarGymEnv import RacecarGymEnv
 from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 from envs.bullet.humanoidGymEnv import HumanoidGymEnv
 from envs.bullet.kukaGymEnv import KukaGymEnv
+from envs.bullet.kukaCamGymEnv import KukaCamGymEnv
\ No newline at end of file
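With the register() call above, the camera environment becomes reachable through the usual gym factory. A minimal sketch, assuming it is run from examples/pybullet/gym so that the envs package (and its register() calls) can be imported; note that the default constructor opens a GUI connection:

import gym
import envs  # importing the package runs the register() calls above

env = gym.make('KukaCamBulletEnv-v0')
obs = env.reset()
print(obs.shape)  # RGBA camera image, reshaped by the env to (256, 341, 4)
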
diff --git a/examples/pybullet/gym/envs/bullet/kukaCamGymEnv.py b/examples/pybullet/gym/envs/bullet/kukaCamGymEnv.py
new file mode 100644
index 000000000..2af6774cd
--- /dev/null
+++ b/examples/pybullet/gym/envs/bullet/kukaCamGymEnv.py
@@ -0,0 +1,191 @@
+import math
+import gym
+from gym import spaces
+from gym.utils import seeding
+import numpy as np
+import time
+import pybullet as p
+from . import kuka
+import random
+
+class KukaCamGymEnv(gym.Env):
+  metadata = {
+      'render.modes': ['human', 'rgb_array'],
+      'video.frames_per_second' : 50
+  }
+
+  def __init__(self,
+               urdfRoot="",
+               actionRepeat=1,
+               isEnableSelfCollision=True,
+               renders=True):
+    print("init")
+    self._timeStep = 1./240.
+    self._urdfRoot = urdfRoot
+    self._actionRepeat = actionRepeat
+    self._isEnableSelfCollision = isEnableSelfCollision
+    self._observation = []
+    self._envStepCounter = 0
+    self._renders = renders
+    self._width = 341
+    self._height = 256
+    self.terminated = 0
+    self._p = p
+    if self._renders:
+      p.connect(p.GUI)
+      p.resetDebugVisualizerCamera(1.3,180,-41,[0.52,-0.2,-0.33])
+    else:
+      p.connect(p.DIRECT)
+    #timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
+    self._seed()
+    self.reset()
+    observationDim = len(self.getExtendedObservation())
+    #print("observationDim")
+    #print(observationDim)
+
+    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
+    self.action_space = spaces.Discrete(7)
+    self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
+    self.viewer = None
+
+  def _reset(self):
+    print("reset")
+    self.terminated = 0
+    p.resetSimulation()
+    p.setPhysicsEngineParameter(numSolverIterations=150)
+    p.setTimeStep(self._timeStep)
+    p.loadURDF("%splane.urdf" % self._urdfRoot,[0,0,-1])
+
+    p.loadURDF("table/table.urdf", 0.5000000,0.00000,-.820000,0.000000,0.000000,0.0,1.0)
+
+    xpos = 0.5 +0.05*random.random()
+    ypos = 0 +0.05*random.random()
+    ang = 3.1415925438*random.random()
+    orn = p.getQuaternionFromEuler([0,0,ang])
+    self.blockUid =p.loadURDF("block.urdf", xpos,ypos,-0.1,orn[0],orn[1],orn[2],orn[3])
+
+    p.setGravity(0,0,-10)
+    self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
+    self._envStepCounter = 0
+    p.stepSimulation()
+    self._observation = self.getExtendedObservation()
+    return np.array(self._observation)
+
+  def __del__(self):
+    p.disconnect()
+
+  def _seed(self, seed=None):
+    self.np_random, seed = seeding.np_random(seed)
+    return [seed]
+
+  def getExtendedObservation(self):
+
+     #camEyePos = [0.03,0.236,0.54]
+     #distance = 1.06
+     #pitch=-56
+     #yaw = 258
+     #roll=0
+     #upAxisIndex = 2
+     #camInfo = p.getDebugVisualizerCamera()
+     #print("width,height")
+     #print(camInfo[0])
+     #print(camInfo[1])
+     #print("viewMatrix")
+     #print(camInfo[2])
+     #print("projectionMatrix")
+     #print(camInfo[3])
+     #viewMat = camInfo[2]
+     #viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos,distance,yaw, pitch,roll,upAxisIndex)
+     viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
+     #projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
+     projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
+
+     img_arr = p.getCameraImage(width=self._width,height=self._height,viewMatrix=viewMat,projectionMatrix=projMatrix)
+     rgb=img_arr[2]
+     np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
+     self._observation = np_img_arr
+     return self._observation
+
+  def _step(self, action):
+    dv = 0.01
+    dx = [0,-dv,dv,0,0,0,0][action]
+    dy = [0,0,0,-dv,dv,0,0][action]
+    da = [0,0,0,0,0,-0.1,0.1][action]
+    f = 0.3
+    realAction = [dx,dy,-0.002,da,f]
+    return self.step2( realAction)
+
+  def step2(self, action):
+    self._kuka.applyAction(action)
+    for i in range(self._actionRepeat):
+      p.stepSimulation()
+      if self._renders:
+        time.sleep(self._timeStep)
+      self._observation = self.getExtendedObservation()
+      if self._termination():
+        break
+      self._envStepCounter += 1
+    #print("self._envStepCounter")
+    #print(self._envStepCounter)
+
+    done = self._termination()
+    reward = self._reward()
+    #print("len=%r" % len(self._observation))
+
+    return np.array(self._observation), reward, done, {}
+
+  def _render(self, mode='human', close=False):
+      return
+
+  def _termination(self):
+    #print (self._kuka.endEffectorPos[2])
+    state = p.getLinkState(self._kuka.kukaUid,self._kuka.kukaEndEffectorIndex)
+    actualEndEffectorPos = state[0]
+
+    #print("self._envStepCounter")
+    #print(self._envStepCounter)
+    if (self.terminated or self._envStepCounter>1000):
+      self._observation = self.getExtendedObservation()
+      return True
+
+    if (actualEndEffectorPos[2] <= 0.10):
+      self.terminated = 1
+
+      #print("closing gripper, attempting grasp")
+      #start grasp and terminate
+      fingerAngle = 0.3
+
+      for i in range (1000):
+        graspAction = [0,0,0.001,0,fingerAngle]
+        self._kuka.applyAction(graspAction)
+        p.stepSimulation()
+        fingerAngle = fingerAngle-(0.3/100.)
+        if (fingerAngle<0):
+          fingerAngle=0
+
+      self._observation = self.getExtendedObservation()
+      return True
+    return False
+
+  def _reward(self):
+
+    #rewards is height of target object
+    blockPos,blockOrn=p.getBasePositionAndOrientation(self.blockUid)
+    closestPoints = p.getClosestPoints(self.blockUid,self._kuka.kukaUid,1000)
+
+    reward = -1000
+    numPt = len(closestPoints)
+    #print(numPt)
+    if (numPt>0):
+      #print("reward:")
+      reward = -closestPoints[0][8]*10
+
+    if (blockPos[2] >0.2):
+      print("grasped a block!!!")
+      print("self._envStepCounter")
+      print(self._envStepCounter)
+      reward = reward+1000
+
+    #print("reward")
+    #print(reward)
+    return reward
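getExtendedObservation() above ships with a hard-coded view and projection matrix, apparently captured once from the debug visualizer (see the commented-out camEyePos/distance/yaw/pitch block). A sketch of building equivalent matrices with pybullet's helpers instead of literal lists; the FOV and near/far values here are illustrative assumptions, not values taken from this patch:

import pybullet as p

p.connect(p.DIRECT)

# Values from the commented-out block in getExtendedObservation(); the first
# argument is named camEyePos there, but the API treats it as the target.
camTargetPos = [0.03, 0.236, 0.54]
distance = 1.06
yaw, pitch, roll = 258, -56, 0
upAxisIndex = 2
viewMat = p.computeViewMatrixFromYawPitchRoll(camTargetPos, distance,
                                              yaw, pitch, roll, upAxisIndex)

# Assumed projection parameters; the hard-coded projMatrix above bakes in a
# particular aspect ratio and near/far planes.
width, height = 341, 256
projMat = p.computeProjectionMatrixFOV(fov=60,
                                       aspect=float(width) / height,
                                       nearVal=0.01,
                                       farVal=100.0)

img_arr = p.getCameraImage(width=width, height=height,
                           viewMatrix=viewMat, projectionMatrix=projMat)
rgb = img_arr[2]  # RGBA pixel data; the env reshapes it to (height, width, 4)
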
diff --git a/examples/pybullet/gym/train_kuka_cam_grasping.py b/examples/pybullet/gym/train_kuka_cam_grasping.py
new file mode 100644
index 000000000..93d4a2b78
--- /dev/null
+++ b/examples/pybullet/gym/train_kuka_cam_grasping.py
@@ -0,0 +1,44 @@
+import gym
+from envs.bullet.kukaCamGymEnv import KukaCamGymEnv
+
+from baselines import deepq
+
+import datetime
+
+
+
+def callback(lcl, glb):
+    # stop training if reward exceeds 199
+    total = sum(lcl['episode_rewards'][-101:-1]) / 100
+    totalt = lcl['t']
+    #print("totalt")
+    #print(totalt)
+    is_solved = totalt > 2000 and total >= 10
+    return is_solved
+
+
+def main():
+
+    env = KukaCamGymEnv(renders=True)
+    model = deepq.models.cnn_to_mlp(
+        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
+        hiddens=[256],
+        dueling=False
+    )
+    act = deepq.learn(
+        env,
+        q_func=model,
+        lr=1e-3,
+        max_timesteps=10000000,
+        buffer_size=50000,
+        exploration_fraction=0.1,
+        exploration_final_eps=0.02,
+        print_freq=10,
+        callback=callback
+    )
+    print("Saving model to kuka_cam_model.pkl")
+    act.save("kuka_cam_model.pkl")
+
+
+if __name__ == '__main__':
+    main()
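Once deepq.learn() has finished and saved kuka_cam_model.pkl, the policy can be reloaded and rolled out. A minimal sketch, assuming the deepq.load() helper from the same baselines release used above; kuka_cam_model.pkl is the file written by the script above:

from baselines import deepq
from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

env = KukaCamGymEnv(renders=True)
act = deepq.load("kuka_cam_model.pkl")

obs, done = env.reset(), False
episode_rew = 0
while not done:
    # act() expects a batch dimension and returns a batch of actions.
    obs, rew, done, _ = env.step(act(obs[None])[0])
    episode_rew += rew
print("Episode reward", episode_rew)
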
diff --git a/examples/pybullet/gym/train_pybullet_cartpole.py b/examples/pybullet/gym/train_pybullet_cartpole.py
new file mode 100644
index 000000000..353fa4a86
--- /dev/null
+++ b/examples/pybullet/gym/train_pybullet_cartpole.py
@@ -0,0 +1,33 @@
+import gym
+from envs.bullet.cartpole_bullet import CartPoleBulletEnv
+
+from baselines import deepq
+
+
+def callback(lcl, glb):
+    # stop training if reward exceeds 199
+    is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
+    return is_solved
+
+
+def main():
+
+    env = gym.make('CartPoleBulletEnv-v0')
+    model = deepq.models.mlp([64])
+    act = deepq.learn(
+        env,
+        q_func=model,
+        lr=1e-3,
+        max_timesteps=100000,
+        buffer_size=50000,
+        exploration_fraction=0.1,
+        exploration_final_eps=0.02,
+        print_freq=10,
+        callback=callback
+    )
+    print("Saving model to cartpole_model.pkl")
+    act.save("cartpole_model.pkl")
+
+
+if __name__ == '__main__':
+    main()
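Before starting a long DQN run on the camera environment, a short random-action rollout is a cheap sanity check of the observation shape and the step loop. A sketch, assuming the same import path and URDF search path as the training scripts above:

import numpy as np
from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

env = KukaCamGymEnv(renders=False)  # DIRECT mode, no GUI window
obs = env.reset()
print("observation shape:", np.array(obs).shape)  # expected (256, 341, 4)

for _ in range(10):
    action = env.action_space.sample()  # one of the 7 discrete actions
    obs, reward, done, _ = env.step(action)
    print("reward:", reward, "done:", done)
    if done:
        obs = env.reset()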