add kukaCamGymEnv.py with camera observations (preliminary)

show camera position in example browser
disable per-vertex and per-fragment profile timings
Erwin Coumans
2017-06-21 09:33:46 -07:00
parent 71170d6384
commit 9213f944f1
7 changed files with 286 additions and 5 deletions

View File

@@ -1296,8 +1296,10 @@ void OpenGLExampleBrowser::update(float deltaTime)
     float pitch = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPitch();
     float yaw = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraYaw();
     float camTarget[3];
+    float camPos[3];
+    s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPosition(camPos);
     s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraTargetPosition(camTarget);
-    sprintf(msg,"dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
+    sprintf(msg,"camPos=%f,%f,%f, dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camPos[0],camPos[1],camPos[2],camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
     gui2->setStatusBarMessage(msg, true);
 }

View File

@@ -124,7 +124,7 @@ struct Shader : public IShader {
         m_projectionLightViewMat = m_projectionMat*m_lightModelView;
     }
     virtual Vec4f vertex(int iface, int nthvert) {
-        B3_PROFILE("vertex");
+        //B3_PROFILE("vertex");
         Vec2f uv = m_model->uv(iface, nthvert);
         varying_uv.set_col(nthvert, uv);
         varying_nrm.set_col(nthvert, proj<3>(m_invModelMat*embed<4>(m_model->normal(iface, nthvert), 0.f)));
@@ -142,7 +142,7 @@ struct Shader : public IShader {
     }
     virtual bool fragment(Vec3f bar, TGAColor &color) {
-        B3_PROFILE("fragment");
+        //B3_PROFILE("fragment");
         Vec4f p = m_viewportMat*(varying_tri_light_view*bar);
         float depth = p[2];
         p = p/p[3];
@@ -485,6 +485,7 @@ static bool clipTriangleAgainstNearplane(const mat<4,3,float>& triangleIn, b3Ali
 void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
 {
+    B3_PROFILE("renderObject");
     int width = renderData.m_rgbColorBuffer.get_width();
     int height = renderData.m_rgbColorBuffer.get_height();
@@ -517,10 +518,11 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
     Shader shader(model, light_dir_local, light_color, modelViewMatrix, lightModelViewMatrix, renderData.m_projectionMatrix,renderData.m_modelMatrix, renderData.m_viewportMatrix, localScaling, model->getColorRGBA(), width, height, shadowBufferPtr, renderData.m_lightAmbientCoeff, renderData.m_lightDiffuseCoeff, renderData.m_lightSpecularCoeff);
+    {
+        B3_PROFILE("face");
     for (int i=0; i<model->nfaces(); i++)
     {
-        B3_PROFILE("face");
         for (int j=0; j<3; j++) {
             shader.vertex(i, j);
         }
@@ -553,6 +555,7 @@ void TinyRenderer::renderObject(TinyRenderObjectData& renderData)
             }
         }
     }
+    }
 }

View File

@@ -43,3 +43,10 @@ register(
     timestep_limit=1000,
     reward_threshold=5.0,
 )
+
+register(
+    id='KukaCamBulletEnv-v0',
+    entry_point='envs.bullet:KukaCamGymEnv',
+    timestep_limit=1000,
+    reward_threshold=5.0,
+)
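
Once the register() call above has run (for example by importing the package that contains it; the exact module path below is an assumption), the new environment can be created through the standard Gym factory. A minimal usage sketch, not part of this commit:

import gym
import envs  # assumption: importing this package executes the register() calls above

env = gym.make('KukaCamBulletEnv-v0')  # default renders=True opens the GUI
obs = env.reset()
print(obs.shape)                       # RGBA camera observation, (256, 341, 4)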

View File

@@ -4,3 +4,4 @@ from envs.bullet.racecarGymEnv import RacecarGymEnv
 from envs.bullet.racecarZEDGymEnv import RacecarZEDGymEnv
 from envs.bullet.humanoidGymEnv import HumanoidGymEnv
 from envs.bullet.kukaGymEnv import KukaGymEnv
+from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

View File

@@ -0,0 +1,191 @@
import math
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
import time
import pybullet as p
from . import kuka
import random


class KukaCamGymEnv(gym.Env):
  metadata = {
      'render.modes': ['human', 'rgb_array'],
      'video.frames_per_second': 50
  }

  def __init__(self,
               urdfRoot="",
               actionRepeat=1,
               isEnableSelfCollision=True,
               renders=True):
    print("init")
    self._timeStep = 1./240.
    self._urdfRoot = urdfRoot
    self._actionRepeat = actionRepeat
    self._isEnableSelfCollision = isEnableSelfCollision
    self._observation = []
    self._envStepCounter = 0
    self._renders = renders
    self._width = 341
    self._height = 256
    self.terminated = 0
    self._p = p
    if self._renders:
      p.connect(p.GUI)
      p.resetDebugVisualizerCamera(1.3, 180, -41, [0.52, -0.2, -0.33])
    else:
      p.connect(p.DIRECT)
    #timinglog = p.startStateLogging(p.STATE_LOGGING_PROFILE_TIMINGS, "kukaTimings.json")
    self._seed()
    self.reset()
    observationDim = len(self.getExtendedObservation())
    #print("observationDim")
    #print(observationDim)
    observation_high = np.array([np.finfo(np.float32).max] * observationDim)
    self.action_space = spaces.Discrete(7)
    self.observation_space = spaces.Box(low=0, high=255, shape=(self._height, self._width, 4))
    self.viewer = None

  def _reset(self):
    print("reset")
    self.terminated = 0
    p.resetSimulation()
    p.setPhysicsEngineParameter(numSolverIterations=150)
    p.setTimeStep(self._timeStep)
    p.loadURDF("%splane.urdf" % self._urdfRoot, [0, 0, -1])

    p.loadURDF("table/table.urdf", 0.5000000, 0.00000, -.820000, 0.000000, 0.000000, 0.0, 1.0)

    xpos = 0.5 + 0.05 * random.random()
    ypos = 0 + 0.05 * random.random()
    ang = 3.1415925438 * random.random()
    orn = p.getQuaternionFromEuler([0, 0, ang])
    self.blockUid = p.loadURDF("block.urdf", xpos, ypos, -0.1, orn[0], orn[1], orn[2], orn[3])

    p.setGravity(0, 0, -10)
    self._kuka = kuka.Kuka(urdfRootPath=self._urdfRoot, timeStep=self._timeStep)
    self._envStepCounter = 0
    p.stepSimulation()
    self._observation = self.getExtendedObservation()
    return np.array(self._observation)

  def __del__(self):
    p.disconnect()

  def _seed(self, seed=None):
    self.np_random, seed = seeding.np_random(seed)
    return [seed]

  def getExtendedObservation(self):
    #camEyePos = [0.03,0.236,0.54]
    #distance = 1.06
    #pitch = -56
    #yaw = 258
    #roll = 0
    #upAxisIndex = 2
    #camInfo = p.getDebugVisualizerCamera()
    #print("width,height")
    #print(camInfo[0])
    #print(camInfo[1])
    #print("viewMatrix")
    #print(camInfo[2])
    #print("projectionMatrix")
    #print(camInfo[3])
    #viewMat = camInfo[2]
    #viewMat = p.computeViewMatrixFromYawPitchRoll(camEyePos, distance, yaw, pitch, roll, upAxisIndex)
    viewMat = [-0.5120397806167603, 0.7171027660369873, -0.47284144163131714, 0.0, -0.8589617609977722, -0.42747554183006287, 0.28186774253845215, 0.0, 0.0, 0.5504802465438843, 0.8348482847213745, 0.0, 0.1925382763147354, -0.24935829639434814, -0.4401884973049164, 1.0]
    #projMatrix = camInfo[3]#[0.7499999403953552, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]
    projMatrix = [0.75, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, -1.0000200271606445, -1.0, 0.0, 0.0, -0.02000020071864128, 0.0]

    img_arr = p.getCameraImage(width=self._width, height=self._height, viewMatrix=viewMat, projectionMatrix=projMatrix)
    rgb = img_arr[2]
    np_img_arr = np.reshape(rgb, (self._height, self._width, 4))
    self._observation = np_img_arr
    return self._observation

  def _step(self, action):
    dv = 0.01
    dx = [0, -dv, dv, 0, 0, 0, 0][action]
    dy = [0, 0, 0, -dv, dv, 0, 0][action]
    da = [0, 0, 0, 0, 0, -0.1, 0.1][action]
    f = 0.3
    realAction = [dx, dy, -0.002, da, f]
    return self.step2(realAction)

  def step2(self, action):
    self._kuka.applyAction(action)
    for i in range(self._actionRepeat):
      p.stepSimulation()
      if self._renders:
        time.sleep(self._timeStep)
      self._observation = self.getExtendedObservation()
      if self._termination():
        break
      self._envStepCounter += 1
    #print("self._envStepCounter")
    #print(self._envStepCounter)
    done = self._termination()
    reward = self._reward()
    #print("len=%r" % len(self._observation))
    return np.array(self._observation), reward, done, {}

  def _render(self, mode='human', close=False):
    return

  def _termination(self):
    #print(self._kuka.endEffectorPos[2])
    state = p.getLinkState(self._kuka.kukaUid, self._kuka.kukaEndEffectorIndex)
    actualEndEffectorPos = state[0]

    #print("self._envStepCounter")
    #print(self._envStepCounter)
    if (self.terminated or self._envStepCounter > 1000):
      self._observation = self.getExtendedObservation()
      return True

    if (actualEndEffectorPos[2] <= 0.10):
      self.terminated = 1

      #print("closing gripper, attempting grasp")
      #start grasp and terminate
      fingerAngle = 0.3
      for i in range(1000):
        graspAction = [0, 0, 0.001, 0, fingerAngle]
        self._kuka.applyAction(graspAction)
        p.stepSimulation()
        fingerAngle = fingerAngle - (0.3/100.)
        if (fingerAngle < 0):
          fingerAngle = 0

      self._observation = self.getExtendedObservation()
      return True
    return False

  def _reward(self):
    #rewards is height of target object
    blockPos, blockOrn = p.getBasePositionAndOrientation(self.blockUid)
    closestPoints = p.getClosestPoints(self.blockUid, self._kuka.kukaUid, 1000)
    reward = -1000
    numPt = len(closestPoints)
    #print(numPt)
    if (numPt > 0):
      #print("reward:")
      reward = -closestPoints[0][8] * 10
    if (blockPos[2] > 0.2):
      print("grasped a block!!!")
      print("self._envStepCounter")
      print(self._envStepCounter)
      reward = reward + 1000
    #print("reward")
    #print(reward)
    return reward
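
As a quick sanity check of the new environment (illustrative only, not part of this commit), it can be stepped with random discrete actions to confirm the camera observation shape:

from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

env = KukaCamGymEnv(renders=False)  # DIRECT mode, no GUI window
obs = env.reset()
print(obs.shape)                    # expected (256, 341, 4)
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        obs = env.reset()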

View File

@@ -0,0 +1,44 @@
import gym
from envs.bullet.kukaCamGymEnv import KukaCamGymEnv

from baselines import deepq

import datetime


def callback(lcl, glb):
  # stop training once at least 2000 steps have elapsed and the mean reward
  # over the last 100 episodes reaches 10
  total = sum(lcl['episode_rewards'][-101:-1]) / 100
  totalt = lcl['t']
  #print("totalt")
  #print(totalt)
  is_solved = totalt > 2000 and total >= 10
  return is_solved


def main():
  env = KukaCamGymEnv(renders=True)
  model = deepq.models.cnn_to_mlp(
      convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
      hiddens=[256],
      dueling=False
  )
  act = deepq.learn(
      env,
      q_func=model,
      lr=1e-3,
      max_timesteps=10000000,
      buffer_size=50000,
      exploration_fraction=0.1,
      exploration_final_eps=0.02,
      print_freq=10,
      callback=callback
  )
  print("Saving model to kuka_cam_model.pkl")
  act.save("kuka_cam_model.pkl")


if __name__ == '__main__':
  main()
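
A matching playback script is not included in this commit; the following is a hedged sketch of how the saved kuka_cam_model.pkl could be replayed, assuming the 2017-era baselines API where deepq.load() returns a callable act object:

from envs.bullet.kukaCamGymEnv import KukaCamGymEnv
from baselines import deepq

def main():
    env = KukaCamGymEnv(renders=True)
    act = deepq.load("kuka_cam_model.pkl")  # model saved by the training script above
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)

if __name__ == '__main__':
    main()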

View File

@@ -0,0 +1,33 @@
import gym
from envs.bullet.cartpole_bullet import CartPoleBulletEnv

from baselines import deepq


def callback(lcl, glb):
  # stop training if reward exceeds 199
  is_solved = lcl['t'] > 100 and sum(lcl['episode_rewards'][-101:-1]) / 100 >= 199
  return is_solved


def main():
  env = gym.make('CartPoleBulletEnv-v0')
  model = deepq.models.mlp([64])
  act = deepq.learn(
      env,
      q_func=model,
      lr=1e-3,
      max_timesteps=100000,
      buffer_size=50000,
      exploration_fraction=0.1,
      exploration_final_eps=0.02,
      print_freq=10,
      callback=callback
  )
  print("Saving model to cartpole_model.pkl")
  act.save("cartpole_model.pkl")


if __name__ == '__main__':
  main()