diff --git a/examples/SharedMemory/IKTrajectoryHelper.cpp b/examples/SharedMemory/IKTrajectoryHelper.cpp
index 0a6cbbf80..0145251bb 100644
--- a/examples/SharedMemory/IKTrajectoryHelper.cpp
+++ b/examples/SharedMemory/IKTrajectoryHelper.cpp
@@ -185,7 +185,9 @@ bool IKTrajectoryHelper::computeNullspaceVel(int numQ, const double* q_current,
 {
     m_data->m_nullSpaceVelocity.SetLength(numQ);
     m_data->m_nullSpaceVelocity.SetZero();
-    double stayCloseToZeroGain = 0.1;
+    // TODO: Expose the coefficients of the null space term so that the user can choose to balance the null space task and the IK target task.
+    // Can also adaptively adjust the coefficients based on the residual of the null space velocity in the IK target task space.
+    double stayCloseToZeroGain = 0.001;
     double stayAwayFromLimitsGain = 10.0;
 
     // Stay close to zero
diff --git a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
index 7f3223b59..f9e0cd6a9 100644
--- a/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
+++ b/examples/SharedMemory/PhysicsServerCommandProcessor.cpp
@@ -6667,6 +6667,11 @@ bool PhysicsServerCommandProcessor::processCommand(const struct SharedMemoryComm
 }
 // Only calculate if the localPosition is non-zero.
 if (btInverseDynamics::maxAbs(localPosition) > 0.0) {
+    // Transform the localPosition into world coordinates.
+    btInverseDynamics::mat33 world_rotation_body;
+    tree->getBodyTransform(clientCmd.m_calculateJacobianArguments.m_linkIndex + 1, &world_rotation_body);
+    localPosition = world_rotation_body * localPosition;
+    // Correct the translational Jacobian.
     btInverseDynamics::mat33 skewCrossProduct;
     btInverseDynamics::skew(localPosition, &skewCrossProduct);
     btInverseDynamics::mat3x jac_l(3, numDofs + baseDofs);
diff --git a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
index 301910a9c..c082bef31 100644
--- a/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
+++ b/examples/ThirdPartyLibs/BussIK/Jacobian.cpp
@@ -354,6 +354,12 @@ void Jacobian::CalcDeltaThetasDLSwithNullspace(const VectorRn& desiredV)
     // Compute null space velocity
     VectorRn nullV(J.GetNumColumns());
     P.Multiply(desiredV, nullV);
+
+    // Compute the residual of the null space velocity in the task space
+    VectorRn residual(J.GetNumRows());
+    J.Multiply(nullV, residual);
+    // TODO: Use the residual to set the null space term coefficient adaptively.
+    //printf("residual: %f\n", residual.Norm());
 
     // Add null space velocity
     dTheta += nullV;
diff --git a/examples/pybullet/examples/inverse_kinematics.py b/examples/pybullet/examples/inverse_kinematics.py
index 0dba11bf9..9aa32afed 100644
--- a/examples/pybullet/examples/inverse_kinematics.py
+++ b/examples/pybullet/examples/inverse_kinematics.py
@@ -37,20 +37,21 @@
 hasPrevPose = 0
 useNullSpace = 0
 
 useOrientation = 1
-useSimulation = 1
+#If we set useSimulation=0, the arm pose is set to the IK result directly, without using dynamic control.
+#This can be used to test the IK result accuracy.
+useSimulation = 0
 useRealTimeSimulation = 1
 p.setRealTimeSimulation(useRealTimeSimulation)
 #trailDuration is duration (in seconds) after debug lines will be removed automatically
 #use 0 for no-removal
 trailDuration = 15
-
 while 1:
     if (useRealTimeSimulation):
         dt = datetime.now()
         t = (dt.second/60.)*2.*math.pi
     else:
-        t=t+0.1
+        t=t+0.001
 
     if (useSimulation and useRealTimeSimulation==0):
         p.stepSimulation()
diff --git a/examples/pybullet/examples/jacobian.py b/examples/pybullet/examples/jacobian.py
new file mode 100644
index 000000000..17638bfb3
--- /dev/null
+++ b/examples/pybullet/examples/jacobian.py
@@ -0,0 +1,66 @@
+import pybullet as p
+
+
+def getJointStates(robot):
+    joint_states = p.getJointStates(robot, range(p.getNumJoints(robot)))
+    joint_positions = [state[0] for state in joint_states]
+    joint_velocities = [state[1] for state in joint_states]
+    joint_torques = [state[3] for state in joint_states]
+    return joint_positions, joint_velocities, joint_torques
+
+def setJointPosition(robot, position, kp=1.0, kv=0.3):
+    num_joints = p.getNumJoints(robot)
+    zero_vec = [0.0] * num_joints
+    if len(position) == num_joints:
+        p.setJointMotorControlArray(robot, range(num_joints), p.POSITION_CONTROL,
+                                    targetPositions=position, targetVelocities=zero_vec,
+                                    positionGains=[kp] * num_joints, velocityGains=[kv] * num_joints)
+    else:
+        print("Not setting position. "
+              "Expected position vector of "
+              "length {}, got {}".format(num_joints, len(position)))
+
+def multiplyJacobian(jacobian, vector):
+    result = [0.0, 0.0, 0.0]
+    for c in range(len(vector)):
+        for r in range(3):
+            result[r] += jacobian[r][c] * vector[c]
+    return result
+
+
+clid = p.connect(p.SHARED_MEMORY)
+if (clid<0):
+    p.connect(p.DIRECT)
+time_step = 0.001
+gravity_constant = -9.81
+p.resetSimulation()
+p.setTimeStep(time_step)
+p.setGravity(0.0, 0.0, gravity_constant)
+p.loadURDF("plane.urdf",[0,0,-0.3])
+kukaId = p.loadURDF("kuka_iiwa/model.urdf",[0,0,0])
+p.resetBasePositionAndOrientation(kukaId,[0,0,0],[0,0,0,1])
+kukaEndEffectorIndex = 6
+numJoints = p.getNumJoints(kukaId)
+if (numJoints!=7):
+    exit()
+# Set a joint target for the position control and step the sim.
+setJointPosition(kukaId, [0.1] * p.getNumJoints(kukaId))
+p.stepSimulation()
+# Get the joint and link state directly from Bullet.
+pos, vel, torq = getJointStates(kukaId)
+result = p.getLinkState(kukaId, kukaEndEffectorIndex, computeLinkVelocity=1, computeForwardKinematics=1)
+link_trn, link_rot, com_trn, com_rot, frame_pos, frame_rot, link_vt, link_vr = result
+# Get the Jacobians for the CoM of the end-effector link.
+# Note that in this example com_rot == identity; otherwise we would need to use com_rot.T * com_trn.
+# The localPosition is always defined in terms of the link frame coordinates.
+zero_vec = [0.0] * numJoints
+jac_t, jac_r = p.calculateJacobian(kukaId, kukaEndEffectorIndex, com_trn, pos, zero_vec, zero_vec)
+
+print ("Link linear velocity of CoM from getLinkState:")
+print (link_vt)
+print ("Link linear velocity of CoM from linearJacobian * q_dot:")
+print (multiplyJacobian(jac_t, vel))
+print ("Link angular velocity of CoM from getLinkState:")
+print (link_vr)
+print ("Link angular velocity of CoM from angularJacobian * q_dot:")
+print (multiplyJacobian(jac_r, vel))
diff --git a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/configs.py
similarity index 73%
rename from examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
rename to examples/pybullet/gym/pybullet_envs/agents/configs.py
index 2ccd38adc..9b2cb2da2 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/config_ppo.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/configs.py
@@ -1,8 +1,25 @@
-"""The PPO training configuration file for minitaur environments."""
+# Copyright 2017 The TensorFlow Agents Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Example configurations using the PPO algorithm."""
+
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
 import functools
+
 from agents import ppo
 from agents.scripts import networks
 from pybullet_envs.bullet import minitaur_gym_env
@@ -11,33 +28,33 @@ import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
 import pybullet_envs
 
 
-# pylint: disable=unused-variable
 def default():
-  """The default configurations."""
+  """Default configuration for PPO."""
   # General
   algorithm = ppo.PPOAlgorithm
-  num_agents = 25
+  num_agents = 10
   eval_episodes = 25
   use_gpu = False
   # Network
   network = networks.ForwardGaussianPolicy
   weight_summaries = dict(
-      all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
+      all=r'.*',
+      policy=r'.*/policy/.*',
+      value=r'.*/value/.*')
   policy_layers = 200, 100
   value_layers = 200, 100
-  init_mean_factor = 0.2
+  init_mean_factor = 0.05
   init_logstd = -1
-  network_config = dict()
   # Optimization
   update_every = 25
   policy_optimizer = 'AdamOptimizer'
   value_optimizer = 'AdamOptimizer'
-  update_epochs_policy = 25
-  update_epochs_value = 25
-  value_lr = 1e-3
+  update_epochs_policy = 50
+  update_epochs_value = 50
   policy_lr = 1e-4
+  value_lr = 3e-4
   # Losses
-  discount = 0.99
+  discount = 0.985
   kl_target = 1e-2
   kl_cutoff_factor = 2
   kl_cutoff_coef = 1000
@@ -107,4 +124,3 @@ def pybullet_minitaur():
   steps = 3e7  # 30M
   return locals()
 
-
diff --git a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
index 195df7df8..6b6a49766 100644
--- a/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
+++ b/examples/pybullet/gym/pybullet_envs/agents/train_ppo.py
@@ -1,8 +1,22 @@
-r"""Script to use Proximal Policy Gradient for the minitaur environments.
+# Copyright 2017 The TensorFlow Agents Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-Run:
-  python train_ppo.py --logdif=/tmp/train --config=minitaur_pybullet
+r"""Script to train a batch reinforcement learning algorithm.
 
+Command line:
+
+  python3 -m agents.scripts.train --logdir=/path/to/logdir --config=pendulum
 """
 
 from __future__ import absolute_import
@@ -10,39 +24,142 @@
 from __future__ import division
 from __future__ import print_function
 
 import datetime
+import functools
 import os
+
+import gym
 import tensorflow as tf
 
 from agents import tools
-from agents.scripts import train
+from . import configs
 from agents.scripts import utility
-from . import config_ppo
 
-flags = tf.app.flags
-FLAGS = tf.app.flags.FLAGS
+def _create_environment(config):
+  """Constructor for an instance of the environment.
 
-flags.DEFINE_string(
-    'logdir', None,
-    'Base directory to store logs.')
-flags.DEFINE_string(
-    'config', None,
-    'Configuration to execute.')
-flags.DEFINE_string(
-    'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'),
-    'Sub directory to store logs.')
+  Args:
+    config: Object providing configurations via attributes.
+
+  Returns:
+    Wrapped OpenAI Gym environment.
+  """
+  if isinstance(config.env, str):
+    env = gym.make(config.env)
+  else:
+    env = config.env()
+  if config.max_length:
+    env = tools.wrappers.LimitDuration(env, config.max_length)
+  env = tools.wrappers.RangeNormalize(env)
+  env = tools.wrappers.ClipAction(env)
+  env = tools.wrappers.ConvertTo32Bit(env)
+  return env
+
+
+def _define_loop(graph, logdir, train_steps, eval_steps):
+  """Create and configure a training loop with training and evaluation phases.
+
+  Args:
+    graph: Object providing graph elements via attributes.
+    logdir: Log directory for storing checkpoints and summaries.
+    train_steps: Number of training steps per epoch.
+    eval_steps: Number of evaluation steps per epoch.
+
+  Returns:
+    Loop object.
+  """
+  loop = tools.Loop(
+      logdir, graph.step, graph.should_log, graph.do_report,
+      graph.force_reset)
+  loop.add_phase(
+      'train', graph.done, graph.score, graph.summary, train_steps,
+      report_every=None,
+      log_every=train_steps // 2,
+      checkpoint_every=None,
+      feed={graph.is_training: True})
+  loop.add_phase(
+      'eval', graph.done, graph.score, graph.summary, eval_steps,
+      report_every=eval_steps,
+      log_every=eval_steps // 2,
+      checkpoint_every=10 * eval_steps,
+      feed={graph.is_training: False})
+  return loop
+
+
+def train(config, env_processes):
+  """Training and evaluation entry point yielding scores.
+
+  Resolves some configuration attributes, creates environments, graph, and
+  training loop. By default, assigns all operations to the CPU.
+
+  Args:
+    config: Object providing configurations via attributes.
+    env_processes: Whether to step environments in separate processes.
+
+  Yields:
+    Evaluation scores.
+ """ + tf.reset_default_graph() + with config.unlocked: + config.network = functools.partial( + utility.define_network, config.network, config) + config.policy_optimizer = getattr(tf.train, config.policy_optimizer) + config.value_optimizer = getattr(tf.train, config.value_optimizer) + if config.update_every % config.num_agents: + tf.logging.warn('Number of agents should divide episodes per update.') + with tf.device('/cpu:0'): + batch_env = utility.define_batch_env( + lambda: _create_environment(config), + config.num_agents, env_processes) + graph = utility.define_simulation_graph( + batch_env, config.algorithm, config) + loop = _define_loop( + graph, config.logdir, + config.update_every * config.max_length, + config.eval_episodes * config.max_length) + total_steps = int( + config.steps / config.update_every * + (config.update_every + config.eval_episodes)) + # Exclude episode related variables since the Python state of environments is + # not checkpointed and thus new episodes start after resuming. + saver = utility.define_saver(exclude=(r'.*_temporary/.*',)) + sess_config = tf.ConfigProto(allow_soft_placement=True) + sess_config.gpu_options.allow_growth = True + with tf.Session(config=sess_config) as sess: + utility.initialize_variables(sess, saver, config.logdir) + for score in loop.run(sess, saver, total_steps): + yield score + batch_env.close() def main(_): """Create or load configuration and launch the trainer.""" - config = tools.AttrDict(getattr(config_ppo, FLAGS.config)()) - logdir = FLAGS.logdir and os.path.join( - FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config)) - utility.save_config(config, logdir) - for score in train.train(config, env_processes=True): - tf.logging.info(str(score)) + utility.set_up_logging() + if not FLAGS.config: + raise KeyError('You must specify a configuration.') + logdir = FLAGS.logdir and os.path.expanduser(os.path.join( + FLAGS.logdir, '{}-{}'.format(FLAGS.timestamp, FLAGS.config))) + try: + config = utility.load_config(logdir) + except IOError: + config = tools.AttrDict(getattr(configs, FLAGS.config)()) + config = utility.save_config(config, logdir) + for score in train(config, FLAGS.env_processes): + tf.logging.info('Score {}.'.format(score)) if __name__ == '__main__': + FLAGS = tf.app.flags.FLAGS + tf.app.flags.DEFINE_string( + 'logdir', None, + 'Base directory to store logs.') + tf.app.flags.DEFINE_string( + 'timestamp', datetime.datetime.now().strftime('%Y%m%dT%H%M%S'), + 'Sub directory to store logs.') + tf.app.flags.DEFINE_string( + 'config', None, + 'Configuration to execute.') + tf.app.flags.DEFINE_boolean( + 'env_processes', True, + 'Step environments in separate processes to circumvent the GIL.') tf.app.run() - diff --git a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py index fa5f7b21c..f6673dc1d 100644 --- a/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py +++ b/examples/pybullet/gym/pybullet_envs/agents/visualize_ppo.py @@ -1,42 +1,157 @@ +# Copyright 2017 The TensorFlow Agents Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
-r"""Script to visualize the trained PPO agent.
+r"""Script to render videos of the Proximal Policy Gradient algorithm.
 
-python -m pybullet_envs.agents.visualize \
---logdir=ppo
---outdir=/tmp/video/
+Command line:
+
+  python3 -m agents.scripts.visualize \
+      --logdir=/path/to/logdir/