/* Copyright (c) 2012 Advanced Micro Devices, Inc. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. */ //Originally written by Takahiro Harada class SolverInl { public: typedef SolverBase::ConstraintData ConstraintData; static __forceinline void setLinearAndAngular(const MYF4& n, const MYF4& r0, const MYF4& r1, MYF4& linear, MYF4& angular0, MYF4& angular1) { linear = -n; angular0 = -cross3(r0, n); angular1 = cross3(r1, n); } static __forceinline float calcJacCoeff(const MYF4& linear0, const MYF4& linear1, const MYF4& angular0, const MYF4& angular1, float invMass0, const Matrix3x3& invInertia0, float invMass1, const Matrix3x3& invInertia1) { // linear0,1 are normlized float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; float jmj1 = dot3F4(mtMul3(angular0,invInertia0), angular0); float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; float jmj3 = dot3F4(mtMul3(angular1,invInertia1), angular1); return -1.f/(jmj0+jmj1+jmj2+jmj3); } static __forceinline float calcRelVel(const MYF4& l0, const MYF4& l1, const MYF4& a0, const MYF4& a1, const MYF4& linVel0, const MYF4& angVel0, const MYF4& linVel1, const MYF4& angVel1) { return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); } static __forceinline void setConstraint4( const MYF4& posA, const MYF4& linVelA, const MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA, const MYF4& posB, const MYF4& linVelB, const MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB, const Contact4& src, const SolverBase::ConstraintCfg& cfg, Constraint4& dstC ) { dstC.m_bodyA = (u32)src.m_bodyAPtr; dstC.m_bodyB = (u32)src.m_bodyBPtr; float dtInv = 1.f/cfg.m_dt; for(int ic=0; ic<4; ic++) { dstC.m_appliedRambdaDt[ic] = 0.f; } dstC.m_fJacCoeffInv[0] = dstC.m_fJacCoeffInv[1] = 0.f; const MYF4& n = src.m_worldNormal; dstC.m_linear = -n; dstC.setFrictionCoeff( src.getFrictionCoeff() ); for(int ic=0; ic<4; ic++) { MYF4 r0 = src.m_worldPos[ic] - posA; MYF4 r1 = src.m_worldPos[ic] - posB; if( ic >= src.getNPoints() ) { dstC.m_jacCoeffInv[ic] = 0.f; continue; } float relVelN; { MYF4 linear, angular0, angular1; setLinearAndAngular(n, r0, r1, linear, angular0, angular1); dstC.m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, invMassA, invInertiaA, invMassB, invInertiaB ); relVelN = calcRelVel(linear, -linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB); float e = src.getRestituitionCoeff(); if( relVelN*relVelN < 0.004f ) e = 0.f; dstC.m_b[ic] = e*relVelN; dstC.m_b[ic] += (src.getPenetration(ic) + cfg.m_positionDrift)*cfg.m_positionConstraintCoeff*dtInv; dstC.m_appliedRambdaDt[ic] = 0.f; } } if( src.getNPoints() > 1 ) { // prepare friction MYF4 center = MAKE_MYF4(0.f); for(int i=0; i 0.95f || (invMassA == 0.f || invMassB == 0.f)) { float angNA = dot3F4( n, angVelA ); float angNB = dot3F4( n, angVelB ); angVelA -= (angNA*0.1f)*n; angVelB -= (angNB*0.1f)*n; } } } template static __inline void solveContact(Constraint4& cs, const MYF4& posA, MYF4& linVelA, MYF4& angVelA, float invMassA, const Matrix3x3& invInertiaA, const MYF4& posB, MYF4& linVelB, MYF4& angVelB, float invMassB, const Matrix3x3& invInertiaB, float maxRambdaDt[4], float minRambdaDt[4]) { MYF4 dLinVelA = MAKE_MYF4(0.f); MYF4 dAngVelA = MAKE_MYF4(0.f); MYF4 dLinVelB = MAKE_MYF4(0.f); MYF4 dAngVelB = MAKE_MYF4(0.f); for(int ic=0; ic<4; ic++) { // dont necessary because this makes change to 0 if( cs.m_jacCoeffInv[ic] == 0.f ) continue; { MYF4 angular0, angular1, linear; MYF4 r0 = cs.m_worldPos[ic] - posA; MYF4 r1 = cs.m_worldPos[ic] - posB; setLinearAndAngular( -cs.m_linear, r0, r1, linear, angular0, angular1 ); float rambdaDt = calcRelVel(cs.m_linear, -cs.m_linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; rambdaDt *= cs.m_jacCoeffInv[ic]; { float prevSum = cs.m_appliedRambdaDt[ic]; float updated = prevSum; updated += rambdaDt; updated = max2( updated, minRambdaDt[ic] ); updated = min2( updated, maxRambdaDt[ic] ); rambdaDt = updated - prevSum; cs.m_appliedRambdaDt[ic] = updated; } MYF4 linImp0 = invMassA*linear*rambdaDt; MYF4 linImp1 = invMassB*(-linear)*rambdaDt; MYF4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; MYF4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; if( JACOBI ) { dLinVelA += linImp0; dAngVelA += angImp0; dLinVelB += linImp1; dAngVelB += angImp1; } else { linVelA += linImp0; angVelA += angImp0; linVelB += linImp1; angVelB += angImp1; } } } if( JACOBI ) { linVelA += dLinVelA; angVelA += dAngVelA; linVelB += dLinVelB; angVelB += dAngVelB; } } enum { N_SPLIT = SolverBase::N_SPLIT, }; // for parallel solve struct ParallelSolveData { u32 m_n[N_SPLIT*N_SPLIT]; u32 m_offset[N_SPLIT*N_SPLIT]; }; static __inline int sortConstraintByBatch(Contact4* cs, int n, int ignoreIdx, int simdWidth = -1) { SortData* sortData; { BT_PROFILE("new"); sortData = new SortData[n]; } u32* idxBuffer = new u32[n]; u32* idxSrc = idxBuffer; u32* idxDst = idxBuffer; int nIdxSrc, nIdxDst; const int N_FLG = 256; const int FLG_MASK = N_FLG-1; u32 flg[N_FLG/32]; #if defined(_DEBUG) for(int i=0; i sortBuffer; sortBuffer.setRawPtr( deviceHost, sortData, n ); RadixSort::Data* sort = RadixSort::allocate( deviceHost, n ); RadixSort::execute( sort, sortBuffer, n ); RadixSort::deallocate( sort ); } DeviceUtils::deallocate( deviceHost ); } { BT_PROFILE("reorder"); // reorder Contact4* old = new Contact4[n]; memcpy( old, cs, sizeof(Contact4)*n); for(int i=0; i* bodies, const Buffer* shapes, const Buffer* constraints, int start, int nConstraints) : m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ), m_solveFriction( true ){} u16 getType(){ return 0; } void run(int tIdx) { HostBuffer& hBody = *(HostBuffer*)m_bodies; HostBuffer& hShape = *(HostBuffer*)m_shapes; HostBuffer& hc = *(HostBuffer*)m_constraints; for(int ic=0; ic( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia, bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia, maxRambdaDt, minRambdaDt ); } else { float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; float sum = 0; for(int j=0; j<4; j++) { sum +=hc[i].m_appliedRambdaDt[j]; } frictionCoeff = 0.7f; for(int j=0; j<4; j++) { maxRambdaDt[j] = frictionCoeff*sum; minRambdaDt[j] = -maxRambdaDt[j]; } SolverInl::solveFriction( hc[i], bodyA.m_pos, (MYF4&)bodyA.m_linVel, (MYF4&)bodyA.m_angVel, bodyA.m_invMass, hShape[aIdx].m_invInertia, bodyB.m_pos, (MYF4&)bodyB.m_linVel, (MYF4&)bodyB.m_angVel, bodyB.m_invMass, hShape[bIdx].m_invInertia, maxRambdaDt, minRambdaDt ); } } } const Buffer* m_bodies; const Buffer* m_shapes; const Buffer* m_constraints; int m_start; int m_nConstraints; bool m_solveFriction; }; template<> static Solver::Data* Solver::allocate( const Device* device, int pairCapacity ) { Solver::Data* data = new Data; data->m_device = device; data->m_parallelSolveData = 0; return data; } template<> static void Solver::deallocate( Solver::Data* data ) { if( data->m_parallelSolveData ) delete (SolverInl::ParallelSolveData*)data->m_parallelSolveData; delete data; } void sortContacts2( Solver::Data* data, const Buffer* bodyBuf, Buffer* contactsIn, void* additionalData, int nContacts, const Solver::ConstraintCfg& cfg ) { ADLASSERT( data->m_device->m_type == TYPE_HOST ); HostBuffer* bodyNative = (HostBuffer*)BufferUtils::map( data->m_device, bodyBuf ); HostBuffer* contactNative = (HostBuffer*)BufferUtils::map( data->m_device, contactsIn); if( cfg.m_enableParallelSolve ) { ADLASSERT( data->m_parallelSolveData == 0 ); data->m_parallelSolveData = new SolverInl::ParallelSolveData; SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; HostBuffer sortData( data->m_device, nContacts ); { // 2. set cell idx float spacing = adl::SolverBase::N_OBJ_PER_SPLIT*cfg.m_averageExtent; float xScale = 1.f/spacing; for(int i=0; i= 0 && xIdx < adl::SolverBase::N_SPLIT ); ADLASSERT( zIdx >= 0 && zIdx < adl::SolverBase::N_SPLIT ); sortData[i].m_key = (xIdx+zIdx*adl::SolverBase::N_SPLIT); sortData[i].m_value = i; } } { // 3. sort by cell idx RadixSort::Data* sData = RadixSort::allocate( data->m_device, nContacts ); RadixSort::execute( sData, sortData, nContacts ); RadixSort::deallocate( sData ); } { // 4. find entries HostBuffer counts; counts.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); HostBuffer offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); { BoundSearch::Data* sData = BoundSearch::allocate( data->m_device ); PrefixScan::Data* pData = PrefixScan::allocate( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); BoundSearch::execute( sData, sortData, nContacts, counts, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT, BoundSearchBase::COUNT ); PrefixScan::execute( pData, counts, offsets, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); BoundSearch::deallocate( sData ); PrefixScan::deallocate( pData ); } #if defined(_DEBUG) { HostBuffer n0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); HostBuffer offset0( data->m_device, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); for(int i=0; im_ptr, sizeof(Contact4)*nContacts ); for(int i=0; i( bodyNative, bodyBuf ); BufferUtils::unmap( contactNative, contactsIn ); } static void reorderConvertToConstraints2( Solver::Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, adl::Buffer* contactsIn, SolverData contactCOut, void* additionalData, int nContacts, const Solver::ConstraintCfg& cfg ) { sortContacts2( data, bodyBuf, contactsIn, additionalData, nContacts, cfg ); { SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; Buffer n; n.setRawPtr( data->m_device, solveData->m_n, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); Buffer offsets; offsets.setRawPtr( data->m_device, solveData->m_offset, adl::SolverBase::N_SPLIT*adl::SolverBase::N_SPLIT ); Solver::batchContacts( data, contactsIn, nContacts, &n, &offsets, cfg.m_staticIdx ); printf("hello\n"); } Solver::convertToConstraints( data, bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg ); } template static void solveContactConstraint( Solver::Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, SolverData constraint, void* additionalData, int n ) { Buffer* bodyNative = BufferUtils::map( data->m_device, bodyBuf ); Buffer* shapeNative = BufferUtils::map( data->m_device, shapeBuf ); Buffer* constraintNative = BufferUtils::map( data->m_device, (const Buffer*)constraint ); for(int iter=0; iterm_nIterations; iter++) { SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ); task.m_solveFriction = false; task.run(0); } for(int iter=0; iterm_nIterations; iter++) { SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ); task.m_solveFriction = true; task.run(0); } BufferUtils::unmap( bodyNative, bodyBuf ); BufferUtils::unmap( shapeNative, shapeBuf ); BufferUtils::unmap( constraintNative, (const Buffer*)constraint ); } #if 0 static int createSolveTasks( int batchIdx, Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, SolverData constraint, int n, ThreadPool::Task* tasksOut[], int taskCapacity ) { /* ADLASSERT( (N_SPLIT&1) == 0 ); ADLASSERT( batchIdx < N_BATCHES ); ADLASSERT( data->m_device->m_type == TYPE_HOST ); ADLASSERT( data->m_parallelSolveData ); SolverInl::ParallelSolveData* solveData = (SolverInl::ParallelSolveData*)data->m_parallelSolveData; data->m_batchIdx = 0; const int nx = N_SPLIT/2; int nTasksCreated = 0; // for(int ii=0; ii<2; ii++) for(batchIdx=0; batchIdx<4; batchIdx++) { int2 offset = make_int2( batchIdx&1, batchIdx>>1 ); for(int ix=0; ixm_n[cellIdx]; int start = solveData->m_offset[cellIdx]; if( n == 0 ) continue; SolveTask* task = new SolveTask( bodyBuf, shapeBuf, (const Buffer*)constraint, start, n ); // task->m_solveFriction = (ii==0)? false:true; tasksOut[nTasksCreated++] = task; } } return nTasksCreated; */ ADLASSERT(0); return 0; } #endif static void convertToConstraints2( Solver::Data* data, const Buffer* bodyBuf, const Buffer* shapeBuf, Buffer* contactsIn, SolverData contactCOut, void* additionalData, int nContacts, const Solver::ConstraintCfg& cfg ) { ADLASSERT( data->m_device->m_type == TYPE_HOST ); HostBuffer* bodyNative = (HostBuffer*)BufferUtils::map( data->m_device, bodyBuf ); HostBuffer* shapeNative = (HostBuffer*)BufferUtils::map( data->m_device, shapeBuf ); HostBuffer* contactNative = (HostBuffer*)BufferUtils::map( data->m_device, contactsIn ); HostBuffer* constraintNative = (HostBuffer*)BufferUtils::map( data->m_device, (Buffer*)contactCOut ); { #if !defined(_DEBUG) #pragma omp parallel for #endif for(int i=0; i( bodyNative, bodyBuf ); BufferUtils::unmap( shapeNative, shapeBuf ); BufferUtils::unmap( contactNative, contactsIn ); BufferUtils::unmap( constraintNative, (Buffer*)contactCOut ); } static void batchContacts2( Solver::Data* data, Buffer* contacts, int nContacts, Buffer* n, Buffer* offsets, int staticIdx ) { ADLASSERT( data->m_device->m_type == TYPE_HOST ); HostBuffer* contactNative =0; HostBuffer* nNative =0; HostBuffer* offsetsNative =0; int sz = sizeof(Contact4); int sz2 = sizeof(int2); { BT_PROFILE("BufferUtils::map"); contactNative = (HostBuffer*)BufferUtils::map( data->m_device, contacts, nContacts ); } { BT_PROFILE("BufferUtils::map2"); nNative = (HostBuffer*)BufferUtils::map( data->m_device, n ); offsetsNative= (HostBuffer*)BufferUtils::map( data->m_device, offsets ); } { BT_PROFILE("sortConstraintByBatch"); int numNonzeroGrid=0; int maxNumBatches = 0; for(int i=0; im_ptr+offset, n, staticIdx,-1 ); // on GPU maxNumBatches = max(numBatches,maxNumBatches); // SolverInl::sortConstraintByBatch( contactNative->m_ptr+offset, n, staticIdx ); // on CPU } } printf("maxNumBatches = %d\n", maxNumBatches); } { BT_PROFILE("BufferUtils::unmap"); BufferUtils::unmap( contactNative, contacts, nContacts ); } { BT_PROFILE("BufferUtils::unmap2"); BufferUtils::unmap( nNative, n ); BufferUtils::unmap( offsetsNative, offsets ); } }