static const char* prefixScanKernelsDX11= \ "/*\n" " 2011 Takahiro Harada\n" "*/\n" "\n" "typedef uint u32;\n" "\n" "#define GET_GROUP_IDX groupIdx.x\n" "#define GET_LOCAL_IDX localIdx.x\n" "#define GET_GLOBAL_IDX globalIdx.x\n" "#define GROUP_LDS_BARRIER GroupMemoryBarrierWithGroupSync()\n" "\n" "// takahiro end\n" "#define WG_SIZE 128\n" "\n" "#define GET_GROUP_SIZE WG_SIZE\n" "\n" "\n" "cbuffer SortCB : register( b0 )\n" "{\n" " int m_numElems;\n" " int m_numBlocks;\n" " int m_numScanBlocks;\n" "};\n" " \n" "RWStructuredBuffer dst : register( u0 );\n" "RWStructuredBuffer src : register( u1 );\n" "RWStructuredBuffer sumBuffer : register( u2 );\n" "\n" "\n" "groupshared u32 ldsData[2048];\n" "\n" "u32 ScanExclusive(u32 n, int lIdx, int lSize)\n" "{\n" " u32 blocksum;\n" " int offset = 1;\n" " for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" " {\n" " GROUP_LDS_BARRIER;\n" " for(int iIdx=lIdx; iIdx>= 1;\n" " for(int nActive=1; nActive>=1 )\n" " {\n" " GROUP_LDS_BARRIER;\n" " for( int iIdx = lIdx; iIdx blockSum2 : register( u1 );\n" "\n" "[numthreads(WG_SIZE, 1, 1)]\n" "void AddOffsetKernel(uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID)\n" "{\n" " const u32 blockSize = WG_SIZE*2;\n" "\n" " int myIdx = GET_GROUP_IDX+1;\n" " int llIdx = GET_LOCAL_IDX;\n" "\n" " u32 iBlockSum = blockSum2[myIdx];\n" "\n" " int endValue = min((myIdx+1)*(blockSize), m_numElems);\n" " for(int i=myIdx*blockSize+llIdx; i