// Radix-sort benchmark helpers: run(), run32() and radixSortBenchmark().
//
// NOTE(review): this text looks damaged by extraction. Line breaks are gone and
// everything that sat between a '<' and a later '>' appears to have been
// stripped (template parameter lists, Buffer/RadixSort template arguments, and
// the interiors of two loops, e.g. "for(int i=0; i::execute("). As written,
// the first "//" on each physical line also comments out the remainder of that
// line. Restore from the original file before building; the notes below
// describe only what is still visible.
//
// run( device, minSize, maxSize ):
//   - Asserts TYPE == device->m_type.
//   - Allocates three RadixSort data objects sized for maxSize (SORT_STANDARD,
//     SORT_STANDARD, SORT_ADVANCED), three device buffers of maxSize elements
//     and a host SortData array of maxSize elements.
//   - Iterates iter = minSize, doubling each pass while iter <= maxSize;
//     size = NEXTMULTIPLEOF( iter, 512 ) (presumably rounds up to a multiple
//     of 512 -- confirm against the macro).
//   - Runs RadixSort::execute on (data0, buf0), (data1, buf1), (data2, buf2)
//     separated by sw.split(), then prints size, t[1] and t[2]; t[0] is read
//     but not printed.
//   - The input fill / buffer upload / sw.start() part of the loop body is not
//     visible here (stripped span) -- TODO recover.
//   - Deallocates the three data objects and delete[]s the host array.
//
// run32( device, size ):
//   - Asserts TYPE == device->m_type; allocates RadixSort32 data, Copy data and
//     three device buffers (inputMaster, input, output) of `size` elements.
//   - Calls RadixSort32::execute( data, input, output, size ) inside a timed
//     region; the host fill loop, the declaration of nIter and the loop header
//     are not visible here (stripped span) -- TODO recover.
//   - Reports tInS = sw.getMs()/1000/nIter and
//     mKeysPerS = size/1000/1000/tInS.
//   - NOTE(review): the first printed value is size/1000.f but is labelled
//     "MKeys"; the sample outputs in the comments ("4194.30Keys") match
//     thousands of keys for size = 256*1024*8*2 -- label looks wrong, confirm.
//   - Deallocates the RadixSort32 and Copy data objects.
//
// radixSortBenchmark():
//   - Allocates a device via DeviceUtils::allocate( TYPE, cfg ) with a
//     default-constructed Config, calls run32 with 256*1024*8*2 elements, then
//     deallocates the device. The alternative run32/run calls are commented out.
template void run( Device* device, int minSize = 512, int maxSize = 64*1024 )//, int increment = 512 ) { ADLASSERT( TYPE == device->m_type ); Stopwatch sw( device ); // RadixSort::Data* data0 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_SIMPLE ); RadixSort::Data* data0 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_STANDARD ); RadixSort::Data* data1 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_STANDARD ); RadixSort::Data* data2 = RadixSort::allocate( device, maxSize, RadixSortBase::SORT_ADVANCED ); Buffer buf0( device, maxSize ); Buffer buf1( device, maxSize ); Buffer buf2( device, maxSize ); SortData* input = new SortData[ maxSize ]; // for(int iter = minSize; iter<=maxSize; iter+=increment) for(int iter = minSize; iter<=maxSize; iter*=2) { int size = NEXTMULTIPLEOF( iter, 512 ); for(int i=0; i::execute( data0, buf0, size ); sw.split(); RadixSort::execute( data1, buf1, size ); sw.split(); RadixSort::execute( data2, buf2, size ); sw.stop(); float t[3]; sw.getMs( t, 3 ); // printf(" %d %3.2f %3.2f %3.2f\n", size, t[0], t[1], t[2]); printf(" %d %3.2f %3.2f\n", size, t[1], t[2]); } RadixSort::deallocate( data0 ); RadixSort::deallocate( data1 ); RadixSort::deallocate( data2 ); delete [] input; } template void run32( Device* device, int size ) { //Cayman: 4194.30Keys: 373.05MKeys/s //Cypress: 4194.30Keys: 315.13MKeys/s ADLASSERT( TYPE == device->m_type ); Stopwatch sw( device ); RadixSort32::Data* data = RadixSort32::allocate( device, size ); Copy::Data* copyData = Copy::allocate( device ); Buffer inputMaster( device, size ); Buffer input( device, size ); Buffer output( device, size ); { u32* host = new u32[size]; for(int i=0; i::execute( copyData, (Buffer&)input, (Buffer&)inputMaster, size ); // RadixSort32::execute( data, input, size ); RadixSort32::execute( data, input, output, size ); } sw.stop(); { float tInS = sw.getMs()/1000.f/(float)nIter; float mKeysPerS = size/1000.f/1000.f/tInS; printf("%3.2fMKeys: %3.2fMKeys/s\n", 
size/1000.f, mKeysPerS); } RadixSort32::deallocate( data ); Copy::deallocate( copyData ); } template void radixSortBenchmark() { Device* device; { DeviceUtils::Config cfg; device = DeviceUtils::allocate( TYPE, cfg ); } run32( device, 256*1024*8*2 ); // run32( device, 256*20*6 ); // run( device, 512, 1024*128*4 ); DeviceUtils::deallocate( device ); }