Updated CDTestFramework with the OPCODE Array SAP test.
Thanks Pierre Terdiman for the latest update.
This commit is contained in:
@@ -1,19 +1,3 @@
|
||||
/*
|
||||
* ICE / OPCODE - Optimized Collision Detection
|
||||
* http://www.codercorner.com/Opcode.htm
|
||||
*
|
||||
* Copyright (c) 2001-2008 Pierre Terdiman, pierre@codercorner.com
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from the use of this software.
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Contains source code from the article "Radix Sort Revisited".
|
||||
@@ -44,12 +28,18 @@ subject to the following restrictions:
|
||||
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
|
||||
* - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all.
|
||||
* - ranks are not "reset" anymore, but implicit on first calls
|
||||
* - 07.05.02: - offsets rewritten with one less indirection.
|
||||
* - 11.03.02: - "bool" replaced with RadixHint enum
|
||||
* - 07.05.02: offsets rewritten with one less indirection.
|
||||
* - 11.03.02: "bool" replaced with RadixHint enum
|
||||
* - 07.15.04: stack-based radix added
|
||||
* - we want to use the radix sort but without making it static, and without allocating anything.
|
||||
* - we internally allocate two arrays of ranks. Each of them has N udwords to sort N values.
|
||||
* - 1Mb/2/sizeof(udword) = 131072 values max, at the same time.
|
||||
* - 09.22.04: - adapted to MacOS by Chris Lamb
|
||||
* - 01.12.06: - added optimizations suggested by Kyle Hubert
|
||||
*
|
||||
* \class RadixSort
|
||||
* \author Pierre Terdiman
|
||||
* \version 1.4
|
||||
* \version 1.5
|
||||
* \date August, 15, 1998
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -58,14 +48,13 @@ subject to the following restrictions:
|
||||
To do:
|
||||
- add an offset parameter between two input values (avoid some data recopy sometimes)
|
||||
- unroll ? asm ?
|
||||
- 11 bits trick & 3 passes as Michael did
|
||||
- prefetch stuff the day I have a P3
|
||||
- make a version with 16-bits indices ?
|
||||
*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Precompiled Header
|
||||
#include "Stdafx.h"
|
||||
#include "StdAfx.h"
|
||||
|
||||
using namespace IceCore;
|
||||
|
||||
@@ -82,17 +71,31 @@ using namespace IceCore;
|
||||
mPreviousSize = n; \
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) || defined(_XBOX)
|
||||
#define H0_OFFSET 768
|
||||
#define H1_OFFSET 512
|
||||
#define H2_OFFSET 256
|
||||
#define H3_OFFSET 0
|
||||
#define BYTES_INC (3-j)
|
||||
#else
|
||||
#define H0_OFFSET 0
|
||||
#define H1_OFFSET 256
|
||||
#define H2_OFFSET 512
|
||||
#define H3_OFFSET 768
|
||||
#define BYTES_INC j
|
||||
#endif
|
||||
|
||||
#define CREATE_HISTOGRAMS(type, buffer) \
|
||||
/* Clear counters/histograms */ \
|
||||
ZeroMemory(mHistogram, 256*4*sizeof(udword)); \
|
||||
\
|
||||
/* Prepare to count */ \
|
||||
ubyte* p = (ubyte*)input; \
|
||||
ubyte* pe = &p[nb*4]; \
|
||||
udword* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \
|
||||
udword* h1= &mHistogram[256]; /* Histogram for second pass */ \
|
||||
udword* h2= &mHistogram[512]; /* Histogram for third pass */ \
|
||||
udword* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \
|
||||
const ubyte* p = (const ubyte*)input; \
|
||||
const ubyte* pe = &p[nb*4]; \
|
||||
udword* h0= &mHistogram[H0_OFFSET]; /* Histogram for first pass (LSB) */ \
|
||||
udword* h1= &mHistogram[H1_OFFSET]; /* Histogram for second pass */ \
|
||||
udword* h2= &mHistogram[H2_OFFSET]; /* Histogram for third pass */ \
|
||||
udword* h3= &mHistogram[H3_OFFSET]; /* Histogram for last pass (MSB) */ \
|
||||
\
|
||||
bool AlreadySorted = true; /* Optimism... */ \
|
||||
\
|
||||
@@ -128,7 +131,7 @@ using namespace IceCore;
|
||||
else \
|
||||
{ \
|
||||
/* Prepare for temporal coherence */ \
|
||||
udword* Indices = mRanks; \
|
||||
const udword* Indices = mRanks; \
|
||||
type PrevVal = (type)buffer[*Indices]; \
|
||||
\
|
||||
while(p!=pe) \
|
||||
@@ -159,7 +162,7 @@ using namespace IceCore;
|
||||
|
||||
#define CHECK_PASS_VALIDITY(pass) \
|
||||
/* Shortcut to current counters */ \
|
||||
udword* CurCount = &mHistogram[pass<<8]; \
|
||||
const udword* CurCount = &mHistogram[pass<<8]; \
|
||||
\
|
||||
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
|
||||
bool PerformPass = true; \
|
||||
@@ -183,12 +186,12 @@ using namespace IceCore;
|
||||
* Constructor.
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0)
|
||||
RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0), mDeleteRanks(true)
|
||||
{
|
||||
#ifndef RADIX_LOCAL_RAM
|
||||
// Allocate input-independent ram
|
||||
mHistogram = new udword[256*4];
|
||||
mOffset = new udword[256];
|
||||
mHistogram = ICE_ALLOC(sizeof(udword)*256*4);
|
||||
mOffset = ICE_ALLOC(sizeof(udword)*256);
|
||||
#endif
|
||||
// Initialize indices
|
||||
INVALIDATE_RANKS;
|
||||
@@ -203,11 +206,14 @@ RadixSort::~RadixSort()
|
||||
{
|
||||
// Release everything
|
||||
#ifndef RADIX_LOCAL_RAM
|
||||
DELETEARRAY(mOffset);
|
||||
DELETEARRAY(mHistogram);
|
||||
ICE_FREE(mOffset);
|
||||
ICE_FREE(mHistogram);
|
||||
#endif
|
||||
DELETEARRAY(mRanks2);
|
||||
DELETEARRAY(mRanks);
|
||||
if(mDeleteRanks)
|
||||
{
|
||||
ICE_FREE(mRanks2);
|
||||
ICE_FREE(mRanks);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -219,14 +225,16 @@ RadixSort::~RadixSort()
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
bool RadixSort::Resize(udword nb)
|
||||
{
|
||||
// Free previously used ram
|
||||
DELETEARRAY(mRanks2);
|
||||
DELETEARRAY(mRanks);
|
||||
|
||||
// Get some fresh one
|
||||
mRanks = new udword[nb]; CHECKALLOC(mRanks);
|
||||
mRanks2 = new udword[nb]; CHECKALLOC(mRanks2);
|
||||
if(mDeleteRanks)
|
||||
{
|
||||
// Free previously used ram
|
||||
ICE_FREE(mRanks2);
|
||||
ICE_FREE(mRanks);
|
||||
|
||||
// Get some fresh one
|
||||
mRanks = (udword*)ICE_ALLOC(sizeof(udword)*nb); CHECKALLOC(mRanks);
|
||||
mRanks2 = (udword*)ICE_ALLOC(sizeof(udword)*nb); CHECKALLOC(mRanks2);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -276,7 +284,7 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
|
||||
// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
|
||||
if(hint==RADIX_UNSIGNED) { CREATE_HISTOGRAMS(udword, input); }
|
||||
else { CREATE_HISTOGRAMS(sdword, input); }
|
||||
|
||||
/*
|
||||
// Compute #negative values involved if needed
|
||||
udword NbNegativeValues = 0;
|
||||
if(hint==RADIX_SIGNED)
|
||||
@@ -287,7 +295,7 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
|
||||
udword* h3= &mHistogram[768];
|
||||
for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
|
||||
}
|
||||
|
||||
*/
|
||||
// Radix sort, j is the pass number (0=LSB, 3=MSB)
|
||||
for(udword j=0;j<4;j++)
|
||||
{
|
||||
@@ -311,23 +319,38 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
|
||||
else
|
||||
{
|
||||
// This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
|
||||
|
||||
/*
|
||||
// Create biased offsets, in order for negative numbers to be sorted as well
|
||||
// mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
|
||||
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
|
||||
// for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
|
||||
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
|
||||
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
|
||||
|
||||
// Fixing the wrong place for negative values
|
||||
// mOffset[128] = 0;
|
||||
mLink[128] = mRanks2;
|
||||
// for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
|
||||
mLink[128] = mRanks2;
|
||||
for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
|
||||
*/
|
||||
|
||||
// From Kyle Hubert:
|
||||
|
||||
//mOffset[128] = 0;
|
||||
mLink[128] = mRanks2;
|
||||
//for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
|
||||
for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
|
||||
|
||||
//mOffset[0] = mOffset[255] + CurCount[255];
|
||||
mLink[0] = mLink[255] + CurCount[255];
|
||||
//for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
|
||||
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Perform Radix Sort
|
||||
ubyte* InputBytes = (ubyte*)input;
|
||||
InputBytes += j;
|
||||
const ubyte* InputBytes = (const ubyte*)input;
|
||||
InputBytes += BYTES_INC;
|
||||
if(INVALID_RANKS)
|
||||
{
|
||||
// for(udword i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i;
|
||||
@@ -336,8 +359,8 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
|
||||
}
|
||||
else
|
||||
{
|
||||
udword* Indices = mRanks;
|
||||
udword* IndicesEnd = &mRanks[nb];
|
||||
const udword* Indices = mRanks;
|
||||
const udword* IndicesEnd = &mRanks[nb];
|
||||
while(Indices!=IndicesEnd)
|
||||
{
|
||||
udword id = *Indices++;
|
||||
@@ -347,7 +370,9 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
|
||||
}
|
||||
|
||||
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
|
||||
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
|
||||
udword* Tmp = mRanks;
|
||||
mRanks = mRanks2;
|
||||
mRanks2 = Tmp;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
@@ -371,7 +396,7 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
// Stats
|
||||
mTotalCalls++;
|
||||
|
||||
udword* input = (udword*)input2;
|
||||
const udword* input = (const udword*)input2;
|
||||
|
||||
// Resize lists if needed
|
||||
CheckResize(nb);
|
||||
@@ -392,15 +417,16 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
|
||||
// wouldn't work with mixed positive/negative values....
|
||||
{ CREATE_HISTOGRAMS(float, input2); }
|
||||
|
||||
/*
|
||||
// Compute #negative values involved if needed
|
||||
udword NbNegativeValues = 0;
|
||||
// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
|
||||
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
|
||||
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
|
||||
// ### is that ok on Apple ?!
|
||||
udword* h3= &mHistogram[768];
|
||||
for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
|
||||
|
||||
*/
|
||||
// Radix sort, j is the pass number (0=LSB, 3=MSB)
|
||||
for(udword j=0;j<4;j++)
|
||||
{
|
||||
@@ -419,8 +445,8 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
|
||||
|
||||
// Perform Radix Sort
|
||||
ubyte* InputBytes = (ubyte*)input;
|
||||
InputBytes += j;
|
||||
const ubyte* InputBytes = (const ubyte*)input;
|
||||
InputBytes += BYTES_INC;
|
||||
if(INVALID_RANKS)
|
||||
{
|
||||
// for(i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i;
|
||||
@@ -429,8 +455,8 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
}
|
||||
else
|
||||
{
|
||||
udword* Indices = mRanks;
|
||||
udword* IndicesEnd = &mRanks[nb];
|
||||
const udword* Indices = mRanks;
|
||||
const udword* IndicesEnd = &mRanks[nb];
|
||||
while(Indices!=IndicesEnd)
|
||||
{
|
||||
udword id = *Indices++;
|
||||
@@ -440,7 +466,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
}
|
||||
|
||||
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
|
||||
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
|
||||
udword* Tmp = mRanks;
|
||||
mRanks = mRanks2;
|
||||
mRanks2 = Tmp;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -450,19 +478,32 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
|
||||
if(PerformPass)
|
||||
{
|
||||
/*
|
||||
// Create biased offsets, in order for negative numbers to be sorted as well
|
||||
// mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
|
||||
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
|
||||
// for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
|
||||
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
|
||||
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
|
||||
|
||||
// We must reverse the sorting order for negative numbers!
|
||||
// mOffset[255] = 0;
|
||||
mLink[255] = mRanks2;
|
||||
// for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
|
||||
for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
|
||||
// for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values
|
||||
mLink[255] = mRanks2;
|
||||
for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
|
||||
for(udword i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
|
||||
*/
|
||||
|
||||
// From Kyle Hubert:
|
||||
|
||||
//mOffset[255] = CurCount[255];
|
||||
mLink[255] = mRanks2 + CurCount[255];
|
||||
//for(udword i=254;i>127;i--) mOffset[i] = mOffset[i+1] + CurCount[i];
|
||||
for(udword i=254;i>127;i--) mLink[i] = mLink[i+1] + CurCount[i];
|
||||
//mOffset[0] = mOffset[128] + CurCount[128];
|
||||
mLink[0] = mLink[128] + CurCount[128];
|
||||
//for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
|
||||
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
|
||||
|
||||
// Perform Radix Sort
|
||||
if(INVALID_RANKS)
|
||||
@@ -491,7 +532,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
}
|
||||
}
|
||||
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
|
||||
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
|
||||
udword* Tmp = mRanks;
|
||||
mRanks = mRanks2;
|
||||
mRanks2 = Tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -510,7 +553,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
|
||||
}
|
||||
|
||||
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
|
||||
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
|
||||
udword* Tmp = mRanks;
|
||||
mRanks = mRanks2;
|
||||
mRanks2 = Tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -534,3 +579,14 @@ udword RadixSort::GetUsedRam() const
|
||||
UsedRam += 2*CURRENT_SIZE*sizeof(udword); // 2 lists of indices
|
||||
return UsedRam;
|
||||
}
|
||||
|
||||
bool RadixSort::SetRankBuffers(udword* ranks0, udword* ranks1)
|
||||
{
|
||||
if(!ranks0 || !ranks1) return false;
|
||||
|
||||
mRanks = ranks0;
|
||||
mRanks2 = ranks1;
|
||||
mDeleteRanks = false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user