Updated CDTestFramework with the OPCODE Array SAP test.

Thanks Pierre Terdiman for the latest update.
This commit is contained in:
erwin.coumans
2008-09-01 18:46:57 +00:00
parent f655eff89f
commit 932de57d4c
41 changed files with 6385 additions and 410 deletions

View File

@@ -1,19 +1,3 @@
/*
* ICE / OPCODE - Optimized Collision Detection
* http://www.codercorner.com/Opcode.htm
*
* Copyright (c) 2001-2008 Pierre Terdiman, pierre@codercorner.com
This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
@@ -44,12 +28,18 @@ subject to the following restrictions:
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
* - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all.
* - ranks are not "reset" anymore, but implicit on first calls
* - 07.05.02: - offsets rewritten with one less indirection.
* - 11.03.02: - "bool" replaced with RadixHint enum
* - 07.05.02: offsets rewritten with one less indirection.
* - 11.03.02: "bool" replaced with RadixHint enum
* - 07.15.04: stack-based radix added
* - we want to use the radix sort but without making it static, and without allocating anything.
* - we internally allocate two arrays of ranks. Each of them has N udwords to sort N values.
* - 1Mb/2/sizeof(udword) = 131072 values max, at the same time.
* - 09.22.04: - adapted to MacOS by Chris Lamb
* - 01.12.06: - added optimizations suggested by Kyle Hubert
*
* \class RadixSort
* \author Pierre Terdiman
* \version 1.4
* \version 1.5
* \date August, 15, 1998
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -58,14 +48,13 @@ subject to the following restrictions:
To do:
- add an offset parameter between two input values (avoid some data recopy sometimes)
- unroll ? asm ?
- 11 bits trick & 3 passes as Michael did
- prefetch stuff the day I have a P3
- make a version with 16-bits indices ?
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Precompiled Header
#include "Stdafx.h"
#include "StdAfx.h"
using namespace IceCore;
@@ -82,17 +71,31 @@ using namespace IceCore;
mPreviousSize = n; \
}
#if defined(__APPLE__) || defined(_XBOX)
#define H0_OFFSET 768
#define H1_OFFSET 512
#define H2_OFFSET 256
#define H3_OFFSET 0
#define BYTES_INC (3-j)
#else
#define H0_OFFSET 0
#define H1_OFFSET 256
#define H2_OFFSET 512
#define H3_OFFSET 768
#define BYTES_INC j
#endif
#define CREATE_HISTOGRAMS(type, buffer) \
/* Clear counters/histograms */ \
ZeroMemory(mHistogram, 256*4*sizeof(udword)); \
\
/* Prepare to count */ \
ubyte* p = (ubyte*)input; \
ubyte* pe = &p[nb*4]; \
udword* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \
udword* h1= &mHistogram[256]; /* Histogram for second pass */ \
udword* h2= &mHistogram[512]; /* Histogram for third pass */ \
udword* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \
const ubyte* p = (const ubyte*)input; \
const ubyte* pe = &p[nb*4]; \
udword* h0= &mHistogram[H0_OFFSET]; /* Histogram for first pass (LSB) */ \
udword* h1= &mHistogram[H1_OFFSET]; /* Histogram for second pass */ \
udword* h2= &mHistogram[H2_OFFSET]; /* Histogram for third pass */ \
udword* h3= &mHistogram[H3_OFFSET]; /* Histogram for last pass (MSB) */ \
\
bool AlreadySorted = true; /* Optimism... */ \
\
@@ -128,7 +131,7 @@ using namespace IceCore;
else \
{ \
/* Prepare for temporal coherence */ \
udword* Indices = mRanks; \
const udword* Indices = mRanks; \
type PrevVal = (type)buffer[*Indices]; \
\
while(p!=pe) \
@@ -159,7 +162,7 @@ using namespace IceCore;
#define CHECK_PASS_VALIDITY(pass) \
/* Shortcut to current counters */ \
udword* CurCount = &mHistogram[pass<<8]; \
const udword* CurCount = &mHistogram[pass<<8]; \
\
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
bool PerformPass = true; \
@@ -183,12 +186,12 @@ using namespace IceCore;
* Constructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0)
RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0), mDeleteRanks(true)
{
#ifndef RADIX_LOCAL_RAM
// Allocate input-independent ram
mHistogram = new udword[256*4];
mOffset = new udword[256];
mHistogram = ICE_ALLOC(sizeof(udword)*256*4);
mOffset = ICE_ALLOC(sizeof(udword)*256);
#endif
// Initialize indices
INVALIDATE_RANKS;
@@ -203,11 +206,14 @@ RadixSort::~RadixSort()
{
// Release everything
#ifndef RADIX_LOCAL_RAM
DELETEARRAY(mOffset);
DELETEARRAY(mHistogram);
ICE_FREE(mOffset);
ICE_FREE(mHistogram);
#endif
DELETEARRAY(mRanks2);
DELETEARRAY(mRanks);
if(mDeleteRanks)
{
ICE_FREE(mRanks2);
ICE_FREE(mRanks);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -219,14 +225,16 @@ RadixSort::~RadixSort()
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool RadixSort::Resize(udword nb)
{
// Free previously used ram
DELETEARRAY(mRanks2);
DELETEARRAY(mRanks);
// Get some fresh one
mRanks = new udword[nb]; CHECKALLOC(mRanks);
mRanks2 = new udword[nb]; CHECKALLOC(mRanks2);
if(mDeleteRanks)
{
// Free previously used ram
ICE_FREE(mRanks2);
ICE_FREE(mRanks);
// Get some fresh one
mRanks = (udword*)ICE_ALLOC(sizeof(udword)*nb); CHECKALLOC(mRanks);
mRanks2 = (udword*)ICE_ALLOC(sizeof(udword)*nb); CHECKALLOC(mRanks2);
}
return true;
}
@@ -276,7 +284,7 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
if(hint==RADIX_UNSIGNED) { CREATE_HISTOGRAMS(udword, input); }
else { CREATE_HISTOGRAMS(sdword, input); }
/*
// Compute #negative values involved if needed
udword NbNegativeValues = 0;
if(hint==RADIX_SIGNED)
@@ -287,7 +295,7 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
udword* h3= &mHistogram[768];
for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
}
*/
// Radix sort, j is the pass number (0=LSB, 3=MSB)
for(udword j=0;j<4;j++)
{
@@ -311,23 +319,38 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
else
{
// This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
/*
// Create biased offsets, in order for negative numbers to be sorted as well
// mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
// for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// Fixing the wrong place for negative values
// mOffset[128] = 0;
mLink[128] = mRanks2;
// for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
mLink[128] = mRanks2;
for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
*/
// From Kyle Hubert:
//mOffset[128] = 0;
mLink[128] = mRanks2;
//for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
//mOffset[0] = mOffset[255] + CurCount[255];
mLink[0] = mLink[255] + CurCount[255];
//for(i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
}
// Perform Radix Sort
ubyte* InputBytes = (ubyte*)input;
InputBytes += j;
const ubyte* InputBytes = (const ubyte*)input;
InputBytes += BYTES_INC;
if(INVALID_RANKS)
{
// for(udword i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i;
@@ -336,8 +359,8 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
}
else
{
udword* Indices = mRanks;
udword* IndicesEnd = &mRanks[nb];
const udword* Indices = mRanks;
const udword* IndicesEnd = &mRanks[nb];
while(Indices!=IndicesEnd)
{
udword id = *Indices++;
@@ -347,7 +370,9 @@ RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
udword* Tmp = mRanks;
mRanks = mRanks2;
mRanks2 = Tmp;
}
}
return *this;
@@ -371,7 +396,7 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
// Stats
mTotalCalls++;
udword* input = (udword*)input2;
const udword* input = (const udword*)input2;
// Resize lists if needed
CheckResize(nb);
@@ -392,15 +417,16 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
// wouldn't work with mixed positive/negative values....
{ CREATE_HISTOGRAMS(float, input2); }
/*
// Compute #negative values involved if needed
udword NbNegativeValues = 0;
// An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
// last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
// responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
// ### is that ok on Apple ?!
udword* h3= &mHistogram[768];
for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part
*/
// Radix sort, j is the pass number (0=LSB, 3=MSB)
for(udword j=0;j<4;j++)
{
@@ -419,8 +445,8 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
// Perform Radix Sort
ubyte* InputBytes = (ubyte*)input;
InputBytes += j;
const ubyte* InputBytes = (const ubyte*)input;
InputBytes += BYTES_INC;
if(INVALID_RANKS)
{
// for(i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i;
@@ -429,8 +455,8 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
}
else
{
udword* Indices = mRanks;
udword* IndicesEnd = &mRanks[nb];
const udword* Indices = mRanks;
const udword* IndicesEnd = &mRanks[nb];
while(Indices!=IndicesEnd)
{
udword id = *Indices++;
@@ -440,7 +466,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
udword* Tmp = mRanks;
mRanks = mRanks2;
mRanks2 = Tmp;
}
}
else
@@ -450,19 +478,32 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
if(PerformPass)
{
/*
// Create biased offsets, in order for negative numbers to be sorted as well
// mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
// for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers
// We must reverse the sorting order for negative numbers!
// mOffset[255] = 0;
mLink[255] = mRanks2;
// for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
// for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values
mLink[255] = mRanks2;
for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values
for(udword i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values
*/
// From Kyle Hubert:
//mOffset[255] = CurCount[255];
mLink[255] = mRanks2 + CurCount[255];
//for(udword i=254;i>127;i--) mOffset[i] = mOffset[i+1] + CurCount[i];
for(udword i=254;i>127;i--) mLink[i] = mLink[i+1] + CurCount[i];
//mOffset[0] = mOffset[128] + CurCount[128];
mLink[0] = mLink[128] + CurCount[128];
//for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1];
for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1];
// Perform Radix Sort
if(INVALID_RANKS)
@@ -491,7 +532,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
}
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
udword* Tmp = mRanks;
mRanks = mRanks2;
mRanks2 = Tmp;
}
else
{
@@ -510,7 +553,9 @@ RadixSort& RadixSort::Sort(const float* input2, udword nb)
}
// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp;
udword* Tmp = mRanks;
mRanks = mRanks2;
mRanks2 = Tmp;
}
}
}
@@ -534,3 +579,14 @@ udword RadixSort::GetUsedRam() const
UsedRam += 2*CURRENT_SIZE*sizeof(udword); // 2 lists of indices
return UsedRam;
}
bool RadixSort::SetRankBuffers(udword* ranks0, udword* ranks1)
{
if(!ranks0 || !ranks1) return false;
mRanks = ranks0;
mRanks2 = ranks1;
mDeleteRanks = false;
return true;
}