/* ----------------------------------------------------------------------------- This source file is part of OGRE (Object-oriented Graphics Rendering Engine) For the latest info, see http://www.ogre3d.org/ Copyright (c) 2000-2009 Torus Knot Software Ltd Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------------- */ #ifndef __RadixSort_H__ #define __RadixSort_H__ #include "OgrePrerequisites.h" namespace Ogre { /** \addtogroup Core * @{ */ /** \addtogroup General * @{ */ /** Class for performing a radix sort (fast comparison-less sort based on byte value) on various standard STL containers. @remarks A radix sort is a very fast sort algorithm. It doesn't use comparisons and thus is able to break the theoretical minimum O(N*logN) complexity. Radix sort is complexity O(k*N), where k is a constant. Note that radix sorting is not in-place, it requires additional storage, so it trades memory for speed. The overhead of copying means that it is only faster for fairly large datasets, so you are advised to only use it for collections of at least a few hundred items. @par This is a template class to allow it to deal with a variety of containers, and a variety of value types to sort on. In addition to providing the container and value type on construction, you also need to supply a functor object which will retrieve the value to compare on for each item in the list. For example, if you had an std::vector of by-value instances of an object of class 'Bibble', and you wanted to sort on Bibble::getDoobrie(), you'd have to firstly create a functor like this: @code struct BibbleSortFunctor { float operator()(const Bibble& val) const { return val.getDoobrie(); } } @endcode Then, you need to declare a RadixSort class which names the container type, the value type in the container, and the type of the value you want to sort by. You can then call the sort function. E.g. @code RadixSort radixSorter; BibbleSortFunctor functor; radixSorter.sort(myBibbleList, functor); @endcode You should try to reuse RadixSort instances, since repeated allocation of the internal storage is then avoided. @note Radix sorting is often associated with just unsigned integer values. Our implementation can handle both unsigned and signed integers, as well as floats (which are often not supported by other radix sorters). doubles are not supported; you will need to implement your functor object to convert to float if you wish to use this sort routine. */ template class RadixSort { public: typedef typename TContainer::iterator ContainerIter; protected: /// Alpha-pass counters of values (histogram) /// 4 of them so we can radix sort a maximum of a 32bit value int mCounters[4][256]; /// Beta-pass offsets int mOffsets[256]; /// Sort area size int mSortSize; /// Number of passes for this type int mNumPasses; struct SortEntry { TCompValueType key; ContainerIter iter; SortEntry() {} SortEntry(TCompValueType k, ContainerIter it) : key(k), iter(it) {} }; /// Temp sort storage typedef std::vector > SortVector; SortVector mSortArea1; SortVector mSortArea2; SortVector* mSrc; SortVector* mDest; TContainer mTmpContainer; // initial copy void sortPass(int byteIndex) { // Calculate offsets // Basically this just leaves gaps for duplicate entries to fill mOffsets[0] = 0; for (int i = 1; i < 256; ++i) { mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; } // Sort pass for (int i = 0; i < mSortSize; ++i) { unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; } } template void finalPass(int byteIndex, T val) { // default is to do normal pass sortPass(byteIndex); } // special case signed int void finalPass(int byteIndex, int val) { int numNeg = 0; // all negative values are in entries 128+ in most significant byte for (int i = 128; i < 256; ++i) { numNeg += mCounters[byteIndex][i]; } // Calculate offsets - positive ones start at the number of negatives // do positive numbers mOffsets[0] = numNeg; for (int i = 1; i < 128; ++i) { mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; } // Do negative numbers (must start at zero) // No need to invert ordering, already correct (-1 is highest number) mOffsets[128] = 0; for (int i = 129; i < 256; ++i) { mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; } // Sort pass for (int i = 0; i < mSortSize; ++i) { unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; } } // special case float void finalPass(int byteIndex, float val) { // floats need to be special cased since negative numbers will come // after positives (high bit = sign) and will be in reverse order // (no ones-complement of the +ve value) int numNeg = 0; // all negative values are in entries 128+ in most significant byte for (int i = 128; i < 256; ++i) { numNeg += mCounters[byteIndex][i]; } // Calculate offsets - positive ones start at the number of negatives // do positive numbers normally mOffsets[0] = numNeg; for (int i = 1; i < 128; ++i) { mOffsets[i] = mOffsets[i-1] + mCounters[byteIndex][i-1]; } // Do negative numbers (must start at zero) // Also need to invert ordering // In order to preserve the stability of the sort (essential since // we rely on previous bytes already being sorted) we have to count // backwards in our offsets from mOffsets[255] = mCounters[byteIndex][255]; for (int i = 254; i > 127; --i) { mOffsets[i] = mOffsets[i+1] + mCounters[byteIndex][i]; } // Sort pass for (int i = 0; i < mSortSize; ++i) { unsigned char byteVal = getByte(byteIndex, (*mSrc)[i].key); if (byteVal > 127) { // -ve; pre-decrement since offsets set to count (*mDest)[--mOffsets[byteVal]] = (*mSrc)[i]; } else { // +ve (*mDest)[mOffsets[byteVal]++] = (*mSrc)[i]; } } } inline unsigned char getByte(int byteIndex, TCompValueType val) { #if OGRE_ENDIAN == OGRE_ENDIAN_LITTLE return ((unsigned char*)(&val))[byteIndex]; #else return ((unsigned char*)(&val))[mNumPasses - byteIndex - 1]; #endif } public: RadixSort() {} ~RadixSort() {} /** Main sort function @param container A container of the type you declared when declaring @param func A functor which returns the value for comparison when given a container value */ template void sort(TContainer& container, TFunction func) { if (container.empty()) return; // Set up the sort areas mSortSize = static_cast(container.size()); mSortArea1.resize(container.size()); mSortArea2.resize(container.size()); // Copy data now (we need constant iterators for sorting) mTmpContainer = container; mNumPasses = sizeof(TCompValueType); // Counter pass // Initialise the counts int p; for (p = 0; p < mNumPasses; ++p) memset(mCounters[p], 0, sizeof(int) * 256); // Perform alpha pass to count ContainerIter i = mTmpContainer.begin(); TCompValueType prevValue = func.operator()(*i); bool needsSorting = false; for (int u = 0; i != mTmpContainer.end(); ++i, ++u) { // get sort value TCompValueType val = func.operator()(*i); // cheap check to see if needs sorting (temporal coherence) if (!needsSorting && val < prevValue) needsSorting = true; // Create a sort entry mSortArea1[u].key = val; mSortArea1[u].iter = i; // increase counters for (p = 0; p < mNumPasses; ++p) { unsigned char byteVal = getByte(p, val); mCounters[p][byteVal]++; } prevValue = val; } // early exit if already sorted if (!needsSorting) return; // Sort passes mSrc = &mSortArea1; mDest = &mSortArea2; for (p = 0; p < mNumPasses - 1; ++p) { sortPass(p); // flip src/dst SortVector* tmp = mSrc; mSrc = mDest; mDest = tmp; } // Final pass may differ, make polymorphic finalPass(p, prevValue); // Copy everything back int c = 0; for (i = container.begin(); i != container.end(); ++i, ++c) { *i = *((*mDest)[c].iter); } } }; /** @} */ /** @} */ } #endif