Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

/ray/src/lib/tl/sort.h

Go to the documentation of this file.
00001 /*
00002  * lib/tl/sort.h 
00003  * 
00004  * Array sorting templates. 
00005  * 
00006  * Copyright (c) 2004 by Wolfgang Wieser ] wwieser (a) gmx <*> de [ 
00007  * 
00008  * This file may be distributed and/or modified under the terms of the 
00009  * GNU General Public License version 2 as published by the Free Software 
00010  * Foundation. (See COPYING.GPL for details.)
00011  * 
00012  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00013  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00014  * 
00015  */
00016 
00017 #ifndef _TemplateLibrary_SORT_H_
00018 #define _TemplateLibrary_SORT_H_ 1
00019 
00026 #include <lib/sconfig.h>    /* MUST be first */
00027 
00028 #include <lib/tl/defop.h>
00029 
00031 
00035 template<typename T,typename _OP>inline void _SortSwap(
00036     T *a,size_t i0,size_t i1,T &tmp,const _OP &op)
00037 {
00038     op.ass(tmp,op.idx(a,i0));
00039     op.ass(op.idx(a,i0),op.idx(a,i1));
00040     op.ass(op.idx(a,i1),tmp);
00041 }
00042 
00059 template<typename T,typename _OP>void InsertionSortBS(
00060     T *a,size_t nelem,
00061     const _OP &op=TLDefaultOperators_CDT<T>())
00062 {
00063     T tmp;
00064     if(nelem>=2 && op.lt(op.idx(a,1),op.idx(a,0)))  // a[1]<a[0]
00065     {  _SortSwap(a,0,1,tmp,op);  }
00066     for(size_t i=2; i<nelem; i++)
00067     {
00068         if(op.le(op.idx(a,i-1),op.idx(a,i)))  continue;  // if(a[i-1]<=a[i])
00069         // Need to move a[i]; find insertion index. 
00070         T &key=op.idx(a,i);
00071         size_t aa=0,bb=i-1;  // bb>aa here as we start with i=2. 
00072         while(bb-aa>1)
00073         {  size_t m=(aa+bb)/2;  op.le(op.idx(a,m),key) ? aa=m : bb=m;  }
00074         size_t ii = (!aa && op.lt(key,op.idx(a,0))) ? 0 : bb;
00075         // Move the elements (ii to i). 
00076         op.ass(tmp,op.idx(a,i));   // tmp=a[i];
00077         for(size_t j=i; j>ii; j--)  op.ass(op.idx(a,j),op.idx(a,j-1));
00078         op.ass(op.idx(a,ii),tmp);
00079     }
00080 }
00081 
00097 template<typename T,typename _OP>void InsertionSort(
00098     T *a,size_t nelem,
00099     const _OP &op=TLDefaultOperators_CDT<T>())
00100 {
00101     T tmp;
00102     for(size_t i=1; i<nelem; i++)
00103     {
00104         if(op.le(op.idx(a,i-1),op.idx(a,i)))  continue;  // if(a[i-1]<=a[i])
00105         // Need to move a[i]. 
00106         op.ass(tmp,op.idx(a,i));   // tmp=a[i];
00107         size_t j=i;
00108         // See if it is smaller than a[0] (to get around index check in loop). 
00109         if(op.lt(op.idx(a,i),op.idx(a,0)))   // if(a[i]<a[0])
00110             for(; j; j--)  op.ass(op.idx(a,j),op.idx(a,j-1));
00111         else
00112             do { op.ass(op.idx(a,j),op.idx(a,j-1));  --j; }
00113             while(op.lt(tmp,op.idx(a,j-1)));
00114         // a[i] now moved. 
00115         op.ass(op.idx(a,j),tmp);
00116     }
00117 }
00118 
00119 
00147 template<typename T,typename _OP>void HeapSort(
00148     T *_a,size_t nelem,
00149     const _OP &op=TLDefaultOperators_CDT<T>())
00150 {
00151     // Heap addressing is shifted by 1 element so that address calculation 
00152     // gets easier. 
00153     T *a=(T*)(((char*)_a)-op.size());
00154     
00155     T tmp;
00156     // Build up heap: Largest element at the root. Use upheap operation. 
00157     for(size_t i=2; i<=nelem; i++)
00158     {
00159         // upheap(i) with heap size i. 
00160         // For simple types, these 3 possibilities are equally fast. 
00161         #if 1
00162             if(op.le(op.idx(a,i),op.idx(a,i/2)))  continue;
00163             size_t j=i,k=j/2;
00164             op.ass(tmp,op.idx(a,j));
00165             do
00166             {  op.ass(op.idx(a,j),op.idx(a,k)); j=k; k/=2;  }
00167             while(j>1 && op.lt(op.idx(a,k),tmp));
00168             op.ass(op.idx(a,j),tmp);
00169         #elif 0
00170             size_t j=i,k=j/2;
00171             while(j>1 && op.lt(op.idx(a,k),op.idx(a,j)))
00172             {  _SortSwap(a,j,k,tmp,op); j=k; k/=2;  }
00173         #else
00174             size_t j=i,k=j/2;
00175             op.ass(tmp,op.idx(a,j));
00176             while(j>1 && op.lt(op.idx(a,k),tmp))
00177             {  op.ass(op.idx(a,j),op.idx(a,k)); j=k; k/=2;  }
00178             op.ass(op.idx(a,j),tmp);
00179         #endif
00180     }
00181     
00182     // Remove elements from the heap: 
00183     // This is the part which should be improved. 
00184     for(size_t n=nelem; n>1; )
00185     {
00186         op.ass(tmp,op.idx(a,n));
00187         op.ass(op.idx(a,n--),op.idx(a,1));
00188         // downheap(1) with heap size n and element 1 already in tmp. 
00189         size_t k=1,e=n/2;
00190         while(k<=e)
00191         {
00192             #if 1  /* The second version is not faster, either. */
00193                 size_t j=k+k;
00194                 if(j<n && op.lt(op.idx(a,j),op.idx(a,j+1)))  ++j;
00195                 if(op.le(op.idx(a,j),tmp))  break;
00196                 op.ass(op.idx(a,k),op.idx(a,j));  k=j;
00197             #else
00198                 size_t j=k+k;
00199                 char *t=(char*)&op.idx(a,j),*t1=t+op.size();
00200                 if(j<n && op.lt(*(T*)t,*(T*)t1))
00201                 {  ++j;  t=t1;  }
00202                 if(op.le(*(T*)t,tmp))  break;
00203                 op.ass(op.idx(a,k),*(T*)t);  k=j;
00204             #endif
00205         }
00206         op.ass(op.idx(a,k),tmp);
00207     }
00208 }
00209 
00210 
00216 template<typename T,typename _OP>inline void _QuickSort_Final(
00217     T *a,size_t nelem,T &tmp,
00218     const _OP &op)
00219 {
00220     if(nelem<=2)
00221     {
00222         if(nelem<2)  return;
00223         if(op.lt(op.idx(a,1),op.idx(a,0)))  // a[1]<a[0]
00224             _SortSwap(a,0,1,tmp,op);
00225         return;
00226     }
00227     //if(nelem==3)  {  Sort3() }
00228     
00229     // One could either use one insertion sort step at the end for the 
00230     // complete array or calling it directly here for each sub-array. 
00231     // Tests showed that it is slightly faster here (in amount of compares) 
00232     // and also CPU caching should be better. 
00233     InsertionSort(a,nelem,op);
00234 }
00235 
00236 
00242 template<typename T,typename _OP>void _DumbQuickSort_R(
00243     T *a,size_t nelem,size_t insertion_sort_thresh,
00244     const _OP &op)
00245 {
00246     T tmp;
00247     
00248     // Sort small subarrays using insertion sort. 
00249     if(nelem<insertion_sort_thresh)  // insertion_sort_thresh>=2 (!!)
00250     {  _QuickSort_Final(a,nelem,tmp,op);  return;  }
00251     
00252     // MUST USE nelem>=2 here. 
00253     // Choose element to place: always the first element. 
00254     
00255     // The famous quicksort array walk: 
00256     T &key=op.idx(a,0);
00257     size_t aa=1,bb=nelem-1;
00258     while(aa<bb)
00259     {
00260         #if 1  /* Seems slightly faster for simple types. */
00261             while(aa<bb && op.le(op.idx(a,aa),key))  ++aa;
00262             while(aa<bb && op.le(key,op.idx(a,bb)))  --bb;
00263             if(aa>=bb)  break;
00264         #else
00265             for(;;++aa)
00266             {   if(aa>=bb)  goto breakout;
00267                 if(op.lt(key,op.idx(a,aa)))  break;  }
00268             for(;;--bb)
00269             {   if(aa>=bb)  goto breakout;
00270                 if(op.lt(op.idx(a,bb),key))  break;  }
00271             //Assert(aa<bb);
00272         #endif
00273         _SortSwap(a,aa++,bb--,tmp,op);
00274     } // breakout:;    // <-- Un-comment when using second variant above. 
00275     
00276     // Place key element: 
00277     if(op.gt(op.idx(a,bb),key))
00278     { Assert(aa==bb); --bb;  }
00279     _SortSwap(a,0,bb,tmp,op);
00280     
00281     // Recurse: 
00282     _DumbQuickSort_R(a,bb,insertion_sort_thresh,op);
00283     _DumbQuickSort_R(&op.idx(a,bb+1),nelem-(bb+1),insertion_sort_thresh,op);
00284 }
00285 
00301 template<typename T,typename _OP>void DumbQuickSort(
00302     T *a,size_t nelem,
00303     size_t insertion_sort_thresh=16,
00304     const _OP &op=TLDefaultOperators_CDT<T>())
00305 {
00306     _DumbQuickSort_R(a,nelem,
00307         insertion_sort_thresh<2 ? 2 : insertion_sort_thresh,op);
00308     // The insertion sort step is done at end of recursion for small 
00309     // subarray and not for all the array. 
00310     //InsertionSort(a,nelem,op);
00311 }
00312 
00313 
00322 #define TLIB_QUICKSORT_TAIL_CALL_OPTIMIZE 1
00323 template<typename T,typename _OP>void _CleverQuickSort_R(
00324     T *a,size_t nelem,size_t insertion_sort_thresh,
00325     const _OP &op)
00326 {
00327     T tmp,key;
00328     
00329 #if TLIB_QUICKSORT_TAIL_CALL_OPTIMIZE
00330 for(;;) {
00331 #endif
00332     // Sort small subarrays using insertion sort. 
00333     if(nelem<insertion_sort_thresh)  // insertion_sort_thresh>=3 (!!)
00334     {  _QuickSort_Final(a,nelem,tmp,op);  return;  }
00335     
00336     // MUST USE nelem>=3 here. 
00337     // Choose element to place: median of first, middle and last element. 
00338     // Sort these 3 elements here to provide marks so that we do not have 
00339     // to check the indices in the array walk. 
00340     // In the end, the largest of the 3 elements is at index bb, the smallest 
00341     // at index 1 (not 0) and the median ist saved in key. 
00342     size_t aa=0,bb=nelem-1,m=(aa+bb)/2;  // Start layout:  aa .. m .. bb
00343                                          // Target layout: m aa ... bb
00344     if(op.le(op.idx(a,aa),op.idx(a,bb)))  // aa<=bb
00345     {
00346         if(op.lt(op.idx(a,m),op.idx(a,aa)))  // m<aa  -> order: m aa bb
00347         {
00348             // largest (bb) at end: OK
00349             // median (aa) at beginning: OK
00350             op.ass(key,op.idx(a,aa));
00351             op.ass(op.idx(a,aa),op.idx(a,m));   // from here: swap m,1
00352             op.ass(op.idx(a,m),op.idx(a,1));
00353             op.ass(op.idx(a,1),op.idx(a,aa));
00354         }
00355         else if(op.le(op.idx(a,m),op.idx(a,bb)))  // m<=bb  -> order: aa m bb
00356         {
00357             // largest (bb) at end: OK
00358             op.ass(key,op.idx(a,m));
00359             op.ass(op.idx(a,m),op.idx(a,1));
00360             op.ass(op.idx(a,1),op.idx(a,aa));
00361             //op.ass(op.idx(a,aa),op.idx(a,m));  // (not needed)
00362         }
00363         else  // order: aa bb m
00364         {
00365             op.ass(key,op.idx(a,bb));
00366             op.ass(op.idx(a,bb),op.idx(a,m));  // largest to end
00367             op.ass(op.idx(a,m),op.idx(a,1));
00368             op.ass(op.idx(a,1),op.idx(a,aa));
00369         }
00370     }
00371     else if(op.lt(op.idx(a,bb),op.idx(a,m)))  // bb<m
00372     {
00373         if(op.le(op.idx(a,m),op.idx(a,aa)))  // m<=aa  -> order bb m aa
00374         {
00375             op.ass(key,op.idx(a,m));
00376             op.ass(op.idx(a,m),op.idx(a,1));
00377             op.ass(op.idx(a,1),op.idx(a,bb));
00378             op.ass(op.idx(a,bb),op.idx(a,aa));
00379         }
00380         else  // order: bb aa m
00381         {
00382             op.ass(key,op.idx(a,aa));
00383             op.ass(op.idx(a,aa),op.idx(a,m));
00384             op.ass(op.idx(a,m),op.idx(a,1));
00385             op.ass(op.idx(a,1),op.idx(a,bb));
00386             op.ass(op.idx(a,bb),op.idx(a,aa));
00387         }
00388     }
00389     else  // order: m bb aa
00390     {
00391         op.ass(key,op.idx(a,bb));
00392         op.ass(op.idx(a,bb),op.idx(a,aa));
00393         _SortSwap(a,m,1,tmp,op);
00394         /*op.ass(op.idx(a,aa),op.idx(a,m));
00395         op.ass(op.idx(a,m),op.idx(a,1));
00396         op.ass(op.idx(a,1),op.idx(a,aa));*/
00397     }
00398     
00399     // The famous quicksort array walk: 
00400     // We must use lt() and not le() because of the marks (to avoid the 
00401     // index check). 
00402     ++aa;
00403     while(aa<bb)
00404     {
00405         while(op.lt(op.idx(a,aa),key))  ++aa;
00406         while(op.lt(key,op.idx(a,bb)))  --bb;
00407         if(aa>=bb)  break;
00408         _SortSwap(a,aa++,bb--,tmp,op);
00409     }
00410     
00411     // Place key element: 
00412     if(op.gt(op.idx(a,bb),key))
00413     { Assert(aa==bb); --bb;  }
00414     op.ass(op.idx(a,0),op.idx(a,bb));
00415     op.ass(op.idx(a,bb),key);
00416     
00417 #if TLIB_QUICKSORT_TAIL_CALL_OPTIMIZE
00418     // Recurse on the smaller subarray and do the larger one by tail 
00419     // recursion elimination. This guarantees O(log(n)) memory 
00420     // consumption. 
00421     aa=nelem-(bb+1);
00422     if(bb<aa)
00423     {
00424         _CleverQuickSort_R(a,bb,insertion_sort_thresh,op);
00425         a=&op.idx(a,bb+1);  nelem=aa;
00426     } else {
00427         _CleverQuickSort_R(&op.idx(a,bb+1),aa,insertion_sort_thresh,op);
00428         nelem=bb;
00429     }
00430 } // end for(;;) {
00431 #else
00432     // Recurse: (simple version) 
00433     _CleverQuickSort_R(a,bb,insertion_sort_thresh,op);
00434     _CleverQuickSort_R(&op.idx(a,bb+1),nelem-(bb+1),insertion_sort_thresh,op);
00435 #endif
00436 }
00437 
00462 template<typename T,typename _OP>void CleverQuickSort(
00463     T *a,size_t nelem,
00464     size_t insertion_sort_thresh=16,
00465     const _OP &op=TLDefaultOperators_CDT<T>())
00466 {
00467     _CleverQuickSort_R(a,nelem,
00468         insertion_sort_thresh<3 ? 3 : insertion_sort_thresh,op);
00469     // The insertion sort step is done at end of recursion for small 
00470     // subarray and not for all the array. 
00471     //InsertionSort(a,nelem,op);
00472 }
00473 
00474 #endif  /* _TemplateLibrary_SORT_H_ */

Generated on Sat Feb 19 22:33:46 2005 for Ray by doxygen 1.3.5