Ray: /ray/src/lib/numerics/rootsolve/direct.cc Source File

00001 /*
00002  * lib/numerics/rootsolve/direct.cc 
00003  * 
00004  * Numerics library root solving: Directly solvable cases (i.e. where an 
00005  * explicit formula exitst). 
00006  * 
00007  * Copyright (c) 2004 by Wolfgang Wieser ] wwieser (a) gmx <*> de [
00008  * 
00009  * This file may be distributed and/or modified under the terms of the
00010  * GNU General Public License version 2 as published by the Free Software
00011  * Foundation. (See COPYING.GPL for details.)
00012  * 
00013  * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
00014  * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
00015  * 
00016  */ 
00017 
00018 #include "rootsolve.h"
00019 
00020 
00021 // THREAD SAFETY CHECK: PASSED. 
00022 
00023 namespace NUM
00024 {
00025 
00026 // Initialize static const data: 
00027 // Values smaller than this are treated as zero. 
00028 const PolyRootSolver_Direct::Params PolyRootSolver_Direct::defaults = 
00029 {
00030     INIT_FIELD(nearly_zero) 1e-10  // Rayshade supposes 1e-9
00031 };
00032 
00033 
00034 // Solve roots of polynomial of degree 1, i.e. 
00035 //  0 = c[0] + c[1]*x
00036 // Root returned in r[0]; number of roots returned as return value. 
00037 int PolyRootSolver_Direct::Solve_1(const dbl *c,dbl *r)
00038 {
00039     // Well, THIS is easy. 
00040     
00041     if(c[1]==0.0)  return(0);
00042     
00043     r[0]=-c[0]/c[1];
00044     
00045     return(1);
00046 }
00047 
00048 
00049 // Solve roots of polynomial of degree 2, i.e. 
00050 //  0 = c[0] + c[1]*x + c[2]*x^2
00051 // Roots returned in r[0..1]; number of roots returned as return value. 
00052 int PolyRootSolver_Direct::Solve_2(const dbl *c,dbl *r)
00053 {
00054     if(c[2]==0.0)
00055     {
00056         // In this case the polynomial is actually of degree 1. 
00057         return(Solve_1(c,r));
00058     }
00059     
00060     #if 0
00061         //---- First method: use normal form x^2 + p x + q = 0 ----
00062         // This can be found in any better maths formula book 
00063         // (e.g. Bronstein et al). 
00064         dbl p = -0.5*c[1] / c[2];
00065         dbl q =      c[0] / c[2];
00066         dbl D = sqr(p) - q;
00067         // If the discriminant is nearly zero, we have one (dbl) root: 
00068         if(fabs(D)<par.nearly_zero)
00069         {  r[0]=p;  return(1);  }
00070         // Otherwise we have 2 unless the discriminant is smaller than zero. 
00071         if(D>0.0)
00072         {
00073             D = sqrt(D);
00074             r[0]=p+D;
00075             r[1]=p-D;
00076             return(2);
00077         }
00078     #else
00079         //---- Second method: Do as I have learned it in school :) ----
00080         // This method runs somewhat faster on my machine even if I 
00081         // replace the 2 divisions st the beginning of the upper 
00082         // variant by one division and 2 multiplications. 
00083         // Furthermore, I (Wolfgang) implemented some additional 
00084         // nonstandard code to reduce leading digit cancellation. 
00085         dbl D = sqr(c[1]) - 4.0*c[0]*c[2];
00086         // If the discriminant is nearly zero, we have one (dbl) root: 
00087         if(fabs(D)<par.nearly_zero)
00088         {  r[0] = -0.5*c[1]/c[2];  return(1);  }
00089         // Otherwise we have 2 unless the discriminant is smaller than zero. 
00090         if(D>0.0)
00091         {
00092             D = sqrt(D);
00093             #if 0
00094                 // This is the standard school-book way of calculting it: 
00095                 dbl f = -0.5/c[2];
00096                 r[0] = (c[1]+D)*f;
00097                 r[1] = (c[1]-D)*f;
00098             #else
00099                 // This variant will reduce the digit cancellation effect. 
00100                 // Disavantage: one additional fabs() and 
00101                 //              one additional division
00102                 // Advantage: one fewer subtraction, one fewer multiplication
00103                 D = -0.5*( c[1]<0 ? (c[1]-D) : (c[1]+D) );
00104                 // Note: The case D==0 cannot happen here since D>0 
00105                 //       and gets added to c[1] in a way which increases 
00106                 //       the absolute value. 
00107                 r[0] = c[0]/D;
00108                 r[1] = D/c[2];
00109             #endif
00110             return(2);
00111         }
00112     #endif
00113     
00114     return(0);
00115 }
00116 
00117 
00118 // Solve roots of polynomial of degree 3, i.e. 
00119 //  0 = c[0] + c[1]*x + c[2]*x^2 + c[3]*x^3
00120 // Roots returned in r[0..2]; number of roots returned as return value. 
00121 // This uses the algorithm proposed by Cardano. 
00122 int PolyRootSolver_Direct::Solve_3(const dbl *c,dbl *r)
00123 {
00124     if(c[3]==0.0)
00125     {
00126         // In this case the polynomial is actually of degree 2. 
00127         return(Solve_2(c,r));
00128     }
00129     
00130     // This application of Cardano's formula is based on 
00131     // - code taken from Raycer 1.0 BETA, Copyright (C) 2002 Tim E. Madsen, 
00132     //   Reuben Lee, Justin Pahl, distributed under the GPL. 
00133     // - "Exact solutions of cubic polynomial equations" by 
00134     //   Stephen R. Schmitt (ported from Zeno-1.2 to C(++))
00135     // - Bronstein, et al. 
00136     
00137     // Normal form: x^3 + Ax^2 + Bx + C = 0. 
00138     dbl cc = 1/c[3];
00139     dbl A = c[2]*cc;
00140     dbl B = c[1]*cc;
00141     dbl C = c[0]*cc;
00142     // Substitute x = y - A/3 to eliminate quadric term: x^3 +px + q = 0
00143     dbl sq_A = sqr(A);
00144     // (These was modified by me (Wolfgang) by multiplying/factoring 
00145     //  out FP constants and variables; p was changed into -par.)
00146     dbl p = (sq_A-3.0*B) / 9.0;
00147     dbl q = A * (sq_A-4.5*B) / 27 + 0.5*C;
00148     dbl cb_p = p*p*p;
00149     dbl D = sqr(q)-cb_p;
00150     // Substitution value (see above; moved here by Wolfgang): 
00151     dbl sub = A/-3.0;
00152     
00153     // This case is extremely unlikely (my test did not trigger it and 
00154     // I am hence not even sure if it works correctly), so I will leave 
00155     // it away via #if 0...#endif
00156     #if 1
00157     if(fabs(D)<par.nearly_zero)
00158     {
00159 CritAssert(!"CHECK ME!\n");
00160         
00161         if(fabs(q)<par.nearly_zero)
00162         {
00163             // One triple solution
00164             r[0]=sub;
00165             return(1);
00166         }
00167         // One single and one dbl solution
00168         dbl u = cbrt(-q);
00169         r[0]=2.0*u+sub;
00170         r[1]=sub-u;
00171         return(2);
00172     }
00173     #endif
00174     
00175     if(D>0.0)
00176     {
00177         // One real solution. 
00178         // This case is probably the most likely one. 
00179         // There are 3 methods to compute this. 
00180         
00181         #if 0
00182             // Fastest but worst method. This is the original Rayshade algo. 
00183             D = sqrt(D);
00184             r[0] = cbrt(D-q) - cbrt(D+q) + sub;
00185         #elif 0
00186             // This relies on cbrt() to be well implemented but suffers 
00187             // somewhat from leading digit cancellation in the sqrt()+q part. 
00188             D = -cbrt( sqrt(q*q-cb_p) + q );
00189             r[0] = (D + p/D) + sub;
00190         #else
00191             // This is essentially the same as above but working around the 
00192             // leading digit cancellation. See Solve_2() for a 
00193             // similar trick. This showed to be the best method in my tests. 
00194             // The improvement here is much more than in the 2nd order case. 
00195             D = sqrt(q*q-cb_p);
00196             D = q<0 ? cbrt(D-q) : -cbrt(D+q) ;
00197             r[0] = (D + p/D) + sub;
00198         #endif
00199         return(1);
00200     }
00201     
00202     // Casus irreducibilis: three real solutions. 
00203     dbl phi = (1.0/3) * acos(q / sqrt(cb_p));
00204     dbl t = -2*sqrt(p);
00205     r[0] = t * cos(phi        ) + sub;
00206     r[1] = t * cos(phi+FPConst<dbl>::mul_pi_2_3) + sub;
00207     r[2] = t * cos(phi-FPConst<dbl>::mul_pi_2_3) + sub;
00208     return(3);
00209 }
00210 
00211 
00212 // Solve roots of polynomial of degree 4, i.e. 
00213 //  0 = c[0] + c[1]*x + c[2]*x^2 + c[3]*x^3 + c[4]*x^4
00214 // Roots returned in r[0..3]; number of roots returned as return value. 
00215 // This uses the algorithm proposed by Francois Vieta. 
00216 int PolyRootSolver_Direct::Solve_4(const dbl *c,dbl *_r)
00217 {
00218     if(c[4]==0.0)
00219     {
00220         // In this case the polynomial is actually of degree 2. 
00221         return(Solve_3(c,_r));
00222     }
00223     
00224     // This is based on the the implementation as found in 
00225     // Raycer 1.0 BETA, Copyright (C) 2002 Tim E. Madsen, 
00226     // Reuben Lee, Justin Pahl, distributed under the GPL. 
00227     // I (Wolfgang) spent several hours in optimizing this algorithm by 
00228     // pulling factors out and calculating better temporaries when 
00229     // hand-inlining the needed solvers for 2nd and 3rd order polynomials. 
00230     
00231     // Use normal form: x^4 + Ax^3 + Bx^2 + Cx + D = 0. 
00232     dbl f=1.0/c[4];
00233     dbl A = c[3]*f;
00234     dbl B = c[2]*f;
00235     dbl C = c[1]*f;
00236     dbl D = c[0]*f;
00237 
00238     // Substitute x = y - A/4 to eliminate cubic term: 
00239     //   x^4 + px^2 + qx + r = 0
00240     // Note: These constants are all correct (e.g. 3/256 = 0.011718750000..); 
00241     //       I (Wolfgang) computed them from Raycer-1.0 code. 
00242     dbl sq_A = A * A;
00243     dbl p = -0.375 * sq_A + B;
00244     dbl q = (0.125 * sq_A - 0.5*B)*A + C;
00245     //dbl r = (-0.01171875*sq_A + 0.0625*B)*sq_A - 0.25*A*C + D;
00246     dbl r = -0.01171875*sq_A*sq_A + 0.0625*sq_A*B - 0.25*A*C + D;
00247     
00248     dbl coeffs[4];
00249     int num;
00250     if(fabs(r)<par.nearly_zero)
00251     {
00252         // No absolute term: y(y^3 + py + q) = 0 
00253         coeffs[0] = q;
00254         coeffs[1] = p;
00255         coeffs[2] = 0;
00256         coeffs[3] = 1;
00257         num = Solve_3(coeffs,_r);
00258         
00259         // Resubstitute: 
00260         dbl sub = -0.25*A;
00261         for(int i=0; i<num; i++)  _r[i]+=sub;
00262         
00263         // Add the solution y=0: 
00264         _r[num++] = sub;
00265     }
00266     else
00267     {
00268         // Solve the resolvent cubic...
00269         #if 0
00270             coeffs[0] =  0.5*r*p - 0.125*q*q;
00271             coeffs[1] = -r;
00272             coeffs[2] = -0.5*p;
00273             coeffs[3] =  1;
00274             num = Solve_3(coeffs,_r);
00275             if(!num)  return(0);  // This should not happen... (3rd order!!)
00276 
00277             // ...and take the one real solution...
00278             dbl z = _r[0];
00279         #else
00280             // This is the same as above but "hand-inlined": 
00281             // This works much faster than the above version. 
00282             dbl z;  // "any real solution"
00283             do {
00284                 dbl sq_p = sqr(p);
00285                 dbl pp = (sq_p+12*r)/36.0;
00286                 dbl qq = -0.0625*q*q - p*(sq_p-36*r)/216.0; // <-- not "pp"
00287                 dbl cb_pp = pp*pp*pp;
00288                 dbl D = sqr(qq)-cb_pp;
00289                 dbl sub = p/6.0;  // <-- not "pp"
00290                 
00291                 if(fabs(D)<par.nearly_zero)
00292                 {
00293                     if(fabs(qq)<par.nearly_zero)
00294                     {  z=sub;  break;  }
00295                     // Take one of these: 
00296                     //z=2.0*cbrt(-q)+sub;   // single solution
00297                     z=sub-cbrt(-qq);       // dbl solution
00298                     break;
00299                 }
00300                 if(D>0.0)
00301                 {
00302                     D = sqrt(qq*qq-cb_pp);
00303                     D = qq<0 ? cbrt(D-qq) : -cbrt(D+qq) ;
00304                     z = (D + pp/D) + sub;
00305                     break;
00306                 }
00307                 // Take any one of these: 
00308                 z = -2*sqrt(pp) * cos((1.0/3) * acos(qq/sqrt(cb_pp))        ) + sub;
00309                 //z = -2*sqrt(pp) * cos((1.0/3) * acos(qq/sqrt(cb_pp))+M_2PI_3) + sub;
00310                 //z = -2*sqrt(pp) * cos((1.0/3) * acos(qq/sqrt(cb_pp))-M_2PI_3) + sub;
00311                 break;
00312             } while(0);
00313         #endif
00314         
00315         // ...to build two quadric equations
00316         dbl u = z*z - r;
00317         if(fabs(u)<par.nearly_zero)  u=0;
00318         else if(u>0)  u=sqrt(u);
00319         else return 0;
00320         
00321         dbl v = 2*z - p;
00322         if(fabs(v)<par.nearly_zero)  v=0;
00323         else if(v>0)  v=sqrt(v);
00324         else return 0;
00325         
00326         #if 0
00327             coeffs[0] = z-u;
00328             coeffs[1] = q<0 ? -v : v;
00329             coeffs[2] = 1;
00330             num=Solve_2(coeffs,_r);
00331             
00332             coeffs[0] = z+u;
00333             coeffs[1] = -coeffs[1];  // q<0 ? v : -v;
00334             //coeffs[2] = 1;   (already)
00335             num += Solve_2(coeffs,_r+num);
00336             
00337             // Resubstitute: 
00338             dbl sub = -0.25*A;
00339             for(int i=0; i<num; i++)  _r[i]+=sub;
00340         #else
00341             // This is the same as above but "hand-inlined": 
00342             // This works faster than the above version. 
00343             num=0;
00344             dbl c_1 = 0.5 * (q<0 ? -v : v);
00345             bool c_1_gn = c_1>0;
00346             dbl DD = sqr(c_1) - z;
00347             dbl sub = -0.25*A;  // (undo substitution)
00348             dbl D = DD + u;
00349             if(fabs(D)<par.nearly_zero)
00350             {  _r[num++] = sub - c_1;  }
00351             else if(D>0)
00352             {
00353                 D = sqrt(D);
00354                 #if 0  /* faster */
00355                     dbl sub2 = sub - c_1;
00356                     _r[num++] = sub2 - D;
00357                     _r[num++] = sub2 + D;
00358                 #else  /* slightly more accurate */
00359                     D = c_1_gn ? (c_1+D) : (c_1-D);
00360                     _r[num++] = sub - D;
00361                     _r[num++] = (u-z)/D + sub;
00362                 #endif
00363             }
00364             
00365             D = DD - u;
00366             if(fabs(D)<par.nearly_zero)
00367             {  _r[num++] = sub + c_1;  }
00368             else if(D>0) 
00369             {
00370                 D = sqrt(D);
00371                 #if 0  /* faster */
00372                     dbl sub2 = sub + c_1;
00373                     _r[num++] = sub2 - D;
00374                     _r[num++] = sub2 + D;
00375                 #else  /* slightly more accurate */
00376                     D = c_1_gn ? (c_1+D) : (c_1-D);
00377                     _r[num++] = D + sub;
00378                     _r[num++] = (z+u)/D + sub;
00379                 #endif
00380             }
00381         #endif
00382     }
00383     
00384     return(num);
00385 }
00386 
00387 
00388 int PolyRootSolver_Direct::solve(int order,const dbl *c,dbl *r)
00389 {
00390     switch(order)
00391     {
00392         case 1:  return(Solve_1(c,r));
00393         case 2:  return(Solve_2(c,r));
00394         case 3:  return(Solve_3(c,r));
00395         case 4:  return(Solve_4(c,r));
00396         //default: run off...
00397     }
00398     
00399     return(-2);   // invalid order parameter
00400 }
00401 
00402 
00403 PolyRootSolver_Direct::PolyRootSolver_Direct() : 
00404     PolyRootSolver(),
00405     par(defaults)
00406 {
00407     // empty
00408 }
00409 
00410 PolyRootSolver_Direct::~PolyRootSolver_Direct()
00411 {
00412     // Not inline because virtual. 
00413 }
00414 
00415 }  // end of namespace NUM
00416 
00417 
00418 #if 0
00419 // Test routine: NOTE: Test routine is for old non-object oriented code; 
00420 //                     needs to be changed for current version. 
00421 int main()
00422 {
00423     // This is nothing intelligent; just throw random havoc onto the 
00424     // routines. 
00425     dbl c[5],r[4],rB[4];
00426     int deg=2;
00427     #if 1
00428     fprintf(stderr,"DEG=%d\n",deg);
00429     for(int i=0; i<1000000; i++)
00430     {
00431         for(int j=0; j<=deg; j++)
00432             c[j]=(rand()%2000-1000)/100.0;
00433         int n=NUM::SolvePolyRoots_2(c,r);
00434         for(int j=0; j<n; j++)
00435         {
00436             dbl R=NUM::PolvEval(r[j],c,deg);
00437             if(fabs(R)>5e-13)  // 13
00438                 fprintf(stderr,"[%3d] %g %g %g -> %g for %g (n=%d, j=%d)\n",i,
00439                     c[0],c[1],c[2],R,r[j],n,j);
00440         }
00441     }
00442     
00443     deg=3;
00444     fprintf(stderr,"DEG=%d\n",deg);
00445     for(int i=0; i<1000000; i++)
00446     {
00447         for(int j=0; j<=deg; j++)
00448             c[j]=(rand()%2000-1000)/100.0;
00449         int n=NUM::SolvePolyRoots_3(c,r);
00450         //int nB=NUM::SolvePolyRoots_3_B(c,rB);
00451         for(int j=0; j<n; j++)
00452         {
00453             dbl R=NUM::PolvEval(r[j],c,deg);
00454             if(fabs(R)>1e-9)
00455                 fprintf(stderr,"[%6d ] %g %g %g %g -> %g for %g (n=%d,j=%d)\n",
00456                     i,c[0],c[1],c[2],c[3],R,r[j],n,j);
00457         }
00458         /*for(int j=0; j<nB; j++)
00459         {
00460             dbl R=NUM::PolvEval(rB[j],c,deg);
00461             if(fabs(R)>1e-9)
00462                 fprintf(stderr,"[%6dB] %g %g %g %g -> %g for %g (n=%d,j=%d)\n",
00463                     i,c[0],c[1],c[2],c[3],R,rB[j],nB,j);
00464         }
00465         if(n!=nB)
00466         {
00467             fprintf(stderr,"[%6d*]: n=%d, nB=%d:",i,n,nB);
00468             for(int j=0; j<n; j++)  fprintf(stderr," %g",r[j]);
00469             fprintf(stderr," <->");
00470             for(int j=0; j<nB; j++)  fprintf(stderr," %g",rB[j]);
00471             fprintf(stderr,"\n");
00472         }*/
00473     }
00474     #endif
00475     
00476     deg=4;
00477     fprintf(stderr,"DEG=%d\n",deg);
00478     for(int i=0; i<1000000; i++)
00479     {
00480         for(int j=0; j<=deg; j++)
00481             c[j]=(rand()%2000-1000)/100.0;
00482         int n=NUM::SolvePolyRoots_4(c,r);
00483         //int nB=NUM::SolvePolyRoots_4_B(c,rB);
00484         for(int j=0; j<n; j++)
00485         {
00486             dbl R=NUM::PolvEval(r[j],c,deg);
00487             if(fabs(R)>1e-5)  // 8
00488                 fprintf(stderr,"[%6d ] %g %g %g %g %g -> %g for %g (n=%d,j=%d)\n",
00489                     i,c[0],c[1],c[2],c[3],c[4],R,r[j],n,j);
00490         }
00491         /*
00492         for(int j=0; j<nB; j++)
00493         {
00494             dbl R=NUM::PolvEval(rB[j],c,deg);
00495             if(fabs(R)>1e-5)  // 8
00496                 fprintf(stderr,"[%6dB] %g %g %g %g %g -> %g for %g (n=%d,j=%d)\n",
00497                     i,c[0],c[1],c[2],c[3],c[4],R,rB[j],nB,j);
00498         }
00499         if(n!=nB)
00500         {
00501             fprintf(stderr,"[%6d*]: n=%d, nB=%d:",i,n,nB);
00502             for(int j=0; j<n; j++)  fprintf(stderr," %g",r[j]);
00503             fprintf(stderr," <->");
00504             for(int j=0; j<nB; j++)  fprintf(stderr," %g",rB[j]);
00505             fprintf(stderr,"\n");
00506         }*/
00507     }
00508     
00509     return(0);
00510 }
00511 #endif