|
|
I messed with the f_mandelbulb internal function, squeezed about 10-15% better
performance out of it, and posted what I have so far below. (Note the handy
sincos() function -- a GNU extension in glibc's math library rather than part
of standard C -- which saves a couple of trig calls.)
Interestingly, I also found an additional 10-15% improvement in render time by
seriously overloading the processors with one to two hundred POV-Ray worker
threads on eight cores and using smaller render block size (16 for 640*360
images). I'm guessing the extra switching cost is more than offset by keeping
the floating-point pipelines full.
~David Wagner
DBL f_mandelbulb(FPUContext *ctx, DBL *ptr, unsigned int) // 79
{
    /*
     * Mandelbulb iteration returning the reciprocal of a smoothed
     * iteration count.
     *
     * Coded by David Wagner, and based on
     *   - Daniel White's original squaring formula,
     *   - Paul Nylander's generalization and phi phase shift formulation,
     *   - the Normalized Iteration Count Algorithm,
     *   - Abram Hindle's POV-Ray internal function patch.
     *
     * Inputs come from the PARAM_X/PARAM_Y/PARAM_Z and PARAM(n) macros,
     * which are defined elsewhere (presumably they index into ptr --
     * verify against the macro definitions):
     *   PARAM_X/Y/Z  sample point; used both as the starting value of the
     *                iteration and as the constant added on every step
     *   PARAM(0)     radial power exponent
     *   PARAM(1)     power of elevation theta
     *   PARAM(2)     power of azimuth phi
     *   PARAM(3)     phase shift of phi
     *   PARAM(4)     maximum number of iterations (converted to int)
     *   PARAM(5)     assumed divergence radius
     *
     * ctx and the unnamed third argument are not used in this body.
     */
    const DBL origin_x = PARAM_X;
    const DBL origin_y = PARAM_Y;
    const DBL origin_z = PARAM_Z;

    const DBL power_r = PARAM(0);
    const DBL power_theta = PARAM(1);
    const DBL power_phi = PARAM(2);
    const DBL phi_shift = PARAM(3);
    const int max_iter = PARAM(4);
    const DBL escape_r2 = pow(PARAM(5),2); // compared against squared magnitudes

    // The loop works on the squared magnitude, so raising it to half the
    // radial power yields magnitude^power without taking a square root.
    const DBL half_power = power_r * 0.5;

    DBL px = origin_x;
    DBL py = origin_y;
    DBL pz = origin_z;
    DBL mag2 = px*px + py*py + pz*pz;

    int iter;
    for (iter = 1; iter < max_iter && mag2 < escape_r2; ++iter) {
        DBL sin_phi, cos_phi, sin_theta, cos_theta;
        const DBL mag_p = pow(mag2, half_power);

        // Both angle pairs are taken from the current point before any
        // coordinate is overwritten below.
        sincos(power_phi * atan2(pz, sqrt(px*px + py*py)) + phi_shift,
               &sin_phi, &cos_phi);
        sincos(power_theta * atan2(py, px), &sin_theta, &cos_theta);

        pz = origin_z + mag_p * sin_phi;
        px = origin_x + cos_theta * mag_p*cos_phi;
        py = origin_y + sin_theta * mag_p*cos_phi;
        mag2 = px*px + py*py + pz*pz;
    }

    // iter < max_iter here means the magnitude test ended the loop, and
    // mag2 is used as-is.  Otherwise the iteration limit was reached and
    // 1 is added to mag2 before taking its logarithm
    // (NOTE(review): presumably to keep that logarithm positive -- confirm).
    const DBL log_arg = (iter < max_iter) ? mag2 : 1+mag2;

    // log(half_power) + ln(2) == log(2*half_power), i.e. the logarithm of
    // the radial power exponent.
    return 1/( iter + log(log(escape_r2) / log(log_arg))
               / (log(half_power)+0.693147180559945) );
}
Post a reply to this message
|
|