POV-Ray: Newsgroups: povray.off-topic: Interesting take on C++ vs others...: Re: Interesting take on C++ vs others...

POV-Ray : Newsgroups : povray.off-topic : Interesting take on C++ vs others... : Re: Interesting take on C++ vs others...		Server Time 6 Sep 2024 01:26:15 EDT (-0400)
From: nemesis
Date: 4 Jun 2009 12:47:46
Message: <4a27fab2@news.povray.org>
nemesis escreveu:
> Perhaps I should bring on Jon Harrop to the rescue. :)

well, and truly I did:

from: Jon Harrop <jon### [at] ffconsultancycom>
to: namekuseijin <nam### [at] gmailcom>

date Tue, Jun 2, 2009 at 9:49 PM
subject Re: ocaml benchmark going bad

On Tuesday 02 June 2009 22:19:53 you wrote:
 > Hey, man.  Thought you'd like to know about this:
 >
 > http://metamatix.org/~ocaml/price-of-abstraction.html
 >
 > Don't know how old it is but seemingly OCaml is taking a beating by
 > both C++ and Scala on a mandelbrot implementation.  Perhaps the author
 > forgot some options to the ocamlopt compiler?
 >
 > saw it in the povray newsgroups:
 > 
http://news.povray.org/povray.off-topic/thread/%3C4a256d90%241%40news.povra
 >y.org%3E/


Hi,

The author claims that the programs are equivalent when they are not and 
makes many completely false statements as a consequence. Objectively, he 
should have asked for peer review but, actually, he could have written 
decent OCaml code just by copying any of the many efficient Mandelbrot 
benchmarks written in OCaml that are freely available on the web.

Firstly, he uses a simple and efficient imperative loop in the C++:

int iters1(int max_iter,double xc,double yc) {
   double x = xc;
   double y = yc;
   for(int count = 0;count<max_iter;count++) {
      if( x*x + y*y >= 4.0) { return count; }
      double tmp = x*x-y*y+xc;
      y = 2.0 * x * y + yc;
      x = tmp;
   }
   return max_iter;
}

Great. I'd expect to see a simple imperative and efficient loop in OCaml 
but the OCaml code he describes as "equivalent" is actually completely 
different:

let iters1 max_iter xc yc =
  let rec aux count x y =
    if count = max_iter then max_iter else begin
      if x *. x  +. y *. y >= 4.0 then count else
      aux (count+1) (x *. x -. y *. y +. xc) (2.0 *. x *. y +. yc)
    end in
  aux 0 xc yc

Eh? He's used a nested recursive function for no reason and that has
introduced two radical differences compared to the C++:

1. OCaml does not unbox floats across function boundaries like this so 
the floats are needlessly allocated and deallocated at every iteration 
in the OCaml. In comparison, the C++ does no heap allocation.

2. His nested function is not self-contained: it captures xc and yc from 
its environment. So this is compiled into a closure (a concept that does 
not exist in C++, of course) incurring allocation and making every 
single function invocation in the inner loop several times more 
expensive. In comparison, the C++ does direct function calls to 
statically-known locations.

No problem, I thought. This was obviously written by a C++ guy who 
doesn't know OCaml. But then I saw his OOP example in C++:

class Complex {
public:
   Complex(double xc,double yc) : x(xc), y(yc) { }

   double norm_square() {
      return x*x+y*y;
   }

   Complex operator*(const Complex &other) {
      return Complex(x*other.x-y*other.y,
                     x*other.y+y*other.x);
   }

   Complex operator+(const Complex &other) {
      return Complex(x + other.x, y + other.y);
   }

private:
   double x;
   double y;
};

int iters2(int max_iter,double xc,double yc) {
   Complex c(xc,yc);
   Complex z(xc,yc);
   for(int count = 0;count<max_iter;count++) {
      if( z.norm_square() >= 4.0 ) { return count; }
      z = z * z + c;
   }

   return max_iter;
}

His overloaded * does not implement multiplication correctly but, more
importantly, he forgot that objects in C++ are represented by pointers. 
By removing all of the pointers he has removed all of the allocation but 
also any chance of using subtypes and/or inheritance. In other words, 
this is not OOP at all. He has just used a struct.

Let's try an honest C++ equivalent that does support OOP, i.e. you can 
derive a new kind of complex number from the Complex class and use it
interchangeably because they will have the same uniform run-time 
representation of a *pointer* to an object:

#include <cstdio>

const int RESOLUTION = 5000;

class Complex {
public:
   Complex(double xc,double yc) : x(xc), y(yc) { }

   double norm_square() {
      return x*x+y*y;
   }

   Complex *sqr() {
     return new Complex(x*x - y*y, 2 * x*y);
   }

   Complex *add(const Complex *other) {
     return new Complex(x + other->x, y + other->y);
   }

private:
   double x;
   double y;
};

int iters2(int max_iter,double xc,double yc) {
   Complex *c = new Complex(xc,yc);
   Complex *z = new Complex(xc,yc);
   for(int count = 0;count<max_iter;count++) {
      if( z->norm_square() >= 4.0 ) {
        delete c;
        delete z;
        return count;
      }
      Complex *z2 = z->sqr(), *oz = z;
      z = z2->add(c);
      delete z2;
      delete oz;
   }
   delete c;
   delete z;

   return max_iter;
}

int main() {
   int    max_val = RESOLUTION/2;
   int    min_val = -max_val;
   double mul = 2.0 / max_val;
   int    count = 0;
   for(int i=min_val;i<=max_val;i++) {
      for(int j=min_val;j<=max_val;j++) {
         count += iters2(100,mul*i,mul*j);
      }
   }
   printf("result: %d\n",count);
}

That is 15x slower than his C++ code because allocation is so 
inefficient in C++.

He also failed to mention that abstraction is usually achieved with 
functors and not objects in OCaml. Let's try using the higher-order 
module system:

let resolution = 5000

module type COMPLEX = sig
  type t

  val mk : float -> float -> t
  val norm2 : t -> float
  val add : t -> t -> unit
  val sqr : t -> unit
end

module Complex : COMPLEX = struct
  type t = {mutable r: float; mutable i: float}

  let mk r i = {r=r; i=i}
  let norm2 z = z.r *. z.r +. z.i *. z.i
  let add z1 z2 =
    z1.r <- z1.r +. z2.r;
    z1.i <- z1.i +. z2.i
  let sqr z =
    let r' = z.r *. z.r -. z.i *. z.i and i' = 2.0 *. z.r *. z.i in
    z.r <- r';
    z.i <- i'
end

module Iters(C: COMPLEX) = struct
  let iters max_iter xc yc =
    let c = C.mk xc yc in
    let i = ref 0 and z = C.mk xc yc in
    while !i < max_iter && C.norm2 z < 4.0 do
      C.sqr z;
      C.add z c;
      incr i
    done;
    !i
end

let () =
  let module I = Iters(Complex) in
  let max_val = resolution/2 in
  let min_val = - max_val in
  let mul = 2.0 /. float max_val in
  let count = ref 0 in
  for i = min_val to max_val do
    for j = min_val to max_val do
      let x = float i *. mul in
      let y = float j *. mul in
      count := !count + I.iters 100 x y;
    done
  done;
  Printf.printf "result: %d\n" !count

So here are my timings:

C++:    3s   (raw)
C++:   35s   (genuine objects)
OCaml:  4.6s (record)
OCaml: 10s   (functor)

--
Dr Jon Harrop, Flying Frog Consultancy Ltd.
http://www.ffconsultancy.com/?e
Post a reply to this message