// .----------------------------------------------------------------------.
// | Jaap's distributed render system for povray animations v 0.02        |
// |                                                                      |
// | (for a Windows network)                                              |
// |                                                                      |
// | 2006 J.W. Stolk http://www.jwstolk.xs4all.nl                         |
// '----------------------------------------------------------------------'

// .----------------------------------------------------------------------.
// | Why?                                                                 |
// '----------------------------------------------------------------------'
//
// It's very easy to speed up rendering of an animation by using more
// computers: simply render a subset of frames on each computer. It can be
// done by choosing appropriate start_frame and end_frame settings for each
// computer.
// The biggest disadvantage of this method is that the render time per frame
// (as well as the processing power of the used computers) can vary a lot.
// The result is that some computers have finished their part (and do
// nothing) while other computers still have a few days to go...
//
// This render system contains a "master" (this file) that distributes the
// to-be-rendered frames to the "clients" one by one, until all frames are
// rendered. This way all computers will be used until the whole animation
// is completed.
//
// Apart from a small dos batch file that repeatedly runs povray, it's all
// SDL, so you should be able to change it to your needs. :-)

// .----------------------------------------------------------------------.
// | Changes:                                                             |
// '----------------------------------------------------------------------'
// v0.02: -now detecting failed render (skip frame)
//        -updated batch files (added %client_name% setting)
//        -now counting frames rendered by each client
//        -calculate ETA
//        -updated comments

// .----------------------------------------------------------------------.
// | How it works:                                                        |
// '----------------------------------------------------------------------'
//
// master <-> client communication:
// Very simple: it is done with files in a shared folder on a windows
// network. All clients need read and write permission in this folder.
//
// It all starts with the master_loop.bat batch file. This batch file
// contains a simple loop that repeatedly "renders" the master.pov file.
// You should run this _ONLY_ on the master computer.
//
// master.pov keeps track of the render farm status using a simple text
// file. The text file mainly indicates which frame each client is
// rendering. master.pov assigns frames to the clients by generating a batch
// file with the name of the client, containing a render command for that
// frame:
//    com_<client_name>.bat
//
// Each client runs its own copy of the client_loop.bat batch file. This
// batch file moves the render command to the client, runs it and copies the
// rendered image back to the shared folder. A file "res_<client_name>.txt"
// is created to indicate a failed or successful render.
//
// master.pov detects that a client is ready by testing for the
// "res_<client_name>.txt" file. Then it assigns a new frame to that client.
//

// The time/date functions used further down need MegaPOV:
#version unofficial MegaPov 1.21;

// .----------------------------------------------------------------------.
// | usage:                                                               |
// '----------------------------------------------------------------------'

// Make up a name for each computer and list it here.
// `clients` must equal the number of names in the array.
#declare clients = 3;   // (max 10 in winXP!, see notes below)
#declare client_name = array[clients]
{
  "pc_red",
  "pc_green",
  "pc_blue"     // (note: no comma after the last entry)
}

// Put the following batch file on each client. (call it client_loop.bat)
/* --- client_loop.bat --- (cut here) ----
@echo on
REM -- Settings:
set client_name=pc_red
set local_dir=c:\render_farm\
set shared_dir=z:\render_farm\

:loop
@echo -------------------- %client_name% --------------------
sleep 1
REM -- check for a new command:
IF EXIST "%shared_dir%com_%client_name%.bat" GOTO com_found
@echo no command avalable
goto loop

:com_found
REM -- move the render command to this client:
copy "%shared_dir%com_%client_name%.bat" "%local_dir%" /y
del "%shared_dir%com_%client_name%.bat"
REM -- run the render command:
@echo Rendering . . .
call com_%client_name%.bat
REM -- delete the finished render command: (prevents running it again)
del "%local_dir%com_%client_name%.bat"
REM -- is there a result?
IF EXIST "*.png" GOTO png_found
REM -- return ERROR to master:
@echo ERROR !
echo 0, > "%shared_dir%res_%client_name%.txt"
goto loop

:png_found
REM -- move the resulting image to the shared directory:
copy "%local_dir%*.png" "%shared_dir%"
del "%local_dir%*.png"
REM -- return DONE to master:
@echo Done.
echo 1, > "%shared_dir%res_%client_name%.txt"
goto loop
*/
//--- (upto here) ----
// On each computer:
//  - change the client name to a unique name
//  - set the local and shared directory
//  - you will also need the sleep.exe DOS utility !!

// Render settings for the animation:
// "test.pov" -> test001.png .. test199.png
#declare animation_name        = "test";  // the pov-file you want to render, without ".pov" !!
#declare animation_first_frame = 1;
#declare animation_last_frame  = 199;

// Command line options shared by every frame. Do NOT put "+SF" or "+EF"
// in here: master.pov appends them for each assigned frame.
// (I don't know why I needed the "+L", it's already in the povray.ini file)
#declare animation_render_options= "+w720 +h400 +a0.3 -j +KF8.0 +KFF199 +FN +L\"C:\\jaap\\render_farm\\POV-Ray for Windows v3.6\\INCLUDE\"";

// Full path of the render engine as seen from a client:
#declare client_povray_location = "C:\\jaap\\render_farm\\POV-Ray for Windows v3.6\\bin\\megapov.exe";

// Text file in which master.pov keeps the farm status between passes:
#declare status_file_name = "render_farm_status.txt";

// The master computer runs (renders) master.pov repeatedly using
// the following batch file. (call it master_loop.bat)
/* --- master_loop.bat --- (cut here) ----
@echo off
REM -- Settings:
set local_povray=C:\render_farm\POV-Ray for Windows v3.6\bin\megapov.exe

:loop
sleep 1
REM -- try to assign frames to clients:
"%local_povray%" master.pov +w10 +h10 -a -j -D -F
REM -- rename new assignments from .tmp to .bat:
REM (to prevent race conditions)
call rename_list.bat
goto loop
*/
//--- (upto here) ----

// That's it !! You are now ready to start your render farm :-)
//
// Start the client_loop.bat on each client first.
// Finally start the master_loop.bat on the master computer.

// .----------------------------------------------------------------------.
// | maximum number of connected computers:                               |
// '----------------------------------------------------------------------'
//
// There is a very hard limit on the number of connections in windows XP.
// Apparently to sell more copies of windows server... the limit is only 10.
// To make matters worse, there also was a bug in XP that caused connections
// to expire too slowly, so it was very easy to reach the maximum. This bug
// was fixed in SP2: http://support.microsoft.com/kb/815432
// This really sick limit is asking for equally sick solutions:
// In order of preference: (I have not tested any of these)
// -put the shared folder on a win98 box (upgrade from winXP to win98...)
// -make multiple shared volumes and change master.pov to put the render
//  commands (and look for PNG's) in the first share for clients 1..9 and
//  use the next share for clients 10..18, etc.
// -run SAMBA on a Linux box (SAMBA should be on the Knoppix live CD)
// -make a super-master.pov, 10 master.pov's and 100 clients ?
// -use some kind of network storage device
// -
// -(pay the ransom for a copy of the "unrestricted" windows server)

// .----------------------------------------------------------------------.
// | notes: (PLEASE READ):                                                |
// '----------------------------------------------------------------------'
//
// -you can open the render_farm_status.txt at any time using notepad
//  (notepad will open the file as read-only), so you can see what the next
//  (non-rendered) frame is and which frame each client is rendering.
// -it's set up for PNG output images.
// -it's set up for megapov; you only need to change the
//  client_povray_location to make it work with the official pov-ray
// -you probably need to disable I/O restrictions (only for the shared
//  folder?)
// -each client has its own copy of the povray code that you want to render.
//  (a local copy is faster anyway)
// -before starting:
//   -the old render_farm_status.txt file should be deleted !!!
//   -the output directories in the clients should not contain _any_ PNG's
//   -the clients should not contain old render commands
//   -the shared directory should not contain PNG's with the same filename.
//   -the shared directory should not contain old render commands
// -you should probably not run the client on the master computer. It would
//  result in two instances of povray running on the same computer.
// -you can use a slow computer as master, since it doesn't do any rendering.
// -it's best to start the loop.bat files from a dos-box. That way the
//  dos-box is not closed if there is an error, so you can read the error :-)
// -in case of problems, remove the "@echo off" from the batch files, to see
//  which command is causing the error.
// -the loops can only be stopped manually ( <Ctrl>+C ) or by closing the
//  dos-box. They don't stop automatically when the animation is rendered.
// -povray must be installed in the same path on each client
// -I installed povray+megapov in c:\render_farm\ (can be anywhere)
// -I needed to use a very short pov-ray name to keep the name of the rendered
//  images (including the frame number!) within 8 characters. It's probably a
//  windows megapov limitation, I haven't noticed it in the Linux version.
// -possible (big?) rewrite: don't define a table with client names here,
//  let each client indicate its own name in some way. That would make it
//  possible to add and remove clients while rendering.
// -timeouts? if a client does not render a frame in a reasonable time, send
//  the same frame to a different client. (do this after all other frames
//  are rendered?)

// .----------------------------------------------------------------------.
// |----------------------------------------------------------------------|
// '----------------------------------------------------------------------'

#debug " -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=-\n"

// .----------------------------------------------------------------------.
// | get current time since start of year in seconds: (megapov only!)     |
// '----------------------------------------------------------------------'
// (stats may be wrong around newyear...)
//
// time_now = ((day_of_year*24 + hour)*60 + minute)*60 + second
//
// The earlier expression had a misplaced parenthesis and evaluated to
// (day*24+hour)*60 + minute*60 + second. That mixes minutes and seconds and
// is not even monotonic (00:59:59 -> 3599, 01:00:00 -> 60), so every
// elapsed-time and ETA figure derived from it was wrong.
// A status file written with the old formula is not comparable with the new
// values; delete it before restarting, as the notes already require.
#declare time_now =
  ( ( val(date("%j"))*24      // day of the year -> hours
    + val(date("%H")) )*60    // + hour of day   -> minutes
    + val(date("%M")) )*60    // + minute        -> seconds
    + val(date("%S"));        // + second

// .----------------------------------------------------------------------.
// | read the status file:                                                |
// '----------------------------------------------------------------------'
// Farm state; the declarations here only create the variables, the real
// values are filled in from the status file or with defaults just below.
#declare status_first_frame = 0;              // next frame that has not been handed out yet
#declare client_working     = array[clients]; // 0=waiting 1=working
#declare client_frame       = array[clients]; // frame number currently in process by client
#declare not_used           = "not_used";     // receives the text labels of the status file
#declare status_start_time  = 0;              // time_now of the pass that created the status file
#declare client_frames      = array[clients]; // number of frames the client has reported back
#declare client_time        = array[clients]; // time_now - status_start_time at the client's latest report
#if (file_exists(status_file_name) = 1)
  // A status file exists: restore the farm state from it.
  #fopen status_file status_file_name read
  // first record: label, next frame to hand out, start time of the run
  #read (status_file, not_used, status_first_frame, status_start_time)
  #declare client = 0;
  #while (client < clients)
    // one record per client: name, working flag, frame in progress,
    // frames reported so far, time of latest report
    #read (status_file, not_used, val1, val2, val3, val4)
    #declare client_working[client] = val1;
    #declare client_frame[client]   = val2;
    #declare client_frames[client]  = val3;
    #declare client_time[client]    = val4;
    #declare client = client + 1;
  #end
  #fclose status_file
#else
  // No status file: this is the first pass, start with a clear status.
  #declare status_first_frame = animation_first_frame;
  #declare status_start_time  = time_now;
  #declare client = 0;
  #while (client < clients)
    #declare client_working[client] = 0;
    #declare client_frame[client]   = 0;
    #declare client_frames[client]  = 0;
    #declare client_time[client]    = 0.0;
    #declare client = client + 1;
  #end
#end

// .----------------------------------------------------------------------.
// | check if any of the clients are ready:                               |
// '----------------------------------------------------------------------'
// If a client has written its result file, it's ready :-)

// Folder in which the clients drop their "res_<client_name>.txt" files.
// May be declared before this point to override it; the default is the
// path that used to be hard-coded here. Must end with a backslash.
#ifndef (shared_dir)
  #declare shared_dir = "z:\\jaap\\render_farm\\";
#end

// rename_list.bat is executed by master_loop.bat after this pass; it
// deletes the handled res_*.txt files and renames com_*.tmp to com_*.bat.
#fopen bat_rename "rename_list.bat" write

#declare result = 0;
#declare client = 0;
#while (client < clients)
  #if (client_working[client] = 1)
    #declare result_file_name = concat(shared_dir, "res_", client_name[client], ".txt");
    #if (file_exists(result_file_name) = 1)
      // Reset for every client: if the read below yields nothing (e.g. the
      // file is still empty), the value must not be inherited from the
      // previous client; such a frame is then reported as an error.
      #declare result = 0;
      #fopen result_file result_file_name read
      #read (result_file, result)   // 0=error 1=done
      #fclose result_file
      // schedule deletion of the result file:
      #write (bat_rename, "del \"",result_file_name,"\"\n")
      #if (result = 1)
        #debug concat(client_name[client],": (",str(client_frames[client],0,0),") Compleated frame ",str(client_frame[client],0,0),".\n")
      #else
        #debug concat(client_name[client],": (",str(client_frames[client],0,0),") ERROR in frame ",str(client_frame[client],0,0),". (skiped)\n")
        // write list of skipped frames?? send to other client ??
      #end
      // The client is free again. Failed frames are counted too; they are
      // skipped, not retried.
      #declare client_working[client] = 0;
      #declare client_frame[client]   = 0;
      #declare client_frames[client]  = client_frames[client] + 1;
      #declare client_time[client]    = time_now - status_start_time;
    #else
      #debug concat(client_name[client],": (",str(client_frames[client],0,0),") Rendering frame ",str(client_frame[client],0,0),".\n")
    #end
  #end
  #declare client = client + 1;
#end

// .----------------------------------------------------------------------.
// | generate new command batch files for clients that are ready:         |
// '----------------------------------------------------------------------'
// The file is generated with a .tmp extension, and later renamed,
// to prevent a client from reading an incomplete command.
#declare client = 0;
#while (client < clients)
  #if (status_first_frame > animation_last_frame)
    #debug "all frames are done !! (currently rendering the last frame(s) )\n"
    #declare client = 9999;   // (break out of loop)
  #else
    #if (client_working[client] = 0)
      #debug concat("assigned frame ",str(status_first_frame,0,0)," to client ",client_name[client],"\n")
      // generate new command batch file (single frame: +SF and +EF are equal):
      #declare tmp_file_name = concat("com_",client_name[client],".tmp");
      #declare bat_file_name = concat("com_",client_name[client],".bat");
      #fopen bat_file tmp_file_name write
      #write (bat_file, "\"",client_povray_location,"\" ",animation_name,".pov ",animation_render_options)
      #write (bat_file, " +SF",str(status_first_frame,0,0)," +EF",str(status_first_frame,0,0),"\n")
      #fclose bat_file
      // add it to the rename_list batch file:
      #write (bat_rename, "rename \"",tmp_file_name,"\" \"",bat_file_name,"\"\n")
      // update the client status:
      #declare client_working[client] = 1;
      #declare client_frame[client]   = status_first_frame;
      #declare status_first_frame     = status_first_frame + 1;
    #end
  #end
  #declare client = client + 1;
#end
#fclose bat_rename

// .----------------------------------------------------------------------.
// | calculate ETA:                                                       |
// '----------------------------------------------------------------------'

// print_time(tm): write the duration tm (in seconds) to the debug stream
// as "<days>d <hh>h <mm>m <ss>s", without a trailing newline.
//
// SDL passes an identifier argument by reference, so the macro must not
// assign to `tm`: doing so overwrote the caller's variable with the
// leftover seconds. All work is done on #local copies, which also keeps
// the helper values out of the global namespace.
#macro print_time ( tm )
  #local pt_rest = tm;
  #local pt_day  = floor(pt_rest/(24*60*60));
  #local pt_rest = pt_rest - pt_day*24*60*60;
  #local pt_h    = floor(pt_rest/(60*60));
  #local pt_rest = pt_rest - pt_h*60*60;
  #local pt_m    = floor(pt_rest/60);
  #local pt_s    = pt_rest - pt_m*60;
  #debug concat(str(pt_day,3,0),"d ",str(pt_h,-2,0),"h ",str(pt_m,-2,0),"m ",str(pt_s,-2,0),"s")
#end

// Sum the per-client counters.
#declare total_client_frames = 0;
#declare total_client_time   = 0;
#declare client = 0;
#while (client < clients)
  #declare total_client_frames = total_client_frames + client_frames[client];
  #declare total_client_time   = total_client_time   + client_time[client];
  #declare client = client + 1;
#end

// Placeholder shown while nothing has been reported yet: 99d 23h 59m 59s.
#declare unknown_time = ((((99*24)+23)*60)+59)*60+59;

#if (total_client_frames = 0)
  #declare sec_per_frame       = unknown_time;   //(unknown)
  #declare expected_total_time = unknown_time;   //(unknown)
  #declare ETA                 = unknown_time;   //(unknown)
#else
  #declare sec_per_frame       = total_client_time / total_client_frames;
  #declare animation_frames    = animation_last_frame - animation_first_frame + 1;
  // the frames are spread over all clients, so wall-clock time is divided by `clients`
  #declare expected_total_time = (animation_frames * sec_per_frame) / clients;
  // Never report a negative remaining time: once the run takes longer than
  // estimated the difference goes below zero and printed as "-1d 23h ...".
  #declare ETA = max(0, expected_total_time - (time_now - status_start_time));
#end

#debug "time per frame: " print_time(sec_per_frame) #debug ".\n"
#debug "time : " print_time(time_now - status_start_time) #debug ".\n"
#debug "total: " print_time(expected_total_time) #debug ".\n"
#debug "ETA : " print_time(ETA) #debug ".\n"
#debug " -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=- -=-\n"

// .----------------------------------------------------------------------.
// | write the status file:                                               |
// '----------------------------------------------------------------------'
// Layout (read back at the start of the next pass):
//   "next_frame:",<next frame>,<start time>,
//   "<client name>",<working 0/1>,<frame in progress>,<frames reported>,<time>,
//   ... one line per client ...
//
// All numbers are whole values and are written through str(x,0,0), like
// the frame numbers in the render commands. This avoids depending on the
// default float formatting of #write, which may round large values such as
// the start time (NOTE(review): believed to keep ~6 significant digits --
// confirm against the POV-Ray version in use).
#fopen status_file status_file_name write
#write (status_file, "\"next_frame:\",", str(status_first_frame,0,0), ",", str(status_start_time,0,0), ",\n")
#declare client = 0;
#while (client < clients)
  #write (status_file, "\"", client_name[client], "\",", str(client_working[client],0,0), ",", str(client_frame[client],0,0), ",")
  #write (status_file, str(client_frames[client],0,0), ",", str(client_time[client],0,0), ",\n")
  #declare client = client + 1;
#end
#fclose status_file
//end