#define TACHYON_INTERNAL 1

#if !defined(_MSC_VER)

/* per-message request/status bookkeeping held in the pardata structure */
MPI_Request * requests;
MPI_Status * statuses;

/* enable MPI_IN_PLACE collectives when the MPI implementation supports them */
#if !defined(USE_MPI_IN_PLACE)
#if (MPI_VERSION >= 2) || defined(MPI_IN_PLACE)
#define USE_MPI_IN_PLACE 1
#endif
#endif

/* rt_par_comm_default(): the default working communicator is MPI_COMM_WORLD */
ph->comm = MPI_COMM_WORLD;
static void rt_par_comm_info(parhandle *ph, MPI_Comm *caller_comm) {
  /* record rank and size for the world, caller, and working communicators */
  MPI_Comm_rank(MPI_COMM_WORLD, &ph->worldrank);
  MPI_Comm_size(MPI_COMM_WORLD, &ph->worldsize);
  MPI_Comm_rank(*caller_comm, &ph->callrank);
  MPI_Comm_size(*caller_comm, &ph->callsize);
  MPI_Comm_rank(ph->comm, &ph->commrank);
  MPI_Comm_size(ph->comm, &ph->commsize);
}
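For orientation, here is a minimal sketch of the handle fields the routine above fills in, assuming only the field names visible in the calls; the actual parhandle definition in Tachyon's parallel module is more extensive and may differ.

/* hypothetical sketch only -- not Tachyon's actual parhandle definition */
typedef struct {
  MPI_Comm comm;             /* working communicator used for rendering traffic */
  int worldrank, worldsize;  /* rank/size within MPI_COMM_WORLD */
  int callrank,  callsize;   /* rank/size within the caller-supplied communicator */
  int commrank,  commsize;   /* rank/size within the working communicator */
} parhandle_sketch;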
/* rt_par_init(): bootstrap MPI, adopt MPI_COMM_WORLD, and record communicator info */
MPI_Init(argc, argv);
ph->comm = MPI_COMM_WORLD;
rt_par_comm_info(ph, &ph->comm);
/* adopt a caller-provided communicator when one is supplied */
MPI_Comm *caller_comm = (MPI_Comm *) mpicomm;
if (caller_comm != NULL) {
  ph->comm = *caller_comm;
  rt_par_comm_info(ph, &ph->comm);
MPI_Comm comm = MPI_COMM_WORLD;
/* split the caller's communicator by color/key and render on the result */
MPI_Comm *caller_comm = (MPI_Comm *) mpicomm;
if (caller_comm != NULL) {
  MPI_Comm_split(*caller_comm, color, key, &ph->comm);
  rt_par_comm_info(ph, caller_comm);
/* replace the existing working communicator with one supplied by the caller */
MPI_Comm *caller_comm = (MPI_Comm *) mpicomm;
if (caller_comm != NULL) {
  MPI_Comm_free(&ph->comm);
  ph->comm = *caller_comm;
  rt_par_comm_info(ph, &ph->comm);
MPI_Comm comm = MPI_COMM_WORLD;
                                   int color, int key) {
  /* free the old working communicator, then split the caller's by color/key */
  MPI_Comm *caller_comm = (MPI_Comm *) mpicomm;
  if (caller_comm != NULL) {
    MPI_Comm_free(&ph->comm);
    MPI_Comm_split(*caller_comm, color, key, &ph->comm);
    rt_par_comm_info(ph, caller_comm);
                                   int color, int key) {
MPI_Comm comm = MPI_COMM_WORLD;
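For context on the color/key arguments in the split variants above, here is a minimal standalone sketch of MPI_Comm_split (not Tachyon code): ranks passing the same color land in the same subcommunicator, ordered by key.

#include <mpi.h>
#include <stdio.h>

/* Minimal illustration of MPI_Comm_split: even and odd world ranks form
 * two subcommunicators, each ordered by the original world rank. */
int main(int argc, char **argv) {
  int worldrank, subrank, subsize;
  MPI_Comm subcomm;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &worldrank);

  /* same color -> same subcommunicator; key controls ordering within it */
  MPI_Comm_split(MPI_COMM_WORLD, worldrank % 2, worldrank, &subcomm);
  MPI_Comm_rank(subcomm, &subrank);
  MPI_Comm_size(subcomm, &subsize);
  printf("world rank %d -> subcomm rank %d of %d\n", worldrank, subrank, subsize);

  MPI_Comm_free(&subcomm);
  MPI_Finalize();
  return 0;
}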
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm, &myrank);

MPI_Comm_free(&ph->comm);

MPI_Barrier(ph->comm);
/* rt_par_getcpuinfo(): fill in this node's nodeinfo record, then gather all records */
char namebuf[MPI_MAX_PROCESSOR_NAME];
*nodes = (nodeinfo *) malloc(numnodes * sizeof(nodeinfo));
(*nodes)[mynode].cpuspeed = 1.0;
(*nodes)[mynode].nodespeed = (*nodes)[mynode].numcpus * (*nodes)[mynode].cpuspeed;
(*nodes)[mynode].cpucaps = NULL;

MPI_Get_processor_name((char *) &namebuf, &namelen);
strncpy((char *) &(*nodes)[mynode].machname, namebuf,
        (((namelen + 1) < 511) ? (namelen + 1) : 511));
#if defined(USE_MPI_IN_PLACE)
  /* gather every node's record in-place into the shared array */
  MPI_Allgather(MPI_IN_PLACE, sizeof(nodeinfo), MPI_BYTE,
                &(*nodes)[0], sizeof(nodeinfo), MPI_BYTE, ph->comm);
#else
  MPI_Allgather(&(*nodes)[mynode], sizeof(nodeinfo), MPI_BYTE,
                &(*nodes)[0], sizeof(nodeinfo), MPI_BYTE, ph->comm);
#endif
/* machine name: "Windows" on MSVC builds, "Mercury" on Mercury builds,
   otherwise the result of gethostname() */
#if defined(_MSC_VER)
  strcpy((*nodes)[mynode].machname, "Windows");

  strcpy((*nodes)[mynode].machname, "Mercury");

  gethostname((*nodes)[mynode].machname, 511);
free((*nodes)[mynode].cpucaps);
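As a self-contained illustration of the gathering step above, the sketch below has each rank fill one slot of a shared array and then uses MPI_Allgather with MPI_IN_PLACE to replicate every slot on every rank; the record type and fields are placeholders, not Tachyon's nodeinfo.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* placeholder per-node record, standing in for Tachyon's nodeinfo */
typedef struct {
  int  numcpus;
  char machname[64];
} noderec;

int main(int argc, char **argv) {
  int rank, size, i, namelen;
  char namebuf[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  noderec *nodes = (noderec *) calloc(size, sizeof(noderec));
  MPI_Get_processor_name(namebuf, &namelen);

  /* fill only this rank's slot, then gather everyone's slot in-place */
  nodes[rank].numcpus = 1;
  strncpy(nodes[rank].machname, namebuf, sizeof(nodes[rank].machname) - 1);
  MPI_Allgather(MPI_IN_PLACE, sizeof(noderec), MPI_BYTE,
                nodes, sizeof(noderec), MPI_BYTE, MPI_COMM_WORLD);

  if (rank == 0)
    for (i = 0; i < size; i++)
      printf("node %d: %s (%d cpu)\n", i, nodes[i].machname, nodes[i].numcpus);

  free(nodes);
  MPI_Finalize();
  return 0;
}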
/* rt_par_allocate_reqbuf(): per-frame request, status, and index arrays */
p = malloc(sizeof(pardata));
p->requests = malloc(sizeof(MPI_Request) * count);
p->statuses = malloc(sizeof(MPI_Status) * count);
p->indices  = malloc(sizeof(int) * count);
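Similarly, a minimal sketch of the per-frame bookkeeping these allocations imply, using only field names that appear elsewhere in this excerpt; the real pardata structure may carry additional state.

/* hypothetical sketch only -- not Tachyon's actual pardata definition */
typedef struct {
  int count;               /* number of persistent requests (one per scanline) */
  int curmsg;              /* index of the next message to start or test */
  int nodes;               /* number of participating ranks */
  int haveinited;          /* nonzero once requests have been created */
  int havestarted;         /* nonzero once requests have been started */
  MPI_Request *requests;   /* persistent send/receive requests */
  MPI_Status  *statuses;   /* completion statuses */
  int         *indices;    /* completion indices used by MPI_Testsome() */
} pardata_sketch;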
/* rt_par_free_reqbuf(): release the arrays allocated above */
pardata *p = (pardata *) voidhandle;
if (p->requests != NULL)
  free(p->requests);
if (p->statuses != NULL)
  free(p->statuses);
if (p->indices != NULL)
  free(p->indices);
/* rt_par_init_scanlinereceives(): create one persistent request per scanline.
   The receiving rank posts a receive from the rank that owns each row
   (row i belongs to rank i % commsize); the other ranks post sends of their
   rows to rank 0. */
if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB24) {
  unsigned char *imgbuf = (unsigned char *) scene->img;

  for (i=0; i<scene->vres; i++) {
    addr = i * scene->hres * 3;
    MPI_Recv_init(&imgbuf[addr], scene->hres * 3, MPI_BYTE,
                  i % ph->commsize, i+1, ph->comm, &p->requests[p->count]);
  }

  for (i=0; i<scene->vres; i++) {
    addr = i * scene->hres * 3;
    MPI_Send_init(&imgbuf[addr], scene->hres * 3, MPI_BYTE,
                  0, i+1, ph->comm, &p->requests[p->count]);
  }
} else if (scene->imgbufformat == RT_IMAGE_BUFFER_RGB96F) {
  float *imgbuf = (float *) scene->img;

  for (i=0; i<scene->vres; i++) {
    addr = i * scene->hres * 3;
    MPI_Recv_init(&imgbuf[addr], scene->hres * 3, MPI_FLOAT,
                  i % ph->commsize, i+1, ph->comm, &p->requests[p->count]);
  }

  for (i=0; i<scene->vres; i++) {
    addr = i * scene->hres * 3;
    MPI_Send_init(&imgbuf[addr], scene->hres * 3, MPI_FLOAT,
                  0, i+1, ph->comm, &p->requests[p->count]);
  }
}
/* rt_par_start_scanlinereceives(): launch all persistent requests for this frame */
pardata *p = (pardata *) voidhandle;
MPI_Startall(p->count, p->requests);
/* rt_par_waitscanlines(): block until every outstanding scanline transfer completes */
pardata *p = (pardata *) voidhandle;
MPI_Waitall(p->count, p->requests, p->statuses);
/* rt_par_delete_scanlinereceives(): release the persistent requests */
pardata *p = (pardata *) voidhandle;
if (p->haveinited != 0 || p->havestarted != 0) {
  for (i=0; i<p->count; i++) {
    MPI_Request_free(&p->requests[i]);
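The scanline exchange above is built on MPI persistent requests: MPI_Recv_init/MPI_Send_init create reusable requests once, MPI_Startall launches them for each pass, MPI_Waitall (or the Test variants) completes them, and MPI_Request_free releases them. A minimal standalone sketch of that lifecycle, independent of Tachyon's types:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
  int rank, size, i, iter;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  /* rank 0 receives one value per peer; every other rank sends one value */
  int count = (rank == 0) ? (size - 1) : 1;
  double *buf = (double *) malloc((count > 0 ? count : 1) * sizeof(double));
  MPI_Request *req = (MPI_Request *) malloc((count > 0 ? count : 1) * sizeof(MPI_Request));

  /* create the persistent requests once */
  if (rank == 0) {
    for (i = 0; i < count; i++)
      MPI_Recv_init(&buf[i], 1, MPI_DOUBLE, i + 1, 42, MPI_COMM_WORLD, &req[i]);
  } else {
    MPI_Send_init(&buf[0], 1, MPI_DOUBLE, 0, 42, MPI_COMM_WORLD, &req[0]);
  }

  /* reuse the same requests across several passes */
  for (iter = 0; iter < 3; iter++) {
    if (rank != 0)
      buf[0] = rank + iter;            /* senders refresh their payload */
    if (count > 0) {
      MPI_Startall(count, req);
      MPI_Waitall(count, req, MPI_STATUSES_IGNORE);
    }
  }

  if (rank == 0 && count > 0)
    printf("last pass: received %d value(s), e.g. buf[0] = %g\n", count, buf[0]);

  /* release the persistent requests */
  for (i = 0; i < count; i++)
    MPI_Request_free(&req[i]);
  free(buf);
  free(req);
  MPI_Finalize();
  return 0;
}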
pardata *p = (pardata *) voidhandle;
/* rt_par_sendrecvscanline(): the receiving rank tests for completed scanline
   receives, and the sending side starts the persistent send for the next
   finished row. MPI_TUNE selects among several completion-test strategies. */
pardata *p = (pardata *) voidhandle;

#if MPI_TUNE == 0 || !defined(MPI_TUNE)
  /* default strategy: test a bounded batch of outstanding receives */
  int numtotest = (numtorecv < (p->count - p->curmsg)) ?
                  numtorecv : (p->count - p->curmsg);
  if (numtotest < 1) {
    printf("Internal Tachyon MPI error, tried to recv zero/negative count!\n");
  }
  MPI_Testsome(numtotest, &p->requests[p->curmsg], &outcount,
               &p->indices[p->curmsg], &p->statuses[p->curmsg]);
  p->curmsg += numtorecv;
#endif

  /* alternate MPI_TUNE strategies present in this excerpt */
  MPI_Testany(p->count, p->requests, &index, &flag, p->statuses);
  MPI_Testall(p->count, p->requests, &flag, p->statuses);
  for (i=1; i<p->nodes; i++)
    MPI_Testany(p->count, p->requests, &index, &flag, p->statuses);

  /* sender side: range-check, then start the next persistent send */
  if (p->curmsg >= p->count) {
    printf("Internal Tachyon MPI error, tried to send oob count!\n");
  }
  MPI_Start(&p->requests[p->curmsg]);
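For the default MPI_TUNE path, the key idea is draining a block of outstanding requests with MPI_Testsome so other work can overlap with message completion. A small self-contained sketch of that pattern (illustrative only, with placeholder tags and counts, not Tachyon code):

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/* Rank 0 posts one nonblocking receive per peer, then polls with
 * MPI_Testsome until all have completed; other ranks send one value. */
int main(int argc, char **argv) {
  int rank, size, i;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  if (rank == 0) {
    int count = size - 1, done = 0;
    int *data = (int *) malloc((count > 0 ? count : 1) * sizeof(int));
    int *indices = (int *) malloc((count > 0 ? count : 1) * sizeof(int));
    MPI_Request *req = (MPI_Request *) malloc((count > 0 ? count : 1) * sizeof(MPI_Request));
    MPI_Status *st = (MPI_Status *) malloc((count > 0 ? count : 1) * sizeof(MPI_Status));

    for (i = 0; i < count; i++)
      MPI_Irecv(&data[i], 1, MPI_INT, i + 1, 7, MPI_COMM_WORLD, &req[i]);

    while (done < count) {
      int outcount = 0;
      MPI_Testsome(count, req, &outcount, indices, st);
      if (outcount != MPI_UNDEFINED)
        done += outcount;       /* other work could overlap here between polls */
    }
    printf("collected %d message(s)\n", done);
    free(data); free(indices); free(req); free(st);
  } else {
    int payload = rank;
    MPI_Send(&payload, 1, MPI_INT, 0, 7, MPI_COMM_WORLD);
  }

  MPI_Finalize();
  return 0;
}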
rt_parhandle rt_par_init(int *argc, char ***argv)
rt_parhandle rt_par_init_nompi(void)
rt_parhandle rt_par_init_mpi_comm(void *mpicomm)
rt_parhandle rt_par_init_mpi_comm_split(void *mpicomm, int color, int key)
rt_parhandle rt_par_init_mpi_comm_world(void)
int rt_par_set_mpi_comm(rt_parhandle voidhandle, void *mpicomm)
int rt_par_set_mpi_comm_split(rt_parhandle voidhandle, void *mpicomm, int color, int key)
int rt_par_set_mpi_comm_world(rt_parhandle voidhandle)
int rt_par_set_mpi_comm_world_split(rt_parhandle voidhandle, int color, int key)
int rt_par_set_mpi_comm_world_split_all(rt_parhandle voidhandle)
static void rt_par_comm_default(parhandle *ph)
int rt_par_finish(rt_parhandle voidhandle)
int rt_par_rank(rt_parhandle voidhandle)
int rt_par_size(rt_parhandle voidhandle)
void rt_par_barrier_sync(rt_parhandle voidhandle)
int rt_par_getcpuinfo(rt_parhandle voidhandle, nodeinfo **nodes)
void * rt_par_allocate_reqbuf(rt_parhandle voidhandle, int count)
void rt_par_free_reqbuf(rt_parhandle voidparhandle, rt_parbuf voidhandle)
void * rt_par_init_scanlinereceives(rt_parhandle voidhandle, scenedef *scene)
void rt_par_start_scanlinereceives(rt_parhandle voidparhandle, rt_parbuf voidhandle)
void rt_par_waitscanlines(rt_parhandle voidparhandle, rt_parbuf voidhandle)
void rt_par_delete_scanlinereceives(rt_parhandle voidparhandle, rt_parbuf voidhandle)
int rt_par_sendrecvscanline_get_totalrows(rt_parhandle voidparhandle, rt_parbuf voidhandle)
void rt_par_sendrecvscanline(rt_parhandle voidparhandle, rt_parbuf voidhandle)
int rt_thread_numprocessors(void) - number of processors available, subject to user override
int rt_cpu_capability_flags(rt_cpu_caps_t *cpucaps) - CPU optional instruction set capability flags
Tachyon cross-platform thread creation and management, atomic operations, and CPU feature query APIs...
Tachyon cross-platform timers, special math function wrappers, and RNGs.
Tachyon public API function prototypes and declarations used to drive the ray tracing engine...
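Putting the listing together, a hedged sketch of how a host program might drive this parallel API: initialize, query rank and size, synchronize, and shut down. The header name and the exact call sequence are assumptions based on the prototypes above, not a verified Tachyon example.

#include <stdio.h>
#include "tachyon.h"   /* assumed public header exposing the rt_par_* prototypes */

int main(int argc, char **argv) {
  /* bring up MPI (or the no-op fallback) and get an opaque parallel handle */
  rt_parhandle ph = rt_par_init(&argc, &argv);

  int rank = rt_par_rank(ph);   /* this process's rank in the working communicator */
  int size = rt_par_size(ph);   /* number of ranks participating in rendering */
  if (rank == 0)
    printf("rendering on %d rank(s)\n", size);

  /* ... scene setup and rendering would go here ... */

  rt_par_barrier_sync(ph);      /* ensure all ranks reach the same point */
  rt_par_finish(ph);            /* tear down the parallel runtime */
  return 0;
}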