/*
 * parametric.cpp - Parametric surface plotting example scene.
 *
 * (C) Copyright 2013-2022 John E. Stone
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * $Id: parametric.cpp,v 1.8 2022/03/23 06:41:57 johns Exp $
 *
 */

/**
 *  \file parametric.cpp
 *  \brief Parametric surface plotting example scene.
 *
 */

//
// Parametric surface plotting example
// John E. Stone, Dec 2021
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "ProfileHooks.h"

#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#include "TachyonOptiX.h"

#if defined(TACHYON_USEPINNEDMEMORY)
#include <cuda_runtime.h>
#endif


//
// Convert a hue/saturation/intensity triple into an RGB triple.
//   h: hue, multiplied by 2*pi to form the hue angle
//   s: saturation, used as the amplitude of the per-channel sinusoid
//   i: intensity, each channel is scaled by i/2
// The red and blue channels are evaluated at the hue angle shifted by
// -2*pi/3 and +2*pi/3 respectively; green uses the unshifted angle.
//
void HSItoRGB(float h, float s, float i, float &r, float &g, float &b) {
  const float hueangle = 2.0f * M_PI * h;
  const float halfintensity = i / 2.0f;
  const double phaseshift = 2.0f*M_PI/3.0f; // double, M_PI promotes the math

  r = (1.0f + s*sin(hueangle - phaseshift)) * halfintensity;
  g = (1.0f + s*sin(hueangle)) * halfintensity;
  b = (1.0f + s*sin(hueangle + phaseshift)) * halfintensity;
}


//
// Evaluate a caller-supplied surface function on a udiv x vdiv parameter grid.
//   u starts at umin and advances by (umax-umin)/udiv per row,
//   v starts at vmin and advances by (vmax-vmin)/vdiv per column,
//   so neither umax nor vmax itself is sampled.
// 'function' is invoked as function(float *xyz, float u, float v) and is
// expected to store three floats at xyz.  Vertices are stored with v varying
// fastest: vertex (iu, iv) lives at float offset 3*(iu*vdiv + iv).
// Returns the newly allocated array of 3*udiv*vdiv floats, obtained from
// cudaMallocHost() when TACHYON_USEPINNEDMEMORY is defined and new[] otherwise.
//
template <typename F>
float *parametric_lambda(int udiv, float umin, float umax,
                         int vdiv, float vmin, float vmax,
                         F function) {
  const int numpts = udiv * vdiv;
#if defined(TACHYON_USEPINNEDMEMORY)
  float *coords;
  cudaMallocHost(&coords, numpts * sizeof(float) * 3);
#else
  float *coords = new float[numpts * 3];
#endif

  const float ustep = (umax-umin) / udiv;
  const float vstep = (vmax-vmin) / vdiv;

  float u = umin;
  for (int row=0; row<udiv; row++) {
    float *xyz = coords + 3*row*vdiv; // first vertex of this row
    float v = vmin;
    for (int col=0; col<vdiv; col++) {
      function(xyz, u, v);
      xyz += 3;
      v += vstep;
    }
    u += ustep;
  }

  return coords;
}


//
// Generate the vertex coordinates for a named parametric surface.
//   surftype: surface name, compared with strcmp() against each known name
//   udiv, vdiv: number of grid samples along the u and v parameters
//   wrapmesh: output flag, set to 1 by default and cleared to 0 by the
//             bour, klein, plane, shell, spiral, and spiral2 surfaces
// Each recognized surface passes its parameter ranges and a lambda holding
// its equations to parametric_lambda() and returns that function's result
// (an array of 3*udiv*vdiv floats).  Returns NULL when surftype does not
// match any known name.
//
// NOTE(review): the "scale hack" factors below divide by (udiv-1) or
// (vdiv-1), which is zero when the corresponding count is 1.
//
float * parametric_grid_verts(const char *surftype, int udiv, int vdiv, int &wrapmesh) {
  const float twopi = 2.0f * M_PI;

  // wrap mesh by default, unless disabled
  wrapmesh = 1;

  // Bour minimal surface example: 
  //   https://en.wikipedia.org/wiki/Bour%27s_minimal_surface
  //   https://doc.sagemath.org/html/en/reference/plot3d/sage/plot/plot3d/parametric_plot3d.html
  if (!strcmp(surftype, "bour")) {
    wrapmesh = 0;
    return parametric_lambda(udiv, 0.0f, 0.2f * twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        const float r = 2.0f;

        float sinv, cosv;
        sincosf(v, &sinv, &cosv);

        coords[0] = r * ( u * cosv - 0.5f * (u*u)*cosf(2.0f * v));
        coords[1] = r * (-u * sinv * (u * cosv + 1.0f));
        coords[2] = r * (4.0f/3.0f * powf(u, 1.5f) * cosf(3.0f * v * 0.5f));
      }
    );
  }


  // Sage bowtie example: 
  //   https://doc.sagemath.org/html/en/reference/plot3d/sage/plot/plot3d/parametric_plot3d.html
  if (!strcmp(surftype, "bowtie")) {
    return parametric_lambda(udiv, -M_PI, M_PI, 
                             vdiv, -M_PI, M_PI,
      [=](float *coords, float u, float v) {
        const float r = 1.0f;

        float sinu, cosu, sinv, cosv;
        sincosf(u, &sinu, &cosu);
        sincosf(v, &sinv, &cosv);

        const float sqrt2 = 1.41421356237309504880f;  // sqrtf(2.0f);
        coords[0] = r * sinu / (sqrt2 + sinv);
        coords[1] = r * sinu / (sqrt2 + cosv);
        coords[2] = r * cosu / (1.0f + sqrt2);
      }
    );
  }


  // Sage example: 
  //   https://doc.sagemath.org/html/en/reference/plot3d/sage/plot/plot3d/parametric_plot3d.html
  if (!strcmp(surftype, "chiclet")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, -M_PI, M_PI,
      [=](float *coords, float u, float v) {
        const float r = 1.5f;

        float sinu, cosu, sinv, cosv;
        sincosf(u, &sinu, &cosu);
        sincosf(v, &sinv, &cosv);

        coords[0] = 0.5f * r * (sinu + cosv);
        coords[1] = r * cosu;
        coords[2] = r * sinv;
      }
    );
  }


  // Cone
  if (!strcmp(surftype, "cone")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        const float r = 2.0f;

        float sinu, cosu;
        sincosf(u, &sinu, &cosu);

        coords[0] = r * sinu * sinf(v);
        coords[1] = r * sinf(v);
        coords[2] = r * cosu * sinf(v);
      }
    );
  }


  // Klein bottle:
  //   https://de.wikipedia.org/wiki/Kleinsche_Flasche#Beschreibung_im_3-dimensionalen_Raum
  if (!strcmp(surftype, "klein")) {
    wrapmesh = 0;
    // With ranges starting at 0, multiplying by div/(div-1) stretches the
    // samples so that the last grid index (div-1) lands on the range maximum.
    float unowrapscale = udiv/float(udiv-1); // scale hack
    float vnowrapscale = vdiv/float(vdiv-1); // scale hack

    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        float u2 = u * unowrapscale;
        float v2 = v * vnowrapscale;

        float sinu, cosu, sinv, cosv;
        sincosf(u2, &sinu, &cosu);
        sincosf(v2, &sinv, &cosv);

        const float r = 2.0f - cosu;
        coords[0] = 0.4f * (2.0f * (1.0f - sinu) * cosu + 
                    r * cosv * (2.0f * expf(-powf(u2/2.0f - M_PI, 2.0f)) - 1.0f));
        coords[1] = 0.4f * (-(6.0f * sinu + 
                    0.5f * r * sinu * cosv * expf(-powf(u2 - 3.0f * M_PI * 0.5f, 2.0f))));
        coords[2] = 0.4f * (r * sinv);
      }
    );
  }


  // Plane
  if (!strcmp(surftype, "plane")) {
    wrapmesh = 0;

    return parametric_lambda(udiv, -M_PI, M_PI, 
                             vdiv, -M_PI, M_PI,
      [=](float *coords, float u, float v) {
        const float r = 1.0f;
        coords[0] = r * u;
        coords[1] = r * v;
        coords[2] = 0.5f;
      }
    );
  }


  // A parametric seashell:
  //   https://www.chebfun.org/examples/geom/ParametricSurfaces.html
  if (!strcmp(surftype, "shell")) {
    wrapmesh = 0;
    float unowrapscale = udiv/float(udiv-1); // scale hack
    float vnowrapscale = vdiv/float(vdiv-1); // scale hack

    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        // u is additionally tripled, so u3 spans three full turns
        float u3 = u * unowrapscale * 3.0f;
        float v2 = v * vnowrapscale;
        // cos^2(v2/2), computed by squaring in place
        float coshalfvsqr = cosf(v2 * 0.5f);
        coshalfvsqr *= coshalfvsqr;
        float expu_6f = expf(u3/(6.0f * M_PI));
        coords[0] = 2.0f * ( 1.0f - expu_6f) * cosf(u3) * coshalfvsqr;
        coords[1] = -5.5f -(-expf(u3/(3.0f * M_PI)) - sinf(v2) + expf(u3/(6.0f * M_PI)) * sinf(v2));
        coords[2] = 2.0f * (-1.0f + expu_6f) * sinf(u3) * coshalfvsqr;
      }
    );
  }


  // Sphere
  if (!strcmp(surftype, "sphere")) {
    // NOTE(review): here the v range starts at pi/2 rather than 0, so the
    // scale factor also moves the first sample off pi/2 -- confirm intent.
    float vnowrapscale = vdiv/float(vdiv-1); // scale hack

    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, M_PI*0.5f, 1.5f*M_PI,
      [=](float *coords, float u, float v) {
        const float r = 2.0f;
        float v2 = v * vnowrapscale;
        coords[0] = r * sinf(u) * cosf(v2);
        coords[1] = r * sinf(v2);
        coords[2] = r * cosf(u) * cosf(v2);
      }
    );
  }
 
 
  // Sage Dini's spiral example: 
  //   https://doc.sagemath.org/html/en/reference/plot3d/sage/plot/plot3d/parametric_plot3d.html
  if (!strcmp(surftype, "spiral")) {
    wrapmesh = 0;

    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.1f, 2.0f,
      [=](float *coords, float u, float v) {
        const float r = 2.0f;
        float u2 = u * 2.0f;
        float v2 = v;

        float sinu, cosu, sinv, cosv;
        sincosf(u2, &sinu, &cosu);
        sincosf(v2, &sinv, &cosv);

        coords[0] = r * cosu * sinv;
        // the 1.0 literal is a double, so this sum is evaluated in double
        coords[1] = 1.0 + -r * ((cosv + logf(tanf(v2 * 0.5f))) + 0.2f*u2);
        coords[2] = r * sinu * sinv;
      }
    );
  }


  // Second spiral variant: radius grows linearly with the scaled u parameter
  if (!strcmp(surftype, "spiral2")) {
    wrapmesh = 0;
    float unowrapscale = udiv/float(udiv-1); // scale hack
    float vnowrapscale = vdiv/float(vdiv-1); // scale hack

    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        const float r = 0.5f;
        float u2 = u * unowrapscale * 1.4f;
        float v2 = v * vnowrapscale * 0.4f;

        float sinu, cosu, sinv, cosv;
        sincosf(u2, &sinu, &cosu);
        sincosf(v2, &sinv, &cosv);

        coords[0] = -1.0f + r * u2 * sinu * cosv;
        coords[1] = -3.0f + r * u2 * sinv;
        coords[2] = r * u2 * cosu * cosv;
      }
    );
  }


  // Torus
  if (!strcmp(surftype, "torus")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        float sinu, cosu, sinv, cosv;
        sincosf(u, &sinu, &cosu);
        sincosf(v, &sinv, &cosv);

        coords[0] = 2.0f * cosu * (1.0f - cosv * 0.5f);
        coords[1] = 2.0f * sinv * 0.5f;
        coords[2] = 2.0f * sinu * (1.0f - cosv * 0.5f);
      }
    );
  }


  // Sage trefoil example: 
  //   https://doc.sagemath.org/html/en/reference/plot3d/sage/plot/plot3d/parametric_plot3d.html
  if (!strcmp(surftype, "trefoil")) {
    return parametric_lambda(udiv, -M_PI, M_PI, 
                             vdiv, -M_PI, M_PI,
      [=](float *coords, float u, float v) {
        const float r = 0.7f;
        float v2 = 2.0f * v;
        float v3 = 3.0f * v;
        coords[0] = r * ((4.0f * (1.0f + 0.25f*sinf(v3)) + cosf(u))*cosf(v2));
        coords[1] = r * ((4.0f * (1.0f + 0.25f*sinf(v3)) + cosf(u))*sinf(v2));
        coords[2] = r * (sinf(u) + 2.0f*cosf(v3));
      }
    );
  }


  // Second trefoil variant
  if (!strcmp(surftype, "trefoil2")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        const float r = 2.0f;
        float u2 = 2.0f * u;
        float u3 = 3.0f * u;
        const float twothirdspi = M_PI * 2.0f / 3.0f;
        coords[0] = r * sinf(u3) / (2.0f + cosf(v));
        coords[1] = r / 2.0f * (cosf(u) - 2.0f * cosf(u2)) * (2.0f + cosf(v)) * (2.0f + cosf(v + twothirdspi)) / 4.0f;
        coords[2] = r * (sinf(u) + 2.0f * sinf(u2)) / (2.0f + cosf(v + twothirdspi)) / 4.0f;
      }
    );
  }


  // Mathematica example:
  //   https://mathematica.stackexchange.com/questions/37698/how-to-plot-a-certain-surface-what-is-its-parametric-equation
  if (!strcmp(surftype, "twisty")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, 0.0f, twopi,
      [=](float *coords, float u, float v) {
        const float r = 0.5f;
        float u3 = 3.0f * u;
        float v3 = 3.0f * v;
        // note: components are written in the order 0, 2, 1
        coords[0] = r * cosf(v) * (6.0f - (1.25f + sinf(u3)) * sinf(u - v3));
        coords[2] = r * (6.0f - (1.25f + sinf(u3)) * sinf(u - v3)) * sinf(v);
        coords[1] = r * -cosf(u - v3) * (1.25f + sinf(u3));
      }
    );
  }


  // Vase
  // (as written: a unit-radius circle in x/z, with y equal to v)
  if (!strcmp(surftype, "vase")) {
    return parametric_lambda(udiv, 0.0f, twopi, 
                             vdiv, -M_PI, 2.0f/3.0f*M_PI,
      [=](float *coords, float u, float v) {
        float sinu, cosu;
        sincosf(u, &sinu, &cosu);
        coords[0] = cosu;
        coords[1] = v;
        coords[2] = sinu;
      }
    );
  }

  
  return NULL; // no surface name was recognized, bail out...
}


//
// Build a packed RGB (3 floats per vertex) color array for a udiv x vdiv grid.
// Vertex number i (counted in the same v-fastest order used for the
// coordinates) gets hue i/(udiv*vdiv) with saturation and intensity of 1,
// converted to RGB by HSItoRGB().
// Returns the newly allocated array, obtained from cudaMallocHost() when
// TACHYON_USEPINNEDMEMORY is defined and new[] otherwise.
//
float * parametric_grid_colors3f(int udiv, int vdiv) {
  const int numpts = udiv * vdiv;
#if defined(TACHYON_USEPINNEDMEMORY)
  float *colors;
  cudaMallocHost(&colors, numpts * sizeof(float) * 3);
#else
  float *colors = new float[numpts * 3];
#endif

  const float saturation = 1.0f;
  // alternative striped intensity, currently disabled:
  //   (fmodf(hue*40.0f, 1.0f) > 0.5f) ? 0.85f : 0.35f
  const float intensity = 1.0f;

  int vert = 0; // running vertex number, equal to iu*vdiv + iv
  for (int iu=0; iu<udiv; iu++) {
    for (int iv=0; iv<vdiv; iv++, vert++) {
      float *rgb = colors + 3*vert;
      const float hue = float(vert) / float(numpts);
      HSItoRGB(hue, saturation, intensity, rgb[0], rgb[1], rgb[2]);
    }
  }

  return colors;
}


//
// Build a packed RGBA (4 floats per vertex) color array for a udiv x vdiv
// grid.  Vertex number i (counted in the same v-fastest order used for the
// coordinates) gets hue i/(udiv*vdiv) with saturation and intensity of 1,
// converted to RGB by HSItoRGB(); the fourth component is always 1.0.
// Returns the newly allocated array, obtained from cudaMallocHost() when
// TACHYON_USEPINNEDMEMORY is defined and new[] otherwise.
//
float * parametric_grid_colors4f(int udiv, int vdiv) {
  const int numpts = udiv * vdiv;
#if defined(TACHYON_USEPINNEDMEMORY)
  float *colors;
  cudaMallocHost(&colors, numpts * sizeof(float) * 4);
#else
  float *colors = new float[numpts * 4];
#endif

  const float saturation = 1.0f;
  // alternative striped intensity, currently disabled:
  //   (fmodf(hue*40.0f, 1.0f) > 0.5f) ? 0.85f : 0.35f
  const float intensity = 1.0f;

  int vert = 0; // running vertex number, equal to iu*vdiv + iv
  for (int iu=0; iu<udiv; iu++) {
    for (int iv=0; iv<vdiv; iv++, vert++) {
      float *rgba = colors + 4*vert;
      const float hue = float(vert) / float(numpts);
      HSItoRGB(hue, saturation, intensity, rgba[0], rgba[1], rgba[2]);
      rgba[3] = 1.0f;
    }
  }

  return colors;
}


//
// Build quad connectivity for a udiv x vdiv vertex grid stored v-fastest.
// One quad (4 vertex indices) is emitted per grid cell in the order
// (iu,iv), (iu,iv+1), (iu+1,iv+1), (iu+1,iv).  With wrapmesh set there are
// udiv*vdiv cells and the "+1" neighbors wrap around modulo udiv/vdiv;
// otherwise the cells of the last row and column are omitted.
// Returns the newly allocated index array, obtained from cudaMallocHost()
// when TACHYON_USEPINNEDMEMORY is defined and new[] otherwise.
//
int * parametric_quadmesh_indices(int udiv, int vdiv, int wrapmesh) {
  // without wraparound the last row/column of vertices starts no cell
  const int ucells = (wrapmesh) ? udiv : (udiv-1);
  const int vcells = (wrapmesh) ? vdiv : (vdiv-1);
  const int numquads = ucells * vcells;

#if defined(TACHYON_USEPINNEDMEMORY)
  int * quadmesh_indices;
  cudaMallocHost(&quadmesh_indices, numquads * sizeof(int) * 4);
#else
  int * quadmesh_indices = new int[numquads * 4];
#endif

  int *quad = quadmesh_indices; // write cursor, advances 4 ints per cell
  for (int iu=0; iu<ucells; iu++) {
    const int thisrow = iu * vdiv;
    const int nextrow = ((iu+1) % udiv) * vdiv; // wraps back to row 0
    for (int iv=0; iv<vcells; iv++) {
      const int ivnext = (iv+1) % vdiv;         // wraps back to column 0

      quad[0] = thisrow + iv;
      quad[1] = thisrow + ivnext;
      quad[2] = nextrow + ivnext;
      quad[3] = nextrow + iv;
      quad += 4;
    }
  }

  return quadmesh_indices;
} 


//
// Build triangle connectivity for a udiv x vdiv vertex grid stored v-fastest.
// Each grid cell yields two triangles (6 vertex indices):
//   (iu,iv), (iu,iv+1), (iu+1,iv)   and   (iu,iv+1), (iu+1,iv), (iu+1,iv+1).
// With wrapmesh set there are udiv*vdiv cells and the "+1" neighbors wrap
// around modulo udiv/vdiv; otherwise the cells of the last row and column
// are omitted.
// Returns the newly allocated index array, obtained from cudaMallocHost()
// when TACHYON_USEPINNEDMEMORY is defined and new[] otherwise.
//
int * parametric_trimesh_indices(int udiv, int vdiv, int wrapmesh) {
  // without wraparound the last row/column of vertices starts no cell
  const int ucells = (wrapmesh) ? udiv : (udiv-1);
  const int vcells = (wrapmesh) ? vdiv : (vdiv-1);
  const int numcells = ucells * vcells;

#if defined(TACHYON_USEPINNEDMEMORY)
  int * trimesh_indices;
  cudaMallocHost(&trimesh_indices, numcells * sizeof(int) * 2 * 3);
#else
  int * trimesh_indices = new int[numcells * 2 * 3];
#endif

  int *tri = trimesh_indices; // write cursor, advances 6 ints per cell
  for (int iu=0; iu<ucells; iu++) {
    const int thisrow = iu * vdiv;
    const int nextrow = ((iu+1) % udiv) * vdiv; // wraps back to row 0
    for (int iv=0; iv<vcells; iv++) {
      const int ivnext = (iv+1) % vdiv;         // wraps back to column 0

      const int v00 = thisrow + iv;
      const int v01 = thisrow + ivnext;
      const int v10 = nextrow + iv;
      const int v11 = nextrow + ivnext;

      // first triangle
      tri[0] = v00;
      tri[1] = v01;
      tri[2] = v10;

      // second triangle
      tri[3] = v01;
      tri[4] = v10;
      tri[5] = v11;

      tri += 6;
    }
  }

  return trimesh_indices;
} 


//
// Build line-segment connectivity for a udiv x vdiv vertex grid stored
// v-fastest.  Each grid cell yields two segments (4 vertex indices):
//   (iu,iv)-(iu,iv+1)   and   (iu,iv)-(iu+1,iv).
// With wrapmesh set there are udiv*vdiv cells and the "+1" neighbors wrap
// around modulo udiv/vdiv; otherwise the cells of the last row and column
// are omitted.
// Returns the newly allocated array of 4 ints per cell, obtained from
// cudaMallocHost() when TACHYON_USEPINNEDMEMORY is defined and new[]
// otherwise.
//
int * parametric_wiremesh_indices(int udiv, int vdiv, int wrapmesh) {
  // if we're not doing wraparound, don't generate associated connections 
  int umax = (wrapmesh) ? udiv : (udiv-1);
  int vmax = (wrapmesh) ? vdiv : (vdiv-1);

  int numedges = umax * vmax;
#if defined(TACHYON_USEPINNEDMEMORY)
  int * wiremesh_indices;
  // two segments of two indices per cell, same size as the new[] path
  cudaMallocHost(&wiremesh_indices, numedges * sizeof(int) * 2 * 2);
#else
  int * wiremesh_indices = new int[numedges * 2 * 2];
#endif

  for (int iu=0; iu<umax; iu++) {
    for (int iv=0; iv<vmax; iv++) {
      int vertind = iu * vdiv + iv;
      int cylind = 4 * (iu * vmax + iv);
      int iunext = (iu+1) % udiv; // wrap mesh
      int ivnext = (iv+1) % vdiv; // wrap mesh

      // segment along v
      wiremesh_indices[cylind    ] = vertind; 
      wiremesh_indices[cylind + 1] = iu * vdiv + ivnext;

      // segment along u
      wiremesh_indices[cylind + 2] = vertind; 
      wiremesh_indices[cylind + 3] = iunext * vdiv + iv;
    }
  }

  return wiremesh_indices;
}


//
// Add the parametric surface to the scene as a triangle mesh.
//   coords, colors: packed xyz / rgb arrays holding udiv*vdiv vertices
//   trimesh_indices: 3 ints per triangle, two triangles per grid cell
//   wrapmesh: selects udiv*vdiv cells, or (udiv-1)*(vdiv-1) when zero
// The arrays are copied into a TriangleMesh that is handed to
// rt->add_trimesh() together with the material index 'mat'.
//
void gen_trimesh(TachyonOptiX *rt, int udiv, int vdiv, int wrapmesh,
                 float *coords, float *colors, int *trimesh_indices, int mat) {
  const int numverts = udiv * vdiv;

  // without wraparound the last row/column of cells does not exist
  const int ucells = (wrapmesh) ? udiv : (udiv-1);
  const int vcells = (wrapmesh) ? vdiv : (vdiv-1);
  const int numtris = ucells * vcells * 2;

  TriangleMesh mesh;
  mesh.vertices.resize(numverts);
  mesh.vertcolors3f.resize(numverts);
  mesh.indices.resize(numtris);

  // positions and colors are both three floats per vertex
  const size_t vertbytes = numverts * 3 * sizeof(float);
  memcpy(mesh.vertices.data(), coords, vertbytes);
  memcpy(mesh.vertcolors3f.data(), colors, vertbytes);
  memcpy(mesh.indices.data(), trimesh_indices, numtris * 3 * sizeof(int));

  rt->add_trimesh(mesh, mat);
}


//
// Add the parametric surface to the scene as a quad mesh.
//   coords, colors: packed xyz / rgb arrays holding udiv*vdiv vertices
//   quadmesh_indices: 4 ints per quad, one quad per grid cell
//   wrapmesh: selects udiv*vdiv cells, or (udiv-1)*(vdiv-1) when zero
// The arrays are copied into a QuadMesh that is handed to
// rt->add_quadmesh() together with the material index 'mat'.
//
void gen_quadmesh(TachyonOptiX *rt, int udiv, int vdiv, int wrapmesh,
                  float *coords, float *colors, int *quadmesh_indices, int mat) {
  const int numverts = udiv * vdiv;

  // without wraparound the last row/column of cells does not exist
  const int ucells = (wrapmesh) ? udiv : (udiv-1);
  const int vcells = (wrapmesh) ? vdiv : (vdiv-1);
  const int numquads = ucells * vcells;

  QuadMesh mesh;
  mesh.vertices.resize(numverts);
  mesh.vertcolors3f.resize(numverts);
  mesh.indices.resize(numquads);

  // positions and colors are both three floats per vertex
  const size_t vertbytes = numverts * 3 * sizeof(float);
  memcpy(mesh.vertices.data(), coords, vertbytes);
  memcpy(mesh.vertcolors3f.data(), colors, vertbytes);
  memcpy(mesh.indices.data(), quadmesh_indices, numquads * 4 * sizeof(int));

  rt->add_quadmesh(mesh, mat);
}


//
// Draw points on parametric surface using spheres
//   coords: packed xyz array holding udiv*vdiv sphere centers
//   radius: radius applied to every sphere
//   colors: packed rgb array holding udiv*vdiv colors, or NULL
//   mat:    material index passed through to rt->add_spherearray()
// A NULL 'colors' pointer is accepted (main() passes NULL when drawing a
// wireframe); every sphere is then colored white (1,1,1) instead of reading
// through the NULL pointer.
//
void gen_spheresurf(TachyonOptiX *rt, int udiv, int vdiv, 
                    float *coords, float radius, float *colors, int mat) {
  int numpts = udiv * vdiv;

  SphereArray spheres;
  spheres.center.resize(numpts);
  spheres.radius.resize(numpts);
  spheres.primcolors3f.resize(numpts);

  float3 *verts = spheres.center.data();
  float *radii = spheres.radius.data();
  float3 *cols = spheres.primcolors3f.data();

  memcpy(verts, coords, numpts * 3 * sizeof(float));

  if (colors != NULL) {
    memcpy(cols, colors, numpts * 3 * sizeof(float));
  } else {
    // no per-point colors supplied: fill with white, addressing the color
    // array as packed floats exactly as the memcpy() above does
    float *rgb = reinterpret_cast<float *>(cols);
    for (int i=0; i<numpts*3; i++)
      rgb[i] = 1.0f;
  }

  for (int i=0; i<numpts; i++)
    radii[i]=radius;   

  rt->add_spherearray(spheres, mat);
}



//
// Draw wire mesh with cylinders
//   coords: packed xyz array holding udiv*vdiv vertices
//   radius: radius applied to every cylinder
//   colors: packed rgb array holding udiv*vdiv colors, or NULL
//   wiremesh_indices: vertex index pairs, two pairs (4 ints) per grid cell,
//                     as produced by parametric_wiremesh_indices()
//   wrapmesh: selects udiv*vdiv cells, or (udiv-1)*(vdiv-1) when zero
//   mat:    material index passed through to rt->add_cylarray()
// One cylinder is generated for every index pair, colored by its starting
// vertex.  A NULL 'colors' pointer is accepted (main() passes NULL for the
// wireframe); cylinders are then colored white (1,1,1).
//
void gen_wiremesh(TachyonOptiX *rt, int udiv, int vdiv, int wrapmesh,
                  float *coords, float radius, float *colors, 
                  int *wiremesh_indices, int mat) {
  // if we're not doing wraparound, don't generate associated connections 
  int umax = (wrapmesh) ? udiv : (udiv-1);
  int vmax = (wrapmesh) ? vdiv : (vdiv-1);
  int numcells = umax * vmax;

  // each cell contributes one segment along v and one along u
  int numcyls = numcells * 2;

  CylinderArray cyls;
  cyls.start.resize(numcyls);
  cyls.end.resize(numcyls);
  cyls.radius.resize(numcyls);
  cyls.primcolors3f.resize(numcyls);

  float3 *vstart = cyls.start.data();
  float3 *vend = cyls.end.data();   // end points get their own array
  float *vrad = cyls.radius.data();
  float3 *cols = cyls.primcolors3f.data(); 

  for (int i=0; i<numcyls; i++) {
    // float offsets of the two endpoints in the packed coords/colors arrays
    int idx1 = 3 * wiremesh_indices[i*2];
    int idx2 = 3 * wiremesh_indices[i*2+1];
    vstart[i] = make_float3(coords[idx1], coords[idx1+1], coords[idx1+2]);
    vend[i] = make_float3(coords[idx2], coords[idx2+1], coords[idx2+2]);
    vrad[i] = radius;
    if (colors != NULL)
      cols[i] = make_float3(colors[idx1], colors[idx1+1], colors[idx1+2]);
    else
      cols[i] = make_float3(1.0f, 1.0f, 1.0f);
  }

  rt->add_cylarray(cyls, mat);
}



//
// Draw a two-triangle mesh for the floor
//   width:  floor extent along x, centered on x = 0
//   height: y coordinate shared by all four corners
//   length: floor extent along z, centered on z = 0
//   mat:    material index passed through to rt->add_trimesh()
// All four corners are colored white.
//
void gen_floor(TachyonOptiX *rt, float width, float height, float length, int mat) {
  const float halfwidth = width / 2.0f;
  const float halflength = length / 2.0f;

  float vertex[] = {
      -halfwidth, height, -halflength,
      -halfwidth, height,  halflength,
       halfwidth, height, -halflength,
       halfwidth, height,  halflength
  };
  // three color components per vertex, matching vertcolors3f
  float color[] = {
      1.0f, 1.0f, 1.0f,
      1.0f, 1.0f, 1.0f,
      1.0f, 1.0f, 1.0f,
      1.0f, 1.0f, 1.0f
  };
  int index[] = {
      0, 1, 2,                                  // triangle-1
      1, 2, 3                                   // triangle-2
  };

  TriangleMesh mesh;
  mesh.vertices.resize(4);
  mesh.vertcolors3f.resize(4);
  // indices holds one int3 per triangle, so two entries rather than six;
  // extra entries would be left as all-zero degenerate triangles
  mesh.indices.resize(2);

  float3 *verts = mesh.vertices.data();
  float3 *cols = mesh.vertcolors3f.data();
  int3 *indices = mesh.indices.data();

  memcpy(verts, vertex, 4 * 3 * sizeof(float));
  memcpy(cols, color, 4 * 3 * sizeof(float));
  memcpy(indices, index, 2 * 3 * sizeof(int));

  rt->add_trimesh(mesh, mat);
}



//
// Print the first and last 'count' points of a packed xyz array to stdout.
//   coords: packed xyz array holding numpts points
//   numpts: number of points in coords
//   count:  points to print from each end, clamped to numpts
// The two ranges are separated by a "..." line.
//
void print_coords(float *coords, int numpts, int count) {
  printf("Coordinate dump:\n");
  if (count > numpts)
    count = numpts;

  // prints points [first, last) one per line
  auto dump_range = [coords](int first, int last) {
    for (int i=first; i<last; i++) {
      const float *xyz = coords + 3*i;
      printf("[%d]: %.3f  %.3f  %.3f\n", 
             i, xyz[0], xyz[1], xyz[2]);
    }
  };

  dump_range(0, count);
  printf("...\n");
  dump_range(numpts-count, numpts);
}


int main(int argc, const char **argv) {
  PROFILE_INITIALIZE();

  // some sane defaults
  int imgSize[2] = {4096, 4096 }; // W x H
  const char *surftype = "twisty";
  int udiv = 80;
  int vdiv = 80; 
  int wireframe = 0; 
  int usequads = 0;
  int nofloor = 0;
  int nospheres = 0;
  float radius = 0.025f;
  int ambientocclusion = 1;
  int dumpcoords = 0;
  int nosurf = 0;
  int warmup = 0; 

  //
  // camera defaults
  //
#if 0
  float cam_pos[] =  {0.0f,  -6.0f,   -8.0f};   // look at origin from -Z
  float cam_up[] =   {0.0f,   1.0f,    0.0f};	// Y-up
  float cam_view[3];

  // look at origin
  float invlen = 1.0f / sqrtf(cam_pos[0]*cam_pos[0] +
                              cam_pos[1]*cam_pos[1] +
                              cam_pos[2]*cam_pos[2]);
  cam_view[0] = -cam_pos[0] * invlen;
  cam_view[1] = -cam_pos[1] * invlen;
  cam_view[2] = -cam_pos[2] * invlen;
#endif

  // 
  // parse args
  //
  if (argc == 1) {
    printf("Usage: %s surftype udivs vdivs [optional flags]\n", argv[0]);
    printf("  optional flags:  -nofloor: don't draw floor\n");
    printf("                   -nospheres: don't draw spheres\n");
    printf("                   -quads: draw surfaces using quads\n");
    printf("                   -wireframe: draw wireframe over surfaces\n");
    printf("                   -ao: add renderer-specific AO lighting\n");
    printf("                   -res XXXX YYYY: override default image res\n");
    return -1;
  }

  // parse surface type
  if (argc > 1) {
    surftype = argv[1];
  }

  // parse udiv/vdiv params
  if (argc > 3) {
    udiv = atoi(argv[2]);
    vdiv = atoi(argv[3]);
  }

  // parse remaining optional parameter flags
  if (argc > 4) {
    for (int i=4; i<argc; i++) {
      if (!strcmp("-wireframe", argv[i])) {
        wireframe = 1;
        radius = 0.00625f;
        printf("Drawing wireframe mesh over surface...\n");
        continue;
      }

      if (!strcmp("-quads", argv[i])) {
        usequads = 1;
        printf("Drawing surface using quads.\n");
        continue;
      }

      if (!strcmp("-nofloor", argv[i])) {
        nofloor = 1;
        printf("Drawing surface without floor.\n");
        continue;
      }

      if (!strcmp("-nosurf", argv[i])) {
        nosurf = 1;
        printf("Don't draw surface.\n");
        continue;
      }

      if (!strcmp("-nospheres", argv[i])) {
        nospheres = 1;
        printf("Drawing surface without sphere points.\n");
        continue;
      }

      if (!strcmp("-ao", argv[i])) {
        ambientocclusion = 1;
        printf("Enabling renderer-specific AO lighting.\n");
        continue;
      }

      if (!strcmp("-noao", argv[i])) {
        ambientocclusion = 0;
        printf("Disabling renderer-specific AO lighting.\n");
        continue;
      }

      if (!strcmp("-res", argv[i])) {
        if ((argc - i) >= 2) {
          imgSize[0] = atoi(argv[++i]);
          imgSize[1] = atoi(argv[++i]);
          printf("Image resolution set to: %d x %d\n", imgSize[0], imgSize[1]);
        }
        continue;
      }

#if 0
      if (!strcmp("-pause", argv[i])) {
        sleep(10);
      }
#endif

      if (!strcmp("-dumpcoords", argv[i])) {
        dumpcoords = 10;
        if ((argc - i) >= 2) {
          dumpcoords = atoi(argv[++i]);
        }      
      }

      if (!strcmp("-warmup", argv[i])) {
        warmup = 1;
        printf("Enabling profiling warm-up pass and timing.\n");
        continue;
      }

      printf("Unrecognized flag: '%s'.\n", argv[i]);
    }
  } 


#if defined(TACHYON_USEPINNEDMEMORY)
  printf("USING PINNED HOST MEMORY ALLOCATIONS\n");
#endif


  //
  // Compute parametric surface vertices, faces, colors, and normals
  //
  int numpts = udiv * vdiv;
  printf("Calculating parametric grid type: %s, udiv: %d, vdiv: %d, points: %d\n", surftype, udiv, vdiv, numpts);

  int wrapmesh = 0; // flag indicating 2-D mesh wraparound
  float *coords = parametric_grid_verts(surftype, udiv, vdiv, wrapmesh);
  if (!coords) {
    printf("Surface type '%s' unrecognized, exiting.\n", surftype);
    return -1;
  }

  if (dumpcoords) {
    print_coords(coords, numpts, dumpcoords);
  } 

  PROFILE_PUSH_RANGE("Calculate Mesh", 0);
  float *colors = parametric_grid_colors3f(udiv, vdiv);
//  float *colors = parametric_grid_colors4f(udiv, vdiv);
  int *quadmesh_indices = parametric_quadmesh_indices(udiv, vdiv, wrapmesh);
  int *trimesh_indices = parametric_trimesh_indices(udiv, vdiv, wrapmesh);
  int *wiremesh_indices = parametric_wiremesh_indices(udiv, vdiv, wrapmesh);
  PROFILE_POP_RANGE();

  if (!nosurf) {
    printf("Surface mesh contains %d quads, or %d triangles\n", 
           udiv*vdiv, udiv*vdiv*2);
  }
  if (!nospheres) {
    printf("Surface scene contains %d spheres\n", numpts);
  }

 
  PROFILE_PUSH_RANGE("Initialize Tachyon", 0);
  printf("Initializing TachyonOptiX...");

  /// static methods for querying OptiX-supprted GPU hardware independent
  /// of whether we actually have an active context.
  unsigned int devcount = TachyonOptiX::device_count();
  unsigned int optixversion = TachyonOptiX::optix_version();

  printf("Found %u OptiX devices\n", devcount);
  printf("OptiX version used for build: %d.%d.%d (%u)\n",
         optixversion/10000,
         (optixversion%10000)/100,
         (optixversion%100),
         optixversion);

  TachyonOptiX *rt = new TachyonOptiX();
  PROFILE_POP_RANGE();

  PROFILE_PUSH_RANGE("Build Scene", 0);
  //
  // Build scene
  //

  // create and setup camera
  rt->framebuffer_colorspace(RT_COLORSPACE_sRGB);
  rt->framebuffer_resize(imgSize[0], imgSize[1]);
//  rt->set_verbose_mode(TachyonOptiX::RT_VERB_MIN);
//  rt->set_verbose_mode(TachyonOptiX::RT_VERB_DEBUG);
  float rtbgcolor[] = { 1.0, 1.0, 1.0 };
  float rtbggradtopcolor[] = { 0.6, 0.0, 0.0 };
  float rtbggradbotcolor[] = { 0.0, 0.0, 0.6 };

  rt->set_bg_color(rtbgcolor);
  rt->set_bg_color_grad_top(rtbggradtopcolor);
  rt->set_bg_color_grad_bot(rtbggradbotcolor);

  float bggradient[] = { 0.0f, 1.0f, 0.0f };
  rt->set_bg_gradient(bggradient);
  rt->set_bg_gradient_topval(1.0f);
  rt->set_bg_gradient_botval(-1.0f);

//  rt->set_bg_mode(TachyonOptiX::RT_BACKGROUND_TEXTURE_SOLID);
rt->set_bg_mode(TachyonOptiX::RT_BACKGROUND_TEXTURE_SKY_SPHERE);
//  rt->set_bg_mode(TachyonOptiX::RT_BACKGROUND_TEXTURE_SKY_SPHERE);

  rt->set_aa_samples(16);
  rt->shadows_enable(1);

  if (ambientocclusion) {
    rt->set_ao_samples(16);
    rt->set_ao_ambient(0.9);
    rt->set_ao_direct(0.2);
    rt->set_ao_maxdist(100.2);
  }

  rt->camera_dof_enable(0);


  float lightdir0[] = { -0.5f, 0.5f, -1.0f };
  float lightcolor0[] = { 1.0f, 1.0f, 1.0f };
  rt->add_directional_light(lightdir0, lightcolor0);

  // set camera params
  rt->set_camera_type(TachyonOptiX::RT_PERSPECTIVE);
//  rt->set_camera_type(TachyonOptiX::RT_ORTHOGRAPHIC);

  float campos[3] = {0.f, 10.0f, 12.f};
  float camU[3]   = {1.0f, 0.0f, 0.0f};
  float camV[3]   = {0.0f, -1.0f, 0.0f};
  float camW[3]   = {0.0f,  0.0f, 1.0f};

  rt->set_camera_pos(campos);
  rt->set_camera_ONB(camU, camV, camW);

  float camat[3]   = {0.f, 0.f, 0.f};
  rt->set_camera_lookat(camat, camV);

  rt->set_camera_zoom(0.5f);
  rt->set_camera_dof_fnumber(64.0f);
  rt->set_camera_dof_focal_dist(0.7f);
  // set stereoscopic display parameters
  rt->set_camera_stereo_eyesep(0.6f);
  rt->set_camera_stereo_convergence_dist(10.0f);

  // set depth cueing parameters
  float start = 1.0f;
  float end = 30.0f;
  float density = 0.33f;
//    rt->set_cue_mode(TachyonOptiX::RT_FOG_LINEAR, start, end, density);
//    rt->set_cue_mode(TachyonOptiX::RT_FOG_EXP, start, end, density);
//    rt->set_cue_mode(TachyonOptiX::RT_FOG_EXP2, start, end, density);
  rt->set_cue_mode(TachyonOptiX::RT_FOG_NONE, start, end, density);

  int mat = 0;
  float ambient = 0.1f;
  float diffuse = 0.7f;
  float specular = 0.0f;
  float shininess = 0.0f;
  float reflectivity = 0.0f;
  float opacity = 1.0f;
  float outline = 0.0f;
  float outlinewidth = 0.0f;
  int transmode = 0;

  rt->add_material(ambient, diffuse, specular, shininess, reflectivity, 
                   opacity, outline, outlinewidth, transmode, mat);
  PROFILE_POP_RANGE();


  if (warmup) {
    rt->set_verbose_mode(TachyonOptiX::RT_VERB_MIN);
    PROFILE_PUSH_RANGE("Renderer Warmup Passes", 0);
    // force warmup passes on an empty scene so our timings of subsequent
    // scene data are way more realistic
    for (int w=0; w<100; w++) { 
      rt->render();
      rt->framebuffer_clear();
    }
    PROFILE_POP_RANGE();
  }

  rt->set_verbose_mode(TachyonOptiX::RT_VERB_TIMING);

  // XXX Start Nsight Compute Profiles here...
  PROFILE_START();

  PROFILE_PUSH_RANGE("Generate Scene", 0);
  if (!nospheres) {
    // Draw points on parametric surface using spheres
    gen_spheresurf(rt, udiv, vdiv, coords, radius, (wireframe) ? NULL : colors, mat);
  }

  if (!nosurf) {
    if (wireframe) {
      // Draw wire mesh with cylinders
      gen_wiremesh(rt, udiv, vdiv, wrapmesh, coords, radius, (wireframe) ? NULL : colors, wiremesh_indices, mat);
    }

    if (usequads) {
      // Draw parametric surface as a quad mesh
      gen_quadmesh(rt, udiv, vdiv, wrapmesh, coords, colors, quadmesh_indices, mat);
    } else {
      // Draw parametric surface as a triangle mesh
      gen_trimesh(rt, udiv, vdiv, wrapmesh, coords, colors, trimesh_indices, mat);
    }
  }

  if (!nofloor) {
    // Draw a quad or triangle mesh for the floor
    gen_floor(rt, 200.0f, -4.0f, 200.0f, mat);
  }
  PROFILE_POP_RANGE();

  PROFILE_PUSH_RANGE("Render Scene", 0);
  printf("Rendering frames w/ accumulation buffer...\n");
  // render 100 accumulated frames
//  for (int frames = 0; frames < 3; frames++) {
    rt->render();
//  }
  PROFILE_POP_RANGE();

  rt->print_raystats_info();

  PROFILE_PUSH_RANGE("Write Output Image", 0);

  char filename[1024];
  sprintf(filename, "parametric-%s-%d-%d.png", surftype, udiv, vdiv);
  printf("Writing accumulated frames to '%s'...\n", filename);
  if (filename != NULL) {
    rt->framebuffer_get_size(imgSize[0], imgSize[1]);
    size_t bufsz = imgSize[0] * imgSize[1] * sizeof(int);
    unsigned char *rgb4u = (unsigned char *) calloc(1, bufsz);
    rt->framebuffer_download_rgb4u(rgb4u);

#if 0
    if (writealpha) {
printf("Writing rgba4u alpha channel output image 2\n");
      if (write_image_file_rgba4u(filename, rgb4u, imgSize[0], imgSize[1]))
        printf("Failed to write image '%s'!!\n", filename);
    } else {
      if (write_image_file_rgb4u(filename, rgb4u, imgSize[0], imgSize[1]))
        printf("Failed to write image '%s'!!\n", filename);
    }
#else
    stbi_write_png(filename, imgSize[0], imgSize[1], 4, rgb4u, imgSize[0] * sizeof(int));
#endif
  
    free(rgb4u);
  }


#if defined(TACHYON_USEPINNEDMEMORY)
  cudaFreeHost(coords);
  cudaFreeHost(colors);
  cudaFreeHost(quadmesh_indices);
  cudaFreeHost(trimesh_indices);
  cudaFreeHost(wiremesh_indices);
#else
  delete [] coords;
  delete [] colors;
  delete [] quadmesh_indices;
  delete [] trimesh_indices;
  delete [] wiremesh_indices;
#endif

  delete rt;

  PROFILE_POP_RANGE();
  return 0;
}


