// Laplacian benchmark for Array class

#ifndef NDEBUG
#define NDEBUG
#endif

#include "Array.h"

// gcc compilation:
// g++ -funroll-loops -O3 -DNDEBUG laplacian.cc

// cxx compilation:
// cxx -D__USE_STD_IOSTREAM -fast -O4 -DNDEBUG laplacian.cc

// Make sure __restrict isn't disabled in some header file!
#undef __restrict

// Uncomment for compilers without a __restrict keyword:
//#define __restrict

// restrict isn't (yet) in the C++ standard; many compilers support __restrict
#define restrict __restrict

using namespace Array;
using namespace std;

// restrict guarantees that there are no (relevant) data aliases:
typedef array1<double>::opt restrict vector;

int main()
{
  int i,j,k,n,m;
  n=m=4000;
  array2<double> u(n,m), L(n,m);

  for(i=0; i < n; i++) {
    vector ui=u[i];
    for(j=0; j < m; j++) {
      ui[j]=i*i+j*j;
    }
  }

  for(k=0; k < 10; k++) {
    for(i=1; i < n-1; i++) {
      vector Li=L[i];
      vector uim1=u[i-1];
      vector ui=u[i];
      vector uip1=u[i+1];
      for(j=1; j < m-1; j++) {
// Optimized Laplacian:	
	Li[j]=-uim1[j]-ui[j-1]+4.0*ui[j]-ui[j+1]-uip1[j];
// Equivalent forms:
//	L[i][j]=-u[i-1][j]-u[i][j-1]+4.0*u[i][j]-u[i][j+1]-u[i+1][j];
//	L(i,j)=-u(i-1,j)-u(i,j-1)+4.0*u(i,j)-u(i,j+1)-u(i+1,j);
      }
    }
  }
	
#if 0
  double sum=0.0;
  for(i=1; i < n-1; i++) {
    vector Li=L[i];
    for(j=1; j < m-1; j++) {
      sum += Li[j];
    }
  }

  std::cout << sum << endl;
#endif
	
  return 0;
}

