/*****************************************************************************
 *                                  N L E C G
 *****************************************************************************
 *
 *   PROGRAM ID:        NLECG.C
 *
 *   AUTHOR:            Glynne Casteel
 *                      GlynneC@ix.netcom.com
 *
 *                      This software IS copyrighted, BUT you may use it freely
 *                      PROVIDED THAT you send me a copy of any commercial or
 *                      shareware product that incorporates this code.
 *
 *
 *   DATE:              June 11, 1994
 *
 *   DESCRIPTION:
 *
 *      This is the only Conjugate Gradient I have ever seen that
 *      directly solves a system of NLEs.  Not only that, but it
 *      has no line search!!  I found it in the following article:
 *
 *                  AT Chronopoulos, J Comp Appl Math 40(1992) p.73
 *                                                   [June 92]
 *
 *      The name nlecg= Non Linear Equations, Conjugate Gradient
 *      the name cgchr=  Conjugate Gradient, Chronopoulos
 *
 *
 *   INPUT PARAMETERS:  None
 *
 *   RETURN/EXIT VALUE: Number of iterations required for solution
 *
 *   INPUT FILES:       None
 *
 *   OUTPUT FILES:      None
 *
 *   COMPILE/LINK:      Microsoft C 6.0 compatable compiler
 *
 *
 *   SPECIAL NOTES:     None
 *
 *****************************************************************************
 *                           MODIFICATION LOG
 *
 *   DATE          NAME                DESCRIPTION
 *   ------------  ------------------  ----------------------------------
 *
 ******************************************************************************/



/*
   When the COLLINS_EXTERNS flag is turned on it puts the following
   types of definitions into the source:

    extern unsigned long enStep;

   In the application the flag isn't on so declaration occurs.
   These variables are used to communicate progress of the subroutine
   to the application.
*/
#define COLLINS_EXTERNS   1
#include <collins.h>


// locally global variables
static NLFXNS fxn;
static int nLoops;
static float *f;

// local functions and macros
#define TINY  1.0e-10
static void cgchr( int n, float x[], float tol, float s[],
                   float Jf[], float Js[] );
static float fSquared( int n, float x[] )
{
float ff;

  fxn( n, x, f );
  for( ff=0 ; n>0 ; n-- )
      ff += f[n]*f[n];
  return( ff );
}




/*
   ******* HERE IS THE ENTRY POINT INTO THIS MODULE *********
*/

int nlecg( int n, float x[], NLFXNS fUser, float tol )
/*
   This subroutine is just a dummy to allocate the (3*n)
   dimensional workspace w which is needed by the following
   subroutines. This is a holdover from the FORTRAN roots of
   this library. In FORTRAN, you can't dynamically allocate
   an array, so the calling program must create a "work array"
   of a certain size. Then you use clever function parameter
   mapping to divvy up this array into all of the vectors and
   matrices needed in the library module.

   This subroutine also assigns user defined functions to
   (locally) global pointers, so we don't have to keep passing
   them to each lower level.
*/
{
float **w;
  w= matrix( 1, 3, 1, n );
  f= vector( 1, n );
  fxn= fUser;

  if( tol <= 0.0 )
      tol= 1.0e-6;

  cgchr( n, x, tol, w[1], w[2], w[3] );
  free_matrix( w, 1, 3, 1, n );
  free_vector( f, 1, n );

  return( nLoops );
}









static void cgchr( int n, float x[], float tol, float s[],
                   float Jf[], float Js[] )
{
float b, c, ss, xx, ff, ff0, fJf, JfJf, JsJs, JfJs, eps;
int   i, j, k, bConverged, nMaxLoops;


/* ----------------------VARIABLE KEY--------------------------------
   n       length of independent (vector) variable
   x       independent variable-- on input it contains
               the initial guess;  on output, the soln
   f       stores value of previous function (fxn) call
   ff      the scalar dot product f.f
   s       the direction of the correction
   fJf     the scalar dot product   f.J.f
   JfJf    the scalar dot product   J.f.J.f
   JsJs    the vector dot product   J.s.J.s
   JfJs    the vector dot product   J.f.J.s

---------------------------------------------------------------------
*/

// initizations
  fxn( n, x, f );
  for( ff=xx=0, k=1 ; k<=n ; k++ )
  {
      ff += f[k]*f[k];
      xx += x[k]*x[k];
      s[k]= -f[k];

      x[k] += G_EPS *f[k];
  }
  ss= ff;

  b= 0.0;
  JfJf= 0.0;
  fxn( n, x, Jf );
  for( k=1 ; k <=n ; k++ )
  {
      Jf[k]= ( Jf[k]-f[k] ) / G_EPS;
      JfJf += Jf[k]*Jf[k];
      Js[k]= 0;

      x[k] -= G_EPS *f[k];
  }

  nMaxLoops= n*max(10,n) +1;
  if( nMaxLoops < 500 )
      nMaxLoops= 500;
  nLoops= 0;
  bConverged= 0;


// main loop
  while( bConverged<2 && nLoops<nMaxLoops )
  {
      nLoops++;
      printf( "%5d\r", nLoops );

      ff0= ff;
      if( nLoops <= 10 )
      {
          linmin( n, x, fSquared, s, &ff );

          for( xx=ss=0, k=1 ; k <=n ; k++ )
          {
              xx += x[k]*x[k];
              ss += s[k]*s[k];
          }
      }
      else
      {
          for( fJf=0, k=1 ; k <=n ; k++ )
              fJf += f[k] * Jf[k];

          c= fJf/JfJf; // Chronopoulos says this is the asymptotic value
                       // but I've never done this many nLoops!

          for( k=1 ; k <=n ; k++ )
          {
              s[k] *= c;
              x[k] += s[k];
          }

          fxn( n, x, f );

          for( ff=xx=ss=0, k=1 ; k <=n ; k++ )
          {
              ff += f[k]*f[k];
              xx += x[k]*x[k];
              ss += s[k]*s[k];
          }
      }


// /****/ printf( "ConjGrad: F= %e\n", ff );
// /****/ printf( "\t  x= (%e %e)\n", x[1], x[2] );
// /****/ printf( "\t  JsJs= %e, JfJs= %e, JfJf= %e\n", JsJs, JfJs, JfJf );
// /****/ printf( "\t  s= (%e %e)\n", s[1], s[2] );


      if( fabs(ff-ff0)<= 0.5*tol*(ff+ff0+TINY) )
          bConverged++;
      else if( fabs(ff-ff0)<= 0.5*tol*(2+ff+ff0+TINY) )
          bConverged++;
      else if( ff < tol )
          bConverged= 2;
      else
      {
          bConverged= 0;

          // calc Jf & JfJf
          eps= sqrt( xx );
          if( eps< 1.0 )  eps= 1.0;
          eps *= G_EPS;
          eps /= sqrt( ff );

          for( k=1 ; k <=n ; k++ )
              x[k] += eps *f[k];
          fxn( n, x, Jf );
          JfJf= 0.0;
          for( k=1 ; k <=n ; k++ )
          {
              x[k] -= eps *f[k];
              Jf[k]= ( Jf[k]-f[k] ) / eps;
              JfJf += Jf[k]*Jf[k];
          }

/**/
          // calc Js, JsJs, & JfJs
          eps *= sqrt( ff/ss );
          for( k=1 ; k <=n ; k++ )
              x[k] += eps *s[k];
          fxn( n, x, Js );
          JsJs= 0.0;
          JfJs= 0.0;
          for( k=1 ; k <=n ; k++ )
          {
              x[k] -= eps *s[k];
              Js[k]= ( Js[k]-f[k] ) / eps;
              JsJs += Js[k]*Js[k];
              JfJs += Jf[k]*Js[k];
          }

// Here is one of Chronopoulos' shortcuts that doesn't seem to work.
/*
          // calc Js, JfJs, & JsJs
          JsJs= 0.0;
          JfJs= 0.0;
          for( k=1 ; k <=n ; k++ )
          {
              Js[k]= b*Js[k] -Jf[k];
              JsJs += Js[k]*Js[k];
              JfJs += Jf[k]*Js[k];
          }
*/

          // calc b & update s
          b= JfJs/JsJs;
          for( k=1 ; k <=n ; k++ )
              s[k]= b*s[k] -f[k];

      } // endif-elseif-else

  } // end while(not converged)



  if( bConverged < 2 )
      nLoops= -nLoops;
}