/*****************************************************************************
 *                                  S S R 1
 *****************************************************************************
 *
 *   PROGRAM ID:        SSR1.C
 *
 *   AUTHOR:            Glynne Casteel
 *                      GlynneC@ix.netcom.com
 *
 *                      This software IS copyrighted, BUT you may use it freely
 *                      PROVIDED THAT you send me a copy of any commercial or
 *                      shareware product that incorporates this code.
 *
 *
 *   DATE:              March 10, 1995
 *
 *   DESCRIPTION:
 *
 *      This is Sun Lin-ping's Scaled SR1 method as described in:
 *           J Comp Math, Vol 12 No 4 (1994), p.380
 *
 *      This article extends the ideas developed in a previous paper by
 *      Sun Lin-ping & M Osborne.
 *
 *
 *      As I understand it, this is basically Davidon's 1975 algorithm
 *      within the context of Oren's Self-Scaling update framework.
 *
 *      The paper introduces a number of really neat tricks, including
 *      a Cholesky factorization of the update matrix, which is much
 *      cleaner than anything I've read before....due to the fact that
 *      he's working with a rank one update.
 *
 *
 *
 *   INPUT PARAMETERS:  None
 *
 *   RETURN/EXIT VALUE: Number of iterations required for solution
 *
 *   INPUT FILES:       None
 *
 *   OUTPUT FILES:      None
 *
 *   COMPILE/LINK:      Microsoft C 6.0 compatable compiler
 *
 *
 *   SPECIAL NOTES:     None
 *
 *****************************************************************************
 *                           MODIFICATION LOG
 *
 *   DATE          NAME                DESCRIPTION
 *   ------------  ------------------  ----------------------------------
 *
 ******************************************************************************/



/*
   When the COLLINS_EXTERNS flag is turned on it puts the following
   types of definitions into the source:

    extern unsigned long enStep;

   In the application the flag isn't on so declaration occurs.
   These variables are used to communicate progress of a subroutine
   to the application.
*/
#define COLLINS_EXTERNS   1
#include <collins.h>

// locally global variables
static OPTFXN F;
static GRADF grad;
static int nLoops;
static float **C;
static float Cscale, Xscale, *gpfTemp1, *gpfTemp2;

// local functions and macros
#define TINY   1.0e-10
static void opt( int n, float x[], float tol, float g[], float s[],
                 float dg[], float w[], float Cw[] );
static void gradF( int n, float x[], float F0, float g[] );
static int  UseScalarUpdate( int n, float a, float b, float e, float *g, float *dg );


/*
   ******* HERE IS THE ENTRY POINT INTO THIS MODULE *********
*/
int ssr1( int n, float x[], OPTFXN FXN, GRADF DFXN, float tol )
/*
   This subroutine is just a dummy to allocate the workspace w
   which is needed by the following subroutines.

   This subroutine also assigns user defined functions to
   (locally) global pointers, so we don't have to keep passing
   them to each lower level.
*/
{
float **w;

  w= matrix( 1, n+7, 1, n );
  F= FXN;

  // Ignore DFXN parameter, even if user has supplied one
  // The reason is that this algorithm uses a scaled gradient.
  // (The scale factor is the Cholesky matrix).
  grad= gradF;

  if( tol <= 0.0 )
      tol= 1.0e-6;

  // Unlike most of my routines, the iteration matrix is not
  // passed as a parameter to opt(), because it is also needed
  // in the scaled gradient routine gradF() and I didn't want
  // to change the typedef for GRADF....
  C= &w[7];
  // I also need these variables in gradF()...
  gpfTemp1= w[6];
  gpfTemp2= w[7];

  opt( n, x, tol, w[1], w[2], w[3], w[4], w[5] );
  free_matrix( w, 1, n+7, 1, n );

  return( nLoops );
}




static void opt( int n, float x[], float tol, float g[], float s[],
                 float dg[], float w[], float Cw[] )

/* ----------------------VARIABLE KEY-----------------------------
   n          length of independent (vector) variable

   x          independent variable-- on input it contains
              the initial guess;  on output, the solution

   F          objective function (to be supplied by user!!)

   g          stores value of gradF call

   s          the search direction

   dg         the change in g between steps

   t          scale factor applied to s

   C          Cholesky factor of Hessian matrix

   w          vector used to update C

   Cw         the vector dot product    C.w

   gg, gdg,   the scalar dot products   g.g, g.dg
   dgdg,                                dg.dg

------------------------------------------------------------------
*/
{
int nRestartCriteria;
int i, j, k, bRestart, bConverged;
float F0, F1, t, gg, gdg, dgdg, a,b,c, z, theta, theta_, trace, trace_, u, u_;
float ww, ww_, CwCw, CwCw_;
int nUpdateMethod;

// I'm using a single epsilon value, Lin-ping uses two plus a sigma
// value.  I am taking them all to be 1e-4, which is the value he
// suggests for sigma:
float e= 1e-4;

// aliases
float *x_temp, *w_, *Cw_;

// init aliases
  x_temp= gpfTemp1;
  Cw_= gpfTemp1;
  w_= gpfTemp2;


// initializations
  nRestartCriteria= max(n, 10);
  nRestartCriteria *= n;
  nLoops= 0;
  bRestart= nRestartCriteria;
  // Explicitly check for convergence here??
  bConverged= 0;


// main loop
  while(  !bConverged )
  {
      if( bRestart >= nRestartCriteria )
      {
          gg= 0.0;

          for( k=1 ; k<=n ; k++ )
          {
              for( i=k ; i<=n ; i++ )
                  C[i][k]= C[k][i]= 0.0;
              C[k][k]= 1.0;
          }

          for( Xscale=0.0, k=1 ; k<=n ; k++ )
              Xscale += x[k]*x[k];
          Xscale= sqrt( Xscale/n );
          Cscale= sqrt( 1.0000/n );
          F1= 0.0;
          F0= F( n, x );
          grad( n, x, F0, g );

          // other initializations
          for( k=1 ; k<=n ; k++ )
          {
              s[k]= -g[k];
              dg[k]= g[k];
              gg += g[k]*g[k];
          }
          t= 10.0;
          bRestart= 0;
          //printf( "\nRestarted..........\n" );
      }
      //printf( "%5d  F=%16.8e,  Cscale=%16.8e, Xscale=%16.8e\r", nLoops, F0, Cscale, Xscale );
      printf( "%5d  F=%16.8e\r", nLoops, F0 );

      {
      // ******************************************************************
      // Cut-back search starting at t=1

          F1= F0;
          t *= 20.0;
          //if( t > 1.5 )
          //    t *= 20.0;
          //else
          //    t= 2.00;

          do
          {
              for( t=0.5*t, i=1 ; i<=n ; i++ )
                  x_temp[i]= x[i] +t*s[i];
              F0= F( n, x_temp );

          } while( F0 > F1 - e*t*gg );
          for( Xscale=0.0, i=1 ; i<=n ; i++ )
          {
              x[i]= x_temp[i];
              Xscale += x[i]*x[i];
          }
          Xscale= sqrt( Xscale/n );
      // ******************************************************************
      }

      grad( n, x, F0, g );
      for( i=1 ; i<=n ; i++ )
          dg[i]= g[i] -dg[i];


// Exit test
      //if( fabs(F1-F0) <= 0.5*tol*(fabs(F1)+fabs(F0)+TINY) )
      if( fabs(F0) <= tol || nLoops > 500 )
          bConverged= 1;
      else
// else keep going....
      {
          // calculate dot products
          gg= gdg= dgdg= 0.0;
          for( i=1 ; i<=n ; i++ )
          {
              gg   +=  g[i] *  g[i];
              gdg  +=  g[i] * dg[i];
              dgdg += dg[i] * dg[i];
          }

          nUpdateMethod= 5;

          //if( gdg < -e*sqrt(gg*dgdg) )
          if( gdg < -G_EPS*sqrt(gg*dgdg) )
          {
              // calc a,b,c
              a= dgdg;
              b= -t*gdg;
              c= t*t*gg;


              //if( b-a > e*sqrt((c-2*b+a)*(a)) )
              if( b-a > G_EPS*sqrt((c-2*b+a)*(a)) )
              {
                  nUpdateMethod= 6;
                  // use theta=1 & update C matrix
                  theta= 1.0;
                  u= ( sqrt((c-b)/(b-a)) -1.0 ) / (c-2*b+a);

                  for( i=1 ; i<=n ; i++ )
                      w[i]= -dg[i] -t*g[i];

                  for( i=1 ; i<=n ; i++ )
                  {
                      Cw[i]= 0.0;
                      for( k=1 ; k<=n ; k++ )
                          Cw[i]= C[i][k] * w[k];
                  }

                  for( Cscale=0.0, i=1 ; i<=n ; i++ )
                  {
                      for( k=1 ; k<=n ; k++ )
                      {
                          C[i][k] += u * Cw[i] * w[k];
                          Cscale += C[i][k] * C[i][k];
                      }
                  }
                  Cscale= sqrt(Cscale) / n;
              }
              else if( UseScalarUpdate(n,a*t/b,b,e,g,dg) )
              {
                  nUpdateMethod= 7;

                  // scale C matrix
                  z= sqrt( b/a );
                  for( i=1 ; i<=n ; i++ )
                  {
                      for( k=1 ; k<=n ; k++ )
                          C[i][k] *= z;
                  }
                  Cscale *= t;
              }
              else
              {
                  nUpdateMethod= 8;

                  // choose between theta_ & theta
                  // update C matrix
                  z= c/b;
                  theta_= z - sqrt( z*z - c/a );
                  theta = z + sqrt( z*z - c/a );

                  u_= ( sqrt(theta_*(c-b*theta_)/(b-a*theta_)) - theta_ )
                      / ( c - 2*b*theta_ + a*theta_*theta_ );
                  u = ( sqrt(theta*(c-b*theta)/(b-a*theta)) - theta )
                      / ( c - 2*b*theta + a*theta*theta );

                  for( ww= ww_=0.0, i=1 ; i<=n ; i++ )
                  {
                      w[i] = -dg[i] - t*g[i]/theta;
                      w_[i]= -dg[i] - t*g[i]/theta_;

                      ww  += w[i] * w[i];
                      ww_ += w_[i]* w_[i];
                  }

                  for( i=1 ; i<=n ; i++ )
                  {
                      Cw[i]= Cw_[i]= 0.0;
                      for( CwCw=CwCw_=0.0, k=1 ; k<=n ; k++ )
                      {
                          Cw[i]  += C[i][k] * w[k];
                          Cw_[i] += C[i][k] * w_[k];

                          CwCw  += Cw[i]  * Cw[i];
                          CwCw_ += Cw_[i] * Cw_[i];
                      }
                  }

                  Cscale= n*n*Cscale*Cscale;
                  trace = theta  * ( Cscale + theta *u *(2+theta *u *ww) *CwCw );
                  trace_= theta_ * ( Cscale + theta_*u_*(2+theta_*u_*ww_)*CwCw_ );

                  // The reason we did all of that.....pick the values corresponding
                  // to the smallest trace, ie smallest Frobenius norm.
                  if( trace < trace_ )
                  {
                      trace_= trace;
                      theta_= theta;
                      u_= u;

                      // recall Cw_ & w_ are just pointer aliases
                      Cw_= Cw;
                      w_ = w;
                  }

                  u_ *= theta_;
                  theta_= sqrt( theta_ );
                  for( Cscale=0.0, i=1 ; i<=n ; i++ )
                  {
                      for( k=1 ; k<=n ; k++ )
                      {
                          C[i][k]= theta_ * (C[i][k] + u_*Cw_[i]*w_[k]);
                          Cscale += C[i][k] * C[i][k];
                      }
                  }
                  Cscale= sqrt( Cscale ) / n;

                  // Reset aliases
                  Cw_= gpfTemp1;
                  w_= gpfTemp2;

              }

              //printf( "\n Update Method %d  in step %5d\t\t\t\n", nUpdateMethod, nLoops );
              bRestart= 0;
          }
          else
          {
              // don't update C matrix at all
              // But don't do this too many consecutive times!!
              bRestart++;
          }


    // ******************************************************************************
          // calc new s[]
          for( i=1 ; i<=n ; i++ )
          {
              for( s[i]=0.0, k=1 ; k<=n ; k++ )
                   s[i] += -C[i][k] * g[k];
          }

          // dump off old g[] into dg[]
          for( i=1 ; i<=n ; i++ )
              dg[i]= g[i];

          nLoops++;

          //if( nLoops%(20*n*n) == 0 )
          //    bRestart= 1;
      }
  }

  if( !bConverged )
      nLoops= -nLoops;
}


static int  UseScalarUpdate( int n, float a, float b, float e, float *g, float *dg )
{
int i, k;
float *Cw;

  Cw= gpfTemp1;
  for( b= 0.0, i=1 ; i<=n ; i++ )
  {
      Cw[i]= 0.0;
      for( k=1 ; k<=n ; k++ )
          Cw[i] += C[i][k] * (dg[k]+a*g[k]);
      b += Cw[i]*Cw[i];
  }

  if( sqrt(b) < e )
      return(1);  // use scalar update for C
  else
      return(0);  // don't use scalar update
}



// Had to doctor this routine up to use the C matrix columns
static void gradF( int n, float x[], float F0, float g[] )
{
int i, k;
float a, *x_temp;

  x_temp= gpfTemp1;

  // Use trace of C matrix & magnitude of x vector for scaling
  if( Xscale < 1.0 )
      a= G_EPS / Cscale;
  else
      a= G_EPS * Xscale / Cscale;


  for( i=1 ; i<=n ; i++ )
  {
      // Finite differencing
      for( k=1 ; k<=n ; k++ )
      {
          x_temp[k]= x[k] + a*C[k][i];
      }

      g[i]= ( F(n,x_temp) -F0 ) / a;
  }

}

