/* This work is a modification of code written by Jens Wetzl and Oliver Taubmann in 2012.
 * The original work can be found here: https://github.com/jwetzl/CudaLBFGS
 * This work is not endorsed by the authors. */

/**
 *   ___ _   _ ___   _     _       ___ ___ ___ ___
 *  / __| | | |   \ /_\   | |  ___| _ ) __/ __/ __|
 * | (__| |_| | |) / _ \  | |_|___| _ \ _| (_ \__ \
 *  \___|\___/|___/_/ \_\ |____|  |___/_| \___|___/
 *
 * File lbfgs.h: Interface of the minimizer.
 *               This is the core class of the library.
 **/

#ifndef LBFGS_H
#define LBFGS_H

#include <stdbool.h>

#include "cost_function.h"
#include "error_checking.h"
#include "timer.h"

// L-BFGS minimizer whose working vectors live in device (GPU) memory.
//
// Usage, as far as this header shows: construct with a cost_function,
// adjust the tunables through the setters below, then call minimize().
//
// NOTE(review): the class declares a destructor and stores a cublasHandle_t.
// If the destructor releases that handle, copy-constructing an lbfgs object
// would release it twice -- confirm against the implementation before copying.
class lbfgs
{
	public:
		// cf is stored by reference (see m_costFunction), so it presumably
		// has to outlive this object -- TODO confirm.
		lbfgs(cost_function& cf);
		~lbfgs();

		// Reason why minimize() / gpu_lbfgs() stopped.
		// Use statusToString() to obtain a human-readable description.
		enum status {
			LBFGS_BELOW_GRADIENT_EPS,
			LBFGS_REACHED_MAX_ITER,
			LBFGS_REACHED_MAX_EVALS,
			LBFGS_LINE_SEARCH_FAILED,
			LBFGS_COLD_FUSION_DIAGNOSED
		};

		// Returns a string describing the status
		// indicated by the value of stat.
		static std::string statusToString(status stat);

		// Runs minimization of the cost function cf
		// using the L-BFGS method implemented in CUDA.
		//
		// d_x is the device memory location containing
		// the initial guess as cf.getNumberOfUnknowns()
		// consecutive doubles. On output, d_x will
		// contain the solution of argmin_x(cf.f(x)) if
		// minimization succeeded, or the last solution
		// found when minimization was aborted.
		//
		// The remaining arguments are passed by pointer; their exact
		// semantics are defined by the implementation, not by this header.
		// Judging by their names only (TODO confirm against the caller):
		//   energy        - function value at the returned point
		//   outrms        - RMS gradient at the returned point
		//   debug         - toggles debug output (see writetodebug)
		//   itdone        - number of iterations performed
		//   cudatimet     - toggles timing
		//   ecalls        - number of cost function evaluations
		//   coldfusiont   - toggles the "cold fusion" check
		//   coldfusionlim - threshold used by the "cold fusion" check
		//
		// The flag arguments are declared as bool*. This is the same type the
		// previous _Bool* spelling resolved to in C++ (via the <stdbool.h>
		// extension macro), but it does not depend on that macro being present.
		//
		// Returns a status code indicating why minimization
		// has stopped, see also lbfgs::status and
		// lbfgs::statusToString.
		status minimize(double *d_x, double *energy, double *outrms, bool *debug, int *itdone, bool *cudatimet, int *ecalls, bool *coldfusiont, double *coldfusionlim);

		// Same as lbfgs::minimize.
		status gpu_lbfgs(double *d_x, double *energy, double *outrms, bool *debug, int *itdone, bool *cudatimet, int *ecalls, bool *coldfusiont, double *coldfusionlim);

		// The maximum number of iterations to be performed.
		//
		// Default value: 10000
		//
		size_t getMaxIterations() const         { return m_maxIter;    }
		void   setMaxIterations(size_t maxIter) { m_maxIter = maxIter; }

		// The maximum number of function and gradient evaluations
		// (combined) to be performed.
		//
		// Default value: (unbounded)
		//
		size_t getMaxEvaluations() const          { return m_maxEvals;     }
		void   setMaxEvaluations(size_t maxEvals) { m_maxEvals = maxEvals; }

		// The gradient epsilon represents a threshold for determining
		// if the current solution is already good enough.
		//
		// Default value: 10^-4
		//
		double getGradientEpsilon() const             { return m_gradientEps;        }
		void   setGradientEpsilon(double gradientEps) { m_gradientEps = gradientEps; }

		// The maximum step size allowed (checked in line search).
		//
		// Default value: 0.2
		//
		double getMaxStep() const         { return m_maxStep;    }
		void   setMaxStep(double maxStep) { m_maxStep = maxStep; }

		// The maximum rise in energy allowed (checked in line search).
		//
		// Default value: 10^-4
		//
		double getMaxFRise() const          { return m_maxFRise;     }
		void   setMaxFRise(double maxFRise) { m_maxFRise = maxFRise; }

		// True = atom coords active, false = rigid coords active
		//
		// Default value: true
		//
		bool getAtomRigidT() const          { return m_atomRigidT;       }
		void setAtomRigidT(bool atomRigidT) { m_atomRigidT = atomRigidT; }

		// The initial guess for the diagonal elements of the inverse Hessian
		//
		// Default value: 0.1
		//
		double getDGuess() const        { return m_dGuess;   }
		void   setDGuess(double dGuess) { m_dGuess = dGuess; }

		// The number of degrees of freedom = 6 * nRigidBody + 3 * additional atoms
		//
		// Default value: 0
		//
		int  getDegFreedoms() const          { return m_degFreedoms;        }
		void setDegFreedoms(int degFreedoms) { m_degFreedoms = degFreedoms; }

		// The number of rigid bodies
		//
		// Default value: 0
		//
		int  getnRigidBody() const         { return m_nRigidBody;       }
		void setnRigidBody(int nRigidBody) { m_nRigidBody = nRigidBody; }

		// The maximum number of sites in a rigid body
		//
		// Default value: 0
		//
		int  getMaxSite() const      { return m_maxSite;    }
		void setMaxSite(int maxSite) { m_maxSite = maxSite; }

		// Defines how many solution and gradient vector updates are
		// kept to estimate the inverse Hessian during optimization
		//
		// This is a count and is stored as an int (m_mUpdate), so the
		// accessors use int as well. They previously used double, which
		// silently truncated any fractional value inside the setter.
		//
		// Default value: 4
		//
		int  getMUpdate() const      { return m_mUpdate;    }
		void setMUpdate(int mUpdate) { m_mUpdate = mUpdate; }

		// Device pointer to the rigid-site counts.
		// NOTE(review): the name suggests the number of sites in each
		// rigid body -- confirm against the code that fills it.
		//
		// Default value: Not set if rigid body framework not used
		//
		int* getDevnRigidSites() const             { return m_d_nRigidSites;          }
		void setDevnRigidSites(int* d_nRigidSites) { m_d_nRigidSites = d_nRigidSites; }

		// Device pointer, list of atoms in rigid bodies, need a file called rbodyconfig
		//
		// Default value: Not set if rigid body framework not used
		//
		int* getDevRigidGroups() const             { return m_d_RigidGroups;          }
		void setDevRigidGroups(int* d_RigidGroups) { m_d_RigidGroups = d_RigidGroups; }

		// Device pointer, coordinates of the rigid body sites
		//
		// Default value: Not set if rigid body framework not used
		//
		double* getDevSitesRigid() const               { return m_d_SitesRigid;         }
		void    setDevSitesRigid(double* d_SitesRigid) { m_d_SitesRigid = d_SitesRigid; }

		// Device pointer, list of atoms not in rigid bodies
		//
		// Default value: Not set if rigid body framework not used
		//
		int* getDevRigidSingles() const              { return m_d_RigidSingles;           }
		void setDevRigidSingles(int* d_RigidSingles) { m_d_RigidSingles = d_RigidSingles; }

		// Emits debugline to the debug output.
		// NOTE(review): the destination (file / stream) is decided by the
		// implementation and is not visible from this header.
		void writetodebug(std::string debugline) const;

	private:
		// Objective being minimized; not owned by this class.
		cost_function& m_costFunction;

		// Stopping limits, see getMaxIterations / getMaxEvaluations.
		size_t m_maxIter;
		size_t m_maxEvals;

		// Tunables, see the accessor of the same name for each.
		double  m_gradientEps;
		double  m_maxStep;
		double m_maxFRise;
		double m_dGuess;
		double *m_d_SitesRigid;

		bool m_atomRigidT;

		int m_degFreedoms;
		int m_nRigidBody;
		int m_maxSite;
		int m_mUpdate;
		int *m_d_nRigidSites;
		int *m_d_RigidGroups;
		int *m_d_RigidSingles;

		// mutable so that the const dispatch_* helpers below can use it.
		mutable cublasHandle_t m_cublasHandle;

		// axpy  computes  dst = a * x + y
		// scale computes  dst = a * x
		// dot   computes  dst = x^T y
		// nrm2  computes  dst = sqrt(x^T x)
		//
		// x, y, dst (for axpy / scale) are n-vectors,
		// a,    dst (for dot / nrm2)   are scalars.
		//
		// aDevicePointer / dstDevicePointer indicate whether
		// dst and a point to memory on the device or host.
		// All other pointers (marked with a d_) must point to device memory.
		//
		// NOTE(review): dispatch_nrm2 has no `a` argument, so its
		// aDevicePointer flag presumably describes dst -- confirm.

		void dispatch_axpy (const size_t n, double *d_dst, const double *d_y, const double *d_x, const double *a, bool aDevicePointer  = true) const;
		void dispatch_scale(const size_t n, double *d_dst, const double *d_x,                   const double *a, bool aDevicePointer   = true) const;
		void dispatch_dot  (const size_t n, double *dst,   const double *d_x, const double *d_y,                 bool dstDevicePointer = true) const;
		void dispatch_nrm2 (const size_t n, double *dst,   const double *d_x,                                    bool aDevicePointer   = true) const;

		// Line search step of the minimizer.
		//
		// evals and stat are taken by reference and can therefore be updated
		// by the call. Pointers prefixed with d_ follow the file's convention
		// for device memory. The meaning of the bool return value and of the
		// individual arguments is defined by the implementation -- not
		// visible from this header.
		bool gpu_linesearch(double *d_x, double *d_z, double *d_fk, double *d_gk, size_t &evals,
				lbfgs::status &stat, double *d_step, size_t maxEvals,
				timer *timer_evals, timer *timer_linesearch, int *d_status, bool *debug,
				bool *cudatimet, bool atomRigidT, int degFreedoms, int nRigidBody,
				int *d_cmax, int *d_maxsite, double *d_xrigid, double *d_gkrigid,
				bool *coldfusiont, double *coldfusionlim, size_t it);

		// Converts rigid body coordinates to atomic coordinates before energy calculation
		void transform_rigidtoc(int hostcmax, double *d_x, double *d_xrigid, int degFreedoms,
				int hostmaxsite, int *d_cmax, int *d_maxsite);

		// Converts gradient to rigid body gradient
		void transform_grad(double *d_gk, double *d_xrigid, double *d_gkrigid, int hostcmax,
				int *d_cmax, int hostmaxsite, int *d_maxsite, int degFreedoms);

};

#endif /* end of include guard: LBFGS_H */
