### Non-linear least squares

# define non-linear function
f = function(x,theta) theta[1] + theta[2]*exp(-theta[3]*x)

# generate data
set.seed(42)
theta0 = c(1,1,2) # true parameter values to be estimated
n = 100
x = runif(n,0,5)
y = f(x, theta0) + rnorm(n, sd = 0.1)

## plot function and generated data
plot(x,y)
curve(f(x, theta0), from=0, to=5, lwd=2, col="#332288", add=TRUE)
legend("topright", legend="Truth", lwd=2, col='#332288', bty='n')

# calculate (analytical) gradient as a function of x and theta.
# Note: in linear models, the gradient is only a function of x!
dtheta1 = function(x,theta) rep(1, length(x))
dtheta2 = function(x,theta) exp(-theta[3]*x)
dtheta3 = function(x,theta) -theta[2]*x*exp(-theta[3]*x)
dtheta  = function(x,theta) cbind(dtheta1(x,theta), dtheta2(x,theta), dtheta3(x,theta))

# calculate numerical gradient
## compute a forward finite-difference gradient (cf. nlme::fdHess)
fdGrad <- function(pars, fun, ...,
                   .relStep = (.Machine$double.eps)^(1/2),
                   minAbsPar = 0) {
  npar <- length(pars)
  incr <- ifelse(abs(pars) <= minAbsPar, .relStep,
                 (abs(pars) - minAbsPar) * .relStep)
  ival <- do.call(fun, list(pars, ...))
  sapply(1:npar, function(i) {
    del <- rep(0, npar)
    del[i] <- incr[i]
    (do.call(fun, list(pars + del, ...)) - ival) / incr[i]
  })
}
ndtheta = function(x, theta) fdGrad(theta, function(pars) f(x, pars))

# solve for theta hat by Gauss-Newton, using the relative change in the
# sum of squares as the convergence criterion
solve_nlls = function(fun, x, y, theta, dtheta, tol = 1e-5){
  ss.theta = sum((y - fun(x,theta))^2)
  ss.theta.hat = ss.theta * 100 # initial value to ensure the loop runs at least once
  while(abs((ss.theta - ss.theta.hat)/ss.theta) > tol){
    ss.theta = sum((y - fun(x,theta))^2)
    J = dtheta(x, theta)
    # Gauss-Newton update: theta + (J'J)^-1 J' r, with ginv() for numerical stability
    theta.hat = theta + MASS::ginv(t(J)%*%J) %*% t(J) %*% (y - fun(x,theta))
    ss.theta.hat = sum((y - fun(x,theta.hat))^2)
    theta = theta.hat
  }
  return(drop(theta)) # drop() returns a plain vector rather than a one-column matrix
}

theta.hat  = solve_nlls(fun = f, x, y, theta = c(1,1,3), dtheta)  # analytical gradient
theta.hat2 = solve_nlls(fun = f, x, y, theta = c(1,1,3), ndtheta) # numerical gradient

# estimate of the error standard deviation
# (this is the MLE; divide the sum of squares by n - 3 for the unbiased version)
sigma.hat = sqrt(mean((y - f(x,theta.hat))^2))

# calculate the variance/covariance matrix of the theta estimates
J = dtheta(x, theta.hat)
Sigma.mat = solve(t(J)%*%J) * sigma.hat^2

# plot estimate and 95% CI
plot(x,y)
curve(f(x, theta0), from=0, to=5, lwd=2, col="#332288", add=TRUE)
x.seq = seq(min(x), max(x), length.out=100)
lines(x.seq, f(x.seq, theta.hat), lwd = 2, col='#882255')
legend("topright", legend=c("Truth","Estimate"), lwd=2,
       col=c('#332288','#882255'), bty='n')

# use the delta method to find an approximate 95% CI on the mean
grd = dtheta(x.seq, theta.hat)
est.se = sqrt(diag(grd %*% Sigma.mat %*% t(grd)))
lines(x.seq, f(x.seq, theta.hat) + 1.96*est.se, col = '#882255')
lines(x.seq, f(x.seq, theta.hat) - 1.96*est.se, col = '#882255')

# plot gradient (sensitivity matrix)
d.seq <- dtheta(x.seq, theta.hat)
plot(range(x.seq), range(d.seq), xlab="x", ylab="dtheta", type="n")
for(i in 1:ncol(d.seq)) lines(x.seq, d.seq[,i], col=i)
lines(x.seq, x.seq*0, lty=3)
legend("topright", c("theta1","theta2","theta3"), lty=1, col=1:3, bty='n')
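
# optional sanity checks: the analytical and finite-difference gradients should
# agree to roughly sqrt(machine epsilon) at the fitted values, and the fits from
# the two gradients should be nearly identical
max(abs(dtheta(x, theta.hat) - ndtheta(x, theta.hat)))
max(abs(theta.hat - theta.hat2))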
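
# approximate 95% Wald intervals for the parameters themselves, using the
# standard errors on the diagonal of Sigma.mat (the same normal approximation
# that underlies the delta-method band above); theta.se is introduced here
theta.se = sqrt(diag(Sigma.mat))
cbind(estimate = theta.hat,
      lower = theta.hat - 1.96*theta.se,
      upper = theta.hat + 1.96*theta.se)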
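
# for comparison, the same model can be fit with R's built-in nls(), which also
# uses Gauss-Newton by default. coef(fit) should closely match theta.hat, while
# vcov(fit) will differ slightly from Sigma.mat because nls() divides the residual
# sum of squares by n - 3 rather than n. (the formula variables t1, t2, t3 and
# the name 'fit' are introduced here just for this check.)
fit = nls(y ~ t1 + t2*exp(-t3*x), start = list(t1 = 1, t2 = 1, t3 = 3))
coef(fit)
vcov(fit)
summary(fit)$sigma # compare with sigma.hat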