## generate some bivariate data
x1 <- seq(1,8,0.2)
mx2 = 1.6345 + #-0.6235*cos(x1*0.6067) + 
               -1.3501*sin(x1*0.6067) + 
               -1.1622*cos(2*x1*0.6067) +
               -0.9443*sin(2*x1*0.6067)
x2 <- mx2 + rnorm(length(x1),0,0.2)

## scale & center the data
x <- scale(cbind(x1,x2))

## compute ranges for plotting data
alim <- extendrange(x, f=0.1)
alim_ <- range(x)

## plot centered data
par(pty='s')
plot(x[,1], x[,2], bty='n',
     xlab=expression(x[1]),
     ylab=expression(x[2]),
     xlim=alim, ylim=alim)

## plot 'true' association between x1 and x2
lines(scale(x1), scale(mx2), lty=2)

## plot first principal component line
svdx <- svd(x)
clip(alim_[1],alim_[2],alim_[1],alim_[2])
with(svdx, abline(a=0, b=v[2,1]/v[1,1]))


## plot projections of each point onto line
z1 <- with(svdx, x%*%v[,1]%*%t(v[,1]))
segments(x0=x[,1],y0=x[,2],
         x1=z1[,1],y1=z1[,2])

step_a <- function(obj, df=10) {
  
  #### step (a) of iterative algorithm ####
  ## compute scatterplot smoother in either dimension
  obj$fit1 <- smooth.spline(x=obj$lam, y=obj$x[,1], df=df)
  obj$fit2 <- smooth.spline(x=obj$lam, y=obj$x[,2], df=df)
  
  return(obj)
  
}

## compute euclidean dist between data and principal curve 
euc_dist <- function(l, x, f1, f2)
  sum((c(predict(f1, l)$y, predict(f2, l)$y) - x)^2)

## grid search function
optimize_grid <- function(f, interval, n=100, ...) {
  grid <- seq(interval[1], interval[2], length.out=n)
  fval <- sapply(grid, f, ...)
  return(grid[which.min(fval)])
}

step_b <- function(obj) {

  #### step (b) of iterative algorithm ####

  ## recompute lambdas s.t. euclidean distance
  ## is smallest (i.e., orthogonal distance)
  obj$lam <- apply(obj$x,1, function(x0) optimize_grid(euc_dist,
    interval=extendrange(obj$lam, f=1.00),
    x=x0, f1=obj$fit1, f2=obj$fit2))
  
  ## calculate sum of euclidean distances
  obj$dist <- sum(sapply(1:nrow(x), function(i) 
    euc_dist(obj$lam[i], obj$x[i,], obj$fit1, obj$fit2)))
  
  return(obj)
  
}

plot_current <- function(obj) {
  
  ## plot data and, for a sequence of lambda,
  ## the principal curve
  plot(obj$x[,1], obj$x[,2], bty='n',
       xlab=expression(x[1]),
       ylab=expression(x[2]),
       xlim=alim, ylim=alim)
  lines(scale(x1), scale(mx2), lty=2)
  seq_lam <- seq(min(obj$lam),max(obj$lam),length.out=100)
  lines(predict(obj$fit1, seq_lam)$y, 
        predict(obj$fit2, seq_lam)$y)
  
  ## show points along curve corresponding
  ## to original lambdas
  z1 <- cbind(predict(obj$fit1, obj$lam)$y, 
              predict(obj$fit2, obj$lam)$y)
  segments(x0=obj$x[,1],y0=obj$x[,2],
           x1=z1[,1],y1=z1[,2])
  if(!is.null(obj$dist))
    legend('topleft', 
           legend=paste0('dist = ', round(obj$dist,3)),
           bty='n')
}


## compute initial lambda (initialize as first principal component)
obj <- list(x=x, lam=with(svdx, as.numeric(u[,1]*d[1])))

## iterate these steps
obj <- step_a(obj, df=4) ## compute principal curve
obj <- step_b(obj) ## update lambdas
plot_current(obj)


## plot centered data
par(pty='s')
plot(x[,1], x[,2], bty='n',
     xlab=expression(x[1]),
     ylab=expression(x[2]),
     xlim=alim, ylim=alim)

## plot 'true' association between x1 and x2
lines(scale(x1), scale(mx2), lty=2)

## use principal_curve function from princurve library
lines(principal_curve(x, df=3), col='blue')
lines(principal_curve(x, df=6), col='green')
legend('topleft', c('df=3','df=6'),
       lty=1, col=c('blue','green'), bty='n')

## plot dist as function of 'df'
df_seq <- seq(2, 15, 1)
dist_seq <- sapply(df_seq, function(df_)
  principal_curve(x, df=df_)$dist)
plot(df_seq, dist_seq, type='b',
     xlab='df', ylab='dist')