#Gradient Descent

# NOTE: the workspace is intentionally NOT cleared here. Calling
# rm(list = ls()) from a script silently deletes whatever objects the user
# already has in their session; every object this script needs is created
# below, so a clean workspace is not required.

# Evaluation grid for plotting: 20 evenly spaced points on [0, 4].
# `length.out` is spelled out in full rather than relying on partial
# argument matching (`len`).
xs <- seq(0, 4, length.out = 20)
xs

# Objective function to minimize: f(x) = 2.4 * (x - 2)^2 + 3.
# Vectorized over `x`; returns a numeric vector of the same length.
f <- function(x) {
  offset <- x - 2
  2.4 * offset^2 + 3
}

# Draw the objective as a line over the evaluation grid `xs`.
# The y-axis label is a plotmath expression showing the formula.
plot(
  x = xs,
  y = f(xs),
  type = "l",
  xlab = "x",
  ylab = expression(2.4(x-2)^2 + 3)
)

# Analytic derivative of the objective: df/dx = 2.4 * 2 * (x - 2).
# Vectorized over `x`; returns a numeric vector of the same length.
grad <- function(x) {
  shifted <- x - 2
  2.4 * 2 * shifted
}

# Gradient descent implementation.
# Starting from an initial guess, repeatedly step against the gradient:
#   x <- x - stepFactor * grad(x)
# Every iterate and its function value are recorded so the path can be drawn.
x <- 0.1            # initial guess for the minimizer
stepFactor <- 0.01  # learning rate 'alpha'
n_steps <- 5000     # number of descent updates to perform

# Preallocate the traces (initial point + one entry per update) instead of
# growing them with c() inside the loop, which copies the whole vector on
# every iteration.
xtrace <- numeric(n_steps + 1)  # x values visited, for graphing
ftrace <- numeric(n_steps + 1)  # f(x) at each visited x, for graphing
xtrace[1] <- x
ftrace[1] <- f(x)

for (step in seq_len(n_steps)) {
  x <- x - stepFactor * grad(x)  # gradient descent update
  xtrace[step + 1] <- x          # slot 1 holds the initial guess
  ftrace[step + 1] <- f(x)
}

# Overlay the recorded descent path (points joined by lines) on the
# existing plot and label it.
lines(x = xtrace, y = ftrace, type = "b", col = "blue")
text(x = 0.5, y = 6, labels = "Gradient Descent", col = "red", pos = 4)

# Print the final value of x; the iterates converge to 2.0.
print(x)

# Annotate the minimizer: one label below and one above the point (2, 4).
text(x = 2, y = 4, labels = "x=2", col = "red", pos = 1)
text(x = 2, y = 4, labels = "(Global minimum)", col = "red", pos = 3)