Expressions

Introduction
Expressions
Symbolic Computation
A more complex eg

Introduction

In R a call is an unevaluated expression which consists of the named function applied to the given arguments. Function call is used to create a call. It receives a function’s name, plus a series of arguments to be applied to the function. eval is used to evaluate the final result.

a <- 25
b <- call("sqrt", a)
b

## sqrt(25)

eval(b)

## [1] 5

a <- 16               # does not influence the previous environment
eval(b)

## [1] 5

is.call(b)

## [1] TRUE

is.call(call)         # functions are not calls

## [1] FALSE

c <- call("^", 2, 4)  # call can receive multiple arguments
eval(c)

## [1] 16

quote returns the argument as non evaluated:

a <- 25
b <- call("sqrt", quote(a))
eval(b)

## [1] 5

a <- 16               # now it influences, since R still not evaluated the parameter
eval(b)

## [1] 4

eval(quote(b), env=list(b=1)) # reads from an environment (can also be a list or a dataframe)

## [1] 1

do.call constructs and executes a function call from a name or a function and a list of arguments to be passed to it.

a <- 10
b <- 2
f <- function (a,b) a/b
do.call("f", args=list(a+1, b))

## [1] 5.5

do.call("f", args=list(b=a, a=b))

## [1] 0.2

# make an environment
env <- new.env()
assign("a", 2, envir = env) # same as env$a <- 2
assign("b", 8, envir = env)
assign("f", function(a,b) a+b, envir = env)
as.list(env)

## $a
## [1] 2
## 
## $b
## [1] 8
## 
## $f
## function (a, b) 
## a + b

do.call("f", args=list(quote(a), quote(b)), envir=env)

## [1] 10

env$f <- function(a,b) a^b
do.call("f", args=list(quote(a), quote(b)), envir=env)

## [1] 256

substitute return the unevaluated expression, replacing any variables bound in the environment. The environment is a given list of assignments, or if omitted is the current evaluation environment.

substitute(a+b, list(a=1))

## 1 + b

a <- substitute(a+b+c, list(a=1,c=5))
a

## 1 + b + 5

eval(a, list(b=10))

## [1] 16

We can use eval and substitute to implement subset, a function that return subsets of vectors, matrices or data frames which meet conditions:

df <- data.frame(x=11:18, y=18:11)
df

##    x  y
## 1 11 18
## 2 12 17
## 3 13 16
## 4 14 15
## 5 15 14
## 6 16 13
## 7 17 12
## 8 18 11

subset(df, x>y)

##    x  y
## 5 15 14
## 6 16 13
## 7 17 12
## 8 18 11

my.subset <- function(x, condition) {
  condition_call <- substitute(condition)
  rows <- eval(condition_call, env=x, enclos=parent.frame()) # parent.frame is the var scope the user needs
  x[rows, ]
}

my.subset(df, x>y)

##    x  y
## 5 15 14
## 6 16 13
## 7 17 12
## 8 18 11

a <- 15
my.subset(df, x>a)

##    x  y
## 6 16 13
## 7 17 12
## 8 18 11

Notice that these type of functions are no longer referentially transparent. A function is referentially transparent if you can replace its arguments with their values and its behaviour doesn’t change. For example, if a function, f(), is referentially transparent and both x and y are 10, then f(x), f(y), and f(10) will all return the same result. Check here for more info

Expressions

Expressions are calls in R. Function expression returns a vector of type “expression” containing its arguments (unevaluated).

expr <- expression(x^2 + b*x)
is.expression(expr)

## [1] TRUE

eval(expr, list(x=10, b=3))

## [1] 130

eval(expr, list(x=c(10,20,30), b=1:3))

## [1] 110 440 990

all.vars return a character vector containing all the names which occur in an expression or call.

expr <- expression(x^2 + b*x)
all.vars(expr)

## [1] "x" "b"

all.vars(quote(expr))

## [1] "expr"

Symbolic Computation

We can do some basic symbolic computation. D and deriv compute the derivate of an expression:

expr <- expression(x^2 + b*x)
de.dx <- D(expr, "x")   # for simple variable
de.dx

## 2 * x + b

de.db <- D(expr, "b")
de.db

## x

eval(de.dx, list(b=1, x=10))

## [1] 21

# computing n-th derivative
# pre: n>0
Dn <- function(expr, name, n=1) {
   if (n == 1)
     D(expr, name)
   else
     Dn(D(expr, name), name, n-1)
}
Dn(expression(sin(x^2)), "x", 3)

## -(sin(x^2) * (2 * x) * 2 + ((cos(x^2) * (2 * x) * (2 * x) + sin(x^2) * 
##     2) * (2 * x) + sin(x^2) * (2 * x) * 2))

expr <- expression(x^2 + b*x*y + y^3)
deriv(expr, namevec=c("x","y"))               # for multiple variables

## expression({
##     .expr2 <- b * x
##     .value <- x^2 + .expr2 * y + y^3
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("x", 
##         "y")))
##     .grad[, "x"] <- 2 * x + b * y
##     .grad[, "y"] <- .expr2 + 3 * y^2
##     attr(.value, "gradient") <- .grad
##     .value
## })

d <- deriv(expr, namevec=c("x","y"), hessian=TRUE) # includes the Hessian, ie, the matrix of second derivatives 
d

## expression({
##     .expr2 <- b * x
##     .value <- x^2 + .expr2 * y + y^3
##     .grad <- array(0, c(length(.value), 2L), list(NULL, c("x", 
##         "y")))
##     .hessian <- array(0, c(length(.value), 2L, 2L), list(NULL, 
##         c("x", "y"), c("x", "y")))
##     .grad[, "x"] <- 2 * x + b * y
##     .hessian[, "x", "x"] <- 2
##     .hessian[, "x", "y"] <- .hessian[, "y", "x"] <- b
##     .grad[, "y"] <- .expr2 + 3 * y^2
##     .hessian[, "y", "y"] <- 3 * (2 * y)
##     attr(.value, "gradient") <- .grad
##     attr(.value, "hessian") <- .hessian
##     .value
## })

eval(d, list(x=1,y=3))

## [1] 34
## attr(,"gradient")
##      x  y
## [1,] 8 29
## attr(,"hessian")
## , , x
## 
##      x y
## [1,] 2 2
## 
## , , y
## 
##      x  y
## [1,] 2 18

A more complex eg

Ref. We are trying to fit data \(x\) into the nonlinear model:

\[\frac{K y_0 e^{u(x-tl)}}{K + y_0(e^{u(x-tl)}-1)} + b_1 + (b_0-b_1)e^{-kx} + b_2x\]

# the model equation
expr <- expression( (K*y0*exp(u*(x-tl)))/(K + y0*(exp(u*(x-tl))-1)) + 
                    b1 + (b0 - b1)*exp(-k*x) + b2*x )

all.vars(expr)

## [1] "K"  "y0" "u"  "x"  "tl" "b1" "b0" "k"  "b2"

The next line produces a list of the partial derivatives of the above equation with respect to each parameter:

ds <- sapply(all.vars(expr), function(v) {D(expr, v)} )
ds

## $K
## y0 * exp(u * (x - tl))/(K + y0 * (exp(u * (x - tl)) - 1)) - (K * 
##     y0 * exp(u * (x - tl)))/(K + y0 * (exp(u * (x - tl)) - 1))^2
## 
## $y0
## K * exp(u * (x - tl))/(K + y0 * (exp(u * (x - tl)) - 1)) - (K * 
##     y0 * exp(u * (x - tl))) * (exp(u * (x - tl)) - 1)/(K + y0 * 
##     (exp(u * (x - tl)) - 1))^2
## 
## $u
## K * y0 * (exp(u * (x - tl)) * (x - tl))/(K + y0 * (exp(u * (x - 
##     tl)) - 1)) - (K * y0 * exp(u * (x - tl))) * (y0 * (exp(u * 
##     (x - tl)) * (x - tl)))/(K + y0 * (exp(u * (x - tl)) - 1))^2
## 
## $x
## K * y0 * (exp(u * (x - tl)) * u)/(K + y0 * (exp(u * (x - tl)) - 
##     1)) - (K * y0 * exp(u * (x - tl))) * (y0 * (exp(u * (x - 
##     tl)) * u))/(K + y0 * (exp(u * (x - tl)) - 1))^2 - (b0 - b1) * 
##     (exp(-k * x) * k) + b2
## 
## $tl
## -(K * y0 * (exp(u * (x - tl)) * u)/(K + y0 * (exp(u * (x - tl)) - 
##     1)) - (K * y0 * exp(u * (x - tl))) * (y0 * (exp(u * (x - 
##     tl)) * u))/(K + y0 * (exp(u * (x - tl)) - 1))^2)
## 
## $b1
## 1 - exp(-k * x)
## 
## $b0
## exp(-k * x)
## 
## $k
## -((b0 - b1) * (exp(-k * x) * x))
## 
## $b2
## x

class(ds)

## [1] "list"

class(ds[[1]])

## [1] "call"

Each element of this list is itself an expression.

Now, if we assign values to the parameters, we can compute the Jacobian matrix \(J\), necessary to compute the gradient:

jacob <- function(expr, env) {
   t( sapply(all.vars(expr), function(v) {eval(D(expr, v), env=env)} ) )
}

So, let’s give them some values:

# this will be the environment for the evaluation of J
ps <- c(y0=0.01, u=0.3, tl=5, K=2, b0=0.01, b1=1, b2=0.001, k=0.1)
x  <- seq(0,10)

J <- jacob(expr, env= c(as.list(ps), list(x=x)))
J <- J[names(ps),,drop=F]  # drop 'x' row which refers to the independent variable
J

##          [,1]       [,2]       [,3]       [,4]       [,5]      [,6]
## y0  2.249e-01  3.033e-01  4.090e-01  5.513e-01  7.427e-01  1.000000
## u  -1.119e-02 -1.207e-02 -1.221e-02 -1.097e-02 -7.390e-03  0.000000
## tl -6.712e-04 -9.054e-04 -1.221e-03 -1.646e-03 -2.217e-03 -0.002985
## K  -4.367e-06 -5.299e-06 -6.068e-06 -6.218e-06 -4.813e-06  0.000000
## b0  1.000e+00  9.048e-01  8.187e-01  7.408e-01  6.703e-01  0.606531
## b1  0.000e+00  9.516e-02  1.813e-01  2.592e-01  3.297e-01  0.393469
## b2  0.000e+00  1.000e+00  2.000e+00  3.000e+00  4.000e+00  5.000000
## k   0.000e+00  8.958e-01  1.621e+00  2.200e+00  2.654e+00  3.002327
##          [,7]       [,8]       [,9]      [,10]      [,11]
## y0  1.345e+00  1.807e+00  2.424e+00  3.2444063  4.3296326
## u   1.338e-02  3.596e-02  7.236e-02  0.1291274  0.2153992
## tl -4.015e-03 -5.395e-03 -7.236e-03 -0.0096846 -0.0129240
## K   1.177e-05  3.714e-05  8.846e-05  0.0001882  0.0003769
## b0  5.488e-01  4.966e-01  4.493e-01  0.4065697  0.3678794
## b1  4.512e-01  5.034e-01  5.507e-01  0.5934303  0.6321206
## b2  6.000e+00  7.000e+00  8.000e+00  9.0000000 10.0000000
## k   3.260e+00  3.441e+00  3.559e+00  3.6225357  3.6420065

The Hessian \(H\) is approximately \(H \approx J^TJ\)

H <- J %*% t(J)  # because linear algebra in R is a little strange, the transpose is applied to the 2nd Jacobian

The gradient is \(g = -J r\) where \(r\) are the residuals.

We can box all this into a class:

ModelObject = setRefClass('ModelObject', 
                          
  fields = list(
    name = 'character',
    expr = 'expression'
  ),
  
  methods = list(
    value = function(p, data){
      eval(.self$expr, c(as.list(p), as.list(data)))
    },
  
    jacobian = function(p, data){
      J = t(sapply(all.vars(.self$expr), function(v, p, data){
              eval(D(.self$expr, v), c(as.list(p), as.list(data)))
            }, p=p, data=data))

      return(J[names(p),,drop=F])
    },
    
    gradient = function(p, data){
        r = data$y - value(p, data)
        return(-jacobian(p, data) %*% r)
    },
    
    hessian = function(p, data){
      J = jacobian(p, data)
      return(J %*% t(J))
    }
  )
)

So let’s make some fake data and test the model:

# the model expression
expr <- expression( (K*y0*exp(u*(x-tl)))/(K + y0*(exp(u*(x-tl))-1)) + 
                    b1 + (b0 - b1)*exp(-k*x) + b2*x )

# make some data:
xs <- seq(0,48,by=0.25)
p0 <- c(y0=0.01, u=0.3, tl=5, K=2, b0=0.01, b1=1, b2=0.001, k=0.1)  # true values of the parameters
xy <- list(x=xs,
           y=eval(expr, envir=c(as.list(p0), list(x=xs)))
          )
  
plot(xy, main='Fit Results', type="l", col="blue", lty=2, lwd=2); # target function
xy$y <- xy$y+rnorm(length(xs),0,.15)  # add some noise
points(xy$x, xy$y, pch=19)                                        # observational data
                   
mo <- ModelObject(
        name = 'our eg',
        expr = expr
      )

# initial values for the parameters (we are assuming that we don't know the true values)
ps <- c(y0=0.05, u=1, tl=3, K=1, b0=0.1, b1=1, b2=0.01, k=0.5)

fit <- nlminb(start     = ps, 
              objective = function(p, data){
                            r = data$y - mo$value(p,data)
                            return(r %*% r)
                          }, 
              gradient  = mo$gradient, 
              hessian   = mo$hessian, 
              data      = xy)

lines(xy$x, mo$value(fit$par, xy), col="red", lwd=2)             # model estimate

plot of chunk unnamed-chunk-15

And another eg:

# make some data:
xy <- list(x=seq(0,10,by=0.25), y=dnorm(seq(0,10,by=0.25),10,2)) 
p0 <- c(y0=0.01, u=0.2, l=5, A=log(1.5/0.01))

mo <- ModelObject(
        name = 'gompertz',
        expr = expression( y0*exp(A*exp(-exp((u*exp(1)/A)*(l-x)+1))) )
      )

fit <- nlminb(start=p0, 
              objective= function(p, data){
                           r = data$y - mo$value(p,data)
                           return(r %*% r)
                         }, 
              gradient = mo$gradient, 
              hessian = mo$hessian, 
              data=xy)

plot(xy, main='Fit Results', pch=19);
lines(xy$x, mo$value(fit$par, xy), col="red", lwd=2)

plot of chunk unnamed-chunk-16

Expressions

João Neto

October 2014

Introduction

Expressions

Symbolic Computation

A more complex eg