Warning: not so basic stuff for non functional language fans 8-)

Check: + https://github.com/hadley/devtools/wiki/Functional-programming + http://adv-r.had.co.nz/Data-structures.html

Standard functionals

func <- function(x) x%%2==0  # lambda expressions
func(4)
## [1] TRUE
(function(x)x%%2==0)(4)  
## [1] TRUE
# Filter
Filter((function(x)x%%2==0),1:20)
##  [1]  2  4  6  8 10 12 14 16 18 20
# Map
mapply((function(x)x*2),1:20)
##  [1]  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40
# Fold (foldl by default, use right=T for foldr)
# use accumulate=T for scan
Reduce((function(x,acc)x+acc),1:10,0)  # eg, vector sum
## [1] 55
Reduce((function(x,acc)x*acc),1:10,1)  # eg, vector product
## [1] 3628800
Reduce((function(x,acc)x*acc),1:10,1,accumulate=T)
##  [1]       1       1       2       6      24     120     720    5040
##  [9]   40320  362880 3628800
# returns the 1st element that satisfies the predicate
Find((function(x)x%%2==0),20:1)  
## [1] 20
# returns the index of the 1st element that satisfies the predicate
Position((function(x)x%%2==0),20:1)
## [1] 1
# these high-order functions work for every R function
formals(function(x=4)x+5)
## $x
## [1] 4
body(function(x=4)x+5)
## x + 5
environment(function(x=4)x+5)
## <environment: R_GlobalEnv>
# eg, apply sd to all columns of mtcars data frame, and then
# turn the resulting list into a vector
unlist(lapply(mtcars,sd))
##         mpg         cyl        disp          hp        drat          wt 
##   6.0269481   1.7859216 123.9386938  68.5628685   0.5346787   0.9784574 
##        qsec          vs          am        gear        carb 
##   1.7869432   0.5040161   0.4989909   0.7378041   1.6152000

Applying functions over lists/vectors

ref: http://stackoverflow.com/questions/3505701/r-grouping-functions-sapply-vs-lapply-vs-apply-vs-tapply-vs-by-vs-aggrega

# Two dimensional matrix
M <- matrix(seq(1,16), 4, 4)
M
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
# apply min to rows
apply(M, 1, min)
## [1] 1 2 3 4
# apply max to columns
apply(M, 2, max)
## [1]  4  8 12 16
# apply double for each cell
apply(M, c(1,2), function(x) 2*x)
##      [,1] [,2] [,3] [,4]
## [1,]    2   10   18   26
## [2,]    4   12   20   28
## [3,]    6   14   22   30
## [4,]    8   16   24   32
# 3 dimensional array
M <- array( seq(32), dim = c(4,4,2))
M
## , , 1
## 
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]
## [1,]   17   21   25   29
## [2,]   18   22   26   30
## [3,]   19   23   27   31
## [4,]   20   24   28   32
# Apply f across each M[*, , ] - i.e across 2nd and 3rd dimension
apply(M, 1, max)
## [1] 29 30 31 32
apply(M, 1, sum) # Result is one-dimensional
## [1] 120 128 136 144
# Apply sum across each M[*, *, ] - i.e Sum across 3rd dimension
apply(M, c(1,2), sum)
##      [,1] [,2] [,3] [,4]
## [1,]   18   26   34   42
## [2,]   20   28   36   44
## [3,]   22   30   38   46
## [4,]   24   32   40   48
# Result is two-dimensional
f <- function(x) x^2
as.vector(lapply(1:6,f), mode="integer") # change list to vector just for tidy output
## [1]  1  4  9 16 25 36
sapply(1:10,f)  # does the same thing
##  [1]   1   4   9  16  25  36  49  64  81 100
matrix(sapply(1:25,f),5,5)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1   36  121  256  441
## [2,]    4   49  144  289  484
## [3,]    9   64  169  324  529
## [4,]   16   81  196  361  576
## [5,]   25  100  225  400  625
add <- function(x, y) x + y
sapply(1:10, add, 3)  # the 3 is passed to add() as its 2nd argument
##  [1]  4  5  6  7  8  9 10 11 12 13
sapply(1:10, `+`, 3)  # search object *called* as '+'
##  [1]  4  5  6  7  8  9 10 11 12 13
sapply(1:10, "+", 3)  # search object *named* as '+'
##  [1]  4  5  6  7  8  9 10 11 12 13
x <- list(1:3, 4:9, 10:12)
sapply(x, "[", 2)     # equivalent to sapply(x, function(x) x[2])
## [1]  2  5 11

An eg with list of functions:

summary <- function(x) {
  funs <- c(mean, median, sd, mad, IQR)
  lapply(funs, function(f) f(x, na.rm = TRUE))
}
summary(rnorm(100))
## [[1]]
## [1] 0.08504035
## 
## [[2]]
## [1] -0.09771956
## 
## [[3]]
## [1] 1.035898
## 
## [[4]]
## [1] 1.042071
## 
## [[5]]
## [1] 1.41872
f(runif(10))
##  [1] 0.402364169 0.425024010 0.135703288 0.361465132 0.395346852
##  [6] 0.001033643 0.485006278 0.286659677 0.097783390 0.036028147
replicate(3,f(runif(10)))  # replicates 3 times the previous instruction
##             [,1]         [,2]       [,3]
##  [1,] 0.36552085 0.0311398468 0.06603426
##  [2,] 0.07281059 0.3269985617 0.53624294
##  [3,] 0.01150483 0.4394893929 0.04703448
##  [4,] 0.32840718 0.0191090669 0.42680599
##  [5,] 0.41376382 0.0009683344 0.01910608
##  [6,] 0.37684437 0.3345655828 0.00405777
##  [7,] 0.05967578 0.6927456121 0.17158436
##  [8,] 0.64129500 0.0263523135 0.03000256
##  [9,] 0.12957366 0.3716564637 0.92696523
## [10,] 0.88994004 0.1201024499 0.01843136
outer(1:5,1:3,"*")
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## [3,]    3    6    9
## [4,]    4    8   12
## [5,]    5   10   15
1:5 %o% 1:3  # same thing
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    2    4    6
## [3,]    3    6    9
## [4,]    4    8   12
## [5,]    5   10   15
mapply(rep, 9:6, 1:4)
## [[1]]
## [1] 9
## 
## [[2]]
## [1] 8 8
## 
## [[3]]
## [1] 7 7 7
## 
## [[4]]
## [1] 6 6 6 6
f1 <- function(x,y,z) 100*x+10*y+z
mapply(f1,1:3,4:6,7:9)
## [1] 147 258 369

Another important function is fold which is vector reduction to a value by applying some associate function on a list (given an identity value for empty lists) :

# SOurce: Brian Rowe's "Modeling Data with Functional Programming in R"
fold <- function(xs, fn, acc, ...) {
  sapply(xs, function(x) acc <<- fn(x, acc), ...)
  acc
}

There are a lot of functions that can be defined this way:

my_sum <- function (xs) fold(xs, `+`, 0)
my_sum(1:4)
## [1] 10
my_sum(c())
## [1] 0
my_len <- function(xs) fold(xs, function(x,acc) 1+acc, 0)
my_len(c(0,1,2,3))
## [1] 4

Other function stuff

When calling a function you can specify arguments by position, by complete name, or by partial name. Arguments are matched first by exact name (perfect matching), then by prefix matching and finally by position.

f <- function(abcdef, bcde1, bcde2) {
  list(a = abcdef, b1 = bcde1, b2 = bcde2)
}

str(f(1, 2, 3))
## List of 3
##  $ a : num 1
##  $ b1: num 2
##  $ b2: num 3
str(f(2, 3, abcdef = 1))
## List of 3
##  $ a : num 1
##  $ b1: num 2
##  $ b2: num 3
# Can abbreviate long argument names:
str(f(2, 3, a = 1))
## List of 3
##  $ a : num 1
##  $ b1: num 2
##  $ b2: num 3
# But this doesn't work because abbreviation is ambiguous
str(f(1, 3, b = 1))
## Error in f(1, 3, b = 1): argument 3 matches multiple formal arguments

Calling a function given a list of arguments

args <- list(1:10, na.rm = TRUE)
do.call(mean, args)  # same as mean(1:10, na.rm = TRUE)
## [1] 5.5

R can check if an argument is missing:

f <- function(x,y) {
  c(missing(x),missing(y))  
}

f(x=1)
## [1] FALSE  TRUE
f(y=2)
## [1]  TRUE FALSE
f(,3)
## [1]  TRUE FALSE
f(4,)
## [1] FALSE  TRUE

Lazy Eval: R uses lazy evaluation when dealing with function arguments, it olny computes them if necessary

f <- function(x,y) {
  x*2
}

f(4,stop("error!"))
## [1] 8

This might bring some subtle problems:

add <- function(x) {
  function(y) x + y
}
adders <- lapply(1:10, add) # a list of functions
adders[[1]](5)  # hmmm... (the last value of x in the vector cycle above is 10)
## [1] 6
adders[[10]](5) # ok
## [1] 15
# this is solved by forcing the evaluation of 'x' in each element of the vector cycle
add <- function(x) {
  force(x)
  function(y) x + y
}

adders2 <- lapply(1:10, add)
adders2[[1]](5)
## [1] 6
adders2[[10]](5)
## [1] 15

Default arguments are evaluated inside the function. This means that if the expression depends on the current environment the results will differ depending on whether you use the default value or explicitly provide one

f <- function(x = ls()) {
  a <- 1
  x
}

# ls() evaluated inside f:
f()
## [1] "a" "x"
# ls() evaluated in global environment:
f(ls())
##  [1] "add"     "adders"  "adders2" "args"    "f"       "f1"      "fold"   
##  [8] "func"    "M"       "my_len"  "my_sum"  "summary" "x"

More technically, an unevaluated argument is called a promise, or (less commonly) a thunk. A promise is made up of two parts:

  1. the expression which gives rise to the delayed computation. It can be accessed with substitute()
  1. the environment where the expression was created and where it should be evaluated

The first time a promise is accessed the expression is evaluated in the environment where it was created. This value is cached, so that subsequent access to the evaluated promise does not recompute the value (but the original expression is still associated with the value, so substitute can continue to access it). ref

substitute(expression(a + b), list(a = 1))
## expression(1 + b)

The special argument ... passes all non-matched args to the inner functions

f <- function(...) {
  names(list(...))
}
f(a = 1, b = 2)
## [1] "a" "b"
f <- function(x, y, ...) {
  g <- function(z, w=1) {
    x*1000+y*100+z*10+w
  }
}

f1 <- f(1,2,z=3)
f1(4)
## [1] 1241
f1(w=4,z=5)
## [1] 1254

Infix Functions: use %name% to enclose the function name

"%+%" <- function(a, b) paste(a, b, sep = "")
"new" %+% " string"
## [1] "new string"
`%+%`("new", " string") # alternative call
## [1] "new string"
`+`(1, 5)
## [1] 6
# use \ for special chars
"%/\\%" <- function(a, b) paste(a, b)
"a" %/\% "b"
## [1] "a b"

An eg that creates a Matlab-like DSL for matrix descriptions:

qm<-function(...)
{
  # turn ... into string
  args<-deparse(substitute(rbind(cbind(...))))
  
  # create "rbind(cbind(.),cbind(.),.)" construct
  args<-gsub("\\|","),cbind(",args)
  
  # eval
  eval(parse(text=args))
}

  
M<-N<-diag(2)

qm(M,c(4,5) | c(1,2),N | t(1:3))
##      [,1] [,2] [,3]
## [1,]    1    0    4
## [2,]    0    1    5
## [3,]    1    1    0
## [4,]    2    0    1
## [5,]    1    2    3

Closures

An object is data with functions. A closure is a function with data. – John D Cook

# returns a new function which as access to the environment
# variable 'exponent'
power <- function(exponent) {
  function(x) x ^ exponent  
}

square <- power(2)
square(2)
## [1] 4
square(4)
## [1] 16
cube <- power(3)
cube(2)
## [1] 8
cube(4)
## [1] 64
as.list(environment(square)) # shows the closure's environment 
## $exponent
## [1] 2
# Closures are useful for making function factories, 

missing_remover <- function(na) {
  function(x) {
    x[x == na] <- NA
    x
  }
}

remove_99 <- missing_remover(99)
remove_99(c(99,100,101,99,98))
## [1]  NA 100 101  NA  98
remove_dot <- missing_remover(".")
remove_dot(c(".","a",".","b"))
## [1] NA  "a" NA  "b"
# And are one way to manage mutable state in R. 
new_counter <- function() {
  i <- 0
  function() {
    i <<- i + 1  # operator '<<-' searches for 'i' in the parent environment
    i
  }
}

counter_one <- new_counter()
counter_two <- new_counter()
counter_one() 
## [1] 1
counter_one() 
## [1] 2
counter_two() 
## [1] 1
as.list(environment(counter_one)) # check its mutable state
## $i
## [1] 2
as.list(environment(counter_two))
## $i
## [1] 1

Currying

currying is the technique of transforming a function that takes multiple arguments (or a tuple of arguments) in such a way that it can be called as a chain of functions, each with a single argument (partial application) – Wikipedia

# list of functions and currying

#eg, mean functions

compute_mean <- list(
  base = function(x) mean(x),
  sum = function(x) sum(x) / length(x),
  manual = function(x) {
    total <- 0
    n <- length(x)
    for (i in seq_along(x)) {
      total <- total + x[i] / n
    }
    total
  }
)

xs <- runif(1e5)
system.time(compute_mean$base(xs))
##    user  system elapsed 
##       0       0       0
system.time(compute_mean$sum(xs))
##    user  system elapsed 
##       0       0       0
system.time(compute_mean$manual(xs))
##    user  system elapsed 
##    0.07    0.00    0.08
# or test all in one line
lapply(compute_mean, function(f) system.time(f(xs)))
## $base
##    user  system elapsed 
##       0       0       0 
## 
## $sum
##    user  system elapsed 
##       0       0       0 
## 
## $manual
##    user  system elapsed 
##    0.08    0.00    0.07
Map(function(f) system.time(f(xs)), compute_mean)
## $base
##    user  system elapsed 
##       0       0       0 
## 
## $sum
##    user  system elapsed 
##       0       0       0 
## 
## $manual
##    user  system elapsed 
##    0.08    0.00    0.08
# another way
call_fun <- function(f, ...) f(...)

timer <- function(f) {
  force(f)  # force the evaluation of expression
  function(...) system.time(f(...))
}

timers <- lapply(compute_mean, timer) # return a list of functions
lapply(timers, call_fun, xs)
## $base
##    user  system elapsed 
##       0       0       0 
## 
## $sum
##    user  system elapsed 
##       0       0       0 
## 
## $manual
##    user  system elapsed 
##    0.08    0.00    0.08
# implementation of currying:

Curry <- function(FUN,...) { 
  .orig <- list(...)
  function(...) {
    do.call(FUN, c(.orig, list(...)))
  }
}

add <- function(x, y) x + y
addOne <- Curry(add, y = 1)
addOne(4) # 5
## [1] 5
# using curry in interesting ways:
funs <- list(
  sum = sum,
  mean = mean,
  median = median
)
# now turn that list elements, into functions that remove NAs
funs2 <- lapply(funs, Curry, na.rm = TRUE)

Package pryr implements currying with partial()

# library(devtools)
# install_github("pryr")
library(pryr)
## 
## Attaching package: 'pryr'
## 
## The following object is masked _by_ '.GlobalEnv':
## 
##     f
f <- function(x,y) 10*x+y
f(5,6)
## [1] 56
f1 <- partial(f, x=5)
f1(6)
## [1] 56
f2 <- partial(f, y=6)
f2(5)
## [1] 56

Function Operators

Function operators (FO) are functions that take one (or more) functions as input and return a function as output.

we’ll explore four types of function operators (FOs):

Behavioural FOs leave the inputs and outputs of a function unchanged, but adds some extra behaviour.

# add a delay to a function call:
delay_by <- function(delay, f) {
  function(...) {
    Sys.sleep(delay)
    f(...)
  }
}

system.time(runif(100))
##    user  system elapsed 
##       0       0       0
system.time(delay_by(1, runif)(100))
##    user  system elapsed 
##    0.00    0.00    1.01
# add a dot every 10 processing units
dot_every <- function(n, f) {
  i <- 1
  function(...) {
    if (i %% n == 0) cat(".")
    i <<- i + 1
    f(...)
  }
}

x <- lapply(1:100, runif)
x <- lapply(1:100, dot_every(10, runif))
## ..........

Memoisation

fib <- function(n) {
  if (n < 2) 
    return(1)
  fib(n - 2) + fib(n - 1)
}
system.time(fib(28))
##    user  system elapsed 
##    1.63    0.00    1.64
###### MEMOISE IT!
library(memoise)

fib2 <- memoise(
  function(n) {
    if (n < 2) 
      return(1)
    fib2(n - 2) + fib2(n - 1)
  }
)

system.time(fib2(28))
##    user  system elapsed 
##    0.02    0.00    0.05

Capturing function invocations

One challenge with functionals is that it can be hard to see what’s going on inside. It’s not easy to pry open their internals like it is with a for loop. However, we can use FOs to help us. The tee function, defined below, has three arguments, all functions: f, the original function; on_input, a function that’s called with the inputs to f, and on_output a function that’s called with the output from f.

ignore <- function(...) NULL
tee <- function(f, on_input = ignore, on_output = ignore) {
  function(...) {
    input <- if (nargs() == 1) c(...) else list(...)
    on_input(input)
    output <- f(...)
    on_output(output)
    output
  }
}

g <- function(x) cos(x) - x
uniroot(g, c(-5, 5))
## $root
## [1] 0.7390853
## 
## $f.root
## [1] -2.603993e-07
## 
## $iter
## [1] 6
## 
## $init.it
## [1] NA
## 
## $estim.prec
## [1] 6.103516e-05
uniroot(tee(g, on_input = print),  c(-5, 5))
## [1] -5
## [1] 5
## [1] 0.2836622
## [1] 0.8752034
## [1] 0.7229804
## [1] 0.7386309
## [1] 0.7390853
## [1] 0.7390243
## [1] 0.7390853
## $root
## [1] 0.7390853
## 
## $f.root
## [1] -2.603993e-07
## 
## $iter
## [1] 6
## 
## $init.it
## [1] NA
## 
## $estim.prec
## [1] 6.103516e-05
uniroot(tee(g, on_output = print), c(-5, 5))
## [1] 5.283662
## [1] -4.716338
## [1] 0.6763747
## [1] -0.2343627
## [1] 0.02685676
## [1] 0.0007601196
## [1] -2.603993e-07
## [1] 0.0001018874
## [1] -2.603993e-07
## $root
## [1] 0.7390853
## 
## $f.root
## [1] -2.603993e-07
## 
## $iter
## [1] 6
## 
## $init.it
## [1] NA
## 
## $estim.prec
## [1] 6.103516e-05

Output FOs

How to modify the output of a function.

Negate <- function(f) { # Negates the function output
  function(...) !f(...)
}

(Negate(is.null))(NULL)
## [1] FALSE
# removes all null elements from a list
compact <- function(x) Filter(Negate(is.null), x) 

compact(c(NULL,3,3))
## [1] 3 3
# failwith() turns a function that throws an error into a function that returns a default value when there's an error

failwith <- function(default = NULL, f, quiet = TRUE) {
  function(...) {
    out <- default
    try(out <- f(...), silent = quiet) # silent a True does not show error msg
    out
  }
}
log("a")
## Error in log("a"): non-numeric argument to mathematical function
failwith(NA, log)("a")
## [1] NA

Function composition

An important way of combining functions is through composition: f(g(x)).

compose <- function(f, g) {
  function(...) f(g(...))
}

"%.%" <- compose

sqrt(3*4)
## [1] 3.464102
(sqrt %.% `*`)(3,4)
## [1] 3.464102
#  function operators that combine logical predicates:

and <- function(f1, f2) {
  function(...) {
    f1(...) && f2(...)
  }
}
or <- function(f1, f2) {
  function(...) {
    f1(...) || f2(...)
  }
}
not <- function(f1) {
  function(...) {
    !f1(...)
  }
}

# So something like:
data <- Filter(function(x) is.character(x) || is.factor(x), iris)
# becomes
data <- Filter(or(is.character, is.factor), iris)

A note about lazy eval:

wrap <- function(f) {
  function(...) f(...)
}

fs <- list(sum = sum, mean = mean, min = min)
gs <- lapply(fs, wrap)
gs$sum(1:10)  # bug, it's returning the minimum
## [1] 55
environment(gs$sum)$f
## function (..., na.rm = FALSE)  .Primitive("sum")

It doesn’t work well with lapply() because f is lazily evaluated. This means that if you give lapply() a list of functions and a FO to apply those functions, it will look like it repeatedly applied the last function.

Another problem is that as designed, we have to pass a function object, rather than the name of a function, which is often more convenient. We can solve both problems by using match.fun(): it forces evaluation of f, and will find the function object if given its name:

wrap2 <- function(f) {
  f <- match.fun(f)
  function(...) f(...)
}

fs <- c(sum = "sum", mean = "mean", min = "min")
hs <- lapply(fs, wrap2)
hs$sum(1:10)
## [1] 55
environment(hs$sum)$f
## function (..., na.rm = FALSE)  .Primitive("sum")