Warning: not so basic stuff for non functional language fans 8-)

## Standard functionals

func <- function(x) x%%2==0  # lambda expressions
func(4)
## [1] TRUE
(function(x)x%%2==0)(4)
## [1] TRUE
# Filter
Filter((function(x)x%%2==0),1:20)
##  [1]  2  4  6  8 10 12 14 16 18 20
# Map
mapply((function(x)x*2),1:20)
##  [1]  2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40
# Fold (foldl by default, use right=T for foldr)
# use accumulate=T for scan
Reduce((function(x,acc)x+acc),1:10,0)  # eg, vector sum
## [1] 55
Reduce((function(x,acc)x*acc),1:10,1)  # eg, vector product
## [1] 3628800
Reduce((function(x,acc)x*acc),1:10,1,accumulate=T)
##  [1]       1       1       2       6      24     120     720    5040
##  [9]   40320  362880 3628800
# returns the 1st element that satisfies the predicate
Find((function(x)x%%2==0),20:1)
## [1] 20
# returns the index of the 1st element that satisfies the predicate
Position((function(x)x%%2==0),20:1)
## [1] 1
# these high-order functions work for every R function
formals(function(x=4)x+5)
## $x ## [1] 4 body(function(x=4)x+5) ## x + 5 environment(function(x=4)x+5) ## <environment: R_GlobalEnv> # eg, apply sd to all columns of mtcars data frame, and then # turn the resulting list into a vector unlist(lapply(mtcars,sd)) ## mpg cyl disp hp drat wt ## 6.0269481 1.7859216 123.9386938 68.5628685 0.5346787 0.9784574 ## qsec vs am gear carb ## 1.7869432 0.5040161 0.4989909 0.7378041 1.6152000 ## Applying functions over lists/vectors • apply: apply a function to the rows or columns of a matrix # Two dimensional matrix M <- matrix(seq(1,16), 4, 4) M ## [,1] [,2] [,3] [,4] ## [1,] 1 5 9 13 ## [2,] 2 6 10 14 ## [3,] 3 7 11 15 ## [4,] 4 8 12 16 # apply min to rows apply(M, 1, min) ## [1] 1 2 3 4 # apply max to columns apply(M, 2, max) ## [1] 4 8 12 16 # apply double for each cell apply(M, c(1,2), function(x) 2*x) ## [,1] [,2] [,3] [,4] ## [1,] 2 10 18 26 ## [2,] 4 12 20 28 ## [3,] 6 14 22 30 ## [4,] 8 16 24 32 # 3 dimensional array M <- array( seq(32), dim = c(4,4,2)) M ## , , 1 ## ## [,1] [,2] [,3] [,4] ## [1,] 1 5 9 13 ## [2,] 2 6 10 14 ## [3,] 3 7 11 15 ## [4,] 4 8 12 16 ## ## , , 2 ## ## [,1] [,2] [,3] [,4] ## [1,] 17 21 25 29 ## [2,] 18 22 26 30 ## [3,] 19 23 27 31 ## [4,] 20 24 28 32 # Apply f across each M[*, , ] - i.e across 2nd and 3rd dimension apply(M, 1, max) ## [1] 29 30 31 32 apply(M, 1, sum) # Result is one-dimensional ## [1] 120 128 136 144 # Apply sum across each M[*, *, ] - i.e Sum across 3rd dimension apply(M, c(1,2), sum) ## [,1] [,2] [,3] [,4] ## [1,] 18 26 34 42 ## [2,] 20 28 36 44 ## [3,] 22 30 38 46 ## [4,] 24 32 40 48 # Result is two-dimensional • lapply(x,f): returns a list of the same length as x, each element of which is the result of applying f to the corresponding element of x • sapply(x,f): same but returns a vector f <- function(x) x^2 as.vector(lapply(1:6,f), mode="integer") # change list to vector just for tidy output ## [1] 1 4 9 16 25 36 sapply(1:10,f) # does the same thing ## [1] 1 4 9 16 25 36 49 64 81 100 matrix(sapply(1:25,f),5,5) ## [,1] [,2] [,3] [,4] [,5] ## [1,] 1 36 121 256 441 ## [2,] 4 49 144 289 484 ## [3,] 9 64 169 324 529 ## [4,] 16 81 196 361 576 ## [5,] 25 100 225 400 625 add <- function(x, y) x + y sapply(1:10, add, 3) # the 3 is passed to add() as its 2nd argument ## [1] 4 5 6 7 8 9 10 11 12 13 sapply(1:10, +, 3) # search object *called* as '+' ## [1] 4 5 6 7 8 9 10 11 12 13 sapply(1:10, "+", 3) # search object *named* as '+' ## [1] 4 5 6 7 8 9 10 11 12 13 x <- list(1:3, 4:9, 10:12) sapply(x, "[", 2) # equivalent to sapply(x, function(x) x[2]) ## [1] 2 5 11 An eg with list of functions: summary <- function(x) { funs <- c(mean, median, sd, mad, IQR) lapply(funs, function(f) f(x, na.rm = TRUE)) } summary(rnorm(100)) ## [[1]] ## [1] 0.08504035 ## ## [[2]] ## [1] -0.09771956 ## ## [[3]] ## [1] 1.035898 ## ## [[4]] ## [1] 1.042071 ## ## [[5]] ## [1] 1.41872 • replicates(n,expression): replicates expression n times • outer(xs,ys,f): returns a matrix with all f(x,y) • mapply(f,xs,ys,…): applies f to (xs,ys,…), each ith element from all the vectors for each iteration f(runif(10)) ## [1] 0.402364169 0.425024010 0.135703288 0.361465132 0.395346852 ## [6] 0.001033643 0.485006278 0.286659677 0.097783390 0.036028147 replicate(3,f(runif(10))) # replicates 3 times the previous instruction ## [,1] [,2] [,3] ## [1,] 0.36552085 0.0311398468 0.06603426 ## [2,] 0.07281059 0.3269985617 0.53624294 ## [3,] 0.01150483 0.4394893929 0.04703448 ## [4,] 0.32840718 0.0191090669 0.42680599 ## [5,] 0.41376382 0.0009683344 0.01910608 ## [6,] 0.37684437 0.3345655828 0.00405777 ## [7,] 0.05967578 0.6927456121 0.17158436 ## [8,] 0.64129500 0.0263523135 0.03000256 ## [9,] 0.12957366 0.3716564637 0.92696523 ## [10,] 0.88994004 0.1201024499 0.01843136 outer(1:5,1:3,"*") ## [,1] [,2] [,3] ## [1,] 1 2 3 ## [2,] 2 4 6 ## [3,] 3 6 9 ## [4,] 4 8 12 ## [5,] 5 10 15 1:5 %o% 1:3 # same thing ## [,1] [,2] [,3] ## [1,] 1 2 3 ## [2,] 2 4 6 ## [3,] 3 6 9 ## [4,] 4 8 12 ## [5,] 5 10 15 mapply(rep, 9:6, 1:4) ## [[1]] ## [1] 9 ## ## [[2]] ## [1] 8 8 ## ## [[3]] ## [1] 7 7 7 ## ## [[4]] ## [1] 6 6 6 6 f1 <- function(x,y,z) 100*x+10*y+z mapply(f1,1:3,4:6,7:9) ## [1] 147 258 369 Another important function is fold which is vector reduction to a value by applying some associate function on a list (given an identity value for empty lists) : # SOurce: Brian Rowe's "Modeling Data with Functional Programming in R" fold <- function(xs, fn, acc, ...) { sapply(xs, function(x) acc <<- fn(x, acc), ...) acc } There are a lot of functions that can be defined this way: my_sum <- function (xs) fold(xs, +, 0) my_sum(1:4) ## [1] 10 my_sum(c()) ## [1] 0 my_len <- function(xs) fold(xs, function(x,acc) 1+acc, 0) my_len(c(0,1,2,3)) ## [1] 4 ## Other function stuff When calling a function you can specify arguments by position, by complete name, or by partial name. Arguments are matched first by exact name (perfect matching), then by prefix matching and finally by position. f <- function(abcdef, bcde1, bcde2) { list(a = abcdef, b1 = bcde1, b2 = bcde2) } str(f(1, 2, 3)) ## List of 3 ##$ a : num 1
##  $b1: num 2 ##$ b2: num 3
str(f(2, 3, abcdef = 1))
## List of 3
##  $a : num 1 ##$ b1: num 2
##  $b2: num 3 # Can abbreviate long argument names: str(f(2, 3, a = 1)) ## List of 3 ##$ a : num 1
##  $b1: num 2 ##$ b2: num 3
# But this doesn't work because abbreviation is ambiguous
str(f(1, 3, b = 1))
## Error in f(1, 3, b = 1): argument 3 matches multiple formal arguments

Calling a function given a list of arguments

args <- list(1:10, na.rm = TRUE)
do.call(mean, args)  # same as mean(1:10, na.rm = TRUE)
## [1] 5.5

R can check if an argument is missing:

f <- function(x,y) {
c(missing(x),missing(y))
}

f(x=1)
## [1] FALSE  TRUE
f(y=2)
## [1]  TRUE FALSE
f(,3)
## [1]  TRUE FALSE
f(4,)
## [1] FALSE  TRUE

Lazy Eval: R uses lazy evaluation when dealing with function arguments, it olny computes them if necessary

f <- function(x,y) {
x*2
}

f(4,stop("error!"))
## [1] 8

This might bring some subtle problems:

function(y) x + y
}
adders[[1]](5)  # hmmm... (the last value of x in the vector cycle above is 10)
## [1] 6
## [1] 15
# this is solved by forcing the evaluation of 'x' in each element of the vector cycle
force(x)
function(y) x + y
}

## [1] 6
## [1] 15

Default arguments are evaluated inside the function. This means that if the expression depends on the current environment the results will differ depending on whether you use the default value or explicitly provide one

f <- function(x = ls()) {
a <- 1
x
}

# ls() evaluated inside f:
f()
## [1] "a" "x"
# ls() evaluated in global environment:
f(ls())
##  [8] "func"    "M"       "my_len"  "my_sum"  "summary" "x"

More technically, an unevaluated argument is called a promise, or (less commonly) a thunk. A promise is made up of two parts:

1. the expression which gives rise to the delayed computation. It can be accessed with substitute()
1. the environment where the expression was created and where it should be evaluated

The first time a promise is accessed the expression is evaluated in the environment where it was created. This value is cached, so that subsequent access to the evaluated promise does not recompute the value (but the original expression is still associated with the value, so substitute can continue to access it). ref

substitute(expression(a + b), list(a = 1))
## expression(1 + b)

The special argument ... passes all non-matched args to the inner functions

f <- function(...) {
names(list(...))
}
f(a = 1, b = 2)
## [1] "a" "b"
f <- function(x, y, ...) {
g <- function(z, w=1) {
x*1000+y*100+z*10+w
}
}

f1 <- f(1,2,z=3)
f1(4)
## [1] 1241
f1(w=4,z=5)
## [1] 1254

Infix Functions: use %name% to enclose the function name

"%+%" <- function(a, b) paste(a, b, sep = "")
"new" %+% " string"
## [1] "new string"
%+%("new", " string") # alternative call
## [1] "new string"
+(1, 5)
## [1] 6
# use \ for special chars
"%/\\%" <- function(a, b) paste(a, b)
"a" %/\% "b"
## [1] "a b"

An eg that creates a Matlab-like DSL for matrix descriptions:

qm<-function(...)
{
# turn ... into string
args<-deparse(substitute(rbind(cbind(...))))

# create "rbind(cbind(.),cbind(.),.)" construct
args<-gsub("\\|","),cbind(",args)

# eval
eval(parse(text=args))
}

M<-N<-diag(2)

qm(M,c(4,5) | c(1,2),N | t(1:3))
##      [,1] [,2] [,3]
## [1,]    1    0    4
## [2,]    0    1    5
## [3,]    1    1    0
## [4,]    2    0    1
## [5,]    1    2    3

## Closures

An object is data with functions. A closure is a function with data. – John D Cook

# variable 'exponent'
power <- function(exponent) {
function(x) x ^ exponent
}

square <- power(2)
square(2)
## [1] 4
square(4)
## [1] 16
cube <- power(3)
cube(2)
## [1] 8
cube(4)
## [1] 64
as.list(environment(square)) # shows the closure's environment
## $exponent ## [1] 2 # Closures are useful for making function factories, missing_remover <- function(na) { function(x) { x[x == na] <- NA x } } remove_99 <- missing_remover(99) remove_99(c(99,100,101,99,98)) ## [1] NA 100 101 NA 98 remove_dot <- missing_remover(".") remove_dot(c(".","a",".","b")) ## [1] NA "a" NA "b" # And are one way to manage mutable state in R. new_counter <- function() { i <- 0 function() { i <<- i + 1 # operator '<<-' searches for 'i' in the parent environment i } } counter_one <- new_counter() counter_two <- new_counter() counter_one() ## [1] 1 counter_one() ## [1] 2 counter_two() ## [1] 1 as.list(environment(counter_one)) # check its mutable state ##$i
## [1] 2
as.list(environment(counter_two))
## $i ## [1] 1 ## Currying currying is the technique of transforming a function that takes multiple arguments (or a tuple of arguments) in such a way that it can be called as a chain of functions, each with a single argument (partial application) – Wikipedia # list of functions and currying #eg, mean functions compute_mean <- list( base = function(x) mean(x), sum = function(x) sum(x) / length(x), manual = function(x) { total <- 0 n <- length(x) for (i in seq_along(x)) { total <- total + x[i] / n } total } ) xs <- runif(1e5) system.time(compute_mean$base(xs))
##    user  system elapsed
##       0       0       0
system.time(compute_mean$sum(xs)) ## user system elapsed ## 0 0 0 system.time(compute_mean$manual(xs))
##    user  system elapsed
##    0.07    0.00    0.08
# or test all in one line
lapply(compute_mean, function(f) system.time(f(xs)))
## $base ## user system elapsed ## 0 0 0 ## ##$sum
##    user  system elapsed
##       0       0       0
##
## $manual ## user system elapsed ## 0.08 0.00 0.07 Map(function(f) system.time(f(xs)), compute_mean) ##$base
##    user  system elapsed
##       0       0       0
##
## $sum ## user system elapsed ## 0 0 0 ## ##$manual
##    user  system elapsed
##    0.08    0.00    0.08
# another way
call_fun <- function(f, ...) f(...)

timer <- function(f) {
force(f)  # force the evaluation of expression
function(...) system.time(f(...))
}

timers <- lapply(compute_mean, timer) # return a list of functions
lapply(timers, call_fun, xs)
## $base ## user system elapsed ## 0 0 0 ## ##$sum
##    user  system elapsed
##       0       0       0
##
## $manual ## user system elapsed ## 0.08 0.00 0.08 # implementation of currying: Curry <- function(FUN,...) { .orig <- list(...) function(...) { do.call(FUN, c(.orig, list(...))) } } add <- function(x, y) x + y addOne <- Curry(add, y = 1) addOne(4) # 5 ## [1] 5 # using curry in interesting ways: funs <- list( sum = sum, mean = mean, median = median ) # now turn that list elements, into functions that remove NAs funs2 <- lapply(funs, Curry, na.rm = TRUE) Package pryr implements currying with partial() # library(devtools) # install_github("pryr") library(pryr) ## ## Attaching package: 'pryr' ## ## The following object is masked _by_ '.GlobalEnv': ## ## f f <- function(x,y) 10*x+y f(5,6) ## [1] 56 f1 <- partial(f, x=5) f1(6) ## [1] 56 f2 <- partial(f, y=6) f2(5) ## [1] 56 ## Function Operators Function operators (FO) are functions that take one (or more) functions as input and return a function as output. we’ll explore four types of function operators (FOs): • Behavioural FOs. While leaving the function otherwise unchanged, this type can do things like automatically log when the function is run, ensure that a function is run only once, and delay the operation of a function. • Output FOs. This type can return different values depending on whether a function throws an error, or negates the result of a logical predicate. • Input FOs. This type can modify inputs like partially evaluating a function, convert a function that takes multiple arguments to one that takes a list, or automatically vectorise a function. • Combining FOs. This type can combine the results of predicate functions with boolean operators, or compose multiple function calls. Behavioural FOs leave the inputs and outputs of a function unchanged, but adds some extra behaviour. # add a delay to a function call: delay_by <- function(delay, f) { function(...) { Sys.sleep(delay) f(...) } } system.time(runif(100)) ## user system elapsed ## 0 0 0 system.time(delay_by(1, runif)(100)) ## user system elapsed ## 0.00 0.00 1.01 # add a dot every 10 processing units dot_every <- function(n, f) { i <- 1 function(...) { if (i %% n == 0) cat(".") i <<- i + 1 f(...) } } x <- lapply(1:100, runif) x <- lapply(1:100, dot_every(10, runif)) ## .......... ## Memoisation fib <- function(n) { if (n < 2) return(1) fib(n - 2) + fib(n - 1) } system.time(fib(28)) ## user system elapsed ## 1.63 0.00 1.64 ###### MEMOISE IT! library(memoise) fib2 <- memoise( function(n) { if (n < 2) return(1) fib2(n - 2) + fib2(n - 1) } ) system.time(fib2(28)) ## user system elapsed ## 0.02 0.00 0.05 ## Capturing function invocations One challenge with functionals is that it can be hard to see what’s going on inside. It’s not easy to pry open their internals like it is with a for loop. However, we can use FOs to help us. The tee function, defined below, has three arguments, all functions: f, the original function; on_input, a function that’s called with the inputs to f, and on_output a function that’s called with the output from f. ignore <- function(...) NULL tee <- function(f, on_input = ignore, on_output = ignore) { function(...) { input <- if (nargs() == 1) c(...) else list(...) on_input(input) output <- f(...) on_output(output) output } } g <- function(x) cos(x) - x uniroot(g, c(-5, 5)) ##$root
## [1] 0.7390853
##
## $f.root ## [1] -2.603993e-07 ## ##$iter
## [1] 6
##
## $init.it ## [1] NA ## ##$estim.prec
## [1] 6.103516e-05
uniroot(tee(g, on_input = print),  c(-5, 5))
## [1] -5
## [1] 5
## [1] 0.2836622
## [1] 0.8752034
## [1] 0.7229804
## [1] 0.7386309
## [1] 0.7390853
## [1] 0.7390243
## [1] 0.7390853
## $root ## [1] 0.7390853 ## ##$f.root
## [1] -2.603993e-07
##
## $iter ## [1] 6 ## ##$init.it
## [1] NA
##
## $estim.prec ## [1] 6.103516e-05 uniroot(tee(g, on_output = print), c(-5, 5)) ## [1] 5.283662 ## [1] -4.716338 ## [1] 0.6763747 ## [1] -0.2343627 ## [1] 0.02685676 ## [1] 0.0007601196 ## [1] -2.603993e-07 ## [1] 0.0001018874 ## [1] -2.603993e-07 ##$root
## [1] 0.7390853
##
## $f.root ## [1] -2.603993e-07 ## ##$iter
## [1] 6
##
## $init.it ## [1] NA ## ##$estim.prec
## [1] 6.103516e-05

## Output FOs

How to modify the output of a function.

Negate <- function(f) { # Negates the function output
function(...) !f(...)
}

(Negate(is.null))(NULL)
## [1] FALSE
# removes all null elements from a list
compact <- function(x) Filter(Negate(is.null), x)

compact(c(NULL,3,3))
## [1] 3 3
# failwith() turns a function that throws an error into a function that returns a default value when there's an error

failwith <- function(default = NULL, f, quiet = TRUE) {
function(...) {
out <- default
try(out <- f(...), silent = quiet) # silent a True does not show error msg
out
}
}
log("a")
## Error in log("a"): non-numeric argument to mathematical function
failwith(NA, log)("a")
## [1] NA

Function composition

An important way of combining functions is through composition: f(g(x)).

compose <- function(f, g) {
function(...) f(g(...))
}

"%.%" <- compose

sqrt(3*4)
## [1] 3.464102
(sqrt %.% *)(3,4)
## [1] 3.464102
#  function operators that combine logical predicates:

and <- function(f1, f2) {
function(...) {
f1(...) && f2(...)
}
}
or <- function(f1, f2) {
function(...) {
f1(...) || f2(...)
}
}
not <- function(f1) {
function(...) {
!f1(...)
}
}

# So something like:
data <- Filter(function(x) is.character(x) || is.factor(x), iris)
# becomes
data <- Filter(or(is.character, is.factor), iris)

gs$sum(1:10) # bug, it's returning the minimum ## [1] 55 environment(gs$sum)$f ## function (..., na.rm = FALSE) .Primitive("sum") It doesn’t work well with lapply() because f is lazily evaluated. This means that if you give lapply() a list of functions and a FO to apply those functions, it will look like it repeatedly applied the last function. Another problem is that as designed, we have to pass a function object, rather than the name of a function, which is often more convenient. We can solve both problems by using match.fun(): it forces evaluation of f, and will find the function object if given its name: wrap2 <- function(f) { f <- match.fun(f) function(...) f(...) } fs <- c(sum = "sum", mean = "mean", min = "min") hs <- lapply(fs, wrap2) hs$sum(1:10)
environment(hs$sum)$f