Memoization is the ability to cache the results of previous function invocations in order to save time and space resources.

The classical eg is the naïve recursive computation of the Fibonacci sequence:

# pre: n>=0
fib <- function(n) {
if (n<2)
return(1)

return (fib(n-1)+fib(n-2))
}

system.time(fib(25))
##    user  system elapsed
##    0.27    0.00    0.27
system.time(fib(30))
##    user  system elapsed
##    2.83    0.00    2.87

The problem is that the same arguments are computed again and again. If we were able to keep the intermediate results, the computation would be much faster:

n <- 101
results <- rep(NA,n)  # intermediate results

fib2 <- function(n, results) {
return (results[n+1])

if (n<2) {
eval.parent(substitute(results[n+1] <- 1))  # needed: R does not have call by reference
} else {
eval.parent(substitute(results[n+1] <- fib2(n-1, results) + fib2(n-2, results)))
}

return (results[n+1])
}

system.time(fib2(25, results))
##    user  system elapsed
##       0       0       0
system.time(fib2(30, results))
##    user  system elapsed
##       0       0       0
system.time(fib2(100, results))
##    user  system elapsed
##       0       0       0

There is a R package useful to memoize functions:

library(memoise)
## Warning: package 'memoise' was built under R version 3.1.2

There are just three functions:

• memoise – memoise a function

• forget – resets the cache of a memoised function

• is.memoised – checks if a function is memoised

a <- function(n) { runif(n) }

memA <- memoise(a)

replicate(5,    a(2))
##        [,1]   [,2]    [,3]   [,4]    [,5]
## [1,] 0.5727 0.9778 0.06922 0.6043 0.77005
## [2,] 0.2082 0.6593 0.82466 0.4097 0.05769
replicate(5, memA(2))
##         [,1]    [,2]    [,3]    [,4]    [,5]
## [1,] 0.66333 0.66333 0.66333 0.66333 0.66333
## [2,] 0.01798 0.01798 0.01798 0.01798 0.01798

Notice, however, that it does not work that well with recursivity:

fibM <- memoize(fib)

system.time(fibM(25))
##    user  system elapsed
##    0.26    0.00    0.27
system.time(fibM(30))
##    user  system elapsed
##    2.87    0.00    2.87
system.time(fibM(33))
##    user  system elapsed
##   12.22    0.00   12.26

In this post there’s an alternative solution:

fibM <- (function() {

# The code here related to the cache *mostly* comes from the memoise
# package's object new_cache.

cache <- NULL

cache_reset <- function() {
cache <<- new.env(TRUE, emptyenv())
cache_set('0', 0)
cache_set('1', 1)
}

cache_set <- function(key, value) {
assign(key, value, envir = cache)
}

cache_get <- function(key) {
get(key, envir = cache, inherits = FALSE)
}

cache_has_key <- function(key) {
exists(key, envir = cache, inherits = FALSE)
}

cache_reset() # Initialize the cache

# This is the function that gets returned by the anonymous function and
# becomes fibM.
function(n) {

nc <- as.character(n)

# Handle "vectors" by element
if (length(n) > 1) {
return(sapply(n, fibM))
}

# Cached cases
if (cache_has_key(nc))
return(cache_get(nc))

out <- fibM(n - 1) + fibM(n - 2)
cache_set(nc, out)
return(out)
}
})()

Let’s use it:

ls(environment(fibM)$cache) # current environment (only base values are computed) ## [1] "0" "1" fibM(30) ## [1] 832040 ls(environment(fibM)$cache)
##  [1] "0"  "1"  "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "2"  "20"
## [15] "21" "22" "23" "24" "25" "26" "27" "28" "29" "3"  "30" "4"  "5"  "6"
## [29] "7"  "8"  "9"
system.time(fibM(33))
##    user  system elapsed
##       0       0       0