Memoization is the ability to cache the results of previous function invocations in order to save time and space resources.

The classical eg is the naïve recursive computation of the Fibonacci sequence:

# pre: n>=0
fib <- function(n) {
  if (n<2) 
    return(1)
  
  return (fib(n-1)+fib(n-2))
}

system.time(fib(25))
##    user  system elapsed 
##    0.27    0.00    0.27
system.time(fib(30))
##    user  system elapsed 
##    2.83    0.00    2.87

The problem is that the same arguments are computed again and again. If we were able to keep the intermediate results, the computation would be much faster:

n <- 101
results <- rep(NA,n)  # intermediate results

fib2 <- function(n, results) {
  if (!is.na(results[n+1]))  # answer already known
    return (results[n+1])
  
  if (n<2) {
    eval.parent(substitute(results[n+1] <- 1))  # needed: R does not have call by reference
  } else {
    eval.parent(substitute(results[n+1] <- fib2(n-1, results) + fib2(n-2, results)))
  }
  
  return (results[n+1])
}

system.time(fib2(25, results))
##    user  system elapsed 
##       0       0       0
system.time(fib2(30, results))
##    user  system elapsed 
##       0       0       0
system.time(fib2(100, results))
##    user  system elapsed 
##       0       0       0

There is a R package useful to memoize functions:

library(memoise)
## Warning: package 'memoise' was built under R version 3.1.2

There are just three functions:

a <- function(n) { runif(n) }

memA <- memoise(a)

replicate(5,    a(2))
##        [,1]   [,2]    [,3]   [,4]    [,5]
## [1,] 0.5727 0.9778 0.06922 0.6043 0.77005
## [2,] 0.2082 0.6593 0.82466 0.4097 0.05769
replicate(5, memA(2))
##         [,1]    [,2]    [,3]    [,4]    [,5]
## [1,] 0.66333 0.66333 0.66333 0.66333 0.66333
## [2,] 0.01798 0.01798 0.01798 0.01798 0.01798

Notice, however, that it does not work that well with recursivity:

fibM <- memoize(fib)

system.time(fibM(25))
##    user  system elapsed 
##    0.26    0.00    0.27
system.time(fibM(30))
##    user  system elapsed 
##    2.87    0.00    2.87
system.time(fibM(33))
##    user  system elapsed 
##   12.22    0.00   12.26

In this post there’s an alternative solution:

fibM <- (function() {
 
    # The code here related to the cache *mostly* comes from the memoise
    # package's object new_cache.
 
    cache <- NULL
 
    cache_reset <- function() {
        cache <<- new.env(TRUE, emptyenv())
        cache_set('0', 0)
        cache_set('1', 1)
    }
 
    cache_set <- function(key, value) {
        assign(key, value, envir = cache)
    }
 
    cache_get <- function(key) {
        get(key, envir = cache, inherits = FALSE)
    }
 
    cache_has_key <- function(key) {
        exists(key, envir = cache, inherits = FALSE)
    }
 
    cache_reset() # Initialize the cache
  
    # This is the function that gets returned by the anonymous function and
    # becomes fibM.
    function(n) {
 
        nc <- as.character(n)
 
        # Handle "vectors" by element
        if (length(n) > 1) {
            return(sapply(n, fibM))
        }
 
        # Cached cases
        if (cache_has_key(nc)) 
            return(cache_get(nc))
 
        out <- fibM(n - 1) + fibM(n - 2)
        cache_set(nc, out)
        return(out)
    }
})()

Let’s use it:

ls(environment(fibM)$cache) # current environment (only base values are computed)
## [1] "0" "1"
fibM(30)
## [1] 832040
ls(environment(fibM)$cache)
##  [1] "0"  "1"  "10" "11" "12" "13" "14" "15" "16" "17" "18" "19" "2"  "20"
## [15] "21" "22" "23" "24" "25" "26" "27" "28" "29" "3"  "30" "4"  "5"  "6" 
## [29] "7"  "8"  "9"
system.time(fibM(33))
##    user  system elapsed 
##       0       0       0