Combine memoization and tail

Is it possible to combine memoization and tail-recursion somehow? I'm learning F# at the moment and understand both concepts but can't seem to combine them.

Suppose I have the following memoize function (from Real-World Functional Programming):

let memoize f = let cache = new Dictionary<_, _>()
                (fun x -> match cache.TryGetValue(x) with
                          | true, y -> y
                          | _       -> let v = f(x)
                                       cache.Add(x, v)
                                       v)

and the following factorial function:

let rec factorial(x) = if (x = 0) then 1 else x * factorial(x - 1)

Memoizing factorial isn't too difficult and making it tail-recursive isn't either:

let rec memoizedFactorial =
  memoize (fun x -> if (x = 0) then 1 else x * memoizedFactorial(x - 1))

let tailRecursiveFactorial(x) =
  let rec factorialUtil(x, res) = if (x = 0)
                                  then res
                                  else let newRes = x * res
                                       factorialUtil(x - 1, newRes)
  factorialUtil(x, 1)

But can you combine memoization and tail-recursion? I made some attempts but can't seem to get it working. Or is this simply not possible?


As always, continuations yield an elegant tailcall solution:

open System.Collections.Generic 

let cache = Dictionary<_,_>()  // TODO move inside 
let memoizedTRFactorial =
    let rec fac n k =  // must make tailcalls to k
        match cache.TryGetValue(n) with
        | true, r -> k r
        | _ -> 
            if n=0 then
                k 1
            else
                fac (n-1) (fun r1 ->
                    printfn "multiplying by %d" n  //***
                    let r = r1 * n
                    cache.Add(n,r)
                    k r)
    fun n -> fac n id

printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in cache do
    printfn "%d: %d" k v

printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2

printfn "---"

// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3

There are two kinds of tests. First, this demos that calling F(4) caches F(4), F(3), F(2), F(1) as you would like.

Then, comment out the *** printf and uncomment the final test (and compile in Release mode) to show that it does not StackOverflow (it uses tailcalls correctly).

Perhaps I'll generalize out 'memoize' and demonstrate it on 'fib' next...

EDIT

Ok, here's the next step, I think, decoupling memoization from factorial:

open System.Collections.Generic 

let cache = Dictionary<_,_>()  // TODO move inside 
let memoize fGuts n =
    let rec newFunc n k =  // must make tailcalls to k
        match cache.TryGetValue(n) with
        | true, r -> k r
        | _ -> 
            fGuts n (fun r ->
                        cache.Add(n,r)
                        k r) newFunc
    newFunc n id 
let TRFactorialGuts n k memoGuts =
    if n=0 then
        k 1
    else
        memoGuts (n-1) (fun r1 ->
            printfn "multiplying by %d" n  //***
            let r = r1 * n
            k r) 

let memoizedTRFactorial = memoize TRFactorialGuts 

printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in cache do
    printfn "%d: %d" k v

printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2

printfn "---"

// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3

EDIT

Ok, here's a fully generalized version that seems to work.

open System.Collections.Generic 

let memoize fGuts =
    let cache = Dictionary<_,_>()
    let rec newFunc n k =  // must make tailcalls to k
        match cache.TryGetValue(n) with
        | true, r -> k r
        | _ -> 
            fGuts n (fun r ->
                        cache.Add(n,r)
                        k r) newFunc
    cache, (fun n -> newFunc n id)
let TRFactorialGuts n k memoGuts =
    if n=0 then
        k 1
    else
        memoGuts (n-1) (fun r1 ->
            printfn "multiplying by %d" n  //***
            let r = r1 * n
            k r) 

let facCache,memoizedTRFactorial = memoize TRFactorialGuts 

printfn "---"
let r = memoizedTRFactorial 4
printfn "%d" r
for KeyValue(k,v) in facCache do
    printfn "%d: %d" k v

printfn "---"
let r2 = memoizedTRFactorial 5
printfn "%d" r2

printfn "---"

// comment out *** line, then run this
//let r3 = memoizedTRFactorial 100000
//printfn "%d" r3

let TRFibGuts n k memoGuts =
    if n=0 || n=1 then
        k 1
    else
        memoGuts (n-1) (fun r1 ->
            memoGuts (n-2) (fun r2 ->
                printfn "adding %d+%d" r1 r2 //%%%
                let r = r1+r2
                k r)) 
let fibCache, memoizedTRFib = memoize TRFibGuts 
printfn "---"
let r5 = memoizedTRFib 4
printfn "%d" r5
for KeyValue(k,v) in fibCache do
    printfn "%d: %d" k v

printfn "---"
let r6 = memoizedTRFib 5
printfn "%d" r6

printfn "---"

// comment out %%% line, then run this
//let r7 = memoizedTRFib 100000
//printfn "%d" r7

The predicament of memoizing tail-recursive functions is, of course, that when tail-recursive function

let f x = 
   ......
   f x1

calls itself, it is not allowed to do anything with a result of the recursive call, including putting it into cache. Tricky; so what can we do?

The critical insight here is that since the recursive function is not allowed to do anything with a result of recursive call, the result for all arguments to recursive calls will be the same! Therefore if recursion call trace is this

f x0 -> f x1 -> f x2 -> f x3 -> ... -> f xN -> res

then for all x in x0,x1,...,xN the result of fx will be the same, namely res. So the last invocation of a recursive function, the non-recursive call, knows the results for all the previous values - it is in a position to cache them. The only thing you need to do is to pass a list of visited values to it. Here is what it might look for factorial:

let cache = Dictionary<_,_>()

let rec fact0 l ((n,res) as arg) = 
    let commitToCache r = 
        l |> List.iter  (fun a -> cache.Add(a,r))
    match cache.TryGetValue(arg) with
    |   true, cachedResult -> commitToCache cachedResult; cachedResult
    |   false, _ ->
            if n = 1 then
                commitToCache res
                cache.Add(arg, res)
                res
            else
                fact0 (arg::l) (n-1, n*res)

let fact n = fact0 [] (n,1)

But wait! Look - l parameter of fact0 contains all the arguments to recursive calls to fact0 - just like the stack would in a non-tail-recursive version! That is exactly right. Any non-tail recursive algorithm can be converted to a tail-recursive one by moving the "list of stack frames" from stack to heap and converting the "postprocessing" of recursive call result into a walk over that data structure.

Pragmatic note: The factorial example above illustrates a general technique. It is quite useless as is - for factorial function it is quite enough to cache the top-level fact n result, because calculation of fact n for a particular n only hits a unique series of (n,res) pairs of arguments to fact0 - if (n,1) is not cached yet, then none of the pairs fact0 is going to be called on are.

Note that in this example, when we went from non-tail-recursive factorial to a tail-recursive factorial, we exploited the fact that multiplication is associative and commutative - tail-recursive factorial execute a different set of multiplications than a non-tail-recursive one.

In fact, a general technique exists for going from non-tail-recursive to tail-recursive algorithm, which yields an algorithm equivalent to a tee. This technique is called "continuatuion-passing transformation". Going that route, you can take a non-tail-recursive memoizing factorial and get a tail-recursive memoizing factorial by pretty much a mechanical transformation. See Brian's answer for exposition of this method.


I'm not sure if there's a simpler way to do this, but one approach would be to create a memoizing y-combinator:

let memoY f =
  let cache = Dictionary<_,_>()
  let rec fn x =
    match cache.TryGetValue(x) with
    | true,y -> y
    | _ -> let v = f fn x
           cache.Add(x,v)
           v
  fn

Then, you can use this combinator in lieu of "let rec", with the first argument representing the function to call recursively:

let tailRecFact =
  let factHelper fact (x, res) = 
    printfn "%i,%i" x res
    if x = 0 then res 
    else fact (x-1, x*res)
  let memoized = memoY factHelper
  fun x -> memoized (x,1)

EDIT

As Mitya pointed out, memoY doesn't preserve the tail recursive properties of the memoee. Here's a revised combinator which uses exceptions and mutable state to memoize any recursive function without overflowing the stack (even if the original function is not itself tail recursive!):

let memoY f =
  let cache = Dictionary<_,_>()
  fun x ->
    let l = ResizeArray([x])
    while l.Count <> 0 do
      let v = l.[l.Count - 1]
      if cache.ContainsKey(v) then l.RemoveAt(l.Count - 1)
      else
        try
          cache.[v] <- f (fun x -> 
            if cache.ContainsKey(x) then cache.[x] 
            else 
              l.Add(x)
              failwith "Need to recurse") v
        with _ -> ()
    cache.[x]

Unfortunately, the machinery which is inserted into each recursive call is somewhat heavy, so performance on un-memoized inputs requiring deep recursion can be a bit slow. However, compared to some other solutions, this has the benefit that it requires fairly minimal changes to the natural expression of recursive functions:

let fib = memoY (fun fib n -> 
  printfn "%i" n; 
  if n <= 1 then n 
  else (fib (n-1)) + (fib (n-2)))

let _ = fib 5000

EDIT

I'll expand a bit on how this compares to other solutions. This technique takes advantage of the fact that exceptions provide a side channel: a function of type 'a -> 'b doesn't actually need to return a value of type 'b , but can instead exit via an exception. We wouldn't need to use exceptions if the return type explicitly contained an additional value indicating failure. Of course, we could use the 'b option as the return type of the function for this purpose. This would lead to the following memoizing combinator:

let memoO f =
  let cache = Dictionary<_,_>()
  fun x ->
    let l = ResizeArray([x])
    while l.Count <> 0 do
      let v = l.[l.Count - 1]
      if cache.ContainsKey v then l.RemoveAt(l.Count - 1)
      else
        match f(fun x -> if cache.ContainsKey x then Some(cache.[x]) else l.Add(x); None) v with
        | Some(r) -> cache.[v] <- r; 
        | None -> ()
    cache.[x]

Previously, our memoization process looked like:

fun fib n -> 
  printfn "%i" n; 
  if n <= 1 then n 
  else (fib (n-1)) + (fib (n-2))
|> memoY

Now, we need to incorporate the fact that fib should return an int option instead of an int . Given a suitable workflow for option types, this could be written as follows:

fun fib n -> option {
  printfn "%i" n
  if n <= 1 then return n
  else
    let! x = fib (n-1)
    let! y = fib (n-2)
    return x + y
} |> memoO

However, if we're willing to change the return type of the first parameter (from int to int option in this case), we may as well go all the way and just use continuations in the return type instead, as in Brian's solution. Here's a variation on his definitions:

let memoC f =
  let cache = Dictionary<_,_>()
  let rec fn n k =
    match cache.TryGetValue(n) with
    | true, r -> k r
    | _ -> 
        f fn n (fun r ->
          cache.Add(n,r)
          k r)
  fun n -> fn n id

And again, if we have a suitable computation expression for building CPS functions, we can define our recursive function like this:

fun fib n -> cps {
  printfn "%i" n
  if n <= 1 then return n
  else
    let! x = fib (n-1)
    let! y = fib (n-2)
    return x + y
} |> memoC

This is exactly the same as what Brian has done, but I find the syntax here is easier to follow. To make this work, all we need are the following two definitions:

type CpsBuilder() =
  member this.Return x k = k x
  member this.Bind(m,f) k = m (fun a -> f a k)

let cps = CpsBuilder()
链接地址: http://www.djcxy.com/p/14134.html

上一篇: F#中的慢尾递归

下一篇: 结合记忆和尾巴