(**************************************************************************) (* *) (* Menhir *) (* *) (* François Pottier and Yann Régis-Gianas, INRIA Rocquencourt *) (* *) (* Copyright 2005 Institut National de Recherche en Informatique et *) (* en Automatique. All rights reserved. This file is distributed *) (* under the terms of the Q Public License version 1.0, with the *) (* change described in file LICENSE. *) (* *) (**************************************************************************) (* This is an implementation of Patricia trees, following Chris Okasaki's paper at the 1998 ML Workshop in Baltimore. Both big-endian and little-endian trees are provided. Both sets and maps are implemented on top of Patricia trees. *) (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Little-endian vs big-endian trees} *) (* A tree is little-endian if it expects the key's least significant bits to be tested first during a search. It is big-endian if it expects the key's most significant bits to be tested first. Most of the code is independent of this design choice, so it is written as a functor, parameterized by a small structure which defines endianness. Here is the interface which must be adhered to by such a structure. *) module Endianness = struct module type S = sig (* A mask is an integer with a single one bit (i.e. a power of 2). *) type mask = int (* [branching_bit] accepts two distinct integers and returns a mask which identifies the first bit where they differ. The meaning of ``first'' varies according to the endianness being implemented. *) val branching_bit: int -> int -> mask (* [mask i m] returns an integer [i'], where all bits which [m] says are relevant are identical to those in [i], and all others are set to some unspecified, but fixed value. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val mask: int -> mask -> int (* [shorter m1 m2] returns [true] if and only if [m1] describes a shorter prefix than [m2], i.e. if it makes fewer bits relevant. Which bits are ``relevant'' according to a given mask varies according to the endianness being implemented. *) val shorter: mask -> mask -> bool end (* Now, let us define [Little] and [Big], two possible [Endiannness] choices. *) module Little = struct type mask = int let lowest_bit x = x land (-x) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the lowest bit in this bit field, since we are checking least significant bits first. *) let branching_bit i0 i1 = lowest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the right of the single one bit in the mask [m]. We keep these bits, and set all others to 0. *) let mask i m = i land (m-1) (* The smaller [m] is, the fewer bits are relevant. *) let shorter = (<) end module Big = struct type mask = int let lowest_bit x = x land (-x) let rec highest_bit x = let m = lowest_bit x in if x = m then m else highest_bit (x - m) (* Performing a logical ``xor'' of [i0] and [i1] yields a bit field where all differences between [i0] and [i1] show up as one bits. (There must be at least one, since [i0] and [i1] are distinct.) The ``first'' one is the highest bit in this bit field, since we are checking most significant bits first. In Okasaki's paper, this loop is sped up by computing a conservative initial guess. Indeed, the bit at which the two prefixes disagree must be somewhere within the shorter prefix, so we can begin searching at the least-significant valid bit in the shorter prefix. Unfortunately, to allow computing the initial guess, the main code has to pass in additional parameters, e.g. a mask which describes the length of each prefix. This ``pollutes'' the endianness-independent code. For this reason, this optimization isn't implemented here. *) let branching_bit i0 i1 = highest_bit (i0 lxor i1) (* The ``relevant'' bits in an integer [i] are those which are found (strictly) to the left of the single one bit in the mask [m]. We keep these bits, and set all others to 0. Okasaki uses a different convention, which allows big-endian Patricia trees to masquerade as binary search trees. This feature does not seem to be useful here. *) let mask i m = i land (lnot (2*m-1)) (* The smaller [m] is, the more bits are relevant. *) let shorter = (>) end end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based maps} *) module Make (X : Endianness.S) = struct (* Patricia trees are maps whose keys are integers. *) type key = int (* A tree is either empty, or a leaf node, containing both the integer key and a piece of data, or a binary node. Each binary node carries two integers. The first one is the longest common prefix of all keys in this sub-tree. The second integer is the branching bit. It is an integer with a single one bit (i.e. a power of 2), which describes the bit being tested at this node. *) type 'a t = | Empty | Leaf of int * 'a | Branch of int * X.mask * 'a t * 'a t (* The empty map. *) let empty = Empty (* [choose m] returns an arbitrarily chosen binding in [m], if [m] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf (key, data) -> key, data | Branch (_, _, tree0, _) -> choose tree0 (* [lookup k m] looks up the value associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. This implementation takes branches \emph{without} checking whether the key matches the prefix found at the current node. This means that a query for a non-existent key shall be detected only when finally reaching a leaf, rather than higher up in the tree. This strategy is better when (most) queries are expected to be successful. *) let rec lookup key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data else raise Not_found | Branch (_, mask, tree0, tree1) -> lookup key (if (key land mask) = 0 then tree0 else tree1) let find = lookup (* [mem k m] tells whether the key [k] appears in the domain of the map [m]. *) let mem k m = try let _ = lookup k m in true with Not_found -> false (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. Assume $t_0$ and $t_1$ are non-empty trees, with longest common prefixes $p_0$ and $p_1$, respectively. Further, suppose that $p_0$ and $p_1$ disagree, that is, neither prefix is contained in the other. Then, no matter how large $t_0$ and $t_1$ are, we can merge them simply by creating a new [Branch] node that has $t_0$ and $t_1$ as children! *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* The auxiliary function [match_prefix] tells whether a given key has a given prefix. More specifically, [match_prefix k p m] returns [true] if and only if the key [k] has prefix [p] up to bit [m]. Throughout our implementation of Patricia trees, prefixes are assumed to be in normal form, i.e. their irrelevant bits are set to some predictable value. Formally, we assume [X.mask p m] equals [p] whenever [p] is a prefix with [m] relevant bits. This allows implementing [match_prefix] using only one call to [X.mask]. On the other hand, this requires normalizing prefixes, as done e.g. in [join] above, where [X.mask p0 m] has to be used instead of [p0]. *) let match_prefix k p m = X.mask k m = p (* [fine_add decide k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding from [k] to [d0] already exists, then the resulting map contains a binding from [k] to [decide d0 d]. *) type 'a decision = 'a -> 'a -> 'a exception Unchanged let basic_add decide k d m = let rec add t = match t with | Empty -> Leaf (k, d) | Leaf (k0, d0) -> if k = k0 then let d' = decide d0 d in if d' == d0 then raise Unchanged else Leaf (k, d') else join k (Leaf (k, d)) k0 t | Branch (p, m, t0, t1) -> if match_prefix k p m then if (k land m) = 0 then Branch (p, m, add t0, t1) else Branch (p, m, t0, add t1) else join k (Leaf (k, d)) p t in add m let strict_add k d m = basic_add (fun _ _ -> raise Unchanged) k d m let fine_add decide k d m = try basic_add decide k d m with Unchanged -> m (* [add k d m] returns a map whose bindings are all bindings in [m], plus a binding of the key [k] to the datum [d]. If a binding already exists for [k], it is overridden. *) let add k d m = fine_add (fun old_binding new_binding -> new_binding) k d m (* [singleton k d] returns a map whose only binding is from [k] to [d]. *) let singleton k d = Leaf (k, d) (* [is_singleton m] returns [Some (k, d)] if [m] is a singleton map that maps [k] to [d]. Otherwise, it returns [None]. *) let is_singleton = function | Leaf (k, d) -> Some (k, d) | Empty | Branch _ -> None (* [is_empty m] returns [true] if and only if the map [m] defines no bindings at all. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [cardinal m] returns [m]'s cardinal, that is, the number of keys it binds, or, in other words, its domain's cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [remove k m] returns the map [m] deprived from any binding involving [k]. *) let remove key m = let rec remove = function | Empty -> raise Not_found | Leaf (key', _) -> if key = key' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try remove m with Not_found -> m (* [lookup_and_remove k m] looks up the value [v] associated to the key [k] in the map [m], and raises [Not_found] if no value is bound to [k]. The call returns the value [v], together with the map [m] deprived from the binding from [k] to [v]. *) let rec lookup_and_remove key = function | Empty -> raise Not_found | Leaf (key', data) -> if key = key' then data, Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (key land mask) = 0 then match lookup_and_remove key tree0 with | data, Empty -> data, tree1 | data, tree0 -> data, Branch (prefix, mask, tree0, tree1) else match lookup_and_remove key tree1 with | data, Empty -> data, tree0 | data, tree1 -> data, Branch (prefix, mask, tree0, tree1) let find_and_remove = lookup_and_remove (* [fine_union decide m1 m2] returns the union of the maps [m1] and [m2]. If a key [k] is bound to [x1] (resp. [x2]) within [m1] (resp. [m2]), then [decide] is called. It is passed [x1] and [x2], and must return the value which shall be bound to [k] in the final map. The operation returns [m2] itself (as opposed to a copy of it) when its result is equal to [m2]. *) let reverse decision elem1 elem2 = decision elem2 elem1 let fine_union decide m1 m2 = let rec union s t = match s, t with | Empty, _ -> t | (Leaf _ | Branch _), Empty -> s | Leaf(key, value), _ -> fine_add (reverse decide) key value t | Branch _, Leaf(key, value) -> fine_add decide key value s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t in union m1 m2 (* [union m1 m2] returns the union of the maps [m1] and [m2]. Bindings in [m2] take precedence over those in [m1]. *) let union m1 m2 = fine_union (fun d d' -> d') m1 m2 (* [iter f m] invokes [f k x], in turn, for each binding from key [k] to element [x] in the map [m]. Keys are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf (key, data) -> f key data | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f m seed] invokes [f k d accu], in turn, for each binding from key [k] to datum [d] in the map [m]. Keys are presented to [f] in increasing order according to the map's ordering. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. *) let rec fold f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold f tree1 (fold f tree0 accu) (* [fold_rev] performs exactly the same job as [fold], but presents keys to [f] in the opposite order. *) let rec fold_rev f m accu = match m with | Empty -> accu | Leaf (key, data) -> f key data accu | Branch (_, _, tree0, tree1) -> fold_rev f tree0 (fold_rev f tree1 accu) (* It is valid to evaluate [iter2 f m1 m2] if and only if [m1] and [m2] have the same domain. Doing so invokes [f k x1 x2], in turn, for each key [k] bound to [x1] in [m1] and to [x2] in [m2]. Bindings are presented to [f] according to some unspecified, but fixed, order. *) let rec iter2 f t1 t2 = match t1, t2 with | Empty, Empty -> () | Leaf (key1, data1), Leaf (key2, data2) -> assert (key1 = key2); f key1 (* for instance *) data1 data2 | Branch (p1, m1, left1, right1), Branch (p2, m2, left2, right2) -> assert (p1 = p2); assert (m1 = m2); iter2 f left1 left2; iter2 f right1 right2 | _, _ -> assert false (* [map f m] returns the map obtained by composing the map [m] with the function [f]; that is, the map $k\mapsto f(m(k))$. *) let rec map f = function | Empty -> Empty | Leaf (key, data) -> Leaf(key, f data) | Branch (p, m, tree0, tree1) -> Branch (p, m, map f tree0, map f tree1) (* [endo_map] is similar to [map], but attempts to physically share its result with its input. This saves memory when [f] is the identity function. *) let rec endo_map f tree = match tree with | Empty -> tree | Leaf (key, data) -> let data' = f data in if data == data' then tree else Leaf(key, data') | Branch (p, m, tree0, tree1) -> let tree0' = endo_map f tree0 in let tree1' = endo_map f tree1 in if (tree0' == tree0) & (tree1' == tree1) then tree else Branch (p, m, tree0', tree1') (* [iterator m] returns a stateful iterator over the map [m]. *) (* TEMPORARY performance could be improved, see JCF's paper *) let iterator m = let remainder = ref [ m ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf (key, data)) :: parent -> remainder := parent; Some (key, data) | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* If [dcompare] is an ordering over data, then [compare dcompare] is an ordering over maps. *) exception Got of int let compare dcompare m1 m2 = let iterator2 = iterator m2 in try iter (fun key1 data1 -> match iterator2() with | None -> raise (Got 1) | Some (key2, data2) -> let c = Pervasives.compare key1 key2 in if c <> 0 then raise (Got c) else let c = dcompare data1 data2 in if c <> 0 then raise (Got c) ) m1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Patricia-tree-based sets} *) (* To enhance code sharing, it would be possible to implement maps as sets of pairs, or (vice-versa) to implement sets as maps to the unit element. However, both possibilities introduce some space and time inefficiency. To avoid it, we define each structure separately. *) module Domain = struct type element = int type t = | Empty | Leaf of int | Branch of int * X.mask * t * t (* The empty set. *) let empty = Empty (* [is_empty s] returns [true] if and only if the set [s] is empty. *) let is_empty = function | Empty -> true | Leaf _ | Branch _ -> false (* [singleton x] returns a set whose only element is [x]. *) let singleton x = Leaf x (* [is_singleton s] returns [Some x] if [s] is a singleton containing [x] as its only element; otherwise, it returns [None]. *) let is_singleton = function | Leaf x -> Some x | Empty | Branch _ -> None (* [choose s] returns an arbitrarily chosen element of [s], if [s] is nonempty, and raises [Not_found] otherwise. *) let rec choose = function | Empty -> raise Not_found | Leaf x -> x | Branch (_, _, tree0, _) -> choose tree0 (* [cardinal s] returns [s]'s cardinal. *) let rec cardinal = function | Empty -> 0 | Leaf _ -> 1 | Branch (_, _, t0, t1) -> cardinal t0 + cardinal t1 (* [mem x s] returns [true] if and only if [x] appears in the set [s]. *) let rec mem x = function | Empty -> false | Leaf x' -> x = x' | Branch (_, mask, tree0, tree1) -> mem x (if (x land mask) = 0 then tree0 else tree1) (* The auxiliary function [join] merges two trees in the simple case where their prefixes disagree. *) let join p0 t0 p1 t1 = let m = X.branching_bit p0 p1 in let p = X.mask p0 (* for instance *) m in if (p0 land m) = 0 then Branch(p, m, t0, t1) else Branch(p, m, t1, t0) (* [add x s] returns a set whose elements are all elements of [s], plus [x]. *) exception Unchanged let rec strict_add x t = match t with | Empty -> Leaf x | Leaf x0 -> if x = x0 then raise Unchanged else join x (Leaf x) x0 t | Branch (p, m, t0, t1) -> if match_prefix x p m then if (x land m) = 0 then Branch (p, m, strict_add x t0, t1) else Branch (p, m, t0, strict_add x t1) else join x (Leaf x) p t let add x s = try strict_add x s with Unchanged -> s (* [make2 x y] creates a set whose elements are [x] and [y]. [x] and [y] need not be distinct. *) let make2 x y = add x (Leaf y) (* [fine_add] does not make much sense for sets of integers. Better warn the user. *) type decision = int -> int -> int let fine_add decision x s = assert false (* [remove x s] returns a set whose elements are all elements of [s], except [x]. *) let remove x s = let rec strict_remove = function | Empty -> raise Not_found | Leaf x' -> if x = x' then Empty else raise Not_found | Branch (prefix, mask, tree0, tree1) -> if (x land mask) = 0 then match strict_remove tree0 with | Empty -> tree1 | tree0 -> Branch (prefix, mask, tree0, tree1) else match strict_remove tree1 with | Empty -> tree0 | tree1 -> Branch (prefix, mask, tree0, tree1) in try strict_remove s with Not_found -> s (* [union s1 s2] returns the union of the sets [s1] and [s2]. *) let rec union s t = match s, t with | Empty, _ -> t | _, Empty -> s | Leaf x, _ -> add x t | _, Leaf x -> add x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Merge their sub-trees. *) let u0 = union s0 t0 and u1 = union s1 t1 in if t0 == u0 && t1 == u1 then t else Branch(p, m, u0, u1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Merge [t] with a sub-tree of [s]. *) if (q land m) = 0 then Branch(p, m, union s0 t, s1) else Branch(p, m, s0, union s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Merge [s] with a sub-tree of [t]. *) if (p land n) = 0 then let u0 = union s t0 in if t0 == u0 then t else Branch(q, n, u0, t1) else let u1 = union s t1 in if t1 == u1 then t else Branch(q, n, t0, u1) else (* The prefixes disagree. *) join p s q t (* [fine_union] does not make much sense for sets of integers. Better warn the user. *) let fine_union decision s1 s2 = assert false (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [diff s t] returns the set difference of [s] and [t], that is, $s\setminus t$. *) let rec diff s t = match s, t with | Empty, _ | _, Empty -> s | Leaf x, _ -> if mem x t then Empty else s | _, Leaf x -> remove x s | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Compute the differences of their sub-trees. *) build p m (diff s0 t0) (diff s1 t1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Subtract [t] off a sub-tree of [s]. *) if (q land m) = 0 then build p m (diff s0 t) s1 else build p m s0 (diff s1 t) else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Subtract a sub-tree of [t] off [s]. *) diff s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) s (* [inter s t] returns the set intersection of [s] and [t], that is, $s\cap t$. *) let rec inter s t = match s, t with | Empty, _ | _, Empty -> Empty | (Leaf x as s), t | t, (Leaf x as s) -> if mem x t then s else Empty | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then (* The trees have the same prefix. Compute the intersections of their sub-trees. *) build p m (inter s0 t0) (inter s1 t1) else if (X.shorter m n) & (match_prefix q p m) then (* [q] contains [p]. Intersect [t] with a sub-tree of [s]. *) inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) & (match_prefix p q n) then (* [p] contains [q]. Intersect [s] with a sub-tree of [t]. *) inter s (if (p land n) = 0 then t0 else t1) else (* The prefixes disagree. *) Empty (* [disjoint s1 s2] returns [true] if and only if the sets [s1] and [s2] are disjoint, i.e. iff their intersection is empty. It is a specialized version of [inter], which uses less space. *) exception NotDisjoint let disjoint s t = let rec inter s t = match s, t with | Empty, _ | _, Empty -> () | Leaf x, _ -> if mem x t then raise NotDisjoint | _, Leaf x -> if mem x s then raise NotDisjoint | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then begin inter s0 t0; inter s1 t1 end else if (X.shorter m n) & (match_prefix q p m) then inter (if (q land m) = 0 then s0 else s1) t else if (X.shorter n m) & (match_prefix p q n) then inter s (if (p land n) = 0 then t0 else t1) else () in try inter s t; true with NotDisjoint -> false (* [iter f s] invokes [f x], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. *) let rec iter f = function | Empty -> () | Leaf x -> f x | Branch (_, _, tree0, tree1) -> iter f tree0; iter f tree1 (* [fold f s seed] invokes [f x accu], in turn, for each element [x] of the set [s]. Elements are presented to [f] according to some unspecified, but fixed, order. The initial value of [accu] is [seed]; then, at each new call, its value is the value returned by the previous invocation of [f]. The value returned by [fold] is the final value of [accu]. In other words, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [fold f s seed] computes $([f]\,x_n\,\ldots\,([f]\,x_2\,([f]\,x_1\,[seed]))\ldots)$. *) let rec fold f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold f s1 (fold f s0 accu) (* [elements s] is a list of all elements in the set [s]. *) let elements s = fold (fun tl hd -> tl :: hd) s [] (* [fold_rev] performs exactly the same job as [fold], but presents elements to [f] in the opposite order. *) let rec fold_rev f s accu = match s with | Empty -> accu | Leaf x -> f x accu | Branch (_, _, s0, s1) -> fold_rev f s0 (fold_rev f s1 accu) (* [iter2] does not make much sense for sets of integers. Better warn the user. *) let rec iter2 f t1 t2 = assert false (* [iterator s] returns a stateful iterator over the set [s]. That is, if $s = \{ x_1, x_2, \ldots, x_n \}$, where $x_1 < x_2 < \ldots < x_n$, then [iterator s] is a function which, when invoked for the $k^{\text{th}}$ time, returns [Some]$x_k$, if $k\leq n$, and [None] otherwise. Such a function can be useful when one wishes to iterate over a set's elements, without being restricted by the call stack's discipline. For more comments about this algorithm, please see module [Baltree], which defines a similar one. *) let iterator s = let remainder = ref [ s ] in let rec next () = match !remainder with | [] -> None | Empty :: parent -> remainder := parent; next() | (Leaf x) :: parent -> remainder := parent; Some x | (Branch(_, _, s0, s1)) :: parent -> remainder := s0 :: s1 :: parent; next () in next (* [exists p s] returns [true] if and only if some element of [s] matches the predicate [p]. *) exception Exists let exists p s = try iter (fun x -> if p x then raise Exists ) s; false with Exists -> true (* [compare] is an ordering over sets. *) exception Got of int let compare s1 s2 = let iterator2 = iterator s2 in try iter (fun x1 -> match iterator2() with | None -> raise (Got 1) | Some x2 -> let c = Pervasives.compare x1 x2 in if c <> 0 then raise (Got c) ) s1; match iterator2() with | None -> 0 | Some _ -> -1 with Got c -> c (* [equal] implements equality over sets. *) let equal s1 s2 = compare s1 s2 = 0 (* [subset] implements the subset predicate over sets. In other words, [subset s t] returns [true] if and only if $s\subseteq t$. It is a specialized version of [diff]. *) exception NotSubset let subset s t = let rec diff s t = match s, t with | Empty, _ -> () | _, Empty | Branch _, Leaf _ -> raise NotSubset | Leaf x, _ -> if not (mem x t) then raise NotSubset | Branch(p, m, s0, s1), Branch(q, n, t0, t1) -> if (p = q) & (m = n) then begin diff s0 t0; diff s1 t1 end else if (X.shorter n m) & (match_prefix p q n) then diff s (if (p land n) = 0 then t0 else t1) else (* Either [q] contains [p], which means at least one of [s]'s sub-trees is not contained within [t], or the prefixes disagree. In either case, the subset relationship cannot possibly hold. *) raise NotSubset in try diff s t; true with NotSubset -> false (* [filter p s] returns the subset of [s] formed by all elements which satisfy the predicate [p]. *) let filter predicate s = let modified = ref false in let subset = fold (fun element subset -> if predicate element then add element subset else begin modified := true; subset end ) s Empty in if !modified then subset else s (* [map f s] computes the image of [s] through [f]. *) let map f s = fold (fun element accu -> add (f element) accu ) s Empty (* [monotone_map] and [endo_map] do not make much sense for sets of integers. Better warn the user. *) let monotone_map f s = assert false let endo_map f s = assert false end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Relating sets and maps} *) (* Back to the world of maps. Let us now describe the relationship which exists between maps and their domains. *) (* [domain m] returns [m]'s domain. *) let rec domain = function | Empty -> Domain.Empty | Leaf (k, _) -> Domain.Leaf k | Branch (p, m, t0, t1) -> Domain.Branch (p, m, domain t0, domain t1) (* [lift f s] returns the map $k\mapsto f(k)$, where $k$ ranges over a set of keys [s]. *) let rec lift f = function | Domain.Empty -> Empty | Domain.Leaf k -> Leaf (k, f k) | Domain.Branch (p, m, t0, t1) -> Branch(p, m, lift f t0, lift f t1) (* [build] is a ``smart constructor''. It builds a [Branch] node with the specified arguments, but ensures that the newly created node does not have an [Empty] child. *) let build p m t0 t1 = match t0, t1 with | Empty, Empty -> Empty | Empty, _ -> t1 | _, Empty -> t0 | _, _ -> Branch(p, m, t0, t1) (* [corestrict m d] performs a co-restriction of the map [m] to the domain [d]. That is, it returns the map $k\mapsto m(k)$, where $k$ ranges over all keys bound in [m] but \emph{not} present in [d]. Its code resembles [diff]'s. *) let rec corestrict s t = match s, t with | Empty, _ | _, Domain.Empty -> s | Leaf (k, _), _ -> if Domain.mem k t then Empty else s | _, Domain.Leaf k -> remove k s | Branch(p, m, s0, s1), Domain.Branch(q, n, t0, t1) -> if (p = q) & (m = n) then build p m (corestrict s0 t0) (corestrict s1 t1) else if (X.shorter m n) & (match_prefix q p m) then if (q land m) = 0 then build p m (corestrict s0 t) s1 else build p m s0 (corestrict s1 t) else if (X.shorter n m) & (match_prefix p q n) then corestrict s (if (p land n) = 0 then t0 else t1) else s end (*i --------------------------------------------------------------------------------------------------------------- i*) (*s \mysection{Instantiating the functor} *) module Little = Make(Endianness.Little) module Big = Make(Endianness.Big)