source: project/release/4/treap/trunk/treap-eggdoc.scm @ 14465

Last change on this file since 14465 was 14465, checked in by Ivan Raikov, 10 years ago

treap and rb-tree copied to release/4 branch and ported to Chicken 4

File size: 15.8 KB
Line 
1
2(use eggdoc)
3
4(define doc
5  `((eggdoc:begin
6     (name "treap")
7     (description "A sorted dictionary data structure based on randomized search trees.")
8     (author (p "Oleg Kiselyov; packaged for Chicken Scheme by " 
9                (url "http://chicken.wiki.br/users/ivan-raikov" "Ivan Raikov")))
10
11     (history 
12      (version "1.5" "Ported to Chicken 4")
13      (version "1.4" "Build script updated for better cross-platform compatibility")
14      (version "1.3" "License upgrade to GPL v3")
15      (version "1.2" "Added support for chicken-setup -test")
16      (version "1.1" "Added definitions for the dec macros")
17      (version "1.0" "Initial release"))
18
19     (requires)
20
21     (usage "(require-extension treap)")
22
23     (download "treap.egg")
24
25     (documentation
26     
27      (p "The treap library is an implementation of an ordered
28dictionary data structure, based on randomized search trees (treaps)
29by Seidel and Aragon: " (pre "R. Seidel and C. R. Aragon. Randomized Search Trees.
30Algorithmica, 16(4/5):464-497, 1996."))
31
32      (p "This code defines a treap object that implements an ordered
33dictionary mapping of keys to values. The object responds to a variety
34of query and update messages, including efficient methods for finding
35the minimum and maximum keys and their associated values as well as
36traversing of a treap in an ascending or descending order of
37keys. Looking up an arbitrary or the min/max keys, and deleting the
38min/max keys require no more key comparisons than the depth of the
39treap, which is O(log n) where n is the total number of keys in the
40treap. Arbitrary key deletion and insertions run in " 
41         (tt "O(log n)") (em " amortized") " time.")
42
43      (p "This code is inspired by a Stefan Nilsson's article " 
44         (i "Treaps in Java") " (" (i "Dr.Dobb's Journal") 
45", July 1997, p.40-44) and by the Java
46implementation of treaps described in the article. Yet this Scheme
47code has been developed completely from scratch, using the description
48of the algorithm given in the article, and insight gained from
49examining the Java source code. As a matter of fact, treap insertion
50and deletion algorithms implemented in this code differ from the ones
51described in the article and implemented in the Java code; this Scheme
52implementation uses fewer assignments and comparisons (see below for
53details). Some insight as to a generic tree interface gleaned from "
54         (tt "wttree.scm")  " (Weight balanced trees) by Stephen Adams.")
55
56      (p "A treap is a regular binary search tree, with one extension. The extension is that every node in a tree, beside a key, a value, and references to its left and right children, has an additional constant field, a priority.  The value of this field is set (to a random integer number) when the node is constructed, and is not changed afterwards. At any given moment, the priority of every non-leaf node never exceeds the priorities of its children. When a new node is inserted, we check that this invariant holds; otherwise, we perform a right or left rotation that swaps a parent and its kid, keeping the ordering of keys intact; the changes may need to be propagated recursively up. The priority property, and the fact they are chosen at random, makes a treap look like a binary search tree built by a random sequence of insertions. As the article shows, this makes a treap a balanced tree: the expected length of an average search path is roughly " 
57
58         (tt "1.4log2(n)-1") ", and the expected length of the longest search path is about "
59         (tt "4.3log2(n)") ". See Stefan Nilsson's article for more details.")
60
61      (subsection "Treap procedures"
62
63          (p "The treap object is created by a make-treap function, "
64             "the only user-visible function defined in this egg: "
65
66             (procedure "make-treap:: KEY-COMPARE-PROC -> SELECTOR"
67                        (p "where KEY-COMPARE-PROC is a user-supplied function "
68                           "that takes two keys and returns a negative, positive, or zero number "
69                           "depending on how the first key compares to the second. ")
70                       
71                        (p "The returned selector procedure can take one of the following arguments: "
72                           (symbol-table 
73                            (describe "'get" 
74                                      ("returns a procedure " (tt "LAMBDA KEY . DEFAULT-CLAUSE") 
75                                         " which searches the treap for an association with a given "
76                                         (tt "KEY") ", and returns a (key . value) pair of the found association. "
77                                         "If an association with " (tt "KEY") " cannot be located in the treap, "
78                                         "the PROC returns the result of evaluating the " (tt "DEFAULT-CLAUSE") ". "
79                                         "If the default clause is omitted, an error is signalled. "
80                                         (tt "KEY") " must be comparable to the keys in the treap "
81                                         "by a key-compare predicate (which has been specified "
82                                         "when the treap was created)"))
83                           
84                            (describe "'get-min"
85                                      ("returns a (key . value) pair for an association in the "
86                                         "treap with the smallest key. If the treap is empty, an error "
87                                         "is signalled."))
88                           
89                            (describe "'delete-min!"
90                                      ("removes the min key and the corresponding association "
91                                         "from the treap. Returns a (key . value) pair of the "
92                                         "removed association. If the treap is empty, an error "
93                                         "is signalled. "))
94                           
95                            (describe "'get-max"
96                                      ("returns a (key . value) pair for an association in the "
97                                         "treap with the largest key. If the treap is empty, an error "
98                                         "is signalled."))
99                           
100                            (describe "'delete-max!"
101                                      ("removes the max key and the corresponding association "
102                                         "from the treap. Returns a (key . value) pair of the "
103                                         "removed association. If the treap is empty, an error is signalled.")) 
104                           
105                            (describe "'empty?"
106                                      ("returns " (tt "#t") " if the treap is empty"))
107                           
108                            (describe "'size"
109                                      ("returns the size (the number of associations) in the treap"))
110                           
111                           
112                            (describe "'depth"
113                                      ("returns the depth of the tree. It requires "
114                                         "the complete traversal of the tree, so use sparingly"))
115
116                           
117                            (describe "'clear!"
118                                      ("removes all associations from the treap (thus making it empty)"))
119                           
120                            (describe "'put!"
121                                      ("returns a procedure " (tt "LAMBDA KEY VALUE") 
122                                         " which, given a " (tt "KEY") " and a " (tt "VALUE") 
123                                         ", adds the corresponding association to the treap. "
124                                         "If an association with the same " (tt "KEY") 
125                                         " already exists, its value is replaced with the "
126                                         (tt "VALUE") " (and the old (key . value) association is returned). "
127                                         "Otherwise, the return value is " (tt "#f") "."))
128                           
129                           
130                            (describe "'delete!"
131                                      ("returns a procedure " (tt "LAMBDA KEY . DEFAULT-CLAUSE")
132                                         " which searches the treap for an association with a given "
133                                         (tt "KEY") ", deletes it, and returns a (key . value) pair of the found "
134                                         "and deleted association. If an association with the KEY cannot be located "
135                                         "in the treap, the " (tt "PROC") " returns the result of evaluating "
136                                         (tt "DEFAULT-CLAUSE") ". " 
137                                         "If the default clause is omitted, an error is signalled. "))
138                           
139                            (describe "'for-each-ascending"
140                                      ("returns a procedure " (tt "LAMBDA PROC") 
141                                         " that will apply the given procedure PROC to each (key . value) "
142                                         "association of the treap, from the one with the smallest key "
143                                         "all the way to the one with the max key, in an ascending order "
144                                         "of keys. The treap must not be empty."))
145                           
146                            (describe "'for-each-descending"
147                                      ("returns a procedure " (tt "LAMBDA PROC") " that will apply the given "
148                                         "procedure " (tt "PROC") "to each (key . value) association of the treap, "
149                                         "in the descending order of keys. The treap must not be empty."))
150                           
151                            (describe "'debugprint"
152                                      ("prints out all the nodes in the treap, for debug purposes")))))))
153
154      (subsection "Notes on the algorithm"
155          (p "As the DDJ paper shows, insertion of a node into a treap is a simple
156recursive algorithm, Example 1 of the paper. This algorithm is implemented
157in the accompanying source [Java] code as "
158             (pre "   private Tree insert(Tree node, Tree tree)
159                      {
160                        if (tree == null) return node;
161                        int comp = node.key.compareTo(tree.key);
162                        if (comp < 0) {
163                           tree.left = insert(node, tree.left);
164                           if (tree.prio > tree.left.prio)
165                              tree = tree.rotateRight();
166                        } else if (comp > 0) {
167                           tree.right = insert(node, tree.right);
168                           if (tree.prio > tree.right.prio)
169                              tree = tree.rotateLeft();
170                        } else {
171                           keyFound = true;
172                           prevValue = tree.value;
173                           tree.value = node.value;
174                        }
175                        return tree;
176                     }"))
177         
178          (p "This algorithm, however, is not as efficient as it could be. Suppose we "
179             "try to insert a node which turns out to already exist in the tree, "
180             "at a depth D. The algorithm above would descend into this node in the "
181             "winding phase of the algorithm, replace the node's value, and, in the "
182             "unwinding phase of the recursion, would perform D assignments of the kind "
183             (pre "    tree.left = insert(node, tree.left);")
184             "and D comparisons of nodes' priorities. None of these priority checks and "
185             "assignments are actually necessary: as we haven't added any new node, "
186             "the tree structure hasn't changed.")
187         
188
189          (p "Therefore, the present Scheme code implements a different insertion "
190             "algorithm, which avoids doing unnecessary operations. The idea is simple: "
191             "if we insert a new node into some particular branch of the treap and verify "
192             "that this branch conforms to the treap priority invariant, we are certain "
193             "the invariant holds throughout the entire treap, and no further checks "
194             "(up the tree to the root) are necessary. In more detail: "
195             (ol (li "Starting from the root, we recursively descend until we find "
196                     "a node with a given key, or a place a new node can be inserted.")
197                 (li "We insert the node and check to see if its priority is less than "
198                     "that of its parent. If this is the case, we left- or right-rotate "
199                     "the tree to make the old parent a child of the new node, and the "
200                     "new node a new root of this particular branch. We return this new "
201                     "parent as an indication that further checks up the tree are "
202                     "necessary. If the new node conforms to the parent's priority, we "
203                     "insert it and return " (tt "#f"))
204                 (li "On the way up, we check the priorities again and rotate the tree "
205                     "to restore the priority invariant at the current level if needed.")
206                 (li "If no changes are made at the current level, we return a flag " (tt "#f")
207                     "meaning that no further changes or checks are necessary at the higher levels.")))
208         
209          (p "Thus, if a new node was originally inserted at a level D in the tree (level "
210             "0 being the root) but then migrated up by L levels (because of its priority), "
211             "the original insertion algorithm would perform (D-1) assignments, "
212             "(D-1) priority checks, plus L rotations (at a cost of 2 assignments in the "
213             "treap each). Our algorithm does only (L+1) node priority checks and "
214             "max(2(L-1)+2,1) assignments. ")
215         
216          (p "Note if priorities are really (uniformly) random, L is uniformly distributed "
217             "over [0,D], so the average cost of our algorithm is "
218             (pre "     D/2 +1 checks and D assignments")
219             "compared to"
220             (pre "       D-1 checks and 2D-1 assignments")
221             "for the original algorithm described in the DDJ paper.")
222
223          (p "The similar gripe applies to the Java implementation of a node deletion "
224             "algorithm: "
225             (pre "   private Tree delete(Ordered key, Tree t)
226                      {
227                        if (t == null) return null;
228                        int comp = key.compareTo(t.key);
229                        if (comp < 0)
230                           t.left = delete(key, t.left);
231                        else if (comp > 0)
232                           t.right = delete(key, t.right);
233                        else {
234                           keyFound = true;
235                           prevValue = t.value;
236                           t = t.deleteRoot();
237                        }
238                        return t;
239                     }"))
240
241          (p "The algorithm as implemented looks fully-recursive. Furthermore, "
242             "deletion of a node at a level D in the treap involves at least D "
243             "assignments, most of them being unnecessary. Indeed, if a node being "
244             "deleted is a leaf, only one change to the tree is needed to detach "
245             "the node. Deleting a node obviously requires a left- or a right-kid "
246             "field of the node's parent be modified (cleared). This change, "
247             "however does NOT need to be propagated up: deleting of a node does "
248             "not violate neither ordering nor priority invariants of the treap; "
249             "all changes are confined to a branch rooted at the parent of the "
250             "deleted node.")
251         
252          (p "This Scheme code implements node deletion algorithm in the optimal "
253             "way, performing only those assignments which are absolutely "
254             "necessary, and replacing full recursions with tail recursions (which "
255             "are simply iterations).  Our implementation is also simpler and "
256             "clearer, making use of a helper procedure join! to join two treap "
257             "branches (while keeping both treap invariants intact). The deletion "
258             "algorithm can then be expressed as replacing a node with a join of "
259             "its two kids; compare this explanation to the one given in the DDJ "
260             "paper!")))
261
262     (examples (pre #<<EOF
263;; "--> Sorting of a set of numbers via a treap"
264
265(define (++ x) (fx+ 1 x))
266(define (-- x) (fx- x 1))
267
268(let
269  ((min-key -1) (max-key 10)
270   (treap (make-treap (lambda (x y) (- x y))))
271   ;; a hard-wired association between a key and a value
272   (compute-assoc (lambda (key) (cons key (++ key)))))
273
274  ;; loading a sequence [min-key .. max-key] in ascending order
275  (do ((i min-key (++ i))) ((> i max-key))
276    ((treap 'put!) i (cdr (compute-assoc i))))
277  (treap 'debugprint)
278  (print "treap's depth is " (treap 'depth) "\n")
279
280 ; (assert (equal? ((treap 'get) (++ min-key))
281 ;                (compute-assoc (++ min-key))))
282  (print ((treap 'get) (++ min-key)))
283
284  ; (assert (equal? ((treap 'get) (++ min-key) #f)
285  ;               (compute-assoc (++ min-key))))
286  (print ((treap 'get) (++ min-key) 'notfound))
287
288  ;; checking traversing in the ascending order
289  (let ((expected-key min-key))
290    ((treap 'for-each-ascending)
291      (lambda (association)
292        (print (equal? association (compute-assoc expected-key)))
293        (set! expected-key (++ expected-key)))))
294    ;(assert (= expected-key (++ max-key))))
295
296  ;; clearing the treap and reloading the same seq in descending order
297  (treap 'clear!)
298  (do ((i max-key (-- i))) ((< i min-key))
299     ((treap 'put!) i (cdr (compute-assoc i))))
300  (treap 'debugprint)
301  (print "treap's depth is " (treap 'depth) "\n")
302
303  ;(assert (equal? (treap 'get-max) ((treap 'get) max-key)))
304  ;(assert (equal? (treap 'delete-max!) (compute-assoc max-key)))
305  ;(assert (not ((treap 'get) max-key #f)))
306  ;(assert (equal? (treap 'get-max) ((treap 'get) (-- max-key))))
307  ;(assert (= (treap 'size) (+ -2 (++ (- max-key min-key)))))
308
309  ;; tchecking traversing in the descending order
310  (let ((expected-key max-key))
311    ((treap 'for-each-descending)
312      (lambda (association)
313        (print (equal? association (compute-assoc expected-key)))
314        (set! expected-key (-- expected-key))))))
315    ;(assert (= expected-key (-- min-key))))
316EOF
317))
318     (license
319      "Copyright Oleg Kiselyov. Chicken Scheme support copyright by Ivan
320Raikov.
321
322This program is free software: you can redistribute it and/or modify
323it under the terms of the GNU General Public License as published by
324the Free Software Foundation, either version 3 of the License, or (at
325your option) any later version.
326
327This program is distributed in the hope that it will be useful, but
328WITHOUT ANY WARRANTY; without even the implied warranty of
329MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
330General Public License for more details.
331
332A full copy of the GPL license can be found at
333<http://www.gnu.org/licenses/>."))))
334
335(if (eggdoc->html doc) (void))
Note: See TracBrowser for help on using the repository browser.