source: project/release/4/vlist/trunk/vlist.scm @ 25849

Last change on this file since 25849 was 25849, checked in by Ivan Raikov, 9 years ago

initial import of vlist, a functional list-like structure (ported from Guile)

File size: 20.1 KB
Line 
1;;;
2;;; Author: Ludovic CourtÚs <ludo@gnu.org>
3;;; Adapted for Chicken Scheme by Ivan Raikov.
4;;;
5;;; This module provides an implementations of vlists, a functional list-like
6;;; data structure described by Phil Bagwell in "Fast Functional Lists,
7;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
8;;; 2002.
9;;;
10;;; The idea is to store vlist elements in increasingly large contiguous blocks
11;;; (implemented as vectors here).  These blocks are linked to one another using
12;;; a pointer to the next block (called `block-base' here) and an offset within
13;;; that block (`block-offset' here).  The size of these blocks form a geometric
14;;; series with ratio `block-growth-factor'.
15;;;
16;;; In the best case (e.g., using a vlist returned by `list->vlist'),
17;;; elements from the first half of an N-element vlist are accessed in O(1)
18;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
19;;; O(ln(N)).  Furthermore, the data structure improves data locality since
20;;; vlist elements are adjacent, which plays well with caches.
21;;;
22;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
23;;; Chicken Scheme modifications Copyright 2012 Ivan Raikov.
24;;;
25;;; This library is free software; you can redistribute it and/or
26;;; modify it under the terms of the GNU Lesser General Public
27;;; License as published by the Free Software Foundation; either
28;;; version 3 of the License, or (at your option) any later version.
29;;;
30;;; This library is distributed in the hope that it will be useful,
31;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
32;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
33;;; Lesser General Public License for more details.
34;;;
35;;; A full copy of the Lesser GPL license can be found at
36;;; <http://www.gnu.org/licenses/>.
37;;;
38
39
40(module vlist
41
42        (vlist? vlist-cons vlist-head vlist-tail vlist-null?
43         vlist-null list->vlist vlist-ref vlist-drop vlist-take
44         vlist-length vlist-fold vlist-fold-right vlist-map
45         vlist-unfold vlist-unfold-right vlist-append
46         vlist-reverse vlist-filter vlist-delete vlist->list
47         vlist-for-each
48         block-growth-factor
49
50         vhash? vhash-cons vhash-consq vhash-consv
51         vhash-assoc vhash-assq vhash-assv
52         vhash-delete vhash-delq vhash-delv
53         vhash-fold vhash-fold-right
54         vhash-fold* vhash-foldq* vhash-foldv*
55         alist->vhash)
56                   
57 (import scheme chicken )
58 (require-library srfi-1 srfi-69 extras)
59 (import (only srfi-1 fold fold-right)
60         (only srfi-69 hash eq?-hash eqv?-hash)
61         (only extras format))
62
63;;;
64;;; VList Blocks and Block Descriptors.
65;;;
66
67(define block-growth-factor (make-parameter 2))
68
69(define-inline (make-block base offset size hash-tab?)
70  ;; Return a block (and block descriptor) of SIZE elements pointing to BASE
71  ;; at OFFSET.  If HASH-TAB? is true, a "hash table" is also added.
72  ;; Note: We use `next-free' instead of `last-used' as suggested by Bagwell.
73
74  ;; XXX: We could improve locality here by having a single vector but currently
75  ;; the extra arithmetic outweighs the benefits (!).
76  (vector (make-vector size)
77          base offset size 0
78          (and hash-tab? (make-vector size #f))))
79
80
81(define-syntax define-block-accessor
82  (syntax-rules ()
83    [(_ name index)
84     (begin
85       (define-inline (name block)
86         (vector-ref block index)))]
87    ))
88
89(define-block-accessor block-content 0)
90(define-block-accessor block-base 1)
91(define-block-accessor block-offset 2)
92(define-block-accessor block-size 3)
93(define-block-accessor block-next-free 4)
94(define-block-accessor block-hash-table 5)
95
96(define-inline (increment-block-next-free! block)
97  (vector-set! block 4
98               (+ (block-next-free block) 1)))
99
100(define-inline (block-append! block value)
101  ;; This is not thread-safe.  To fix it, see Section 2.8 of the paper.
102  (let ((offset (block-next-free block)))
103    (increment-block-next-free! block)
104    (vector-set! (block-content block) offset value)
105    #t))
106
107(define-inline (block-ref block offset)
108  (vector-ref (block-content block) offset))
109
110(define-inline (block-ref* block offset)
111  (let ((v (block-ref block offset)))
112    (if (block-hash-table block)
113        (car v) ;; hide the vhash link
114        v)))
115
116(define-inline (block-hash-table-ref block offset)
117  (vector-ref (block-hash-table block) offset))
118
119(define-inline (block-hash-table-set! block offset value)
120  (vector-set! (block-hash-table block) offset value))
121
122(define block-null
123  ;; The null block.
124  (make-block #f 0 0 #f))
125
126;;;
127;;; VLists.
128;;;
129
130(define-record-type <vlist>
131  ;; A vlist is just a base+offset pair pointing to a block.
132
133  ;; XXX: Allocating a <vlist> record in addition to the block at each
134  ;; `vlist-cons' call is inefficient.  However, Bagwell's hack to avoid it
135  ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
136  ;; performance hit for everyone.
137  (make-vlist base offset)
138  vlist?
139  (base    vlist-base)
140  (offset  vlist-offset))
141
142(define-record-printer (<vlist> vl port)
143  (cond ((vlist-null? vl)
144         (format port "#<vlist ()>"))
145        ((block-hash-table (vlist-base vl))
146         (format port "#<vhash ~a pairs>"
147                 (vhash-fold (lambda (k v r)
148                               (+ 1 r))
149                             0
150                             vl)))
151        (else
152         (format port "#<vlist ~a>"
153                 (vlist->list vl)))))
154 
155
156(define vlist-null
157  ;; The empty vlist.
158  (make-vlist block-null 0))
159
160(define-inline (block-cons item vlist hash-tab?)
161  (let loop ((base   (vlist-base vlist))
162             (offset (+ 1 (vlist-offset vlist))))
163    (if (and (< offset (block-size base))
164             (= offset (block-next-free base))
165             (block-append! base item))
166        (make-vlist base offset)
167        (let ((size (cond ((eq? base block-null) 1)
168                          ((< offset (block-size base))
169                           ;; new vlist head
170                           1)
171                          (else
172                           (* (block-growth-factor)
173                              (block-size base))))))
174          ;; Prepend a new block pointing to BASE.
175          (loop (make-block base (- offset 1) size hash-tab?)
176                0)))))
177
178;; Return a new vlist with {item} as its head and {vlist} as its tail
179
180(define (vlist-cons item vlist)
181  ;; Note: Calling `vlist-cons' on a vhash will not do the right thing: it
182  ;; doesn't box ITEM so that it can have the hidden "next" link used by
183  ;; vhash items, and it passes `#f' as the HASH-TAB? argument to
184  ;; `block-cons'.  However, inserting all the checks here has an important
185  ;; performance penalty, hence this choice.
186  (block-cons item vlist #f))
187
188;; Return the head of {vlist}.
189(define (vlist-head vlist)
190  (let ((base   (vlist-base vlist))
191        (offset (vlist-offset vlist)))
192    (block-ref* base offset)))
193
194;; Return the tail of {vlist}.
195(define (vlist-tail vlist)
196  (let ((base   (vlist-base vlist))
197        (offset (vlist-offset vlist)))
198    (if (> offset 0)
199        (make-vlist base (- offset 1))
200        (make-vlist (block-base base)
201                    (block-offset base)))))
202
203;; Return true if {vlist} is empty.
204(define (vlist-null? vlist)
205  (let ((base (vlist-base vlist)))
206    (and (not (block-base base))
207         (= 0 (block-size base)))))
208
209;;;
210;;; VList Utilities.
211;;;
212
213;; Return a new vlist whose contents correspond to {lst}.
214(define (list->vlist lst)
215  (vlist-reverse (fold vlist-cons vlist-null lst)))
216
217;; Fold over {vlist}, calling {proc} for each element.
218(define (vlist-fold proc init vlist)
219  ;; FIXME: Handle multiple lists.
220  (let loop ((base   (vlist-base vlist))
221             (offset (vlist-offset vlist))
222             (result init))
223    (if (eq? base block-null)
224        result
225        (let* ((next  (- offset 1))
226               (done? (< next 0)))
227          (loop (if done? (block-base base) base)
228                (if done? (block-offset base) next)
229                (proc (block-ref* base offset) result))))))
230
231;; Fold over {vlist}, calling {proc} for each element, starting from the last element.
232(define (vlist-fold-right proc init vlist)
233  (define len (vlist-length vlist))
234
235  (let loop ((index  (- len 1))
236             (result init))
237    (if (< index 0)
238        result
239        (loop (- index 1)
240              (proc (vlist-ref vlist index) result)))))
241
242;; Return a new {vlist} whose content are those of {vlist} in reverse order.
243(define (vlist-reverse vlist)
244  (vlist-fold vlist-cons vlist-null vlist))
245
246;; Map {proc} over the elements of {vlist} and return a new vlist.
247(define (vlist-map proc vlist)
248  (vlist-fold (lambda (item result)
249                (vlist-cons (proc item) result))
250              vlist-null
251              (vlist-reverse vlist)))
252
253;; Return a new list whose contents match those of {vlist}.
254(define (vlist->list vlist)
255  (vlist-fold-right cons '() vlist))
256
257;; Return the element at index {index} in {vlist}.
258(define (vlist-ref vlist index)
259  (let loop ((index   index)
260             (base    (vlist-base vlist))
261             (offset  (vlist-offset vlist)))
262    (if (<= index offset)
263        (block-ref* base (- offset index))
264        (loop (- index offset 1)
265              (block-base base)
266              (block-offset base)))))
267
268;; Return a new vlist that does not contain the {count} first elements of {vlist}.
269(define (vlist-drop vlist count)
270  (let loop ((count  count)
271             (base   (vlist-base vlist))
272             (offset (vlist-offset vlist)))
273    (if (<= count offset)
274        (make-vlist base (- offset count))
275        (loop (- count offset 1)
276              (block-base base)
277              (block-offset base)))))
278
279; Return a new vlist that contains only the {count} first elements of {vlist}.
280(define (vlist-take vlist count)
281  (let loop ((count  count)
282             (vlist  vlist)
283             (result vlist-null))
284    (if (= 0 count)
285        (vlist-reverse result)
286        (loop (- count 1)
287              (vlist-tail vlist)
288              (vlist-cons (vlist-head vlist) result)))))
289
290;; Return a new vlist containing all the elements from {vlist} that satisfy {pred}.
291(define (vlist-filter pred vlist)
292  (vlist-fold-right (lambda (e v)
293                      (if (pred e)
294                          (vlist-cons e v)
295                          v))
296                    vlist-null
297                    vlist))
298
299;; Return a new vlist corresponding to {vlist} without the elements {equal?} to {x}.
300(define (vlist-delete x vlist #!optional (equal? equal?))
301  (vlist-filter (lambda (e)
302                  (not (equal? e x)))
303                vlist))
304
305;; Return the length of {vlist}.
306(define (vlist-length vlist)
307  (let loop ((base (vlist-base vlist))
308             (len  (vlist-offset vlist)))
309    (if (eq? base block-null)
310        len
311        (loop (block-base base)
312              (+ len 1 (block-offset base))))))
313
314;; Return a new vlist.  See the description of SRFI-1 `unfold' for details.
315(define (vlist-unfold p f g seed
316                       #!optional (tail-gen (lambda (x) vlist-null)))
317  (let uf ((seed seed))
318    (if (p seed)
319        (tail-gen seed)
320        (vlist-cons (f seed)
321                    (uf (g seed))))))
322
323;; Return a new vlist.  See the description of SRFI-1 `unfold-right' for details.
324(define (vlist-unfold-right p f g seed #!optional (tail vlist-null))
325  (let uf ((seed seed) (lis tail))
326    (if (p seed)
327        lis
328        (uf (g seed) (vlist-cons (f seed) lis)))))
329
330;; Append the given lists.
331(define (vlist-append . vlists)
332  (if (null? vlists)
333      vlist-null
334      (fold-right (lambda (vlist result)
335                    (vlist-fold-right (lambda (e v)
336                                        (vlist-cons e v))
337                                      result
338                                      vlist))
339                  vlist-null
340                  vlists)))
341
342;; Call {proc} on each element of {vlist}.  The result is unspecified.
343(define (vlist-for-each proc vlist)
344  (vlist-fold (lambda (item x)
345                (proc item))
346              (if #f #f)
347              vlist))
348
349;;;
350;;; Hash Lists, aka. `VHash'.
351;;;
352
353;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
354;; associated with K1 and K2, respectively.  The resulting layout is a
355;; follows:
356;;
357;;     ,--------------------.
358;;     | ,-> (K1 . V1) ---. |
359;;     | |                | |
360;;     | |   (K2 . V2) <--' |
361;;     | |                  |
362;;     +-|------------------+
363;;     | |                  |
364;;     | |                  |
365;;     | `-- O <---------------H
366;;     |                    |
367;;     `--------------------'
368;;
369;; The bottom part is the "hash table" part of the vhash, as returned by
370;; `block-hash-table'; the other half is the data part.  O is the offset of
371;; the first value associated with a key that hashes to H in the data part.
372;; The (K1 . V1) pair has a "hidden" link to the (K2 . V2) pair; hiding the
373;; link is handled by `block-ref'.
374
375;; This API potentially requires users to repeat which hash function and which
376;; equality predicate to use.  This can lead to unpredictable results if they
377;; are used in consistenly, e.g., between `vhash-cons' and `vhash-assoc', which
378;; is undesirable, as argued in http://savannah.gnu.org/bugs/?22159 .  OTOH, two
379;; arguments can be made in favor of this API:
380;;
381;;  - It's consistent with how alists are handled in SRFI-1.
382;;
383;;  - In practice, users will probably consistenly use either the `q', the `v',
384;;    or the plain variant (`vlist-cons' and `vlist-assoc' without any optional
385;;    argument), i.e., they will rarely explicitly pass a hash function or
386;;    equality predicate.
387
388;; Return true if {obj} is a hash list.
389(define (vhash? obj)
390  (and (vlist? obj)
391       (let ((base (vlist-base obj)))
392         (and base
393              (vector? (block-hash-table base))))))
394
395;; Return a new hash list based on {vhash} where {key} is associated
396;; with {value}.  Use {hash} to compute {key}'s hash.
397(define (vhash-cons key value vhash #!optional (hash hash))
398  (let* ((key+value (cons key value))
399         (entry     (cons key+value #f))
400         (vlist     (block-cons entry vhash #t))
401         (base      (vlist-base vlist))
402         (khash     (hash key (block-size base))))
403
404    (let ((o (block-hash-table-ref base khash)))
405      (if o (set-cdr! entry o)))
406
407    (block-hash-table-set! base khash
408                           (vlist-offset vlist))
409
410    vlist))
411
412(define vhash-consq (cut vhash-cons <> <> <> eq?-hash))
413(define vhash-consv (cut vhash-cons <> <> <> eqv?-hash))
414
415(define-inline (%vhash-fold* proc init key vhash equal? hash)
416  ;; Fold over all the values associated with KEY in VHASH.
417  (define khash
418    (let ((size (block-size (vlist-base vhash))))
419      (and (> size 0) (hash key size))))
420
421  (let loop ((base       (vlist-base vhash))
422             (khash      khash)
423             (offset     (and khash
424                              (block-hash-table-ref (vlist-base vhash)
425                                                    khash)))
426             (max-offset (vlist-offset vhash))
427             (result     init))
428
429    (let ((answer (and offset (block-ref base offset))))
430      (cond ((and (pair? answer)
431                  (<= offset max-offset)
432                  (let ((answer-key (caar answer)))
433                    (equal? key answer-key)))
434             (let ((result      (proc (cdar answer) result))
435                   (next-offset (cdr answer)))
436               (loop base khash next-offset max-offset result)))
437            ((and (pair? answer) (cdr answer))
438             =>
439             (lambda (next-offset)
440               (loop base khash next-offset max-offset result)))
441            (else
442             (let ((next-base (block-base base)))
443               (if (and next-base (> (block-size next-base) 0))
444                   (let* ((khash  (hash key (block-size next-base)))
445                          (offset (block-hash-table-ref next-base khash)))
446                     (loop next-base khash offset (block-offset base)
447                           result))
448                   result)))))))
449
450;; Fold over all the values associated with {key} in {vhash}, with
451;; each call to {proc} having the form {(proc value result)},
452;; where {result} is the result of the previous call to {proc} and
453;; {init} the value of {result} for the first call to {proc}."
454(define (vhash-fold* proc init key vhash
455                      #!optional (equal? equal?) (hash hash))
456  (%vhash-fold* proc init key vhash equal? hash))
457
458;; Same as {vhash-fold*}, but using {eq?-hash} and {eq?}.
459(define (vhash-foldq* proc init key vhash)
460  (%vhash-fold* proc init key vhash eq? eq?-hash))
461
462;; Same as {vhash-fold*}, but using {eqv?-hash} and {eqv?}.
463(define (vhash-foldv* proc init key vhash)
464  (%vhash-fold* proc init key vhash eqv? eqv?-hash))
465
466;; A specialization of `vhash-fold*' that stops when the first value
467;; associated with KEY is found or when the end-of-list is reached.
468;; Inline to make sure `vhash-assq' gets to use the `eq?' instruction
469;; instead of calling the `eq?'  subr.
470(define-inline (%vhash-assoc key vhash equal? hash)
471  (define khash
472    (let ((size (block-size (vlist-base vhash))))
473      (and (> size 0) (hash key size))))
474
475  (let loop ((base       (vlist-base vhash))
476             (khash      khash)
477             (offset     (and khash
478                              (block-hash-table-ref (vlist-base vhash)
479                                                    khash)))
480             (max-offset (vlist-offset vhash)))
481    (let ((answer (and offset (block-ref base offset))))
482      (cond ((and (pair? answer)
483                  (<= offset max-offset)
484                  (let ((answer-key (caar answer)))
485                    (equal? key answer-key)))
486             (car answer))
487            ((and (pair? answer) (cdr answer))
488             =>
489             (lambda (next-offset)
490               (loop base khash next-offset max-offset)))
491            (else
492             (let ((next-base (block-base base)))
493               (and next-base
494                    (> (block-size next-base) 0)
495                    (let* ((khash  (hash key (block-size next-base)))
496                           (offset (block-hash-table-ref next-base khash)))
497                      (loop next-base khash offset
498                            (block-offset base))))))))))
499
500;; Return the first key/value pair from {vhash} whose key is equal to
501;; {key} according to the {equal?} equality predicate.
502(define (vhash-assoc key vhash #!optional (equal? equal?) (hash hash))
503  (%vhash-assoc key vhash equal? hash))
504
505;; Return the first key/value pair from {vhash} whose key is {eq?} to
506;; {key}.
507(define (vhash-assq key vhash)
508  (%vhash-assoc key vhash eq? eq?-hash))
509
510;; Return the first key/value pair from {vhash} whose key is {eqv?} to
511;; {key}.
512(define (vhash-assv key vhash)
513  (%vhash-assoc key vhash eqv? eqv?-hash))
514
515;; Remove all associations from {vhash} with {key}, comparing keys with {equal?}.
516(define (vhash-delete key vhash #!optional (equal? equal?) (hash hash))
517  (if (vhash-assoc key vhash equal? hash)
518      (vlist-fold (lambda (k+v result)
519                    (let ((k (car k+v))
520                          (v (cdr k+v)))
521                      (if (equal? k key)
522                          result
523                          (vhash-cons k v result hash))))
524                  vlist-null
525                  vhash)
526      vhash))
527
528(define vhash-delq (cut vhash-delete <> <> eq? eq?-hash))
529(define vhash-delv (cut vhash-delete <> <> eqv? eqv?-hash))
530
531;; Fold over the key/pair elements of {vhash} from left to right, with
532;; each call to {proc} having the form {({proc} key value result)},
533;; where {result} is the result of the previous call to {proc} and
534;; {init} the value of {result} for the first call to {proc}.
535
536(define (vhash-fold proc init vhash)
537  (vlist-fold (lambda (key+value result)
538                (proc (car key+value) (cdr key+value)
539                      result))
540              init
541              vhash))
542
543;; Fold over the key/pair elements of {vhash} from right to left, with
544;; each call to {proc} having the form {({proc} key value result)},
545;; where {result} is the result of the previous call to {proc} and
546;; {init} the value of {result} for the first call to {proc}.
547
548(define (vhash-fold-right proc init vhash)
549  (vlist-fold-right (lambda (key+value result)
550                      (proc (car key+value) (cdr key+value)
551                            result))
552                    init
553                    vhash))
554
555;; Return the vhash corresponding to {alist}, an association list.
556
557(define (alist->vhash alist #!optional (hash hash))
558  (fold-right (lambda (pair result)
559                (vhash-cons (car pair) (cdr pair) result hash))
560              vlist-null
561              alist))
562
563)
Note: See TracBrowser for help on using the repository browser.