source: project/release/5/vlist/trunk/vlist.scm @ 37274

Last change on this file since 37274 was 37274, checked in by Ivan Raikov, 2 years ago

port of vlist to C5 (thanks to Yuriy Shirokov)

File size: 20.3 KB
Line 
1;;;
2;;; Author: Ludovic CourtÚs <ludo@gnu.org>
3;;; Adapted for Chicken Scheme by Ivan Raikov.
4;;;
5;;; This module provides an implementations of vlists, a functional list-like
6;;; data structure described by Phil Bagwell in "Fast Functional Lists,
7;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
8;;; 2002.
9;;;
10;;; The idea is to store vlist elements in increasingly large contiguous blocks
11;;; (implemented as vectors here).  These blocks are linked to one another using
12;;; a pointer to the next block (called `block-base' here) and an offset within
13;;; that block (`block-offset' here).  The size of these blocks form a geometric
14;;; series with ratio `block-growth-factor'.
15;;;
16;;; In the best case (e.g., using a vlist returned by `list->vlist'),
17;;; elements from the first half of an N-element vlist are accessed in O(1)
18;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
19;;; O(ln(N)).  Furthermore, the data structure improves data locality since
20;;; vlist elements are adjacent, which plays well with caches.
21;;;
22;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
23;;; Chicken Scheme modifications Copyright 2012 Ivan Raikov.
24;;;
25;;; This library is free software; you can redistribute it and/or
26;;; modify it under the terms of the GNU Lesser General Public
27;;; License as published by the Free Software Foundation; either
28;;; version 3 of the License, or (at your option) any later version.
29;;;
30;;; This library is distributed in the hope that it will be useful,
31;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
32;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
33;;; Lesser General Public License for more details.
34;;;
35;;; A full copy of the Lesser GPL license can be found at
36;;; <http://www.gnu.org/licenses/>.
37;;;
38
39
40(module vlist
41
42        (vlist? vlist-cons vlist-head vlist-tail vlist-null?
43         vlist-null list->vlist vlist-ref vlist-drop vlist-take
44         vlist-length vlist-fold vlist-fold-right vlist-map
45         vlist-unfold vlist-unfold-right vlist-append
46         vlist-reverse vlist-filter vlist-delete vlist->list
47         vlist-for-each
48         block-growth-factor
49
50         vhash? vhash-cons vhash-consq vhash-consv
51         vhash-assoc vhash-assq vhash-assv
52         vhash-delete vhash-delq vhash-delv
53         vhash-fold vhash-fold-right
54         vhash-fold* vhash-foldq* vhash-foldv*
55         alist->vhash)
56
57  (import scheme)
58  (import chicken.base)
59  (import chicken.format)
60  (require-library srfi-1 srfi-69)
61  (import (only srfi-1 fold fold-right)
62          (only srfi-69 hash eq?-hash eqv?-hash))
63
64;;;
65;;; VList Blocks and Block Descriptors.
66;;;
67
68(define block-growth-factor (make-parameter 2))
69
70(define-inline (make-block base offset size hash-tab?)
71  ;; Return a block (and block descriptor) of SIZE elements pointing to BASE
72  ;; at OFFSET.  If HASH-TAB? is true, a "hash table" is also added.
73  ;; Note: We use `next-free' instead of `last-used' as suggested by Bagwell.
74
75  ;; XXX: We could improve locality here by having a single vector but currently
76  ;; the extra arithmetic outweighs the benefits (!).
77  (vector (make-vector size)
78          base offset size 0
79          (and hash-tab? (make-vector size #f))))
80
81
82(define-syntax define-block-accessor
83  (syntax-rules ()
84    [(_ name index)
85     (begin
86       (define-inline (name block)
87         (vector-ref block index)))]
88    ))
89
90(define-block-accessor block-content 0)
91(define-block-accessor block-base 1)
92(define-block-accessor block-offset 2)
93(define-block-accessor block-size 3)
94(define-block-accessor block-next-free 4)
95(define-block-accessor block-hash-table 5)
96
97(define-inline (increment-block-next-free! block)
98  (vector-set! block 4
99               (+ (block-next-free block) 1)))
100
101(define-inline (block-append! block value)
102  ;; This is not thread-safe.  To fix it, see Section 2.8 of the paper.
103  (let ((offset (block-next-free block)))
104    (increment-block-next-free! block)
105    (vector-set! (block-content block) offset value)
106    #t))
107
108(define-inline (block-ref block offset)
109  (vector-ref (block-content block) offset))
110
111(define-inline (block-ref* block offset)
112  (let ((v (block-ref block offset)))
113    (if (block-hash-table block)
114        (car v) ;; hide the vhash link
115        v)))
116
117(define-inline (block-hash-table-ref block offset)
118  (vector-ref (block-hash-table block) offset))
119
120(define-inline (block-hash-table-set! block offset value)
121  (vector-set! (block-hash-table block) offset value))
122
123(define block-null
124  ;; The null block.
125  (make-block #f 0 0 #f))
126
127;;;
128;;; VLists.
129;;;
130
131(define-record-type <vlist>
132  ;; A vlist is just a base+offset pair pointing to a block.
133
134  ;; XXX: Allocating a <vlist> record in addition to the block at each
135  ;; `vlist-cons' call is inefficient.  However, Bagwell's hack to avoid it
136  ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
137  ;; performance hit for everyone.
138  (make-vlist base offset)
139  vlist?
140  (base    vlist-base)
141  (offset  vlist-offset))
142
143(define-record-printer (<vlist> vl port)
144  (cond ((vlist-null? vl)
145         (format port "#<vlist ()>"))
146        ((block-hash-table (vlist-base vl))
147         (format port "#<vhash ~a pairs>"
148                 (vhash-fold (lambda (k v r)
149                               (+ 1 r))
150                             0
151                             vl)))
152        (else
153         (format port "#<vlist ~a>"
154                 (vlist->list vl)))))
155
156
157(define vlist-null
158  ;; The empty vlist.
159  (make-vlist block-null 0))
160
161(define-inline (block-cons item vlist hash-tab?)
162  (let loop ((base   (vlist-base vlist))
163             (offset (+ 1 (vlist-offset vlist))))
164    (if (and (< offset (block-size base))
165             (= offset (block-next-free base))
166             (block-append! base item))
167        (make-vlist base offset)
168        (let ((size (cond ((eq? base block-null) 1)
169                          ((< offset (block-size base))
170                           ;; new vlist head
171                           1)
172                          (else
173                           (* (block-growth-factor)
174                              (block-size base))))))
175          ;; Prepend a new block pointing to BASE.
176          (loop (make-block base (- offset 1) size hash-tab?)
177                0)))))
178
179;; Return a new vlist with {item} as its head and {vlist} as its tail
180
181(define (vlist-cons item vlist)
182  ;; Note: Calling `vlist-cons' on a vhash will not do the right thing: it
183  ;; doesn't box ITEM so that it can have the hidden "next" link used by
184  ;; vhash items, and it passes `#f' as the HASH-TAB? argument to
185  ;; `block-cons'.  However, inserting all the checks here has an important
186  ;; performance penalty, hence this choice.
187  (block-cons item vlist #f))
188
189;; Return the head of {vlist}.
190(define (vlist-head vlist)
191  (let ((base   (vlist-base vlist))
192        (offset (vlist-offset vlist)))
193    (block-ref* base offset)))
194
195;; Return the tail of {vlist}.
196(define (vlist-tail vlist)
197  (let ((base   (vlist-base vlist))
198        (offset (vlist-offset vlist)))
199    (if (> offset 0)
200        (make-vlist base (- offset 1))
201        (make-vlist (block-base base)
202                    (block-offset base)))))
203
204;; Return true if {vlist} is empty.
205(define (vlist-null? vlist)
206  (let ((base (vlist-base vlist)))
207    (and (not (block-base base))
208         (= 0 (block-size base)))))
209
210;;;
211;;; VList Utilities.
212;;;
213
214;; Return a new vlist whose contents correspond to {lst}.
215(define (list->vlist lst)
216  (vlist-reverse (fold vlist-cons vlist-null lst)))
217
218;; Fold over {vlist}, calling {proc} for each element.
219(define (vlist-fold proc init vlist)
220  ;; FIXME: Handle multiple lists.
221  (let loop ((base   (vlist-base vlist))
222             (offset (vlist-offset vlist))
223             (result init))
224    (if (eq? base block-null)
225        result
226        (let* ((next  (- offset 1))
227               (done? (< next 0)))
228          (loop (if done? (block-base base) base)
229                (if done? (block-offset base) next)
230                (proc (block-ref* base offset) result))))))
231
232;; Fold over {vlist}, calling {proc} for each element, starting from the last element.
233(define (vlist-fold-right proc init vlist)
234  (define len (vlist-length vlist))
235
236  (let loop ((index  (- len 1))
237             (result init))
238    (if (< index 0)
239        result
240        (loop (- index 1)
241              (proc (vlist-ref vlist index) result)))))
242
243;; Return a new {vlist} whose content are those of {vlist} in reverse order.
244(define (vlist-reverse vlist)
245  (vlist-fold vlist-cons vlist-null vlist))
246
247;; Map {proc} over the elements of {vlist} and return a new vlist.
248(define (vlist-map proc vlist)
249  (vlist-fold (lambda (item result)
250                (vlist-cons (proc item) result))
251              vlist-null
252              (vlist-reverse vlist)))
253
254;; Return a new list whose contents match those of {vlist}.
255(define (vlist->list vlist)
256  (vlist-fold-right cons '() vlist))
257
258;; Return the element at index {index} in {vlist}.
259(define (vlist-ref vlist index)
260  (let loop ((index   index)
261             (base    (vlist-base vlist))
262             (offset  (vlist-offset vlist)))
263    (if (<= index offset)
264        (block-ref* base (- offset index))
265        (loop (- index offset 1)
266              (block-base base)
267              (block-offset base)))))
268
269;; Return a new vlist that does not contain the {count} first elements of {vlist}.
270(define (vlist-drop vlist count)
271  (let loop ((count  count)
272             (base   (vlist-base vlist))
273             (offset (vlist-offset vlist)))
274    (if (<= count offset)
275        (make-vlist base (- offset count))
276        (loop (- count offset 1)
277              (block-base base)
278              (block-offset base)))))
279
280; Return a new vlist that contains only the {count} first elements of {vlist}.
281(define (vlist-take vlist count)
282  (let loop ((count  count)
283             (vlist  vlist)
284             (result vlist-null))
285    (if (= 0 count)
286        (vlist-reverse result)
287        (loop (- count 1)
288              (vlist-tail vlist)
289              (vlist-cons (vlist-head vlist) result)))))
290
291;; Return a new vlist containing all the elements from {vlist} that satisfy {pred}.
292(define (vlist-filter pred vlist)
293  (vlist-fold-right (lambda (e v)
294                      (if (pred e)
295                          (vlist-cons e v)
296                          v))
297                    vlist-null
298                    vlist))
299
300;; Return a new vlist corresponding to {vlist} without the elements {equal?} to {x}.
301(define (vlist-delete x vlist #!optional (equal? equal?))
302  (vlist-filter (lambda (e)
303                  (not (equal? e x)))
304                vlist))
305
306;; Return the length of {vlist}.
307(define (vlist-length vlist)
308  (let loop ((base (vlist-base vlist))
309             (len  (vlist-offset vlist)))
310    (if (eq? base block-null)
311        len
312        (loop (block-base base)
313              (+ len 1 (block-offset base))))))
314
315;; Return a new vlist.  See the description of SRFI-1 `unfold' for details.
316(define (vlist-unfold p f g seed
317                       #!optional (tail-gen (lambda (x) vlist-null)))
318  (let uf ((seed seed))
319    (if (p seed)
320        (tail-gen seed)
321        (vlist-cons (f seed)
322                    (uf (g seed))))))
323
324;; Return a new vlist.  See the description of SRFI-1 `unfold-right' for details.
325(define (vlist-unfold-right p f g seed #!optional (tail vlist-null))
326  (let uf ((seed seed) (lis tail))
327    (if (p seed)
328        lis
329        (uf (g seed) (vlist-cons (f seed) lis)))))
330
331;; Append the given lists.
332(define (vlist-append . vlists)
333  (if (null? vlists)
334      vlist-null
335      (fold-right (lambda (vlist result)
336                    (vlist-fold-right (lambda (e v)
337                                        (vlist-cons e v))
338                                      result
339                                      vlist))
340                  vlist-null
341                  vlists)))
342
343;; Call {proc} on each element of {vlist}.  The result is unspecified.
344(define (vlist-for-each proc vlist)
345  (vlist-fold (lambda (item x)
346                (proc item))
347              (if #f #f)
348              vlist))
349
350;;;
351;;; Hash Lists, aka. `VHash'.
352;;;
353
354;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
355;; associated with K1 and K2, respectively.  The resulting layout is a
356;; follows:
357;;
358;;     ,--------------------.
359;;     | ,-> (K1 . V1) ---. |
360;;     | |                | |
361;;     | |   (K2 . V2) <--' |
362;;     | |                  |
363;;     +-|------------------+
364;;     | |                  |
365;;     | |                  |
366;;     | `-- O <---------------H
367;;     |                    |
368;;     `--------------------'
369;;
370;; The bottom part is the "hash table" part of the vhash, as returned by
371;; `block-hash-table'; the other half is the data part.  O is the offset of
372;; the first value associated with a key that hashes to H in the data part.
373;; The (K1 . V1) pair has a "hidden" link to the (K2 . V2) pair; hiding the
374;; link is handled by `block-ref'.
375
376;; This API potentially requires users to repeat which hash function and which
377;; equality predicate to use.  This can lead to unpredictable results if they
378;; are used in consistenly, e.g., between `vhash-cons' and `vhash-assoc', which
379;; is undesirable, as argued in http://savannah.gnu.org/bugs/?22159 .  OTOH, two
380;; arguments can be made in favor of this API:
381;;
382;;  - It's consistent with how alists are handled in SRFI-1.
383;;
384;;  - In practice, users will probably consistenly use either the `q', the `v',
385;;    or the plain variant (`vlist-cons' and `vlist-assoc' without any optional
386;;    argument), i.e., they will rarely explicitly pass a hash function or
387;;    equality predicate.
388
389;; Return true if {obj} is a hash list.
390(define (vhash? obj)
391  (and (vlist? obj)
392       (let ((base (vlist-base obj)))
393         (and base
394              (vector? (block-hash-table base))))))
395
396;; Return a new hash list based on {vhash} where {key} is associated
397;; with {value}.  Use {hash} to compute {key}'s hash.
398(define (vhash-cons key value vhash #!optional (hash hash))
399  (let* ((key+value (cons key value))
400         (entry     (cons key+value #f))
401         (vlist     (block-cons entry vhash #t))
402         (base      (vlist-base vlist))
403         (khash     (hash key (block-size base))))
404
405    (let ((o (block-hash-table-ref base khash)))
406      (if o (set-cdr! entry o)))
407
408    (block-hash-table-set! base khash
409                           (vlist-offset vlist))
410
411    vlist))
412
413(define vhash-consq (cut vhash-cons <> <> <> eq?-hash))
414(define vhash-consv (cut vhash-cons <> <> <> eqv?-hash))
415
416(define-inline (%vhash-fold* proc init key vhash equal? hash)
417  ;; Fold over all the values associated with KEY in VHASH.
418  (define khash
419    (let ((size (block-size (vlist-base vhash))))
420      (and (> size 0) (hash key size))))
421
422  (let loop ((base       (vlist-base vhash))
423             (khash      khash)
424             (offset     (and khash
425                              (block-hash-table-ref (vlist-base vhash)
426                                                    khash)))
427             (max-offset (vlist-offset vhash))
428             (result     init))
429
430    (let ((answer (and offset (block-ref base offset))))
431      (cond ((and (pair? answer)
432                  (<= offset max-offset)
433                  (let ((answer-key (caar answer)))
434                    (equal? key answer-key)))
435             (let ((result      (proc (cdar answer) result))
436                   (next-offset (cdr answer)))
437               (loop base khash next-offset max-offset result)))
438            ((and (pair? answer) (cdr answer))
439             =>
440             (lambda (next-offset)
441               (loop base khash next-offset max-offset result)))
442            (else
443             (let ((next-base (block-base base)))
444               (if (and next-base (> (block-size next-base) 0))
445                   (let* ((khash  (hash key (block-size next-base)))
446                          (offset (block-hash-table-ref next-base khash)))
447                     (loop next-base khash offset (block-offset base)
448                           result))
449                   result)))))))
450
451;; Fold over all the values associated with {key} in {vhash}, with
452;; each call to {proc} having the form {(proc value result)},
453;; where {result} is the result of the previous call to {proc} and
454;; {init} the value of {result} for the first call to {proc}."
455(define (vhash-fold* proc init key vhash
456                      #!optional (equal? equal?) (hash hash))
457  (%vhash-fold* proc init key vhash equal? hash))
458
459;; Same as {vhash-fold*}, but using {eq?-hash} and {eq?}.
460(define (vhash-foldq* proc init key vhash)
461  (%vhash-fold* proc init key vhash eq? eq?-hash))
462
463;; Same as {vhash-fold*}, but using {eqv?-hash} and {eqv?}.
464(define (vhash-foldv* proc init key vhash)
465  (%vhash-fold* proc init key vhash eqv? eqv?-hash))
466
467;; A specialization of `vhash-fold*' that stops when the first value
468;; associated with KEY is found or when the end-of-list is reached.
469;; Inline to make sure `vhash-assq' gets to use the `eq?' instruction
470;; instead of calling the `eq?'  subr.
471(define-inline (%vhash-assoc key vhash equal? hash)
472  (define khash
473    (let ((size (block-size (vlist-base vhash))))
474      (and (> size 0) (hash key size))))
475
476  (let loop ((base       (vlist-base vhash))
477             (khash      khash)
478             (offset     (and khash
479                              (block-hash-table-ref (vlist-base vhash)
480                                                    khash)))
481             (max-offset (vlist-offset vhash)))
482    (let ((answer (and offset (block-ref base offset))))
483      (cond ((and (pair? answer)
484                  (<= offset max-offset)
485                  (let ((answer-key (caar answer)))
486                    (equal? key answer-key)))
487             (car answer))
488            ((and (pair? answer) (cdr answer))
489             =>
490             (lambda (next-offset)
491               (loop base khash next-offset max-offset)))
492            (else
493             (let ((next-base (block-base base)))
494               (and next-base
495                    (> (block-size next-base) 0)
496                    (let* ((khash  (hash key (block-size next-base)))
497                           (offset (block-hash-table-ref next-base khash)))
498                      (loop next-base khash offset
499                            (block-offset base))))))))))
500
501;; Return the first key/value pair from {vhash} whose key is equal to
502;; {key} according to the {equal?} equality predicate.
503(define (vhash-assoc key vhash #!optional (equal? equal?) (hash hash))
504  (%vhash-assoc key vhash equal? hash))
505
506;; Return the first key/value pair from {vhash} whose key is {eq?} to
507;; {key}.
508(define (vhash-assq key vhash)
509  (%vhash-assoc key vhash eq? eq?-hash))
510
511;; Return the first key/value pair from {vhash} whose key is {eqv?} to
512;; {key}.
513(define (vhash-assv key vhash)
514  (%vhash-assoc key vhash eqv? eqv?-hash))
515
516;; Remove all associations from {vhash} with {key}, comparing keys with {equal?}.
517(define (vhash-delete key vhash #!optional (equal? equal?) (hash hash))
518  (if (vhash-assoc key vhash equal? hash)
519      (vlist-fold (lambda (k+v result)
520                    (let ((k (car k+v))
521                          (v (cdr k+v)))
522                      (if (equal? k key)
523                          result
524                          (vhash-cons k v result hash))))
525                  vlist-null
526                  vhash)
527      vhash))
528
529(define vhash-delq (cut vhash-delete <> <> eq? eq?-hash))
530(define vhash-delv (cut vhash-delete <> <> eqv? eqv?-hash))
531
532;; Fold over the key/pair elements of {vhash} from left to right, with
533;; each call to {proc} having the form {({proc} key value result)},
534;; where {result} is the result of the previous call to {proc} and
535;; {init} the value of {result} for the first call to {proc}.
536
537(define (vhash-fold proc init vhash)
538  (vlist-fold (lambda (key+value result)
539                (proc (car key+value) (cdr key+value)
540                      result))
541              init
542              vhash))
543
544;; Fold over the key/pair elements of {vhash} from right to left, with
545;; each call to {proc} having the form {({proc} key value result)},
546;; where {result} is the result of the previous call to {proc} and
547;; {init} the value of {result} for the first call to {proc}.
548
549(define (vhash-fold-right proc init vhash)
550  (vlist-fold-right (lambda (key+value result)
551                      (proc (car key+value) (cdr key+value)
552                            result))
553                    init
554                    vhash))
555
556;; Return the vhash corresponding to {alist}, an association list.
557
558(define (alist->vhash alist #!optional (hash hash))
559  (fold-right (lambda (pair result)
560                (vhash-cons (car pair) (cdr pair) result hash))
561              vlist-null
562              alist))
563
564)
Note: See TracBrowser for help on using the repository browser.