1 | ;; |
---|
2 | ;; |
---|
3 | ;; A dictionary structure implemented with a counting Bloom filter. |
---|
4 | ;; |
---|
5 | ;; Based on the Shared-node Fast Hash Table (SFHT) data structure |
---|
6 | ;; described by Song, et al., in _Fast Hash Table Lookup Using |
---|
7 | ;; Extended Bloom Filter: An Aid to Network Processing_. (SIGCOMM'05) |
---|
8 | ;; |
---|
9 | ;; |
---|
10 | ;; Copyright 2007-2009 Ivan Raikov. |
---|
11 | ;; |
---|
12 | ;; |
---|
13 | ;; This program is free software: you can redistribute it and/or modify |
---|
14 | ;; it under the terms of the GNU General Public License as published by |
---|
15 | ;; the Free Software Foundation, either version 3 of the License, or |
---|
16 | ;; (at your option) any later version. |
---|
17 | ;; |
---|
18 | ;; This program is distributed in the hope that it will be useful, but |
---|
19 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
21 | ;; General Public License for more details. |
---|
22 | ;; |
---|
23 | ;; A full copy of the GPL license can be found at |
---|
24 | ;; <http://www.gnu.org/licenses/>.")))) |
---|
25 | ;; |
---|
26 | |
---|
27 | (module sfht |
---|
28 | |
---|
29 | (make-sfht) |
---|
30 | |
---|
31 | (import scheme chicken data-structures) |
---|
32 | |
---|
33 | (require-extension srfi-1 matchable sparse-vectors) |
---|
34 | |
---|
35 | (define ln06185 (log 0.6185)) |
---|
36 | (define ln05 (log 0.5)) |
---|
37 | |
---|
38 | (define (sfht:error x . rest) |
---|
39 | (let ((port (open-output-string))) |
---|
40 | (let loop ((objs (cons x rest))) |
---|
41 | (if (null? objs) |
---|
42 | (begin |
---|
43 | (newline port) |
---|
44 | (error 'sfht (get-output-string port))) |
---|
45 | (begin (display (car objs) port) |
---|
46 | (display " " port) |
---|
47 | (loop (cdr objs))))))) |
---|
48 | |
---|
49 | (define (make-sfht n p make-random-state random! key->vector key-vector-ref key-vector-length . rest) |
---|
50 | |
---|
51 | (let-optionals rest ((key-equal? equal?)) |
---|
52 | |
---|
53 | (define m (round (+ 1.0 (* (* n (log p)) ln06185)))) |
---|
54 | (define k (inexact->exact (round (+ 0.5 (/ (log p) ln05))))) |
---|
55 | |
---|
56 | (define default (list 0 (list))) |
---|
57 | |
---|
58 | (define ba (make-sparse-vector default)) |
---|
59 | |
---|
60 | (define size 0) |
---|
61 | |
---|
62 | ;; Hash functions based on uniform pseudo-random numbers |
---|
63 | (define rng-states (list-tabulate k (lambda (i) |
---|
64 | (make-random-state i)))) |
---|
65 | |
---|
66 | ;; Pre-calculate hash function coefficients for vectors of size up |
---|
67 | ;; to 127 elements |
---|
68 | (define H 127) |
---|
69 | |
---|
70 | (define random-coeffs |
---|
71 | (map (lambda (st) |
---|
72 | (list-tabulate H (lambda (x) (random! st)))) |
---|
73 | rng-states)) |
---|
74 | |
---|
75 | (define (extend-random-coeffs! H1) |
---|
76 | (let ((d (- H1 H))) |
---|
77 | (if (< d 1) (sfht:error 'extend-random-coeffs! |
---|
78 | ": the new number of coefficients is less than the old one")) |
---|
79 | (let ((cfs1 (map (lambda (st cfs) |
---|
80 | (let ((cfs1 (list-tabulate d (lambda (x) (random! st))))) |
---|
81 | (append cfs cfs1))) |
---|
82 | rng-states random-coeffs))) |
---|
83 | (set! random-coeffs cfs1) |
---|
84 | (set! H H1)))) |
---|
85 | |
---|
86 | (define (and-coeffs h kv) |
---|
87 | (let loop ((i 0) (h h) (ax (list))) |
---|
88 | (if (null? h) (reverse ax) |
---|
89 | (loop (fx+ 1 i) (cdr h) (cons (* (car h) (key-vector-ref kv i)) ax))))) |
---|
90 | |
---|
91 | (define (hash key k n) |
---|
92 | (let* ((kv (key->vector key)) |
---|
93 | (kvl (key-vector-length kv))) |
---|
94 | (if (fx> kvl H) |
---|
95 | (extend-random-coeffs! kvl)) |
---|
96 | (if (fx= kvl 0) (map car random-coeffs) |
---|
97 | (let loop ((i 0) (hh random-coeffs) (ax (list))) |
---|
98 | (if (null? hh) |
---|
99 | (map (lambda (lst) (apply bitwise-xor lst)) (reverse ax)) |
---|
100 | (loop (fx+ 1 i) (cdr hh) |
---|
101 | (cons (and-coeffs (car hh) kv) ax))))))) |
---|
102 | |
---|
103 | (define (insert! key x) |
---|
104 | (let ((h (hash key k n)) |
---|
105 | (b (cons (cons key x) (list)))) |
---|
106 | (let loop ((i h)) |
---|
107 | (if (not (null? i)) |
---|
108 | (let* ((index (car i)) |
---|
109 | (bkt (sparse-vector-ref ba index))) |
---|
110 | (let-values (((sz lst) (match bkt |
---|
111 | ((sz lst) (values sz lst)) |
---|
112 | (else (sfht:error 'insert! ": invalid bucket " bkt " at index " index))))) |
---|
113 | (if (fx= 0 sz) |
---|
114 | (sparse-vector-set! ba index (list 1 (list (cons key x)))) |
---|
115 | (beginxb |
---|
116 | (let tail ((k sz) (lst lst) (prev #f)) |
---|
117 | (if (null? lst) |
---|
118 | (let ((b (list (cons key x)))) |
---|
119 | (if prev (set-cdr! prev b) (set-cdr! bkt b))) |
---|
120 | (if (fx<= k 0) |
---|
121 | (set-cdr! bkt (cons (cons key x) (cdr bkt))) |
---|
122 | (tail (fx- k 1) (cdr lst) lst)))) |
---|
123 | (set-car! bkt (fx+ 1 (car bkt))))) |
---|
124 | (loop (cdr i)))))) |
---|
125 | (set! size (fx+ 1 size)) |
---|
126 | #f)) |
---|
127 | |
---|
128 | (define (delete! key) |
---|
129 | (define found? #f) |
---|
130 | (let ((h (hash key k n))) |
---|
131 | (let loop ((i h)) |
---|
132 | (if (not (null? i)) |
---|
133 | (let* ((index (car i)) |
---|
134 | (bkt (sparse-vector-ref ba index))) |
---|
135 | (let-values (((sz lst) (match bkt |
---|
136 | ((sz lst) (values sz lst)) |
---|
137 | (else (sfht:error 'remove! ": invalid bucket " bkt " at index " i))))) |
---|
138 | (let bktloop ((k sz) (lst lst) (prev #f)) |
---|
139 | (if (not (null? lst)) |
---|
140 | (if (fx< 0 k) |
---|
141 | (match lst |
---|
142 | (((key1 . _) . rest) |
---|
143 | (if (key-equal? key key1) |
---|
144 | (begin |
---|
145 | (set! found? #t) |
---|
146 | (set-car! bkt (fx- (car bkt) 1)) |
---|
147 | (if prev (set-cdr! prev rest) (set-cdr! bkt (list rest)))) |
---|
148 | (bktloop (fx- k 1) rest lst))) |
---|
149 | (else (sfht:error 'remove! ": invalid bucket list " lst))))))) |
---|
150 | (loop (cdr i))))) |
---|
151 | (if found? (set! size (fx- size 1))) |
---|
152 | found?)) |
---|
153 | |
---|
154 | (define (min-bucket bkts . rest) |
---|
155 | (let-optionals rest ((minb (or (null? bkts) (car bkts)))) |
---|
156 | (if (null? bkts) minb |
---|
157 | (let ((bkt (car bkts))) |
---|
158 | (if (fx< (car bkt) (car minb)) |
---|
159 | (min-bucket (cdr bkts) bkt) |
---|
160 | (min-bucket (cdr bkts) minb)))))) |
---|
161 | |
---|
162 | (define (find key) |
---|
163 | (let* ((h (hash key k n)) |
---|
164 | (bkts (map (lambda (i) (sparse-vector-ref ba i)) h)) |
---|
165 | (minb (min-bucket bkts))) |
---|
166 | (let loop ((k (car minb)) (lst (cadr minb))) |
---|
167 | (if (or (fx= 0 k) (null? lst)) #f |
---|
168 | (match lst |
---|
169 | (((key1 . _) . rest) |
---|
170 | (if (key-equal? key key1) (car lst) |
---|
171 | (loop (fx- k 1) rest))) |
---|
172 | (else (sfht:error 'find ": invalid bucket list " lst))))))) |
---|
173 | |
---|
174 | (define (debugprint) |
---|
175 | (let ((bkts (sparse-vector->list ba))) |
---|
176 | (for-each |
---|
177 | (lambda (bkt) |
---|
178 | (display "bucket:") |
---|
179 | (display " sz = ") |
---|
180 | (display (car bkt)) |
---|
181 | (display " lst = ") |
---|
182 | (display (cdr bkt)) |
---|
183 | (display #\newline)) |
---|
184 | bkts))) |
---|
185 | |
---|
186 | |
---|
187 | (define (apply-default-clause src key default-clause) |
---|
188 | (cond |
---|
189 | ((null? default-clause) |
---|
190 | (sfht:error src ": key " key " was not found in the SFHT ")) |
---|
191 | ((procedure? (car default-clause)) ((car default-clause))) |
---|
192 | (else (car default-clause)))) |
---|
193 | |
---|
194 | |
---|
195 | ;; Dispatcher |
---|
196 | (lambda (selector) |
---|
197 | (case selector |
---|
198 | ((get) |
---|
199 | (lambda (key . default-clause) |
---|
200 | (or (find key) (apply-default-clause 'get key default-clause)))) |
---|
201 | |
---|
202 | ((delete!) |
---|
203 | (lambda (key . default-clause) |
---|
204 | (or (delete! key) (apply-default-clause 'delete! key default-clause)))) |
---|
205 | |
---|
206 | ((put!) insert!) |
---|
207 | |
---|
208 | ((empty?) (fx= size 0)) |
---|
209 | |
---|
210 | ((size) size) |
---|
211 | |
---|
212 | ((clear!) (begin |
---|
213 | (set! ba (make-sparse-vector default)) |
---|
214 | (set! size 0))) |
---|
215 | |
---|
216 | ((debugprint) (debugprint)) |
---|
217 | (else |
---|
218 | (sfht:error "Unknown message " selector " sent to an SFHT")))))) |
---|
219 | |
---|
220 | ) |
---|