source: project/bloom-filter/bloom-filter.html @ 5443

Last change on this file since 5443 was 5443, checked in by Kon Lovett, 13 years ago

Chg for misc-extn 3.0

File size: 10.4 KB
Line 
1<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
2<!-- Generated by eggdoc Revision: 1.20  -->
3<html>
4<head>
5<title>Eggs Unlimited - bloom-filter</title><style type="text/css"> <!--
6      CODE {
7            color: #666666;
8          }
9/*   DT.definition EM { font-weight: bold; font-style: normal; } */
10
11     DT.definition { 
12                   background: #eee;
13                   color: black;
14                   padding: 0.2em 1em 0.2em 0.7em;
15                   margin-left: 0.2em;
16border: 1px solid #bbc;
17                   font-family: "Andale Mono", monospace;
18                   /* font-size: 1.2em; */
19                   
20                 }
21     DD {
22                   margin-top: 0.8em;
23                   margin-bottom: 0.8em;
24     }
25     DIV.subsection {
26                    border-top: 1px solid #448;
27                    padding-left: 1em;
28                    margin-bottom: 1.2em;
29     }
30     DIV.subsubsection {
31                    border-top: 1px dotted #99c;
32                    /* border-left: 1px solid #99c; */
33                    padding-left: 1em;
34                    margin-bottom: 1.2em;
35     }
36     DIV.subsubsubsection {
37                    border-top: 1px solid #ddf;
38                    padding-left: 1em;
39                    margin-bottom: 1.2em;
40     }
41
42         DIV.section {
43                 margin-bottom: 1.5em;
44         }
45         a:link {
46                 color: #336;
47         }
48         a:visited { color: #666; }
49         a:active  { color: #966; }
50         a:hover   { color: #669; }
51         body { margin: 0; padding: 0; background: #fff; color: #000; font: 9pt "Lucida Grande", "Verdana", sans-serif; }
52         H2 {
53                 background: #336;
54                 color: #fff;
55                 padding-top: 0.5em;
56                 padding-bottom: 0.5em;
57                 padding-left: 16px;
58                 margin: 0 0 1em 0;
59        }
60        UL LI {
61                list-style: none;
62        }
63        TT {
64                font-family: "Andale Mono", monospace;
65                /* font-size: 1.2em; */
66        }
67        H3 {
68                color: #113;
69                margin-bottom: 0.5em;
70        }
71        H4, H5, H6 {
72                color: #113;
73                margin-bottom: 1.0em;
74        }
75        H5 {
76                font-weight: normal;
77                font-style: italic;
78                font-size: 100%;
79                margin-top: 1.2em;
80        }
81        H6 {
82                font-weight: bold;
83                font-size: 85%;
84                margin-top: 1.2em;
85        }
86     DIV#eggheader {
87         text-align: center;
88                 float: right;
89                 margin-right: 2em;
90     }
91     DIV#header IMG {
92            /* display: block; margin-left: auto; margin-right: auto;  */
93            /* float: right; */
94            border: none;  /* firefox */
95     }
96     DIV#footer {
97                background: #bbd;
98                padding: 0.7em ;
99                border-top: 1px solid #cce;
100     }
101     DIV#footer hr {
102                display: none;
103     }
104     DIV#footer a {
105                float: left;
106     }
107     DIV#revision-history {
108         float: right;
109     }
110     
111     DIV#body {
112                 margin: 1em 1em 1em 16px;
113         }
114
115     DIV#examples PRE {
116       background: #eef;
117       padding: 0.1em;
118       border: 1px solid #aac;
119     }
120     PRE#license, DIV#examples PRE {
121       padding: 0.5em;
122     }
123     DIV#examples PRE {
124       /* font-size: 85%; */
125     }
126     PRE { font-family: "Andale Mono", monospace; }
127     TABLE {
128       background: #eef;
129       padding: 0.2em;
130       border: 1px solid #aac;
131       border-collapse: collapse;
132       width: 100%;
133     }
134     TABLE.symbol-table TD.symbol {
135          width: 15em;
136          font-family: "Andale Mono", monospace;
137          /* font-size: 1.2em; */
138     }
139     TH {
140       text-align: left;
141       border-bottom: 1px solid #aac;
142       padding: 0.25em 0.5em 0.25em 0.5em;
143     } 
144     TD { padding: 0.25em 0.5em 0.25em 0.5em; }
145     --></style></head>
146<body>
147<div id="header">
148<h2>bloom-filter</h2>
149<div id="eggheader"><a href="index.html">
150<img src="egg.jpg" alt="[Picture of an egg]" /></a></div></div>
151<div id="body">
152<div class="section">
153<h3>Description</h3>
154<p>Provides a simple Bloom Filter</p></div>
155<div class="section">
156<h3>Author</h3><a href="mailto:klovett@pacbell.net">Kon Lovett</a></div>
157<div class="section">
158<h3>Version</h3>
159<ul>
160<li>1.101 Dropped :optional</li>
161<li>1.1 Support for &quot;optimal K&quot;</li>
162<li>1.0 Exports</li>
163<li>0.2 Add hash primitives configuration file</li>
164<li>0.1 Initial release</li></ul></div>
165<div class="section">
166<h3>Requires</h3>
167<ul>
168<li>iset</li>
169<li>hashes</li>
170<li>md5</li>
171<li>sha1</li>
172<li>sha2</li>
173<li>tiger-hash</li>
174<li>ripemd</li>
175<li>message-digest</li>
176<li>lookup-table</li>
177<li>mathh</li>
178<li>misc-extn</li></ul></div>
179<div class="section">
180<h3>Usage</h3><tt>(require-extension bloom-filter)</tt></div>
181<div class="section">
182<h3>Download</h3><a href="bloom-filter.egg">bloom-filter.egg</a></div>
183<div class="section">
184<h3>Documentation</h3>
185<div class="subsection">
186<h4>Bloom Filter Object</h4>
187<dt class="definition"><strong>procedure:</strong> (make-bloom-filter M MESSAGE-DIGEST-PRIMITIVE-LIST [K])</dt>
188<dd>
189<p>Returns a bloom-filter object with <tt>M</tt> bits of
190                                        discrimination and a set of hash functions built from the
191                                        supplied <tt>MESSAGE-DIGEST-PRIMITIVE-LIST</tt>. The
192                                        elements of the list of primitives may be an actual primitive
193                                        object or a symbol naming the desired message-digest.</p>
194<p>The number of hash functions, k, is not necessarily the
195                                        same as the number of message-digests. A hash function is
196                                        defined as returning an unsigned 32 bit integer. Most
197                                        message-digests return more than 32 bits of hash. The actual length
198                                        of the hash is divided into 32 bit blocks to get the
199                                        individual hash functions.</p>
200<p>The argument <tt>K</tt> will restrict the actual number
201                                        of hash functions to the &quot;first&quot; k, no matter how many more
202                                        the supplied message-digests create. &quot;First&quot; refers to the order of <tt>MESSAGE-DIGEST-PRIMITIVE-LIST</tt>.</p>
203<p>Selecting the optimal set of message-digests is beyond the
204                                        scope of <tt>make-bloom-filter</tt>.</p></dd>
205<dt class="definition"><strong>procedure:</strong> (bloom-filter-n BLOOM-FILTER)</dt>
206<dd>
207<p>The current population - the number of objects added to the filter.</p></dd>
208<dt class="definition"><strong>procedure:</strong> (bloom-filter-m BLOOM-FILTER)</dt>
209<dd>
210<p>The number of bits of discrimination.</p></dd>
211<dt class="definition"><strong>procedure:</strong> (bloom-filter-k BLOOM-FILTER)</dt>
212<dd>
213<p>The number of hash functions. (See above.)</p></dd>
214<dt class="definition"><strong>procedure:</strong> (bloom-filter-p-false-positive BLOOM-FILTER [N])</dt>
215<dd>
216<p>The probability of false positives for the given population
217                                        size. When <tt>N</tt> is missing the current population is assumed.</p></dd>
218<dt class="definition"><strong>procedure:</strong> (bloom-filter-set! BLOOM-FILTER OBJECT)</dt>
219<dd>
220<p>Add the specified <tt>OBJECT</tt> to the indicated <tt>BLOOM-FILTER</tt>.</p></dd>
221<dt class="definition"><strong>procedure:</strong> (bloom-filter-exists? BLOOM-FILTER OBJECT)</dt>
222<dd>
223<p>Is the specified <tt>OBJECT</tt> in the indicated <tt>BLOOM-FILTER</tt>?</p></dd></div>
224<div class="subsection">
225<h4>Auxiliary Procedures</h4>
226<dt class="definition"><strong>procedure:</strong> (bloom-filter:optimum-k N M)</dt>
227<dd>
228<p>Optimal count of hash functions for the given population
229                                        size <tt>N</tt> and <tt>M</tt> bits of discrimination.</p></dd>
230<dt class="definition"><strong>procedure:</strong> (bloom-filter:optimum-m K N)</dt>
231<dd>
232<p>Optimal count of bits of discrimination for the given
233                                        population size <tt>N</tt> and <tt>K</tt> number of hash
234                                        functions.</p></dd>
235<dt class="definition"><strong>procedure:</strong> (bloom-filter:p-false-positive K N M)</dt>
236<dd>
237<p>The probability of false positives for the
238                                        population size <tt>N</tt> assuming <tt>K</tt> hash
239                                        functions and <tt>M</tt> bits of discrimination.</p></dd>
240<dt class="definition"><strong>procedure:</strong> (bloom-filter:desired-m P N [K])</dt>
241<dd>
242<p>Calculates a near-optimal number of bits of discrimination
243                                        to meet the desired probability of false positives <tt>P</tt>,
244                                        with the given population size <tt>N</tt> and number of hash
245                                        functions <tt>K</tt>. When the <tt>K</tt> parameter is missing the <tt>bloom-filter:optimum-k</tt> procedure is used to calculate a
246                                        value.</p>
247<p>Returns multiple values: the calculated M, K, and P.
248                                        The calculated probability may be lower than the desired.</p></dd>
249<dt class="definition"><strong>procedure:</strong> (bloom-filter:actual-k MESSAGE-DIGEST-PRIMITIVE-LIST)</dt>
250<dd>
251<p>Calculates the actual number of hash functions for the <tt>MESSAGE-DIGEST-PRIMITIVE-LIST</tt>. The elements of the list of
252                                        primitives may be an actual primitive object or a symbol naming
253                                        the desired message-digest.</p></dd>
254<dt class="definition"><strong>procedure:</strong> (bloom-filter:p-random-one-bit K N M)</dt>
255<dd>
256<p>Guess.</p></dd></div>
257<div class="subsection">
258<h4>Hash Primitives Configuration File</h4>
259<p>A file, &quot;hash-primitives-info&quot;, is located in the Chicken
260                                Repository. The file contains the information needed by
261                                bloom-filter to load hash primitives at runtime. The file is
262                                self-documenting.</p></div></div>
263<div class="section">
264<h3>References</h3>
265<ul>
266<li><a href="http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html#SECTION00053000000000000000">Nice exposition of Bloom Filter False Positive Probability.</a></li>
267<li><a href="http://www.cc.gatech.edu/fac/Pete.Manolios/bloom-filters/calculator.html">A web interface for a better version of <tt>bloom-filter:desired-m</tt>.</a></li></ul></div>
268<div class="section">
269<h3>License</h3>
270<pre>Copyright (c) 2006, Kon Lovett.  All rights reserved.
271
272Permission is hereby granted, free of charge, to any person obtaining a
273copy of this software and associated documentation files (the &quot;Software&quot;),
274to deal in the Software without restriction, including without limitation
275the rights to use, copy, modify, merge, publish, distribute, sublicense,
276and/or sell copies of the Software, and to permit persons to whom the
277Software is furnished to do so, subject to the following conditions:
278
279The above copyright notice and this permission notice shall be included
280in all copies or substantial portions of the Software.
281
282THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
283IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
284FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
285THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
286OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
287ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
288OTHER DEALINGS IN THE SOFTWARE.</pre></div></div>
289<div id="footer">
290<hr /><a href="index.html">&lt; Egg index</a>
291<div id="revision-history">$Revision$ $Date$</div>&nbsp;</div></body></html>
Note: See TracBrowser for help on using the repository browser.