source: project/release/4/random-test/trunk/random-test-eggdoc.scm @ 14480

Last change on this file since 14480 was 14480, checked in by Ivan Raikov, 11 years ago

ported random-test to Chicken 4

File size: 8.2 KB
Line 
1
2(use eggdoc)
3
4(define doc
5  `((eggdoc:begin
6     (name "random-test")
7     (description "Some simple randomness tests for a sequence of numbers.")
8     (author (url "http://chicken.wiki.br/users/ivan-raikov" "Ivan Raikov"))
9
10     (history 
11      (version "1.7" "Ported to Chicken 4")
12      (version "1.6" "Removed testeez dependency")
13      (version "1.5" "Build script updated for better cross-platform compatibility")
14      (version "1.4" "eggdoc documentation fix")
15      (version "1.3" "License upgrade to GPL v3.")
16      (version "1.2" "Simplified the interface of random-test procedure")
17      (version "1.1" "Bug fix in the bin update code")
18      (version "1.0" "Initial release"))
19
20     (requires easyffi)
21
22     (usage "(require-extension random-test)")
23
24     (download "random-test.egg")
25
26     (documentation
27     
28      (p "The " (tt "random-test") " library provides a procedure that applies various "
29         "statistical tests to a sequence of random numerical values, and a procedure to "
30         "reports the results of those tests in convenient form. ")
31
32      (p "The library is useful for evaluating pseudorandom number generators for "
33         "statistical sampling applications, compression algorithms, and other applications "
34         "where the properties of a random sequence are of interest. The code in the library "
35         "is based on the " (url "http://www.fourmilab.ch/random/" "ent program") 
36         " by John Walker.")
37
38
39      (subsection "Procedures"
40
41                  (procedure "make-random-test:: [CAR CDR NULL?] -> (SEQ -> RANDOM-STATS)"
42                             (p "This procedure creates a procedure that reads in a sequence "
43                                "of numerical values, and performs statistical tests to tests the "
44                                "randomness of the elements of the sequence.")
45                             (p "By default, the sequence is expected to be a list; however, if a "
46                                "different sequential data structure is used (e.g. a stream), "
47                                "the optional arguments " (tt "CAR, CDR, NULL?") " may be used to "
48                                "specify procedures that perform the corresponding operations on "
49                                "the input sequence. ")
50                             (p "The returned procedure is of the form " (tt "SEQ -> RANDOM-STATS")
51                                ", where " (tt "SEQ") " is the sequence and the returned value is an "
52                                "alist with the following fields: "
53                                (symbol-table 
54                                 (describe "'ent" 
55                                           "entropy measure")
56                                 (describe "'chisq"
57                                           "the result of the Chi-Square test (see next section)")
58                                 (describe "'pochisq"
59                                           "the calculated probability of the Chi-Square test (see next section)")
60                                 (describe "'mean"
61                                           "the mean of the values in the input sequence")
62                                 (describe "'min"
63                                           "the minimum of the values in the input sequence")
64                                 (describe "'max"
65                                           "the maximum of the values in the input sequence")
66                                 (describe "'montepi"
67                                           "Monte Carlo value of pi (see next section)")
68                                 (describe "'scc"
69                                           "the serial correlation coefficient (see next section)"))))
70                 
71                  (procedure "format-random-stats:: OUT * RANDOM-STATS -> UNDEFINED"
72                             
73                             (p "Given an output port, and the value returned by the random test "
74                                "procedure, this procedure outputs a human readable interpretation "
75                                "of the test results.")))
76
77      (subsection "Tests"
78
79                  (ol 
80                   (li "Chi-Square Test"
81                       (p "In general, the Chi-Square distribution for an experiment with " 
82                          (tt "k") " possible outcomes, performed " (tt "n") " times, "
83                          "in which " (tt "Y1, Y2,... Yk") " are the number of experiments "
84                          "which resulted in each possible outcome, and probabilities of each "
85                          "outcome " (tt "p1, p2,... pk")", is given as: " 
86                          (img (@ (src "random-test.fig1.png")
87                                  (alt "\\chi^{2} = \\sum_{1 <= i <= k}{\\frac{(Y_{i} - m  p_{i})^{2}}{np_{i}}}"))))
88                       
89                       (p (tt "\\Chi^2") " will grow larger as the measured results diverge from those "
90                          "expected by pure chance. The probability " (tt "Q") " that a Chi-Square value "
91                          "calculated for an experiment with d degrees of freedom (where " (tt "d=k-1") 
92                          ", one less the number of possible outcomes) is due to chance is: "
93                          (img (@ (src "random-test.fig2.png")
94                                  (alt "Q(\\Chi^2,d) = [2^{d/2} * \\Gamma(d/2)]^{-1} * \\int_{\\Chi^2}^{\\infty}(t)^{d/2-1} * exp(-t/2) * dt"))))
95
96                       (p "Where Gamma is the generalization of the factorial function to real "
97                          "and complex arguments: " 
98                          (img (@ (src "random-test.fig3.png")
99                                  (alt "\\Gamma(x) = \\int_{0}^{\\infty} t^{x-1} * exp(-t) * dt"))))
100                       
101                       (p "There is no closed form solution for Q, so it must be evaluated "
102                          "numerically.  Note that the probability calculated from the " 
103                          (tt "\\Chi^2") " is an approximation which is valid only for large "
104                          "values of n, and is therefore only meaningful when calculated "
105                          "from a large number of independent experiments. ")
106                       
107                       (p "In this implementation, the Chi-Square distribution is calculated "
108                          "for the list of values given as argument to the random-test "
109                          "procedure and expressed as an absolute number and a percentage "
110                          "which indicates how frequently a truly random sequence would exceed "
111                          "the value calculated. ")
112                       
113                       (p "The percentage can be interpreted as the degree to which the sequence "
114                          "tested is suspected of being non-random. If the percentage is greater "
115                          "than 99% or less than 1%, the sequence is almost certainly not random. "
116                          "If the percentage is between 99% and 95% or between 1% and 5%, "
117                          "the sequence is suspect. Percentages between 90% and 95% and 5% and 10% "
118                          "indicate the sequence is almost suspect."))
119                       
120                   (li "Arithmetic Mean"
121                       (p "This is simply the result of summing the all the values in the sequence "
122                          "and dividing by the sequence length. If the data are close to random, "
123                          "the mean should be about " (tt "(2^b - 1)/2") " where " (tt "b") 
124                          " is the number of bits used to represent a value. "
125                          "If the mean departs from this value, the values are consistently high or low."))
126                     
127                   (li "Monte Carlo Value for Pi"
128                       (p "Each pair of two values in the input sequence is used as X and Y coordinates "
129                          "within a square with side N (the length of the input sequence). "
130                          "If the distance of the randomly-generated point is less than the radius of a "
131                          "circle inscribed within the square, the pair of values is considered a hit. "
132                          "The percentage of hits can be used to calculate the value of pi: " (pre #<<EOF
133 # points within circle     1/4 * pi * r^2 
134 ----------------------  =  -------------- = 1/4 * pi
135 # points within square          r^2
136EOF
137)
138                         "Therefore, " (pre #<<EOF
139          # points within circle
140 pi = 4 * ----------------------
141          # points within square
142EOF
143))
144       
145                       (p "For very long sequences (this approximation converges very slowly), "
146                          "the value will approach the correct value of Pi if the sequence is close to random."))
147                   
148                   (li "Serial Correlation Coefficient"
149                       (p "This quantity measures the extent to which each value in the sequence "
150                          "depends upon the previous value. For random sequences, this metric "
151                          "(which can be positive or negative) will be close to zero. "
152                          "A non-random sequence such as a text file will yield a serial "
153                          "correlation coefficient of about 0.5. Predictable data will exhibit "
154                          "serial correlation coefficients approaching 1.")))))
155
156
157     (examples (pre #<<EOF
158(require-extension random-test)
159(require-extension srfi-1)
160
161(randomize (current-milliseconds))
162
163(define random-test (make-random-test))
164(define lst (list-tabulate 1000000 (lambda (x) (random 1000000))))
165
166(random-test lst 1000000)
167
168(define stats (random-test lst 1000000))
169
170(format-random-stats stats)
171EOF
172))
173
174     (license
175      "Copyright 2007-2009 Ivan Raikov.
176
177This program is free software: you can redistribute it and/or modify
178it under the terms of the GNU General Public License as published by
179the Free Software Foundation, either version 3 of the License, or (at
180your option) any later version.
181
182This program is distributed in the hope that it will be useful, but
183WITHOUT ANY WARRANTY; without even the implied warranty of
184MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
185General Public License for more details.
186
187A full copy of the GPL license can be found at
188<http://www.gnu.org/licenses/>."))))
189
190(if (eggdoc->html doc) (void))
Note: See TracBrowser for help on using the repository browser.