1 | ;;; Port of cl-stats.lisp to Chicken Scheme, by Peter Lane, 2010. |
---|
2 | |
---|
3 | ;;; This program is free software: you can redistribute it and/or modify |
---|
4 | ;;; it under the terms of the GNU General Public License as published by |
---|
5 | ;;; the Free Software Foundation, either version 3 of the License, or |
---|
6 | ;;; (at your option) any later version. |
---|
7 | |
---|
8 | ;;; This program is distributed in the hope that it will be useful, |
---|
9 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
10 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
11 | ;;; GNU General Public License for more details. |
---|
12 | |
---|
13 | ;;; You should have received a copy of the GNU General Public License |
---|
14 | ;;; along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
15 | |
---|
16 | ;;; ------- Source of methods ------- |
---|
17 | ;;; |
---|
18 | ;;; The formulas and methods used are largely taken from Bernard Rosner, |
---|
19 | ;;; "Fundamentals of Biostatistics," 5th edition. "Rosner x" is a page |
---|
20 | ;;; number. |
---|
21 | |
---|
22 | ;;; These abreviations used in function and variable names: |
---|
23 | ;;; ci = confidence interval |
---|
24 | ;;; cdf = cumulative density function |
---|
25 | ;;; ge = greater than or equal to |
---|
26 | ;;; le = less than or equal to |
---|
27 | ;;; pdf = probability density function |
---|
28 | ;;; sd = standard deviation |
---|
29 | ;;; rxc = rows by columns |
---|
30 | ;;; sse = sample size estimate |
---|
31 | |
---|
32 | ;;; ------- Original copyright notices from cl-stats.lisp ------- |
---|
33 | |
---|
34 | ;;; Statistical functions in Common Lisp. Version 1.04 Feb 24, 2005 |
---|
35 | ;;; |
---|
36 | ;;; This code is copyright (c) 2000, 2001, 2002, 2005 by Larry Hunter |
---|
37 | ;;; (Larry.Hunter@uchsc.edu) except where otherwise noted. (Released |
---|
38 | ;;; under GPL version 2.) |
---|
39 | |
---|
40 | ;;; CLASP utilities marked as such in comment: |
---|
41 | |
---|
42 | ;;; Copyright (c) 1990 - 1994 University of Massachusetts |
---|
43 | ;;; Department of Computer Science |
---|
44 | ;;; Experimental Knowledge Systems Laboratory |
---|
45 | ;;; Professor Paul Cohen, Director. |
---|
46 | ;;; All rights reserved. |
---|
47 | |
---|
48 | ;;; Permission to use, copy, modify and distribute this software and its |
---|
49 | ;;; documentation is hereby granted without fee, provided that the above |
---|
50 | ;;; copyright notice of EKSL, this paragraph and the one following appear |
---|
51 | ;;; in all copies and in supporting documentation. |
---|
52 | ;;; EKSL makes no representation about the suitability of this software for any |
---|
53 | ;;; purposes. It is provided "AS IS", without express or implied warranties |
---|
54 | ;;; including (but not limited to) all implied warranties of merchantability |
---|
55 | ;;; and fitness for a particular purpose, and notwithstanding any other |
---|
56 | ;;; provision contained herein. In no event shall EKSL be liable for any |
---|
57 | ;;; special, indirect or consequential damages whatsoever resulting from loss |
---|
58 | ;;; of use, data or profits, whether in an action of contract, negligence or |
---|
59 | ;;; other tortuous action, arising out of or in connection with the use or |
---|
60 | ;;; performance of this software, even if EKSL is advised of the possiblity of |
---|
61 | ;;; such damages. |
---|
62 | |
---|
63 | ;;; For more information write to clasp-support@cs.umass.edu |
---|
64 | |
---|
65 | ;;; ------------------------------------------------------------------------------- |
---|
66 | |
---|
67 | (module |
---|
68 | statistics |
---|
69 | ( |
---|
70 | ; utilities |
---|
71 | average-rank |
---|
72 | beta-incomplete |
---|
73 | bin-and-count |
---|
74 | combinations |
---|
75 | factorial |
---|
76 | find-critical-value |
---|
77 | fisher-z-transform |
---|
78 | gamma-incomplete |
---|
79 | gamma-ln |
---|
80 | permutations |
---|
81 | random-normal |
---|
82 | random-pick |
---|
83 | random-sample |
---|
84 | random-weighted-sample |
---|
85 | sign |
---|
86 | square |
---|
87 | cumsum |
---|
88 | |
---|
89 | ; descriptive statistics |
---|
90 | mean |
---|
91 | median |
---|
92 | mode |
---|
93 | geometric-mean |
---|
94 | range |
---|
95 | percentile |
---|
96 | variance |
---|
97 | standard-deviation |
---|
98 | coefficient-of-variation |
---|
99 | standard-error-of-the-mean |
---|
100 | mean-sd-n |
---|
101 | |
---|
102 | ; distributional functions |
---|
103 | binomial-probability |
---|
104 | binomial-cumulative-probability |
---|
105 | binomial-ge-probability |
---|
106 | binomial-le-probability |
---|
107 | poisson-probability |
---|
108 | poisson-cumulative-probability |
---|
109 | poisson-ge-probability |
---|
110 | normal-pdf |
---|
111 | convert-to-standard-normal |
---|
112 | phi |
---|
113 | z |
---|
114 | t-distribution |
---|
115 | chi-square |
---|
116 | chi-square-cdf |
---|
117 | |
---|
118 | ; Confidence intervals |
---|
119 | binomial-probability-ci |
---|
120 | poisson-mu-ci |
---|
121 | normal-mean-ci |
---|
122 | normal-mean-ci-on-sequence |
---|
123 | normal-variance-ci |
---|
124 | normal-variance-ci-on-sequence |
---|
125 | normal-sd-ci |
---|
126 | normal-sd-ci-on-sequence |
---|
127 | |
---|
128 | ; Hypothesis testing |
---|
129 | ; (parametric) |
---|
130 | z-test |
---|
131 | z-test-on-sequence |
---|
132 | t-test-one-sample |
---|
133 | t-test-one-sample-on-sequence |
---|
134 | t-test-paired |
---|
135 | t-test-paired-on-sequences |
---|
136 | t-test-two-sample |
---|
137 | t-test-two-sample-on-sequences |
---|
138 | f-test |
---|
139 | chi-square-test-one-sample |
---|
140 | binomial-test-one-sample |
---|
141 | binomial-test-two-sample |
---|
142 | fisher-exact-test |
---|
143 | mcnemars-test |
---|
144 | poisson-test-one-sample |
---|
145 | ; (non parametric) |
---|
146 | sign-test |
---|
147 | sign-test-on-sequence |
---|
148 | wilcoxon-signed-rank-test |
---|
149 | wilcoxon-signed-rank-test-on-sequences |
---|
150 | chi-square-test-rxc |
---|
151 | chi-square-test-for-trend |
---|
152 | |
---|
153 | ; Sample size estimates |
---|
154 | t-test-one-sample-sse |
---|
155 | t-test-two-sample-sse |
---|
156 | t-test-paired-sse |
---|
157 | binomial-test-one-sample-sse |
---|
158 | binomial-test-two-sample-sse |
---|
159 | binomial-test-paired-sse |
---|
160 | correlation-sse |
---|
161 | |
---|
162 | ; Correlation and regression |
---|
163 | linear-regression |
---|
164 | correlation-coefficient |
---|
165 | correlation-test-two-sample |
---|
166 | correlation-test-two-sample-on-sequences |
---|
167 | spearman-rank-correlation |
---|
168 | |
---|
169 | ; Significance test functions |
---|
170 | t-significance |
---|
171 | f-significance |
---|
172 | ; (chi-square significance is calculated from chi-square-cdf |
---|
173 | ; in ways depending on the problem) |
---|
174 | |
---|
175 | ;; The Lambert W function, also called the omega function or product logarithm. |
---|
176 | lambert-W0 |
---|
177 | lambert-Wm1 |
---|
178 | ) |
---|
179 | |
---|
180 | (import scheme (chicken base) (chicken foreign) (chicken format) (chicken keyword) (chicken sort)) |
---|
181 | (import srfi-1 |
---|
182 | (only srfi-13 string<) |
---|
183 | srfi-25 |
---|
184 | srfi-69 |
---|
185 | vector-lib) |
---|
186 | |
---|
187 | ;; --------------------------------------------------------------------------- |
---|
188 | ;; Include functions from GNU Scientific Library |
---|
189 | |
---|
190 | #> |
---|
191 | #include <assert.h> |
---|
192 | #include <gsl/gsl_rng.h> |
---|
193 | #include <gsl/gsl_sf_erf.h> |
---|
194 | #include <gsl/gsl_sf_gamma.h> |
---|
195 | #include <gsl/gsl_sf_lambert.h> |
---|
196 | |
---|
197 | const gsl_rng_type *statistics_rng_T; |
---|
198 | gsl_rng *statistics_rng_st; |
---|
199 | |
---|
200 | void statistics_rng_init() |
---|
201 | { |
---|
202 | gsl_rng_env_setup(); |
---|
203 | |
---|
204 | statistics_rng_T = gsl_rng_default; |
---|
205 | assert(statistics_rng_T != NULL); |
---|
206 | statistics_rng_st = gsl_rng_alloc (statistics_rng_T); |
---|
207 | assert(statistics_rng_st != NULL); |
---|
208 | } |
---|
209 | |
---|
210 | gsl_rng *statistics_rng_get_state () |
---|
211 | { |
---|
212 | return statistics_rng_st; |
---|
213 | } |
---|
214 | <# |
---|
215 | |
---|
216 | (define rng-init (foreign-lambda void "statistics_rng_init")) |
---|
217 | (define rng-state (foreign-lambda nonnull-c-pointer "statistics_rng_get_state")) |
---|
218 | (define gsl-random-uniform (foreign-lambda double "gsl_rng_uniform" nonnull-c-pointer)) |
---|
219 | (define gsl-random-uniform-int (foreign-lambda unsigned-int "gsl_rng_uniform_int" nonnull-c-pointer unsigned-int)) |
---|
220 | |
---|
221 | ;; The principal branch of the Lambert W function |
---|
222 | (define lambert-W0 |
---|
223 | (foreign-lambda double "gsl_sf_lambert_W0" double)) |
---|
224 | |
---|
225 | ;;The secondary real-valued branch of the Lambert W function |
---|
226 | (define lambert-Wm1 |
---|
227 | (foreign-lambda double "gsl_sf_lambert_Wm1" double)) |
---|
228 | |
---|
229 | (define beta-incomplete |
---|
230 | (foreign-lambda double "gsl_sf_beta_inc" double double double)) |
---|
231 | |
---|
232 | (define (random-uniform) |
---|
233 | (gsl-random-uniform (rng-state))) |
---|
234 | |
---|
235 | (define (random-uniform-int n) |
---|
236 | (gsl-random-uniform-int (rng-state) n)) |
---|
237 | |
---|
238 | ;; returns complementary normalised incomplete Gamma Function |
---|
239 | (define (gamma-incomplete a x) |
---|
240 | (define gamma-inc (foreign-lambda double "gsl_sf_gamma_inc_P" double double)) |
---|
241 | (values (if (= x 0.0) 0.0 (gamma-inc a x)) |
---|
242 | (gamma-ln a))) |
---|
243 | |
---|
244 | (define (gamma-ln x) |
---|
245 | (define lngamma (foreign-lambda double "gsl_sf_lngamma" double)) |
---|
246 | (cond ((<= x 0) |
---|
247 | (error "Argument to gamma-ln must be positive")) |
---|
248 | ((> x 1.0e302) |
---|
249 | (error "Argument to gamma-ln too large")) |
---|
250 | (else |
---|
251 | (lngamma x)))) |
---|
252 | |
---|
253 | ;; --------------------------------------------------------------------------- |
---|
254 | (define :1-beta 'scm-stats-1-beta) |
---|
255 | (define :alpha 'scm-stats-alpha) |
---|
256 | (define :approximate? 'scm-stats-approximate) |
---|
257 | (define :epsilon 'scm-stats-epsilon) |
---|
258 | (define :exact? 'scm-stats-exact) |
---|
259 | (define :mu 'scm-stats-mu) |
---|
260 | (define :one-tailed? 'scm-stats-one-tailed) |
---|
261 | (define :positive 'scm-stats-positive) |
---|
262 | (define :precision 'scm-stats-precision) |
---|
263 | (define :rate 'scm-stats-rate) |
---|
264 | (define :sample-ratio 'scm-stats-sample-ratio) |
---|
265 | (define :sigma 'scm-stats-sigma) |
---|
266 | (define :tails 'scm-stats-tails) |
---|
267 | (define :variances-equal? 'scm-stats-variances-equal) |
---|
268 | (define :variance-significance-cutoff 'scm-stats-variance-significance-cutoff) |
---|
269 | (define :x-tolerance 'scm-stats-x-tolerance) |
---|
270 | (define :y-tolerance 'scm-stats-y-tolerance) |
---|
271 | |
---|
272 | ;; --------------------------------------------------------------------------- |
---|
273 | ;; Some utility procedures |
---|
274 | |
---|
275 | (define PI 3.1415926535897932385) |
---|
276 | |
---|
277 | (define (array-shape arr) |
---|
278 | (let ((r (array-rank arr))) |
---|
279 | (let recur ((d 0) (shape '())) |
---|
280 | (if (< d r) |
---|
281 | (let ((s (- (array-end arr d) (array-start arr d)))) |
---|
282 | (recur (+ 1 d) (cons s shape))) |
---|
283 | (reverse shape))) |
---|
284 | )) |
---|
285 | |
---|
286 | ;; Average rank calculation for non-parametric tests. Ranks are 1 based, |
---|
287 | ;; but lisp is 0 based, so add 1! |
---|
288 | (define (average-rank value sorted-values) |
---|
289 | (let ((first (position value sorted-values)) |
---|
290 | (last (- (- (length sorted-values) 1) |
---|
291 | (position value (reverse sorted-values))))) |
---|
292 | (+ 1 (if (= first last) |
---|
293 | first |
---|
294 | (/ (+ first last) 2))))) |
---|
295 | |
---|
296 | ;; BIN-AND-COUNT |
---|
297 | ;; |
---|
298 | ;; Make N equal width bins and count the number of elements of sequence that |
---|
299 | ;; belong in each. |
---|
300 | (define (bin-and-count sequence n) |
---|
301 | (let ((smallest (fold min (car sequence) (cdr sequence))) |
---|
302 | (increment (/ (range sequence) |
---|
303 | n)) |
---|
304 | (bins (make-vector n 0))) |
---|
305 | (let loop ((bin 0)) |
---|
306 | (if (= bin n) |
---|
307 | (begin ; PCL: Make sure largest element is included in highest bin count! |
---|
308 | (vector-set! bins (- n 1) (+ (vector-ref bins (- n 1)) 1)) |
---|
309 | bins) |
---|
310 | (begin |
---|
311 | (vector-set! bins |
---|
312 | bin |
---|
313 | (length |
---|
314 | (filter |
---|
315 | (lambda (x) (and (>= x (+ smallest (* bin increment))) |
---|
316 | (< x (+ smallest (* (+ 1 bin) increment))))) |
---|
317 | sequence))) |
---|
318 | (loop (+ 1 bin))))))) |
---|
319 | |
---|
320 | ;; COMBINATIONS |
---|
321 | ;; How many ways to take n things taken k at a time, when order does not matter |
---|
322 | ;; Rosner 90 |
---|
323 | (define (combinations n k) |
---|
324 | (/ (factorial n) |
---|
325 | (* (factorial k) |
---|
326 | (factorial (- n k))))) |
---|
327 | |
---|
328 | (define (error-function x) |
---|
329 | (let-values (((erf ignore) (gamma-incomplete 0.5 (square x)))) |
---|
330 | (if (>= x 0.0) |
---|
331 | erf |
---|
332 | (- erf)))) |
---|
333 | |
---|
334 | (define (factorial n) |
---|
335 | (define (fact-n n result) |
---|
336 | (if (<= n 1) |
---|
337 | result |
---|
338 | (fact-n (- n 1) (* n result)))) |
---|
339 | (fact-n n 1)) |
---|
340 | |
---|
341 | ;; assumes p-function is monotonic (for this library) |
---|
342 | ;; -- #:increasing defaults to #f, may be set to #t |
---|
343 | (define (find-critical-value p-function p-value . args) ; adopted from CLASP 1.4.3 |
---|
344 | (let ((increasing? (get-keyword #:increasing args (lambda () #f))) |
---|
345 | (x-tolerance (get-keyword #:x-tolerance args (lambda () 0.00001))) |
---|
346 | (y-tolerance (get-keyword #:y-tolerance args (lambda () 0.00001)))) |
---|
347 | (let* ((x-low 0) |
---|
348 | (fx-low (p-function x-low)) |
---|
349 | (x-high 1) |
---|
350 | (fx-high (p-function x-high))) |
---|
351 | ; set initial bounding values |
---|
352 | (let loop () |
---|
353 | (unless ((if increasing? <= >=) fx-low p-value fx-high) |
---|
354 | (set! x-low x-high) |
---|
355 | (set! fx-low fx-high) |
---|
356 | (set! x-high (* 2.0 x-high)) |
---|
357 | (set! fx-high (p-function x-high)) |
---|
358 | (loop))) |
---|
359 | ; binary search |
---|
360 | (let loop () |
---|
361 | (let* ((x-mid (/ (+ x-low x-high) 2.0)) |
---|
362 | (fx-mid (p-function x-mid)) |
---|
363 | (y-diff (abs (- fx-mid p-value))) |
---|
364 | (x-diff (- x-high x-low))) |
---|
365 | (if (or (< x-diff x-tolerance) |
---|
366 | (< y-diff y-tolerance)) |
---|
367 | x-mid |
---|
368 | ;; depending on whether p-function is monotonically increasing with x, |
---|
369 | ;; if the function is above the desired p-value ... |
---|
370 | (begin |
---|
371 | (if ((if increasing? > <) p-value fx-mid) |
---|
372 | ;; then critical value is in the upper half |
---|
373 | (begin (set! x-low x-mid) |
---|
374 | (set! fx-low fx-mid)) |
---|
375 | ;; otherwise it's in the lower half |
---|
376 | (begin (set! x-high x-mid) |
---|
377 | (set! fx-high fx-mid))) |
---|
378 | (loop)))))))) |
---|
379 | |
---|
380 | ;; FISHER-Z-TRANSFORM |
---|
381 | ;; Rosner 458 |
---|
382 | ;; Transforms the correlation coefficient to an approximately |
---|
383 | ;; normal distribution. |
---|
384 | (define (fisher-z-transform r) |
---|
385 | (* 0.5 (log (/ (+ 1 r) (- 1 r))))) |
---|
386 | |
---|
387 | ;; taken from TSPL 3 |
---|
388 | (define (list-sort comparison items) |
---|
389 | (define (dosort pred? ls n) |
---|
390 | (if (= n 1) |
---|
391 | (list (car ls)) |
---|
392 | (let ((i (quotient n 2))) |
---|
393 | (domerge pred? |
---|
394 | (dosort pred? ls i) |
---|
395 | (dosort pred? (list-tail ls i) (- n i)))))) |
---|
396 | (define (domerge pred? l1 l2) |
---|
397 | (cond ((null? l1) l2) |
---|
398 | ((null? l2) l1) |
---|
399 | ((pred? (car l2) (car l1)) |
---|
400 | (cons (car l2) (domerge pred? l1 (cdr l2)))) |
---|
401 | (else |
---|
402 | (cons (car l1) (domerge pred? (cdr l1) l2))))) |
---|
403 | (if (null? items) |
---|
404 | items |
---|
405 | (dosort comparison items (length items)))) |
---|
406 | |
---|
407 | ;; PERMUTATIONS |
---|
408 | ;; How many ways to take n things taken k at a time, when order matters |
---|
409 | ;; Rosner 88 |
---|
410 | (define (permutations n k) |
---|
411 | (fold * 1 (iota k n -1))) |
---|
412 | |
---|
413 | (define (position item items) |
---|
414 | (let ((index (list-index (lambda (x) (equal? item x)) |
---|
415 | items))) |
---|
416 | (if (number? index) |
---|
417 | index |
---|
418 | (error "Position: item not in list")))) |
---|
419 | |
---|
420 | (define (cumsum sequence) |
---|
421 | (if (null? sequence) |
---|
422 | 0 |
---|
423 | (reverse (fold (lambda (x ax) (cons (+ x (car ax)) ax)) (list (car sequence)) (cdr sequence))))) |
---|
424 | |
---|
425 | (define (sign x) |
---|
426 | (cond ((negative? x) -1) |
---|
427 | ((positive? x) 1) |
---|
428 | ((zero? x) 0) |
---|
429 | (else '()))) |
---|
430 | |
---|
431 | (define (square x) |
---|
432 | (* x x)) |
---|
433 | |
---|
434 | ;; TODO |
---|
435 | (define (underflow-goes-to-zero x) |
---|
436 | x) |
---|
437 | |
---|
438 | (define (vector-sort comparison items) |
---|
439 | (list->vector (list-sort comparison (vector->list items)))) |
---|
440 | |
---|
441 | ;; RANDOM-NORMAL |
---|
442 | ;; returns a random number with mean and standard-distribution as specified. |
---|
443 | (define (random-normal mean sd) |
---|
444 | (+ mean (* sd (/ (random-uniform-int 1000000) 1000000)))) |
---|
445 | |
---|
446 | ;; RANDOM-PICK |
---|
447 | ;; Random selection from list |
---|
448 | (define (random-pick items) |
---|
449 | (if (and (list? items) (not (null? items))) |
---|
450 | (list-ref items (random-uniform-int (length items))) |
---|
451 | #f)) |
---|
452 | |
---|
453 | ;; RANDOM-SAMPLE |
---|
454 | ;; Return a random sample of size N from sequence, without replacement. |
---|
455 | ;; If N is equal to or greater than the length of the sequence, return |
---|
456 | ;; the entire sequence. |
---|
457 | (define (random-sample n items) |
---|
458 | (cond ((<= n 0) |
---|
459 | '()) |
---|
460 | ((>= n (length items)) |
---|
461 | items) |
---|
462 | (else |
---|
463 | (let loop ((remaining items) |
---|
464 | (kept '())) |
---|
465 | (if (= (length kept) n) |
---|
466 | kept |
---|
467 | (let ((one (random-pick remaining))) |
---|
468 | (loop (delete one remaining) |
---|
469 | (cons one kept)))))))) |
---|
470 | |
---|
471 | ;; RANDOM-WEIGHTED-SAMPLE Return a random sample of size M from |
---|
472 | ;; sequence of length N, without replacement, where each element has |
---|
473 | ;; a defined probability of selection (weight) W. If M is equal to |
---|
474 | ;; or greater to N, return the entire sequence. |
---|
475 | (define (random-weighted-sample m items weights) |
---|
476 | (let ((n (length items))) |
---|
477 | (cond ((<= m 0) '()) |
---|
478 | ((>= m n) items) |
---|
479 | (else |
---|
480 | (let* ((keys (map (lambda (w) (expt (random-uniform) (/ 1 w))) weights)) |
---|
481 | (sorted-items (list-sort (lambda (x y) (> (car x) (car y))) (zip keys items)))) |
---|
482 | (map cadr (take sorted-items m))) |
---|
483 | )) |
---|
484 | )) |
---|
485 | |
---|
486 | |
---|
487 | ;; --------------------------------------------------------------------------- |
---|
488 | ;; Descriptive statistics |
---|
489 | |
---|
490 | ;; MEAN |
---|
491 | ;; Rosner 10 |
---|
492 | (define (mean sequence) |
---|
493 | (if (null? sequence) |
---|
494 | 0 |
---|
495 | (/ (fold + 0 sequence) (length sequence)))) |
---|
496 | |
---|
497 | ;; MEDIAN |
---|
498 | ;; Rosner 12 (and 19) |
---|
499 | (define (median sequence) |
---|
500 | (percentile sequence 50)) |
---|
501 | |
---|
502 | ;; MODE |
---|
503 | ;; Rosner 14 |
---|
504 | ;; Returns two values: a list of the modes and the number of times they occur |
---|
505 | (define (mode sequence) |
---|
506 | (if (null? sequence) |
---|
507 | (error "Mode: Sequence must not be null") |
---|
508 | (let ((count-table (make-hash-table eqv?)) |
---|
509 | (modes '()) |
---|
510 | (mode-count 0)) |
---|
511 | (for-each (lambda (item) |
---|
512 | (hash-table-set! count-table |
---|
513 | item |
---|
514 | (+ 1 (hash-table-ref count-table item (lambda () 0))))) |
---|
515 | sequence) |
---|
516 | (for-each (lambda (key) |
---|
517 | (let ((val (hash-table-ref count-table key (lambda () #f)))) |
---|
518 | (cond ((> val mode-count) ; keep mode |
---|
519 | (set! modes (list key)) |
---|
520 | (set! mode-count val)) |
---|
521 | ((= val mode-count) ; store multiple modes |
---|
522 | (set! modes (cons key modes)))))) |
---|
523 | (hash-table-keys count-table)) |
---|
524 | (cond ((every number? modes) (set! modes (list-sort < modes))) |
---|
525 | ((every string? modes) (set! modes (list-sort string< modes))) |
---|
526 | ) |
---|
527 | (values modes mode-count)))) |
---|
528 | |
---|
529 | ;; GEOMETRIC-MEAN |
---|
530 | ;; Rosner 16 |
---|
531 | (define (geometric-mean sequence . args) |
---|
532 | (let ((base (if (null? args) 10 (car args)))) |
---|
533 | (expt base (mean (map (lambda (x) (/ (log x) |
---|
534 | (log base))) |
---|
535 | sequence))))) |
---|
536 | |
---|
537 | ;; RANGE |
---|
538 | ;; Rosner 18 |
---|
539 | (define (range sequence) |
---|
540 | (if (null? sequence) |
---|
541 | 0 |
---|
542 | (- (fold max (car sequence) (cdr sequence)) |
---|
543 | (fold min (car sequence) (cdr sequence))))) |
---|
544 | |
---|
545 | ;; PERCENTILE |
---|
546 | ;; Rosner 19 |
---|
547 | (define (percentile sequence percent) |
---|
548 | (if (null? sequence) |
---|
549 | (error "Percentile: Sequence must not be null") |
---|
550 | (let* ((sorted-vec (vector-sort < (apply vector sequence))) |
---|
551 | (n (vector-length sorted-vec)) |
---|
552 | (k (* n (/ percent 100))) |
---|
553 | (floor-k (floor k))) |
---|
554 | (if (= k floor-k) |
---|
555 | (/ (+ (vector-ref sorted-vec k) |
---|
556 | (vector-ref sorted-vec (- k 1))) |
---|
557 | 2) |
---|
558 | (vector-ref sorted-vec floor-k))))) |
---|
559 | |
---|
560 | ;; VARIANCE |
---|
561 | ;; Rosner 21 |
---|
562 | (define (variance sequence) |
---|
563 | (if (< (length sequence) 2) |
---|
564 | (error "variance: sequence must contain at least two elements") |
---|
565 | (let ((mean1 (mean sequence))) |
---|
566 | (/ (fold + 0 (map (lambda (x) (square (- mean1 x))) sequence)) |
---|
567 | (- (length sequence) 1))))) |
---|
568 | |
---|
569 | ;; STANDARD-DEVIATION |
---|
570 | ;; Rosner 21 |
---|
571 | (define (standard-deviation sequence) |
---|
572 | (sqrt (variance sequence))) |
---|
573 | |
---|
574 | ;; COEFFICIENT-OF-VARIATION |
---|
575 | ;; Rosner 24 |
---|
576 | (define (coefficient-of-variation sequence) |
---|
577 | (* 100 |
---|
578 | (/ (standard-deviation sequence) |
---|
579 | (mean sequence)))) |
---|
580 | |
---|
581 | ;; STANDARD-ERROR-OF-THE-MEAN |
---|
582 | ;; Rosner 172 |
---|
583 | (define (standard-error-of-the-mean sequence) |
---|
584 | (/ (standard-deviation sequence) |
---|
585 | (sqrt (length sequence)))) |
---|
586 | |
---|
587 | ;; MEAN-SD-N |
---|
588 | ;; Combined calculation, returns mean, standard deviation and length of |
---|
589 | ;; sequence of numbers |
---|
590 | (define (mean-sd-n sequence) |
---|
591 | (values (mean sequence) |
---|
592 | (standard-deviation sequence) |
---|
593 | (length sequence))) |
---|
594 | |
---|
595 | ;; --------------------------------------------------------------------------- |
---|
596 | ;; Distributional functions |
---|
597 | |
---|
598 | ;; BINOMIAL-PROBABILITY |
---|
599 | ;; exact: Rosner 93, approximate 105 |
---|
600 | ;; |
---|
601 | ;; P(X=k) for X a binomial random variable with parameters n & p. |
---|
602 | ;; Binomial expectations for seeing k events in N trials, each having |
---|
603 | ;; probability p. Use the Poisson approximation if N>100 and P<0.01. |
---|
604 | (define (binomial-probability n k p) |
---|
605 | (if (and (> n 100) |
---|
606 | (< p 0.01)) |
---|
607 | (poisson-probability (* n p) k) |
---|
608 | (* (combinations n k) |
---|
609 | (expt p k) |
---|
610 | (expt (- 1 p) (- n k))))) |
---|
611 | |
---|
612 | ;; BINOMIAL-CUMULATIVE-PROBABILITY |
---|
613 | ;; Rosner 94 |
---|
614 | ;; |
---|
615 | ;; P(X<k) for X a binomial random variable with parameters n & p. |
---|
616 | ;; Binomial expectations for fewer than k evens in N trials, each |
---|
617 | ;; having probability p. |
---|
618 | (define (binomial-cumulative-probability n k p) |
---|
619 | (let loop ((sum-upto-k-1 0.0) |
---|
620 | (i 0)) |
---|
621 | (if (= i k) |
---|
622 | sum-upto-k-1 |
---|
623 | (loop (+ sum-upto-k-1 (binomial-probability n i p)) |
---|
624 | (+ 1 i))))) |
---|
625 | |
---|
626 | ;; BINOMIAL-GE-PROBABILITY |
---|
627 | ;; Rosner 94 |
---|
628 | ;; |
---|
629 | ;; The probability of k or more occurrences in N events, each with |
---|
630 | ;; probability p. |
---|
631 | (define (binomial-ge-probability n k p) |
---|
632 | (- 1 (binomial-cumulative-probability n k p))) |
---|
633 | |
---|
634 | ;; for convenience later |
---|
635 | (define (binomial-le-probability n k p) |
---|
636 | (let ((sum-up-to-k 0)) |
---|
637 | (let loop ((i 0)) |
---|
638 | (if (= i (+ 1 k)) |
---|
639 | sum-up-to-k |
---|
640 | (set! sum-up-to-k (+ sum-up-to-k (binomial-probability n i p))))))) |
---|
641 | |
---|
642 | ;; POISSON-PROBABILITY |
---|
643 | ;; Rosner 100 |
---|
644 | ;; |
---|
645 | ;; Probability of seeing k events over a time period when the expected |
---|
646 | ;; number of events over that time is mu. |
---|
647 | (define (poisson-probability mu k) |
---|
648 | (/ (* (exp (- mu)) (expt mu k)) |
---|
649 | (factorial k))) |
---|
650 | |
---|
651 | ;; POISSON-CUMULATIVE-PROBABILITY |
---|
652 | ;; Rosner 197 |
---|
653 | ;; |
---|
654 | ;; Probability of seeing fewer than K events over a time period when the |
---|
655 | ;; expected number of events over that time is mu. |
---|
656 | (define (poisson-cumulative-probability mu k) |
---|
657 | (if (< k 170) |
---|
658 | (let loop ((sum 0) |
---|
659 | (x 0)) |
---|
660 | (if (= x k) |
---|
661 | sum |
---|
662 | (loop (+ sum (poisson-probability mu x)) |
---|
663 | (+ x 1)))) |
---|
664 | (- 1 (let-values (((gi ignore) (gamma-incomplete k mu))) gi)))) |
---|
665 | |
---|
666 | ;; POISSON-GE-PROBABILITY |
---|
667 | ;; Probability of X or more events when expected number of events is mu. |
---|
668 | (define (poisson-ge-probability mu x) |
---|
669 | (- 1 (poisson-cumulative-probability mu x))) |
---|
670 | |
---|
671 | ;; NORMAL-PDF |
---|
672 | ;; Rosner 115 |
---|
673 | ;; The probability density function (PDF) for a normal distribution |
---|
674 | ;; with mean mu and variance sigma-squared at point x. |
---|
675 | (define (normal-pdf x mu sigma-squared) |
---|
676 | (* (/ (sqrt (* 2 PI sigma-squared))) |
---|
677 | (exp (- (/ (square (- x mu)) |
---|
678 | (* 2 sigma-squared)))))) |
---|
679 | |
---|
680 | ;; CONVERT-TO-STANDARD-NORMAL |
---|
681 | ;; Rosner 130 |
---|
682 | ;; Convert X from a Normal distribution with mean mu and variance sigma |
---|
683 | ;; to standard normal |
---|
684 | (define (convert-to-standard-normal x mu sigma) |
---|
685 | (/ (- x mu) sigma)) |
---|
686 | |
---|
687 | ;; PHI |
---|
688 | ;; the CDF of standard normal distribution |
---|
689 | ;; Rosner 125 |
---|
690 | (define (phi x) |
---|
691 | (* 0.5 (+ 1.0 (error-function (/ x (sqrt 2.0)))))) |
---|
692 | |
---|
693 | ;; Z |
---|
694 | ;; The inverse normal function, P(X<Zu) = u where X is distributed as |
---|
695 | ;; the standard normal. Uses binary search. |
---|
696 | ;; Rosner 128 |
---|
697 | (define (z percentile . args) |
---|
698 | (let ((epsilon (get-keyword #:epsilon args (lambda () 1e-15)))) |
---|
699 | (let loop ((min-value -9e0) |
---|
700 | (max-value 9e0) |
---|
701 | (guess 0.0)) |
---|
702 | (if (< (- max-value min-value) epsilon) |
---|
703 | guess |
---|
704 | (if (< (phi guess) percentile) |
---|
705 | (loop guess max-value (mean (list guess max-value))) |
---|
706 | (loop min-value guess (mean (list min-value guess)))))))) |
---|
707 | |
---|
708 | ;; T-DISTRIBUTION |
---|
709 | ;; Rosner 178 |
---|
710 | ;; Returns the point which is the indicated percentile in the T distribution |
---|
711 | ;; with dof degrees of freedom |
---|
712 | (define (t-distribution dof percentile) |
---|
713 | (find-critical-value |
---|
714 | (lambda (x) (t-significance x dof #:tails ':positive)) |
---|
715 | (- 1 percentile))) |
---|
716 | |
---|
717 | ;; CHI-SQUARE |
---|
718 | ;; Rosner 187 |
---|
719 | ;; Returns the point which is the indicated percentile in the Chi Square |
---|
720 | ;; distribution with dof degrees of freedom. |
---|
721 | (define (chi-square dof percentile) |
---|
722 | (find-critical-value (lambda (x) (chi-square-cdf x dof)) |
---|
723 | (- 1 percentile) |
---|
724 | #:increasing #t)) |
---|
725 | |
---|
726 | ;; CHI-SQUARE-CDF |
---|
727 | ;; Computes the left hand tail area under the chi square |
---|
728 | ;; distribution under dof degrees of freedom up to X. |
---|
729 | (define (chi-square-cdf x dof) |
---|
730 | (let-values (((cdf ignore) (gamma-incomplete (* 0.5 dof) (* 0.5 x)))) |
---|
731 | cdf)) |
---|
732 | |
---|
733 | ;; --------------------------------------------------------------------------- |
---|
734 | ;; Confidence intervals |
---|
735 | ;; |
---|
736 | |
---|
737 | ;; BINOMIAL-PROBABILITY-CI |
---|
738 | ;; Rosner 193 (approximate) & 194 (exact) |
---|
739 | |
---|
740 | ;; Confidence intervals on a binomial probability. If a binomial |
---|
741 | ;; probability of p has been observed in N trials, what is the 1-alpha |
---|
742 | ;; confidence interval around p? Approximate (using normal theory |
---|
743 | ;; approximation) when npq >= 10 unless told otherwise |
---|
744 | |
---|
745 | (define (binomial-probability-ci n p alpha . args) |
---|
746 | (let ((exact (get-keyword #:exact? args (lambda () #f)))) |
---|
747 | (if (and (> (* n p (- 1 p)) 10) |
---|
748 | (not exact)) |
---|
749 | (let ((difference (* (z (- 1 (/ alpha 2))) |
---|
750 | (sqrt (/ (* p (- 1 p)) n))))) |
---|
751 | (values (- p difference) (+ p difference))) |
---|
752 | (values (find-critical-value |
---|
753 | (lambda (p1) (binomial-cumulative-probability n (floor (* p n)) p1)) |
---|
754 | (- 1 (/ alpha 2))) |
---|
755 | (find-critical-value |
---|
756 | (lambda (p2) (binomial-cumulative-probability n (+ 1 (floor (* p n))) p2)) |
---|
757 | (/ alpha 2)))))) |
---|
758 | |
---|
759 | ;; POISSON-MU-CI |
---|
760 | ;; Confidence interval for the Poisson parameter mu |
---|
761 | ;; Rosner 197 |
---|
762 | ;; |
---|
763 | ;; Given x observations in a unit of time, what is the 1-alpha confidence |
---|
764 | ;; interval on the Poisson parameter mu (= lambda*T)? |
---|
765 | ;; |
---|
766 | ;; Since find-critical-value assumes that the function is monotonic |
---|
767 | ;; increasing, adjust the value we are looking for taking advantage of |
---|
768 | ;; reflectiveness |
---|
769 | (define (poisson-mu-ci x alpha) |
---|
770 | (values |
---|
771 | (find-critical-value |
---|
772 | (lambda (mu) (poisson-cumulative-probability mu (- x 1))) |
---|
773 | (- 1 (/ alpha 2))) |
---|
774 | (find-critical-value |
---|
775 | (lambda (mu) (poisson-cumulative-probability mu x)) |
---|
776 | (/ alpha 2)))) |
---|
777 | |
---|
778 | ;; NORMAL-MEAN-CI |
---|
779 | ;; Confidence interval for the mean of a normal distribution |
---|
780 | ;; Rosner 180 |
---|
781 | ;; The 1-alpha percent confidence interval on the mean of a normal |
---|
782 | ;; distribution with parameters mean, sd & n. |
---|
783 | (define (normal-mean-ci mean sd n alpha) |
---|
784 | (let ((t-value (t-distribution (- n 1) (- 1 (/ alpha 2))))) |
---|
785 | (values (- mean (* t-value (/ sd (sqrt n)))) |
---|
786 | (+ mean (* t-value (/ sd (sqrt n))))))) |
---|
787 | |
---|
788 | ;; NORMAL-MEAN-CI-ON-SEQUENCE |
---|
789 | ;; |
---|
790 | ;; The 1-alpha confidence interval on the mean of a sequence of numbers |
---|
791 | ;; drawn from a Normal distribution. |
---|
792 | (define (normal-mean-ci-on-sequence sequence alpha) |
---|
793 | (let-values (((mean sd n) (mean-sd-n sequence))) |
---|
794 | (normal-mean-ci mean sd n alpha))) |
---|
795 | |
---|
796 | ;; NORMAL-VARIANCE-CI |
---|
797 | ;; Rosner 190 |
---|
798 | ;; The 1-alpha confidence interval on the variance of a sequence of numbers |
---|
799 | ;; drawn from a Normal distribution. |
---|
800 | (define (normal-variance-ci variance n alpha) |
---|
801 | (let ((chi-square-low (chi-square (- n 1) (- 1 (/ alpha 2)))) |
---|
802 | (chi-square-high (chi-square (- n 1) (/ alpha 2))) |
---|
803 | (the-numerator (* (- n 1) variance))) |
---|
804 | (values (/ the-numerator chi-square-low) |
---|
805 | (/ the-numerator chi-square-high)))) |
---|
806 | |
---|
807 | ;; NORMAL-VARIANCE-CI-ON-SEQUENCE |
---|
808 | ;; |
---|
809 | (define (normal-variance-ci-on-sequence sequence alpha) |
---|
810 | (let ((variance (variance sequence)) |
---|
811 | (n (length sequence))) |
---|
812 | (normal-variance-ci variance n alpha))) |
---|
813 | |
---|
814 | ;; NORMAL-SD-CI |
---|
815 | ;; Rosner 190 |
---|
816 | ;; As above, but a confidence inverval for the standard deviation. |
---|
817 | (define (normal-sd-ci sd n alpha) |
---|
818 | (let-values (((low high) (normal-variance-ci (square sd) n alpha))) |
---|
819 | (values (sqrt low) (sqrt high)))) |
---|
820 | |
---|
821 | ;; NORMAL-SD-CI-ON-SEQUENCE |
---|
822 | (define (normal-sd-ci-on-sequence sequence alpha) |
---|
823 | (let ((sd (standard-deviation sequence)) |
---|
824 | (n (length sequence))) |
---|
825 | (normal-sd-ci sd n alpha))) |
---|
826 | |
---|
827 | ;; --------------------------------------------------------------------------- |
---|
828 | ;; Hypothesis testing |
---|
829 | ;; |
---|
830 | |
---|
831 | ;; Z-TEST |
---|
832 | ;; Rosner 228 |
---|
833 | |
---|
834 | ;; The significance of a one sample Z test for the mean of a normal |
---|
835 | ;; distribution with known variance. mu is the null hypothesis mean, x-bar |
---|
836 | ;; is the observed mean, sigma is the standard deviation and N is the number |
---|
837 | ;; of observations. If tails is :both, the significance of a difference |
---|
838 | ;; between x-bar and mu. If tails is :positive, the significance of x-bar |
---|
839 | ;; is greater than mu, and if tails is :negative, the significance of x-bar |
---|
840 | ;; being less than mu. Returns a p value. |
---|
841 | (define (z-test x-bar n . args) |
---|
842 | (let ((mu (get-keyword #:mu args (lambda () 0))) |
---|
843 | (sigma (get-keyword #:sigma args (lambda () 1))) |
---|
844 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
845 | (let ((z (/ (- x-bar mu) (/ sigma (sqrt n))))) |
---|
846 | (case tails |
---|
847 | ((:both) (if (< z 0) |
---|
848 | (* 2 (phi z)) |
---|
849 | (* 2 (- 1 (phi z))))) |
---|
850 | ((:negative) (phi z)) |
---|
851 | ((:positive) (- 1 (phi z))))))) |
---|
852 | |
---|
853 | ;; Z-TEST-ON-SEQUENCE |
---|
854 | ;; Returns a p value for significance of a sample compared with |
---|
855 | ;; given mean and standard deviation for the null hypothesis. |
---|
856 | (define (z-test-on-sequence sequence . args) |
---|
857 | (let ((mu (get-keyword #:mu args (lambda () 0))) |
---|
858 | (sigma (get-keyword #:sigma args (lambda () 1))) |
---|
859 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
860 | (let ((x-bar (mean sequence)) |
---|
861 | (n (length sequence))) |
---|
862 | (z-test x-bar n #:mu mu #:sigma sigma #:tails tails)))) |
---|
863 | |
---|
864 | ;; T-TEST-ONE-SAMPLE |
---|
865 | ;; T-TEST-ONE-SAMPLE-ON-SEQUENCE |
---|
866 | ;; Rosner 216 |
---|
867 | |
---|
868 | ;; The significance of a one sample T test for the mean of a normal |
---|
869 | ;; distribution with unknown variance. X-bar is the observed mean, sd is |
---|
870 | ;; the observed standard deviation, N is the number of observations and mu |
---|
871 | ;; is the test mean. -ON-SAMPLE is the same, but calculates the observed |
---|
872 | ;; values from a sequence of numbers. |
---|
873 | (define (t-test-one-sample x-bar sd n mu . args) |
---|
874 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
875 | (t-significance (/ (- x-bar mu) (/ sd (sqrt n))) (- n 1) #:tails tails))) |
---|
876 | |
---|
877 | (define (t-test-one-sample-on-sequence sequence mu . args) |
---|
878 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
879 | (let-values (((mean sd n) (mean-sd-n sequence))) |
---|
880 | (t-test-one-sample mean sd n mu #:tails tails)))) |
---|
881 | |
---|
882 | ;; T-TEST-PAIRED |
---|
883 | ;; Rosner 276 |
---|
884 | |
---|
885 | ;; The significance of a paired t test for the means of two normal |
---|
886 | ;; distributions in a longitudinal study. D-bar is the mean difference, sd |
---|
887 | ;; is the standard deviation of the differences, N is the number of pairs. |
---|
888 | (define (t-test-paired d-bar sd n . args) |
---|
889 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
890 | (t-significance (/ d-bar (/ sd (sqrt n))) (- n 1) #:tails tails))) |
---|
891 | |
---|
892 | ;; T-TEST-PAIRED-ON-SEQUENCES |
---|
893 | ;; Rosner 276 |
---|
894 | |
---|
895 | ;; The significance of a paired t test for means of two normal distributions |
---|
896 | ;; in a longitudinal study. Before is a sequence of before values, after is |
---|
897 | ;; the sequence of paired after values (which must be the same length as the |
---|
898 | ;; before sequence). |
---|
899 | (define (t-test-paired-on-sequences before after . args) |
---|
900 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
901 | (let-values (((mean sd n) (mean-sd-n (map - before after)))) |
---|
902 | (t-test-paired mean sd n #:tailed tails)))) |
---|
903 | |
---|
904 | ;; T-TEST-TWO-SAMPLE |
---|
905 | ;; Rosner 282, 294, 297 |
---|
906 | |
---|
907 | ;; The significance of the difference of two means (x-bar1 and x-bar2) with |
---|
908 | ;; standard deviations sd1 and sd2, and sample sizes n1 and n2 respectively. |
---|
909 | ;; The form of the two sample t test depends on whether the sample variances |
---|
910 | ;; are equal or not. If the variable variances-equal? is :test, then we |
---|
911 | ;; use an F test and the variance-significance-cutoff to determine if they |
---|
912 | ;; are equal. If the variances are equal, then we use the two sample t test |
---|
913 | ;; for equal variances. If they are not equal, we use the Satterthwaite |
---|
914 | ;; method, which has good type I error properties (at the loss of some |
---|
915 | ;; power). |
---|
916 | (define (t-test-two-sample x-bar1 sd1 n1 x-bar2 sd2 n2 . args) |
---|
917 | (let ((variances-equal? (get-keyword #:variances-equal? args (lambda () ':test))) |
---|
918 | (variances-significance-cutoff (get-keyword #:variance-significance-cutoff args (lambda () 0.05))) |
---|
919 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
920 | (let ((t-statistic #f) |
---|
921 | (dof #f)) |
---|
922 | (if (case variances-equal? |
---|
923 | ((:test) |
---|
924 | (> (f-test (square sd1) n1 (square sd2) n2 #:tails tails) |
---|
925 | variances-significance-cutoff)) |
---|
926 | ((#t :yes :equal) |
---|
927 | #t) |
---|
928 | ((#f :no :unequal) |
---|
929 | #f)) |
---|
930 | (let ((s (sqrt (/ (+ (* (- n1 1) (square sd1)) |
---|
931 | (* (- n2 1) (square sd2))) |
---|
932 | (+ n1 n2 -2))))) |
---|
933 | (set! t-statistic (/ (- x-bar1 x-bar2) |
---|
934 | (* s (sqrt (+ (/ n1) (/ n2)))))) |
---|
935 | (set! dof (+ n1 n2 -2))) |
---|
936 | (let ((variance-ratio-1 (/ (square sd1) n1)) |
---|
937 | (variance-ratio-2 (/ (square sd2) n2))) |
---|
938 | (set! t-statistic (/ (- x-bar1 x-bar2) |
---|
939 | (sqrt (+ variance-ratio-1 variance-ratio-2)))) |
---|
940 | (set! dof (round (/ (square (+ variance-ratio-1 variance-ratio-2)) |
---|
941 | (+ (/ (square variance-ratio-1) (- n1 1)) |
---|
942 | (/ (square variance-ratio-2) (- n2 1)))))))) |
---|
943 | (t-significance t-statistic dof #:tails tails)))) |
---|
944 | |
---|
945 | ;; T-TEST-TWO-SAMPLE-ON-SEQUENCES |
---|
946 | ;; Same as above, but providing the sequences rather than the summaries |
---|
947 | (define (t-test-two-sample-on-sequences sequence1 sequence2 . args) |
---|
948 | (let ((variance-significance-cutoff (get-keyword #:variance-significance-cutoff args (lambda () 0.05))) |
---|
949 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
950 | (let-values (((x-bar-1 sd1 n1) (mean-sd-n sequence1)) |
---|
951 | ((x-bar-2 sd2 n2) (mean-sd-n sequence2))) |
---|
952 | (t-test-two-sample x-bar-1 sd1 n1 x-bar-2 sd2 n2 |
---|
953 | #:variances-equal? ':test |
---|
954 | #:variance-significance-cutoff variance-significance-cutoff |
---|
955 | #:tails tails)))) |
---|
956 | |
---|
957 | ;; F-TEST |
---|
958 | ;; Rosner 290 |
---|
959 | ;; F test for the equality of two variances |
---|
960 | (define (f-test variance1 n1 variance2 n2 . args) |
---|
961 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
962 | (let ((significance (f-significance (/ variance1 variance2) (- n1 1) (- n2 1) |
---|
963 | (not (eq? tails ':both))))) |
---|
964 | (case tails |
---|
965 | ((:both) significance) |
---|
966 | ((:positive) (if (> variance1 variance2) significance (- 1 significance))) |
---|
967 | ((:negative) (if (< variance1 variance2) significance (- 1 significance))))))) |
---|
968 | |
---|
969 | ;; CHI-SQUARE-TEST-ONE-SAMPLE |
---|
970 | ;; Rosner 246 |
---|
971 | ;; The significance of a one sample Chi square test for the variance of a |
---|
972 | ;; normal distribution. Variance is the observed variance, N is the number |
---|
973 | ;; of observations, and sigma-squared is the test variance. |
---|
974 | (define (chi-square-test-one-sample variance n sigma-squared . args) |
---|
975 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
976 | (let ((cdf (chi-square-cdf (/ (* (- n 1) variance) |
---|
977 | sigma-squared) |
---|
978 | (- n 1)))) |
---|
979 | (case tails |
---|
980 | ((:negative) |
---|
981 | cdf) |
---|
982 | ((:positive) |
---|
983 | (- 1 cdf)) |
---|
984 | ((:both) |
---|
985 | (if (<= variance sigma-squared) |
---|
986 | (* 2 cdf) |
---|
987 | (* 2 (- 1 cdf)))))))) |
---|
988 | |
---|
989 | ;; BINOMIAL-TEST-ONE-SAMPLE |
---|
990 | ;; Rosner 249 |
---|
991 | ;; The significance of a one sample test for the equality of an observed |
---|
992 | ;; probability p-hat to an expected probability p under a binomial |
---|
993 | ;; distribution with N observations. Use the normal theory approximation if |
---|
994 | ;; n*p(1-p) > 10 (unless the exact flag is true). |
---|
995 | (define (binomial-test-one-sample p-hat n p . args) |
---|
996 | (let ((tails (get-keyword #:tails args (lambda () ':both))) |
---|
997 | (exact (get-keyword #:exact? args (lambda () #f)))) |
---|
998 | (let ((q (- 1 p))) |
---|
999 | (if (and (> (* n p q) 10) (not exact)) |
---|
1000 | (let ((z (/ (- p-hat p) (sqrt (/ (* p q) n))))) |
---|
1001 | (case tails |
---|
1002 | ((:negative) (phi z)) |
---|
1003 | ((:positive) (- 1 (phi z))) |
---|
1004 | ((:both) (* 2 (if (<= p-hat p) (phi z) (- 1 (phi z))))))) |
---|
1005 | (let* ((observed (round (* p-hat n))) |
---|
1006 | (probability-more-extreme |
---|
1007 | (if (<= p-hat p) |
---|
1008 | (binomial-cumulative-probability n observed p) |
---|
1009 | (binomial-ge-probability n observed p)))) |
---|
1010 | (case tails |
---|
1011 | ((:negative :positive) probability-more-extreme) |
---|
1012 | ((:both) (min (* 2 probability-more-extreme) 1.0)))))))) |
---|
1013 | |
---|
1014 | ;; BINOMIAL-TEST-TWO-SAMPLE |
---|
1015 | ;; Rosner 357 |
---|
1016 | ;; Are the observed probabilities of an event (p-hat1 and p-hat2) in N1/N2 |
---|
1017 | ;; trials different? The normal theory method implemented here. The exact |
---|
1018 | ;; test is Fisher's contingency table method, below. |
---|
1019 | (define (binomial-test-two-sample p-hat1 n1 p-hat2 n2 . args) |
---|
1020 | (let ((tails (get-keyword #:tails args (lambda () ':both))) |
---|
1021 | (exact (get-keyword #:exact? args (lambda () #f)))) |
---|
1022 | (let* ((p-hat (/ (+ (* p-hat1 n1) (* p-hat2 n2)) (+ n1 n2))) |
---|
1023 | (q-hat (- 1 p-hat)) |
---|
1024 | (z (/ (- (abs (- p-hat1 p-hat2)) (+ (/ (* 2 n1)) (/ (* 2 n2)))) |
---|
1025 | (sqrt (* p-hat q-hat (+ (/ n1) (/ n2))))))) |
---|
1026 | (if (and (> (* n1 p-hat q-hat) 5) |
---|
1027 | (> (* n2 p-hat q-hat) 5) |
---|
1028 | (not exact)) |
---|
1029 | (* (- 1 (phi z)) (if (eq? tails ':both) 2 1)) |
---|
1030 | (fisher-exact-test (inexact->exact (round (* p-hat1 n1))) |
---|
1031 | (inexact->exact (round (* (- 1 p-hat1) n1))) |
---|
1032 | (inexact->exact (round (* p-hat2 n2))) |
---|
1033 | (inexact->exact (round (* (- 1 p-hat2) n2))) |
---|
1034 | tails))))) |
---|
1035 | |
---|
1036 | ;; FISHER-EXACT-TEST |
---|
1037 | ;; Rosner 371 |
---|
1038 | ;; Fisher's exact test. Gives a p value for a particular 2x2 contingency table |
---|
1039 | (define (fisher-exact-test a b c d . args) |
---|
1040 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1041 | (define (table-probability ta tb tc td) |
---|
1042 | (let ((tn (+ ta tb tc td))) |
---|
1043 | (/ (* 1.0 (factorial (+ ta tb)) (factorial (+ tc td)) |
---|
1044 | (factorial (+ ta tc)) (factorial (+ tb td))) |
---|
1045 | (* 1.0 (factorial tn) (factorial ta) (factorial tb) |
---|
1046 | (factorial tc) (factorial td))))) |
---|
1047 | (let* ((row-margin1 (+ a b)) |
---|
1048 | (row-margin2 (+ c d)) |
---|
1049 | (column-margin1 (+ a c)) |
---|
1050 | (column-margin2 (+ b d)) |
---|
1051 | (n (+ a b c d)) |
---|
1052 | (table-probabilities (make-vector (+ 1 (min row-margin1 |
---|
1053 | row-margin2 |
---|
1054 | column-margin1 |
---|
1055 | column-margin2)) |
---|
1056 | 0))) |
---|
1057 | ;; rearrange so that the first row and column marginals are |
---|
1058 | ;; smallest. Only need to change first margins and a. |
---|
1059 | (cond ((and (< row-margin2 row-margin1) |
---|
1060 | (< column-margin2 column-margin1)) |
---|
1061 | (set! a d) |
---|
1062 | (set! row-margin1 row-margin2) |
---|
1063 | (set! column-margin1 column-margin2)) |
---|
1064 | ((< row-margin2 row-margin1) |
---|
1065 | (set! a c) |
---|
1066 | (set! row-margin1 row-margin2)) |
---|
1067 | ((< column-margin2 column-margin1) |
---|
1068 | (set! a b) |
---|
1069 | (set! column-margin1 column-margin2))) |
---|
1070 | (let loop ((i 0)) |
---|
1071 | (unless (= i (vector-length table-probabilities)) |
---|
1072 | (let* ((test-a i) |
---|
1073 | (test-b (- row-margin1 i)) |
---|
1074 | (test-c (- column-margin1 i)) |
---|
1075 | (test-d (- n (+ test-a test-b test-c)))) |
---|
1076 | (vector-set! table-probabilities |
---|
1077 | i |
---|
1078 | (table-probability test-a test-b test-c test-d))) |
---|
1079 | (loop (+ 1 i)))) |
---|
1080 | (let ((above 0.0) |
---|
1081 | (below 0.0)) |
---|
1082 | (let loop ((i 0)) |
---|
1083 | (unless (= i (vector-length table-probabilities)) |
---|
1084 | (if (< i (+ 1 a)) |
---|
1085 | (set! above (+ above (vector-ref table-probabilities i))) |
---|
1086 | (set! below (+ below (vector-ref table-probabilities i)))) |
---|
1087 | (loop (+ 1 i)))) |
---|
1088 | (case tails |
---|
1089 | ((:both) (* 2 (min above below))) |
---|
1090 | ((:positive) below) |
---|
1091 | ((:negative) above)))))) |
---|
1092 | |
---|
1093 | ;; MCNEMARS-TEST |
---|
1094 | ;; Rosner 379 and 381 |
---|
1095 | |
---|
1096 | ;; McNemar's test for correlated proportions, used for longitudinal |
---|
1097 | ;; studies. Look only at the number of discordant pairs (one treatment |
---|
1098 | ;; is effective and the other is not). If the two treatments are A |
---|
1099 | ;; and B, a-discordant-count is the number where A worked and B did not, |
---|
1100 | ;; and b-discordant-count is the number where B worked and A did not. |
---|
1101 | (define (mcnemars-test a-discordant-count b-discordant-count . args) |
---|
1102 | (let ((exact (get-keyword #:exact? args (lambda () #f)))) |
---|
1103 | (let ((n (+ a-discordant-count b-discordant-count))) |
---|
1104 | (if (and (> n 20) (not exact)) |
---|
1105 | (let ((x2 (/ (square (- (abs (- a-discordant-count b-discordant-count)) 1)) |
---|
1106 | n))) |
---|
1107 | (- 1 (chi-square-cdf x2 1))) |
---|
1108 | (cond ((= a-discordant-count b-discordant-count) |
---|
1109 | 1.0) |
---|
1110 | ((< a-discordant-count b-discordant-count) |
---|
1111 | (* 2 (binomial-le-probability n a-discordant-count 0.5))) |
---|
1112 | (else |
---|
1113 | (* 2 (binomial-ge-probability n a-discordant-count 0.5)))))))) |
---|
1114 | |
---|
1115 | ;; POISSON-TEST-ONE-SAMPLE |
---|
1116 | ;; Rosner 256 (approximation on 259) |
---|
1117 | ;; The significance of a one sample test for the equality of an observed |
---|
1118 | ;; number of events (observed) and an expected number mu under the |
---|
1119 | ;; poisson distribution. Normal theory approximation is not that great, |
---|
1120 | ;; so don't use it unless told. |
---|
1121 | (define (poisson-test-one-sample observed mu . args) |
---|
1122 | (let ((tails (get-keyword #:tails args (lambda () ':both))) |
---|
1123 | (approximate (get-keyword #:approximate? args (lambda () #f)))) |
---|
1124 | (if approximate |
---|
1125 | (let ((x-square (/ (square (- observed mu)) mu))) |
---|
1126 | (- 1 (chi-square-cdf x-square 1))) |
---|
1127 | (let ((probability-more-extreme |
---|
1128 | (if (< observed mu) |
---|
1129 | (poisson-cumulative-probability mu observed) |
---|
1130 | (poisson-ge-probability mu observed)))) |
---|
1131 | (case tails |
---|
1132 | ((:negative :positive) probability-more-extreme) |
---|
1133 | ((:both) (min (* 2 probability-more-extreme) 1.0))))))) |
---|
1134 | |
---|
1135 | ;; SIGN-TEST |
---|
1136 | ;; Rosner 335-7 |
---|
1137 | ;; Really, just a special case of the binomial one sample test with p = 1/2. |
---|
1138 | ;; The normal theory version has a correction factor to make it a better |
---|
1139 | ;; approximation. |
---|
1140 | (define (sign-test plus-count minus-count . args) |
---|
1141 | (let ((exact (get-keyword #:exact? args (lambda () #f))) |
---|
1142 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1143 | (let* ((n (+ plus-count minus-count)) |
---|
1144 | (p-hat (/ plus-count n))) |
---|
1145 | (if (or (< n 20) exact) |
---|
1146 | (binomial-test-one-sample p-hat n 0.5 :tails tails :exact? #t) |
---|
1147 | (let ((area (- 1 (phi (/ (- (abs (- plus-count minus-count)) 1) |
---|
1148 | (sqrt n)))))) |
---|
1149 | (if (eq? tails ':both) |
---|
1150 | (* 2 area) |
---|
1151 | area)))))) |
---|
1152 | |
---|
1153 | ;; SIGN-TEST-ON-SEQUENCE |
---|
1154 | ;; Same as above, but takes two sequences and tests whether the entries |
---|
1155 | ;; in one are different (greater or less) than the other. |
---|
1156 | (define (sign-test-on-sequence sequence1 sequence2 . args) |
---|
1157 | (let ((exact (get-keyword #:exact? args (lambda () #f))) |
---|
1158 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1159 | (let* ((differences (map - sequence1 sequence2)) |
---|
1160 | (plus-count (length (filter positive? differences))) |
---|
1161 | (minus-count (length (filter negative? differences)))) |
---|
1162 | (sign-test plus-count minus-count :exact? exact :tails tails)))) |
---|
1163 | |
---|
1164 | ;; WILCOXON-SIGNED-RANK-TEST |
---|
1165 | ;; Rosner 341 |
---|
1166 | ;; A test on the ranking of positive and negative differences (are the |
---|
1167 | ;; positive differences significantly larger/smaller than the negative |
---|
1168 | ;; ones). Assumes a continuous and symmetric distribution of differences, |
---|
1169 | ;; although not a normal one. This is the normal theory approximation, |
---|
1170 | ;; which is only valid when N > 15. |
---|
1171 | |
---|
1172 | ;; This test is completely equivalent to the Mann-Whitney test. |
---|
1173 | (define (wilcoxon-signed-rank-test differences . args) |
---|
1174 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1175 | (let* ((nonzero-differences (filter (lambda (n) (not (zero? n))) differences)) |
---|
1176 | (sorted-list (list-sort (lambda (x y) (< (car x) (car y))) |
---|
1177 | (map (lambda (dif) |
---|
1178 | (list (abs dif) |
---|
1179 | (sign dif))) |
---|
1180 | nonzero-differences))) |
---|
1181 | (distinct-values (delete-duplicates (map car sorted-list))) |
---|
1182 | (ties '())) |
---|
1183 | (when (< (length nonzero-differences) 16) |
---|
1184 | (error |
---|
1185 | "This Wilcoxon Signed-Rank Test (normal approximation method) requires nonzero N > 15")) |
---|
1186 | |
---|
1187 | ; add avg-rank to the sorted values |
---|
1188 | (for-each (lambda (value) |
---|
1189 | (let ((first (position value (map car sorted-list))) |
---|
1190 | (last (position value (reverse (map car sorted-list))))) |
---|
1191 | (if (= first last) |
---|
1192 | (append (find (lambda (item) (= (car item) value)) |
---|
1193 | sorted-list) |
---|
1194 | (list (+ 1 first))) |
---|
1195 | (let ((number-tied (+ 1 (- last first))) |
---|
1196 | (avg-rank (+ 1 (/ (+ first last) 2)))) ; + 1 since 0 based |
---|
1197 | (set! ties (cons number-tied ties)) |
---|
1198 | (let loop ((i 0) |
---|
1199 | (result '())) |
---|
1200 | (if (= i number-tied) |
---|
1201 | (reverse result) |
---|
1202 | (loop (+ 1 i) |
---|
1203 | (cons (cons (list-ref sorted-list (+ first i)) |
---|
1204 | (list avg-rank)) |
---|
1205 | result)))))))) |
---|
1206 | distinct-values) |
---|
1207 | (set! ties (reverse ties)) |
---|
1208 | (let* ((direction (if (eq? tails ':negative) -1 1)) |
---|
1209 | (r1 (fold + 0 |
---|
1210 | (map (lambda (entry) |
---|
1211 | (if (= (cadr entry) direction) |
---|
1212 | (caddr entry) |
---|
1213 | 0)) |
---|
1214 | sorted-list))) |
---|
1215 | (n (length nonzero-differences)) |
---|
1216 | (expected-r1 (/ (* n (+ 1 n)) 4)) |
---|
1217 | (ties-factor (if ties |
---|
1218 | (/ (fold + 0 |
---|
1219 | (map (lambda (ti) (- (* ti ti ti) ti)) |
---|
1220 | ties)) |
---|
1221 | 48) |
---|
1222 | 0)) |
---|
1223 | (var-r1 (- (/ (* n (+ 1 n) (+ 1 (* 2 n))) 24) ties-factor)) |
---|
1224 | (T-score (/ (- (abs (- r1 expected-r1)) 0.5) (sqrt var-r1)))) |
---|
1225 | (* (if (eq? tails ':both) 2 1) |
---|
1226 | (- 1 (phi T-score))))))) |
---|
1227 | |
---|
1228 | (define (wilcoxon-signed-rank-test-on-sequences sequence1 sequence2 . args) |
---|
1229 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1230 | (wilcoxon-signed-rank-test (map - sequence1 sequence2) tails))) |
---|
1231 | |
---|
1232 | ;; CHI-SQUARE-TEST-RXC |
---|
1233 | ;; Rosner 395 |
---|
1234 | ;; Takes contingency-table, an RxC array, and returns the significance of |
---|
1235 | ;; the relationship between the row variable and the column variable. Any |
---|
1236 | ;; difference in proportion will cause this test to be significant -- |
---|
1237 | ;; consider using the test for trend instead if you are looking for a |
---|
1238 | ;; consistent change. |
---|
1239 | ;; contingency-table is an array, as defined using srfi-25 |
---|
1240 | (define (chi-square-test-rxc contingency-table) |
---|
1241 | (let* ((shape (array-shape contingency-table)) |
---|
1242 | (rows (car shape)) |
---|
1243 | (columns (cadr shape)) |
---|
1244 | (row-marginals (make-vector rows 0.0)) |
---|
1245 | (column-marginals (make-vector columns 0.0)) |
---|
1246 | (total 0.0) |
---|
1247 | (expected-lt-5 0) |
---|
1248 | (expected-lt-1 0) |
---|
1249 | (expected-values (make-array '#(0) rows columns)) |
---|
1250 | (x2 0.0)) |
---|
1251 | (let loop ((i 0) |
---|
1252 | (j 0)) |
---|
1253 | (cond ((= j columns) |
---|
1254 | (loop (+ 1 i) 0)) |
---|
1255 | ((= i rows) |
---|
1256 | '()) |
---|
1257 | (else |
---|
1258 | (let ((cell (array-ref contingency-table i j))) |
---|
1259 | (vector-set! row-marginals i cell) |
---|
1260 | (vector-set! column-marginals j cell) |
---|
1261 | (set! total (+ total cell)))))) |
---|
1262 | (let loop ((i 0) |
---|
1263 | (j 0)) |
---|
1264 | (cond ((= j columns) |
---|
1265 | (loop (+ 1 i) 0)) |
---|
1266 | ((= i rows) |
---|
1267 | '()) |
---|
1268 | (else |
---|
1269 | (let ((expected (/ (* (vector-ref row-marginals i) (vector-ref column-marginals j)) |
---|
1270 | total))) |
---|
1271 | (when (< expected 1) (set! expected-lt-1 (+ 1 expected-lt-1))) |
---|
1272 | (when (< expected 5) (set! expected-lt-5 (+ 1 expected-lt-5))) |
---|
1273 | (array-set! expected-values i j expected))))) |
---|
1274 | (when (positive? expected-lt-1) |
---|
1275 | (error "This test cannot be used when an expected value is less than one")) |
---|
1276 | (when (> expected-lt-5 (/ (* rows columns) 5)) |
---|
1277 | (error "This test cannot be used when more than 1/5 of the expected values are less than 5.")) |
---|
1278 | (let loop ((i 0) |
---|
1279 | (j 0)) |
---|
1280 | (cond ((= j columns) |
---|
1281 | (loop (+ 1 i) 0)) |
---|
1282 | ((= i rows) |
---|
1283 | '()) |
---|
1284 | (else |
---|
1285 | (set! x2 (/ (square (- (array-ref contingency-table i j) |
---|
1286 | (array-ref expected-values i j))) |
---|
1287 | (array-ref expected-values i j)))))) |
---|
1288 | (- 1 (chi-square-cdf x2 (* (- rows 1) (- columns 1)))))) |
---|
1289 | |
---|
1290 | ;; CHI-SQUARE-TEST-FOR-TREND |
---|
1291 | ;; Rosner 398 |
---|
1292 | ;; This test works on a 2xk table and assesses if there is an increasing or |
---|
1293 | ;; decreasing trend. Arguments are equal sized lists counts. Optionally, |
---|
1294 | ;; provide a list of scores, which represent some numeric attribute of the |
---|
1295 | ;; group. If not provided, scores are assumed to be 1 to k. |
---|
1296 | (define (chi-square-test-for-trend row1-counts row2-counts . args) |
---|
1297 | (let ((scores (if (and (not (null? args)) |
---|
1298 | (= 1 (length args))) |
---|
1299 | (car args) |
---|
1300 | (let loop ((i 0) |
---|
1301 | (res '())) |
---|
1302 | (if (= i (length row1-counts)) |
---|
1303 | (reverse res) |
---|
1304 | (loop (+ 1 i) |
---|
1305 | (cons (+ 1 i) res))))))) |
---|
1306 | (let* ((ns (map + row1-counts row2-counts)) |
---|
1307 | (p-hats (map / row1-counts ns)) |
---|
1308 | (n (fold + 0 ns)) |
---|
1309 | (p-bar (/ (fold + 0 row1-counts) n)) |
---|
1310 | (q-bar (- 1 p-bar)) |
---|
1311 | (s-bar (mean scores)) |
---|
1312 | (a (fold + 0.0 |
---|
1313 | (map (lambda (p-hat ni s) |
---|
1314 | (* ni (- p-hat p-bar) (- s s-bar))) |
---|
1315 | p-hats ns scores))) |
---|
1316 | (b (* 1.0 p-bar q-bar (- (fold + 0 (map (lambda (ni s) (* ni (square s))) |
---|
1317 | ns scores)) |
---|
1318 | (/ (square (fold + 0 (map (lambda (ni s) (* ni s)) |
---|
1319 | ns scores))) |
---|
1320 | n)))) |
---|
1321 | (x2 (/ (square a) b)) |
---|
1322 | (significance (- 1 (chi-square-cdf x2 1)))) |
---|
1323 | (when (< (* p-bar q-bar n) 5) |
---|
1324 | (error |
---|
1325 | "This test is only applicable when N * p-bar * q-bar >= 5")) |
---|
1326 | (format #t "~%The trend is ~a, p=~a~&" (if (< a 0) "decreasing" "increasing") significance) |
---|
1327 | significance))) |
---|
1328 | |
---|
1329 | ;; --------------------------------------------------------------------------- |
---|
1330 | ;; Sample size estimates |
---|
1331 | ;; |
---|
1332 | |
---|
1333 | ;; T-TEST-ONE-SAMPLE-SSE |
---|
1334 | ;; Rosner 238 |
---|
1335 | ;; Returns the number of subjects needed to test whether the mean of a |
---|
1336 | ;; normally distributed sample mu is different from a null hypothesis mean |
---|
1337 | ;; mu-null and variance variance, with alpha, 1-beta and tails as specified. |
---|
1338 | (define (t-test-one-sample-sse mu mu-null variance . args) |
---|
1339 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1340 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1341 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1342 | (let ((z-beta (z one-beta)) |
---|
1343 | (z-alpha (z (- 1 (if (eq? tails ':both) (/ alpha 2) alpha))))) |
---|
1344 | (inexact->exact |
---|
1345 | (ceiling (/ (* variance (square (+ z-beta z-alpha))) |
---|
1346 | (square (- mu-null mu)))))))) |
---|
1347 | |
---|
1348 | ;; T-TEST-TWO-SAMPLE-SSE |
---|
1349 | ;; Rosner 308 |
---|
1350 | ;; Returns the number of subjects needed to test whether the mean mu1 of a |
---|
1351 | ;; normally distributed sample (with variance variance1) is different from a |
---|
1352 | ;; second sample with mean mu2 and variance variance2, with alpha, 1-beta |
---|
1353 | ;; and tails as specified. It is also possible to set a sample size ratio |
---|
1354 | ;; of sample 1 to sample 2. |
---|
1355 | (define (t-test-two-sample-sse mu1 variance1 mu2 variance2 . args) |
---|
1356 | (let ((sample-ratio (get-keyword #:sample-ratio args (lambda () 1))) |
---|
1357 | (alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1358 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1359 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1360 | (let* ((delta2 (square (- mu1 mu2))) |
---|
1361 | (z-term (square (+ (z one-beta) |
---|
1362 | (z (- 1 (if (eq? tails ':both) |
---|
1363 | (/ alpha 2) |
---|
1364 | alpha)))))) |
---|
1365 | (n1 (ceiling (/ (* (+ variance1 (/ variance2 sample-ratio)) z-term) |
---|
1366 | delta2)))) |
---|
1367 | (values (inexact->exact n1) |
---|
1368 | (inexact->exact (ceiling (* sample-ratio n1))))))) |
---|
1369 | |
---|
1370 | ;; T-TEST-PAIRED-SSE |
---|
1371 | ;; Rosner 311 |
---|
1372 | ;; Returns the number of subjects needed to test whether the differences |
---|
1373 | ;; with mean difference-mu and variance difference-variance, with alpha, |
---|
1374 | ;; 1-beta and tails as specified. |
---|
1375 | (define (t-test-paired-sse difference-mu difference-variance . args) |
---|
1376 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1377 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1378 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1379 | (ceiling (/ (* 2 difference-variance |
---|
1380 | (square (+ (z one-beta) |
---|
1381 | (z (- 1 (if (eq? tails ':both) |
---|
1382 | (/ alpha 2) |
---|
1383 | alpha)))))) |
---|
1384 | (square difference-mu))))) |
---|
1385 | |
---|
1386 | ;; BINOMIAL-TEST-ONE-SAMPLE-SSE |
---|
1387 | ;; Rosner 254 |
---|
1388 | ;; Returns the number of subjects needed to test whether an observed |
---|
1389 | ;; probability is significantly different from a particular binomial |
---|
1390 | ;; null hypothesis with a significance alpha and a power 1-beta. |
---|
1391 | (define (binomial-test-one-sample-sse p-estimated p-null . args) |
---|
1392 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1393 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1394 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1395 | (let ((q-null (- 1 p-null)) |
---|
1396 | (q-estimated (- 1 p-estimated))) |
---|
1397 | (inexact->exact |
---|
1398 | (ceiling |
---|
1399 | (/ (* p-null |
---|
1400 | q-null |
---|
1401 | (square (+ (z (- 1 (if (eq? tails ':both) |
---|
1402 | (/ alpha 2) |
---|
1403 | alpha))) |
---|
1404 | (* (z one-beta) |
---|
1405 | (sqrt (/ (* p-estimated q-estimated) |
---|
1406 | (* p-null q-null))))))) |
---|
1407 | (square (- p-estimated p-null)))))))) |
---|
1408 | |
---|
1409 | ;; BINOMIAL-TEST-TWO-SAMPLE-SSE |
---|
1410 | ;; Rosner 384 |
---|
1411 | ;; The number of subjects needed to test if two binomial probabilities |
---|
1412 | ;; are different at a given significance alpha and power 1-beta. The |
---|
1413 | ;; sample sizes can be unequal; the p2 sample is sample-sse-ratio * the |
---|
1414 | ;; size of the p1 sample. It can be a one tailed or two tailed test. |
---|
1415 | (define (binomial-test-two-sample-sse p1 p2 . args) |
---|
1416 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1417 | (sample-ratio (get-keyword #:sample-ratio args (lambda () 1))) |
---|
1418 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1419 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1420 | (let* ((q1 (- 1 p1)) |
---|
1421 | (q2 (- 1 p2)) |
---|
1422 | (delta (abs (- p1 p2))) |
---|
1423 | (p-bar (/ (+ p1 (* sample-ratio p2)) (+ 1 sample-ratio))) |
---|
1424 | (q-bar (- 1 p-bar)) |
---|
1425 | (z-alpha (z (- 1 (if (eq? tails ':both) (/ alpha 2) alpha)))) |
---|
1426 | (z-beta (z one-beta)) |
---|
1427 | (n1 (ceiling |
---|
1428 | (/ (square (+ (* (sqrt (* p-bar q-bar (+ 1 (/ sample-ratio)))) |
---|
1429 | z-alpha) |
---|
1430 | (* (sqrt (+ (* p1 q1) (/ (* p2 q2) sample-ratio))) |
---|
1431 | z-beta))) |
---|
1432 | (square delta))))) |
---|
1433 | (values n1 (ceiling (* sample-ratio n1)))))) |
---|
1434 | |
---|
1435 | ;; BINOMIAL-TEST-PAIRED-SSE |
---|
1436 | ;; Rosner 387 |
---|
1437 | |
---|
1438 | ;; Sample size estimate for the McNemar (discordant pairs) test. Pd is the |
---|
1439 | ;; projected proportion of discordant pairs among all pairs, and Pa is the |
---|
1440 | ;; projected proportion of type A pairs among discordant pairs. alpha, |
---|
1441 | ;; 1-beta and tails are as above. Returns the number of individuals |
---|
1442 | ;; necessary; that is twice the number of matched pairs necessary. |
---|
1443 | (define (binomial-test-paired-sse pd pa . args) |
---|
1444 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1445 | (one-beta (get-keyword #:1-beta args (lambda () 0.95))) |
---|
1446 | (tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1447 | (let ((qa (- 1 pa)) |
---|
1448 | (z-alpha (z (- 1 (if (eq? tails ':both) (/ alpha 2) alpha)))) |
---|
1449 | (z-beta (z one-beta))) |
---|
1450 | (ceiling (/ (square (+ z-alpha (* 2 z-beta (sqrt (* pa qa))))) |
---|
1451 | (* 2 (square (- pa 0.5)) pd)))))) |
---|
1452 | |
---|
1453 | ;; CORRELATION-SSE |
---|
1454 | ;; Rosner 463 |
---|
1455 | ;; |
---|
1456 | ;; Returns the size of a sample necessary to find a correlation of |
---|
1457 | ;; expected value rho with significance alpha and power 1-beta. |
---|
1458 | (define (correlation-sse rho . args) |
---|
1459 | (let ((alpha (get-keyword #:alpha args (lambda () 0.05))) |
---|
1460 | (one-beta (get-keyword #:1-beta args (lambda () 0.95)))) |
---|
1461 | (inexact->exact |
---|
1462 | (ceiling (+ 3 (/ (square (+ (z (- 1 alpha)) (z one-beta))) |
---|
1463 | (square (fisher-z-transform rho)))))))) |
---|
1464 | |
---|
1465 | ;; --------------------------------------------------------------------------- |
---|
1466 | ;; Correlation and regression |
---|
1467 | ;; |
---|
1468 | |
---|
1469 | ;; LINEAR-REGRESSION |
---|
1470 | ;; Rosner 31, 441 for t-test |
---|
1471 | ;; Computes the regression equation for a least squares fit of a line to a |
---|
1472 | ;; sequence of points (each a list of two numbers, e.g. '((1.0 0.1) (2.0 0.2))) |
---|
1473 | ;; and reports the intercept, slope, correlation coefficient r, R^2, and |
---|
1474 | ;; the significance of the difference of the slope from 0. |
---|
1475 | (define (linear-regression points) |
---|
1476 | (unless (> (length points) 2) |
---|
1477 | (error "Requires at least three points")) |
---|
1478 | (let ((xs (map car points)) |
---|
1479 | (ys (map cadr points))) |
---|
1480 | (let* ((x-bar (mean xs)) |
---|
1481 | (y-bar (mean ys)) |
---|
1482 | (n (length points)) |
---|
1483 | (Lxx (fold + 0 |
---|
1484 | (map (lambda (xi) (square (- xi x-bar))) xs))) |
---|
1485 | (Lyy (fold + 0 |
---|
1486 | (map (lambda (yi) (square (- yi y-bar))) ys))) |
---|
1487 | (Lxy (fold + 0 |
---|
1488 | (map (lambda (xi yi) (* (- xi x-bar) (- yi y-bar))) |
---|
1489 | xs ys))) |
---|
1490 | (b (if (zero? Lxx) 0 (/ Lxy Lxx))) |
---|
1491 | (a (- y-bar (* b x-bar))) |
---|
1492 | (reg-ss (* b Lxy)) |
---|
1493 | (res-ms (/ (- Lyy reg-ss) (- n 2))) |
---|
1494 | (r (/ Lxy (sqrt (* Lxx Lyy)))) |
---|
1495 | (r2 (/ reg-ss Lyy)) |
---|
1496 | (t-test (/ b (sqrt (/ res-ms Lxx)))) |
---|
1497 | (t-sign (t-significance t-test (- n 2) #:tails ':both))) |
---|
1498 | (format #t "~%Intercept = ~f, slope = ~f, r = ~f, R^2 = ~f, p = ~f~%" |
---|
1499 | a b r r2 t-sign) |
---|
1500 | (values a b r r2 t-sign)))) |
---|
1501 | |
---|
1502 | ;; CORRELATION-COEFFICIENT |
---|
1503 | ;; Also called Pearson Correlation |
---|
1504 | (define (correlation-coefficient points) |
---|
1505 | (let* ((xs (map car points)) |
---|
1506 | (ys (map cadr points)) |
---|
1507 | (x-bar (mean xs)) |
---|
1508 | (y-bar (mean ys))) |
---|
1509 | (/ (fold + 0 (map (lambda (xi yi) (* (- xi x-bar) |
---|
1510 | (- yi y-bar))) |
---|
1511 | xs |
---|
1512 | ys)) |
---|
1513 | (sqrt (* (fold + 0 (map (lambda (xi) (square (- xi x-bar))) |
---|
1514 | xs)) |
---|
1515 | (fold + 0 (map (lambda (yi) (square (- yi y-bar))) |
---|
1516 | ys))))))) |
---|
1517 | |
---|
1518 | |
---|
1519 | ;; CORRELATION-TEST-TWO-SAMPLE |
---|
1520 | ;; Rosner 464 |
---|
1521 | ;; Test if two correlation coefficients are different. Uses Fisher's Z test. |
---|
1522 | (define (correlation-test-two-sample r1 n1 r2 n2 . args) |
---|
1523 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1524 | (let* ((z1 (fisher-z-transform r1)) |
---|
1525 | (z2 (fisher-z-transform r2)) |
---|
1526 | (my-lambda (/ (- z1 z2) |
---|
1527 | (sqrt (+ (/ (- n1 3)) |
---|
1528 | (/ (- n2 3))))))) |
---|
1529 | (case tails |
---|
1530 | ((:both) (* 2 (if (<= my-lambda 0) (phi my-lambda) (- 1 (phi my-lambda))))) |
---|
1531 | ((:positive) (- 1 (phi my-lambda))) |
---|
1532 | ((:negative) (phi my-lambda)))))) |
---|
1533 | |
---|
1534 | (define (correlation-test-two-sample-on-sequences points1 points2 . args) |
---|
1535 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1536 | (let ((r1 (correlation-coefficient points1)) |
---|
1537 | (n1 (length points1)) |
---|
1538 | (r2 (correlation-coefficient points2)) |
---|
1539 | (n2 (length points2))) |
---|
1540 | (correlation-test-two-sample r1 n1 r2 n2 tails)))) |
---|
1541 | |
---|
1542 | ;; SPEARMAN-RANK-CORRELATION |
---|
1543 | ;; Rosner 498 |
---|
1544 | |
---|
1545 | ;; Spearman rank correlation computes the relationship between a pair of |
---|
1546 | ;; variables when one or both are either ordinal or have a distribution that |
---|
1547 | ;; is far from normal. It takes a list of points (same format as |
---|
1548 | ;; linear-regression) and returns the spearman rank correlation coefficient |
---|
1549 | ;; and its significance. |
---|
1550 | (define (spearman-rank-correlation points) |
---|
1551 | (let ((xis (map car points)) |
---|
1552 | (yis (map cadr points))) |
---|
1553 | (let* ((n (length points)) |
---|
1554 | (sorted-xis (list-sort < xis)) |
---|
1555 | (sorted-yis (list-sort < yis)) |
---|
1556 | (average-x-ranks (map (lambda (x) (average-rank x sorted-xis)) xis)) |
---|
1557 | (average-y-ranks (map (lambda (y) (average-rank y sorted-yis)) yis)) |
---|
1558 | (mean-x-rank (mean average-x-ranks)) |
---|
1559 | (mean-y-rank (mean average-y-ranks)) |
---|
1560 | (Lxx (fold + 0 |
---|
1561 | (map (lambda (xi-rank) (square (- xi-rank mean-x-rank))) |
---|
1562 | average-x-ranks))) |
---|
1563 | (Lyy (fold + 0 |
---|
1564 | (map (lambda (yi-rank) (square (- yi-rank mean-y-rank))) |
---|
1565 | average-y-ranks))) |
---|
1566 | (Lxy (fold + 0 |
---|
1567 | (map (lambda (xi-rank yi-rank) |
---|
1568 | (* (- xi-rank mean-x-rank) |
---|
1569 | (- yi-rank mean-y-rank))) |
---|
1570 | average-x-ranks average-y-ranks))) |
---|
1571 | (rs (if (> (* Lxx Lyy) 0) (/ Lxy (sqrt (* Lxx Lyy))) 0)) ; TODO: think about rs = 1 |
---|
1572 | (ts (if (= 1 rs) 1 (/ (* rs (sqrt (- n 2))) (sqrt (- 1 (square rs)))))) |
---|
1573 | (p (t-significance ts (- n 2) #:tails ':both))) |
---|
1574 | (format #t "~%Spearman correlation coefficient ~A, p = ~A~%" rs p) |
---|
1575 | (values rs p)))) |
---|
1576 | |
---|
1577 | ;; --------------------------------------------------------------------------- |
---|
1578 | ;; Significance test functions |
---|
1579 | ;; |
---|
1580 | |
---|
1581 | (define (t-significance t-statistic dof . args) |
---|
1582 | (set! t-statistic (exact->inexact t-statistic)) |
---|
1583 | (set! dof (exact->inexact dof)) |
---|
1584 | (let ((tails (get-keyword #:tails args (lambda () ':both)))) |
---|
1585 | (let ((a (beta-incomplete (* 0.5 dof) 0.5 (/ dof (+ dof (square t-statistic)))))) |
---|
1586 | (case tails |
---|
1587 | ((:both) |
---|
1588 | a) |
---|
1589 | ((:positive) |
---|
1590 | (if (positive? t-statistic) |
---|
1591 | (* 0.5 a) |
---|
1592 | (- 1.0 (* 0.5 a)))) |
---|
1593 | ((:negative) |
---|
1594 | (if (positive? t-statistic) |
---|
1595 | (- 1.0 (* 0.5 a)) |
---|
1596 | (* 0.5 a))))))) |
---|
1597 | |
---|
1598 | (define (f-significance f-statistic numerator-dof denominator-dof . args) |
---|
1599 | (let ((one-tailed? (get-keyword #:one-tailed? args (lambda () #f)))) |
---|
1600 | (let ((tail-area (beta-incomplete (* 0.5 denominator-dof) |
---|
1601 | (* 0.5 numerator-dof) |
---|
1602 | (/ denominator-dof |
---|
1603 | (+ denominator-dof |
---|
1604 | (* numerator-dof f-statistic)))))) |
---|
1605 | (if one-tailed? |
---|
1606 | (if (< f-statistic 1) |
---|
1607 | (- 1 tail-area) |
---|
1608 | tail-area) |
---|
1609 | (begin (set! tail-area (* 2.0 tail-area)) |
---|
1610 | (if (> tail-area 1.0) |
---|
1611 | (- 2.0 tail-area) |
---|
1612 | tail-area)))))) |
---|
1613 | |
---|
1614 | (rng-init) |
---|
1615 | |
---|
1616 | ) ; end of library |
---|
1617 | |
---|