Changeset 36371 in project
- Timestamp:
- 08/24/18 20:39:36 (6 months ago)
- Location:
- release/5/statistics/branches
- Files:
-
- 1 added
- 1 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
release/5/statistics/branches/collections/statistics.scm
r36303 r36371 178 178 ) 179 179 180 (import scheme (chicken base) (chicken foreign) (chicken format) (chicken keyword) (chicken sort) 181 srfi-1 (only srfi-13 string<) srfi-25 srfi-69 vector-lib) 180 (import scheme (chicken base) (chicken foreign) (chicken format) 181 (chicken keyword) (prefix (only srfi-1 fold iota reverse) list.) 182 (only srfi-13 string<) srfi-25 srfi-69 vector-lib 183 yasos yasos-collections data-series) 182 184 183 185 ;; --------------------------------------------------------------------------- … … 271 273 (define PI 3.1415926535897932385) 272 274 275 (define (position item sequence) 276 (let ((index -1)) 277 (do-items-while (lambda (x) 278 (let ((res (equal? item (cadr x)))) 279 (if res (set! index (car x))) 280 (not res))) 281 sequences) 282 (if (> index -1) 283 index 284 (error "Position: item not in sequence")))) 285 273 286 (define (array-shape arr) 274 287 (let ((r (array-rank arr))) … … 295 308 ;; belong in each. 296 309 (define (bin-and-count sequence n) 297 (let ((smallest (fold min (car sequence) (cdr sequence))) 298 (increment (/ (range sequence) 299 n)) 310 (let ((smallest (reduce* min sequence)) 311 (increment (/ (range sequence) n)) 300 312 (bins (make-vector n 0))) 301 313 (let loop ((bin 0)) … … 381 393 (* 0.5 (log (/ (+ 1 r) (- 1 r))))) 382 394 383 ;; taken from TSPL 3384 (define (list-sort comparison items)385 (define (dosort pred? ls n)386 (if (= n 1)387 (list (car ls))388 (let ((i (quotient n 2)))389 (domerge pred?390 (dosort pred? ls i)391 (dosort pred? (list-tail ls i) (- n i))))))392 (define (domerge pred? l1 l2)393 (cond ((null? l1) l2)394 ((null? l2) l1)395 ((pred? (car l2) (car l1))396 (cons (car l2) (domerge pred? l1 (cdr l2))))397 (else398 (cons (car l1) (domerge pred? (cdr l1) l2)))))399 (if (null? items)400 items401 (dosort comparison items (length items))))402 395 403 396 ;; PERMUTATIONS … … 405 398 ;; Rosner 88 406 399 (define (permutations n k) 407 (fold * 1 (iota k n -1))) 408 409 (define (position item items) 410 (let ((index (list-index (lambda (x) (equal? item x)) 411 items))) 412 (if (number? index) 413 index 414 (error "Position: item not in list")))) 400 (list.fold * 1 (list.iota k n -1))) 415 401 416 402 (define (cumsum sequence) 417 (if ( null? sequence)403 (if (empty? sequence) 418 404 0 419 (reverse (fold (lambda (x ax) (cons (+ x (car ax)) ax)) (list (car sequence)) (cdr sequence))))) 405 (list.reverse (reduce* (lambda (x ax) (cons (+ x (car ax)) ax)) 406 sequence)))) 420 407 421 408 (define (sign x) … … 432 419 x) 433 420 434 (define (vector-sort comparison items)435 (list->vector (list-sort comparison (vector->list items))))436 437 421 ;; RANDOM-NORMAL 438 422 ;; returns a random number with mean and standard-distribution as specified. … … 440 424 (+ mean (* sd (/ (random-uniform-int 1000000) 1000000)))) 441 425 442 ;; RANDOM-PICK443 ;; Random selection from list444 (define (random-pick items)445 (if (and (list? items) (not (null? items)))446 (list-ref items (random-uniform-int (length items)))447 #f))448 426 449 427 ;; RANDOM-SAMPLE … … 451 429 ;; If N is equal to or greater than the length of the sequence, return 452 430 ;; the entire sequence. 453 (define (random-sample n items) 454 (cond ((<= n 0) 455 '()) 456 ((>= n (length items)) 457 items) 458 (else 459 (let loop ((remaining items) 460 (kept '())) 461 (if (= (length kept) n) 462 kept 463 (let ((one (random-pick remaining))) 464 (loop (delete one remaining) 465 (cons one kept)))))))) 466 431 (define (random-sample n sequence) 432 (let ((result (make-vector n))) 433 (cond ((<= n 0) 434 (list->vector '())) 435 (else 436 (let loop ((remaining (gen-elts sequence)) 437 (l 0)) 438 (let ((item (remaining))) 439 (if (not item) 440 result 441 (begin 442 (if (< l n) 443 (vector-set! result l item) 444 (let ((i (random-uniform-int n))) 445 (vector-set! result i item))) 446 (loop remaining (+ 1 l)) 447 )) 448 )) 449 )) 450 )) 451 452 467 453 ;; RANDOM-WEIGHTED-SAMPLE Return a random sample of size M from 468 454 ;; sequence of length N, without replacement, where each element has 469 455 ;; a defined probability of selection (weight) W. If M is equal to 470 456 ;; or greater to N, return the entire sequence. 471 (define (random-weighted-sample m itemsweights)472 (let ((n ( length items)))457 (define (random-weighted-sample m sequence weights) 458 (let ((n (size sequence))) 473 459 (cond ((<= m 0) '()) 474 ((>= m n) items)460 ((>= m n) sequence) 475 461 (else 476 (let* ((keys (map (lambda (w) (expt (random-uniform) (/ 1 w))) weights)) 477 (sorted-items (list-sort (lambda (x y) (> (car x) (car y))) (zip keys items)))) 478 (map cadr (take sorted-items m))) 462 (let* ((keys (map-elts (lambda (w) (expt (random-uniform) (/ 1 w))) weights)) 463 (sorted-items (sort (lambda (x y) (> (car (cadr x)) (car (cadr y)))) 464 (zip keys sequence)))) 465 (elt-take sorted-items m)) 479 466 )) 480 467 )) … … 487 474 ;; Rosner 10 488 475 (define (mean sequence) 489 (if ( null? sequence)476 (if (empty? sequence) 490 477 0 491 (/ ( fold + 0 sequence) (lengthsequence))))478 (/ (reduce (lambda (x ax) (+ (cadr x) ax)) 0 sequence) (size sequence)))) 492 479 493 480 ;; MEDIAN … … 500 487 ;; Returns two values: a list of the modes and the number of times they occur 501 488 (define (mode sequence) 502 (if ( null? sequence)489 (if (empty? sequence) 503 490 (error "Mode: Sequence must not be null") 504 491 (let ((count-table (make-hash-table eqv?)) 505 492 (modes '()) 506 493 (mode-count 0)) 507 (for-each (lambda (item) 508 (hash-table-set! count-table 509 item 510 (+ 1 (hash-table-ref count-table item (lambda () 0))))) 511 sequence) 512 (for-each (lambda (key) 513 (let ((val (hash-table-ref count-table key (lambda () #f)))) 514 (cond ((> val mode-count) ; keep mode 515 (set! modes (list key)) 516 (set! mode-count val)) 517 ((= val mode-count) ; store multiple modes 518 (set! modes (cons key modes)))))) 519 (hash-table-keys count-table)) 520 (cond ((every number? modes) (set! modes (list-sort < modes))) 521 ((every string? modes) (set! modes (list-sort string< modes))) 494 (for-each-elt 495 (lambda (item) 496 (hash-table-set! count-table 497 item 498 (+ 1 (hash-table-ref count-table item (lambda () 0))))) 499 sequence) 500 (for-each 501 (lambda (key) 502 (let ((val (hash-table-ref count-table key (lambda () #f)))) 503 (cond ((> val mode-count) ; keep mode 504 (set! modes (list key)) 505 (set! mode-count val)) 506 ((= val mode-count) ; store multiple modes 507 (set! modes (cons key modes)))))) 508 (hash-table-keys count-table)) 509 (cond ((every number? modes) (set! modes (sort < modes))) 510 ((every string? modes) (set! modes (sort string< modes))) 522 511 ) 523 512 (values modes mode-count)))) … … 527 516 (define (geometric-mean sequence . args) 528 517 (let ((base (if (null? args) 10 (car args)))) 529 (expt base (mean (map (lambda (x) (/ (log x)530 (log base)))531 sequence)))))518 (expt base (mean (map-elts (lambda (x) (/ (log x) 519 (log base))) 520 sequence))))) 532 521 533 522 ;; RANGE 534 523 ;; Rosner 18 535 524 (define (range sequence) 536 (if ( null? sequence)525 (if (empty? sequence) 537 526 0 538 (- ( fold max (car sequence) (cdr sequence))539 ( fold min (car sequence) (cdr sequence)))))527 (- (reduce* max sequence) 528 (reduce* min sequence)))) 540 529 541 530 ;; PERCENTILE 542 531 ;; Rosner 19 543 532 (define (percentile sequence percent) 544 (if ( null? sequence)533 (if (empty? sequence) 545 534 (error "Percentile: Sequence must not be null") 546 (let* ((sorted- vec (vector-sort < (apply vector sequence)))547 (n ( vector-length sorted-vec))535 (let* ((sorted-items (sort (lambda (i vi j vj) (< vi vj)) sequence)) 536 (n (size sorted-items)) 548 537 (k (* n (/ percent 100))) 549 538 (floor-k (floor k))) 550 539 (if (= k floor-k) 551 (/ (+ ( vector-ref sorted-veck)552 ( vector-ref sorted-vec(- k 1)))540 (/ (+ (elt-ref sorted-items k) 541 (elt-ref sorted-items (- k 1))) 553 542 2) 554 ( vector-ref sorted-vecfloor-k)))))543 (elt-ref sorted-items floor-k))))) 555 544 556 545 ;; VARIANCE 557 546 ;; Rosner 21 558 547 (define (variance sequence) 559 (if (< ( lengthsequence) 2)548 (if (< (size sequence) 2) 560 549 (error "variance: sequence must contain at least two elements") 561 550 (let ((mean1 (mean sequence))) 562 (/ (fold + 0 (map (lambda (x) (square (- mean1 x))) sequence)) 563 (- (length sequence) 1))))) 551 (/ (reduce (lambda (x ax) (+ (cadr x) ax)) 0 552 (map (lambda (x) (square (- mean1 x))) sequence)) 553 (- (size sequence) 1))))) 564 554 565 555 ;; STANDARD-DEVIATION … … 1170 1160 (let ((tails (get-keyword #:tails args (lambda () ':both)))) 1171 1161 (let* ((nonzero-differences (filter (lambda (n) (not (zero? n))) differences)) 1172 (sorted- list (list-sort (lambda (x y) (< (car x) (car y)))1173 (map(lambda (dif)1174 (list (abs dif)1175 (sign dif)))1176 nonzero-differences)))1162 (sorted-items (sort (lambda (x y) (< (car x) (car y))) 1163 (map-items (lambda (dif) 1164 (list (abs dif) 1165 (sign dif))) 1166 nonzero-differences))) 1177 1167 (distinct-values (delete-duplicates (map car sorted-list))) 1178 1168 (ties '())) 1179 (when (< ( lengthnonzero-differences) 16)1169 (when (< (size nonzero-differences) 16) 1180 1170 (error 1181 1171 "This Wilcoxon Signed-Rank Test (normal approximation method) requires nonzero N > 15")) … … 1470 1460 ;; the significance of the difference of the slope from 0. 1471 1461 (define (linear-regression points) 1472 (unless (> ( lengthpoints) 2)1462 (unless (> (size points) 2) 1473 1463 (error "Requires at least three points")) 1474 (let ((xs ( map car points))1475 (ys ( map cadr points)))1464 (let ((xs (elt-map car points)) 1465 (ys (elt-map cadr points))) 1476 1466 (let* ((x-bar (mean xs)) 1477 1467 (y-bar (mean ys)) 1478 (n ( lengthpoints))1479 (Lxx ( fold+ 01468 (n (size points)) 1469 (Lxx (reduce + 0 1480 1470 (map (lambda (xi) (square (- xi x-bar))) xs))) 1481 (Lyy ( fold+ 01471 (Lyy (reduce + 0 1482 1472 (map (lambda (yi) (square (- yi y-bar))) ys))) 1483 (Lxy (fold + 0 1484 (map (lambda (xi yi) (* (- xi x-bar) (- yi y-bar))) 1485 xs ys))) 1473 (Lxy (reduce + 0 1474 (elt-map (lambda (point) (let ((xi (car point)) 1475 (yi (car point))) 1476 (* (- xi x-bar) (- yi y-bar)))) 1477 points))) 1486 1478 (b (if (zero? Lxx) 0 (/ Lxy Lxx))) 1487 1479 (a (- y-bar (* b x-bar))) … … 1499 1491 ;; Also called Pearson Correlation 1500 1492 (define (correlation-coefficient points) 1501 (let* ((xs ( map car points))1502 (ys ( map cadr points))1493 (let* ((xs (elt-map car points)) 1494 (ys (elt-map cadr points)) 1503 1495 (x-bar (mean xs)) 1504 1496 (y-bar (mean ys))) 1505 (/ (fold + 0 (map (lambda (xi yi) (* (- xi x-bar) 1506 (- yi y-bar))) 1507 xs 1508 ys)) 1509 (sqrt (* (fold + 0 (map (lambda (xi) (square (- xi x-bar))) 1510 xs)) 1511 (fold + 0 (map (lambda (yi) (square (- yi y-bar))) 1512 ys))))))) 1497 (/ (reduce + 0 (elt-map (lambda (point) 1498 (let ((xi (car point)) 1499 (yi (cadr point))) 1500 (* (- xi x-bar) (- yi y-bar)))) 1501 points)) 1502 (sqrt (* (reduce + 0 (elt-map (lambda (xi) (square (- xi x-bar))) 1503 xs)) 1504 (reduce + 0 (elt-map (lambda (yi) (square (- yi y-bar))) 1505 ys))))))) 1513 1506 1514 1507 … … 1545 1538 ;; and its significance. 1546 1539 (define (spearman-rank-correlation points) 1547 (let ((xis ( map car points))1548 (yis ( map cadr points)))1549 (let* ((n ( lengthpoints))1550 (sorted-xis ( list-sort <xis))1551 (sorted-yis ( list-sort <yis))1540 (let ((xis (elt-map car points)) 1541 (yis (elt-map cadr points))) 1542 (let* ((n (size points)) 1543 (sorted-xis (sort (lambda (xi x yi y) (< x y)) xis)) 1544 (sorted-yis (sort (lambda (xi x yi y) (< x y)) yis)) 1552 1545 (average-x-ranks (map (lambda (x) (average-rank x sorted-xis)) xis)) 1553 1546 (average-y-ranks (map (lambda (y) (average-rank y sorted-yis)) yis)) 1554 1547 (mean-x-rank (mean average-x-ranks)) 1555 1548 (mean-y-rank (mean average-y-ranks)) 1556 (Lxx ( fold+ 01557 ( map (lambda (xi-rank) (square (- xi-rank mean-x-rank)))1558 average-x-ranks)))1559 (Lyy ( fold+ 01560 (map (lambda (yi-rank) (square (- yi-rank mean-y-rank)))1561 average-y-ranks)))1562 (Lxy ( fold+ 01563 (map (lambda (xi-rank yi-rank)1564 (* (- xi-rank mean-x-rank)1565 (- yi-rank mean-y-rank)))1566 average-x-ranks average-y-ranks)))1549 (Lxx (reduce + 0 1550 (elt-map (lambda (xi-rank) (square (- xi-rank mean-x-rank))) 1551 average-x-ranks))) 1552 (Lyy (reduce + 0 1553 (elt-map (lambda (yi-rank) (square (- yi-rank mean-y-rank))) 1554 average-y-ranks))) 1555 (Lxy (reduce + 0 1556 (elt-map (lambda (xi-rank yi-rank) 1557 (* (- xi-rank mean-x-rank) 1558 (- yi-rank mean-y-rank))) 1559 average-x-ranks average-y-ranks))) 1567 1560 (rs (if (> (* Lxx Lyy) 0) (/ Lxy (sqrt (* Lxx Lyy))) 0)) ; TODO: think about rs = 1 1568 1561 (ts (if (= 1 rs) 1 (/ (* rs (sqrt (- n 2))) (sqrt (- 1 (square rs))))))
Note: See TracChangeset
for help on using the changeset viewer.