Changeset 36371 in project
 Timestamp:
 08/24/18 20:39:36 (3 months ago)
 Location:
 release/5/statistics/branches
 Files:

 1 added
 1 edited
 1 copied
Legend:
 Unmodified
 Added
 Removed

release/5/statistics/branches/collections/statistics.scm
r36303 r36371 178 178 ) 179 179 180 (import scheme (chicken base) (chicken foreign) (chicken format) (chicken keyword) (chicken sort) 181 srfi1 (only srfi13 string<) srfi25 srfi69 vectorlib) 180 (import scheme (chicken base) (chicken foreign) (chicken format) 181 (chicken keyword) (prefix (only srfi1 fold iota reverse) list.) 182 (only srfi13 string<) srfi25 srfi69 vectorlib 183 yasos yasoscollections dataseries) 182 184 183 185 ;;  … … 271 273 (define PI 3.1415926535897932385) 272 274 275 (define (position item sequence) 276 (let ((index 1)) 277 (doitemswhile (lambda (x) 278 (let ((res (equal? item (cadr x)))) 279 (if res (set! index (car x))) 280 (not res))) 281 sequences) 282 (if (> index 1) 283 index 284 (error "Position: item not in sequence")))) 285 273 286 (define (arrayshape arr) 274 287 (let ((r (arrayrank arr))) … … 295 308 ;; belong in each. 296 309 (define (binandcount sequence n) 297 (let ((smallest (fold min (car sequence) (cdr sequence))) 298 (increment (/ (range sequence) 299 n)) 310 (let ((smallest (reduce* min sequence)) 311 (increment (/ (range sequence) n)) 300 312 (bins (makevector n 0))) 301 313 (let loop ((bin 0)) … … 381 393 (* 0.5 (log (/ (+ 1 r) ( 1 r))))) 382 394 383 ;; taken from TSPL 3384 (define (listsort comparison items)385 (define (dosort pred? ls n)386 (if (= n 1)387 (list (car ls))388 (let ((i (quotient n 2)))389 (domerge pred?390 (dosort pred? ls i)391 (dosort pred? (listtail ls i) ( n i))))))392 (define (domerge pred? l1 l2)393 (cond ((null? l1) l2)394 ((null? l2) l1)395 ((pred? (car l2) (car l1))396 (cons (car l2) (domerge pred? l1 (cdr l2))))397 (else398 (cons (car l1) (domerge pred? (cdr l1) l2)))))399 (if (null? items)400 items401 (dosort comparison items (length items))))402 395 403 396 ;; PERMUTATIONS … … 405 398 ;; Rosner 88 406 399 (define (permutations n k) 407 (fold * 1 (iota k n 1))) 408 409 (define (position item items) 410 (let ((index (listindex (lambda (x) (equal? item x)) 411 items))) 412 (if (number? index) 413 index 414 (error "Position: item not in list")))) 400 (list.fold * 1 (list.iota k n 1))) 415 401 416 402 (define (cumsum sequence) 417 (if ( null? sequence)403 (if (empty? sequence) 418 404 0 419 (reverse (fold (lambda (x ax) (cons (+ x (car ax)) ax)) (list (car sequence)) (cdr sequence))))) 405 (list.reverse (reduce* (lambda (x ax) (cons (+ x (car ax)) ax)) 406 sequence)))) 420 407 421 408 (define (sign x) … … 432 419 x) 433 420 434 (define (vectorsort comparison items)435 (list>vector (listsort comparison (vector>list items))))436 437 421 ;; RANDOMNORMAL 438 422 ;; returns a random number with mean and standarddistribution as specified. … … 440 424 (+ mean (* sd (/ (randomuniformint 1000000) 1000000)))) 441 425 442 ;; RANDOMPICK443 ;; Random selection from list444 (define (randompick items)445 (if (and (list? items) (not (null? items)))446 (listref items (randomuniformint (length items)))447 #f))448 426 449 427 ;; RANDOMSAMPLE … … 451 429 ;; If N is equal to or greater than the length of the sequence, return 452 430 ;; the entire sequence. 453 (define (randomsample n items) 454 (cond ((<= n 0) 455 '()) 456 ((>= n (length items)) 457 items) 458 (else 459 (let loop ((remaining items) 460 (kept '())) 461 (if (= (length kept) n) 462 kept 463 (let ((one (randompick remaining))) 464 (loop (delete one remaining) 465 (cons one kept)))))))) 466 431 (define (randomsample n sequence) 432 (let ((result (makevector n))) 433 (cond ((<= n 0) 434 (list>vector '())) 435 (else 436 (let loop ((remaining (genelts sequence)) 437 (l 0)) 438 (let ((item (remaining))) 439 (if (not item) 440 result 441 (begin 442 (if (< l n) 443 (vectorset! result l item) 444 (let ((i (randomuniformint n))) 445 (vectorset! result i item))) 446 (loop remaining (+ 1 l)) 447 )) 448 )) 449 )) 450 )) 451 452 467 453 ;; RANDOMWEIGHTEDSAMPLE Return a random sample of size M from 468 454 ;; sequence of length N, without replacement, where each element has 469 455 ;; a defined probability of selection (weight) W. If M is equal to 470 456 ;; or greater to N, return the entire sequence. 471 (define (randomweightedsample m itemsweights)472 (let ((n ( length items)))457 (define (randomweightedsample m sequence weights) 458 (let ((n (size sequence))) 473 459 (cond ((<= m 0) '()) 474 ((>= m n) items)460 ((>= m n) sequence) 475 461 (else 476 (let* ((keys (map (lambda (w) (expt (randomuniform) (/ 1 w))) weights)) 477 (sorteditems (listsort (lambda (x y) (> (car x) (car y))) (zip keys items)))) 478 (map cadr (take sorteditems m))) 462 (let* ((keys (mapelts (lambda (w) (expt (randomuniform) (/ 1 w))) weights)) 463 (sorteditems (sort (lambda (x y) (> (car (cadr x)) (car (cadr y)))) 464 (zip keys sequence)))) 465 (elttake sorteditems m)) 479 466 )) 480 467 )) … … 487 474 ;; Rosner 10 488 475 (define (mean sequence) 489 (if ( null? sequence)476 (if (empty? sequence) 490 477 0 491 (/ ( fold + 0 sequence) (lengthsequence))))478 (/ (reduce (lambda (x ax) (+ (cadr x) ax)) 0 sequence) (size sequence)))) 492 479 493 480 ;; MEDIAN … … 500 487 ;; Returns two values: a list of the modes and the number of times they occur 501 488 (define (mode sequence) 502 (if ( null? sequence)489 (if (empty? sequence) 503 490 (error "Mode: Sequence must not be null") 504 491 (let ((counttable (makehashtable eqv?)) 505 492 (modes '()) 506 493 (modecount 0)) 507 (foreach (lambda (item) 508 (hashtableset! counttable 509 item 510 (+ 1 (hashtableref counttable item (lambda () 0))))) 511 sequence) 512 (foreach (lambda (key) 513 (let ((val (hashtableref counttable key (lambda () #f)))) 514 (cond ((> val modecount) ; keep mode 515 (set! modes (list key)) 516 (set! modecount val)) 517 ((= val modecount) ; store multiple modes 518 (set! modes (cons key modes)))))) 519 (hashtablekeys counttable)) 520 (cond ((every number? modes) (set! modes (listsort < modes))) 521 ((every string? modes) (set! modes (listsort string< modes))) 494 (foreachelt 495 (lambda (item) 496 (hashtableset! counttable 497 item 498 (+ 1 (hashtableref counttable item (lambda () 0))))) 499 sequence) 500 (foreach 501 (lambda (key) 502 (let ((val (hashtableref counttable key (lambda () #f)))) 503 (cond ((> val modecount) ; keep mode 504 (set! modes (list key)) 505 (set! modecount val)) 506 ((= val modecount) ; store multiple modes 507 (set! modes (cons key modes)))))) 508 (hashtablekeys counttable)) 509 (cond ((every number? modes) (set! modes (sort < modes))) 510 ((every string? modes) (set! modes (sort string< modes))) 522 511 ) 523 512 (values modes modecount)))) … … 527 516 (define (geometricmean sequence . args) 528 517 (let ((base (if (null? args) 10 (car args)))) 529 (expt base (mean (map (lambda (x) (/ (log x)530 (log base)))531 sequence)))))518 (expt base (mean (mapelts (lambda (x) (/ (log x) 519 (log base))) 520 sequence))))) 532 521 533 522 ;; RANGE 534 523 ;; Rosner 18 535 524 (define (range sequence) 536 (if ( null? sequence)525 (if (empty? sequence) 537 526 0 538 ( ( fold max (car sequence) (cdr sequence))539 ( fold min (car sequence) (cdr sequence)))))527 ( (reduce* max sequence) 528 (reduce* min sequence)))) 540 529 541 530 ;; PERCENTILE 542 531 ;; Rosner 19 543 532 (define (percentile sequence percent) 544 (if ( null? sequence)533 (if (empty? sequence) 545 534 (error "Percentile: Sequence must not be null") 546 (let* ((sorted vec (vectorsort < (apply vector sequence)))547 (n ( vectorlength sortedvec))535 (let* ((sorteditems (sort (lambda (i vi j vj) (< vi vj)) sequence)) 536 (n (size sorteditems)) 548 537 (k (* n (/ percent 100))) 549 538 (floork (floor k))) 550 539 (if (= k floork) 551 (/ (+ ( vectorref sortedveck)552 ( vectorref sortedvec( k 1)))540 (/ (+ (eltref sorteditems k) 541 (eltref sorteditems ( k 1))) 553 542 2) 554 ( vectorref sortedvecfloork)))))543 (eltref sorteditems floork))))) 555 544 556 545 ;; VARIANCE 557 546 ;; Rosner 21 558 547 (define (variance sequence) 559 (if (< ( lengthsequence) 2)548 (if (< (size sequence) 2) 560 549 (error "variance: sequence must contain at least two elements") 561 550 (let ((mean1 (mean sequence))) 562 (/ (fold + 0 (map (lambda (x) (square ( mean1 x))) sequence)) 563 ( (length sequence) 1))))) 551 (/ (reduce (lambda (x ax) (+ (cadr x) ax)) 0 552 (map (lambda (x) (square ( mean1 x))) sequence)) 553 ( (size sequence) 1))))) 564 554 565 555 ;; STANDARDDEVIATION … … 1170 1160 (let ((tails (getkeyword #:tails args (lambda () ':both)))) 1171 1161 (let* ((nonzerodifferences (filter (lambda (n) (not (zero? n))) differences)) 1172 (sorted list (listsort (lambda (x y) (< (car x) (car y)))1173 (map(lambda (dif)1174 (list (abs dif)1175 (sign dif)))1176 nonzerodifferences)))1162 (sorteditems (sort (lambda (x y) (< (car x) (car y))) 1163 (mapitems (lambda (dif) 1164 (list (abs dif) 1165 (sign dif))) 1166 nonzerodifferences))) 1177 1167 (distinctvalues (deleteduplicates (map car sortedlist))) 1178 1168 (ties '())) 1179 (when (< ( lengthnonzerodifferences) 16)1169 (when (< (size nonzerodifferences) 16) 1180 1170 (error 1181 1171 "This Wilcoxon SignedRank Test (normal approximation method) requires nonzero N > 15")) … … 1470 1460 ;; the significance of the difference of the slope from 0. 1471 1461 (define (linearregression points) 1472 (unless (> ( lengthpoints) 2)1462 (unless (> (size points) 2) 1473 1463 (error "Requires at least three points")) 1474 (let ((xs ( map car points))1475 (ys ( map cadr points)))1464 (let ((xs (eltmap car points)) 1465 (ys (eltmap cadr points))) 1476 1466 (let* ((xbar (mean xs)) 1477 1467 (ybar (mean ys)) 1478 (n ( lengthpoints))1479 (Lxx ( fold+ 01468 (n (size points)) 1469 (Lxx (reduce + 0 1480 1470 (map (lambda (xi) (square ( xi xbar))) xs))) 1481 (Lyy ( fold+ 01471 (Lyy (reduce + 0 1482 1472 (map (lambda (yi) (square ( yi ybar))) ys))) 1483 (Lxy (fold + 0 1484 (map (lambda (xi yi) (* ( xi xbar) ( yi ybar))) 1485 xs ys))) 1473 (Lxy (reduce + 0 1474 (eltmap (lambda (point) (let ((xi (car point)) 1475 (yi (car point))) 1476 (* ( xi xbar) ( yi ybar)))) 1477 points))) 1486 1478 (b (if (zero? Lxx) 0 (/ Lxy Lxx))) 1487 1479 (a ( ybar (* b xbar))) … … 1499 1491 ;; Also called Pearson Correlation 1500 1492 (define (correlationcoefficient points) 1501 (let* ((xs ( map car points))1502 (ys ( map cadr points))1493 (let* ((xs (eltmap car points)) 1494 (ys (eltmap cadr points)) 1503 1495 (xbar (mean xs)) 1504 1496 (ybar (mean ys))) 1505 (/ (fold + 0 (map (lambda (xi yi) (* ( xi xbar) 1506 ( yi ybar))) 1507 xs 1508 ys)) 1509 (sqrt (* (fold + 0 (map (lambda (xi) (square ( xi xbar))) 1510 xs)) 1511 (fold + 0 (map (lambda (yi) (square ( yi ybar))) 1512 ys))))))) 1497 (/ (reduce + 0 (eltmap (lambda (point) 1498 (let ((xi (car point)) 1499 (yi (cadr point))) 1500 (* ( xi xbar) ( yi ybar)))) 1501 points)) 1502 (sqrt (* (reduce + 0 (eltmap (lambda (xi) (square ( xi xbar))) 1503 xs)) 1504 (reduce + 0 (eltmap (lambda (yi) (square ( yi ybar))) 1505 ys))))))) 1513 1506 1514 1507 … … 1545 1538 ;; and its significance. 1546 1539 (define (spearmanrankcorrelation points) 1547 (let ((xis ( map car points))1548 (yis ( map cadr points)))1549 (let* ((n ( lengthpoints))1550 (sortedxis ( listsort <xis))1551 (sortedyis ( listsort <yis))1540 (let ((xis (eltmap car points)) 1541 (yis (eltmap cadr points))) 1542 (let* ((n (size points)) 1543 (sortedxis (sort (lambda (xi x yi y) (< x y)) xis)) 1544 (sortedyis (sort (lambda (xi x yi y) (< x y)) yis)) 1552 1545 (averagexranks (map (lambda (x) (averagerank x sortedxis)) xis)) 1553 1546 (averageyranks (map (lambda (y) (averagerank y sortedyis)) yis)) 1554 1547 (meanxrank (mean averagexranks)) 1555 1548 (meanyrank (mean averageyranks)) 1556 (Lxx ( fold+ 01557 ( map (lambda (xirank) (square ( xirank meanxrank)))1558 averagexranks)))1559 (Lyy ( fold+ 01560 (map (lambda (yirank) (square ( yirank meanyrank)))1561 averageyranks)))1562 (Lxy ( fold+ 01563 (map (lambda (xirank yirank)1564 (* ( xirank meanxrank)1565 ( yirank meanyrank)))1566 averagexranks averageyranks)))1549 (Lxx (reduce + 0 1550 (eltmap (lambda (xirank) (square ( xirank meanxrank))) 1551 averagexranks))) 1552 (Lyy (reduce + 0 1553 (eltmap (lambda (yirank) (square ( yirank meanyrank))) 1554 averageyranks))) 1555 (Lxy (reduce + 0 1556 (eltmap (lambda (xirank yirank) 1557 (* ( xirank meanxrank) 1558 ( yirank meanyrank))) 1559 averagexranks averageyranks))) 1567 1560 (rs (if (> (* Lxx Lyy) 0) (/ Lxy (sqrt (* Lxx Lyy))) 0)) ; TODO: think about rs = 1 1568 1561 (ts (if (= 1 rs) 1 (/ (* rs (sqrt ( n 2))) (sqrt ( 1 (square rs))))))
Note: See TracChangeset
for help on using the changeset viewer.