Changeset 17974 in project


Ignore:
Timestamp:
04/30/10 22:37:58 (9 years ago)
Author:
zbigniew
Message:

chicken-doc v0.3.5:

  • Delayed id and path string cache
  • Delayed node metadata
  • repository object; shared id cache
  • export and flesh out node API for chickadee
Location:
release/4/chicken-doc/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • release/4/chicken-doc/trunk/chicken-doc.scm

    r17837 r17974  
    11;;; chicken-doc
    2 
    3 ;; FIXME: Quite a few things are exported for chicken-doc-admin's use only
    4 ;; such as id->key.  Furthermore even certain regular things shouldn't
    5 ;; be exported to the REPL.
    62
    73(include "chicken-doc-text.scm") ; local module
     
    106;; Used by chicken-doc command
    117(verify-repository
     8 open-repository close-repository locate-repository current-repository
    129 repository-base
    1310 describe-signatures
     
    1815;; Used additionally by chicken-doc-admin.  Somewhat internal, but exported.
    1916 repository-information repository-root
    20  repository-magic repository-version
    21  id-cache id-cache-filename id-cache-mtime id-cache-add-directory!
    22  path->keys keys->pathname field-filename keys+field->pathname
     17 repository-magic +repository-version+
     18 repository-id-cache set-repository-id-cache!
     19 path->keys keys->pathname field-filename keys+field->pathname key->id
     20 make-id-cache id-cache-filename
    2321;; Node API
    2422 lookup-node
     23 match-nodes
     24 match-node-paths/re
    2525 node-signature
    2626 node-type
     27 node-sxml
     28 node-path
     29 node-children
     30 node-child-ids         ; experimental
    2731;; Other API
    2832 decompose-qualified-path
     
    4145(define wrap-column
    4246  (make-parameter 76))   ; 0 or #f for no wrapping
    43 (define repository-base
    44   (make-parameter #f))
    45 
    46 (define (locate-repository)
    47   (or (getenv "CHICKEN_DOC_REPOSITORY")
    48       (make-pathname (chicken-home) "chicken-doc")))
    49 
    50 ;; Hmm--should we set this on module load?
    51 (repository-base (locate-repository))
    5247
    5348;;; Lowlevel
    54 
    55 (define (repository-root)
    56   (make-pathname (repository-base) "root"))
    5749
    5850(define +rx:%escape+ (irregex "[%/,.*<>?!: ]"))
     
    8375  (map id->key path))
    8476(define (keys->pathname keys)
    85   (make-pathname (cons (repository-root) keys) #f))
     77  (make-pathname (cons (repository-root (current-repository)) keys) #f))
    8678(define (field-filename name)
    8779  (string-append "," (->string name)))
     
    109101
    110102(define-record-type chicken-doc-node
    111   (make-node path id md)
     103  (%make-node path id md)
    112104  node?
    113105  (path node-path)            ; includes ID
     
    115107  (md node-md))
    116108
     109(define (make-node path id)
     110  (%make-node path id
     111              (delay (read-path-metadata path))))
     112
    117113;; Return string list of child keys (directories) directly under PATH, or #f
    118114;; if the PATH is invalid.
    119115
     116(define (path-child-keys path)
     117  (let* ((keys (path->keys path))
     118         (dir (keys->pathname keys)))
     119    (and (directory? dir)
     120         (filter (lambda (x) (not (eqv? (string-ref x 0) #\,)))  ;; Contains hardcoded ,
     121                 (directory dir)))))
     122
    120123(define (node-children node)
    121   (define (path-child-keys path)
    122     (let* ((keys (path->keys path))
    123            (dir (keys->pathname keys)))
    124       (and (directory? dir)
    125            (filter (lambda (x) (not (eqv? (string-ref x 0) #\,)))  ;; Contains hardcoded ,
    126                    (directory dir)))))
    127124  (let ((path (node-path node)))
    128125    (map (lambda (k)
    129126           (lookup-node (append path (list (key->id k)))))
    130127         (path-child-keys path))))
     128
     129;; Shouldn't be necessary -- normally you should use node-children --
     130;; but currently a node lookup populates the node with metadata,
     131;; which wastes some time if you only need ids.  Ideally metadata
     132;; would be loaded lazily or lookup speed would be faster.
     133(define (node-child-ids node)
     134  (map key->id (path-child-keys (node-path node))))
    131135
    132136;; Obtain metadata alist at PATH.  Valid node without metadata record
     
    150154        (else #f)))
    151155
    152 (define node-metadata node-md)    ; Alternatively, load metadata as needed.
     156(define (node-metadata node)
     157  (force (node-md node)))         ;  load metadata as needed
    153158
    154159;; Return node record at PATH or throw error if the record does
     
    158163                ""   ; TOC
    159164                (last path))))
    160     (make-node path id (read-path-metadata path))))
     165    ;; Note that, if metadata is delayed, our API requires that
     166    ;; the node be checked for existence here.  If instead nodes
     167    ;; were not required to exist, and a manual existence check
     168    ;; were possible, we could avoid the directory touch when
     169    ;; merely matching against nodes.
     170    (let* ((keys (path->keys path))   ; FIXME!! path->keys is noticeably slow [*]
     171           (pathname (keys->pathname keys)))
     172      (or (directory? pathname)
     173          (error "no such node" path)))          ; now required
     174    (make-node path id)))
     175; [*] ,t (let loop ((n 100000)) (if (= n 0) 'done (begin (path->keys '(abc def ghi)) (loop (- n 1)))))  -> 1.66 seconds elapsed, 17 major GCs, 1.2M mutations
     176; [*] ,t (let loop ((n 100000)) (if (= n 0) 'done (begin (path->keys '(abc d.f ghi)) (loop (- n 1)))))  -> 2.76 seconds, 40 GCs, 1.6M mutations
     177; [*] ,t (let loop ((n 100000)) (if (= n 0) 'done (begin (path->keys '("abc" "def" "ghi")) (loop (- n 1))))) -> 0.876 seconds, 6 major GCs, 1.2M mutations
     178;; uri-encode-string takes 5x as long as id->key, skip it
    161179
    162180;; Return string representing signature of PATH.  If no signature, return "".
     
    233251;;; ID search cache
    234252
    235 (define id-cache
    236   (make-parameter #f))
    237 (define (id-cache-filename)
    238   (make-pathname (repository-base) "id.idx"))
    239 (define id-cache-mtime
    240   (make-parameter 0))
    241 (define (id-cache-add-directory! pathname)
    242   (let ((id (key->id (pathname-file pathname)))
    243         ;; We don't save the ID name in the value (since it is in the key)
    244         (val (map key->id (butlast (string-split pathname "/\\")))))   ;; hmm
    245     (hash-table-update!/default (id-cache) id (lambda (old) (cons val old)) '())))
    246 (define (read-id-cache!)
    247   (id-cache
    248    (call-with-input-file (id-cache-filename)
    249      (lambda (in)
    250        (id-cache-mtime (file-modification-time (port->fileno in)))
    251        (alist->hash-table (read in) eq?)))))
     253;; Cache is unique to repository but is shared between
     254;; threads holding the same repository object.
     255(define-record-type id-cache
     256  (%make-id-cache table mtime filename
     257                  ids ; id string list
     258                  paths ; path string list
     259                  )
     260  id-cache?
     261  (table id-cache-table)
     262  (mtime id-cache-mtime)
     263  (filename id-cache-filename)
     264  (ids %id-cache-ids)
     265  (paths %id-cache-paths))
     266
     267;; Delayed construction of id string list and paths is legal
     268;; because cache updates are disallowed.
     269(define (make-id-cache table mtime filename)
     270  (%make-id-cache table mtime filename
     271                  (delay (sort (map symbol->string (hash-table-keys table))
     272                               string<?))
     273                  (delay (sort
     274                          (flatten
     275                           (hash-table-fold
     276                            table
     277                            (lambda (k v s)
     278                              (cons
     279                               (map (lambda (x)
     280                                      (string-intersperse
     281                                       (map ->string (append x (list k))) " "))
     282                                    v)
     283                               s))
     284                            '()))
     285                          string<?))))
     286
     287(define (make-invalid-id-cache repo-base)
     288  (make-id-cache #f 0
     289                 (make-pathname repo-base "id.idx")))
     290
     291(define (current-id-cache)  ; access current id cache hash table; legacy
     292  (repository-id-cache (current-repository)))
     293(define (id-cache-ref c id)
     294  (hash-table-ref/default (id-cache-table c) id '()))
     295(define (id-cache-keys c)
     296  (hash-table-keys (id-cache-table c)))
     297
     298;; Validate and update the shared id cache in the current repository.
    252299(define (validate-id-cache!)
    253   (when (< (id-cache-mtime)
    254            (file-modification-time (id-cache-filename)))
    255     (read-id-cache!)))
    256 (define (invalidate-id-cache!)
    257   (id-cache-mtime 0))
     300  (define (read-id-cache! r c)
     301    (define (read-id-cache c)
     302      (let ((fn (id-cache-filename c)))
     303        (call-with-input-file fn
     304          (lambda (in)
     305            (make-id-cache
     306             (alist->hash-table (read in) eq?)
     307             (file-modification-time (port->fileno in))
     308             fn)))))
     309    (set-repository-id-cache! r (read-id-cache c)))
     310
     311  ;; We don't currently lock id-cache validations with a mutex.
     312  ;; All that (should) happen is that when the cache is (rarely)
     313  ;; updated, if two threads validate at the same time both will
     314  ;; read the entire cache in.
     315  (let* ((r (current-repository))
     316         (c (repository-id-cache r)))
     317    (when (< (id-cache-mtime c)
     318             (file-modification-time (id-cache-filename c)))
     319      (read-id-cache! r c))))
     320
     321;; Not currently needed.  Also not tested and not thread-safe
     322;; (define (invalidate-id-cache!)
     323;;   (set-repository-id-cache! (current-repository) (make-invalid-id-cache)))
     324
     325;; Return a list of sorted IDs as strings, suitable for regex node matching.
     326;; Construction is lazy because it is not that cheap.
     327(define (id-cache-ids c)
     328  (force (%id-cache-ids c)))
     329;; This one's pretty expensive (time and space wise).
     330(define (id-cache-paths c)
     331  (force (%id-cache-paths c)))
    258332
    259333;;; ID search
     
    263337(define (match-nodes/id id)
    264338  (define (lookup id)
    265     (hash-table-ref/default (id-cache) id '()))
     339    (id-cache-ref (current-id-cache) id))
    266340  (validate-id-cache!)
    267341  (let ((id (if (string? id) (string->symbol id) id)))
     
    275349  (let ((rx (irregex re)))
    276350    (validate-id-cache!)
    277     (let ((keys (sort (map symbol->string (hash-table-keys (id-cache)))
    278                       string<?)))
    279       (append-map (lambda (id)
    280                     (match-nodes id))
    281                   (filter-map (lambda (k)
    282                                 (and (string-search rx k) k))
    283                               keys)))))
     351    (append-map (lambda (id)
     352                  (match-nodes id))
     353                (filter-map (lambda (k)
     354                              (and (string-search rx k) k))
     355                            (id-cache-ids (current-id-cache))))))
     356
     357;; Match against full node paths with RE.
     358(define (match-node-paths/re re)
     359  (let ((rx (irregex re)))
     360    (validate-id-cache!)
     361    (map (lambda (path)
     362           (lookup-node (string-split path))) ; stupid resplit
     363         (filter-map (lambda (k)
     364                       (and (string-search rx k) k))
     365                     (id-cache-paths (current-id-cache))))))
     366
     367;; ,t (validate-id-cache!)
     368;;    0.123 seconds elapsed
     369;;    0.024 seconds in (major) GC
     370;;    47942 mutations
     371;;        3 minor GCs
     372;;        5 major GCs
     373;; after id cache loaded, disk cache warm
     374;; ,t (match-nodes (irregex "posix"))
     375;;    0.065 seconds elapsed                (0.06 - 0.10 sec)
     376;;        0 seconds in (major) GC
     377;;    68054 mutations
     378;;      832 minor GCs
     379;;        0 major GCs
     380;; after id-cache-ids cache
     381;; ,t (match-nodes (irregex "posix"))
     382;;    0.036 seconds elapsed
     383;;        0 seconds in (major) GC
     384;;     9642 mutations
     385;;       83 minor GCs
     386;;        0 major GCs
     387;; ,t (match-nodes (irregex "."))
     388;;    1.978 seconds elapsed           ; actually about 10-15 seconds on disk
     389;;    0.057 seconds in (major) GC
     390;;   147205 mutations
     391;;      404 minor GCs
     392;;        4 major GCs
     393;; time chicken-doc -m . >/dev/null    ; presuming totally warm disk cache
     394;; real    0m0.960s
     395;; ,t (match-nodes (irregex "."))
     396;;    0.321 seconds                    ; if metadata read is delayed, but dir checked
     397;;    0.133 seconds                    ; if metadata read delayed and dir not checked
     398;;    0.250 seconds                    ; if metadata read delayed and dir not checked, but path->pathname still computed
     399
     400
    284401
    285402;; Return list of nodes whose identifiers match
     
    320437;;; Repository
    321438
    322 (define repository-version 2)
    323 (define repository-information (make-parameter '()))
    324 (define (repository-magic)
    325   (make-pathname (repository-base) ".chicken-doc-repo"))
    326 (define (verify-repository)
    327   (and (file-exists? (repository-magic))
    328        (let ((repo-info (with-input-from-file (repository-magic) read)))
    329          (repository-information repo-info)
    330          (let ((version (or (alist-ref 'version repo-info) 0)))
    331            (cond ((= version repository-version))
    332                  (else (fprintf (current-error-port) "Invalid repo version number ~a\n" version)
    333                        #f))))))
    334 (define (set-chicken-doc-repository! x)
    335   (invalidate-id-cache!)
    336   (repository-base x)
    337   (unless (verify-repository)
    338     (warning "No chicken-doc repository found at" (repository-base))))
     439(define +repository-version+ 2)
     440
     441;; The repository object is a new concept (formerly all fields
     442;; were global parameters) so our API does not expect a
     443;; repository object to be passed in.  Therefore, we make
     444;; current-repository a global parameter.
     445
     446(define-record-type chicken-doc-repository
     447  (make-repository base root magic info id-cache)
     448  repository?
     449  (base repository-base)
     450  (root repository-root)
     451  (magic repository-magic)
     452  (info repository-information)
     453  (id-cache repository-id-cache set-repository-id-cache!))
     454
     455;; Current repository for node lookup API.
     456(define current-repository (make-parameter #f))
     457
     458;; Return standard location of repository.  Does not
     459;; guarantee it exists.
     460(define (locate-repository)
     461  (or (getenv "CHICKEN_DOC_REPOSITORY")
     462      (make-pathname (chicken-home) "chicken-doc")))
     463
     464;; Open the repository found in the standard location
     465;; and set the current repository for the thread.
     466(define (verify-repository)  ; legacy name; should be changed
     467  (current-repository
     468   (open-repository
     469    (locate-repository))))
     470
     471;; Open repository and return new repository object or
     472;; throw error if nonexistent or format failure.
     473(define (open-repository base)
     474  (let ((magic (make-pathname base ".chicken-doc-repo")))
     475    (if (file-exists? magic)
     476        (let ((info (with-input-from-file magic read)))
     477          (let ((version (or (alist-ref 'version info) 0)))
     478            (cond ((= version +repository-version+)
     479                   (let ((r (make-repository base
     480                                             (make-pathname base "root")
     481                                             magic
     482                                             info
     483                                             (make-invalid-id-cache base))))
     484                     (set-finalizer! r close-repository)
     485                     r))
     486                  (else (error "Invalid repo version number ~a, expected ~a\n"
     487                                version +repository-version+)))))
     488        (error "No chicken-doc repository found at " base))))
     489(define (close-repository r)
     490  (void))
    339491
    340492;;; REPL
     
    356508  ;; Warning -- will execute if called from a script.
    357509  ;; We really only want this to execute at the REPL.
    358   (set-chicken-doc-repository! (repository-base) ;; (locate-repository)
    359                           )
     510  (verify-repository)
     511 
    360512  (toplevel-command 'wtf (lambda () (repl-wtf (string-trim-both
    361513                                          (read-line))))
     
    367519                    ",doc PATHSPEC     Describe identifier or path with chicken-doc"))
    368520
    369 
    370521)  ;; end module
    371522
  • release/4/chicken-doc/trunk/chicken-doc.setup

    r17845 r17974  
    77  'chicken-doc
    88  '("chicken-doc.so" "chicken-doc.import.so" "chicken-doc-text.import.so")
    9   `((version 0.3.3)
     9  `((version 0.3.5)
    1010    (documentation "chicken-doc.html")))
    1111
     
    1313 'chicken-doc-cmd
    1414 '("chicken-doc")
    15  `((version 0.3.3)))
     15 `((version 0.3.5)))
Note: See TracChangeset for help on using the changeset viewer.