; ; Model formula tools for XLISP-Stat regression methods ; ; ;simplest possible version (defun factor (x &key namebase ) "Defines treatment contrast matrix for x and optionally a set of names based on namebase" (let* ( (xlist (sort-data (remove-duplicates (coerce x 'list) :test #'equalp))) (p (- (length xlist) 1)) (rows (row-list (select (identity-matrix (+ p 1)) (iseq 0 p) (iseq 1 p)))) (decoder (mapcar #'cons xlist rows)) (xmatrix (apply #'bind-rows (mapcar #'(lambda (xx) (cdr (assoc xx decoder :test #'equalp))) x))) (xnames (if (null namebase) nil (cdr (mapcar #'(lambda (xx) (write-to-string (list namebase xx) :escape nil)) xlist)))) ) (if (null namebase) xmatrix (list xmatrix xnames)) ) ) (defun interaction ( xs &key (is-factor (repeat t (length xs))) namebases ) "Interactions of any order for categorical and continuous variables" (let* ( (n (length (first xs))) (zlist nil) (znames nil) (p (length xs)) (names (if (null namebases) (repeat "." p) namebases)) (junk (dotimes (i p) (let* ( (thisz (if (elt is-factor i) (factor (elt xs i) :namebase (elt names i)) (list (bind-columns (elt xs i)) (list (elt names i))))) ) (if (null zlist) (setf zlist (column-list (first thisz))) (setf zlist (apply #'append (mapcar #'(lambda (xx) (mapcar #'(lambda (yy) (* xx yy)) (column-list (first thisz)))) zlist)))) (if (null znames) (setf znames (second thisz)) (setf znames (apply #'append (mapcar #'(lambda (xx) (mapcar #'(lambda (yy) (write-to-string (list xx yy) :escape nil)) (second thisz))) znames)))) ) ) ) (zmat (apply #'bind-columns zlist)) ) (if (null namebases) zmat (list zmat znames)) )) ; (defun block-test (index beta covmat &key blockname names (block-only nil)) (let* ( (subbeta (select beta index)) (subcov (select covmat index index)) (waldchisq (inner-product subbeta (matmult (inverse subcov) subbeta))) (waldp (- 1 (chisq-cdf waldchisq (length index)))) (subse (sqrt (diagonal subcov))) (subz (/ subbeta subse)) (subp (* 2 (- 1 (normal-cdf (abs subz))))) (nn (if (null names) (repeat "" (length index)) (select names index))) (blockn (if (null blockname) "block" blockname)) ) (format t "~a~20t~13,5g~35t~,4f~%" blockn waldchisq waldp) (cond ((null block-only) (format t "~5t Variable~25t Estimate~40t Std.Err.~55t p-value~%") (dolist (i (iseq 0 (- (length index) 1))) (format t "~5t~a~25t~13,5g~40t(~,6g)~55t~,4f~%" (select nn i) (select subbeta i) (select subse i) (select subp i))) ) ) ) ) (defun design (varlist) (apply #'bind-columns (mapcar #'first varlist))) (defun names (varlist) (apply #'append (mapcar #'second varlist))) ;; ;; semi-refined version ;; ;; input: a quoted list of factor, term and interaction statements ;; output: a design matrix, a set of variable names, a set of block names and ;; a set of block indices. ;; ; a pointless function which doesn't actually get evaluated (defun term (x) "A metric predictor variable" (x &key namebase) (list (list x) namebase)) (defun col-count (matlist) "adds up columns" (apply #'sum (mapcar #'(lambda (x) (cond ((matrixp x) (second (array-dimensions x))) (t 1))) matlist))) ; ; This function does the work. Note that the design matrix is set up backwards ; since cons sticks things on the front of a list. ; ; (defun formula (model-formula) "Calculates a design matrix, variable names and information for running (block-test) on the fitted model. Argument: a quoted list of metric variables, factor variables and interactions. Metric variables are specified by (term x), factor variables by (factor a) and interactions by (interaction (list a b x) :is-factor (list t t nil)). " (let* ( (design-matrix nil) (name-list nil) (block-names nil) (block-indices nil) (i 0) (junk (dolist (x model-formula) (setf i (if (null design-matrix) 0 (col-count design-matrix))) (cond ((eql (car x) 'term) (let* ((namepos (position ':namebase x)) ) (setf design-matrix (cons (eval (second x)) design-matrix) name-list (cons (if (null (find ':namebase x)) (list (write-to-string (second x))) (list (fourth x))) name-list) block-names (cons (write-to-string (if (null namepos) (second x) (elt x (+ 1 namepos)))) block-names) block-indices (cons (list i) block-indices) ) ) ) ((eql (car x) 'factor) (let* ((namepos (position ':namebase x)) (factorx (eval (if (null namepos) (append x (list :namebase (write-to-string (second x)))) x))) ) (setf design-matrix (cons (first factorx) design-matrix) name-list (cons (second factorx) name-list) block-names (cons (write-to-string (if (null namepos) (second x) (elt x (+ 1 namepos)))) block-names) block-indices (cons (iseq i (- (col-count design-matrix) 1)) block-indices) ) ) ) ((eql (car x) 'interaction) (let* ( (namepos (position ':namebases x)) (interx (eval (if (null namepos) (append x (list :namebases (cons 'list (mapcar #'write-to-string (cdr (second x)))))) x))) ) (setf design-matrix (cons (first interx) design-matrix) name-list (cons (second interx) name-list) block-names (cons (write-to-string (cons 'interaction (if (null namepos) (cdr (second x)) (cdr (elt x (+ 1 namepos)))) ) :escape nil) block-names) block-indices (cons (iseq i (- (col-count design-matrix) 1)) block-indices) ) ) ) (t (error "I don't understand ~s" x)) ) ) ) ) (list (apply #'bind-columns (reverse design-matrix)) (apply #'append (reverse name-list)) (reverse block-names) (reverse block-indices)) ) ) (defun formula-display (beta cov-mat model-formula &key (block-only t) (intercept t)) "Display a model summary for a model with coefficient estimates beta and covariance estimates cov-mat and design matrix from model-formula. This should really be a method for the relevant object. Little checking is performed: garbage in; garbage out" (let* ( (xintercept (if intercept 1 0)) (nblocks (length (fourth model-formula))) (varnames (if intercept (cons "Intercept" (second model-formula)) (second model-formula))) (block-indices (+ xintercept (fourth model-formula))) ) (format t "~a~20t~a~35t~a~%" "Block" "Wald Chisq" "p-value") (if (equal (length beta) (+ xintercept (second (array-dimensions (first model-formula))))) (dolist (i (iseq 0 (- nblocks 1))) (block-test (elt block-indices i) beta cov-mat :names varnames :blockname (elt (third model-formula) i) :block-only block-only)) (error "length of beta doesn't match design matrix") ) ) ) ;--------- model-formula prototype ------------ (defproto model-formula-proto '(formula design-matrix name-list block-names block-indices subset) '() *object* "Model formula") (defmeth model-formula-proto :formula (&optional (new nil set)) "Set or return model formula" (when set (setf (slot-value 'formula) new) (send self :compute)) (slot-value 'formula)) (defmeth model-formula-proto :isnew (&key formula subset) (send self :subset subset) (send self :formula formula) ) (defun as-formula (x &key (subset nil)) (send model-formula-proto :new :formula x :subset subset)) ; (defmeth model-formula-proto :design-matrix (&optional (new nil set)) "Sets or returns design-matrix" (when set (setf (slot-value 'design-matrix) new)) (slot-value 'design-matrix)) (defmeth model-formula-proto :name-list (&optional (new nil set) ) "Sets or returns list of column names" (when set (setf (slot-value 'name-list) new)) (slot-value 'name-list)) (defmeth model-formula-proto :block-names (&optional (new nil set)) "Sets or returns list of block names" (when set (setf (slot-value 'block-names) new)) (slot-value 'block-names)) (defmeth model-formula-proto :block-indices (&optional (new nil set) ) "Sets or returns list of column names" (when set (setf (slot-value 'block-indices) new)) (slot-value 'block-indices)) (defmeth model-formula-proto :subset (&optional (new nil set) ) "Sets or returns subset of included cases" (when set (setf (slot-value 'subset) new)) (slot-value 'subset)) ; (defmeth model-formula-proto :compute () "Internal use" (let* ( (result (formula (send self :formula))) (xmat (first result)) ) (if (send self :subset) (send self :design-matrix (apply #'bind-rows (select (row-list xmat) (send self :subset)))) (send self :design-matrix xmat)) (send self :name-list (second result)) (send self :block-names (third result)) (send self :block-indices (fourth result)) )) ; ; methods for regression-proto and some descendants ; This will work if :sigma-hat^2*:xtxinv is the covariance matrix ; which is also true for the glim and nonlinear regression prototypes ; (defun regression-formula (formula-object y &rest args &key (predictor-names nil has-names) (print-summary t) (intercept t) (print t)) "Like regression-model but uses a formula instead of the x argument" (let* ( (name-list (if has-names predictor-names (send formula-object :name-list))) (result (apply #'regression-model (list (send formula-object ':design-matrix) y ':predictor-names name-list ':print (null print-summary) args))) ) (send result :add-slot 'formula formula-object) (if print-summary (send result :display-with-formula t)) result )) (defmeth regression-model-proto :formula (&optional (new nil set)) "Sets or returns the model formula object or nil for none" (when (and set (send self :has-slot 'formula)) (setf (slot-value 'formula) new)) (slot-value 'formula)) (defmeth regression-model-proto :display-with-formula (&optional (block-only t)) "Display a summary of the model based on its formula" (let ( (formula (if (send self :has-slot 'formula) (send self :formula) nil)) ) (cond ((null formula) (format t "No formula present: using default method~%") (send self :display)) (t (let* ( (xintercept (if (send self :intercept) 1 0)) (nblocks (length (send formula :block-indices))) (block-indices (+ xintercept (send formula :block-indices))) (cov-mat (* (^ (send self :sigma-hat) 2) (send self :xtxinv))) (varnames (if (send self :intercept) (cons "Intercept" (send self :predictor-names)) (send self :predictor-names))) ) (format t "~a~20t~a~35t~a~%" "Block" "Wald Chisq" "p-value") (dolist (i (iseq 0 (- nblocks 1) )) (block-test (elt block-indices i) (send self :coef-estimates) cov-mat :names varnames :blockname (elt (send formula :block-names) i) :block-only block-only)) ) ) ) ) ) (provide "modelformula")