;;; -*- Mode: LISP; Package: id3; Syntax: Common-lisp; -*-
;;; ************************************************************************
;;;
;;; PORTABLE AI LAB - UNI ZH
;;;
;;; ************************************************************************
;;;
;;; Filename:   id3-lib.cl
;;; Shor Desc: Library stuff for ID3 only
;;; Version:    2.0
;;; Status:     Review
;;; Last Mod:   15.5.91 TW
;;; Author:     Thomas Wehrle
;;;
;;; Copyright (c) 1992 Istituto Dalle Molle (IDSIA), University of
;;; Zurich, Swiss Federal Institute of Technology Lausanne.
;;;
;;; Permission is granted to any individual or institution to use, copy,
;;; modify, and distribute this software, provided that this complete
;;; copyright and permission notice is maintained, intact, in all
;;; copies and supporting documentation.
;;;
;;; IDSIA provides this software "as is" without express or implied
;;; warranty.  
;;;

;;; --------------------------------------------------------------------------
;;; Change History: 
;;; 
;;;	
;;; --------------------------------------------------------------------------

;;; Construct a new table, with all rows containing a * replaced by
;;; several rows, one for each possible value for the attribute
;;; containing the *.

(in-package :id3)



(defmethod expand-wildcards ((tab table))
      (let ((range-list
               (mapcar 
                      (function 
                           (lambda (attribute)
                               (delete '* (get-possible-values attribute tab))))
                      (attributes tab))))
          (setf (rows tab)
                    (apply (function append)
                          (mapcar
                                 (function
                                      (lambda (row)
                                          (expand-row row range-list)))
                                 (rows tab)))))
      tab)



;;; Given a row and a range-list, return a number of rows.  If the
;;; original row had a * in position i, return a list of rows, with
;;; the * replaced by each value in the ith element of range-list.
;;; Rows with more than one * are treated appropriately.

(defun expand-row (row range-list)
      (let ((position (search (list '*) row)))
          (if position 
             (let ((values (nth position range-list))
                     (rows nil))
                 (dotimes (i (length values) rows)
                       (setf rows (append (expand-row 
                                                              (substitute (nth i values) '* row :count 1)
                                                              range-list)
                                                  rows))))
             (list row))))



;;; Computes a table that gives clustering statistics on the values of
;;; a certain attribute.  All values of the input attribute must be
;;; numeric.  Each row in the constructed table gives a value that the
;;; original attribute takes, the number of times that value is
;;; repeated, an the cumulative number of values that are less than or
;;; equal to that value.  The rows are sorted in descending order on
;;; the reported values.

(defmethod cumulated-values ((tab table) attribute)
      ;  (inspect tab)
      (let ((values (get-values attribute tab)))
          (unless (every (function numberp) values)
                (error "Non number value found for attribute ~a." attribute))
          (make-instance 'table
                 :attributes '(raw-val frequency cum-freq)
                 :rows
                 (do* ((l1 (sort values #'<=) (cdr l1))
                           (l2 (list (list (car l1) 0 0)))
                           (cum 1 (1+ cum)))
                          ((null l1) (reverse l2))
                       (if (equal (car l1) (caar l2))
                          (progn (incf (cadar l2))
                               (setf (caddar l2) cum))
                          (setf l2 (cons (list (car l1) 1 cum) l2)))))))



;;; Given a list of numbers, returns the value that is percent between
;;; the beginning of the list and the numberth entry.  e.g.,
;;; (threshold 50 9 ...) will return the entry half way between the
;;; beginning of the list and the 9th element, i.e., the fifth entry.
;;; If the percentage does not specify a single element, the value is
;;; determined by linear interpolation between the two neighboring
;;; values. 

(defun threshold (percent number list)
      (let* ((n (1- (* percent (/ (1+ number) 100))))
                (n-1 (floor n))
                (n+1 (ceiling n))
                (p (- n n-1)))
          (when (minusp n-1) (setf n-1 0))
          (when (minusp n+1) (setf n+1 0))
          (when (> n-1 (1- number)) (setf n-1 (1- number)))
          (when (> n+1 (1- number)) (setf n+1 (1- number)))
          (let ((v-1 (nth n-1 list))
                  (v+1 (nth n+1 list)))
              (+ v-1 (* p (- v+1 v-1))))))



;;; Finds a cutoff value, if possible, such that the number of rows for
;;; which the specified attribute takes a value less than the cutoff
;;; value is the given percentage of all the rows. 

(defmethod cut-value ((tab table) attribute percent)
      ;  (inspect tab)
      (let ((values (get-values attribute tab)))
          (unless (every (function numberp) values)
                (error "Non number value found for attribute ~a." attribute))
          (let* ((values (sort values #'<=))
                    (n (number-of-rows tab)))
              (threshold percent n values))))



;;; Modify the table by applying function fcn to the value of the
;;; given attribute in all rows.  This does not build a new table, but
;;; modifies the old one.

(defmethod exchange-values ((tab table) attribute fcn)
      (let ((position (search (list attribute) (attributes tab))))
          (dolist (row (rows tab) tab)
                (setf (nth position row)
                          (apply fcn (list (nth position row)))))))



;;; Replaces the values of the given attribute with values that are
;;; evenly spaced through its distribution.  The even spacing is given
;;; by the third argument.  

(defmacro substitute-values (tab attribute var-or-divisor &rest body)
      (if body
         `(exchange-values (copy ,tab) ,attribute
           (function
                (lambda (,var-or-divisor) ,@body)))
         (let* ((e-table (eval tab))
                   (e-attribute (eval attribute))
                   (divisor (eval var-or-divisor))
                   (values (get-values e-attribute e-table)))
             ;      (inspect tab)
             (unless (every (function numberp) values)
                   (error "Non number value found for attribute ~a." e-attribute))
             (unless (and (integerp divisor) (plusp divisor))
                   (error "The number of ranges (~a) has to be natural." divisor))
             (let* ((values (sort values #'<=))
                       (number-of-items (number-of-rows e-table))
                       (interval-width (/ 100 divisor))
                       (intervals (do* ((i 1 (1+ i))
                                                     (v interval-width (+ v interval-width))
                                                     (l (list v) (cons v l)))
                                                    ((>= (1+ i) divisor) (reverse l))))
                       (syms (mapcar
                                          (function
                                               (lambda (val)
                                                   (concatenate 'string
                                                         "Interval-"
                                                         (write-to-string (/ val interval-width))
                                                         "-"
                                                         (write-to-string divisor))))
                                          intervals))
                       (var (gensym))
                       (body `((cond
                                                 ,@(mapcar 
                                                             (function 
                                                                  (lambda (val name)
                                                                      `((<= ,var ,(threshold val number-of-items values)) 
                                                                        ,name)))
                                                             intervals syms)
                                                 (t ,(concatenate 'string
                                                           "Interval-"
                                                           (write-to-string divisor)
                                                           "-"
                                                           (write-to-string divisor)))))))
                 `(substitute-values ,tab ,attribute ,var ,@body)))))



(defmethod apply-user-function ((tab table))
      (let* ((attr (read-from-string
                               (ask "Please enter the attribute.~%=> ")))
                (pos (search
                               (list attr)
                               (attributes tab)
                               :test #'equal))
                (fcn nil))
          (if pos
             (progn
                  (setf fcn
                            (eval
                                  (read-from-string
                                       (concatenate 'string
                                             (ask "Please enter a function of the form:~
~%   (function (lambda (val) (* (truncate (/ val 10)) 10)))                                                     ~%=> "
                                              :condition
                                              (function (lambda (string)
                                                                     (let* ((eof (gensym))
                                                                               (sexp (read-from-string
                                                                                                (concatenate 'string string ")))))))))))")
                                                                                                nil eof)))
                                                                         (and (not (eq sexp eof))
                                                                                  (consp sexp)
                                                                                  (functionp (eval sexp))))))
                                              :error-message "S-expression incomplete or no function")
                                             ")))))))))))"))))
                  (exchange-values tab attr fcn))
             (display-error "Attribute does not exist!"))))



(defun cluster (tab &key (clusters nil)
                                 (namedp nil)
                                 (attribute nil))
      (let* ((attribute
                  (if attribute attribute 
                     (read-from-string
                          (ask "What is the attribute ? "
                           :condition
                           (function (lambda (string)
                                                  (let ((x (read-from-string string nil)))
                                                      (member x (attributes tab) :test #'equal))))
                           :error-message "Attribute does not exist!"))))
                (values (get-values attribute tab))
                (ok (every (function numberp) values)))
          (if (not ok)
             (progn ;(inspect tab)
                  (display-error "Non number value found for attribute!"))
             (let* ((clusters (if clusters clusters
                                             (read-from-string
                                                  (ask "How many clusters ? ~%=> "
                                                   :condition 
                                                   (function (lambda (string)
                                                                          (let ((x (read-from-string string nil)))
                                                                              (and (numberp x) (integerp x) (plusp x))))))
                                                  :error-message "Please enter a positive integer.")))
                       (values (sort values #'<=))
                       (number-of-items (number-of-rows tab))
                       (interval-width (/ 100 clusters))
                       (intervals (do* ((i 1 (1+ i))
                                                     (v interval-width (+ v interval-width))
                                                     (l (list v) (cons v l)))
                                                    ((>= (1+ i) clusters) (reverse l))))
                       (syms (if namedp
                                       (let ((syms nil))
                                           (dotimes (i clusters (nreverse syms))
                                                 (setf syms
                                                           (cons
                                                                 (read-from-string
                                                                      (ask (format nil
                                                                                    "Enter name of Interval (~a/~a)~%==> "
                                                                                    (1+ i) clusters)))
                                                                 syms))))
                                       (case clusters
                                             (1 (list (read-from-string "static")))
                                             (2 (list (read-from-string "low")
                                                    (read-from-string "high")))
                                             (3 (list (read-from-string "low")
                                                    (read-from-string "medium")
                                                    (read-from-string "high")))
                                             (otherwise (mapcar (function (lambda (val)
                                                                                                           (read-from-string
                                                                                                                (format nil "Intv-~a-~a" 
                                                                                                                     (/ val interval-width)
                                                                                                                     clusters))))
                                                                          intervals)))))
                       (var (gensym))
                       (body `((cond ,@(mapcar 
                                                             (function 
                                                                  (lambda (val name)
                                                                      `((<= ,var ,(threshold val number-of-items values)) 
                                                                        ',name)))
                                                             intervals syms)
                                                 (t ',(if namedp (first (last syms))
                                                          (case clusters
                                                                (1 (read-from-string "static"))
                                                                ((2 3) (read-from-string "high"))
                                                                (otherwise (read-from-string
                                                                                           (format nil "Intv-~a-~a" clusters clusters))))))))))
                 
                 (eval `(exchange-values ,tab ',attribute
                              (function
                                   (lambda (,var) ,@body))))))))



;;; Determine whether the table has a clash, with respect to the given
;;; target attribute.  A clash is defined as two rows that are
;;; identical except for the value of the target attribute.  The clash
;;; status is printed. The function returns 2 values:
;;; 1. number of clashes
;;; 2. number of examples

(defmethod clash-p ((tab table) target-attribute)
      (let* ((rows (remove-duplicates (rows tab) :test #'equal))
                (len (length rows))
                (uniques 
                 (length 
                     (remove-duplicates
                          (mapcar (function 
                                                (lambda (row)
                                                    (remove-nth
                                                     (search (list target-attribute) 
                                                           (attributes tab))
                                                     row)))
                                 rows)
                          :test (function equal))))
                (clash (not (equal len uniques))))
          (if clash 
             (format-display *id3-output-window*
                  "~%Clashes (~a in ~a different examples)." 
              (- len uniques)
              len)
             (format-display *id3-output-window*
                    "~%No clashes (in ~a different examples)."
              len))
          (values (- len uniques) len)))



(defmethod reduce-data ((tab table))
      (setf (rows tab)
                (remove-duplicates (rows tab) :test #'equal))  
      tab)



(defmethod bad-way-p ((tree decision-tree))
      (let ((bad-leave "*impossible*"))
          (= (length (remove bad-leave (leaves tree) :test (function equal))) 0)))



(defmethod revise ((tree decision-tree))
      (if (null (descendants tree))
         (make-instance 'decision-tree :content (content tree))
         (unless (bad-way-p tree)
               (make-instance 'decision-tree 
                      :content (content tree)
                      :descendants
                      (remove nil 
                           (mapcar (function revise)
                                  (descendants tree)))))))



(defun list-of-tables ()
      (let ((result nil))
          (dolist (item *list-of-tables*)
                (push (table-name item) result))
          result))



(defun select-table-by-name (name)
      (let ((result nil))
          (dolist (item *list-of-tables*)
                (when (string= (table-name item) name)
                       (setq result item)))
          result))



(defmethod number-of-rows ((tab table))
      (length (rows tab)))



;;; ========================================================================
;;; END OF FILE
;;; ========================================================================
