;;; -*- Mode: LISP; Package: id3; Syntax: Common-lisp; -*-;;;
;;; ************************************************************************
;;;
;;; PORTABLE AI LAB - UNI ZH
;;;
;;; ************************************************************************
;;;
;;; Filename:   id3.cl
;;; Short Desc: Main routine for ID3 algorithm
;;; Version:    2.0
;;; Status:     Review
;;; Last Mod:   5.2.91 TW
;;; Author:     Thomas Wehrle
;;;
;;; Copyright (c) 1992 Istituto Dalle Molle (IDSIA), University of
;;; Zurich, Swiss Federal Institute of Technology Lausanne.
;;;
;;; Permission is granted to any individual or institution to use, copy,
;;; modify, and distribute this software, provided that this complete
;;; copyright and permission notice is maintained, intact, in all
;;; copies and supporting documentation.
;;;
;;; IDSIA provides this software "as is" without express or implied
;;; warranty.  
;;;

;;; --------------------------------------------------------------------------
;;; Change History: 
;;; 
;;;	
;;; --------------------------------------------------------------------------

;;; ==========================================================================
;;; DESCRIPTION
;;; ==========================================================================


;;; The basic ID3 algorithm.  ID3 takes as input a set of examples,
;;; represented as lists of attributes and their values.  It computes
;;; a decision tree which has the shortest possible expected search
;;; depth.  This is done by computing the information content
;;; (entropy) of each attribute, and selecting the most informative
;;; attribute to test next.  More details can be found in the Module
;;; Description for Machine Learning accompanying the Portable AI Lab.
;;;
;;; This version was coded from the following article:
;;;    Thompson, B. & Thompson, W. (1986). Finding Rules in Data.
;;;    BYTE, 11, 149 - 158.
;;; ==========================================================================
;;; PACKAGE DECLARATIONS
;;; ==========================================================================

(in-package :id3)

(defvar *precision* 'double-float)	; arithmetic precision



;;; ==========================================================================
;;; ENTROPY calculation
;;; ==========================================================================

;;; p computes the conditioned probability that the target-attribute
;;; has value t-value, given that a particular attribute has a value.
;;; This is computed for a particular set of data by actually
;;; computing the frequency with which the target-attribute takes the
;;; value t-value when the attribute has the specified value, divided
;;; by the overall frequency with which the attribute has value.  This
;;; is written as p(Ci|Aj) in Thompson, where t-value is the i'th
;;; value of target-attribute, and value is the j'th value of
;;; attribute.

(defmethod p (target-attribute t-value attribute value (data table))
      (/ (count-occurences-if target-attribute t-value attribute value data)
         (coerce (count-occurences attribute value data) *precision*)))



;;; H is the conditioned entropy of the target-attribute, given that
;;; attribute has a particular value.  It is computed from the
;;; conditional probabilities defined above.

(defmethod H (target-attribute attribute value (data table))
      (- (reduce (function +)
               (mapcar
                      (function (lambda (t-value)
                                             (let ((probability 
                                                      (p target-attribute t-value attribute value data)))
                                                 (* probability (log2 probability)))))
                      (get-possible-values target-attribute data)))))



;;; The total entropy of an attribute, with respect to a target
;;; attribute.  This is a weighted sum of the conditioned entropies
;;; (H, above), given all possible values of the attribute.  It
;;; represents the expected amount of information left to be
;;; determined after the value for attribute has been specified.  This
;;; value is minimum for the best attribute to be tested next.

(defmethod entropy (target-attribute attribute (data table))
      (reduce (function +)
           (mapcar
                  (function  (lambda (value)
                                          (let ((HH (H target-attribute attribute value data)))
                                              (* HH (/ (count-occurences attribute value data)
                                                            (number-of-rows data))))))
                  (get-possible-values attribute data))))



;;; The best attribute to select next is the one with the lowest
;;; entropy.  The entropy of each attributes in the table is computed,
;;; and the lowest one selected.

(defmethod best-attribute (target-attribute (data table))
      (format-display *id3-output-window*
       "~%===========================================================")
      (format-display *id3-output-window* 
       "~%Try to find best splitting attribute for ~a in ~a" 
       target-attribute data)
      (let* ((entropies
                  (mapcar 
                         (function 
                              (lambda (attribute)
                                  (cons attribute (entropy target-attribute attribute data))))
                         (remove target-attribute (attributes data))))
                (minimum (do ((pair-list entropies (cdr pair-list))
                                            (entr most-positive-fixnum)
                                            (pair nil))
                                           ((null pair-list) pair)
                                          (when (< (cdr (car pair-list)) entr)
                                                 (setf entr (cdr (car pair-list))
                                                           pair (car pair-list))))))
          (format-display *id3-output-window*
           "~%   Entropies  : ~a" entropies)
          (format-display *id3-output-window*
           "~%   Choosing   : ~a~%~%" 
           (if (null minimum)
              "<unable to get entropy>"
              (car minimum)))
          (cond ((null minimum) nil)
                      (t (car minimum)))))



;;; ==========================================================================
;;; DECISION TREE CONSTRUCTION
;;; ==========================================================================

;;; Determines if in the data table, the target attribute has exactly
;;; one value.  This is used to detect successful bottom of recursion
;;; in classify.

(defmethod one-value-p (target-attribute (data table))
      (= (length (get-possible-values target-attribute data)) 1))



;;; Constructs a decision tree for the target-attribute from a data
;;; table.  

(defmethod classify (target-attribute (data table))
      (if (one-value-p target-attribute data) ; if the target attribute
                                                                               ; has only one value, then
                                                                               ; the decision tree is
                                                                               ; trivial. 
         (progn
              (format-display *id3-output-window* 
                     " <no further splitting>~%~a~%" data)
              (make-instance 'decision-tree
                     :content target-attribute 
                     :descendants 
                     (list (make-instance 'decision-tree
                                    :content (get-nth-value target-attribute 0 data)))))
                                                                               ; otherwise, select the best
                                                                               ; splitting attribute.
         (let ((split-attribute (best-attribute target-attribute data)))
             (if (null split-attribute)
                (if *accept-clashes* 
                   (make-instance 'decision-tree
                          :content target-attribute
                          :descendants
                          (list 
                             (make-instance 'decision-tree
                                    :content (get-possible-values target-attribute data))))
                   (progn
                        (format-display *id3-output-window*
                              "~%Too few attributes or bad data. Cannot find rule~%~%")
                        (make-instance 'decision-tree
                               :content 'classification
                               :descendants 
                               (list (make-instance 'decision-tree
                                              :content "*impossible*")))))
                (make-instance 'decision-tree	; construct a tree with the
                                                                                ; chosen splitting attribute
                                                                                ; at its root, with its
                                                                                ; descendants given by a
                                                                                ; recursive call to id3 on the
                                                                                ; subtables with the splitting
                                                                                ; attribute taking each
                                                                                ; possible value. 
                       :content split-attribute 
                       :descendants 
                       (mapcar
                              (function 
                                   (lambda (value)
                                       (format-display *id3-output-window*
                                             "~%For ~a = ~a:" split-attribute value)
                                       (make-instance 'decision-tree
                                              :content value
                                              :descendants 
                                              (list (classify 
                                                       target-attribute 
                                                       (get-subtable split-attribute value data))))))
                              (get-possible-values split-attribute data)))))))



;;; **************** ID3 init functions *****************************



(defmethod find-sub-tree2 ((tree decision-tree))
      (let* ((possible-values
                    (mapcar
                           (function (lambda (x) (format nil "~a" (content x))))
                           (descendants tree)))
              (value (select-item-from-list
                                 *id3-main-window*
                                 possible-values
                                 (concatenate 'string 
                                       "What is the value of "
                                       (write-to-string (content tree))
                                       " ?"))))
          (when value
                 (do* ((desc-list (descendants tree) (rest desc-list))
                           (sub-tree (first desc-list) (first desc-list)))
                          ((equal value (write-to-string (content sub-tree)))
                           (first (descendants sub-tree)))))))


(defmethod query2 (disp (tree decision-tree))
      (if (<= (length (descendants tree)) 1)
         (format-display disp
              "~%~%From what I know I expect ~a to be ~a.~%"
              (content tree)
              (content (first (descendants tree))))
         (let ((query (find-sub-tree2 tree)))
             (when query
                    (query2 disp query)))))



(defmethod query2 (disp something)
      (when (null something) 
             (format-display disp
                  "~%No knowledge about this - sorry.")))



(defun set-output-tree ()
      (cond ((null *current-decision-tree*) 
                   (format-display *id3-output-window*
                        "Sorry, no decision tree - run classify"))
                  (t (set-dialog-item-text (aref (view-subviews *id3-main-window*) 8)
                       (concatenate 'string 
                             "Decision tree based on "
                             (write-to-string *target-attribute*))))))
                                              


(defun set-menu-after-load ()
      (set-menu-item-available-p
           (find-named-object ':load-table *id3-file-menu*) nil)
     (set-menu-item-available-p
           (find-named-object ':new-table *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':load-table *id3-table-editor-menu*) nil)
      (set-dialog-item-available-p
           (find-named-object ':load-table *out-dialog*) nil)
      (set-menu-item-available-p
           (find-named-object ':set-target-attribute *id3-classify-menu*) t)
      (set-dialog-item-available-p
           (find-named-object ':set-target-attribute *out-dialog*) t)
      (set-menu-item-available-p
           (find-named-object ':save-table-as *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':add-attribute *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':delete-attribute *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':add-example *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':delete-example *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':expand-wildcards *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':remove-duplication *id3-table-editor-menu*) t))



(defun set-menu-before-load ()
      (set-menu-item-available-p
           (find-named-object ':load-table *id3-file-menu*) t)
     (set-menu-item-available-p
           (find-named-object ':new-table  *id3-table-editor-menu*) t)
      (set-menu-item-available-p
           (find-named-object ':load-table *id3-table-editor-menu*) t)
      (set-dialog-item-available-p
           (find-named-object ':load-table *out-dialog*) t)
      (set-menu-item-available-p
           (find-named-object ':set-target-attribute *id3-classify-menu*) nil)
      (set-dialog-item-available-p
           (find-named-object ':set-target-attribute *out-dialog*) nil)
      (set-menu-item-available-p
           (find-named-object ':classify *id3-classify-menu*) nil)
      (set-dialog-item-available-p
           (find-named-object ':classify *out-dialog*) nil)
      (set-menu-item-available-p
           (find-named-object ':query *id3-classify-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':get-rules *id3-classify-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':Select-output-tree *id3-file-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':save-table-as *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':add-attribute *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':delete-attribute *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':add-example *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':delete-example *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':expand-wildcards *id3-table-editor-menu*) nil)
      (set-menu-item-available-p
           (find-named-object ':remove-duplication *id3-table-editor-menu*) nil))



;;; ========================================================================
;;; END OF FILE
;;; ========================================================================

