;;; SYMBOLIC REGRESSION WITH GENETIC PROGRAMMING
;;; file gp.l
;;; written by C.F. Eick (e-mail: ceick@cs.uh.edu)
;;; last modified April 28, 1997 at 12:59p
;;;
;;; the code that follows contains
;;; a crossover, mutation and initialization operators for genetic programming
;;; only suitable for systems that involve binary and unary operators
;;; this software can be used and rewritten for non-commercial purposes
;;; as long as authorship of the software is identified.
;;; this software has not been intensively tested and might
;;; contain bugs.
;;;
;;; More advanced versions for the above problem include:
;;; gp-gen2.l, gp-gen3.l, and 5v-new.l. 5v-new.l was developed in collaboration ;;; with Walter Sanz>
;;; The advanced versions are not available on www, but you can
;;; contact us, if you are interested.
;;;Four important constants; can be modified before loading this file
(defconstant *lim-cr-points* 59)
;;; crossover operator will give preference to generating trees with
;;; less than 59 crossover points
(defconstant *pop-size* 444)
;;; population size is set to 444;
(defconstant *lim-mut* 3)
;;; mutation rate is set to 1/3, 1/*lim-mut*, in general
(defconstant *lim-copy* 11)
;;; copy rate is set to 1/11, 1/*lim-copy*, in general
;;; returns the number of possible crossover points for li
(defun cr-points (li)
(cond
((atom li) 1)
((= (length li) 2) (+ 1 (cr-points (cadr li))))
(t (+ 1 (cr-points (cadr li)) (cr-points (caddr li)))) ))
(defun copy (li)
(cond
((atom li) li)
(t (cons (copy (car li)) (copy (cdr li))) )))
;;; closure begins
(let ((c 0)
(r nil))
(defun save-r (res) (setq r res))
(defun get-r () r)
(defun reset-c () (setq c 0))
(defun c-eq (m) (eq c m))
(defun c-lt (m) (< c m))
(defun increase-c () (setq c (+ 1 c)))
)
;;;end closure
; tree-subst substitutes rep in exp at position m
; does not perform any substitution for m=0 or if m is
; larger than the number of crossover points in exp
(defun tree-subst (exp m rep)
(reset-c)
(rep-sub exp m rep)
exp)
(defun rep-sub (exp m rep)
(cond
((and (listp exp) (null (cddr exp)))
(increase-c)
(if (c-eq m) (rplaca (cdr exp) rep)
(rep-sub (cadr exp) m rep)))
((listp exp)
(cond
((c-eq (- m 1)) (rplaca (cdr exp) rep))
((c-eq (- m 2)) (rplaca (cddr exp) rep))
(t (increase-c) (increase-c)
(rep-sub (cadr exp) m rep)
(if (c-lt m) (rep-sub (caddr exp) m rep)) )) )) )
;;; finds expression at position m in exp
(defun find1 (exp m)
(save-r nil)
(reset-c)
(find2 exp m)
(get-r))
(DEFUN FIND2 (EXP M)
(COND
((EQ M 0) (save-r exp))
((AND (LISTP EXP) (NULL (CDDR EXP))) (INCREASE-C)
(IF (c-eq M) (save-r (CADR EXP)) (FIND2 (CADR EXP) M)))
((LISTP EXP)
(COND
((c-eq (- m 1)) (save-r (CADR EXP)))
((c-eq (- m 2)) (save-r (CADDR EXP)))
(T (INCREASE-C) (INCREASE-C)
(FIND2 (CADR EXP) M)
(if (null (get-r)) (FIND2 (CADDR EXP) M)) )))))
;;; the crossover operator copies exp1 as well as the
;;; expression in exp2 that is substituted into exp1;
;;; crossover of two atoms results in the generation of
;;; a new tree!
;;; uses heuristic that decreases probability of generating larger trees,
;;; if trees become too large (cr1+cr2>52)
(defun crossover (exp1 exp2)
(if (and (atom exp1) (atom exp2))
(gen-4-tree)
(let ((cr1 (cr-points exp1))
(cr2 (cr-points exp2)))
(let ((p1 (random cr1))
(p2 (random cr2)))
(if (or (< (+ cr1 cr2) (* 2 *lim-cr-points*)) (< p1 p2))
(cond
((eq p1 0) (copy (find1 exp2 p2)))
(t (tree-subst (copy exp1) p1 (copy (find1 exp2 p2))) ))
(cond
((eq p2 0) (copy (find1 exp1 p1)))
(t (tree-subst (copy exp2) p2 (copy (find1 exp1 p1))) ))
)))))
;;; simple crossover operator (not used here!)
(defun crossover1 (exp1 exp2)
(let ((p1 (random (cr-points exp1)))
(p2 (random (cr-points exp2))))
(cond
((eq p1 0) (copy (find1 exp2 p2)))
(t (tree-subst (copy exp1) p1 (copy (find1 exp2 p2))) )) ))
; replaces a randomly chosen node (excluding the root) in
; exp by a randomly generated tree of height 2; if the root
; selected mutate1 is used
(defun mutate (exp1)
(let ((p1 (random (cr-points exp1))))
(if (eq p1 0) (mutate1 exp1) (tree-subst (copy exp1) p1 (gen-2-tree)) )
))
;;; mutate1 creates a new tree of height 3, if expr is an atom, tries
;;; to shorten the tree by making the successor of the first argument become
;;; the first argument, and replaces the first argument of expr by a random
;;; tree of height 2, otherwise.
(defun mutate1 (expr)
(cond
((atom expr) (gen-3-tree))
((atom (cadr expr)) (if (null (cddr expr))
(list (car expr) (gen-2-tree))
(list (car expr) (gen-2-tree) (caddr expr)) ) )
(t (if (null (cddr expr))
(list (car expr) (cadadr expr))
(list (car expr) (cadadr expr) (caddr expr)) ) )
))
;;; the functions that follow are specific for a particular project
;;; and have to be modified if symbolic regression is used in
;;; a different context.
;;; generator functions to generate trees of height 2, 3, and 4.
;;; generates variable x in 70% of the cases and a number in
;;; {0.00, 0.01, 0.02, 0.03,...,0.99, 1.00} otherwise.
(defun gen-term ()
(if (> (random 10) 2)
'x
(* 0.01 (random 101)) ))
(defun gen-unop ()
(let ((x (random 3)))
(cond
((eq x 0) 'cosa)
((eq x 1) 'sin30a)
((eq x 2) 'sqrta))))
(defun sqrta (x)
(sqrt (abs x)))
(defun cosa (x)
(abs (cos x)))
(defun sin30a (x)
(abs (sin (* 30 x))))
(defun gen-biop ()
(let ((x (random 3)))
(cond
((eq x 0) '+)
((eq x 1) '-)
((eq x 2) '*))))
(defun gen-2-tree ()
(if (eq (random 2) 0)
(list (gen-unop) (gen-term))
(list (gen-biop) (gen-term)(gen-term)) ))
(defun gen-3-tree ()
(if (eq (random 2) 0)
(list (gen-unop) (gen-2-tree))
(list (gen-biop) (gen-2-tree)(gen-2-tree)) ))
(defun gen-4-tree ()
(if (eq (random 2) 0)
(list (gen-unop) (gen-3-tree))
(list (gen-biop) (gen-3-tree)(gen-2-tree)) ))
;evaluation operators
(defun manh-dist (x y)
(abs (- x y)))
(defun eval-appr (exp in-list out-list)
(let ((f `(lambda (x) ,exp)))
(apply #'+ (mapcar #'manh-dist
(mapcar f in-list)
out-list)) ))
;;; 2 Benchmarks for the Symbolic Regression System
(defun eval1 (expr)
(eval-appr expr '(0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0)
'(1.0 0.8 0.6 0.4 0.2 0.1 0.2 0.4 0.6 0.8 1.0)))
(defun eval2 (expr)
(eval-appr expr '(0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0)
'(1.0 0.0 0.9 0.1 0.8 0.2 0.7 0.3 0.6 0.4 0.5)))
(defun eval3 (expr)
(eval-appr expr '(0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0)
'(1.0 0.7 0.5 0.4 0.3 0.26 0.20 0.15 0.11 0.07 0.04)))
(defun better (expr1 expr2 eval)
(if (< (funcall eval expr1) (funcall eval expr2)) expr1 expr2))
;;; Execution environment of a genetic programming system
;;; written by Christoph F. Eick
;;; code has been written in a hurry and there are many
;;; places to increase its efficiency; moreover, it has
;;; not been exhaustively tested and therefore might contain bugs
;;; if you have any suggestions for improvement, send e-mail to:
;;; ceick@cs.uh.edu
;;;the array arr stores
;;; two generations the current and the next generation
;;; begin closure
(let ((arr (make-array (list 2 *pop-size*)))
(c-arr (make-array 2 :initial-element 0))
(best-so-far 999999999)
(best nil))
(defun g-insert (i el)
(setf (aref arr i (aref c-arr i)) el)
(setf (aref c-arr i) (+ 1 (aref c-arr i))))
(defun g-clear (i)
(setf (aref c-arr i) 0))
(defun g-change (i ind el)
(setf (aref arr i ind) el))
(defun cont() arr)
(defun g-read (i ind)
(aref arr i ind))
(defun best () (list best best-so-far))
(defun best-so-far () best-so-far)
(defun update-best (best1 bsf)
(setq best best1)
(setq best-so-far bsf))
(defun reset ()
(setq best-so-far 999999999))
(defun avg-fit (i)
(let ((re 0))
(dotimes (x *pop-size* (/ re *pop-size*))
(setq re (+ re (cadr (g-read i x)))) )))
)
;;; end closure
;;; creates the initial generation; creates 50% 2-tree, 25% 3-trees
;;; and 25% 4-trees; creates 2 solutions randomly and keeps the better one!
(defun init-pop (i eval1)
(let ((t1)(t2)(t3)(t4))
(g-clear i)
(dotimes (x *pop-size* 'init-gen)
(setq t1 (if (eq (random 2) 0) (gen-3-tree) (gen-4-tree)))
(setq t2 (funcall eval1 t1))
(setq t3 (gen-2-tree))
(setq t4 (funcall eval1 t3))
(if (< t2 t4)
(g-insert i (list t1 t2))
(g-insert i (list t3 t4)) )
)))
;;; selection operator of the GP-system:
;;; picks 3 solutions randomly and returns the best
(defun 3-tou-selection (i)
(let ((t1 (g-read i (random *pop-size*)))
(t2 (g-read i (random *pop-size*))))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(setq t2 (g-read i (random *pop-size*)))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(car t1)))
(defun 2-tou-selection (i)
(let ((t1 (g-read i (random *pop-size*)))
(t2 (g-read i (random *pop-size*))))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(car t1)))
(defun 4-tou-selection (i)
(let ((t1 (g-read i (random *pop-size*)))
(t2 (g-read i (random *pop-size*))))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(setq t2 (g-read i (random *pop-size*)))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(setq t2 (g-read i (random *pop-size*)))
(if (> (cadr t1) (cadr t2)) (setq t1 t2))
(car t1)))
;;; generates a new generation in arr[j,*] from arr[i,*]; employes
;;; tournament selection with k=3 and applies mutation in 10% of the cases
;;; and uses copying with probability 1/*lim-copy and crossover with
;;; probability (1 - 1/*lim-copy); mutation is applied with probability
;;; 1/*lim-mut* to the solution that was received through crossover or
;;; copying respectively.
(defun breed (i j eval1)
(g-clear j)
(let ((t1)(t2))
(dotimes (x *pop-size* 'breeding-done)
(setq t1 (if (eq (random *lim-copy*) 0)
(4-tou-selection i)
(crossover (4-tou-selection i) (3-tou-selection i))))
(if (eq (random *lim-mut*) 0) (setq t1 (mutate t1)))
(setq t2 (apply eval1 (list t1)))
(if (< t2 (best-so-far)) (update-best t1 t2))
(g-insert j (list t1 t2)) )))
;;;top-level function; runs the GP-system for n generation with
;;;respect to evaluation function eval1 using two subarrays.
(defun run-gp (n eval1)
(reset)
(init-pop 0 eval1)
(format t "Generation 0 avg-fitness: ~A~%~%" (avg-fit 0))
(dotimes (x (floor (/ n 2)) (progn
(format t "Last Generation ~A avg-fitness: ~A~%~%"
n (avg-fit 0))
(best)))
(breed 0 1 eval1)
(if (eq (random 2) 0)
(format t "Generation ~A avg-fitness: ~A best-solution-so-far:~%~A~%~%"
(+ (* x 2) 1) (avg-fit 1) (best))
)
(breed 1 0 eval1)
)
)
;;; 3 evaluations functions are defined in this program
;;; you could call (run-gp 200 #'eval1) "run 200 gen. with respect to eval1"
;;; or (run-gp 150 #'eval2)
;;; or (run-gp 100 #'eval3)