4,887,235
	533	534
    (call bb-word-alu-operation-dispatch))
  (assign bb-width (- bb-width (b-constant 32.)))
  (incr-d-offset)	;1
  (assign bb-s-offset bb-s-offset-ahead)	;1
  (parallel					;1
    (assign bb-s-word (rotate bb-s-word2 byte-r))
    (lisp (trace-path #/.))
    (jump ubitblt-d-aligned-row-both)))

;;At entry, we have s-word fetched from memory like
;;          <------s.bitpos------>
;;ssssssssss......................
;;but then rotated so it looks like
;;......................ssssssssss
;;<------s.bitpos------>
;;
;;This is to be combined with d-word which looks like
;;....................dddddddddddd
;;                    <---width-->
(defucode ubitbit-d-aligned-row-both-done
  (assign bb-s-word (logxor bb-constant-a bb-s-word))
  (if (plus-fixnum bb-width)
      (sequential
       (assign b-temp (32- bb-s-bitpos))
       (if (lesser-or-equal-fixnum bb-width b-temp)
	   ;;we have enouqh s bits
	   ;;<----s.bitpos---><--a.temp--->
	   ;;.................sssssssssssssss
	   ;;....................dddddddddddd
	   ;;                    <---width-->
	   (sequential
	    (assign byte-r (b-constant 0))
	    (assign byte-s (1- bb-width))
	    (parallel
	     (assign-vma-offset d)
	     (lisp (trace-path #/4))
	     (jump bb-byte-alu-operation-dispatch)))	 ;jcall
	 ;;need to get another source word
	 ;;<----s.bitpos---><----a.temp--->
	 ;;.................sssssssssssssss
	 ;;............dddddddddddddddddddd
	 ;;            <-------width------>
	 (sequential
	  (parallel-with-s-access bb-s-offset-ahead
	    (assign byte-r b-temp)
	    (assign byte-s (1- bb-s-bitpos))
	    (assign bb-s-word2 (logxor memory-data bb-constant)))
	  (assign bb-s-word (dpb bb-s-word2 byte-s byte-r bb-s-word))
	  (assign byte-r (b-constant 0))
	  (assign byte-s (1- bb-width))
	  (parallel
	   (assign-vma-offset d)
	   (lisp (trace-path #/5))
	   (jump bb-byte-alu-operation-dispatch)))))	;jcall
    (parallel-with-return
     (lisp (trace-path #/3)))))

;bb-s-word has the previous source word, rotated but not xored with bb-constant
;3 cycles per word seems to be the best I can do (cant rotate while storing in bitbit-buffer)
;If bb-s-word was xored already, it would take 4 cycles per word here
(defmacro def-bitblt-rotated-block-read (name n)
  `(defucode ,name
     (assign byte-s (1- bb-s-bitpos))
     (parallel
      (assign a-block-size (b-constant ,n))	;Used later to advance offsets
      (assign b-block-size obus)
      (start-memory block read))		;start first word
     (parallel
      (waiting-for-memory)			;waiting for first word
      (assign byte-r (32- bb-s-bitpos)))
     ,@(ioop for i from (- n-bitblt-buffers n) below n-bitblt-buffers
	     append `((abus-array-data
		       (assign bb-s-word2 (dpb memory-data byte-s byte-r bb-s-word)))
		      (parallel
		       (declare-memory-timing data-cycle)	;MD holds
		       (assign bb-s-word (rotate memory-data byte-r))
		       ,(and (> (- n-bitblt-buffers i) 1)
			     `(start-memory block read)))
		      (parallel
		       (assign (bitblt-buffer ,i)
			       (set-type (logxor bb-constant bb-s-word2) dtp-fix))
		       ,(if (= (- n-bitblt-buffers i) 1)
			    `(return)))))))

(def-bitblt-rotated-block-read ubitblt-rotated-block-read-8 8)
(def-bitblt-rotated-block-read ubitblt-rotated-block-read-4 4)

(defucode ubitblt-long-row-source-backwards
  (parallel
   (assign b-temp bb-d-bitpos)
   (if (zero-fixnum bb-d-bitpos)
       (if (zero-fixnum bb-s-bitpos)
4,887,235
	535	536
	(parallel
	 (assign bb-s-offset (1+ bb-s-offset)) ;the loop will decr first, before pclsr
	 (lisp (trace-path #/a))
	 (jump ubitblt-aligned-row-source-backwards))
	(sequential
	 (parallel-with-s-access bb-5-offset
	   (assign byte-r (32- bb-s-bitpos))
	   (parallel
	    (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))
	    (lisp (trace-path #/c))
	    (jump ubitblt-d-aligned-row-source-backwards)))))
     (if (equal-fixnum b-temp bb-s-bitpos)
	 (sequential
	  (parallel-with-s-access bb-s-offset
	    (assign byte-s (1- bb-s-bitpos))
	    (assign bb-s-word (logxor memory-data bb-constant)))
	  (parallel-with-d-access-check-write bb-d-offset
	    (decr-d-offset)
	    (parallel
	     (assign byte-r (b-constant 0))
	     (assign bb-s-bitpos (b-constant 0)))
	    (store-word (dpb bb-s-word byte-s byte-r memory-data)))
	  ;; Now we can turn into the aligned case
	  (assign bb-width (- bb-width b-temp))
	  (parallel
	   (assign bb-d-bitpos (b-constant 0))
	   (lisp (trace-path #/b))
	   (jump ubitblt-aligned-row-source-backwards)))
       (if (greater-fixnum bb-s-bitpos b-temp)	;s > d, enough in the current word
	   (sequential
	    (parallel-with-s-access bb-s-offset
	      (assign byte-s (1- bb-d-bitpos))
	      (assign byte-r (- b-temp bb-s-bitpos))
;;XXbrad bb-s-word?
	      (assign bb-s-word (logxor bb-constant memory-data)))
	    (parallel-with-d-access-check-write bb-d-offset
	      (assign bb-s-bitpos (- bb-s-bitpos b-temp))
	      (assign bb-d-bitpos (b-constant 0))
	      (store-word (ldb bb-s-word byte-s byte-r memory-data)))
	    (assign bb-s-word (rotate bb-s-word byte-r))
	    (assign bb-width (- bb-width b-temp))
	    (parallel
	     (decr-d-offset)
	     (lisp (trace-path #/d))
	     (jump ubitblt-d-aligned-row-source-backwards)))
	 (sequential			;s < d, need to fetch another word
	  (parallel-with-s-access bb-s-offset
	    (parallel
	     (assign byte-r (- b-temp bb-s-bitpos))
	     (assign a-temp (- b-temp bb-s-bitpos)))
	    (assign byte-s (1- a-temp))
	    (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
	  (decr-wrap-s-offset-ahead)
	  (parallel-with-s-access bb-s-offset-ahead
	    (assign bb-s-word2 (logxor bb-constant memory-data)))
	  (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	  (parallel-with-d-access bb-d-offset
	    (assign byte-r (b-constant 0))
	    (assign byte-s (1- bb-d-bitpos))
	    (store-word (ldb bb-s-word byte-s byte-r memory-data)))
	  (assign bb-s-bitpos (32- a-temp))
	  (assign byte-r a-temp)
	  (assign bb-s-word (rotate bb-s-word2 byte-r))
	  (assign bb-s-offset bb-s-offset-ahead)
	  (assign bb-width (- bb-width b-temp))
	  (assign bb-d-bitpos (b-constant 0))
	  (parallel
	   (decr-d-offset)
	   (lisp (trace-path #/e))
	   (jump ubitblt-d-aligned-row-source-backwards))))))))

;bb-s-offset is 1+ the real value at this point
(defucode ubitblt-aligned-row-source-backwards	;9 cycles per word
  (decr-wrap-s-offset)				;1
  (parallel-with-s-access bb-s-offset		;4
    (trap-if (lesser-fixnum bb-width (b-constant 32.))
	     ubitblt-aligned-row-source-backwards-done)
    (waiting-for-memory)
    (assign bb-s-word (logxor bb-constant memory-data)))
  (assign-vma-offset d)				;1
  (store-word bb-s-word)			;1
  (assign bb-width (- bb-width (b-constant 32.))) ;1
  (parallel					;1
   (decr-d-offset)
   (lisp (trace-path #/,))
   (jump ubitblt-aligned-row-source-backwards)))

(defucode ubitblt-aligned-row-source-backwards-done
  (if (plus-fixnum bb-width)
      (sequential
       (parallel-with-s-access bb-s-offset
	 (assign byte-s (1- bb-width))
4,887,235
	537	538
	(assign byte-r bb-width)
	(assign bb-s-word (logxor bb-constant (ldb memory-data byte-s byte-r))))
       (parallel-with-d-access bb-d-offset
	(assign byte-r (32- bb-width))
	(parallel-with-return
	 (store-word (dpb bb-s-word byte-s byte-r memory-data))
	 (lisp (trace-path #/2)))))
    (parallel-with-return
     (lisp (trace-path #/1)))))

;;each time through the loop. bb-s-word has the low part of the previous word
;;rotated to be at the hign end of the word. We use it as background to LDB the
;;high part of the next word into it.

;bb-s-offset is 1+ the "real" value at this point
;could bum one cycle by moving assignment to byte-s out of loop,
;but this should use block mode anyway
(defucode ubitblt-d-aligned-row-source-backwards	;11 cycles per word
  (decr-wrap-s-offset)	;1
  (parallel-with-s-access bb-s-offset	;4
    (trap-if (lesser-fixnum bb-width (b-constant 32.))
	     ubitblt-d-aligned-row-source-backwards-done)
    (assign byte-r (32- bb-s-bitpos))
    (assign bb-s-word2 (logxor bb-constant memory-data)))
  (assign byte-s (31- bb-s-bitpos))	;1
  (assign-vma-offset d)	;1
  (store-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) ;1
  (assign bb-width (- bb-width (b-constant 32.))) ;1
  (decr-d-offset)	;1
  (parallel	;1
   (assign bb-s-word (rotate bb-s-word2 byte-r))
   (lisp (trace-path #/.))
   (jump ubitblt-d-aligned-row-source-backwards)))

(defucode ubitblt-d-aligned-row-sourcs-backwards-done
  (parallel
   (assign bb-width-b bb-width)
   (if (plus-fixnum bb-width)
       (if (greater-or-equal-fixnum bb-s-bitpos bb-width-b)
	   (parallel-with-d-access bb-d-offset
	     (assign byte-r (b-constant 0))
	     (assign byte-s (31- bb-width))
	     (parallel-with-return
	      (store-word (ldb memory-data byte-s byte-r bb-s-word))
	      (lisp (trace-path #/4))))
	 (sequential
	  (parallel-with-s-access bb-s-offset
	    (assign byte-r bb-width)
	    (assign bb-s-word (rotate bb-s-word byte-r))
	    (assign bb-s-word2 (logxor bb-constant memory-data)))
	  (parallel
	   (assign byte-r (- bb-width-b bb-s-bitpos))
	   (assign a-temp obus))
	  (assign byte-s (1- a-temp))
	  (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	  (parallel-with-d-access bb-d-offset
	    (assign byte-s (1- bb-width))
	    (assign byte-r (32- bb-width))
	    (parallel-with-return
	     (store-word (dpb bb-s-word byte-s byte-r memory-data))
	     (lisp (trace-path #/5))))))
     (parallel-with-return
      (lisp (trace-path #/3))))))
;;XXXbrad - break here - doesn't match up
(assign b-temp bb-d-bitpos)
(if (zero-fixnum bb-d-bitpos)
(if (zero-fixnum bb-s-bitpos)
(parallel
	(assign bb-s-offset (1+ bb-s-offset)) ;loop will decr first before pclsr
	(lisp (trace-path #/a))
	(jump ubitblt-aligned-row-both-backwards))
(parallel-with-u-access bb-s-offset
(assign byte-r (32- bb-s-bitpos))
(parallel
	(assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))
	(lisp (trace-path #/c))
	(jump ubitblt-d-aligned-row-both-backwards))))
(if (equal-fixnum b-temp bb-s-bitpos)
    (sequential
	(parallel-with-s-access bb-s-offset
	  (assign byte-s (1- bb-s-bitpos))
	  (assign byte-r (b-constant 0))
	  (assign bb-s-word (logxor bb-constant memory-data)))
	(parallel
	 (assign-vma-offset d)
	 (call bb-byte-alu-operation-dispatch))
	(assign bb-width (- bb-width b-temp))
	(assign bb-s-bitpos (b-constant 0))
	(assign bb-d-bitpos (b-constant 0))
	(parallel
	(decr-d-offset)
	(lisp (trace-path #/b))
4,887,235
	539	540

	(jump ubitblt-aligned-row-both-backwards)))
(if (greater-fixnum bb-s-bitpos b-temp) ;s > d, enough in first word
(sequential
	(parallel-with-s-access bb-s-offset
	(parallel
(assign byte-r (- b-temp bb-s-bitpos))
	(assign a-temp obus))	;this is negative
	(assign byte-s (1- bb-d-bitpos))
	(assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
(assign byte-r (b-constant 0))
	(parallel
	(assign-vma-offset d)
	(call bb-byte-alu-operation-dispatch))
	(assign bb-s-bitpos (- bb-s-bitpos b-temp))
	(assign bb-d-bitpos (b-constant 0)
	(assign bb-width (- bb-width b-temp))
	(parallel
(decr-d-offset)
	(lisp (trace-path #/d))
	(jump ubitblt-d-aligned-row-both-backwards)))
	(sequential	;s<d, need to fetch another word
	(parallel-with-s-access bb-s-offset
	(assign byte-r (- b-temp bb-s-bitpos))
	(assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
(decr-wrap-s-offset-ahead)
	(parallel-with-s-access bb-s-offset-ahead
	(assign a-temp (- b-temp bb-s-bitpos))
	(assign byte-s (1- a-temp))
	(assign bb-s-word2 (logxor bb-constant memory-data)))
	(assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	(assign byte-s (1- bb-d-bitpos))
	(assign byte-r (b-constant 0))
(parallel
	(assign-vma-offset d)
	(call bb-byte-alu-operation-dispatch))
	(parallel
(assign a-temp (- b-temp bb-s-bitpos))
	(assign byte-r obus))
	(assign bb-s-word (rotate bb-s-word2 byte-r))
	(assign bb-s-bitpos (32- a-temp))
	(assign bb-s-offset bb-s-offset-ahead)
	(assign bb-d-bitpos (b-constant 0))
	(assign bb-width (- bb-width b-temp))
	(parallel
	(decr-d-offset)
	(lisp (trace-path #/e))
	(jump ubitblt-d-aligned-row-both-backwards))))))))

;bb-s-offset is 1+ its real value
;bb-s-word has the previous word, rotated and xored
(defucode ubitblt-d-aligned-row-both-backwards	;14 cycles per word
  (decr-wrap-s-offset)				;1 cycles
  (parallel-with-s-access bb-s-offset		;4 cycles
    (trap-if (lesser-fixnum bb-width (b-constant 32.))
	     ubitblt-d-aligned-row-both-backwards-done)
    (assign byte-r (32- bb-s-bitpos))
    (assign bb-s-word2 (logxor bb-constant memory-data)))
  (assign byte-s (31- bb-s-bitpos))		;1
  (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) ;1 cycle
  (parallel					;1+3 cycles
   (assign-vma-offset d)
   (cal bb-word-alu-operation-dispatch))
  (assign bb-s-word (rotate bb-s-word2 byte-r)) ;1
  (assign bb-width (- bb-width (b-constant 32.))) ;1
  (parallel
   (decr-d-offset)
   (lisp (trace-path #/.))			;1
   (jump ubitblt-d-aligned-row-both-backwards)))

(defucode ubitblt-a-aligned-row-both-backwards-done
  (parallel
   (assign bb-width-b bb-width)
   (if (plus-fixnum bb-width)
       (if (greater-or-equal-fixnum bb-s-bitpos bb-width-b)
	   (sequential
	    (assign byte-r bb-width)
	    (assign bb-s-word (rotate bb-s-word byte-r))
	    (assign byte-s (1- bb-width))
	    (assign byte-r (32- bb-width))
	    (parallel
	     (assign-vma-offset d)
	     (lisp (trace-path #/4))
	     (jump bb-byte-alu-operation-dispatch))) ;jcall
	 (sequential
	  (parallel-with-s-access bb-s-offset
	    (assign byte-r bb-width)
	    (assign bb-s-word (rotate bb-s-word byte-r))
	    (assign bb-s-word2 (logxor bb-constant memory-data)))
	  (parallel
	   (assign byte-r (- bb-width-b bb-s-bitpos))
	   (assign a-temp obus))
4,887,235
	541	542
	(assign byte-s (1- a-temp))
	(assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	(assign byte-s (1- bb-width))
	(assign byte-r (32- bb-width))
	(parallel
	 (assign-vma-offset d)
	 (lisp (trace-path #/5))
	 (jump bb-byte-alu-operation-dispatch))))	;jcall
     (parallel-with-return
      (lisp (trace-path #/3))))))

;;code for %decode-bitblt-arrays
;;Take alu from-array to-array
;;Return (s-beg-addr s-beg-bitpos s-row-length s-height s-bits-per-elt
;;	  d-beg-addr d-beg-bitpos d-row-length d-height d-bits-per-elt
;;	  array-reg-event-count)

;;args
(defatomicro bbd-alu (amem (stack-pointer -2)))
(defatomicro bbd-s-array (amem (stack-pointer -1)))
(defatomicro bbd-d-array top-of-stack-a)

;; 4 slots for array-setup-2d to return its results
(defatomicro bbd-control	(amem (stack-pointer 1)))
(defatomicro bbd-base-pointer	(amem (stack-pointer 2)))
(defatomicro bbd-width		(amem (stack-potnter 3)))
(defatomicro bbd-height		(amem (stack-pointer 4)))
(defatomicro bbd-s-beg-addr	(amem (stack-pointer 5)))
(defatomicro bbd-s-beg-bitpos	(amem (stack-pointer 6)))
(defatomicro bbd-s-row-length	(amem (stack-pointer 7)))
(defatomicro bbd-s-height	(amem (stack-pointer 8)))
(defatomicro bbd-s-bits-per-elt	(amem (stack-pointer 9.))
(defatomicro bbd-d-beg-addr	(amem (stack-pointer 10.)))
(defatomicro bbd-d-beg-bitpos	(amem (stack-pointer 11.)))
(defatomicro bbd-d-row-length	(amem (stack-pointer 12.)))
(defatomicro bbd-d-height	(amem (stack-pointer 13.)))
(defatomicro bbd-d-bits-per-elt (amem (stack-pointer 14.)))
(defatomicro bbd-event-count	(amem (stack-pointer 15.)))

(defatomicro bb-alu-depends-on-source
  (b-constant #,(loop for alu in `(5 10.	;source
				  ;3 12.	:dest
				  ;0 15.	;neither
				  1 2 4 6 7 8. 9. 11. 13. 14. 	;both
				  )
		      sum (ash 1 alu))))

(defmicro compute-beg-bitpos (for-what)
  (let ((beg-bitpos (selectq for-what
		      (s 'bbd-s-beg-bitpos)
		      (d 'bbd-d-beg-bitpos)
		      (otherwise (ferror "What is ~S" for-what))))
	(row-length (selectq for-what
		      (s 'bbd-s-row-length)
		      (d 'bbd-d-row-length)
		      (otherwise (ferror "What Is ~S" for-what)))))
    `(sequential
      (assign b-low-dividend top-of-stack)
      (assign a-positive-divisor bbd-width)
      (parallel
       (assign b-high-dividend (a-constant 0))
       (assign a-divide-step-count (b-constant 15.)))
      (parallel
       (assign a-negative-divisor (- a-positive-divisor))
       (call divide-subroutine))
      ;; bits per elt setup correctly in byte-r
      (assign ,beg-bitpos (set-type (rotate b-high-dividend byte-r) dtp-fix))
      (assign b-temp (set-type (ldb ,row-length 27. 5 0) dtp-fix))
      (assign bb-a-temp b-temp)
      (mpy-32-32 bb-a-temp b-low-dividend set-b-temp for-effect nil))))

(defmicro set-b-temp (x)
  `(assign b-temp ,x))

(definst %bitblt-decode-arrays no-operand
  ;;See whether the alu operation depends on the source array
  (assign byte-r (32- bbd-alu))
  (parallel
   (assign top-of-stack (a-constant 0))		;the "subscript"
   (if (ldb-bit-test bb-alu-depends-on-source byte-r)
       (sequential
	(parallel
	 (check-arg-type array bbd-s-array dtp-array)
	 (assign vma bbd-s-array)
	 (assign b-vma bbd-s-array)
	 (call array-setup-2d))
	(parallel (assign b-temp bbd-control)
		  (call bbd-bits-per-elt))
	(parallel (assign bbd-s-bits-per-elt (set-type b-temp dtp-fix))
		  (assign byte-r b-temp))
4,887,235
	543	544
	(assign bbd-s-row-length (set-type (rotate bbd-width byte-r) dtp-fix))
	(compute-beg-bitpos s)
	(assign bbd-s-beg-addr (+. bbd-base-pointer b-temp))
	(assign bbd-s-height bbd-height))
     (sequential
      (assign bbd-s-bits-per-elt (set-type (a-constant 1) dtp-fix))
      (assign bbd-s-row-length (set-type (a-constant 1000000) dtp-fix))
      (assign bbd-s-beg-bitpos (set-type (a-constant 0) dtp-fix))
      (assign bbd-s-beg-addr quote-nil)
      (assign bbd-s-height (set-type (a-constant 1000000) dtp-fix)))))
;; decode the destination array
 (assign top-of-stack (b-constant 0))	;the "subscript
 (parallel
  (check-arg-type array bbd-d-array dtp-array)
  (assign vma bbd-d-array)
  (assign b-vma bbd-d-array)
  (call array-setup-2d))
 (parallel (assign b-temp bbd-control)
	   (assign bbd-event-count bbd-control)
	   (call bbd-bits-per-elt))
 (parallel (assign bbd-d-bits-per-elt (set-type b-temp dtp-fix))
	   (assign byte-r b-temp))
 (assign bbd-d-row-length (set-type (rotate bbd-width byte-r) dtp-fix))
 (compute-beg-bitpos d)
 (assign bbd-d-beg-addr (+ bbd-base-pointer b-temp))
 (assign bbd-d-height bbd-height)
 ;; Now copy resUlts down over arguments and array-sotup-2d work are:
 (assign b-temp frame-pointer)
 (assign frame-pointer (+ stack-pointer (b-constant 4)))
 (assign b-temp-2 (+ stack-pointer (b-constant 15.)))
 (parallel
  (assign stack-pointer (- stack-pointer (b-constant 3)))
  (call blt-stack))
 (parallel
  (assign framm-pointer b-temp)
  (assign top-of-stack top-of-stack-a)
  (next-instruction)))

;;take an array-register control word in L-temp, return a decoding of its
;;dispatch type in b-temp.
(defucode bbd-bits-per-elt
  (dispatch-after-this (array-register-dispatch-field b-temp)
		       (nop)
    ((%array-register-dispatch-1-bit)
     (parallel (assign b-temp (set-type (b-constant 0) dtp-fix)) (return)))
    ((%array-register-dispatch-2-bit)
     (parallel (assign b-temp (set-type (b-constant 1) dtp-fix)) (return)))
    ((%array-register-dispatch-4-bit)
     (parallel (assign b-temp (set-type (b-constant 2) dtp-fix)) (return)))
    ((%array-register-dispatch-8-bit)
     (parallel (assign b-temp (set-type (b-constant 3) dtp-fix)) (return)))
    ((%array-register-dispatch-16-bit)
     (parallel (assign b-temp (set-type (b-constant 4) dtp-fix)) (return)))
    ((%array-register-dispatch-word)
     (parallel (assign b-temp (set-type (b-constant 5) dtp-fix)) (return)))
    (otherwise (signal-error unimplemented-or-illegal-array-type))))

;;; -*- Mode:LISP; Package:Micro: Base:8; Lowercase: T -*-
;;; (c) copyright 1982, Symbolics, Inc.

;;; Binding stack stuff

;Address operand: special variable value cell
;Stack operand: value to bind it to
(definst bind-specvar indirect-operand
  (assign vma (- frame-function macro-unsigned-immediate 1))
  (parallel (start-memory read)
	    (assign b-temp (1+ %binding-stack-pointer)))
  (error-if (greater-pointer b-temp %binding-stack-limit)
	    bind-stack-overflow)
  (parallel (transport)					;Pick up pointer to value cell
	    (assign vma memory-data)
	    (jump bind-top-of-stack)))

;First arg: locative to cell to bind
;Second arg: value to bind it to
(definst bind-locative no-operand
  (assign b-temp (1+ %binding-stack-pointer))
  (error-if (greater-pointer b-temp %binding-stack-limit)
	    bind-stack-overflow)
  (parallel (check-data-type next-on-stack dtp-locative)
	    (assign vma next-on-stack)
	    (call bind-top-of-stack))
  (parallel (for-effect (popval))
	    (next-instruction)))

;;; Stack overflow must have been checked by hers, and b-temp has (1... Ibinding-stack-pointer)
;;; vma has locative to bound cell
;;; new-value will be popped off the stack
(defucode bind-top-of-stack
4,887,235
	545	546
	(parallel (start-memory read)		;read previous value
		  (if (bit frame-bindings-bit)	;a-temp acts eventual second binding word
		      (parallel (assign a-temp (set-cdr (set-type vma dtp-locative) 1))
				(jump bind-top-of-stack-1))
		      (parallel (assign a-temp (set-cdr (set-type vma dtp-locative) 0))
				(jump bind-top-of-stack-1)))))

(defucode bind-top-of-stack-1
  (parallel (declare-memory-timing data-cycle)
	    (transport bind)			;transport previous value
	    (assign a-temp-2 memory-data)
	    (assign b-temp-3 memory-data))
  (parallel (assign b-temp-2 vma)		;b-temp-2 -> value cell
	    (assign vma b-temp))		;vma -> binding stack
  (store-contents a-temp-2 block)		;write to binding stack
  (store-contents a-temp block)
  (parallel
   (assign top-of-stack next-on-stack)		;pop stack
   (decrement-stack-pointer)
   (assign vma b-temp-2))			;write new value into value cell
  (store-contents (amem (stack-pointer 1)) (cdr b-temp-3))	 ;preserving call's cdr code
  (assign frame-bindings-bit (b-constant 1))	;finalize binding (cant pclsr any more)
  (parallel (assign %binding-stack-pointer (+ %binding-stack-pointer (b-constant 2)))
	    (next-instruction)))

;Called by funcall-instance-binding-loop (and closure processing if that were in microcode)
(defucode bind-top-of-stack-closure
  (assign b-temp (1+ %binding-stack-pointer))
  (error-if (greater-pointer b-temp %binding-stack-limit)
	    bind-stack-overflow)
  (parallel (start-memory read)			;read previous value
	    (if (bit frame-bindings-bit)	;a-temp gets eventual second binding word
		(parallel (assign a-temp (set-cdr (set-type vma dtp-locative) 3))
			  (jump bind-top-of-stack-1))
	        (parallel (assign a-temp (set-cdr (set-type vma dtp-locative) 2))
			  (jump bind-top-of-stack-1)))))

(defmicro more-bindings-flag (opnd)	;low bit of cdr field
  `(parallel ,(get-to-abus opnd)
	     (ldb ybus-crocks-1 1 14.)))

;;; 0) Verify stack level
;;; 1) Pop locative
;;; 2) Pop old value
;;; 3) Transport-bind the current-value and write old-value
;;; returns locative in a-temp-2 so that you can check cdr-code
;;; must preserve b-temp
(defmicro call-unbind-1 (&optional return)
  `(parallel (assign vma %binding-stack-pointer)
	     (assign b-temp-2 %binding-stack-pointer)
	     ,(if return `(call-and-return-to unbind-1 ,return)
			 `(call unbind-1))))

(defucode unbind-1
  (parallel (start-memory read)
	    (error-if (greater-pointer %binding-stack-low b-temp-2)
		      bind-stack-underflow))
  (error-if (not (bit frame-bindings-bit)) unbind-too-many)
  (parallel (transport)
	    (assign a-temp-2 memory-data))	;a-temp-2 gets locative to value cell
  (memread (1- %binding-stack-pointer))
  (parallel (transport bind)
	    (assign a-temp memory-data))	;a-temp gets old value (or evcp or null)
  (memread a-temp-2)
  (parallel (transport bind-write)		;Follow forwards but no EVCPs
	    (assign b-temp-2 memory-data))
  (store-contents a-temp (cdr b-temp-2))	;Store back old value, preserving cells cdr
  (if (not (bit (more-bindings-flag a-temp-2))) ;Now finalize (cannot pclsr any core)
      (assign frame-bindings-bit (b-constant 0))
    (drop-through))
  (parallel (assign %binding-stack-pointer (- %binding-stack-pointer (b-constant 2)))
	    (return)))

(definst unbind-n unsigned-immediate-operand
  (if (not (bit first-part-done))
      (sequential
       (pushval (set-type (1- macro-unsigned-immediate) dtp-fix))
       (parallel (assign first-part-done (b-constant 1))
		 (clear-stack-adjustment)
		 (jump urbind-n-loop)))
    (goto unbind-n-loop)))

(defucode unbind-n-loop
  (call-unbind-1)
  (parallel
   (assign top-of-stack-a (1- top-of-stack-a))
   (assign top-of-stack obus)
   (if (minus-fixnum obus)
       (parallel
	(assign first-part-done (b-constant 0))
	(decrement-stack-pointer)
4,887,235
	547	548
	(jump fixup-tos))
	(goto unbind-n-loop))))

(defucode frame-cleanup-bind-stack-unwind
  (if (bit frame-bindings-bit)
	   (call-unbind-1 frame-cleanup-bind-stack-unwind)
	   (return)))

(defucode pop-binding-stack-to-b-temp
  (if (equal-pointer %binding-stack-pointer b-temp)
      (return)
    (call-unbind-1 pop-binding-stack-to-b-temp)))

(definst %save-binding-stack-level no-operand
  (pushval %binding-stack-pointer))

;If you want to save one control-memory location, make this "smashes-stack"
;and recompile all Lisp code.
(definst %restore-binding-stack-level no-operand
  (parallel (check-data-type top-of-stack-a dtp-locative)
	    (assign b-temp top-of-stack-a))
  (parallel (for-effect (popval))
	    (jump pop-binding-stack-to-b-temp)))

;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*-
;;; (c) Copyright 1982, Symbolics, Inc.

; Microcode definitions for the most basic instructions

;Get defmicro and all his hosts
#M
(declare (cond ((not (status feature lmucode))
		(load 'udcls))))


;;; Some sinple instructions

(definst1 push-immed signed-immediate-operand
  (pushval (set-type macro-signed-immediate dtp-fix)))

(definst1 push-local address-operand
  (pushval address-operand))

(definst push-address-local address-operand
  (if (bit-test (a-constant 1_7) macro-signed-immediate)
      ;Stack-relative
      (parallel (pushval (set-type (+ stack-pointer macro-signed-immediate 1)
				   dtp-locative))
		(next-instruction))
      ;Frame-relative
      (parallel (pushval (set-type (+ frame-pointer macro-signed-immediate)
				   dtp-locative))
		(next-instruction))))

;There is a multiple group at the top of the stack, and its size
;needs to get added to our operand. We then go that deep in the
;stack and retrieve a word.
(definst push-from-beyond-multiple unsigned-immediate-operand
  (assign b-temp (+ top-of-stack-a macro-unsigned-immediate 1))
  (assign xbas (- stack-pointer b-temp))
  (parallel (pushval (amem (xbas 0)))
	    (next-instruction)))

;Access the constant as memory, even though it is stored in A-memory, because
;there tends to be an invisible pointer there.
(definst push-microcode-escape-constant unsigned-immediate-operand
  (parallel
   (assign vma (+ (a-constant (+ (get 'microcode-escape-constants 'a-memory-block-address)
				 (get 'a-memory-virtual-address 'sysconstant)))
		  macro-unsigned-immediate))
   (jump pushmem)))

(definst1 pop-local (address-operand needs-stack)
  (assign address-operand (popval)))

(definst1 movem-local (address-operand needs-stack)
  (assign address-operand top-of-stack))

(definst1 ldb-immed 10-bit-immediate-operand
  (check-fixnum-1arg-a top-of-stack-a
    (otherwise (take-post-trap ldb-escape preserve-stack)))
  (newtop (set-type (ldb top-of-stack-a macro macro) dtp-fix)))

(definst1 dpb-immed (10-bit-immediate-operand needs-stack)
  (check-fixnum-2args next-on-stack top-of-stack
    (otherwise (take-post-trap dpb-escape preserve-stack)))
  (pop2push (set-type (dpb next-on-stack macro macro top-of-stack) dtp-fix)))

(definst lsh-stack (no-operand needs-stack)
  (parallel
   (check-fixnum-2args next-on-stack top-of-stack)
4,887,235
	549	550
   (if (minus-fixnum top-of-stack)
       ;Shift right by LDBing
       (parallel
	(assign byte-s (+ (a-constant 37) top-of-stack))	;Bytssize-1
	(if (minus-fixnum obus)
	    ;Shifted away--result is zero
	    (parallel (pop2push (set-type (a-constant 0) dtp-fix))
		      (next-instruction))
	  (sequential
	   (assign byte-r (+ (a-constant 37) top-of-stack 1))	;Rotate
	   (parallel
	    (pop2push (set-type (ldb next-on-stack byte-s byte-r)
				dtp-fix))
	    (next-instruction)))))
     ;Shift left by DPBing
     (parallel
      (assign byte-s (- (a-constant 37) top-of-stack))		;Bytesize-1
      (if (minus-fixnum obus)
	  ;Shifted away--result is zero
	  (parallel (pop2push (set-type (a-constant 0) dtp-fix))
		    (next-instruction))
	(sequential
	 (assign byte-r top-of-stack)			;Rotate
	 (parallel
	  (pop2push (set-type (dpb next-on-stack byte-s byte-r 0)
			      dtp-fix))
	  (next-instruction))))))))

(definst rot-stack (no-operand needs-stack)
  (assign byte-r top-of-stack)		;Truncates to 5 bits
  (parallel
   (check-fixnum-2args next-on-stack top-of-stack)
   (pop2push (set-type (rotate next-on-stack byte-r) dtp-fix))
   (next-instruction)))

;;; Memory reference instructions

;Put something in vma and jump here. This pushes the contents of memory
;as the result of the instruction.
(defucode pushmem
  (start-memory read)
  (nop)
  (parallel (transport)
	    (pushval memory-data)
	    (next-instruction)))

;Put something in vma and jump here. This puts the contents of memory
;on the tcp of the stack (replacing an operand).
(defucode newtopmem
  (start-memory read)
  (nop)
  (parallel (transport)
	    (newtop memory-data)
	    (next-instruction)))

;Put something in VMA and jump here. This pushes the contents of the location
;printed to by that ocat ion.
(defucode pushmemind
  (start-memory read)
  (nop)
  (parallel (transport)
	    (assign vma memory-data)
	    (jump pushmem)))

;Put address in vma and jump here. Top of stack is popped and stored into
;that memory location, leaving the locations cdr code unchanged.
;Touch memory-data only once, for the sake of the temporary memory control.
(defucode popmem
  (parallel (start-memory read)		;Read in case of invz. store-data to B side
	    (assign b-temp top-of-stack-a))
  (for-effect (popval))			;Pop stack, adjust top-of-stack register
  (parallel (transport write)		;Follow any forwarding pointer
	    (assign a-temp		;Merge new data with old cdr code
		    (merge-cdr b-temp memory-data)))
  (parallel (store-contents a-temp)	;Now write back the new car
	    (next-instruction)))

;indirect version of popmem
(defucode popmemind
  (start-memory read)
  (nop)
  (parallel (transport)
	    (assign vma memory-data)
	    (jump popmem)))


(definst push-constant constant-operand
  (parallel (assign vma (- frame-function macro-unsigned-immediate 1))
	    (jump pushmem)))

(definst push-indirect indirect-operand
4,887,235
	551	552
  (parallel (assign vma (- frame-function macro-unsigned-immediate 1))
	    (jump pushmemind)))

(definst pop-indirect (indirect-operand needs-stack)
  (parallel (assign vma (- frame-function macro-unsigned-immediate 1))
	    (jump popmemind)))

(definst movem-indirect (indirect-operand needs-stack)
  (parallel (pushval top-of-stack)
	    (jump pop-indirect)))

;;; List Processing

	;This is the format-3 version, others will exist, too.
(definst car no-operand
  (parallel (check-data-type top-of-stack-a dtp-list dtp-locative dtp-nil)
	    (assign vma top-of-stack-a)
	    (if (data-type? top-of-stack-a dtp-nil)
		(parallel (newtop quote-nil) (next-instruction))
	      (goto newtopmem))))

;Note that this assumes that the storage allocator does not allow
;a 2-word cons to lie across a page boundary. (Or the MC does hair????---)
(definst cdr no-operand
  (parallel
   (check-data-type top-of-stack-a dtp-list dtp-locative dtp-nil)	;[1]
   (assign vma to-of-stack-a)
   (if (data-type? top-of-stack-a dtp-nil)
       (parallel (newtop quote-nil) (next-instruction))			;[2]
       (sequential
	 (start-memory read)						;[2]
	 (if (data-type? top-of-stack-a dtp-locative)			;[3]
	     (parallel (transport)					;[4]
		       (newtop memory-data)
		       (next-instruction))
	     (parallel
	      (transport cdr)						;[4]
	      ;Cant do this with temporary memory control
	      ;(increment-pma)
	      (if (cdr-code? memory-data cdr-next)
		  (parallel
		   (newtop (set-type (1+ vma) dtp-list))		;[5]
		   (next-instruction))
		  (parallel
		   (assign vma (1+ vma))				;[5]
		   (take-dispatch)))
	      (dispatch-after-next (cdr-code memory-data)
	        ((cdr-nil) (parallel (newtop quote-nil)			;[6]
				     (next-instruction)))
		((cdr-normal)
		 ;Extra code inserted for temporary memory control
		 (start-memory read)	;vma has been incremented
		 (nop)
		 ;End extra code
		 (parallel (transport)
			   (newtop memory-data)
			   (next-instruction)))
		(otherwise (signal-error bad-cdr-code)))))))))

;Cdr timings:
;  cdr of nil			2 cycles
;  cdr of locative		4 cycles
;  cdr of list, cdr-next	5 cycles
;  cdr of list, cdr-nil		6 cycles
;  car of list, cdr-normal	6 cycles
;This is about as fast as it can go without using a 4-way skip,
;which would make all the list cases 5 cycles.


;This version returns no value. Rather than provide versions that
;return one or the other of the arguments, we will just let the
;compiler worry about it.
(definst rplaca no-operand		;format 3
  (parallel (check-data-type next-on-stack dtp-list dtp-locative)
	    (assign vma next-on-stack)
	    (jump rplaca1)))

(defucode rplaca1
  (parallel (start-memory read)
	    (assign b-temp top-of-stack-a)
	    (decrement-stack-pointer))
  (for-effect (popval))			;Adjust stack during memory wait
  (parallel (transport write)		;Follow	forwarding pointer
	    (assign a-temp		;Merge new data with old cdr code
		    (merge-cdr b-temp memory-data)))
  (parallel (store-contents a-temp)	;Now write back the new car
	    (next-instruction)))

(definst rplacd no-operand
  (parallel (check-data-type next-on-stack dtp-list dtp-locative)
	    (assign vma next-on-stack)
	    (if (data-type? next-on-stack dtp-locative)
