4,887,235
	333	334

(defucode ubitblt-long-row-both-backwards
  (parallel
   (assign b-temp bb-d-bitpos)
   (if (zero-fixnum bb-d-bitpos)
       (if (zero-fixnum bb-s-bitpos)
	   (parallel
	     (assign bb-s-offset (1+ bb-s-offset))	;loop will decr first before pclsr
	     (lisp (trace-path #/a))
	     (jump ubitblt-aligned-row-both-backwards))
	 (parallel-with-s-access bb-s-offset
	   (assign byte-r (32- bb-s-bitpos))
	   (parallel
	    (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r)))
	    (lisp (trace-path #/c))
	    (jump ubitblt-d-aligned-row-both-backwards))))
     (if (equal-fixnum b-temp bb-s-bitpos)
	 (sequential
	  (parallel-with-s-access bb-s-offset
	    (assign byte-s (1- bb-s-bitpos))
	    (assign byte-r (b-constant 0))
	    (assign bb-s-word (logxor bb-constant memory-data)))
	  (parallel
	   (assign-vma-offset 0)
	   (call bb-byte-alu-operation-dispatch))
	  (assign bb-width (- bb-width b-temp))
	  (assign bb-s-bitpos (b-constant 0))
	  (assign bb-d-bitpos (b-constant 0))
	  (parallel
	    (decr-d-offset)
	    (lisp (trace-path #/b))
	    (jump ubitblt-aligned-row-both-backwards)))
       (if (greater-fixnum bb-s-bitpos b-temp)	;s > d, enough in first word
	   (sequential
	     (parallel-with-s-access bb-s-offset
	       (parallel
		 (assign byte-r (- b-temp bb-s-bitpos))
		 (assign a-temp obus))	;this is negative
	       (assign byte-s (1- bb-d-bitpos))
	       (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
	     (assign byte-r (b-constant 0))
	     (parallel
	       (assign-vma-offset d)
	       (call bb-byte-alu-operation-dispatch))
	     (assign bb-s-bitpos (- bb-s-bitpos b-temp))
	     (assign bb-d-bitpos (b-constant 0))
	     (assign bb-width (- bb-width b-temp))
	     (parallel
	       (decr-d-offset)
	       (lisp (trace-path #/d))
	       (jump ubitblt-d-aligned-row-both-backwards)))
	   (sequential				;s<d, need to fetch another word
	     (parallel-with-s-access bb-s-offset
	       (assign byte-r (- b-temp bb-s-bitpos))
	       (assign bb-s-word (logxor bb-constant (rotate memory-data byte-r))))
	     (decr-wrap-s-offset-ahead)
	     (parallel-with-s-access bb-s-offset-ahead
	       (assign a-temp (- b-temp bb-s-bitpos))
	       (assign byte-s (1- a-temp))
	       (assign bb-s-word2 (logxor bb-constant memory-data)))
	     (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	     (assign byte-s (1- bb-d-bitpos))
	     (assign byte-r (b-constant 0))
	     (parallel
	       (assign-vma-offset d)
	       (call bb-byte-alu-operation-dispatch))
	     (parallel
	       (assign a-temp (- b-temp bb-s-bitpos))
	       (assign byte-r obus))
	     (assign bb-s-word (rotate bb-s-word2 byte-r))
	     (assign bb-s-bitpos (32- a-temp))
	     (assign bb-s-offset bb-s-offset-ahead)
	     (assign bb-d-bitpos (b-constant 0))
	     (assign bb-width (- bb-width b-temp))
	     (parallel
	       (decr-d-offset)
	       (lisp (trace-path #/e))
	       (jump ubitblt-d-aligned-row-both-backwards))))))))

;bb-s-offset is 1+ its "real" value
(defucode ubitblt-aligned-row-both-backwards 	;11 cycles per word
  (decr-wrap-s-offset)				;1
  (parallel-with-s-access bb-s-offset		;4
    (trap-if (lesser-fixnum bb-width (b-constant 32.))
	     ubitblt-aligned-row-both-backwards-done)
    (waiting-for-memory)
    (assign bb-s-word (logxor bb-constant memory-data)))
  (parallel					;1+3
   (assign-vma-offset d)
   (call bb-word-alu-operation-dispatch))
4,887,235
	335	336
  (assign bb-width (- bb-width (b-constant 32.))) ;1
  (parallel					;1
   (decr-d-offset)
   (lisp (trace-path #/,))
   (jump ubitblt-aligned-row-both-backwards)))

(defucode ubitblt-aligned-row-both-backwards-done
  (if (plus-fixnum bb-width)
      (sequential
       (parallel-with-s-access bb-s-offset
	 (assign byte-s (1- bb-width))
	 (assign byte-r bb-width)
	 (assign bb-s-word (logxor bb-constant (ldb memory-data byte-s byte-r))))
       (assign byte-r (32- bb-width))
       (parallel
	 (assign-vma-offset d)
	 (lisp (trace-path #/2))
	 (jump bb-byte-alu-operation-dispatch)))	;jcall
    (parallel-with-return
      (lisp (trace-path #/1)))))


F:>lmach>ucode>nBITBLT.LISP.22


;bb-s-offset is 1+ its "real" value
;bb-s-word has the previous word, rotated and xored
(defucode ubitblt-d-aligned-row-both-backwards	;14 cycles per word
  (decr-wrap-s-offset)				;1 cycles
  (parallel-with-s-access bb-s-offset		;4 cyclcs
    (trap-if (lesser-fixnum bb-width (b-constant 32.))
	     ubitblt-d-aligned-row-both-backwards-done)
    (assign bye-r (32- bb-s-bitpos))
    (assign bb-s-word2 (logxor bb-constant memory-data)))
  (assign byte-s (31- bb-s-bitpos))		;1
  (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word)) ;1 cycle
  (parallel					;1+3 cycles
    (assign-vma-offset d)
    (call bb-word-alu-operation-dispatch))
  (assign bb-s-word (rotate bb-s-word2 byte-r))	;1
  (assign bb-width (- bb-width (b-constant 32.))) ;1
  (parallel
    (decr-d-offset)
    (lisp (trace-path #/.))
    (jump ubitblt-d-aligned-row-both-backwards)))

(defucode ubitblt-d-aligned-row-both-backwards-done
  (parallel
    (assign bb-width-b bb-width)
    (if (plus-fixnum bb-width)
	(if (greater-or-equal-fixnum bb-s-bitpos bb-width-b)
	    (sequential
	      (assign byte-r bb-width)
	      (assign bb-s-word (rotate bb-s-word byte-r))
	      (assign byte-s (1- bb-width))
	      (assign byte-r (32- bb-width))
	      (parallel
	        (assign-vma-offset d)
		(lisp (trace-path #/4))
	        (jump bb-byte-alu-operation-dispatch))) ;jcall
            (sequential
	      (parallel-with-s-access bb-s-offset
	        (assign byte-r bb-width)
		(assign bb-s-word (rotate bb-s-word byte-r))
		(assign bb-s-word2 (logxor bb-constant memory-data)))
	      (parallel
	        (assign byte-r (- bb-width-b bb-s-bitpos))
		(assign a-temp obus))
	     (assign byte-s (1- a-temp))
	     (assign bb-s-word (ldb bb-s-word2 byte-s byte-r bb-s-word))
	     (assign byte-s (1- bb-width))
	     (assign byte-r (32- bb-width))
	     (parallel
	       (assign-vma-offset d)
	       (lisp (trace-path #/5))
	       (jump bb-byte-alu-operation-dispatch))))		;jcall
    (parallel-with-return
     (lisp (trace-path #/3))))))


;;code for %decode-bitblt-arrays
;;Take alu from-arrzy to-arr2t4
;;Return (s-beg-addr s-beg-bitpos s-row-length s-height s-bits-per-elt
;;	  d-beg-addr d-beg-bitpos d-row-length d-height d-bits-per-elt
;;	  array-reg-event-count)
4,887,235
	337	338

;;arss
(defatomicro bbd-alu (amem (stack-pointer -2)))
(defatomicro bbd-s-array (amem (stack-pointer -1)))
(defatomicro bbd-d-array top-of-stack-a)

;; 4 slots for array-setup-2d to return its results
(defatomicro bbd-control (amem (stack-pointer 1)))
(defatomicro bbd-base-pointer (amem (stack-pointer 2)))
(defatomicro bbd-width (amem (stack-pointer 3)))
(defatomicro bbd-height (amem (stack-pointer 4)))

(defatomicro bbd-s-beg-addr	(amem (stack-pointer 5)))
(defatomicro bbd-s-beg-bitpos	(amem (stack-pointer 6)))
(defatomicro bbd-s-row-length	(amem (stack-pointer 7)))
(defatomicro bbd-s-height	(amem (stack-pointer 8)))
(defatomicro bbd-s-bits-per-elt	(amem (stack-pointer 9.)))
(defatomicro bbd-d-beg-addr	(amem (stack-pointer 10.)))
(defatomicro bbd-d-beg-bitpos	(amem (stack-pointer 11.)))
(defatomicro bbd-d-row-length	(amem (stack-pointer 12.)))
(defatomicro bbd-d-height	(amem (stack-pointer 13.)))
(defatomicro bbd-d-bits-per-elt	(amem (stack-pointer 14.)))
(defatomicro bbd-event-count	(amem (stack-pointer 15.)))

(defatomicro bb-alu-depends-on-source
  	     (b-constant #.(loop for alu in '( 5 10.	;source
					      ;3 12.	;dest
					      ;0 15.	;neither
					      1 2 4 6 7 8. 9. 11. 13. 14.	;both
				 sum (ash 1 alu))))

(defmicro compute-beg-bitpos (for-what)
  (let ((beg-bitpos (selectq for-what
			     (s 'bbd-s-beg-bitpos)
			     (d 'bbd-d-beg-bitpos)
			     (otherwise (ferror "What is ~S" for-what))))
	(row-length (selectq for-what
			     (s 'bbd-s-row-length)
			     (d 'bbd-d-row-length)
			     (otherwise (ferror "What is ~S" for-what)))))
    '(sequential
       (assign b-low-dividend top-of-stack)
       (assign a-positive-divisor bbd-width)
       (parallel
	 (assign b-high-dividend (a-constant 0))
	 (assign a-divide-step-count (b-constant 15.)))
       (parallel
	 (assign a-negative-divisor (- a-positive-divisor))
	 (call divide-subroutine))
       ;; bits per elt setup correctly in byte-r
       (assign ,beg-bitpos (set-type (rotate b-high-dividend byte-r) dtp-fix))
       (assign b-temp (set-type (ldb ,row-length 27. 5 0) dtp-fix))
       (assign bb-a-temp b-temp)
       (mpy-32-32 bb-a-temp b-low-dividend set-b-temp for-effect nil))))

(defmicro set-b-temp (x)
  '(assign b-temp ,x))

(definst %bitblt-decode-arrays no-operand
  ;;See whether the alu operation depends on the source array
  (assign byte-r (32- bbd-alu))
  (parallel
    (assign top-of-stack (a-constant 0))	;the "subscript"
    (if (ldb-bit-test bb-alu-depends-on-source byte-r)
	(sequential
	  (parallel
	    (check-arg-type array bbd-s-array dtp-array)
	    (assign vma bbd-s-array)
	    (assign b-vma bbd-s-array)
	    (call array-setup-2d))
	  (parallel (assign b-temp bbd-control)
		    (call bbd-bits-per-elt))
	  (parallel (assign bbd-s-bits-per-elt (set-type b-temp dtp-fix))
		    (assign byte-r b-temp))
	  (assign bbd-s-row-length (set-type (rotate bbd-width byte-r) dtp-fix))
	  (compute-beg-bitpos s)
	  (assign bbd-s-beg-addr (+ bbd-base-pointer b-temp))
	  (assign bbd-s-height bbd-height))
      (sequential
        (assign bbd-s-bits-per-elt (set-type (a-constant 1) dtp-fix))
	(assign bbd-s-row-length (set-type (a-constant 10000000) dtp-fix))
	(assign bbd-s-beg-bitpos (set-type (a-constant 0) dtp-fix))
	(assign bbd-s-beg-addr quote-nil)
	(assign bbd-s-height (set-type (a-constant 1000000) dtp-fix)))))
  ;; decode the destination array
  (assign top-of-stack (b-constant 0))	;the "subscript"
  (parallel
    (check-arg-type array bbd-d-array dtp-array)
    (assign vma bbd-d-array)
    (assign b-vma bbd-d-array)
    (call array-setup-2d))
4,887,235
	339	340
    (parallel (assign b-temp bbd-control)
	      (assign bbd-event-count bbd-control)
	      (call bbd-bits-per-elt))
    (parallel (assign bbd-d-bits-per-elt (set-type b-temp dtp-fix))
	      (assign tyte-r b-temp))
    (assign bbd-d-row-length (set-type (rotate bbd-width byte-r) dtp-fix))
    (compute-beg-bitpos d)
    (assign bbd-d-beg-addr (+ bbd-base-pointer b-temp))
    (assign bbd-d-height bbd-height)
    ;; Now copy results down over arguments and array-setup-2d work area
    (assign b-temp frame-pointer)
    (assign frame-pointer (+ stack-pointer (b-constant 4)))
    (assign b-temp-2 (+ stack-pointer (b-constant 15.)))
    (parallel
      (assign stack-pointer (- stack-pointer (b-constant 3)))
      (call bit-stack))
    (parallel
      (assign frame-pointer b-temp)
      (assign top-of-stack top-of-stack-a)
      (next-instruction)))

;;take an array-register control word in b-temp. return a decoding of its
;;dispatch type in b-temp.
(defucode bbd-bits-per-elt
  (dispatch-after-this (array-register-dispatch-field b-temp)
		       (nop)
		       ((%array-register-dispatch-1-bit)
			(parallel (assign b-temp (set-type (b-constant 0) dtp-fix)) (return)))
		       ((%array-register-dispatch-2-bit)
			(parallel (assign b-temp (set-type (b-constant 1) dtp-fix)) (return)))
		       ((%array-register-dispatch-4-bit)
			(parallel (assign b-temp (set-type (b-constant 2) dtp-fix)) (return)))
		       ((%array-register-dispatch-8-bit)
			(parallel (assign b-temp (set-type (b-constant 3) dtp-fix)) (return)))
		       ((%array-register-dispatch-16-bit)
			(parallel (assign b-temp (set-type (b-constant 4) dtp-fix)) (return)))
		       ((%array-register-dispatch-word)
			(parallel (assign b-temp (set-type (b-constant 5) dtp-fix)) (return)))
		       (otherwise (signal-error unimplemented-or-illegal-array-type))))


F:>lmach>ucode>multiply.lisp.32

;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*-
;;; (c) Copyright 1982, Symbolics, Inc.

;Microcode for the multiplier

;Get defmicro and all his hosts
#M
(declare (cond ((not (status feature lmucode))
		(load 'udcls))))

;The following microcode-controllable signals exist:
;	x-twos-complement
;	y-twos-complement
;	x-clk-enable
;	y-clk-enable
;	lsp & msp output-enable (select mpy as Xbus source)
;
;	msp-clk and lsp-clk happen every cycle
;	feed-through is aiwaus off
;	right-shift is always on
;	round is always off

;:MPY-PRODUCT is a source (on Xtus)
;MPY-X, MPY-X-SIGNED, MPY-Y, MPY-Y-SIGNED are destinations
; Note that the X destinations get the low halfword and the
; Y destinations get the high halfword.
;These destinations are implemented by the micros WRITE-MPY-X
; and WRITE-MPY-Y-FROM-HIGG. which take an optional SIGNED flag.
;Special skips needed:
; ALU-CARRY (out of bit 31. into non-existent bit 32)

;The basic low-level multiply subroutine, as a micro so that the
;locations of the two fixnum arguments and the two fixnum results
;may be varied. No error checking is included.
;The a-source and b-source arguments are the arguments.

;Store-low-product and store-high-product are routines to dispose
; of the results.
;finally is stuff to do in parallel with the last cycle, which
; appears in 4 different copies.

4,887,235
	341	342
;Execution time is 9 cycles in most common case, sometimes 12 cycles.
;Usage of temporaries: (not optimized to minimize number of temporaries!)
;	a-temp		A swapped (AH)
;	a-temp-2	AL x BH then ALxBH 4. AH+BL
;	b-temp		AH x BL
;	b-temp-2	B swapped (BL)
;	b-temp-3	AH x BH

(defmicro mpy-32-32 (a-source b-source
		     store-low-product store-high-product finally)
  '(sequential
     (assign a-temp (ldb ,a-source 16. 16.))
     (parallel (write-mpy-x a-temp signed) 		;AH
	       (write-mpy-y-from-high ,b-source signed) ;BH
	       (assign b-temp-2 (dpb ,b-source 16. 16. 0)))
     (parallel (assign b-temp-3 mpy-product) 		;AHxBH
	       (write-mpy-y-from-high b-temp-2))	;BL
     (parallel (assign b-temp mpy-product)		;AHXBL
	       (if (minus-fixnum mpy-product)
		   (assign b-temp-3 (- b-temp-3 (a-constant 1_16.)))
		   (drop-through)))
     (parallel (write-mpy-x ,a-source)	;AL
	       (write-mpy-y-from-high ,b-source signed)) ;BH
     (parallel (assign a-temp-2 mpy-product) 		;ALxBH
	       (write-mpy-y-from-high b-temp-2) 	;BL
	       (if (minus-fixnum mpy-product)
		   (assign b-temp-3 (- b-temp-3 (a-constant 1_16.)))
		 (drop-through)))
     (parallel (assign a-temp-2 (+ b-temp a-temp-2))
	       (if alu-carry
		   (assign b-temp-3 (+ b-temp-3 (a-constant 1_16.)))
		   (drop-through)))
     (parallel
	(,store-low-product			;Low Product
	 (set-type (+ mpy-product (dpb a-temp-2 16. 16. 0)) dtp-fix))
	(if alu-carry
	    (parallel
	        (,store-high-product
		        (set-type (+ b-temp-3 (ldb a-temp-2 16. 16.) 1) dtp-fix))
		,finally)
	  (parallel
	      (,store-high-product
	                (set-type (+ b-temp-3 (ldb a-temp-2 16. 16.)) dtp-fix))
	      ,finally)))))

;Multiplication of a 32-bit number by a 16-bit number. (4 cycles)
(defmicro mpy-32-16 (32-bit-number 16-bit-number
		     store-low-product store-high-product finally)
  '(sequential
    (parallel (write-mpy-x ,16-bit-number signed) 	;B
	      (write-mpy-y-from-high ,32-bit-number signed)	;AH
	      (assign b-temp (dpb ,32-bit-number 16. 16. 0)))
    (parallel
      (assign b-temp mpy-product)			;AH x B
      (write-mpy-y-from-high b-temp)			;AL
      (if (plus-or-zero-fixnum mpy-product)
	  (parallel
	   (,store-low-product
	      (set-type (+ mpy-product (dpb b-temp 16. 16. 0)) dtp-fix))
	   (if alu-carry
	       (parallel
		 (,store-high-product
		       (set-type (1+ (ldb b-temp 16. 16.)) dtp-fix))
		 ,finally)
	       (parallel
		(,store-high-product
		       (set-type (ldb b-temp 16. 16.) dtp-fix))
		,finally)))
	 (parallel
	   (,store-low-product
	      (set-type (+ mpy-product (dpb b-temp 16. 16. 0)) dtp-fix))
	   (if alu-carry
	       (parallel
		 (,store-high-product
		       (set-type (+ (a-constant 177777_16.)
				    (ldb b-temp 16. 16.)
				    I)
				 dtp-fix))
		 ,finally)
	       (parallel
		 (,store-high-product
		       (set-type (+ (a-constant 177777_16.)
				    (ldb b-temp 16. 16.))
				 dtp-fix))
		 ,finally)))))))

4,887,235
	343	344

;;; Arithmetic instructions that use multiplication

(defmicro set-a-temp (x)
  '(assign a-temp ,x))

(defmicro set-next-on-stack (x)
  '(assign next-on-stack ,x))

;Basic fixnum multiply subroutine. No error checking.
;Takes two fixnums on the stack and returns their double-precision
;Product as two fixnums on the stack (low-order recult is pushed first).
(defucode 32-bit-multiply
  (mpy-32-32 next-on-stack top-of-stack
	     set-next-on-stack newtop
	     (return)))

;Instruction version of the above.
(definst multiply-double (no-operand needs-stack)
  (parallel
    (check-fixnum-2args next-on-stack top-of-stack
      (otherwise (signal-error wrong-type-argument any (:fixnum))))
    (jump 32-bit-multiply)))

;Generic number multiplication.
(definst multiply-stack (no-operand needs-stack)
  (parallel
    ;; This cant be check-binary-arithmetic-operands-fast because that needs
    ;; the spec field
    (check-fixnum-2args next-on-stack top-of-stack
      (otherwise (sequential
		   (trap-no-save)
		   (check-binary-arithmetic-operands-fast no-operand %arith-op-multiply
							  multiply-stack fmul))))
    (mpy-32-32 next-on-stack top-of-stack
	       pop2push set-a-temp nil))
  ;Now check for overflow. Having trashed our args we are unpclsrable,
  ;but we can turn into a call-quick-external instruction.
  ;Fortunately the multiplier hardware does SETZ x SETZ correctly.
  ;Overflow occurs if any bits in high word not equal to sign of low word
  (parallel
    (trap-if (not (all-ones (- a-temp (complemented-sign-bit top-of-stack))))
	     multiply-overflow)
    (next-instruction)))

;Generic number multiplication with an immediate argument
(definst multiply-immed signed-immediate-operand
  (parallel 	;Must check both args for fixnum to make magic-number win
    (check-binary-arithmetic-operands-fast signed-immediate-operand %arith-op-multiply
					   multiply-stack fmul)
    (mpy-32-16 top-of-stack-a macro-signed-immediate newtop set-a-temp nil))
  ;Overflow checking
  (parallel
    (trap-if (not (all-ones (- a-temp (complemented-sign-bit top-of-stack))))
	     multiply-overflow)
    (next-instruction)))

;;; Here a-temp is the top word of the overflowed result
;;; What we want to do here is convert the 62 bit result to be distributed 31 bits per
;;; word. Note that the only special case is setz * setz which will give setz in the top
;;: word and 0 in the bottom.
;;; ***	If it is possible do selective deposit, it would be possible to bum a cycle ***
;;; *** Think about this when you have time to breath ***
(defucode multiply-overflow
  (parallel (trap-no-save)
	    (assign b-temp (ldb top-of-stack 1 31.)))
  ;;; Clear sign bit of the bottom word
  (newtop (set-type (ldb top-of-stack 31. 0) dtp-fix))
  ;; Put sign bit of bottom into sign bit of top 31 bits
  (assign a-temp (dpb b-temp 1 31. a-temp))
  ;; Now rotate it into the bottom bit
  (pushval (set-type (rotate a-temp 1) dtp-fix))
  (take-post-trap multiplicative-fixnum-overflow preserve-stack))


F:>lmach>ucode>map.lisp.29

;;; -*- Mode:Lisp; Package:Micro; Base:8; Lowercase:yes -*-
;;; (c) Copyright 1982, Symbolicsm Inc.

;;; Microcode for Map Cache and Page Tags



;Get defmicro and all his hosts
#M
(declare (cond ((not (status feature lmucode))
		(load 'udcls))))
4,887,235
	345	346

;Declared in SYSDF1:
;  %WIRED-VIRTUAL-ADDRESS-HIGH		;Highest address in wired cold load.
;  %WIRED-PHYSICAL-ADDRESS-LOW		;First physmem it is stored into.
;  %WIRED-PHYSICAL-ADDRESS-HIGH		;Last physmem it is stored into.  [not used]

;Do not use any b-temps in this file. as it is best to be able to ignore map
;misses when writing the rest of the microcode.
;b-map-vma must be in the upper 16 B-memory locations to save cycles.

(reserve-scratchpad-memory 2451 2452 375 376)

(defareg a-map-addr)	;Physical address to map (low 8 bits zero)
(defbreg b-map-vma)	;Copy of VMA or temporary

(define-sysconstant %page-pht-miss)
(define-sysconstant %page-write-fault)

;; Don't forget! The map write data come from ABUS!  Not OBUS!
(defmicro write-both-maps (a-source)
  '(parallel ,(get-to-abus a-source)
	     (write-lbus-dev 37 7 nil)
	     (microinstruction speed slow-first-half)))

(defmicro write-lru-map (a-source)
  '(parallel ,(get-to-abus a-source)
	     (write-lbus-dev 37 4 nil)
	     (microinstruction speed slow-first-half)))

(defmicro write-map-a (a-source)
  '(parallel ,(get-to-abus a-source)
	     (write-lbus-dev 37 5 nil)
	     (microinstruction speed slow-first-half)))

(defmicro write-map-b (a-source)
  '(parallel ,(get-to-abus a-source)
	     (write-lbus-dev 37 6 nil)
	     (microinstruction speed slow-first-half)))

;Conditional test valid while writing map
(defatomicro map-load-successful
  (microcondition mc-cond true (microinstruction)))

;Reading page tags
(defmicro page-tag-bit (n)
  (make-microcondition 'not-lbus-dev-cond 'false
		       '(write-lbus-dev 36 .(dpb n 0302 3) nil)))

;0 if miss, non-zero if hit or vma-phys-addr. Bits <33:32> of map read data.
(defatomicro map-select-code
  (parallel (microinstruction abus map speed very-slow)
	    (ldb ybus-crocks-1 2 12.)))

;Write into the gc-map
(defmicro write-gc-map (adr val)
  (paralyze (get-to-abus adr)
	    (get-to-bbus val)
	    '(microinstruction spec load-special-maps magic 1)))

;Clear the map cache and the PHTC
;VM used as a loop counter, called initially with zero in VMA
(defucode clear-map-cache
  ;; Write both maps with -1 (no-match tag)
  (write-both-maps (a-constant -1))
  ;; Mung until no qood
  (assign vma (+ vma (b-constant 1_8)))
  (if (lesser-pointer vma (b-constant 1_20.))
      (goto clear-map-cache)
      (drop-through))
  ;; Make sure PHTC address and size and ASN are corroct
  (write-lbus-dev 37 1 %current-phtc)
  ;; Get lower and upper bounds of PHTC
  (assign a-temp (logand %current-phtc (b-constant -1_16.)))
  (parallel (assign b-temp (dpb (b-constant -1) 12. 0 %current-phtc))
	    (jump clear-phtc)))

(defucode clear-phtc
  (parallel
    (start-memory write physical a-temp)
    (assign memory-data (set-type (b-constant -1) dtp-fix)))
  (assign a-temp (1+ a-temp))
  (if (lesser-or-equal-pointer a-temp b-temp)
      (goto clear-phtc)
    (return)))

;Unmap page whose address is in VMA, from both the map cache and the PHTC
(defucode clear-page-from-map-cache
  ;; Clobber both caps, not bothering to check whether they really map that address
  ;; Could read the map and dispatch on bits <33:32>
  (write-both-maps (a-constant -1))
  (start-esmory read address-phtc)
4,887,235
	347	348

  (assign b-temp (ldb vma 8 20.))	;Extract tag field of VMA
  (if (equal-fixnum b-temp (ldb memory-data 8 24.)) ;Compare against PHTC entry
      (parallel
        (start-memory write address-phtc)
	(assign memory-data (set-type (a-constant -1) dtp-fix))
	(return))
    (return)))

;Unmap page whose address is in b-temp. from both the map cache and the PHTC
(defucode clear-b-temp-page-from-map-cache
  (parallel (assign vma b-temp)
	    (jump clear-page-from-map-cache)))

;Channe map cache and PHTC to map page in VMA into corresponding stack buffer 0 page
(defucode map-page-to-stack-buffer
  (assign a-temp (logand vma (b-constant 3_8)))		;Stack buffer page
  (assign a-temp (logior a-temp (b-constant 177760_8)))	;Physical address
  (assign b-temp (logand (rotate vma 4) (b-constant 377_24.))) ;VMA tag
  (parallel
    (start-memory write address-phtc)			;Write PHTC with value to go in map
    (assign memory-data (set-type (logior a-temp b-temp) dtp-fix)))
  (dispatch-after-this map-select-code			;See if map needs to be written
		       (assign a-temp (logior a-temp b-temp))
      ((0) (parallel (write-lru-map a-temp)	;Map cache miss
		     (return)))
      ((1) (parallel (write-map-a a-temp)	;Replace map A
		     (return)))
      ((2) (parallel (wrote-map-b a-temp)	;Replace map B
		     (return)))
      ((3) (return))))				;Should not get here--ignore

;Map-miss traps here in normal case
(defucode-at-loc map-miss 10001
  ;; Copy VMA to B side while waiting for PHTC entry to come from memory
  (parallel
    (trap-save)
    (assign b-map-vma vma)
    (declare-memory-timing active-cycle))
  ;; Refill map from PHTC entry and see whether VMA tag in PHTC entry matches
  (parallel
   (trap-restore-1)
   (write-lru-map memory-data)
   (if map-load-successful
       (parallel
	 (trap-restore-2)	;exits
	 (assign %count-map-reloads (1+ %count-map-reloads)))
     (goto phtc-miss))))

;Come here if pace not found in PHTC, with a trap-restore-1 just done
(defucode phtc-miss
  ;; Check for page temporarily mapped into A-memory for stack buffer
  ;; Currently we know that there is only one mappable stack buffer, the main
  ;; stack buffer at 0@A. The auxiliary one is not mappable.
  (parallel
    (trap-save)		;undoes trap-restore-1
    (if (greater-or-equal-pointer b-map-vma %stack-buffer-low)
	(if (lesser-or-equal-pointer b-map-vma %stack-buffer-limit)
	    (sequential
	      (assign a-map-addr (logand b-map-vma (a-constant 3_8))) ;Which s.b. page
	      (parallel (assign a-map-addr (logior a-map-addr (b-constant 177760_8)))
			(jump map-miss-satisfied)))
	    (drop-through))
      (drop-through)))
  ;; Check for permanently-wired portion of virtual memory
  (if (lesser-pointer b-map-vma %wired-virtual-address-high)
      (sequential
        (assign a-map-addr (+ b-map-vma %wired-physical-address-low))
	(parallel (assign a-map-addr (logand a-map-addr (b-constant 177777_8)))
		  (jump map-miss-satisfied)))
    (drop-through))
  ;; Escape to macrecode map miss handler. Don't leave garbage in the map.
  (write-lru-map (a-constant -1))
  (parallel (assign a-temp %page-pht-miss)
	    (jump page-fault)))

;Here with a-map-addr containing the physical page to map to, in bits 23-8
(defucode map-miss-satisfied
  ;; Get VMA tag field properly aligned, and no write-protect
  (assign b-map-vma (logand (rotate vma 4) (b-constant 377_24.)))
  (assign a-map-addr (logior a-map-addr b-map-vma))
  (trap-restore
   ;; Maintain metering counter
   (assign %count-map-reloads (1+ %count-map-reloads))
   ;; Refill least-recentlu-used map location addressed by VMA
   (write-lru-map a-map-addr)))

4,887,235
	349	350
;Map miss while in block read. VMA incremented one or two extra times,
;no PHTC probe in procress.
;For these I am just going to pclsr and try again (could check PHTC first)
(defucode-at-loc map-miss-block1 10011
  (parallel
    (trap-save)
    (assign vma (- vma (b-constant 1))))
  (parallel (assign a-temp %page-pht-miss)
	    (jump page-fault)))

(defucode-at-loc map-miss-block2 10021
  (parallel
    (trap-save)
    (assign vma (- vma (b-constant 2))))
  (parallel (assign a-temp %page-pht-miss)
	    (jump page-fault)))

;Here if map miss while in block write, or write protect violation
;No proper PHTC probe in pronress
(defucode-at-loc map-write-miss 10031
  ;; Read the map to determine which it is
  (parallel
    (trap-save)
    (if (zero-fixnum map-select-code)
	(parallel
	  (trap-restore-1)
	  (assign b-map-vma vma)
	  (jump phtc-miss))
      (parallel
       (assign a-temp %page-write-fault)
       (jump page-fault)))))

;Hardware subprimitives

;Arguments are vma and word to be written
;We must clobber any previous mapping for that virtual page
;Macracode takes care of any necessary clobbering of PHTC
;The 0 case hsre is a little bit of overkilll we could simply never touch
;the map when there was a miss, and let a refill from PHTC take care of it.
(definst %map-cache-write (no-operand smashes-stack)
  (parallel
    (check-arg-type 0 next-on-stack dtp-fix)
    (assign vma next-on-stack)
    (decrement-stack-pointer))
  (parallel
    (dispatch-after-this map-select-code
			 (check-arg-type 1 (amem (stack-pointer 1)) dtp-fix)
      ((0) (if (all-ones (amem (stack-pointer 1)))	;Map cache miss. Clearing?
	       (parallel (decrement-stack-pointer)	;Clearing--leave alone
			 (next-instruction))
	       (parallel (write-lru-map (amem (stack-pointer 1))) ;Writing--put into LRU map
			 (decrement-stack-pointer)
			 (next-instruction))))
      ((1) (parallel (write-map-a (amem (stack-pointer 1)))	;Original TOS to map A
		    (decrement-stack-pointer)
		    (next-instruction)))
      ((2) (parallel (write-map-b (amem (stack-pointer 1)))	;Original TOS to map B
		     (decrement-stack-pointer)
		     (next-instruction)))
      ((3) (parallel (decrement-stack-pointer)			;Should not get here--ignore
		     (next-instruction))))))


;Use the PHTC hashbox to read an entry. Arg is virtual address.
(definst %phtc-read no-aperand
  (parallel
    (check-arg-type 0 top-of-stack-a dtp-fix)
    (assign vma top-of-stack-a))
  (start-memory read address-phtc)
  (nop)
  (parallel
    (transport data)				;Crash here if no data type tag
    (newtop memory-data)
    (next-instruction)))

;Use the PHTC hashbox to write an entry. Args are virtual address and entry.
(definst %phtc-write (no-operand smashes-stack)
  (parallel
    (check-arg-type 0 next-on-stack dtp-fix)
    (assign vma next-on-stack)
    (decrement-stack-pointer))
  (parallel
   (check-arg-type 1 (amem (stack-pointer 1)) dtp-fix)
   (start-memory write address-phtc)
   (assign memory-data (amem (stack-pointer 1)))
   (decrement-stack-pointer)
   (next-instruction)))

;Write into the PHTC address, size. ASN register
(definst %phtc-setup (no-operand needs-stack smashes-stack)
  (parallel
4,887,235
	351	352
	(check-fixnum-1arg-b top-of-stack)
	(write-lbus-dev 37 1 top-of-stack)
	(assign %current-phtc top-of-stack)
	(decrement-stack-pointer)
	(next-instruction)))

;Set up address for page tag
;You had better have disabed tasking in the previous cycle
(defmicro address-page-tag (phys-addr)
  '(start-memory read physical ,phys-addr inhibit-page-tags))

;Write into the page reference tag from t or nil
(definst %reference-tag-write (no-operand smashes-stack)
  (assign a-temp next-on-stack)			;Move address to faster memory
  (parallel
    (decrement-stack-pointer)
    (disable-tasking)
    (if (data-type? top-of-stack-a dtp-nil)
	(sequential
	  (parallel (check-arg-type 0 a-temp dtp-fix)
		    (address-page-tag a-temp))
	  (parallel (write-lbus-dev 36 21 nil)
		    (decrement-stack-pointer)
		    (next-instruction)))
      (sequential
       (parallel (check-arg-type 0 a-temp dtp-fix)
		 (address-page-tag a-temp))
       (parallel (write-lbus-dev 36 31 nil)
		 (decrement-stack-pointer)
		 (next-instruction))))))

;Read reference	tag as t or nil
(definst %reference-tag-read no-operand
  (parallel
    (disable-tasking)
    (assign a-temp top-of-stack-a))	;Move address to faster memory
  (parallel
   (check-arg-type 0 a-temp dtp-fix)
   (address-page-tag a-temp))
  (if (page-tag-bit 1)
      (goto true1)
      (goto false1)))

;Write into the GC tag from t or nil
(definst %gc-tag-write (no-operand smashes-stack)
   (assign a-temp next-on-stack)	;Move address to faster memory
   (parallel
     (disable-tasking)
     (decrement-stack-pointer)
     (if (data-type? top-of-stack-a dtp-nil)
	 (sequential
	   (parallel (check-arg-type 0 a-temp dtp-fix)
		     (address-page-tag a-temp))
	   (parallel (write-lbus-dev 36 0l nil)
		     (decrement-stack-pointer)
		     (next-instruction)))
         (sequential
	   (parallel (check-arg-type 0 a-temp dtp-fix)
		     (address-page-tag a-temp))
	   (parallel (write-lbus-dev 36 11 nil)
		     (decrement-stack-pointer)
		     (next-instruction))))))

;Read GC tag as t or nil
(definst %gc-tag-read no-operand
  (parallel
    (disable-tasking)
    (assign a-temp top-of-stack-a))	;Move address to faster memory
  (parallel
    (check-arg-type 0 a-temp dtp-fix)
    (address-page-tag a-temp))
  (if (page-tag-bit 0)
      (goto true1)
      (goto false1)))

;Scan the reference tags, returning NIL or the physical address of thin first page
;whose tag is not set. As we pass over each tag which is set, clear it.
;No time available for type checking the second argument
(definst %scan-reference-tags (no-operand needs-stack)
  (parallel
    (check-arg-type 0 next-on-stack dtp-fix)
    (if (greater-or-equal-fixnum-unsigned next-on-stack top-of-stack)
	(parallel (pop2push quote-nil)
		  (next-instruction))
        (drop-through)))
  (parallel
    (assign a-temp next-on-stack)		;Move address to faster memory
    (disable-tasking))
  (address-page-tag a-temp)
  (parallel
