;==============================================================================
; Project:     DivEmulator
;
; File:        ModHead.s
; Description: Software implementation of SDIV/UDIV instructions for CPUs
;              without hardware division instructions.
;
; The contents of this file are
; Copyright (C) Adrian Lees, 2015. All rights reserved
;==============================================================================
;
; Limitations:	Does not support use of R8-R12 in FIQ code, which should
;		almost certainly not be performing divisions anyway!
;
;==============================================================================

		AREA	|ARM$$code|,CODE,READONLY

		;OSLib headers; presumably the source of the XOS_* SWI
		;  names used below
		GET	OSLib:OSLib.Hdr.OS
		GET	OSLib:OSLib.Hdr.OSModule

		ENTRY

		;TUNGSTEN: this switch controls only whether LDM/STM
		;  instructions are expanded into single-register LDR/STR
		;  sequences, to save a few cycles on that target's XScale
		;  CPU. The generated code works fine on other targets too.
		;NOTE(review): the one use in this file tests
		;  ":DEF: TUNGSTEN"; as the GBLL below always declares the
		;  variable, that test looks as if it is always true -
		;  confirm this is intended.
		GBLL	TUNGSTEN

;
; Module header. Every non-zero entry is an offset from mod_base.
;   The meanings given for the zero entries follow the standard RISC OS
;   module header layout (see the PRM) rather than anything in this file.
;
mod_base	DCD	0				;start code (none)
		DCD	mod_init - mod_base		;initialisation code
		DCD	mod_fin  - mod_base		;finalisation code
		DCD	0				;service call handler (none)
		DCD	mod_title - mod_base		;title string
		DCD	mod_help  - mod_base		;help string
		DCD	0				;command table (none)
		DCD	0				;SWI chunk (none)
		DCD	0				;SWI handler (none)
		DCD	0				;SWI decoding table (none)
		DCD	0				;SWI decoding code (none)
		DCD	0				;messages file (none)
		DCD	mod_flags - mod_base		;module flags word
		DCD	-1				;spare words
		DCD	-1
		DCD	-1

mod_title	=	"DivEmulator",0			;zero-terminated title
mod_help	=	"DivEmulator",9,"1.00 (18 Apr 2015)",0	;name, tab (9), version and date
		ALIGN					;back to a word boundary after the strings

mod_flags	DCD	1				;bit 0 set; in the module flags word this
							;  marks the module as 32-bit compatible

;
; mod_init - module initialisation
;
; Claims a block with OS_Module 6, copies handler_stub into it followed by
;   the address of div_handler, then installs that copy with
;   OS_ClaimProcessorVector and stores the address returned in R1 (the
;   "pass on" handler) in the copy's handler_next word.
;
; Entry: R12 -> word that receives the address of the claimed block
;        R14 =  return address
; Exit:  V clear on success
;        otherwise R0 = error from the failing SWI and the V flag is
;        left set (first claim) or written back with MSR (later
;        failures); a block that had been claimed is freed and [R12]
;        is zeroed
; Uses:  R0-R6, R14
;
mod_init	ROUT
		MOV	R6,R14			;keep return address; R14 is reused below
		MOV	R0,#6			;OS_Module 6: claim a block
		MOV	R3,#end_stub + 4 - handler_stub	;stub plus one word for handler_addr
		SWI	XOS_Module
		MOVVS	PC,R6			;claim failed: return the error as it is
		STR	R2,[R12]		;remember the block

		;copy handler code into workspace
		;  the copy runs downwards from the last word of the stub;
		;  the extra word above it receives the div_handler address
		ASSERT	end_stub - handler_stub == 12 + 32 + 12
		ASSERT	end_stub == handler_addr
		ADR	R14,end_stub		;R14 -> just past the stub in this module
		LDR	R0,[R14,#-4]!		;pre-load the last word of the stub
		MOV	R3,#end_stub - handler_stub	;bytes still to copy
		ADR	R5,div_handler
		ADD	R1,R2,R3		;R1 -> handler_addr word of the copy
		ADD	R2,R2,R3		;same address, kept as the upper limit
						;  for the synchronise call
		STR	R5,[R1]			;handler_addr = address of div_handler
01		SUBS	R3,R3,#4
		STR	R0,[R1,#-4]!		;store one word, moving downwards
		LDRHI	R0,[R14,#-4]!		;fetch the next word while any remain
		BHI	%BT01
		;R1 is now back at the start of the copy
		MOV	R0,#1			;range form: R1 = low, R2 = high address
		SWI	XOS_SynchroniseCodeAreas
		MOV	R2,R1			;R2 -> copy, for the store and free below
		BVS	%F00

		MRS	R4,CPSR
		MOV	R0,#1			;vector number 1
		ORR	R3,R4,#&80		;disable IRQs during installation
		ORR	R0,R0,#&100		;bit 8 set = claim (mod_fin releases
						;  with the same number and bit 8 clear)
		MSR	CPSR_c,R3
		SWI	XOS_ClaimProcessorVector	;R1 = address of the copy on entry
		;store address of "pass on" handler
		STRVC	R1,[R2,#handler_next - handler_stub]
		MSR	CPSR_c,R4		;restore IRQ state
		MOVVC	PC,R6			;installed: return with V clear

		;failure after the block was claimed: free it and report
00		MOV	R3,R0			;remember error
		MOV	R0,#7			;OS_Module 7: free the block at R2
		SWI	XOS_Module
		MOV	R1,#0
		MOV	R0,R3			;error to return
		STR	R1,[R12]		;no workspace any more
		MSR	CPSR_f,#V		;write the flags from the constant V, which
						;  is not defined in this file (presumably
						;  the V flag bit from the OSLib headers)
		MOV	PC,R6

;
; mod_fin - module finalisation
;
; Hands the vector back to the handler recorded in handler_next, then
;   frees the block that held the stub copy and zeroes [R12].
;
; Entry: R12 -> word holding the address of the stub copy
;        R14 =  return address
; Exit:  V clear on success; V set and R0 = error if either SWI failed,
;        in which case [R12] is left unchanged
; Uses:  R0-R2, R6
;
mod_fin		MOV	R6,R14			;keep return address
		LDR	R2,[R12]		;R2 -> stub copy (current vector owner)
		LDR	R1,[R2,#handler_next - handler_stub]	;handler to reinstate
		MOV	R0,#1			;vector 1, bit 8 clear = release
		SWI	XOS_ClaimProcessorVector
		MOVVS	PC,R6			;release refused: keep the block
		MOV	R0,#7			;OS_Module 7: free the block at R2
		SWI	XOS_Module
		MOVVS	PC,R6			;free failed: report it
		MOV	R0,#0
		STR	R0,[R12]		;workspace has gone
		MOV	PC,R6

;
; Handler stub, copied into RAM to permit the main handler code to run
;   from ROM and still locate the address of the next ("pass on") handler.
;   mod_init installs the RAM copy as the vector handler and fills in
;   handler_next and the handler_addr word that follows the copy.
;
; Hands over to the address in handler_addr (div_handler) with:
;   R7  = handler_next    R8 = handler_match    R9 = handler_mask
;   R10 = CPSR as read here                     R11 = word at [R14,#-4]
;   stack, ascending from R13: R7, R8, R9, R10, R11, one empty word
;
; NOTE(review): the faulting instruction is always read as a 32-bit word
;   at R14-4; no check for Thumb state is visible here - confirm.
;
handler_stub	STR	R11,[R13,#-8]!		;leave space for "pass on" address
		STR	R10,[R13,#-4]!
		STR	R9,[R13,#-4]!

		STR	R8,[R13,#-4]!
		STR	R7,[R13,#-4]!
		LDR	R11,[R14,#-4]		;read faulted instruction
		MRS	R10,CPSR		;kept so the flags can be put back later
		LDR	R9,handler_mask		;PC-relative loads, so they read the
		LDR	R8,handler_match	;  words of whichever copy is running
		LDR	R7,handler_next
		LDR	PC,handler_addr		;continue in the main handler

		;the mask ignores the condition field, bit 21 and the three
		;  register fields (bits 0-3, 8-11 and 16-19)
handler_mask	DCD	&0FD0F0F0
handler_match	DCD	&0710F010
handler_next	DCD	0			;written by mod_init in the RAM copy
		;handler_addr has no storage here; the word exists only in
		;  the RAM copy, which mod_init claims one word larger
handler_addr
end_stub
		ALIGN	32,12			;ensure next instr starts Icache line

;
; div_handler - main handler, entered from the stub
;
; Entry: R7  = address of the next ("pass on") handler
;        R8  = handler_match, R9 = handler_mask
;        R10 = CPSR saved by the stub
;        R11 = faulted instruction
;        stack, ascending from R13: R7, R8, R9, R10, R11, empty word
;
; An instruction that matches the <S|U>DIV pattern is dispatched through
;   the div_rn table (8 bytes per register). Anything else is handed to
;   the next handler with R7-R11 and the flags as the stub found them.
;
; The three dispatch tables (Rn, Rm, Rd) have slots for R0-R14 only.
;   Index 15 would land one instruction into the code that deals with
;   R13/R14, skipping a register push or an MRS, so an encoding that
;   names R15 in any field is refused here and passed on instead.
;
div_handler	ROUT
		AND	R9,R9,R11		;keep only the fixed opcode bits
		TEQ	R9,R8
		BNE	%FT10			;not <S|U>DIV

		;NE survives the chain only if no field is 15
		AND	R9,R11,#&0000000F	;Rn field
		AND	R8,R11,#&00000F00	;Rm field
		TEQ	R9,#&0000000F
		TEQNE	R8,#&00000F00
		ANDNE	R8,R11,#&000F0000	;Rd field
		TEQNE	R8,#&000F0000
		ADRNE	R8,div_rn
		ADDNE	PC,R8,R9,LSL #3		;enter table at div_rn + 8*Rn

		;not an instruction emulated here, pass on to the next handler
10		STR	R7,[R13,#20]		;fill the word the stub left empty
		MSR	CPSR_f,R10		;flags as they were on entry
		LDMIA	R13!,{R7-R11,PC}	;restore and jump to next handler

		;retrieve Rn (dividend)
		;Jump table indexed by the Rn field (bits 0-3): entered by
		;  a computed branch to div_rn + 8*Rn, so every entry must
		;  be exactly two instructions. Each entry leaves the
		;  dividend in R9. R7-R11 were stacked by the stub and have
		;  since been reused, so their values come from the stack.
		;NOTE(review): there is no entry for index 15 (R15); that
		;  index would land on the second instruction of div_Rn13.
div_rn		MOV	R9,R0
		B	div_rm
		MOV	R9,R1
		B	div_rm
		MOV	R9,R2
		B	div_rm
		MOV	R9,R3
		B	div_rm
		MOV	R9,R4
		B	div_rm
		MOV	R9,R5
		B	div_rm
		MOV	R9,R6
		B	div_rm
		LDR	R9,[R13]		;Rn == R7
		B	div_rm
		LDR	R9,[R13,#4]		;Rn == R8
		B	div_rm
		LDR	R9,[R13,#8]		;Rn == R9
		B	div_rm
		LDR	R9,[R13,#12]		;Rn == R10
		B	div_rm
		LDR	R9,[R13,#16]		;Rn == R11
		B	div_rm
		MOV	R9,R12
		B	div_rm
		MOV	R9,#0			;Rn == R13: offset 0 in the {R13,R14} pair
		B	div_Rn13
		MOV	R9,#4			;Rn == R14: offset 4; falls into div_Rn13

		;Rn is R13 or R14: these are the handler's own banked
		;  registers here, so the caller's copies have to be
		;  fetched. On entry R9 = 0 for R13, 4 for R14.
		;  On exit R9 = dividend; R0, R1 and R12 are stacked and
		;  R0, R1 hold the caller's values again.
div_Rn13	;free a few more registers
		STR	R12,[R13,#-4]!
		STR	R1,[R13,#-4]!
		STR	R0,[R13,#-4]!
		MRS	R7,SPSR			;for CPU mode

		;collect R13 and R14 from caller mode, then select
		MRS	R1,CPSR			;current mode, to return to
		ANDS	R7,R7,#&1F		;caller mode
		SUB	R13,R13,#8		;two scratch words for the pair
		TEQNE	R7,#&10			;USR32?
		;EQ from here on: mode field was 0 or &10 (user registers)
		;NE: some other mode, which is entered briefly
		BICNE	R0,R1,#&1F
		ORRNE	R7,R0,R7		;current control bits, caller's mode
		MOV	R0,R13			;R0 -> scratch words
		MSRNE	CPSR_c,R7		;NE: into the caller's mode
		STMEQIA	R0,{R13,R14}^		;EQ: store the user-bank registers
		STRNE	R13,[R0]		;NE: store the caller's R13 and R14
		STRNE	R14,[R0,#4]
		MSRNE	CPSR_c,R1		;unbanked register
		LDR	R9,[R0,R9]		;load value of Rn

		LDR	R0,[R13,#8]!		;reload R0 so still valid
						;  (this also drops the scratch words)
		LDR	R1,[R13,#4]		;reload R1 so still valid
		;leave R0,R1,R12 stacked
		B	div_rm2

		;Entry for every Rn other than R13/R14: R9 = dividend and
		;  R0, R1, R12 still hold the caller's values.
div_rm		;free a few more registers
		STR	R12,[R13,#-4]!
		STR	R1,[R13,#-4]!
		STR	R0,[R13,#-4]!
		;stack, ascending from R13, is now:
		;  R0, R1, R12, R7, R8, R9, R10, R11, empty word

div_rm2		;retrieve Rm (divisor)
		;Jump table indexed by the Rm field (bits 8-11). Bits 5-7
		;  of the instruction are zero in the matched pattern, so
		;  the shifted value below is exactly 8*Rm. Each entry is
		;  two instructions and leaves the divisor in R8.
		;NOTE(review): there is no entry for index 15 (R15); that
		;  index would land on the second instruction of div_Rm13.
		MOV	R8,R11,LSL #20		;Rm field to the top four bits
		MRS	R7,SPSR			;for CPU mode
		ADD	PC,PC,R8,LSR #25	;PC reads as the entry after the NOP
		NOP
		MOV	R8,R0			;R0 still valid, also stacked
		B	div_op
		MOV	R8,R1			;R1 still valid, also stacked
		B	div_op
		MOV	R8,R2
		B	div_op
		MOV	R8,R3
		B	div_op
		MOV	R8,R4
		B	div_op
		MOV	R8,R5
		B	div_op
		MOV	R8,R6
		B	div_op
		LDR	R8,[R13,#12]		;Rm == R7
		B	div_op
		LDR	R8,[R13,#16]		;Rm == R8
		B	div_op
		LDR	R8,[R13,#20]		;Rm == R9
		B	div_op
		LDR	R8,[R13,#24]		;Rm == R10
		B	div_op
		LDR	R8,[R13,#28]		;Rm == R11
		B	div_op
		MOV	R8,R12			;R12 still valid, also stacked
		B	div_op
		MOV	R8,#0			;Rm == R13: offset 0 in the {R13,R14} pair
		B	div_Rm13
		MOV	R8,#4			;Rm == R14: offset 4; falls into div_Rm13

		;Rm is R13 or R14: fetch the caller's copy, in the same
		;  way as div_Rn13. On entry R7 = SPSR and R8 = 0 for R13,
		;  4 for R14. On exit R8 = divisor.
		;  R0 and R1 are used as scratch and not reloaded: both are
		;  stacked, and div_op overwrites R0 straight away.
div_Rm13	MRS	R1,CPSR			;current mode, to return to
		ANDS	R7,R7,#&1F		;caller mode
		SUB	R13,R13,#8		;two scratch words for the pair
		TEQNE	R7,#&10			;USR32?
		;EQ from here on: mode field was 0 or &10 (user registers)
		BICNE	R0,R1,#&1F
		ORRNE	R7,R0,R7		;current control bits, caller's mode
		MOV	R0,R13			;R0 -> scratch words
		MSRNE	CPSR_c,R7		;NE: into the caller's mode
		STMEQIA	R0,{R13,R14}^		;EQ: store the user-bank registers
		STRNE	R13,[R0]		;NE: store the caller's R13 and R14
		STRNE	R14,[R0,#4]
		MSRNE	CPSR_c,R1		;NE: back to the handler's mode
		LDR	R8,[R0,R8]		;load value of Rm
		ADD	R13,R13,#8		;drop the scratch words

		;On entry R9 = dividend, R8 = divisor, R11 = instruction.
		;  On exit to div_start both operands are unsigned
		;  magnitudes and R11 = Rd field (still in bits 16-19) with
		;  bit 0 set if the quotient has to be negated.
div_op		;check for zero divisor which is defined to give a zero result
		MOVS	R0,R8			;R0 = 0 is the result if the divisor is 0
		ANDEQ	R11,R11,#&000F0000	;keep Rd only; bit 0 clear = no negation
		BEQ	div_result

		;extract Rd and ascertain whether result negation required
		ANDS	R12,R11,#&00200000	;bit 21: clear = signed, set = unsigned
		AND	R11,R11,#&000F0000	;Rd field (flags untouched)
		EOREQ	R7,R8,R9		;sign mismatch in bit 31
		BNE	div_start		;unsigned: operands used as they are

		;signed division
		ORR	R11,R11,R7,LSR #31	;negation required
		CMP	R8,#0
		RSBMI	R8,R8,#0		;abs(Rm)
		CMP	R9,#0
		RSBMI	R9,R9,#0		;abs(Rn)

		;Unsigned shift-and-subtract division.
		;  In:  R9 = dividend, R8 = divisor (non-zero)
		;  Out: R0 = quotient, R9 = remainder
		;  R7 is the divisor scaled up by whole bytes, R12 the value
		;  it is scaled against: the dividend, capped at &80000000
		;  so that the shifted comparisons below cannot overflow.
div_start	CMP	R9,#&80000000
		MOV	R7,R8
		MOVCS   R12,#&80000000
		MOVCC	R12,R9
		MOV	R0,#0

		;find the smallest shift for which the scaled divisor is
		;  not below R12, then enter the unrolled steps at the
		;  label for that shift (07 = no shift ... 01 = shift of 6);
		;  a shift of 7 falls through into 08
00		CMP	R12,R7
		BLS	%07
		CMP	R12,R7,LSL #1
		BLS	%06
		CMP	R12,R7,LSL #2
		BLS	%05
		CMP	R12,R7,LSL #3
		BLS	%04
		CMP	R12,R7,LSL #4
		BLS	%03
		CMP	R12,R7,LSL #5
		BLS	%02
		CMP	R12,R7,LSL #6
		BLS	%01
		CMP	R12,R7,LSL #7
		MOVHI	R7,R7,LSL #8		;still too small: scale by a byte
		BHI	%00			;  and compare again

		;construct quotient in R0 since this is probably the
		;  most common destination
		;each step tries a subtraction, undoes it on borrow (carry
		;  clear) and shifts the carry into R0 as the next bit
08		SUBS	R9,R9,R7,LSL #7
		ADDCC	R9,R9,R7,LSL #7
		ADC	R0,R0,R0
01		SUBS	R9,R9,R7,LSL #6
		ADDCC	R9,R9,R7,LSL #6
		ADC	R0,R0,R0
02		SUBS	R9,R9,R7,LSL #5
		ADDCC	R9,R9,R7,LSL #5
		ADC	R0,R0,R0
03		SUBS	R9,R9,R7,LSL #4
		ADDCC	R9,R9,R7,LSL #4
		ADC	R0,R0,R0
04		SUBS	R9,R9,R7,LSL #3
		ADDCC	R9,R9,R7,LSL #3
		ADC	R0,R0,R0
05		SUBS	R9,R9,R7,LSL #2
		ADDCC	R9,R9,R7,LSL #2
		ADC	R0,R0,R0
06		SUBS	R9,R9,R7,LSL #1
		ADDCC	R9,R9,R7,LSL #1
		ADC	R0,R0,R0
07		SUBS	R9,R9,R7
		ADDCC	R9,R9,R7
		ADC	R0,R0,R0
		CMP	R8,R7,LSR #1		;is R7 still a scaled-up divisor?
		MOVLS	R7,R7,LSR #8		;yes: down one byte and run all
		BLS	%08			;  eight steps again

		;Deliver the quotient. On entry R0 = unsigned quotient and
		;  R11 = Rd field in bits 16-19 with bit 0 = negate.
		;  Jump table indexed by Rd, two instructions per entry;
		;  the shift by 13 turns the field into 8*Rd and drops
		;  bit 0. Registers that are restored from the stack on the
		;  way out receive the result through their stack slot.
		;NOTE(review): there is no entry for index 15 (R15); that
		;  index would land on the second instruction of div_Rd13.
div_result	TST	R11,#1
		RSBNE	R0,R0,#0		;SDIV with -ve result
		ADD	PC,PC,R11,LSR #13	;PC reads as the entry after the NOP
		NOP
		ADD	R13,R13,#4		;discard stacked R0
		B	div_done1
		MOV	R1,R0			;Rd == R1
		B	div_done0
		MOV	R2,R0
		B	div_done
		MOV	R3,R0
		B	div_done
		MOV	R4,R0
		B	div_done
		MOV	R5,R0
		B	div_done
		MOV	R6,R0
		B	div_done
		STR	R0,[R13,#12]		;returned R7
		B	div_done
		STR	R0,[R13,#16]		;returned R8
		B	div_done
		STR	R0,[R13,#20]		;returned R9
		B	div_done
		STR	R0,[R13,#24]		;returned R10
		B	div_done
		STR	R0,[R13,#28]		;returned R11
		B	div_done
		STR	R0,[R13,#8]		;returned R12
		B	div_done
		MOV	R9,#0			;Rd == R13
		B	div_Rd13
		MOV	R9,#4			;Rd == R14; falls into div_Rd13

div_Rd13	MRS	R7,SPSR			;for CPU mode

		;stack the result
		MRS	R1,CPSR
		STR	R0,[R13,#-4]!
		ANDS	R7,R7,#&1F		;caller mode
		TEQNE	R7,#&10			;USR32?
		MOV	R0,R13
		BEQ	div_RdUSR

		;for non-USR, change mode and load dest register
		TEQ	R9,#0
		BIC	R9,R1,#&1F
		ORR	R7,R9,R7
		MSR	CPSR_c,R7
		NOP				;avoid banked reg use
		LDREQ	R13,[R0]
		LDRNE	R14,[R0]
		MSR	CPSR_c,R1
		B	div_done

div_done0	LDR	R0,[R13],#8		;discard stacked R1
		B	div_done12

div_RdUSR	;for USR mode, load into dest register
		TEQ	R9,#0
		LDMEQIA	R0,{R13}^
		LDMNEIA	R0,{R14}^
		NOP				;avoid banked reg use
		ADD	R13,R13,#4		;discard result

		;Common exit: unstack whatever is still stacked and return
		;  to the instruction after the emulated one.
		;  div_done   - R0, R1 and R12 still stacked
		;  div_done1  - R0 already dealt with
		;  div_done12 - R0 and R1 already dealt with
div_done	LDR	R0,[R13],#4
div_done1	LDR	R1,[R13],#4
div_done12	LDR	R12,[R13],#4
		MSR	CPSR_f,R10		;flags as saved by the stub
		[ :DEF: TUNGSTEN
		;single-register form of the LDMIA below
		LDR	R7,[R13],#4
		LDR	R8,[R13],#4
		LDR	R9,[R13],#4
		LDR	R10,[R13],#4
		LDR	R11,[R13],#8		;discards "pass on" address
		|
		LDMIA	R13!,{R7-R11}
		ADD	R13,R13,#4		;discards "pass on" address
		]
		MOVS	PC,R14			;return, restoring CPSR from SPSR

		END
