/*
     * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
     *
     * Author: Nicolas Pitre <nico@fluxnic.net>
     *   - contributed to gcc-3.4 on Sep 30, 2003
     *   - adapted for the Linux kernel on Oct 2, 2003
     */
    
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */
    
    
    #include <linux/linkage.h>
    #include <asm/assembler.h>
    
/*
 * U-Boot compatibility: define an empty UNWIND() macro, since we do not
 * support stack unwinding, and define CONFIG_AEABI to make all of the
 * functions available without diverging from the Linux code.
 */
    #ifdef __UBOOT__
    #define UNWIND(x...)
    #define CONFIG_AEABI
    #endif
    
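/*
 * ARM_DIV_BODY: unsigned shift-and-subtract division.  On entry the
 * dividend and divisor registers hold the operands (dividend > divisor,
 * divisor > 1 and not a power of 2); on exit the result register holds
 * the quotient, and the divisor and curbit registers are clobbered.
 * Roughly the following C, except that the macro retires up to four
 * quotient bits per pass of its main loop (illustrative sketch only,
 * not part of the build):
 *
 *	unsigned result = 0, curbit = 1;
 *	while (divisor < dividend && !(divisor & 0x80000000)) {
 *		divisor <<= 1;		// align divisor with dividend
 *		curbit <<= 1;		// weight of the next quotient bit
 *	}
 *	do {
 *		if (dividend >= divisor) {
 *			dividend -= divisor;
 *			result |= curbit;
 *		}
 *		divisor >>= 1;
 *		curbit >>= 1;
 *	} while (curbit && dividend);
 *	// result == quotient
 */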
    .macro ARM_DIV_BODY dividend, divisor, result, curbit
    
    #if __LINUX_ARM_ARCH__ >= 5
    
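	@ With CLZ the divisor can be aligned with the dividend in one
	@ step: shift it left by (clz(divisor) - clz(dividend)) so that
	@ their most significant bits line up, and set curbit to the
	@ weight of the corresponding quotient bit.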
    	clz	\curbit, \divisor
    	clz	\result, \dividend
    	sub	\result, \curbit, \result
    	mov	\curbit, #1
    	mov	\divisor, \divisor, lsl \result
    	mov	\curbit, \curbit, lsl \result
    	mov	\result, #0
    	
    #else
    
	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows curbit to be located
	@ at the left end of each 4-bit nibble in the division loop,
	@ saving one loop iteration in most cases.
    	tst	\divisor, #0xe0000000
    	moveq	\divisor, \divisor, lsl #3
    	moveq	\curbit, #8
    	movne	\curbit, #1
    
    	@ Unless the divisor is very big, shift it up in multiples of
    	@ four bits, since this is the amount of unwinding in the main
    	@ division loop.  Continue shifting until the divisor is 
    	@ larger than the dividend.
    1:	cmp	\divisor, #0x10000000
    	cmplo	\divisor, \dividend
    	movlo	\divisor, \divisor, lsl #4
    	movlo	\curbit, \curbit, lsl #4
    	blo	1b
    
	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
    1:	cmp	\divisor, #0x80000000
    	cmplo	\divisor, \dividend
    	movlo	\divisor, \divisor, lsl #1
    	movlo	\curbit, \curbit, lsl #1
    	blo	1b
    
    	mov	\result, #0
    
    #endif
    
    	@ Division loop
    1:	cmp	\dividend, \divisor
    	subhs	\dividend, \dividend, \divisor
    	orrhs	\result,   \result,   \curbit
    	cmp	\dividend, \divisor,  lsr #1
    	subhs	\dividend, \dividend, \divisor, lsr #1
    	orrhs	\result,   \result,   \curbit,  lsr #1
    	cmp	\dividend, \divisor,  lsr #2
    	subhs	\dividend, \dividend, \divisor, lsr #2
    	orrhs	\result,   \result,   \curbit,  lsr #2
    	cmp	\dividend, \divisor,  lsr #3
    	subhs	\dividend, \dividend, \divisor, lsr #3
    	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b
    
    .endm
    
    
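/*
 * ARM_DIV2_ORDER: compute the order (log2) of a power-of-two divisor,
 * so that the quotient can be produced with a single right shift.  It
 * is only used on the power-of-2 fast paths below.  The pre-ARMv5
 * version is a four-step binary search, roughly (illustrative sketch
 * only, assuming the divisor is a power of 2):
 *
 *	unsigned order = 0;
 *	if (divisor >= 1 << 16) { divisor >>= 16; order += 16; }
 *	if (divisor >= 1 << 8)  { divisor >>= 8;  order += 8;  }
 *	if (divisor >= 1 << 4)  { divisor >>= 4;  order += 4;  }
 *	if (divisor > 4)        order += 3;
 *	else                    order += divisor >> 1;
 */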
    .macro ARM_DIV2_ORDER divisor, order
    
    #if __LINUX_ARM_ARCH__ >= 5
    
    	clz	\order, \divisor
    	rsb	\order, \order, #31
    
    #else
    
    	cmp	\divisor, #(1 << 16)
    	movhs	\divisor, \divisor, lsr #16
    	movhs	\order, #16
    	movlo	\order, #0
    
    	cmp	\divisor, #(1 << 8)
    	movhs	\divisor, \divisor, lsr #8
    	addhs	\order, \order, #8
    
    	cmp	\divisor, #(1 << 4)
    	movhs	\divisor, \divisor, lsr #4
    	addhs	\order, \order, #4
    
    	cmp	\divisor, #(1 << 2)
    	addhi	\order, \order, #3
    	addls	\order, \order, \divisor, lsr #1
    
    #endif
    
    .endm
    
    
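/*
 * ARM_MOD_BODY: unsigned remainder by repeated subtraction.  On entry
 * the dividend and divisor registers hold the operands (dividend >
 * divisor, divisor > 1 and not a power of 2); on exit the dividend
 * register holds the remainder.  The order register records how many
 * bit positions the divisor was shifted up; the main loop then walks
 * back down, four positions per pass, subtracting every shifted copy
 * of the divisor that still fits, and labels 2: to 4: mop up the last
 * one to three positions.
 */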
    .macro ARM_MOD_BODY dividend, divisor, order, spare
    
    #if __LINUX_ARM_ARCH__ >= 5
    
    	clz	\order, \divisor
    	clz	\spare, \dividend
    	sub	\order, \order, \spare
    	mov	\divisor, \divisor, lsl \order
    
    #else
    
    	mov	\order, #0
    
    	@ Unless the divisor is very big, shift it up in multiples of
    	@ four bits, since this is the amount of unwinding in the main
    	@ division loop.  Continue shifting until the divisor is 
    	@ larger than the dividend.
    1:	cmp	\divisor, #0x10000000
    	cmplo	\divisor, \dividend
    	movlo	\divisor, \divisor, lsl #4
    	addlo	\order, \order, #4
    	blo	1b
    
	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
    1:	cmp	\divisor, #0x80000000
    	cmplo	\divisor, \dividend
    	movlo	\divisor, \divisor, lsl #1
    	addlo	\order, \order, #1
    	blo	1b
    
    #endif
    
	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
    	subs	\order, \order, #3		@ yes, 3 is intended here
    	blt	2f
    
    1:	cmp	\dividend, \divisor
    	subhs	\dividend, \dividend, \divisor
    	cmp	\dividend, \divisor,  lsr #1
    	subhs	\dividend, \dividend, \divisor, lsr #1
    	cmp	\dividend, \divisor,  lsr #2
    	subhs	\dividend, \dividend, \divisor, lsr #2
    	cmp	\dividend, \divisor,  lsr #3
    	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subsge	\order, \order, #4
	bge	1b
    
    	tst	\order, #3
    	teqne	\dividend, #0
    	beq	5f
    
    	@ Either 1, 2 or 3 comparison/subtractions are left.
    2:	cmn	\order, #2
    	blt	4f
    	beq	3f
    	cmp	\dividend, \divisor
    	subhs	\dividend, \dividend, \divisor
    	mov	\divisor,  \divisor,  lsr #1
    3:	cmp	\dividend, \divisor
    	subhs	\dividend, \dividend, \divisor
    	mov	\divisor,  \divisor,  lsr #1
    4:	cmp	\dividend, \divisor
    	subhs	\dividend, \dividend, \divisor
    5:
    .endm
    
    
    
    .pushsection .text.__udivsi3, "ax"
    
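/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division, r0 = r0 / r1.
 * Fast paths: a divisor of 1 returns the dividend unchanged, a divisor
 * of 0 falls through to Ldiv0, a dividend not larger than the divisor
 * is resolved at 11:, and a power-of-2 divisor is reduced to a single
 * shift at 12:.
 */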
    ENTRY(__udivsi3)
    ENTRY(__aeabi_uidiv)
    UNWIND(.fnstart)
    
    	subs	r2, r1, #1
    	reteq	lr
    	bcc	Ldiv0
    	cmp	r0, r1
    	bls	11f
    	tst	r1, r2
    	beq	12f
    
    	ARM_DIV_BODY r0, r1, r2, r3
    
    	mov	r0, r2
    	ret	lr
    
    11:	moveq	r0, #1
    	movne	r0, #0
    	ret	lr
    
    12:	ARM_DIV2_ORDER r1, r2
    
    	mov	r0, r0, lsr r2
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__udivsi3)
    ENDPROC(__aeabi_uidiv)
    
    .pushsection .text.__umodsi3, "ax"
    
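/*
 * __umodsi3: unsigned 32-bit modulo, r0 = r0 % r1.  A zero divisor goes
 * to Ldiv0; divisors of 1, dividends not larger than the divisor, and
 * power-of-2 divisors (handled by masking with divisor - 1) never reach
 * the main loop.
 */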
    ENTRY(__umodsi3)
    UNWIND(.fnstart)
    
    	subs	r2, r1, #1			@ compare divisor with 1
    	bcc	Ldiv0
    	cmpne	r0, r1				@ compare dividend with divisor
    	moveq   r0, #0
    	tsthi	r1, r2				@ see if divisor is power of 2
    	andeq	r0, r0, r2
    	retls	lr
    
    	ARM_MOD_BODY r0, r1, r2, r3
    
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__umodsi3)
    
    .pushsection .text.__divsi3, "ax"
    
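/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division, r0 = r0 / r1.  The
 * sign of the result is remembered in ip (r0 eor r1), the division is
 * done on absolute values, and the quotient is negated at the end if
 * the operand signs differed.  Labels 10:, 11: and 12: handle divisors
 * of +/-1, dividends no larger in magnitude than the divisor, and
 * power-of-2 divisors respectively.
 */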
    ENTRY(__divsi3)
    ENTRY(__aeabi_idiv)
    UNWIND(.fnstart)
    
    	cmp	r1, #0
    	eor	ip, r0, r1			@ save the sign of the result.
    	beq	Ldiv0
    	rsbmi	r1, r1, #0			@ loops below use unsigned.
    	subs	r2, r1, #1			@ division by 1 or -1 ?
    	beq	10f
    	movs	r3, r0
    	rsbmi	r3, r0, #0			@ positive dividend value
    	cmp	r3, r1
    	bls	11f
    	tst	r1, r2				@ divisor is power of 2 ?
    	beq	12f
    
    	ARM_DIV_BODY r3, r1, r0, r2
    
    	cmp	ip, #0
    	rsbmi	r0, r0, #0
    	ret	lr
    
    10:	teq	ip, r0				@ same sign ?
    	rsbmi	r0, r0, #0
    	ret	lr
    
    11:	movlo	r0, #0
    	moveq	r0, ip, asr #31
    	orreq	r0, r0, #1
    	ret	lr
    
    12:	ARM_DIV2_ORDER r1, r2
    
    	cmp	ip, #0
    	mov	r0, r3, lsr r2
    	rsbmi	r0, r0, #0
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__divsi3)
    ENDPROC(__aeabi_idiv)
    
    .pushsection .text.__modsi3, "ax"
    
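/*
 * __modsi3: signed 32-bit modulo, r0 = r0 % r1.  The remainder is
 * computed on absolute values and then given the sign of the original
 * dividend, which is preserved in ip.
 */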
    ENTRY(__modsi3)
    UNWIND(.fnstart)
    
    	cmp	r1, #0
    	beq	Ldiv0
    	rsbmi	r1, r1, #0			@ loops below use unsigned.
    	movs	ip, r0				@ preserve sign of dividend
    	rsbmi	r0, r0, #0			@ if negative make positive
    	subs	r2, r1, #1			@ compare divisor with 1
    	cmpne	r0, r1				@ compare dividend with divisor
    	moveq	r0, #0
    	tsthi	r1, r2				@ see if divisor is power of 2
    	andeq	r0, r0, r2
    	bls	10f
    
    	ARM_MOD_BODY r0, r1, r2, r3
    
    10:	cmp	ip, #0
    	rsbmi	r0, r0, #0
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__modsi3)
    
    .pushsection .text.__aeabi_uidivmod, "ax"
    
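/*
 * __aeabi_uidivmod: AEABI run-time helper for unsigned division with
 * remainder.  On entry r0 = numerator and r1 = denominator; on exit
 * r0 = quotient and r1 = remainder.  The remainder is reconstructed as
 * numerator - quotient * denominator after the call to __aeabi_uidiv.
 */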
    ENTRY(__aeabi_uidivmod)
    UNWIND(.fnstart)
    UNWIND(.save {r0, r1, ip, lr}	)
    
    	stmfd	sp!, {r0, r1, ip, lr}
    	bl	__aeabi_uidiv
    	ldmfd	sp!, {r1, r2, ip, lr}
    	mul	r3, r0, r2
    	sub	r1, r1, r3
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__aeabi_uidivmod)
    
.pushsection .text.__aeabi_idivmod, "ax"
    
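/*
 * __aeabi_idivmod: signed counterpart of __aeabi_uidivmod; quotient in
 * r0, remainder in r1, built on top of __aeabi_idiv in the same way.
 */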
    ENTRY(__aeabi_idivmod)
    UNWIND(.fnstart)
    UNWIND(.save {r0, r1, ip, lr}	)
    
    	stmfd	sp!, {r0, r1, ip, lr}
    	bl	__aeabi_idiv
    	ldmfd	sp!, {r1, r2, ip, lr}
    	mul	r3, r0, r2
    	sub	r1, r1, r3
    	ret	lr
    
    UNWIND(.fnend)
    ENDPROC(__aeabi_idivmod)
    
    .pushsection .text.Ldiv0, "ax"
    
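/*
 * Ldiv0: common division-by-zero path.  It preserves lr (keeping the
 * stack 8-byte aligned), calls the external __div0 handler, and if that
 * handler returns it hands back a quotient of 0.
 */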
    Ldiv0:
    UNWIND(.fnstart)
    UNWIND(.pad #4)
    UNWIND(.save {lr})
    
    	str	lr, [sp, #-8]!
    	bl	__div0
    	mov	r0, #0			@ About as wrong as it could be.
    	ldr	pc, [sp], #8
    
    UNWIND(.fnend)
    ENDPROC(Ldiv0)
    
    
    /* Thumb-1 specialities */
    #if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
    
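/*
 * The __gnu_thumb1_case_* helpers implement Thumb-1 switch dispatch:
 * the compiler emits a call to the helper followed immediately by a
 * table of offsets, so on entry lr points at the table (bit 0 set for
 * Thumb) and r0 holds the case index.  The helper clears the Thumb bit
 * with the lsrs/lsls pair, loads the table entry for r0 (signed or
 * unsigned byte for sqi/uqi, half-word for shi/uhi, with the index
 * scaled accordingly), doubles it to a byte offset, adds it to lr and
 * returns, resuming execution at the selected case.
 */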
    .pushsection .text.__gnu_thumb1_case_sqi, "ax"
    
    ENTRY(__gnu_thumb1_case_sqi)
    	push	{r1}
    	mov	r1, lr
    	lsrs	r1, r1, #1
    	lsls	r1, r1, #1
    	ldrsb	r1, [r1, r0]
    	lsls	r1, r1, #1
    	add	lr, lr, r1
    	pop	{r1}
    	bx	lr
    ENDPROC(__gnu_thumb1_case_sqi)
    
    .pushsection .text.__gnu_thumb1_case_uqi, "ax"
    
    ENTRY(__gnu_thumb1_case_uqi)
    	push	{r1}
    	mov	r1, lr
    	lsrs	r1, r1, #1
    	lsls	r1, r1, #1
    	ldrb	r1, [r1, r0]
    	lsls	r1, r1, #1
    	add	lr, lr, r1
    	pop	{r1}
    	bx	lr
    ENDPROC(__gnu_thumb1_case_uqi)
    
    .pushsection .text.__gnu_thumb1_case_shi, "ax"
    
    ENTRY(__gnu_thumb1_case_shi)
    
    	push	{r0, r1}
    	mov	r1, lr
    	lsrs	r1, r1, #1
    	lsls	r0, r0, #1
    	lsls	r1, r1, #1
    	ldrsh	r1, [r1, r0]
    	lsls	r1, r1, #1
    	add	lr, lr, r1
    	pop	{r0, r1}
    	bx	lr
    ENDPROC(__gnu_thumb1_case_shi)
    
    .pushsection .text.__gnu_thumb1_case_uhi, "ax"
    
    ENTRY(__gnu_thumb1_case_uhi)
    
    	push	{r0, r1}
    	mov	r1, lr
    	lsrs	r1, r1, #1
    	lsls	r0, r0, #1
    	lsls	r1, r1, #1
    	ldrh	r1, [r1, r0]
    	lsls	r1, r1, #1
    	add	lr, lr, r1
    	pop	{r0, r1}
    	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)

#endif