/* Copyright (c) 2002  Michael Stumpf
   Copyright (c) 2006  Dmitry Xmelkov
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.
   * Neither the name of the copyright holders nor the names of
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE. */

/* $Id: hypot.S,v 1.1 2007/01/14 15:00:47 dmix Exp $ */

#include "fp32def.h"
#include "asmdef.h"
#include "ntz.h"

/* double hypot (double x, double y);

   Returns `sqrt (x*x + y*y)': the length of the hypotenuse of a right
   triangle whose legs are x and y, i.e. the distance from the origin to
   the point (x, y).

   Prefer this routine to the direct formula, since the error is much
   smaller:
     - small x and y do not underflow;
     - there is no overflow as long as the result itself is in range.

   Notes:
     * A mix of NaN and Inf arguments is accepted (like Glibc) and the
       result is Inf.

   Outline of the implementation below:
     1. Drop both signs and split the operands (__fp_split3).
     2. Inf/NaN operands and zero operands are answered immediately.
     3. If the exponents differ by EDIFF_2BIG or more, the operand with
        the larger exponent is returned as is.
     4. If the larger exponent is too big, or the smaller one too small,
        both exponents are shifted by a common scale factor, which is
        remembered on the stack.
     5. A*A and B*B are formed with __mulsf3_pse, summed with __addsf3x,
        rounded, and passed to sqrt.
     6. A non-zero scale factor is undone with ldexp.  */

/* Exponent distance at which the smaller operand is ignored.  */
#define EDIFF_2BIG      13

/* The two thresholds below are unbiased (no 127 displacement).  */
#define EXP_2BIG        63      /* exponent this big: scale down first  */
#define EXP_2SMALL      (-63)   /* exponent this small: scale up first  */

/* Both scale amounts are positive numbers (65 and 87 with the values
   above).  SCALE_BIG is one more than it would otherwise be so that
   `-SCALE_SMALL & SCALE_BIG' is non-zero: the final stage tests a bit
   from that mask to tell `scaled' apart from `not scaled'.  */
#define SCALE_BIG       (126 - EXP_2BIG + 2)
#define SCALE_SMALL     (149 + EXP_2SMALL + 1)

/* Build-time check: the scaled-down and scaled-up exponent ranges must
   not run into the opposite threshold.  */
#if  EXP_2BIG - SCALE_BIG - (EDIFF_2BIG - 1) <= EXP_2SMALL      \
  || EXP_2SMALL + SCALE_SMALL + (EDIFF_2BIG - 1) >= EXP_2BIG
# error
#endif

/* Second argument of `ldexp (float x, int exp)'.  */
#define exp_lo  r20
#define exp_hi  r21

/* Holding registers used around the two multiplications (MOVW build).
   They are saved on the stack before use and restored afterwards.  */
#define rC0     r14
#define rC1     r15
#define rC2     r16
#define rC3     r17

FUNCTION hypot

  /* At least one operand is Inf or NaN (carry was set by __fp_split3).  */
.L_nonfinite:
        rcall   _U(__fp_pscA)
        breq    .L_give_inf             ; hypot(Inf, *) --> Inf
        rcall   _U(__fp_pscB)
        breq    .L_give_inf             ; hypot(*, Inf) --> Inf
        rjmp    _U(__fp_nan)            ; NaN and finite, or both NaN
.L_give_inf:
        rjmp    _U(__fp_inf)            ; T is 0 after __fp_split3()

  /* Shortcut exits: the answer is simply one of the operands.  */
.L_return_b:
        X_movw  rA0, rB0                ; move B into the A registers
        X_movw  rA2, rB2
.L_return_a:
        rjmp    _U(__fp_mpack)          ; tail call: pack A and return it

ENTRY hypot
  /* The result does not depend on the signs: drop them.  */
        andi    rA3, 0x7f
        andi    rB3, 0x7f

  /* Split both operands, then filter out the special cases.  */
        rcall   _U(__fp_split3)
        brcs    .L_nonfinite
        tst     rA3
        breq    .L_return_b             ; rA3 is 0: the answer is B
        tst     rB3
        breq    .L_return_a             ; rB3 is 0: the answer is A

  /* Find out which exponent is larger.  ZH collects the scale factor
     and stays 0 unless a scaling path below is taken.  */
        clr     ZH
        cp      rA3, rB3
        brsh    .L_a_not_less

  /* ---- exponent of A is below exponent of B ---- */
        mov     ZL, rB3
        sub     ZL, rA3                 ; ZL = distance between exponents
        cpi     ZL, EDIFF_2BIG
        brsh    .L_return_b             ; A is negligible next to B
  ; must the pair be scaled down ?
        cpi     rB3, 127 + EXP_2BIG
        brlo    .L_check_a_small
        ldi     ZH, SCALE_BIG           ; positive: it is subtracted below
        rjmp    .L_apply_scale
  ; must the pair be scaled up ?
.L_check_a_small:
        cpi     rA3, 127 + EXP_2SMALL
        brsh    .L_no_scale
        rjmp    .L_scale_up

  /* ---- exponent of A is not below exponent of B ---- */
.L_a_not_less:
        mov     ZL, rA3
        sub     ZL, rB3                 ; ZL = distance between exponents
        cpi     ZL, EDIFF_2BIG
        brsh    .L_return_a             ; B is negligible next to A
  ; must the pair be scaled down ?
        cpi     rA3, 127 + EXP_2BIG
        brlo    .L_check_b_small
        ldi     ZH, SCALE_BIG           ; positive: it is subtracted below
        rjmp    .L_apply_scale
  ; must the pair be scaled up ?
.L_check_b_small:
        cpi     rB3, 127 + EXP_2SMALL
        brsh    .L_no_scale

  /* Scale-up path.  The factor is stored negated, so the subtraction at
     .L_apply_scale raises both exponents.  */
.L_scale_up:
        ldi     ZH, -(SCALE_SMALL)
  ; Shift the mantissa of A left until bit 7 of rA2 is set, lowering
  ; the exponent once per shift.  Skipped if that bit is set already.
        tst     rA2
        brmi    .L_check_b_norm
.L_shift_a:
        dec     rA3
        lsl     rA0
        rol     rA1
        rol     rA2
        brpl    .L_shift_a
  ; The same for B.
.L_check_b_norm:
        tst     rB2
        brmi    .L_apply_scale
.L_shift_b:
        dec     rB3
        lsl     rB0
        rol     rB1
        rol     rB2
        brpl    .L_shift_b

  /* Apply the scale factor to both exponents ...  */
.L_apply_scale:
        sub     rA3, ZH
        sub     rB3, ZH
  /* ... and remember it for the final stage (0 if nothing was scaled).  */
.L_no_scale:
        push    ZH

  /* Compute sqrt (A*A + B*B).  */
#if  defined(__AVR_HAVE_MOVW__) && __AVR_HAVE_MOVW__
  ; save the holding registers
        push    rC3
        push    rC2
        push    rC1
        push    rC0
  ; park B in rC, then A := A * A with both extension bytes cleared
        movw    rC0, rB0
        movw    rC2, rB2
        clr     rAE
        mov     rBE, rAE
        movw    rB0, rA0
        movw    rB2, rA2
        rcall   _U(__mulsf3_pse)
  ; bring B back; park A*A in rC and its extension byte on the stack
        movw    rB0, rC0
        movw    rB2, rC2
        push    rAE
        movw    rC0, rA0
        movw    rC2, rA2
  ; A := B * B with both extension bytes cleared
        clr     rBE
        mov     rAE, rBE
        movw    rA0, rB0
        movw    rA2, rB2
        rcall   _U(__mulsf3_pse)
  ; B := the parked A*A, including its extension byte
        pop     rBE
        movw    rB0, rC0
        movw    rB2, rC2
  ; restore the holding registers
        pop     rC0
        pop     rC1
        pop     rC2
        pop     rC3
#else   /* to __AVR_HAVE_MOVW__ */
  ; park B on the stack, then A := A * A with extension bytes cleared
        push    rB3
        push    rB2
        push    rB1
        push    rB0
        clr     rAE
        mov     rBE, rAE
        mov     rB0, rA0
        mov     rB1, rA1
        mov     rB2, rA2
        mov     rB3, rA3
        rcall   _U(__mulsf3_pse)
  ; bring B back; park A*A and its extension byte on the stack
        pop     rB0
        pop     rB1
        pop     rB2
        pop     rB3
        push    rA3
        push    rA2
        push    rA1
        push    rA0
        push    rAE
  ; A := B * B with both extension bytes cleared
        clr     rBE
        mov     rAE, rBE
        mov     rA0, rB0
        mov     rA1, rB1
        mov     rA2, rB2
        mov     rA3, rB3
        rcall   _U(__mulsf3_pse)
  ; B := the parked A*A, including its extension byte
        pop     rBE
        pop     rB0
        pop     rB1
        pop     rB2
        pop     rB3
#endif  /* ! __AVR_HAVE_MOVW__ */
        rcall   _U(__addsf3x)
        rcall   _U(__fp_round)
        rcall   _U(sqrt)

  /* Undo the scaling.  The saved factor is 0, SCALE_BIG or -SCALE_SMALL.
     The bit selected by ntz() from the mask `-SCALE_SMALL & SCALE_BIG'
     is set in both non-zero factors and clear in 0, so testing that one
     bit is enough.  The mask must therefore not be empty.  */
#if     (-SCALE_SMALL & SCALE_BIG) == 0
# error
#endif
        pop     exp_lo
        sbrs    exp_lo, ntz(-SCALE_SMALL & SCALE_BIG)
        ret                             ; scale == 0: nothing to undo
  ; sign-extend the factor to 16 bits and finish in ldexp
        clr     exp_hi
        sbrc    exp_lo, 7
        com     exp_hi
        rjmp    _U(ldexp)
ENDFUNC