| @ |
| @ Written by Wilco Dijkstra, 1996. The following email exchange establishes the |
| @ license. |
| @ |
| @ From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> |
| @ Date: Fri, Jun 24, 2011 at 3:20 AM |
| @ Subject: Re: sqrt routine |
| @ To: Kevin Ma <kma@google.com> |
| @ Hi Kevin, |
| @ Thanks for asking. Those routines are public domain (originally posted to |
| @ comp.sys.arm a long time ago), so you can use them freely for any purpose. |
| @ Cheers, |
| @ Wilco |
| @ |
| @ ----- Original Message ----- |
| @ From: "Kevin Ma" <kma@google.com> |
| @ To: <Wilco.Dijkstra@ntlworld.com> |
| @ Sent: Thursday, June 23, 2011 11:44 PM |
| @ Subject: Fwd: sqrt routine |
| @ Hi Wilco, |
| @ I saw your sqrt routine from several web sites, including |
| @ http://www.finesse.demon.co.uk/steven/sqrt.html. |
| @ Just wonder if there's any copyright information with your Successive |
| @ approximation routines, or if I can freely use it for any purpose. |
| @ Thanks. |
| @ Kevin |
| |
| @ Minor modifications in code style for WebRTC, 2012. |
| @ Output is bit-exact with the reference C code in spl_sqrt_floor.c. |
| |
| @ Input : r0 32 bit unsigned integer |
| @ Output: r0 = INT (SQRT (r0)), precision is 16 bits |
| @ Registers touched: r1, r2 |
| |
| #include "webrtc/system_wrappers/include/asm_defines.h" |
| |
| @ WebRtcSpl_SqrtFloor: integer square root computed with 16 unrolled |
| @ shift-and-subtract steps, producing one result bit per step, most |
| @ significant bit first. |
| @ |
| @ Register roles inside the routine: |
| @ r0 = running remainder (starts as the input, trial values are subtracted) |
| @ r1 = constant 3 << 30, added in every step to restore the marker in r2 |
| @ r2 = "01" marker in bits 31:30 plus the result bits found so far in the |
| @ low bits |
| @ The condition flags are modified (cmp in every step). No stack is used. |
| @ |
| @ GLOBAL_FUNCTION / DEFINE_FUNCTION come from asm_defines.h; presumably they |
| @ export the symbol and emit its label -- see that header to confirm. |
| GLOBAL_FUNCTION WebRtcSpl_SqrtFloor |
| @ Align the entry point to 2^2 = 4 bytes. |
| .align 2 |
| DEFINE_FUNCTION WebRtcSpl_SqrtFloor |
| @ r1 = 0xC0000000 (per-step correction), r2 = 0x40000000 (marker, no bits yet) |
| mov r1, #3 << 30 |
| mov r2, #1 << 30 |
| 
| @ unroll for i = 0 .. 15 |
| @ |
| @ Step i: |
| @ trial = r2 ror (2 * i): the marker lands on bit (30 - 2 * i) and the |
| @ result bits found so far sit directly above it. |
| @ cmp sets C when r0 >= trial (unsigned compare). |
| @ subhs subtracts trial from the remainder only in that case; it does not |
| @ update the flags, so C from cmp is still valid for adc. |
| @ adc r2 = r1 + (r2 << 1) + C: shifts the new result bit (C) into bit 0. |
| @ The shift moves the marker to bits 31:30 = "10"; adding r1 wraps |
| @ it back to "01", so r2 keeps the form 0x40000000 | bits. |
| @ |
| @ NOTE(review): step 0 is written as "ror #0"; this relies on the assembler |
| @ treating a zero rotate as "no shift" (not RRX) -- confirm for each toolchain. |
| 
| cmp r0, r2, ror #2 * 0 |
| subhs r0, r0, r2, ror #2 * 0 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 1 |
| subhs r0, r0, r2, ror #2 * 1 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 2 |
| subhs r0, r0, r2, ror #2 * 2 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 3 |
| subhs r0, r0, r2, ror #2 * 3 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 4 |
| subhs r0, r0, r2, ror #2 * 4 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 5 |
| subhs r0, r0, r2, ror #2 * 5 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 6 |
| subhs r0, r0, r2, ror #2 * 6 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 7 |
| subhs r0, r0, r2, ror #2 * 7 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 8 |
| subhs r0, r0, r2, ror #2 * 8 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 9 |
| subhs r0, r0, r2, ror #2 * 9 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 10 |
| subhs r0, r0, r2, ror #2 * 10 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 11 |
| subhs r0, r0, r2, ror #2 * 11 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 12 |
| subhs r0, r0, r2, ror #2 * 12 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 13 |
| subhs r0, r0, r2, ror #2 * 13 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 14 |
| subhs r0, r0, r2, ror #2 * 14 |
| adc r2, r1, r2, lsl #1 |
| 
| cmp r0, r2, ror #2 * 15 |
| subhs r0, r0, r2, ror #2 * 15 |
| adc r2, r1, r2, lsl #1 |
| 
| @ Clear the marker in bits 31:30 so r0 holds only the 16 result bits. |
| bic r0, r2, #3 << 30 @ for rounding add: cmp r0, r2 adc r2, #1 |
| @ Return to the caller with the result in r0. |
| bx lr |