add fast reciprocal sqrt() implementation

This commit is contained in:
Jeff Wang 2020-09-07 18:18:20 -04:00
parent 5f6a375c06
commit 273e2f5742
6 changed files with 202 additions and 32 deletions

View File

@ -206,7 +206,7 @@ I sometimes use an STC15F2K60S2 for development work. This microcontroller is av
## Programming with stcgal
Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option).
Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option), especially when using an inline resistor and diode.
~~~~
$ ./stcgal.py -P stc15 -b 230400 stc_rpncalc/main.hex
@ -251,7 +251,7 @@ Disconnected!
# Bugs
1. After division by 0, ln(-), over/underflow, or other operations which give an `Error`, it's possible to still do certain operations on `Error`. Many functions do check, and will not operate on `Error`, but not all of them yet. This is somewhat similar to old soviet Elektronika calculators where `Error` is just a number, and there wasn't enough ROM space to check for errors. (There are people who explore the inner-workings of these calculators by manipulating the `Error` "number".)
1. When shifted down, keys which do not have a shifted-down function will instead be interpreted as if there were no shift.
1. When shifted, keys which do not have a shifted function will instead be interpreted as if there were no shift.
1. Trigonometric functions are extremely slow and inaccurate.
1. There are probably more bugs waiting to be discovered.
@ -300,7 +300,9 @@ The number `0.135` would be stored the same way, except now the exponent is `0x7
- Exponentials are calculated similar to the HP 35 algorithm, as described [here](http://www.jacques-laporte.org/expx.htm) using the same constants as the logarithm algorithm.
- see `src/decn/proto/exp.cpp` for initial prototyping development work
- Powers are calculated using the identity y^x = e^(x*ln(y))
- Square roots are calculated using the identity sqrt(x) = e^(0.5*ln(x))
- Square roots are calculated using a fixed number of Newton-Raphson iterations to calculatie 1/sqrt(x) and then multiplying by x.
- the iteration for 1/sqrt(x) is new_estimate = 0.5*estimate * (3 - x * estimate * estimate)
- see `src/decn/proto/recip_sqrt.cpp for initial prototyping development work
- Trigonometric functions are calculated using algorithms similar to the [sinclair scientific](http://files.righto.com/calculator/sinclair_scientific_simulator.html), and are fairly slow and inaccurate.
## TODO

View File

@ -35,6 +35,7 @@
// #define DEBUG_LOG_ALL //even more verbose
// #define DEBUG_EXP
// #define DEBUG_EXP_ALL //even more verbose
// #define DEBUG_SQRT
#ifndef DESKTOP
//#undef EXTRA_CHECKS
@ -47,6 +48,7 @@
#undef DEBUG_LOG_ALL
#undef DEBUG_EXP
#undef DEBUG_EXP_ALL
#undef DEBUG_SQRT
#endif
#ifdef DESKTOP
@ -68,8 +70,8 @@ static const uint8_t num_digits_display = 16;
dec80 AccDecn;
__idata dec80 BDecn;
__idata dec80 TmpDecn; //used by add_decn() and mult_decn() and sqrt_decn()
__idata dec80 Tmp2Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
__xdata dec80 Tmp3Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
__idata dec80 Tmp2Decn; //used by recip_decn(), ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
__idata dec80 Tmp3Decn; //used by ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
__xdata dec80 Tmp4Decn; //used by sincos_decn()
__xdata dec80 TmpStackDecn[4];
@ -876,10 +878,10 @@ void recip_decn(void){
CURR_RECIP.lsu[i] = 0;
}
copy_decn(&AccDecn, &CURR_RECIP);
//do newton raphson iterations
//do newton-raphson iterations
for (i = 0; i < 6; i++){ //just fix number of iterations for now
#ifdef DEBUG_DIV
decn_to_str_complete(&curr_recip);
decn_to_str_complete(&CURR_RECIP);
printf("%2d: %s\n", i, Buf);
#endif
//Accum *= x_copy
@ -1288,6 +1290,144 @@ void pow_decn(void) {
exp_decn();
}
#ifdef USE_POW_SQRT_IMPL
void sqrt_decn(void) {
if (decn_is_zero(&AccDecn)) {
return;
}
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
st_push_decn(&BDecn); // sqrt should behave like an unary operation
//b = 0.5
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
pow_decn();
st_pop_decn(&BDecn);
}
#else
void sqrt_decn(void){
#define CURR_EST Tmp2Decn //holds current 1/sqrt(x) estimate
#define X_2 Tmp3Decn //holds copy of original x / 2
uint8_t i;
exp_t initial_exp;
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
//normalize
remove_leading_zeros(&AccDecn);
#ifdef DEBUG_SQRT
decn_to_str_complete(&AccDecn);
printf("sqrt in: %s\n", Buf);
#endif
//store copy of x
st_push_decn(&AccDecn);
//calculate x_orig / 2
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
mult_decn();
copy_decn(&X_2, &AccDecn);
//restore x
st_load_decn(&AccDecn);
//get initial estimate for 1/sqrt(x) == 10^(-0.5 * log(x)):
// approximate significand == 10^(-0.5 * log(x_signif))
// with linear approximation: -0.18 * x_signif + 2.5
// new exponent part is (10^(-0.5 * log(10^x_exp)))
// == 10^(-0.5 * x^exp)
initial_exp = get_exponent(&AccDecn);
set_exponent(&AccDecn, 0, 0); //clear exponent (Acc is not negative)
#ifdef DEBUG_SQRT
printf("sqrt exponent %d ", initial_exp);
#endif
if (initial_exp & 0x1){ //odd
#ifdef DEBUG_SQRT
printf("(odd) ");
#endif
//increment x_exp and
initial_exp++;
//approximate estimated significand as (-0.056*x_signif + 0.79) * 10^0.5
// == -0.18 * x_signif + 2.5
//b = -0.18
BDecn.lsu[0] = 18;
BDecn.exponent = -1; //negative, and exponent = -1
//a = -0.18 * x_signif
mult_decn();
//b = 2.5
BDecn.lsu[0] = 25;
BDecn.exponent = 0;
//a = -0.18 * x_signif + 2.5
add_decn();
} else { //even
//keep x_exp as is and approximate estimated significand as
// -0.056*x_signif + 0.79
//b = -0.056
BDecn.lsu[0] = 56;
set_exponent(&BDecn, -2, 1);
//a = -0.056 * x_signif
mult_decn();
//b = 0.79
BDecn.lsu[0] = 7;
BDecn.lsu[1] = 90;
BDecn.exponent = 0;
//a = -0.056*x_signif + 0.79
add_decn();
}
//est_exp = -x_exp / 2;
initial_exp = -initial_exp / 2;
//est_exp-- if AccDecn exponent is negative
// (AccDecn exponent is either 0 or -1, and AccDecn is positive)
if (AccDecn.exponent != 0){
initial_exp--;
}
set_exponent(&AccDecn, initial_exp, 0); //(initial estimate is never negative)
copy_decn(&CURR_EST, &AccDecn);
#ifdef DEBUG_SQRT
printf(" -> %d\n", initial_exp);
#endif
//do newton-raphson iterations
for (i = 0; i < 6; i++){ //just fix number of iterations for now
#ifdef DEBUG_SQRT
decn_to_str_complete(&CURR_EST);
printf("sqrt %2d: %s\n", i, Buf);
#endif
//accum = est * est;
copy_decn(&BDecn, &AccDecn);
mult_decn();
//accum *= x_orig_2; //accum = x/2 * est * est
copy_decn(&BDecn, &X_2);
mult_decn();
//accum = - x/2 * est * est
negate_decn(&AccDecn);
//b = 3/2
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 15;
//accum = 3/2 - x/2 * est * est
add_decn();
//accum *= est; //accum = 0.5 * est * (3 - x * est * est)
copy_decn(&BDecn, &CURR_EST);
mult_decn();
//est = accum;
copy_decn(&CURR_EST, &AccDecn);
}
//calc sqrt from recip_sqrt
st_pop_decn(&BDecn);
mult_decn();
#undef CURR_EST
#undef X_COPY
}
#endif //USE_POW_SQRT_IMPL
// see W.E. Egbert, "Personal Calculator Algorithms II: Trigonometric functions"
void project_decn_into_0_2pi(void) {
const uint8_t is_negative = (AccDecn.exponent < 0);
@ -1446,25 +1586,6 @@ void pi_decn(void) {
mult_decn();
}
void sqrt_decn(void) {
if (decn_is_zero(&AccDecn)) {
return;
}
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
st_push_decn(&BDecn); // sqrt should behave like an unary operation
//b = 0.5
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
pow_decn();
st_pop_decn(&BDecn);
}
static void set_str_error(void){
Buf[0] = 'E';
Buf[1] = 'r';

View File

@ -341,6 +341,51 @@ TEST_CASE("division"){
);
}
static void sqrt_test(const char* x_str, int x_exp)
{
CAPTURE(x_str); CAPTURE(x_exp);
build_dec80(x_str, x_exp);
// decn_to_str_complete(&AccDecn);
// printf(" acc: %s\n", Buf);
sqrt_decn();
decn_to_str_complete(&AccDecn);
CAPTURE(Buf); // sqrt(x)
//calculate actual result
bmp::mpfr_float::default_precision(50);
std::string x_full_str(x_str);
x_full_str += "e" + std::to_string(x_exp);
CAPTURE(x_full_str);
bmp::mpfr_float x_actual(x_full_str);
CAPTURE(x_actual);
if (decn_is_nan(&AccDecn)){
//check that NaN is from result of sqrt(-)
CHECK(x_actual <= 0);
} else if (decn_is_zero(&AccDecn)){
//check actual is also 0
CHECK(x_actual == 0);
} else {
x_actual = sqrt(x_actual);
bmp::mpfr_float calculated(Buf);
bmp::mpfr_float rel_diff = abs((x_actual - calculated) / x_actual);
CHECK(rel_diff < 3e-16); //TODO
}
}
TEST_CASE("sqrt"){
sqrt_test("0", 0);
sqrt_test("2", 0);
sqrt_test("-1", 0);
sqrt_test("0.155", 0);
sqrt_test("10", 0);
sqrt_test("1.1", 10);
sqrt_test("2.02", -10);
sqrt_test("2.02", 0);
sqrt_test("1.5", 0);
sqrt_test("9", 99);
sqrt_test("123", 12345);
}
static void log_test(
//input
const char* x_str, int x_exp,

View File

@ -23,7 +23,7 @@
#include <boost/multiprecision/mpfr.hpp>
using namespace boost::multiprecision;
// #define DEBUG
#define DEBUG
using std::cout;
using std::endl;
@ -51,7 +51,7 @@ int main(void){
//loop through values to test
#ifdef DEBUG
mpfr_float x(0.1, CALC_PRECISION);
mpfr_float x(2.0, CALC_PRECISION);
{
#else
for (mpfr_float x(1e-99, CALC_PRECISION); x < 1e99; x *= 1.03){
@ -87,7 +87,7 @@ int main(void){
x_exp++;
est_signif = -0.18 * x_signif + 2.5;
} else { //even
//keep x_exp as is and approximate estimate significand as
//keep x_exp as is and approximate estimated significand as
// -0.056*x_signif + 0.79
est_signif = -0.056 * x_signif + 0.79;
}

View File

@ -3,6 +3,8 @@
"""
Created on Wed Dec 11 00:56:01 2019
Calculate constants used for "0x5F3759DF" reciprocal sqrt
@author: jeffrey
"""

View File

@ -140,7 +140,7 @@ static void latch_on(void)
__xdata char EntryBuf[MAX_CHARS_PER_LINE + 1];
__xdata uint8_t ExpBuf[2];
__code const char VER_STR[32+1] = "STC RPN Calculator v1.11";
__code const char VER_STR[32+1] = "STC RPN Calculator v1.12";
enum {