add fast reciprocal sqrt() implementation

This commit is contained in:
Jeff Wang 2020-09-07 18:18:20 -04:00
parent 5f6a375c06
commit 273e2f5742
6 changed files with 202 additions and 32 deletions

View File

@ -206,7 +206,7 @@ I sometimes use an STC15F2K60S2 for development work. This microcontroller is av
## Programming with stcgal ## Programming with stcgal
Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option). Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option), especially when using an inline resistor and diode.
~~~~ ~~~~
$ ./stcgal.py -P stc15 -b 230400 stc_rpncalc/main.hex $ ./stcgal.py -P stc15 -b 230400 stc_rpncalc/main.hex
@ -251,7 +251,7 @@ Disconnected!
# Bugs # Bugs
1. After division by 0, ln(-), over/underflow, or other operations which give an `Error`, it's possible to still do certain operations on `Error`. Many functions do check, and will not operate on `Error`, but not all of them yet. This is somewhat similar to old soviet Elektronika calculators where `Error` is just a number, and there wasn't enough ROM space to check for errors. (There are people who explore the inner-workings of these calculators by manipulating the `Error` "number".) 1. After division by 0, ln(-), over/underflow, or other operations which give an `Error`, it's possible to still do certain operations on `Error`. Many functions do check, and will not operate on `Error`, but not all of them yet. This is somewhat similar to old soviet Elektronika calculators where `Error` is just a number, and there wasn't enough ROM space to check for errors. (There are people who explore the inner-workings of these calculators by manipulating the `Error` "number".)
1. When shifted down, keys which do not have a shifted-down function will instead be interpreted as if there were no shift. 1. When shifted, keys which do not have a shifted function will instead be interpreted as if there were no shift.
1. Trigonometric functions are extremely slow and inaccurate. 1. Trigonometric functions are extremely slow and inaccurate.
1. There are probably more bugs waiting to be discovered. 1. There are probably more bugs waiting to be discovered.
@ -300,7 +300,9 @@ The number `0.135` would be stored the same way, except now the exponent is `0x7
- Exponentials are calculated similar to the HP 35 algorithm, as described [here](http://www.jacques-laporte.org/expx.htm) using the same constants as the logarithm algorithm. - Exponentials are calculated similar to the HP 35 algorithm, as described [here](http://www.jacques-laporte.org/expx.htm) using the same constants as the logarithm algorithm.
- see `src/decn/proto/exp.cpp` for initial prototyping development work - see `src/decn/proto/exp.cpp` for initial prototyping development work
- Powers are calculated using the identity y^x = e^(x*ln(y)) - Powers are calculated using the identity y^x = e^(x*ln(y))
- Square roots are calculated using the identity sqrt(x) = e^(0.5*ln(x)) - Square roots are calculated using a fixed number of Newton-Raphson iterations to calculatie 1/sqrt(x) and then multiplying by x.
- the iteration for 1/sqrt(x) is new_estimate = 0.5*estimate * (3 - x * estimate * estimate)
- see `src/decn/proto/recip_sqrt.cpp for initial prototyping development work
- Trigonometric functions are calculated using algorithms similar to the [sinclair scientific](http://files.righto.com/calculator/sinclair_scientific_simulator.html), and are fairly slow and inaccurate. - Trigonometric functions are calculated using algorithms similar to the [sinclair scientific](http://files.righto.com/calculator/sinclair_scientific_simulator.html), and are fairly slow and inaccurate.
## TODO ## TODO

View File

@ -35,6 +35,7 @@
// #define DEBUG_LOG_ALL //even more verbose // #define DEBUG_LOG_ALL //even more verbose
// #define DEBUG_EXP // #define DEBUG_EXP
// #define DEBUG_EXP_ALL //even more verbose // #define DEBUG_EXP_ALL //even more verbose
// #define DEBUG_SQRT
#ifndef DESKTOP #ifndef DESKTOP
//#undef EXTRA_CHECKS //#undef EXTRA_CHECKS
@ -47,6 +48,7 @@
#undef DEBUG_LOG_ALL #undef DEBUG_LOG_ALL
#undef DEBUG_EXP #undef DEBUG_EXP
#undef DEBUG_EXP_ALL #undef DEBUG_EXP_ALL
#undef DEBUG_SQRT
#endif #endif
#ifdef DESKTOP #ifdef DESKTOP
@ -68,8 +70,8 @@ static const uint8_t num_digits_display = 16;
dec80 AccDecn; dec80 AccDecn;
__idata dec80 BDecn; __idata dec80 BDecn;
__idata dec80 TmpDecn; //used by add_decn() and mult_decn() and sqrt_decn() __idata dec80 TmpDecn; //used by add_decn() and mult_decn() and sqrt_decn()
__idata dec80 Tmp2Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn() __idata dec80 Tmp2Decn; //used by recip_decn(), ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
__xdata dec80 Tmp3Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn() __idata dec80 Tmp3Decn; //used by ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
__xdata dec80 Tmp4Decn; //used by sincos_decn() __xdata dec80 Tmp4Decn; //used by sincos_decn()
__xdata dec80 TmpStackDecn[4]; __xdata dec80 TmpStackDecn[4];
@ -876,10 +878,10 @@ void recip_decn(void){
CURR_RECIP.lsu[i] = 0; CURR_RECIP.lsu[i] = 0;
} }
copy_decn(&AccDecn, &CURR_RECIP); copy_decn(&AccDecn, &CURR_RECIP);
//do newton raphson iterations //do newton-raphson iterations
for (i = 0; i < 6; i++){ //just fix number of iterations for now for (i = 0; i < 6; i++){ //just fix number of iterations for now
#ifdef DEBUG_DIV #ifdef DEBUG_DIV
decn_to_str_complete(&curr_recip); decn_to_str_complete(&CURR_RECIP);
printf("%2d: %s\n", i, Buf); printf("%2d: %s\n", i, Buf);
#endif #endif
//Accum *= x_copy //Accum *= x_copy
@ -1105,8 +1107,8 @@ void log10_decn(void){
void exp_decn(void){ void exp_decn(void){
uint8_t j, k; uint8_t j, k;
uint8_t need_recip = 0; uint8_t need_recip = 0;
#define SAVED Tmp2Decn #define SAVED Tmp2Decn
#define NUM_TIMES Tmp3Decn #define NUM_TIMES Tmp3Decn
//check not error //check not error
if (decn_is_nan(&AccDecn)){ if (decn_is_nan(&AccDecn)){
@ -1288,6 +1290,144 @@ void pow_decn(void) {
exp_decn(); exp_decn();
} }
#ifdef USE_POW_SQRT_IMPL
void sqrt_decn(void) {
if (decn_is_zero(&AccDecn)) {
return;
}
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
st_push_decn(&BDecn); // sqrt should behave like an unary operation
//b = 0.5
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
pow_decn();
st_pop_decn(&BDecn);
}
#else
void sqrt_decn(void){
#define CURR_EST Tmp2Decn //holds current 1/sqrt(x) estimate
#define X_2 Tmp3Decn //holds copy of original x / 2
uint8_t i;
exp_t initial_exp;
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
//normalize
remove_leading_zeros(&AccDecn);
#ifdef DEBUG_SQRT
decn_to_str_complete(&AccDecn);
printf("sqrt in: %s\n", Buf);
#endif
//store copy of x
st_push_decn(&AccDecn);
//calculate x_orig / 2
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
mult_decn();
copy_decn(&X_2, &AccDecn);
//restore x
st_load_decn(&AccDecn);
//get initial estimate for 1/sqrt(x) == 10^(-0.5 * log(x)):
// approximate significand == 10^(-0.5 * log(x_signif))
// with linear approximation: -0.18 * x_signif + 2.5
// new exponent part is (10^(-0.5 * log(10^x_exp)))
// == 10^(-0.5 * x^exp)
initial_exp = get_exponent(&AccDecn);
set_exponent(&AccDecn, 0, 0); //clear exponent (Acc is not negative)
#ifdef DEBUG_SQRT
printf("sqrt exponent %d ", initial_exp);
#endif
if (initial_exp & 0x1){ //odd
#ifdef DEBUG_SQRT
printf("(odd) ");
#endif
//increment x_exp and
initial_exp++;
//approximate estimated significand as (-0.056*x_signif + 0.79) * 10^0.5
// == -0.18 * x_signif + 2.5
//b = -0.18
BDecn.lsu[0] = 18;
BDecn.exponent = -1; //negative, and exponent = -1
//a = -0.18 * x_signif
mult_decn();
//b = 2.5
BDecn.lsu[0] = 25;
BDecn.exponent = 0;
//a = -0.18 * x_signif + 2.5
add_decn();
} else { //even
//keep x_exp as is and approximate estimated significand as
// -0.056*x_signif + 0.79
//b = -0.056
BDecn.lsu[0] = 56;
set_exponent(&BDecn, -2, 1);
//a = -0.056 * x_signif
mult_decn();
//b = 0.79
BDecn.lsu[0] = 7;
BDecn.lsu[1] = 90;
BDecn.exponent = 0;
//a = -0.056*x_signif + 0.79
add_decn();
}
//est_exp = -x_exp / 2;
initial_exp = -initial_exp / 2;
//est_exp-- if AccDecn exponent is negative
// (AccDecn exponent is either 0 or -1, and AccDecn is positive)
if (AccDecn.exponent != 0){
initial_exp--;
}
set_exponent(&AccDecn, initial_exp, 0); //(initial estimate is never negative)
copy_decn(&CURR_EST, &AccDecn);
#ifdef DEBUG_SQRT
printf(" -> %d\n", initial_exp);
#endif
//do newton-raphson iterations
for (i = 0; i < 6; i++){ //just fix number of iterations for now
#ifdef DEBUG_SQRT
decn_to_str_complete(&CURR_EST);
printf("sqrt %2d: %s\n", i, Buf);
#endif
//accum = est * est;
copy_decn(&BDecn, &AccDecn);
mult_decn();
//accum *= x_orig_2; //accum = x/2 * est * est
copy_decn(&BDecn, &X_2);
mult_decn();
//accum = - x/2 * est * est
negate_decn(&AccDecn);
//b = 3/2
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 15;
//accum = 3/2 - x/2 * est * est
add_decn();
//accum *= est; //accum = 0.5 * est * (3 - x * est * est)
copy_decn(&BDecn, &CURR_EST);
mult_decn();
//est = accum;
copy_decn(&CURR_EST, &AccDecn);
}
//calc sqrt from recip_sqrt
st_pop_decn(&BDecn);
mult_decn();
#undef CURR_EST
#undef X_COPY
}
#endif //USE_POW_SQRT_IMPL
// see W.E. Egbert, "Personal Calculator Algorithms II: Trigonometric functions" // see W.E. Egbert, "Personal Calculator Algorithms II: Trigonometric functions"
void project_decn_into_0_2pi(void) { void project_decn_into_0_2pi(void) {
const uint8_t is_negative = (AccDecn.exponent < 0); const uint8_t is_negative = (AccDecn.exponent < 0);
@ -1446,25 +1586,6 @@ void pi_decn(void) {
mult_decn(); mult_decn();
} }
void sqrt_decn(void) {
if (decn_is_zero(&AccDecn)) {
return;
}
if (decn_is_nan(&AccDecn)) {
return;
}
if (AccDecn.exponent < 0){ //negative
set_dec80_NaN(&AccDecn);
return;
}
st_push_decn(&BDecn); // sqrt should behave like an unary operation
//b = 0.5
set_dec80_zero(&BDecn);
BDecn.lsu[0] = 5;
pow_decn();
st_pop_decn(&BDecn);
}
static void set_str_error(void){ static void set_str_error(void){
Buf[0] = 'E'; Buf[0] = 'E';
Buf[1] = 'r'; Buf[1] = 'r';

View File

@ -341,6 +341,51 @@ TEST_CASE("division"){
); );
} }
static void sqrt_test(const char* x_str, int x_exp)
{
CAPTURE(x_str); CAPTURE(x_exp);
build_dec80(x_str, x_exp);
// decn_to_str_complete(&AccDecn);
// printf(" acc: %s\n", Buf);
sqrt_decn();
decn_to_str_complete(&AccDecn);
CAPTURE(Buf); // sqrt(x)
//calculate actual result
bmp::mpfr_float::default_precision(50);
std::string x_full_str(x_str);
x_full_str += "e" + std::to_string(x_exp);
CAPTURE(x_full_str);
bmp::mpfr_float x_actual(x_full_str);
CAPTURE(x_actual);
if (decn_is_nan(&AccDecn)){
//check that NaN is from result of sqrt(-)
CHECK(x_actual <= 0);
} else if (decn_is_zero(&AccDecn)){
//check actual is also 0
CHECK(x_actual == 0);
} else {
x_actual = sqrt(x_actual);
bmp::mpfr_float calculated(Buf);
bmp::mpfr_float rel_diff = abs((x_actual - calculated) / x_actual);
CHECK(rel_diff < 3e-16); //TODO
}
}
TEST_CASE("sqrt"){
sqrt_test("0", 0);
sqrt_test("2", 0);
sqrt_test("-1", 0);
sqrt_test("0.155", 0);
sqrt_test("10", 0);
sqrt_test("1.1", 10);
sqrt_test("2.02", -10);
sqrt_test("2.02", 0);
sqrt_test("1.5", 0);
sqrt_test("9", 99);
sqrt_test("123", 12345);
}
static void log_test( static void log_test(
//input //input
const char* x_str, int x_exp, const char* x_str, int x_exp,

View File

@ -23,7 +23,7 @@
#include <boost/multiprecision/mpfr.hpp> #include <boost/multiprecision/mpfr.hpp>
using namespace boost::multiprecision; using namespace boost::multiprecision;
// #define DEBUG #define DEBUG
using std::cout; using std::cout;
using std::endl; using std::endl;
@ -51,7 +51,7 @@ int main(void){
//loop through values to test //loop through values to test
#ifdef DEBUG #ifdef DEBUG
mpfr_float x(0.1, CALC_PRECISION); mpfr_float x(2.0, CALC_PRECISION);
{ {
#else #else
for (mpfr_float x(1e-99, CALC_PRECISION); x < 1e99; x *= 1.03){ for (mpfr_float x(1e-99, CALC_PRECISION); x < 1e99; x *= 1.03){
@ -87,7 +87,7 @@ int main(void){
x_exp++; x_exp++;
est_signif = -0.18 * x_signif + 2.5; est_signif = -0.18 * x_signif + 2.5;
} else { //even } else { //even
//keep x_exp as is and approximate estimate significand as //keep x_exp as is and approximate estimated significand as
// -0.056*x_signif + 0.79 // -0.056*x_signif + 0.79
est_signif = -0.056 * x_signif + 0.79; est_signif = -0.056 * x_signif + 0.79;
} }

View File

@ -3,6 +3,8 @@
""" """
Created on Wed Dec 11 00:56:01 2019 Created on Wed Dec 11 00:56:01 2019
Calculate constants used for "0x5F3759DF" reciprocal sqrt
@author: jeffrey @author: jeffrey
""" """

View File

@ -140,7 +140,7 @@ static void latch_on(void)
__xdata char EntryBuf[MAX_CHARS_PER_LINE + 1]; __xdata char EntryBuf[MAX_CHARS_PER_LINE + 1];
__xdata uint8_t ExpBuf[2]; __xdata uint8_t ExpBuf[2];
__code const char VER_STR[32+1] = "STC RPN Calculator v1.11"; __code const char VER_STR[32+1] = "STC RPN Calculator v1.12";
enum { enum {