add fast reciprocal sqrt() implementation

2020-09-07 18:18:20 -04:00 · 2020-09-07 18:18:20 -04:00 · 273e2f5742
commit 273e2f5742
parent 5f6a375c06
6 changed files with 202 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -206,7 +206,7 @@ I sometimes use an STC15F2K60S2 for development work. This microcontroller is av
 ## Programming with stcgal
-Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option).
+Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option), especially when using an inline resistor and diode.
 ~~~~
 $ ./stcgal.py -P stc15 -b 230400 stc_rpncalc/main.hex
@ -251,7 +251,7 @@ Disconnected!
 # Bugs
 1. After division by 0, ln(-), over/underflow, or other operations which give an `Error`, it's possible to still do certain operations on `Error`. Many functions do check, and will not operate on `Error`, but not all of them yet. This is somewhat similar to old soviet Elektronika calculators where `Error` is just a number, and there wasn't enough ROM space to check for errors. (There are people who explore the inner-workings of these calculators by manipulating the `Error` "number".)
-1. When shifted down, keys which do not have a shifted-down function will instead be interpreted as if there were no shift.
+1. When shifted, keys which do not have a shifted function will instead be interpreted as if there were no shift.
 1. Trigonometric functions are extremely slow and inaccurate.
 1. There are probably more bugs waiting to be discovered.
@ -300,7 +300,9 @@ The number `0.135` would be stored the same way, except now the exponent is `0x7
 - Exponentials are calculated similar to the HP 35 algorithm, as described [here](http://www.jacques-laporte.org/expx.htm) using the same constants as the logarithm algorithm.
 	- see `src/decn/proto/exp.cpp` for initial prototyping development work
 - Powers are calculated using the identity y^x = e^(x*ln(y))
- Square roots are calculated using the identity sqrt(x) = e^(0.5*ln(x))
+- Square roots are calculated using a fixed number of Newton-Raphson iterations to calculatie 1/sqrt(x) and then multiplying by x.
 	- the iteration for 1/sqrt(x) is new_estimate = 0.5*estimate * (3 - x * estimate * estimate)
 	- see `src/decn/proto/recip_sqrt.cpp for initial prototyping development work
 - Trigonometric functions are calculated using algorithms similar to the [sinclair scientific](http://files.righto.com/calculator/sinclair_scientific_simulator.html), and are fairly slow and inaccurate.
 ## TODO
--- a/src/decn/decn.c
+++ b/src/decn/decn.c
@ -35,6 +35,7 @@
 // #define DEBUG_LOG_ALL //even more verbose
 // #define DEBUG_EXP
 // #define DEBUG_EXP_ALL //even more verbose
 // #define DEBUG_SQRT
 #ifndef DESKTOP
 //#undef EXTRA_CHECKS
@ -47,6 +48,7 @@
 #undef DEBUG_LOG_ALL
 #undef DEBUG_EXP
 #undef DEBUG_EXP_ALL
 #undef DEBUG_SQRT
 #endif
 #ifdef DESKTOP
@ -68,8 +70,8 @@ static const uint8_t num_digits_display = 16;
 dec80 AccDecn;
 __idata dec80 BDecn;
 __idata dec80 TmpDecn; //used by add_decn() and mult_decn() and sqrt_decn()
-__idata dec80 Tmp2Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
+__idata dec80 Tmp2Decn; //used by recip_decn(), ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
-__xdata dec80 Tmp3Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
+__idata dec80 Tmp3Decn; //used by ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
 __xdata dec80 Tmp4Decn; //used by sincos_decn()
 __xdata dec80 TmpStackDecn[4];
@ -876,10 +878,10 @@ void recip_decn(void){
 		      CURR_RECIP.lsu[i] = 0;
 	}
 	copy_decn(&AccDecn, &CURR_RECIP);
-	//do newton raphson iterations
+	//do newton-raphson iterations
 	for (i = 0; i < 6; i++){ //just fix number of iterations for now
 #ifdef DEBUG_DIV
-		decn_to_str_complete(&curr_recip);
+		decn_to_str_complete(&CURR_RECIP);
 		printf("%2d: %s\n", i, Buf);
 #endif
 		//Accum *= x_copy
@ -1105,8 +1107,8 @@ void log10_decn(void){
 void exp_decn(void){
 	uint8_t j, k;
 	uint8_t need_recip = 0;
-	#define SAVED Tmp2Decn
+#define SAVED Tmp2Decn
-	#define NUM_TIMES Tmp3Decn
+#define NUM_TIMES Tmp3Decn
 	//check not error
 	if (decn_is_nan(&AccDecn)){
@ -1288,6 +1290,144 @@ void pow_decn(void) {
 	exp_decn();
 }
 #ifdef USE_POW_SQRT_IMPL
 void sqrt_decn(void) {
 	if (decn_is_zero(&AccDecn)) {
 		return;
 	}
 	if (decn_is_nan(&AccDecn)) {
 		return;
 	}
 	if (AccDecn.exponent < 0){ //negative
 		set_dec80_NaN(&AccDecn);
 		return;
 	}
 	st_push_decn(&BDecn); // sqrt should behave like an unary operation
 	//b = 0.5
 	set_dec80_zero(&BDecn);
 	BDecn.lsu[0] = 5;
 	pow_decn();
 	st_pop_decn(&BDecn);
 }
 #else
 void sqrt_decn(void){
 #define CURR_EST Tmp2Decn //holds current 1/sqrt(x) estimate
 #define X_2      Tmp3Decn //holds copy of original x / 2
 	uint8_t i;
 	exp_t initial_exp;
 	if (decn_is_nan(&AccDecn)) {
 		return;
 	}
 	if (AccDecn.exponent < 0){ //negative
 		set_dec80_NaN(&AccDecn);
 		return;
 	}
 	//normalize
 	remove_leading_zeros(&AccDecn);
 #ifdef DEBUG_SQRT
 	decn_to_str_complete(&AccDecn);
 	printf("sqrt in: %s\n", Buf);
 #endif
 	//store copy of x
 	st_push_decn(&AccDecn);
 	//calculate x_orig / 2
 	set_dec80_zero(&BDecn);
 	BDecn.lsu[0] = 5;
 	mult_decn();
 	copy_decn(&X_2, &AccDecn);
 	//restore x
 	st_load_decn(&AccDecn);
 	//get initial estimate for 1/sqrt(x) == 10^(-0.5 * log(x)):
 	// approximate significand == 10^(-0.5 * log(x_signif))
 	//  with linear approximation: -0.18 * x_signif + 2.5
 	// new exponent part is (10^(-0.5 * log(10^x_exp)))
 	//                    == 10^(-0.5 * x^exp)
 	initial_exp = get_exponent(&AccDecn);
 	set_exponent(&AccDecn, 0, 0); //clear exponent (Acc is not negative)
 #ifdef DEBUG_SQRT
 	printf("sqrt exponent %d ", initial_exp);
 #endif
 	if (initial_exp & 0x1){ //odd
 #ifdef DEBUG_SQRT
 		printf("(odd) ");
 #endif
 		//increment x_exp and
 		initial_exp++;
 		//approximate estimated significand as (-0.056*x_signif + 0.79) * 10^0.5
 		//                                  == -0.18 * x_signif + 2.5
 		//b = -0.18
 		BDecn.lsu[0] = 18;
 		BDecn.exponent = -1; //negative, and exponent = -1
 		//a = -0.18 * x_signif
 		mult_decn();
 		//b = 2.5
 		BDecn.lsu[0] = 25;
 		BDecn.exponent = 0;
 		//a = -0.18 * x_signif + 2.5
 		add_decn();
 	} else { //even
 		//keep x_exp as is and approximate estimated significand as
 		//                   -0.056*x_signif + 0.79
 		//b = -0.056
 		BDecn.lsu[0] = 56;
 		set_exponent(&BDecn, -2, 1);
 		//a = -0.056 * x_signif
 		mult_decn();
 		//b = 0.79
 		BDecn.lsu[0] = 7;
 		BDecn.lsu[1] = 90;
 		BDecn.exponent = 0;
 		//a = -0.056*x_signif + 0.79
 		add_decn();
 	}
 	//est_exp = -x_exp / 2;
 	initial_exp = -initial_exp / 2;
 	//est_exp-- if AccDecn exponent is negative
 	// (AccDecn exponent is either 0 or -1, and AccDecn is positive)
 	if (AccDecn.exponent != 0){
 		initial_exp--;
 	}
 	set_exponent(&AccDecn, initial_exp, 0); //(initial estimate is never negative)
 	copy_decn(&CURR_EST, &AccDecn);
 #ifdef DEBUG_SQRT
 	printf(" -> %d\n", initial_exp);
 #endif
 	//do newton-raphson iterations
 	for (i = 0; i < 6; i++){ //just fix number of iterations for now
 #ifdef DEBUG_SQRT
 		decn_to_str_complete(&CURR_EST);
 		printf("sqrt %2d: %s\n", i, Buf);
 #endif
 		//accum = est * est;
 		copy_decn(&BDecn, &AccDecn);
 		mult_decn();
 		//accum *= x_orig_2;     //accum = x/2 * est * est
 		copy_decn(&BDecn, &X_2);
 		mult_decn();
 		//accum = - x/2 * est * est
 		negate_decn(&AccDecn);
 		//b = 3/2
 		set_dec80_zero(&BDecn);
 		BDecn.lsu[0] = 15;
 		//accum = 3/2 - x/2 * est * est
 		add_decn();
 		//accum *= est;          //accum = 0.5 * est * (3 - x * est * est)
 		copy_decn(&BDecn, &CURR_EST);
 		mult_decn();
 		//est = accum;
 		copy_decn(&CURR_EST, &AccDecn);
 	}
 	//calc sqrt from recip_sqrt
 	st_pop_decn(&BDecn);
 	mult_decn();
 #undef CURR_EST
 #undef X_COPY
 }
 #endif //USE_POW_SQRT_IMPL
 // see W.E. Egbert, "Personal Calculator Algorithms II: Trigonometric functions"
 void project_decn_into_0_2pi(void) {
 	const uint8_t is_negative = (AccDecn.exponent < 0);
@ -1446,25 +1586,6 @@ void pi_decn(void) {
 	mult_decn();
 }
 void sqrt_decn(void) {
 	if (decn_is_zero(&AccDecn)) {
 		return;
 	}
 	if (decn_is_nan(&AccDecn)) {
 		return;
 	}
 	if (AccDecn.exponent < 0){ //negative
 		set_dec80_NaN(&AccDecn);
 		return;
 	}
 	st_push_decn(&BDecn); // sqrt should behave like an unary operation
 	//b = 0.5
 	set_dec80_zero(&BDecn);
 	BDecn.lsu[0] = 5;
 	pow_decn();
 	st_pop_decn(&BDecn);
 }
 static void set_str_error(void){
 	Buf[0] = 'E';
 	Buf[1] = 'r';
--- a/src/decn/decn_tests.cpp
+++ b/src/decn/decn_tests.cpp
@ -341,6 +341,51 @@ TEST_CASE("division"){
 	);
 }
 static void sqrt_test(const char* x_str, int x_exp)
 {
 	CAPTURE(x_str); CAPTURE(x_exp);
 	build_dec80(x_str, x_exp);
 	// 	decn_to_str_complete(&AccDecn);
 	// 	printf(" acc: %s\n", Buf);
 	sqrt_decn();
 	decn_to_str_complete(&AccDecn);
 	CAPTURE(Buf);  // sqrt(x)
 	//calculate actual result
 	bmp::mpfr_float::default_precision(50);
 	std::string x_full_str(x_str);
 	x_full_str += "e" + std::to_string(x_exp);
 	CAPTURE(x_full_str);
 	bmp::mpfr_float x_actual(x_full_str);
 	CAPTURE(x_actual);
 	if (decn_is_nan(&AccDecn)){
 		//check that NaN is from result of sqrt(-)
 		CHECK(x_actual <= 0);
 	} else if (decn_is_zero(&AccDecn)){
 		//check actual is also 0
 		CHECK(x_actual == 0);
 	} else {
 		x_actual = sqrt(x_actual);
 		bmp::mpfr_float calculated(Buf);
 		bmp::mpfr_float rel_diff = abs((x_actual - calculated) / x_actual);
 		CHECK(rel_diff < 3e-16); //TODO
 	}
 }
 TEST_CASE("sqrt"){
 	sqrt_test("0", 0);
 	sqrt_test("2", 0);
 	sqrt_test("-1", 0);
 	sqrt_test("0.155", 0);
 	sqrt_test("10", 0);
 	sqrt_test("1.1", 10);
 	sqrt_test("2.02", -10);
 	sqrt_test("2.02", 0);
 	sqrt_test("1.5", 0);
 	sqrt_test("9", 99);
 	sqrt_test("123", 12345);
 }
 static void log_test(
 	//input
 	const char* x_str, int x_exp,
--- a/src/decn/proto/recip_sqrt.cpp
+++ b/src/decn/proto/recip_sqrt.cpp
@ -23,7 +23,7 @@
 #include <boost/multiprecision/mpfr.hpp>
 using namespace boost::multiprecision;
-// #define DEBUG
+#define DEBUG
 using std::cout;
 using std::endl;
@ -51,7 +51,7 @@ int main(void){
 	//loop through values to test
 #ifdef DEBUG
-	mpfr_float x(0.1, CALC_PRECISION);
+	mpfr_float x(2.0, CALC_PRECISION);
 	{
 #else
 	for (mpfr_float x(1e-99, CALC_PRECISION); x < 1e99; x *= 1.03){
@ -87,7 +87,7 @@ int main(void){
 			x_exp++;
 			est_signif = -0.18 * x_signif + 2.5;
 		} else { //even
-			//keep x_exp as is and approximate estimate significand as
+			//keep x_exp as is and approximate estimated significand as
 			//                   -0.056*x_signif + 0.79
 			est_signif = -0.056 * x_signif + 0.79;
 		}
--- a/src/decn/proto/recip_sqrt.py
+++ b/src/decn/proto/recip_sqrt.py
@ -3,6 +3,8 @@
 """
 Created on Wed Dec 11 00:56:01 2019
 Calculate constants used for "0x5F3759DF" reciprocal sqrt
@author: jeffrey
 """
--- a/src/main.c
+++ b/src/main.c
@ -140,7 +140,7 @@ static void latch_on(void)
 __xdata char EntryBuf[MAX_CHARS_PER_LINE + 1];
 __xdata uint8_t ExpBuf[2];
-__code const char VER_STR[32+1] = "STC RPN         Calculator v1.11";
+__code const char VER_STR[32+1] = "STC RPN         Calculator v1.12";
 enum {