add fast reciprocal sqrt() implementation

2020-09-07 18:18:20 -04:00 · 2020-09-07 18:18:20 -04:00 · 273e2f5742
commit 273e2f5742
parent 5f6a375c06
6 changed files with 202 additions and 32 deletions
--- a/README.md
+++ b/README.md
@ -206,7 +206,7 @@ I sometimes use an STC15F2K60S2 for development work. This microcontroller is av

 ## Programming with stcgal

-Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option).
+Run `stcgal` as shown below, replacing `stc_rpncalc/main.hex` with the actual path to the main.hex you built. There are also prebuilt binaries in the `binaries` directory. In this example, I'm programming at a relatively high line rate of 230,400 bits/s. This works very reliably, but you may want to try at a slower speed to start (omit the `-b 230400` option), especially when using an inline resistor and diode.

 ~~~~
 $ ./stcgal.py -P stc15 -b 230400 stc_rpncalc/main.hex
@ -251,7 +251,7 @@ Disconnected!

 # Bugs
 1. After division by 0, ln(-), over/underflow, or other operations which give an `Error`, it's possible to still do certain operations on `Error`. Many functions do check, and will not operate on `Error`, but not all of them yet. This is somewhat similar to old soviet Elektronika calculators where `Error` is just a number, and there wasn't enough ROM space to check for errors. (There are people who explore the inner-workings of these calculators by manipulating the `Error` "number".)
-1. When shifted down, keys which do not have a shifted-down function will instead be interpreted as if there were no shift.
+1. When shifted, keys which do not have a shifted function will instead be interpreted as if there were no shift.
 1. Trigonometric functions are extremely slow and inaccurate.
 1. There are probably more bugs waiting to be discovered.

@ -300,7 +300,9 @@ The number `0.135` would be stored the same way, except now the exponent is `0x7
 - Exponentials are calculated similar to the HP 35 algorithm, as described [here](http://www.jacques-laporte.org/expx.htm) using the same constants as the logarithm algorithm.
 	- see `src/decn/proto/exp.cpp` for initial prototyping development work
 - Powers are calculated using the identity y^x = e^(x*ln(y))
- Square roots are calculated using the identity sqrt(x) = e^(0.5*ln(x))
+- Square roots are calculated using a fixed number of Newton-Raphson iterations to calculatie 1/sqrt(x) and then multiplying by x.
+	- the iteration for 1/sqrt(x) is new_estimate = 0.5*estimate * (3 - x * estimate * estimate)
+	- see `src/decn/proto/recip_sqrt.cpp for initial prototyping development work
 - Trigonometric functions are calculated using algorithms similar to the [sinclair scientific](http://files.righto.com/calculator/sinclair_scientific_simulator.html), and are fairly slow and inaccurate.

 ## TODO
--- a/src/decn/decn.c
+++ b/src/decn/decn.c
@ -35,6 +35,7 @@
 // #define DEBUG_LOG_ALL //even more verbose
 // #define DEBUG_EXP
 // #define DEBUG_EXP_ALL //even more verbose
+// #define DEBUG_SQRT

 #ifndef DESKTOP
 //#undef EXTRA_CHECKS
@ -47,6 +48,7 @@
 #undef DEBUG_LOG_ALL
 #undef DEBUG_EXP
 #undef DEBUG_EXP_ALL
+#undef DEBUG_SQRT
 #endif

 #ifdef DESKTOP
@ -68,8 +70,8 @@ static const uint8_t num_digits_display = 16;
 dec80 AccDecn;
 __idata dec80 BDecn;
 __idata dec80 TmpDecn; //used by add_decn() and mult_decn() and sqrt_decn()
-__idata dec80 Tmp2Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
-__xdata dec80 Tmp3Decn; //used by recip_decn() and ln_decn() and sincos_decn() and sqrt_decn()
+__idata dec80 Tmp2Decn; //used by recip_decn(), ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
+__idata dec80 Tmp3Decn; //used by ln_decn(), exp_decn(), sqrt_decn(), and sincos_decn()
 __xdata dec80 Tmp4Decn; //used by sincos_decn()

 __xdata dec80 TmpStackDecn[4];
@ -876,10 +878,10 @@ void recip_decn(void){
 		      CURR_RECIP.lsu[i] = 0;
 	}
 	copy_decn(&AccDecn, &CURR_RECIP);
-	//do newton raphson iterations
+	//do newton-raphson iterations
 	for (i = 0; i < 6; i++){ //just fix number of iterations for now
 #ifdef DEBUG_DIV
-		decn_to_str_complete(&curr_recip);
+		decn_to_str_complete(&CURR_RECIP);
 		printf("%2d: %s\n", i, Buf);
 #endif
 		//Accum *= x_copy
@ -1288,6 +1290,144 @@ void pow_decn(void) {
 	exp_decn();
 }

+#ifdef USE_POW_SQRT_IMPL
+void sqrt_decn(void) {
+	if (decn_is_zero(&AccDecn)) {
+		return;
+	}
+	if (decn_is_nan(&AccDecn)) {
+		return;
+	}
+	if (AccDecn.exponent < 0){ //negative
+		set_dec80_NaN(&AccDecn);
+		return;
+	}
+	st_push_decn(&BDecn); // sqrt should behave like an unary operation
+	//b = 0.5
+	set_dec80_zero(&BDecn);
+	BDecn.lsu[0] = 5;
+	pow_decn();
+	st_pop_decn(&BDecn);
+}
+#else
+void sqrt_decn(void){
+#define CURR_EST Tmp2Decn //holds current 1/sqrt(x) estimate
+#define X_2      Tmp3Decn //holds copy of original x / 2
+	uint8_t i;
+	exp_t initial_exp;
+	if (decn_is_nan(&AccDecn)) {
+		return;
+	}
+	if (AccDecn.exponent < 0){ //negative
+		set_dec80_NaN(&AccDecn);
+		return;
+	}
+	//normalize
+	remove_leading_zeros(&AccDecn);
+#ifdef DEBUG_SQRT
+	decn_to_str_complete(&AccDecn);
+	printf("sqrt in: %s\n", Buf);
+#endif
+	//store copy of x
+	st_push_decn(&AccDecn);
+	//calculate x_orig / 2
+	set_dec80_zero(&BDecn);
+	BDecn.lsu[0] = 5;
+	mult_decn();
+	copy_decn(&X_2, &AccDecn);
+	//restore x
+	st_load_decn(&AccDecn);
+	//get initial estimate for 1/sqrt(x) == 10^(-0.5 * log(x)):
+	// approximate significand == 10^(-0.5 * log(x_signif))
+	//  with linear approximation: -0.18 * x_signif + 2.5
+	// new exponent part is (10^(-0.5 * log(10^x_exp)))
+	//                    == 10^(-0.5 * x^exp)
+	initial_exp = get_exponent(&AccDecn);
+	set_exponent(&AccDecn, 0, 0); //clear exponent (Acc is not negative)
+#ifdef DEBUG_SQRT
+	printf("sqrt exponent %d ", initial_exp);
+#endif
+	if (initial_exp & 0x1){ //odd
+#ifdef DEBUG_SQRT
+		printf("(odd) ");
+#endif
+		//increment x_exp and
+		initial_exp++;
+		//approximate estimated significand as (-0.056*x_signif + 0.79) * 10^0.5
+		//                                  == -0.18 * x_signif + 2.5
+		//b = -0.18
+		BDecn.lsu[0] = 18;
+		BDecn.exponent = -1; //negative, and exponent = -1
+		//a = -0.18 * x_signif
+		mult_decn();
+		//b = 2.5
+		BDecn.lsu[0] = 25;
+		BDecn.exponent = 0;
+		//a = -0.18 * x_signif + 2.5
+		add_decn();
+	} else { //even
+		//keep x_exp as is and approximate estimated significand as
+		//                   -0.056*x_signif + 0.79
+		//b = -0.056
+		BDecn.lsu[0] = 56;
+		set_exponent(&BDecn, -2, 1);
+		//a = -0.056 * x_signif
+		mult_decn();
+		//b = 0.79
+		BDecn.lsu[0] = 7;
+		BDecn.lsu[1] = 90;
+		BDecn.exponent = 0;
+		//a = -0.056*x_signif + 0.79
+		add_decn();
+	}
+	//est_exp = -x_exp / 2;
+	initial_exp = -initial_exp / 2;
+	//est_exp-- if AccDecn exponent is negative
+	// (AccDecn exponent is either 0 or -1, and AccDecn is positive)
+	if (AccDecn.exponent != 0){
+		initial_exp--;
+	}
+	set_exponent(&AccDecn, initial_exp, 0); //(initial estimate is never negative)
+	copy_decn(&CURR_EST, &AccDecn);
+#ifdef DEBUG_SQRT
+	printf(" -> %d\n", initial_exp);
+#endif
+	//do newton-raphson iterations
+	for (i = 0; i < 6; i++){ //just fix number of iterations for now
+#ifdef DEBUG_SQRT
+		decn_to_str_complete(&CURR_EST);
+		printf("sqrt %2d: %s\n", i, Buf);
+#endif
+		//accum = est * est;
+		copy_decn(&BDecn, &AccDecn);
+		mult_decn();
+		//accum *= x_orig_2;     //accum = x/2 * est * est
+		copy_decn(&BDecn, &X_2);
+		mult_decn();
+		//accum = - x/2 * est * est
+		negate_decn(&AccDecn);
+		//b = 3/2
+		set_dec80_zero(&BDecn);
+		BDecn.lsu[0] = 15;
+		//accum = 3/2 - x/2 * est * est
+		add_decn();
+		//accum *= est;          //accum = 0.5 * est * (3 - x * est * est)
+		copy_decn(&BDecn, &CURR_EST);
+		mult_decn();
+		//est = accum;
+		copy_decn(&CURR_EST, &AccDecn);
+	}
+
+	//calc sqrt from recip_sqrt
+	st_pop_decn(&BDecn);
+	mult_decn();
+
+#undef CURR_EST
+#undef X_COPY
+}
+#endif //USE_POW_SQRT_IMPL
+
+
 // see W.E. Egbert, "Personal Calculator Algorithms II: Trigonometric functions"
 void project_decn_into_0_2pi(void) {
 	const uint8_t is_negative = (AccDecn.exponent < 0);
@ -1446,25 +1586,6 @@ void pi_decn(void) {
 	mult_decn();
 }

-void sqrt_decn(void) {
-	if (decn_is_zero(&AccDecn)) {
-		return;
-	}
-	if (decn_is_nan(&AccDecn)) {
-		return;
-	}
-	if (AccDecn.exponent < 0){ //negative
-		set_dec80_NaN(&AccDecn);
-		return;
-	}
-	st_push_decn(&BDecn); // sqrt should behave like an unary operation
-	//b = 0.5
-	set_dec80_zero(&BDecn);
-	BDecn.lsu[0] = 5;
-	pow_decn();
-	st_pop_decn(&BDecn);
-}
-
 static void set_str_error(void){
 	Buf[0] = 'E';
 	Buf[1] = 'r';
--- a/src/decn/decn_tests.cpp
+++ b/src/decn/decn_tests.cpp
@ -341,6 +341,51 @@ TEST_CASE("division"){
 	);
 }

+static void sqrt_test(const char* x_str, int x_exp)
+{
+	CAPTURE(x_str); CAPTURE(x_exp);
+	build_dec80(x_str, x_exp);
+	// 	decn_to_str_complete(&AccDecn);
+	// 	printf(" acc: %s\n", Buf);
+	sqrt_decn();
+	decn_to_str_complete(&AccDecn);
+	CAPTURE(Buf);  // sqrt(x)
+
+	//calculate actual result
+	bmp::mpfr_float::default_precision(50);
+	std::string x_full_str(x_str);
+	x_full_str += "e" + std::to_string(x_exp);
+	CAPTURE(x_full_str);
+	bmp::mpfr_float x_actual(x_full_str);
+	CAPTURE(x_actual);
+	if (decn_is_nan(&AccDecn)){
+		//check that NaN is from result of sqrt(-)
+		CHECK(x_actual <= 0);
+	} else if (decn_is_zero(&AccDecn)){
+		//check actual is also 0
+		CHECK(x_actual == 0);
+	} else {
+		x_actual = sqrt(x_actual);
+		bmp::mpfr_float calculated(Buf);
+		bmp::mpfr_float rel_diff = abs((x_actual - calculated) / x_actual);
+		CHECK(rel_diff < 3e-16); //TODO
+	}
+}
+
+TEST_CASE("sqrt"){
+	sqrt_test("0", 0);
+	sqrt_test("2", 0);
+	sqrt_test("-1", 0);
+	sqrt_test("0.155", 0);
+	sqrt_test("10", 0);
+	sqrt_test("1.1", 10);
+	sqrt_test("2.02", -10);
+	sqrt_test("2.02", 0);
+	sqrt_test("1.5", 0);
+	sqrt_test("9", 99);
+	sqrt_test("123", 12345);
+}
+
 static void log_test(
 	//input
 	const char* x_str, int x_exp,
--- a/src/decn/proto/recip_sqrt.cpp
+++ b/src/decn/proto/recip_sqrt.cpp
@ -23,7 +23,7 @@
 #include <boost/multiprecision/mpfr.hpp>
 using namespace boost::multiprecision;

-// #define DEBUG
+#define DEBUG

 using std::cout;
 using std::endl;
@ -51,7 +51,7 @@ int main(void){

 	//loop through values to test
 #ifdef DEBUG
-	mpfr_float x(0.1, CALC_PRECISION);
+	mpfr_float x(2.0, CALC_PRECISION);
 	{
 #else
 	for (mpfr_float x(1e-99, CALC_PRECISION); x < 1e99; x *= 1.03){
@ -87,7 +87,7 @@ int main(void){
 			x_exp++;
 			est_signif = -0.18 * x_signif + 2.5;
 		} else { //even
-			//keep x_exp as is and approximate estimate significand as
+			//keep x_exp as is and approximate estimated significand as
 			//                   -0.056*x_signif + 0.79
 			est_signif = -0.056 * x_signif + 0.79;
 		}
--- a/src/decn/proto/recip_sqrt.py
+++ b/src/decn/proto/recip_sqrt.py
@ -3,6 +3,8 @@
 """
 Created on Wed Dec 11 00:56:01 2019

+Calculate constants used for "0x5F3759DF" reciprocal sqrt
+
@author: jeffrey
 """

--- a/src/main.c
+++ b/src/main.c
@ -140,7 +140,7 @@ static void latch_on(void)

 __xdata char EntryBuf[MAX_CHARS_PER_LINE + 1];
 __xdata uint8_t ExpBuf[2];
-__code const char VER_STR[32+1] = "STC RPN         Calculator v1.11";
+__code const char VER_STR[32+1] = "STC RPN         Calculator v1.12";


 enum {