strtod.c - hbase - heirloom base

strtod.c (8562B)
      1 /* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */
      2 #include <stdlib.h>
      3 #include <math.h>
      4 #include <ctype.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 #include <errno.h>
      8 #include "plan9.h"
      9 #include "fmt.h"
     10 #include "fmtdef.h"
     11 
     12 static ulong
     13 umuldiv(ulong a, ulong b, ulong c)
     14 {
     15 	double d;
     16 
     17 	d = ((double)a * (double)b) / (double)c;
     18 	if(d >= 4294967295.)
     19 		d = 4294967295.;
     20 	return (ulong)d;
     21 }
     22 
     23 /*
     24  * This routine will convert to arbitrary precision
     25  * floating point entirely in multi-precision fixed.
     26  * The answer is the closest floating point number to
     27  * the given decimal number. Exactly half way are
     28  * rounded ala ieee rules.
     29  * Method is to scale input decimal between .500 and .999...
     30  * with external power of 2, then binary search for the
     31  * closest mantissa to this decimal number.
     32  * Nmant is is the required precision. (53 for ieee dp)
     33  * Nbits is the max number of bits/word. (must be <= 28)
     34  * Prec is calculated - the number of words of fixed mantissa.
     35  */
     36 enum
     37 {
     38 	Nbits	= 28,				/* bits safely represented in a ulong */
     39 	Nmant	= 53,				/* bits of precision required */
     40 	Prec	= (Nmant+Nbits+1)/Nbits,	/* words of Nbits each to represent mantissa */
     41 	Sigbit	= 1<<(Prec*Nbits-Nmant),	/* first significant bit of Prec-th word */
     42 	Ndig	= 1500,
     43 	One	= (ulong)(1<<Nbits),
     44 	Half	= (ulong)(One>>1),
     45 	Maxe	= 310,
     46 
     47 	Fsign	= 1<<0,		/* found - */
     48 	Fesign	= 1<<1,		/* found e- */
     49 	Fdpoint	= 1<<2,		/* found . */
     50 
     51 	S0	= 0,		/* _		_S0	+S1	#S2	.S3 */
     52 	S1,			/* _+		#S2	.S3 */
     53 	S2,			/* _+#		#S2	.S4	eS5 */
     54 	S3,			/* _+.		#S4 */
     55 	S4,			/* _+#.#	#S4	eS5 */
     56 	S5,			/* _+#.#e	+S6	#S7 */
     57 	S6,			/* _+#.#e+	#S7 */
     58 	S7			/* _+#.#e+#	#S7 */
     59 };
     60 
     61 static	int	xcmp(char*, char*);
     62 static	int	fpcmp(char*, ulong*);
     63 static	void	frnorm(ulong*);
     64 static	void	divascii(char*, int*, int*, int*);
     65 static	void	mulascii(char*, int*, int*, int*);
     66 
     67 typedef	struct	Tab	Tab;
     68 struct	Tab
     69 {
     70 	int	bp;
     71 	int	siz;
     72 	char*	cmp;
     73 };
     74 
     75 double
     76 fmtstrtod(const char *as, char **aas)
     77 {
     78 	int na, ex, dp, bp, c, i, flag, state;
     79 	ulong low[Prec], hig[Prec], mid[Prec];
     80 	double d;
     81 	char *s, a[Ndig];
     82 
     83 	flag = 0;	/* Fsign, Fesign, Fdpoint */
     84 	na = 0;		/* number of digits of a[] */
     85 	dp = 0;		/* na of decimal point */
     86 	ex = 0;		/* exonent */
     87 
     88 	state = S0;
     89 	for(s=(char*)as;; s++) {
     90 		c = *s;
     91 		if(c >= '0' && c <= '9') {
     92 			switch(state) {
     93 			case S0:
     94 			case S1:
     95 			case S2:
     96 				state = S2;
     97 				break;
     98 			case S3:
     99 			case S4:
    100 				state = S4;
    101 				break;
    102 
    103 			case S5:
    104 			case S6:
    105 			case S7:
    106 				state = S7;
    107 				ex = ex*10 + (c-'0');
    108 				continue;
    109 			}
    110 			if(na == 0 && c == '0') {
    111 				dp--;
    112 				continue;
    113 			}
    114 			if(na < Ndig-50)
    115 				a[na++] = c;
    116 			continue;
    117 		}
    118 		switch(c) {
    119 		case '\t':
    120 		case '\n':
    121 		case '\v':
    122 		case '\f':
    123 		case '\r':
    124 		case ' ':
    125 			if(state == S0)
    126 				continue;
    127 			break;
    128 		case '-':
    129 			if(state == S0)
    130 				flag |= Fsign;
    131 			else
    132 				flag |= Fesign;
    133 		case '+':
    134 			if(state == S0)
    135 				state = S1;
    136 			else
    137 			if(state == S5)
    138 				state = S6;
    139 			else
    140 				break;	/* syntax */
    141 			continue;
    142 		case '.':
    143 			flag |= Fdpoint;
    144 			dp = na;
    145 			if(state == S0 || state == S1) {
    146 				state = S3;
    147 				continue;
    148 			}
    149 			if(state == S2) {
    150 				state = S4;
    151 				continue;
    152 			}
    153 			break;
    154 		case 'e':
    155 		case 'E':
    156 			if(state == S2 || state == S4) {
    157 				state = S5;
    158 				continue;
    159 			}
    160 			break;
    161 		}
    162 		break;
    163 	}
    164 
    165 	/*
    166 	 * clean up return char-pointer
    167 	 */
    168 	switch(state) {
    169 	case S0:
    170 		if(xcmp(s, "nan") == 0) {
    171 			if(aas != nil)
    172 				*aas = s+3;
    173 			goto retnan;
    174 		}
    175 	case S1:
    176 		if(xcmp(s, "infinity") == 0) {
    177 			if(aas != nil)
    178 				*aas = s+8;
    179 			goto retinf;
    180 		}
    181 		if(xcmp(s, "inf") == 0) {
    182 			if(aas != nil)
    183 				*aas = s+3;
    184 			goto retinf;
    185 		}
    186 	case S3:
    187 		if(aas != nil)
    188 			*aas = (char*)as;
    189 		goto ret0;	/* no digits found */
    190 	case S6:
    191 		s--;		/* back over +- */
    192 	case S5:
    193 		s--;		/* back over e */
    194 		break;
    195 	}
    196 	if(aas != nil)
    197 		*aas = s;
    198 
    199 	if(flag & Fdpoint)
    200 	while(na > 0 && a[na-1] == '0')
    201 		na--;
    202 	if(na == 0)
    203 		goto ret0;	/* zero */
    204 	a[na] = 0;
    205 	if(!(flag & Fdpoint))
    206 		dp = na;
    207 	if(flag & Fesign)
    208 		ex = -ex;
    209 	dp += ex;
    210 	if(dp < -Maxe){
    211 		errno = ERANGE;
    212 		goto ret0;	/* underflow by exp */
    213 	} else
    214 	if(dp > +Maxe)
    215 		goto retinf;	/* overflow by exp */
    216 
    217 	/*
    218 	 * normalize the decimal ascii number
    219 	 * to range .[5-9][0-9]* e0
    220 	 */
    221 	bp = 0;		/* binary exponent */
    222 	while(dp > 0)
    223 		divascii(a, &na, &dp, &bp);
    224 	while(dp < 0 || a[0] < '5')
    225 		mulascii(a, &na, &dp, &bp);
    226 
    227 	/* close approx by naive conversion */
    228 	mid[0] = 0;
    229 	mid[1] = 1;
    230 	for(i=0; (c=a[i]) != '\0'; i++) {
    231 		mid[0] = mid[0]*10 + (c-'0');
    232 		mid[1] = mid[1]*10;
    233 		if(i >= 8)
    234 			break;
    235 	}
    236 	low[0] = umuldiv(mid[0], One, mid[1]);
    237 	hig[0] = umuldiv(mid[0]+1, One, mid[1]);
    238 	for(i=1; i<Prec; i++) {
    239 		low[i] = 0;
    240 		hig[i] = One-1;
    241 	}
    242 
    243 	/* binary search for closest mantissa */
    244 	for(;;) {
    245 		/* mid = (hig + low) / 2 */
    246 		c = 0;
    247 		for(i=0; i<Prec; i++) {
    248 			mid[i] = hig[i] + low[i];
    249 			if(c)
    250 				mid[i] += One;
    251 			c = mid[i] & 1;
    252 			mid[i] >>= 1;
    253 		}
    254 		frnorm(mid);
    255 
    256 		/* compare */
    257 		c = fpcmp(a, mid);
    258 		if(c > 0) {
    259 			c = 1;
    260 			for(i=0; i<Prec; i++)
    261 				if(low[i] != mid[i]) {
    262 					c = 0;
    263 					low[i] = mid[i];
    264 				}
    265 			if(c)
    266 				break;	/* between mid and hig */
    267 			continue;
    268 		}
    269 		if(c < 0) {
    270 			for(i=0; i<Prec; i++)
    271 				hig[i] = mid[i];
    272 			continue;
    273 		}
    274 
    275 		/* only hard part is if even/odd roundings wants to go up */
    276 		c = mid[Prec-1] & (Sigbit-1);
    277 		if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0)
    278 			mid[Prec-1] -= c;
    279 		break;	/* exactly mid */
    280 	}
    281 
    282 	/* normal rounding applies */
    283 	c = mid[Prec-1] & (Sigbit-1);
    284 	mid[Prec-1] -= c;
    285 	if(c >= Sigbit/2) {
    286 		mid[Prec-1] += Sigbit;
    287 		frnorm(mid);
    288 	}
    289 	goto out;
    290 
    291 ret0:
    292 	return 0;
    293 
    294 retnan:
    295 	return __NaN();
    296 
    297 retinf:
    298 	/*
    299 	 * Unix strtod requires these.  Plan 9 would return Inf(0) or Inf(-1). */
    300 	errno = ERANGE;
    301 	if(flag & Fsign)
    302 		return -HUGE_VAL;
    303 	return HUGE_VAL;
    304 
    305 out:
    306 	d = 0;
    307 	for(i=0; i<Prec; i++)
    308 		d = d*One + mid[i];
    309 	if(flag & Fsign)
    310 		d = -d;
    311 	d = ldexp(d, bp - Prec*Nbits);
    312 	if(d == 0){	/* underflow */
    313 		errno = ERANGE;
    314 	}
    315 	return d;
    316 }
    317 
    318 static void
    319 frnorm(ulong *f)
    320 {
    321 	int i, c;
    322 
    323 	c = 0;
    324 	for(i=Prec-1; i>0; i--) {
    325 		f[i] += c;
    326 		c = f[i] >> Nbits;
    327 		f[i] &= One-1;
    328 	}
    329 	f[0] += c;
    330 }
    331 
    332 static int
    333 fpcmp(char *a, ulong* f)
    334 {
    335 	ulong tf[Prec];
    336 	int i, d, c;
    337 
    338 	for(i=0; i<Prec; i++)
    339 		tf[i] = f[i];
    340 
    341 	for(;;) {
    342 		/* tf *= 10 */
    343 		for(i=0; i<Prec; i++)
    344 			tf[i] = tf[i]*10;
    345 		frnorm(tf);
    346 		d = (tf[0] >> Nbits) + '0';
    347 		tf[0] &= One-1;
    348 
    349 		/* compare next digit */
    350 		c = *a;
    351 		if(c == 0) {
    352 			if('0' < d)
    353 				return -1;
    354 			if(tf[0] != 0)
    355 				goto cont;
    356 			for(i=1; i<Prec; i++)
    357 				if(tf[i] != 0)
    358 					goto cont;
    359 			return 0;
    360 		}
    361 		if(c > d)
    362 			return +1;
    363 		if(c < d)
    364 			return -1;
    365 		a++;
    366 	cont:;
    367 	}
    368 }
    369 
    370 static void
    371 divby(char *a, int *na, int b)
    372 {
    373 	int n, c;
    374 	char *p;
    375 
    376 	p = a;
    377 	n = 0;
    378 	while(n>>b == 0) {
    379 		c = *a++;
    380 		if(c == 0) {
    381 			while(n) {
    382 				c = n*10;
    383 				if(c>>b)
    384 					break;
    385 				n = c;
    386 			}
    387 			goto xx;
    388 		}
    389 		n = n*10 + c-'0';
    390 		(*na)--;
    391 	}
    392 	for(;;) {
    393 		c = n>>b;
    394 		n -= c<<b;
    395 		*p++ = c + '0';
    396 		c = *a++;
    397 		if(c == 0)
    398 			break;
    399 		n = n*10 + c-'0';
    400 	}
    401 	(*na)++;
    402 xx:
    403 	while(n) {
    404 		n = n*10;
    405 		c = n>>b;
    406 		n -= c<<b;
    407 		*p++ = c + '0';
    408 		(*na)++;
    409 	}
    410 	*p = 0;
    411 }
    412 
    413 static	Tab	tab1[] =
    414 {
    415 	 1,  0, "",
    416 	 3,  1, "7",
    417 	 6,  2, "63",
    418 	 9,  3, "511",
    419 	13,  4, "8191",
    420 	16,  5, "65535",
    421 	19,  6, "524287",
    422 	23,  7, "8388607",
    423 	26,  8, "67108863",
    424 	27,  9, "134217727",
    425 };
    426 
    427 static void
    428 divascii(char *a, int *na, int *dp, int *bp)
    429 {
    430 	int b, d;
    431 	Tab *t;
    432 
    433 	d = *dp;
    434 	if(d >= (int)(nelem(tab1)))
    435 		d = (int)(nelem(tab1))-1;
    436 	t = tab1 + d;
    437 	b = t->bp;
    438 	if(memcmp(a, t->cmp, t->siz) > 0)
    439 		d--;
    440 	*dp -= d;
    441 	*bp += b;
    442 	divby(a, na, b);
    443 }
    444 
    445 static void
    446 mulby(char *a, char *p, char *q, int b)
    447 {
    448 	int n, c;
    449 
    450 	n = 0;
    451 	*p = 0;
    452 	for(;;) {
    453 		q--;
    454 		if(q < a)
    455 			break;
    456 		c = *q - '0';
    457 		c = (c<<b) + n;
    458 		n = c/10;
    459 		c -= n*10;
    460 		p--;
    461 		*p = c + '0';
    462 	}
    463 	while(n) {
    464 		c = n;
    465 		n = c/10;
    466 		c -= n*10;
    467 		p--;
    468 		*p = c + '0';
    469 	}
    470 }
    471 
    472 static	Tab	tab2[] =
    473 {
    474 	 1,  1, "",				/* dp = 0-0 */
    475 	 3,  3, "125",
    476 	 6,  5, "15625",
    477 	 9,  7, "1953125",
    478 	13, 10, "1220703125",
    479 	16, 12, "152587890625",
    480 	19, 14, "19073486328125",
    481 	23, 17, "11920928955078125",
    482 	26, 19, "1490116119384765625",
    483 	27, 19, "7450580596923828125",		/* dp 8-9 */
    484 };
    485 
    486 static void
    487 mulascii(char *a, int *na, int *dp, int *bp)
    488 {
    489 	char *p;
    490 	int d, b;
    491 	Tab *t;
    492 
    493 	d = -*dp;
    494 	if(d >= (int)(nelem(tab2)))
    495 		d = (int)(nelem(tab2))-1;
    496 	t = tab2 + d;
    497 	b = t->bp;
    498 	if(memcmp(a, t->cmp, t->siz) < 0)
    499 		d--;
    500 	p = a + *na;
    501 	*bp -= b;
    502 	*dp += d;
    503 	*na += d;
    504 	mulby(a, p+d, p, b);
    505 }
    506 
    507 static int
    508 xcmp(char *a, char *b)
    509 {
    510 	int c1, c2;
    511 
    512 	while((c1 = *b++) != '\0') {
    513 		c2 = *a++;
    514 		if(isupper(c2))
    515 			c2 = tolower(c2);
    516 		if(c1 != c2)
    517 			return 1;
    518 	}
    519 	return 0;
    520 }
	hbase heirloom base
	git clone git://git.2f30.org/hbase
	Log \| Files \| Refs \| README