The program fpdemo.c is discussed in
section Section 2.3.2.2, and shows the effects of catastrophic
cancellation.
#include <stdio.h>
/* bytesex.h defines BIGENDIAN=1 (true) or 0 */
#include "bytesex.h"
/*
* pbits: given an integer i, put the rightmost n bits of the integer
* expressed in binary, into the string s.
*/
void pbits (unsigned int i, unsigned int n, char *s)
{
char *p;
for (p=s+n-1; p>=s; p--, i>>=1)
*p = ((i&1) ? '1' : '0');
return;
}
/* Given a float, return a (static) string holding its bit pattern */
char *print_float (float f)
{
static char b[35];
union {
float f;
struct {
#if BIGENDIAN
unsigned int s : 1;
unsigned int e : 8;
unsigned int m : 23;
#else
unsigned int m : 23;
unsigned int e : 8;
unsigned int s : 1;
#endif
} ieee;
} ieeefloat;
ieeefloat.f = f;
pbits (ieeefloat.ieee.s, 1, b);
b[1] = ' ';
pbits (ieeefloat.ieee.e, 8, &b[2]);
b[10] = ' ';
pbits (ieeefloat.ieee.m, 23, &b[11]);
b[34] = '\0';
return b;
}
main ()
{
float f, a, b, c;
double fd, ad, bd, cd;
printf ("\n You lose accuracy adding small numbers to large ones...\n");
/* 2^23+1 works OK... */
f = 8388608.0;
printf ("2^23 = %e = %s\n", f, print_float (f));
f = f + 1.0;
printf (" +1 = %e = %s\n", f, print_float (f));
/* ...but 2^24+1=2^24 - catastrophic loss of precision */
f = 16777216.0;
printf ("2^24 = %e = %s\n", f, print_float (f));
f = f + 1.0;
printf (" +1 = %e = %s\n", f, print_float (f));
printf ("\n\n Display the dread effects of catastrophic cancellation\n\
by calculating a*b-a*c, in several different ways. These are equivalent\n\
arithmetically, but not computationally.\n");
/* Now show the effect of catastrophic cancellation by calculating
* ab-ac, when a, b and c are such that the terms nearly cancel.
* To generate numbers which best demonstrate the effect,
* set a = (1+da/2^12), b=(1+db/2^12), etc. Thus
* a * b = (1 + db/2^12 + da/2^12 + da.db/2^24).
* Put da=1.0. Now pick db=1-, so that final term is just below 1/2^24,
* then dc=1+, so that final term is just above 1/2^24.
* Thus a*b rounds down, and a*c rounds up.
* Both are quite accurate, but large errors are revealed when
* they're subtracted. We can ameliorate this by rewriting the expression.
*/
/* First, do the calculation in double, to get relative errors. */
/* The cancellation is ignorable in double */
ad = 1.0 + ((double)1/4096);
bd = 1.0 + ((double)0.9/4096);
cd = 1.0 + ((double)1.1/4096);
fd = ad*bd-ad*cd;
a = 1.0 + ((float)1/4096); /* a=1.000244; */
b = 1.0 + ((float)0.9/4096); /* b=1.000220; */
c = 1.0 + ((float)1.1/4096); /* c=1.000269; */
/* first method - naive */
f = a*b-a*c;
printf ("a=%e b=%e c=%e\n", a, b, c);
printf ("a*b-a*c = %e (error=%e)\n", f, ((double)f/fd-1.0));
/* pre-subtract the nearly-equal b and c */
f = a*(b-c);
printf ("a*(b-c) = %e (%e)\n", f, ((double)f/fd-1.0));
/* rewrite the expression, to calculate a * ((b-1) - (c-1)).
Thus b-1 and c-1 have full accuracy */
b = ((float)0.9/4096); /* = (above b) -1 */
c = ((float)1.1/4096);
f = a*(b-c);
printf ("a((b-1)-(c-1))= %e (%e)\n", f, ((double)f/fd-1.0));
/* Can't do the same trick with a. If we calculate (a-1)*() + 1*(),
we don't get any improvement. We're not carelessly discarding
accuracy, now - we can't keep any more than this. */
printf ("\n ...and further illustrate what's happening by showing\n\
b and b-1. Note the extra accuracy in the latter.\n");
/* Display b and (b-1). Note extra accuracy in latter. */
b = 1.0 + ((float)0.9/4096);/* = (above b) */
printf ("1+1/10000 = %14.7e = %s\n", b, print_float (b));
b = ((float)0.9/4096); /* = (above b) -1 */
printf (" 1/10000 = %14.7e = %s\n", b, print_float (b));;
printf ("\n\n Display Infinity and NaN\n");
/* Display NaNs and Infinity */
/* Don't just write a=1.0/0.0, since compiler can warn about this */
a = 0.0; /* and log(0) */
a = 1/a;
b = 1/a;
c = a*b;
printf ("a = 1/0.0 = %14e = %s\n", a, print_float (a));
printf ("b = 1/a = %14e = %s\n", b, print_float (b));
printf ("c = a*b = %14e = %s\n", c, print_float (c));
}