Next Up Previous Contents
Next: A.4 Profiling
Up: A Example programs
Previous: A.2 fpp.c
[ID index][Keyword index]

A.3 fpdemo.c

The program fpdemo.c is discussed in section Section 2.3.2.2, and shows the effects of catastrophic cancellation.


#include <stdio.h>

/* bytesex.h defines BIGENDIAN=1 (true) or 0 */
#include "bytesex.h"

/*
 * pbits: given an integer i, put the rightmost n bits of the integer 
 * expressed in binary, into the string s.
 */
void pbits (unsigned int i, unsigned int n, char *s)
{
    char *p;
    for (p=s+n-1; p>=s; p--, i>>=1)
        *p = ((i&1) ? '1' : '0');
    return;
}

/* Given a float, return a (static) string holding its bit pattern */
char *print_float (float f)
{
    static char b[35];
    union {
        float f;
        struct {
#if BIGENDIAN
            unsigned int s : 1;
            unsigned int e : 8;
            unsigned int m : 23;
#else
            unsigned int m : 23;
            unsigned int e : 8;
            unsigned int s : 1;
#endif
        } ieee;
    } ieeefloat;

    ieeefloat.f = f;

    pbits (ieeefloat.ieee.s, 1,  b);
    b[1] = ' ';
    pbits (ieeefloat.ieee.e, 8,  &b[2]);
    b[10] = ' ';
    pbits (ieeefloat.ieee.m, 23, &b[11]);
    b[34] = '\0';
    
    return b;
}


main ()
{
    float f, a, b, c;
    double fd, ad, bd, cd;

    printf ("\n  You lose accuracy adding small numbers to large ones...\n");
    /* 2^23+1 works OK... */
    f = 8388608.0;
    printf ("2^23 = %e = %s\n", f, print_float (f));
    f = f + 1.0;
    printf ("  +1 = %e = %s\n", f, print_float (f));
    /* ...but 2^24+1=2^24 - catastrophic loss of precision */
    f = 16777216.0;
    printf ("2^24 = %e = %s\n", f, print_float (f));
    f = f + 1.0;
    printf ("  +1 = %e = %s\n", f, print_float (f));

    printf ("\n\n  Display the dread effects of catastrophic cancellation\n\
  by calculating a*b-a*c, in several different ways.  These are equivalent\n\
  arithmetically, but not computationally.\n");
    /* Now show the effect of catastrophic cancellation by calculating
     * ab-ac, when a, b and c are such that the terms nearly cancel.
     * To generate numbers which best demonstrate the effect,
     * set a = (1+da/2^12), b=(1+db/2^12), etc.  Thus
     * a * b = (1 + db/2^12 + da/2^12 + da.db/2^24).
     * Put da=1.0.  Now pick db=1-, so that final term is just below 1/2^24, 
     * then dc=1+, so that final term is just above 1/2^24.
     * Thus a*b rounds down, and a*c rounds up.
     * Both are quite accurate, but large errors are revealed when
     * they're subtracted.  We can ameliorate this by rewriting the expression.
     */

    /* First, do the calculation in double, to get relative errors. */
    /* The cancellation is ignorable in double */
    ad = 1.0 + ((double)1/4096);
    bd = 1.0 + ((double)0.9/4096);
    cd = 1.0 + ((double)1.1/4096);
    fd = ad*bd-ad*cd;

    a = 1.0 + ((float)1/4096);  /* a=1.000244; */
    b = 1.0 + ((float)0.9/4096); /* b=1.000220; */
    c = 1.0 + ((float)1.1/4096); /* c=1.000269; */

    /* first method - naive */
    f = a*b-a*c;
    printf ("a=%e  b=%e  c=%e\n", a, b, c);
    printf ("a*b-a*c       = %e  (error=%e)\n", f, ((double)f/fd-1.0));

    /* pre-subtract the nearly-equal b and c */
    f = a*(b-c);
    printf ("a*(b-c)       = %e  (%e)\n", f, ((double)f/fd-1.0));

    /* rewrite the expression, to calculate a * ((b-1) - (c-1)).
       Thus b-1 and c-1 have full accuracy */
    b = ((float)0.9/4096);      /* = (above b) -1 */
    c = ((float)1.1/4096);
    f = a*(b-c);

    printf ("a((b-1)-(c-1))= %e  (%e)\n", f, ((double)f/fd-1.0));
    /* Can't do the same trick with a.  If we calculate (a-1)*() + 1*(),
       we don't get any improvement.  We're not carelessly discarding
       accuracy, now - we can't keep any more than this. */

    printf ("\n  ...and further illustrate what's happening by showing\n\
  b and b-1.  Note the extra accuracy in the latter.\n");
    /* Display b and (b-1).  Note extra accuracy in latter. */
    b = 1.0 + ((float)0.9/4096);/* = (above b) */
    printf ("1+1/10000 = %14.7e = %s\n", b, print_float (b));
    b = ((float)0.9/4096);      /* = (above b) -1 */
    printf ("  1/10000 = %14.7e = %s\n", b, print_float (b));;

    printf ("\n\n  Display Infinity and NaN\n");
    /* Display NaNs and Infinity */
    /* Don't just write a=1.0/0.0, since compiler can warn about this */
    a = 0.0;                    /* and log(0) */
    a = 1/a;
    b = 1/a;
    c = a*b;
    printf ("a = 1/0.0 = %14e = %s\n", a, print_float (a));
    printf ("b = 1/a   = %14e = %s\n", b, print_float (b));
    printf ("c = a*b   = %14e = %s\n", c, print_float (c));
}
    


Next Up Previous Contents
Next: A.4 Profiling
Up: A Example programs
Previous: A.2 fpp.c
[ID index][Keyword index]
Theory and Modelling Resources Cookbook
Starlink Cookbook 13
Norman Gray
2 December 2001. Release 2-5. Last updated 10 March 2003