import astropy
from astropy.table import Table
import os
from glob import glob
import numpy as np
import george
from george import kernels
import emcee
import corner
import matplotlib.pyplot as plt
%matplotlib inline

The data used to train the model come from the Georgia Tech Waveform catalogue.

headers = ['Index', 'Name', 'tag', '$q$', '$a_{1x}$', '$a_{1y}$', '$a_{1z}$', '$a_{2x}$', '$a_{2y}$', '$a_{2z}$', '$L_x$', '$L_y$', '$L_z$', 'mf', 'af', 'mW']
t = Table.read('/home/daniel/data/gravitational-waves/gt-new/GT_CATALOG_TABLE.txt', format="ascii", names=headers)
def find_data(tag, path = "/home/daniel/data/gravitational-waves/gt-old/"):
    """
    Find the data files which contain the NR data for a given tag.
    """
    result = [y for x in os.walk(path) for y in glob(os.path.join(x[0], '*{}*.asc'.format(tag)))]
    return result

Let’s create a file for the training data set.

columns = ['t', '$q$', '$a_{1x}$', '$a_{1y}$', '$a_{1z}$', '$a_{2x}$', '$a_{2y}$', '$a_{2z}$']
training_x = []
training_y = []
for j,row in enumerate(t.to_pandas().iterrows()):
    waveform_file = find_data(row[1]['tag'])
    if len(waveform_file)!=1:
        continue
    data = np.loadtxt(waveform_file[0])[::5]
    
    hrss = np.sqrt(data[:,1]**2 + data[:,2]**2)
    
    data[:,0] = data[:,0] - data[np.argmax(hrss),0]
    times = data[:,0][hrss.argmax()-460:hrss.argmax() + 40]
    # Keep up to 500 samples from each waveform: 460 before the peak and 40 after
    if len(times)==0: continue
    rowdata = np.zeros((len(columns), len(times)))
    for i, col in enumerate(columns):
        if i == 0: 
            rowdata[i,:] = data[:,0][hrss.argmax()-460:hrss.argmax() + 40]
        else:
            rowdata[i,:] = np.tile(row[1][col], len(times))
    training_y.append(data[:,2][hrss.argmax() - 460:hrss.argmax() + 40])
    training_x.append(np.atleast_2d(rowdata))
training_y = np.hstack(training_y)
training_x = np.hstack(training_x)
np.savetxt("GT_training_x.txt", training_x)
np.savetxt("GT_training_y.txt", training_y)

In order to select the “zero” time of each waveform we find the time of the maximum hrss and define that as $t = 0$.
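Explicitly, the peak is located using the instantaneous amplitude

$$ h_\mathrm{rss}(t) = \sqrt{h_+(t)^2 + h_\times(t)^2}, $$

where $h_+$ and $h_\times$ are taken here to be the two strain columns of the .asc files, and the time axis is then shifted so that $t \rightarrow t - t_\mathrm{peak}$; this is what the hrss and argmax lines in the loop above implement.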

Let’s do a little further data reduction, and make all of the training_x values scale from 0 to 1 rather than their current ranges.

def normalise(array):
    array = array.copy().T
    dc = array.min(axis=0)
    array -= dc
    scale = array.max(axis=0)
    array /= scale
    return array.T, dc, scale
training_x, x_dc, x_scale = normalise(training_x)
#training_y, y_dc, y_scale = normalise(training_y)
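
The x_dc and x_scale arrays need to be kept around if we want to query the model at physical parameter values later on. A minimal sketch of the forward and inverse maps (to_unit and from_unit are illustrative helpers, not part of the pipeline above):

def to_unit(physical, dc=x_dc, scale=x_scale):
    """Map a vector of physical parameters (including time) into the [0, 1] training coordinates."""
    return (np.asarray(physical, dtype=float) - dc) / scale

def from_unit(unit, dc=x_dc, scale=x_scale):
    """Invert the normalisation to recover physical parameter values."""
    return np.asarray(unit, dtype=float) * scale + dc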

Define the model using a Matern 5/2 kernel.

rng = np.random.RandomState(0)
# Draw a random subset of 1000 points from the full training set
ixs = rng.randint(training_x.shape[1], size=1000)
training_x_batch, training_y_batch = training_x[:, ixs].T, training_y[ixs]
k0 = np.std(training_y_batch)**2
sep = np.array([0.01, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
# Start the length scales from random values; they are fitted below
sep = np.random.rand(len(sep))
k3 = kernels.Matern52Kernel(sep**2, ndim=len(sep))
kernel = k0 + k3
gp = george.GP(kernel, tol=1e-6, solver=george.HODLRSolver)
gp.compute(training_x_batch, yerr=0.001)
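
For reference, the covariance implemented by kernels.Matern52Kernel is

$$ k(r^2) = \left(1 + \sqrt{5 r^2} + \tfrac{5}{3} r^2\right) \exp\!\left(-\sqrt{5 r^2}\right), \qquad r^2 = \sum_i \frac{(x_i - x'_i)^2}{l_i^2}, $$

so the sep**2 values passed above are the squared length scales $l_i^2$ along each of the eight parameter dimensions. Note that writing kernel = k0 + k3 adds a constant term of magnitude k0 to the covariance rather than scaling the Matérn term.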

Training

Then train the model using an MCMC process, fitting the hyperparameters by sampling the marginal likelihood (evidence) of the GP.

Uniform priors are assigned to each hyperparameter (because they’re the easiest to code up: any prior could be used). For the log of the constant kernel (k0 in the code) this is $U(-20,\infty)$; for the time scale length $\log(l_0) \sim U(0,15)$; and for all of the other log scale lengths it is $U(-15, 15)$.

from scipy.optimize import minimize
def neg_ln_like(p):
    gp.set_vector(p)
    return -gp.lnlikelihood(training_y_batch)

def ln_like(p):
    # Uniform (log-)priors on the hyperparameters, as described above
    if np.any(p[2:]>15): return -np.inf
    if np.any(p[2:]<-15): return -np.inf
    #if p[0]<2: return -np.inf
    if p[0]<-20: return -np.inf
    #if -12>p[1]>-8: return -np.inf
    if not 0 < p[1] < 15: return -np.inf
    gp.set_vector(p)
    return gp.lnlikelihood(training_y_batch)

def grad_neg_ln_like(p):
    gp.set_vector(p)
    return -gp.grad_lnlikelihood(training_y_batch)

One approach to finding the hyperparameters is to just use the MAP estimate, which we can do with the BFGS algorithm.

# First find the MAP estimate
MAP = minimize(neg_ln_like, gp.get_vector(), method="BFGS")
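
Since grad_neg_ln_like is defined above, the analytic gradient could also be passed to the optimiser, which usually speeds up convergence. A hypothetical variant (not used for the results below):

# Supply the analytic gradient of the negative log likelihood to BFGS
MAP_with_grad = minimize(neg_ln_like, gp.get_vector(), jac=grad_neg_ln_like, method="BFGS")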

However, to better understand the behaviour of the model with respect to its hyperparameters we can sample using an MCMC.

from IPython.core.display import clear_output
def run_sampler(sampler, initial, iterations):
    """
    Run the MCMC sampler for (approximately) the given number of iterations,
    printing a progress percentage so you can keep track of what's going on.
    """
    sampler.run_mcmc(initial, 1)
    for iteration in xrange(iterations/10 - 1):
        # Advance every walker by ten steps from wherever it last stopped
        sampler.run_mcmc(None, 10)
        clear_output()
        print "{}% \r".format(100 * 10 * (iteration + 1) / float(iterations))
    return sampler

Run 1000 samples for the burn-in

ndim, nwalkers = len(sep)+1, 100
p0 = [np.random.rand(len(sep)+1) for i in range(nwalkers)]
#p0 = [MAP.x for i in range(nwalkers)]
sampler = emcee.EnsembleSampler(nwalkers, ndim, ln_like, threads=4)
burn = run_sampler(sampler, p0, 1000)
#samples = sampler.run_mcmc(p0, 500)
#samples = sampler.chain[:, :, :].reshape((-1, ndim))
 99.0% 

Inspecting the burn-in, it looks like the MCMC is converging fairly well for each scale length (however, it is worth noting that this is only on a subset of the total training data).

samples = burn.chain[:, :, :].reshape((-1, ndim))
fig = corner.corner(samples, labels=gp.kernel.get_parameter_names(), truths=np.median(samples, axis=0))
[Figure: corner plot of the hyperparameter samples from the burn-in chain]
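
As an additional convergence check (a sketch, not part of the original analysis), the walker traces for each hyperparameter can be plotted directly from the burn-in chain:

# Trace plots of the burn-in chain, one panel per hyperparameter
fig, axes = plt.subplots(ndim, 1, figsize=(8, 2 * ndim), sharex=True)
for dim in range(ndim):
    # burn.chain has shape (nwalkers, nsteps, ndim)
    axes[dim].plot(burn.chain[:, :, dim].T, color="k", alpha=0.2)
    axes[dim].set_ylabel("p[{}]".format(dim))
axes[-1].set_xlabel("step")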
def draw_waveform(parameters, start, end):
    """
    Draw a sample waveform from the Gaussian Process.
    """
    times = np.linspace(start, end, 200)
    rowdata = np.zeros((len(parameters), len(times)))
    
    for i, parameter in enumerate(parameters):
        if i == 0:
            rowdata[i,:] = times
        else:
            rowdata[i,:] = np.tile(parameter, len(times))
    prediction = gp.predict(training_y_batch, rowdata.T)
    return times, prediction

We can see the difference between the MCMC estimate of the hyperparameters and the MAP estimate:

print np.median(samples[900:], axis=0)
print MAP.x
gp.set_vector(np.median(samples, axis=0))
#gp.set_vector(MAP.x)
 [-6.3305358  -7.15205328  1.74945289 -0.80468957  4.03947842  3.37911173
 -3.82367335  0.88604704 -1.89391033]
[ -6.10398307e+00  -8.84487910e-01  -4.77508964e-03  -3.91684510e-01
  -3.00549832e-02  -4.03256862e-01  -7.82583915e-02  -5.46419445e-01
  -1.02489770e+01]

parameters = training_x_batch[1,:]
#gp.set_vector(hypers)
test_waveform = draw_waveform(parameters, 0, 1)

A quick sanity check shows that we’re getting something which is similar to the exact waveform, but the sparsity of the training data (and the use of the burn-in samples) has probably resulted in the obvious errors in this waveform.

plt.plot(test_waveform[0], test_waveform[1][0])
#plt.plot(training_x_batch[0,:], training_y_batch)
# Shade the +/- 1 sigma prediction interval (square root of the predictive variance)
sigma = np.sqrt(np.diag(test_waveform[1][1]))
plt.fill_between(test_waveform[0], test_waveform[1][0] - sigma, test_waveform[1][0] + sigma, alpha=0.2)
[Figure: a waveform drawn from the GP, with the predictive uncertainty shaded]

Make the “production” samples.

sampler = run_sampler(sampler, p0, 2000)
 99.5% 

samples = sampler.chain[:, :, :].reshape((-1, ndim))
fig = corner.corner(samples, labels=gp.kernel.get_parameter_names(), truths=np.median(samples, axis=0))
[Figure: corner plot of the production hyperparameter samples]
def gen2plane(col1, col2, intersept=[0, 1.5, 0.8, 0.8, 60., 180., 30., 75., 22.], resolution=100):
    """
    Evaluate the GP prediction and its uncertainty over a two-dimensional slice
    through the parameter space, varying col1 and col2 while holding the
    remaining parameters fixed at the values in intersept.
    """
    res = resolution
    pdata = np.zeros((res, res))
    udata = np.zeros((res, res))
    col1_ax = np.linspace(0, 1, res)
    col2_ax = np.linspace(0, 1, res)
    col1_loc = cols.index(col1)
    col2_loc = cols.index(col2)
    xv, yv = np.meshgrid(col1_ax, col2_ax, sparse=False, indexing='xy')
    for i in xrange(res):
        for j in xrange(res):
            new_vec = np.copy(intersept)
            new_vec[col1_loc] = xv[i,j]
            new_vec[col2_loc] = yv[i,j]
            # Calculate the spin/mass surface for time = 0.00
            pdata[i][j], udata[i][j] = gp.predict(training_y_batch, [new_vec])
    return pdata, udata, [col1_ax.min(), col1_ax.max(), col2_ax.min(), col2_ax.max()]

We can then look at the error behaviour of this model through two-dimensional hyper-slices of the parameter space.

cols = columns
f, ax = plt.subplots(len(cols), len(cols), figsize = (13,13))
for i in range(0,len(cols)):
    for j in range(0,len(cols)):
        print "Producing plot at {} {}".format(i, j)
        if j<i: 
            ax[j,i].axis('off')
            continue
        elif i == j:
            ax[j,i].axis("off")
            #plt.setp(ax[j,i].get_yticklabels(), visible=True)
            #plt.setp(ax[j,i].get_xticklabels(), visible=False)
            wv = np.array(training_x_batch)
            #pars = [  0,  1.5,    0.8,    0.8,   60. ,  180. ,   30. ,   75. ,   22. ]
            pars = training_x_batch.T[:,0]
            diffs = np.array(wv / wv.max()) - pars/np.array(wv.max())
            ax[j,i].hist2d(wv[:,i], np.sqrt((diffs**2).sum(axis=1)), bins=20, cmap='Greys');
            
        else:
            
            plt.setp(ax[j,i].get_xticklabels(), visible=False, rotation='vertical');
            plt.setp(ax[j,i].get_yticklabels(), visible=False, rotation='vertical');
            pdata, udata, extent = gen2plane(cols[i], cols[j], pars)
            ax[j,i].imshow(udata, extent = extent, aspect = (extent[1] - extent[0]) / (extent[3] - extent[2]), origin='lower')
            ax[j,i].plot(pars[i], pars[j], 'o', c='red')
        
for i,val in enumerate(cols):
    ax[-1,i].set_xlabel(val);
    plt.setp(ax[-1,i].get_xticklabels(), visible=True, rotation='vertical');
    ax[i, 0].set_ylabel(val);
    plt.setp(ax[i, 0].get_yticklabels(), visible=True)
    
plt.savefig("draft.png")
[Figure: grid of predictive-variance slices for each pair of parameters, with 2D histograms of the training points on the diagonal]
# Use the fitted length scales to set the grid spacing along each parameter axis
spacings = np.exp(gp.get_vector()[1:])
axes = []
for axis in spacings:
    axes.append(np.arange(0, 1, axis))
grid = np.array(np.meshgrid(*axes))
points = grid.T.reshape(-1, 8)
values = []
# Evaluate the predictive variance in batches of 10,000 points to keep memory manageable
for i in range(len(points)/10000):
    values += list(gp.predict(training_y_batch, points[10000*i:10000*(i+1)], return_cov=False, return_var=True)[1])

Variance of test points

The Gaussian process has given us a gridding of the parameter space, and from that we can then plot a histogram of the model’s variance at each of those points.

plt.hist(values, log=True, bins=50);
plt.xlabel("Variance")
[Figure: histogram of the predictive variance at the gridded test points]

There are clearly a large number of points in the parameter space at which the GP model has a large variance.

We can also look at how this variance behaviour changes as a function of the test points’ distance from the training data.

# TODO Make a plot of the mean distance of each test point 
# from the mass of existing points (i.e. the mean pair-wise distance )
# against the variance at that point
from scipy.spatial import distance
distances = []
for i in range(len(points)/10000):
    # Minimum distance from each test point to (the first 100 of) the training points, in batches
    distances += list(np.min(distance.cdist(training_x.T[:100], points[10000*i:10000*(i+1)]), axis=0))
plt.figure(figsize=(10,10))
plt.hist2d(distances, values, (50,50))
plt.ylabel("Variance")
plt.xlabel("Minimum distance from training data")
[Figure: 2D histogram of predictive variance against minimum distance from the training data]