Calibrating GPs using MCMC¶

[1]:

import os
# Ignore my broken HDF5 install...
os.putenv("HDF5_DISABLE_VERSION_CHECK", '1')

[2]:

import pandas as pd
import numpy as np
import iris

from utils import get_aeronet_data, get_bc_ppe_data
from esem import gp_model
from esem.sampler import MCMCSampler

import os

import matplotlib.pyplot as plt
%matplotlib inline

# GPU = "1"

# os.environ["CUDA_VISIBLE_DEVICES"] = GPU

Read in the parameters and observables¶

[3]:

ppe_params, ppe_aaod = get_bc_ppe_data()

[4]:

# Calculate the global, annual mean AAOD (CIS will automatically apply the weights)
mean_aaod, = ppe_aaod.collapsed(['latitude', 'longitude', 'time'], 'mean')

WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
C:\Users\duncan\miniconda3\envs\climatebench\lib\site-packages\iris\analysis\cartography.py:394: UserWarning: Using DEFAULT_SPHERICAL_EARTH_RADIUS.
  warnings.warn("Using DEFAULT_SPHERICAL_EARTH_RADIUS.")

[5]:

plt.scatter(ppe_params.BCnumber, ppe_params.Wetdep, c=mean_aaod.data)
plt.colorbar()

[5]:

<matplotlib.colorbar.Colorbar at 0x1807b701fd0>

../_images/examples_Calibrating_GPs_using_MCMC_6_1.png

[6]:

from esem.utils import plot_parameter_space

plot_parameter_space(ppe_params, fig_size=(3,6))

c:\users\duncan\pycharmprojects\gcem\esem\utils.py:119: MatplotlibDeprecationWarning: shading='flat' when X and Y have the same dimensions as C is deprecated since 3.3.  Either specify the corners of the quadrilaterals with X and Y, or pass shading='auto', 'nearest' or 'gouraud', or set rcParams['pcolor.shading'].  This will become an error two minor releases later.
  ax.pcolor(X, Y, vals[:, np.newaxis], vmin=0, vmax=1)

../_images/examples_Calibrating_GPs_using_MCMC_7_1.png

[7]:

n_test = 8

X_test, X_train = ppe_params[:n_test], ppe_params[n_test:]
Y_test, Y_train = mean_aaod[:n_test], mean_aaod[n_test:]

Setup and run the models¶

[8]:

model = gp_model(X_train, Y_train)

WARNING: Using default kernel - be sure you understand the assumptions this implies. Consult e.g.  http://www.cs.toronto.edu/~duvenaud/cookbook/ for an excellent description of different kernel choices.

[9]:

model.train()

[10]:

m, v = model.predict(X_test.values)

[11]:

Y_test.data

[11]:

masked_array(data=[0.002822510314728863, 0.002615034735649063,
                   0.002186747212596059, 0.0015112621889264352,
                   0.004456776854431466, 0.0025203727123839117,
                   0.0022378865435589133, 0.003730134076583199],
             mask=[False, False, False, False, False, False, False, False],
       fill_value=1e+20)

[12]:

from esem.utils import validation_plot

validation_plot(Y_test.data.flatten(), m.data.flatten(), v.data.flatten())

Proportion of 'Bad' estimates : 0.00%

../_images/examples_Calibrating_GPs_using_MCMC_14_1.png

[13]:

# Set the objective as one of the test datasets
sampler = MCMCSampler(model, Y_test[0])

[14]:

samples = sampler.sample(n_samples=800, mcmc_kwargs=dict(num_burnin_steps=100) )

Acceptance rate: 0.9262387969566703

[15]:

new_samples = pd.DataFrame(data=samples, columns=ppe_params.columns)
m, _ = model.predict(new_samples.values)
Zs = m.data

[16]:

print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))

Sample mean: 0.003150297212253644
Sample std dev: 0.0006305448129278108

[17]:

plot_parameter_space(new_samples, target_df=X_test.iloc[0])

c:\users\duncan\pycharmprojects\gcem\esem\utils.py:119: MatplotlibDeprecationWarning: shading='flat' when X and Y have the same dimensions as C is deprecated since 3.3.  Either specify the corners of the quadrilaterals with X and Y, or pass shading='auto', 'nearest' or 'gouraud', or set rcParams['pcolor.shading'].  This will become an error two minor releases later.
  ax.pcolor(X, Y, vals[:, np.newaxis], vmin=0, vmax=1)

../_images/examples_Calibrating_GPs_using_MCMC_19_1.png

[18]:

grr = pd.plotting.scatter_matrix(new_samples, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)

../_images/examples_Calibrating_GPs_using_MCMC_20_0.png

[19]:

from esem.abc_sampler import ABCSampler, constrain
from esem.utils import get_random_params

[20]:

sampler = ABCSampler(model, Y_test[0])

samples = sampler.sample(n_samples=2000, threshold=0.5)
valid_points = pd.DataFrame(data=samples, columns=ppe_params.columns)

Acceptance rate: 0.33641715727502103

[21]:

m, _ = model.predict(valid_points.values)
Zs = m.data

[22]:

print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))

Sample mean: 0.002788395703569595
Sample std dev: 0.0003013221458724632

[23]:

plot_parameter_space(valid_points, target_df=X_test.iloc[0])

../_images/examples_Calibrating_GPs_using_MCMC_25_0.png

[24]:

grr = pd.plotting.scatter_matrix(valid_points, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)

../_images/examples_Calibrating_GPs_using_MCMC_26_0.png

[ ]: