Calibrating GPs using MCMC

[1]:
import os
# Ignore my broken HDF5 install...
os.putenv("HDF5_DISABLE_VERSION_CHECK", '1')
[2]:
import pandas as pd
import numpy as np
import iris

from utils import get_aeronet_data, get_bc_ppe_data
from esem import gp_model
from esem.sampler import MCMCSampler

import os

import matplotlib.pyplot as plt
%matplotlib inline

# GPU = "1"

# os.environ["CUDA_VISIBLE_DEVICES"] = GPU

Read in the parameters and observables

[3]:
ppe_params, ppe_aaod = get_bc_ppe_data()
/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/__init__.py:249: IrisDeprecation: setting the 'Future' property 'netcdf_promote' is deprecated and will be removed in a future release. Please remove code that sets this property.
  warn_deprecated(msg.format(name))
/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/__init__.py:249: IrisDeprecation: setting the 'Future' property 'netcdf_promote' is deprecated and will be removed in a future release. Please remove code that sets this property.
  warn_deprecated(msg.format(name))
[4]:
# Calculate the global, annual mean AAOD (CIS will automatically apply the weights)
mean_aaod, = ppe_aaod.collapsed(['latitude', 'longitude', 'time'], 'mean')
WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/analysis/cartography.py:394: UserWarning: Using DEFAULT_SPHERICAL_EARTH_RADIUS.
  warnings.warn("Using DEFAULT_SPHERICAL_EARTH_RADIUS.")
[5]:
plt.scatter(ppe_params.BCnumber, ppe_params.Wetdep, c=mean_aaod.data)
plt.colorbar()
[5]:
<matplotlib.colorbar.Colorbar at 0x7fc03dea0580>
../_images/examples_Calibrating_GPs_using_MCMC_6_1.png
[6]:
from esem.utils import plot_parameter_space

plot_parameter_space(ppe_params, fig_size=(3,6))
../_images/examples_Calibrating_GPs_using_MCMC_7_0.png
[7]:
n_test = 8

X_test, X_train = ppe_params[:n_test], ppe_params[n_test:]
Y_test, Y_train = mean_aaod[:n_test], mean_aaod[n_test:]

Setup and run the models

[8]:
model = gp_model(X_train, Y_train)
WARNING: Using default kernel - be sure you understand the assumptions this implies. Consult e.g.  http://www.cs.toronto.edu/~duvenaud/cookbook/ for an excellent description of different kernel choices.
[9]:
model.train()
[10]:
m, v = model.predict(X_test.values)
[11]:
Y_test.data
[11]:
masked_array(data=[0.002822510314728863, 0.002615034735649063,
                   0.002186747212596059, 0.0015112621889264352,
                   0.004456776854431466, 0.0025203727123839113,
                   0.0022378865435589133, 0.003730134076583199],
             mask=[False, False, False, False, False, False, False, False],
       fill_value=1e+20)
[12]:
from esem.utils import validation_plot

validation_plot(Y_test.data.flatten(), m.data.flatten(), v.data.flatten())
Proportion of 'Bad' estimates : 0.00%
../_images/examples_Calibrating_GPs_using_MCMC_14_1.png
[13]:
# Set the objective as one of the test datasets
sampler = MCMCSampler(model, Y_test[0])
[14]:
samples = sampler.sample(n_samples=8000, mcmc_kwargs=dict(num_burnin_steps=1000) )
Acceptance rate: 0.9026122150748647
[15]:
new_samples = pd.DataFrame(data=samples, columns=ppe_params.columns)
m, _ = model.predict(new_samples.values)
Zs = m.data
[16]:
print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))
Sample mean: 0.0026673169026350933
Sample std dev: 0.000807943471868746
[17]:
plot_parameter_space(new_samples, target_df=X_test.iloc[0])
../_images/examples_Calibrating_GPs_using_MCMC_19_0.png
[18]:
grr = pd.plotting.scatter_matrix(new_samples, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)
../_images/examples_Calibrating_GPs_using_MCMC_20_0.png
[19]:
from esem.abc_sampler import ABCSampler, constrain
from esem.utils import get_random_params
[20]:
sampler = ABCSampler(model, Y_test[0])

samples = sampler.sample(n_samples=2000, threshold=0.5)
valid_points = pd.DataFrame(data=samples, columns=ppe_params.columns)
Acceptance rate: 0.34405642525374164
[21]:
m, _ = model.predict(valid_points.values)
Zs = m.data
[22]:
print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))
Sample mean: 0.0028015036302732293
Sample std dev: 0.00030028439418992273
[23]:
plot_parameter_space(valid_points, target_df=X_test.iloc[0])
../_images/examples_Calibrating_GPs_using_MCMC_25_0.png
[24]:
grr = pd.plotting.scatter_matrix(valid_points, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)
../_images/examples_Calibrating_GPs_using_MCMC_26_0.png
[ ]: