Calibrating GPs using MCMC¶

[1]:

import os
# Ignore my broken HDF5 install...
os.putenv("HDF5_DISABLE_VERSION_CHECK", '1')

[2]:

import pandas as pd
import numpy as np
import iris

from utils import get_aeronet_data, get_bc_ppe_data
from esem import gp_model
from esem.sampler import MCMCSampler

import os

import matplotlib.pyplot as plt
%matplotlib inline

# GPU = "1"

# os.environ["CUDA_VISIBLE_DEVICES"] = GPU

Read in the parameters and observables¶

[3]:

ppe_params, ppe_aaod = get_bc_ppe_data()

/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/__init__.py:249: IrisDeprecation: setting the 'Future' property 'netcdf_promote' is deprecated and will be removed in a future release. Please remove code that sets this property.
  warn_deprecated(msg.format(name))
/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/__init__.py:249: IrisDeprecation: setting the 'Future' property 'netcdf_promote' is deprecated and will be removed in a future release. Please remove code that sets this property.
  warn_deprecated(msg.format(name))

[4]:

# Calculate the global, annual mean AAOD (CIS will automatically apply the weights)
mean_aaod, = ppe_aaod.collapsed(['latitude', 'longitude', 'time'], 'mean')

WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
WARNING:root:Creating guessed bounds as none exist in file
/Users/watson-parris/miniconda3/envs/gcem/lib/python3.8/site-packages/iris/analysis/cartography.py:394: UserWarning: Using DEFAULT_SPHERICAL_EARTH_RADIUS.
  warnings.warn("Using DEFAULT_SPHERICAL_EARTH_RADIUS.")

[5]:

plt.scatter(ppe_params.BCnumber, ppe_params.Wetdep, c=mean_aaod.data)
plt.colorbar()

[5]:

<matplotlib.colorbar.Colorbar at 0x7fc03dea0580>

../_images/examples_Calibrating_GPs_using_MCMC_6_1.png

[6]:

from esem.utils import plot_parameter_space

plot_parameter_space(ppe_params, fig_size=(3,6))

../_images/examples_Calibrating_GPs_using_MCMC_7_0.png

[7]:

n_test = 8

X_test, X_train = ppe_params[:n_test], ppe_params[n_test:]
Y_test, Y_train = mean_aaod[:n_test], mean_aaod[n_test:]

Setup and run the models¶

[8]:

model = gp_model(X_train, Y_train)

WARNING: Using default kernel - be sure you understand the assumptions this implies. Consult e.g.  http://www.cs.toronto.edu/~duvenaud/cookbook/ for an excellent description of different kernel choices.

[9]:

model.train()

[10]:

m, v = model.predict(X_test.values)

[11]:

Y_test.data

[11]:

masked_array(data=[0.002822510314728863, 0.002615034735649063,
                   0.002186747212596059, 0.0015112621889264352,
                   0.004456776854431466, 0.0025203727123839113,
                   0.0022378865435589133, 0.003730134076583199],
             mask=[False, False, False, False, False, False, False, False],
       fill_value=1e+20)

[12]:

from esem.utils import validation_plot

validation_plot(Y_test.data.flatten(), m.data.flatten(), v.data.flatten())

Proportion of 'Bad' estimates : 0.00%

../_images/examples_Calibrating_GPs_using_MCMC_14_1.png

[13]:

# Set the objective as one of the test datasets
sampler = MCMCSampler(model, Y_test[0])

[14]:

samples = sampler.sample(n_samples=8000, mcmc_kwargs=dict(num_burnin_steps=1000) )

Acceptance rate: 0.9026122150748647

[15]:

new_samples = pd.DataFrame(data=samples, columns=ppe_params.columns)
m, _ = model.predict(new_samples.values)
Zs = m.data

[16]:

print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))

Sample mean: 0.0026673169026350933
Sample std dev: 0.000807943471868746

[17]:

plot_parameter_space(new_samples, target_df=X_test.iloc[0])

../_images/examples_Calibrating_GPs_using_MCMC_19_0.png

[18]:

grr = pd.plotting.scatter_matrix(new_samples, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)

../_images/examples_Calibrating_GPs_using_MCMC_20_0.png

[19]:

from esem.abc_sampler import ABCSampler, constrain
from esem.utils import get_random_params

[20]:

sampler = ABCSampler(model, Y_test[0])

samples = sampler.sample(n_samples=2000, threshold=0.5)
valid_points = pd.DataFrame(data=samples, columns=ppe_params.columns)

Acceptance rate: 0.34405642525374164

[21]:

m, _ = model.predict(valid_points.values)
Zs = m.data

[22]:

print("Sample mean: {}".format(Zs.mean()))
print("Sample std dev: {}".format(Zs.std()))

Sample mean: 0.0028015036302732293
Sample std dev: 0.00030028439418992273

[23]:

plot_parameter_space(valid_points, target_df=X_test.iloc[0])

../_images/examples_Calibrating_GPs_using_MCMC_25_0.png

[24]:

grr = pd.plotting.scatter_matrix(valid_points, c=Zs, figsize=(12, 12), marker='o',
                                 hist_kwds={'bins': 20}, s=60, alpha=.8, vmin=1e-3, vmax=5e-3)

../_images/examples_Calibrating_GPs_using_MCMC_26_0.png

[ ]: