Generate EC Data

Generate EC Data

This notebook uses the original, global mean annual ‘tas’ temperature timeseries from each of the 18 models in our paleomodel ensemble.

It returns as output two files:

  • ‘ec_data.csv’ - a dataset of temperature variability metrics and corresponding ECS values

  • ‘ts_rv.csv’ - the original temperature timeseries, altered so that the volcanic forcing profile has been regressed out, leaving only the residuals.

Last updated: August 3, 2023

import the necessary libraries

from adjustText import adjust_text
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
from scipy.stats import linregress
import matplotlib as mpl
from statsmodels.api import tsa
import statsmodels.api as sm
import xarray as xr

plot styling

# global plot styling applied to every figure in this notebook
mpl.rcParams.update({
    'figure.facecolor': 'white',
    'figure.dpi': 150,
    'font.size': 12,
})

define functions used for generating the timeseries

def get_forcing(saod):
    '''
    Convert sulfuric aerosol optical depth (SAOD) into radiative forcing.

    input: a timeseries of SAOD [list or array]
    output: the corresponding forcing profile (in W/m^2) [ndarray]

    *Formula taken from Marshall et al. (2020): https://doi.org/10.1029/2020GL090241
    '''
    saod = np.asarray(saod)
    # F = -20.7 * (1 - e^(-SAOD))
    return -20.7 * (1.0 - np.exp(-saod))

def remove_volcano(timeseries, temp_anomaly):
    '''
    "Regress out" biased volcanic influence from a model temperature timeseries.

    input: timeseries, timeseries of temperatures from the model [list or array]
           temp_anomaly, the temperature anomalies induced by volcanic forcing [list or array]
    output: temperature residuals with the volcanic forcing 'regressed out' [ndarray]
    '''
    X = temp_anomaly  # independent variable
    y = timeseries    # dependent variable

    # add an intercept term to the regression
    X = sm.add_constant(X)

    # fit the ordinary-least-squares regression model
    reg = sm.OLS(y, X).fit()

    # reg.resid is a pandas Series for pandas inputs but a plain ndarray for
    # list/array inputs; the original `.resid.values` raised AttributeError in
    # the array case - np.asarray handles both
    return np.asarray(reg.resid)

# take a spatial average
def weighted_mean(da):
    '''
    Take a latitude-weighted (cos(lat)) spatial mean of a GCM output field.

    input: a data field from GCM output [DataArray]
    output: the spatially-weighted mean, with the spatial dims collapsed [DataArray]

    raises: ValueError if the latitude coordinate or the spatial dims do not
            match any recognized layout (previously these cases crashed with
            an UnboundLocalError)
    '''
    # build a 2d array of cos(lat) weights whether lat is stored 1d or 2d
    if len(da.lat.shape) == 2:
        weights = np.cos(np.deg2rad(da.lat))
    elif len(da.lat.shape) == 1:
        weights = xr.ones_like(da) * (np.cos(np.deg2rad((da.lat))).values)
    else:
        raise ValueError(f'unsupported lat shape: {da.lat.shape}')

    # turn weights into nan where da is nan so missing cells drop out of
    # both the numerator and the denominator below
    weights = weights * da / da

    # spatial dimension names differ between models; map each convention to
    # the dims we average over
    if 'lat' in da.dims:
        # NOTE(review): this case sums over lat only - presumably lon is
        # already reduced or absent in those inputs; confirm against callers
        dims = ['lat']
    elif 'i' in da.dims:
        dims = ['i', 'j']
    elif 'nlat' in da.dims:
        dims = ['nlat', 'nlon']
    elif 'x' in da.dims:
        dims = ['x', 'y']
    else:
        raise ValueError(f'unrecognized spatial dims: {da.dims}')

    return (da * weights).sum(dim=dims, skipna=True) / weights.sum(dim=dims, skipna=True)

def compute_cox(x):
    '''
    Compute the Cox metric (psi) for a timeseries of temperatures.

    input: inter-annual temperatures [list or 1-d array]
    output: the Cox metric, averaged over all 55-year sliding windows [float]
            (np.nan if the series is shorter than one full window)

    * taken from Cox et al., 2018: https://doi.org/10.1038/nature25450
    '''
    # np.asarray lets plain lists be boolean-indexed (the original crashed
    # with a TypeError on list input); then drop NaNs
    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]

    # fewer points than one 55-year window: no psi values can be computed
    # (previously np.nanmean([]) emitted a RuntimeWarning here)
    if len(x) < 56:
        return np.nan

    psi_vals = []
    for i in range(len(x) - 55):
        y = signal.detrend(x[i:i + 55])          # remove the window's trend
        auto_lag1 = tsa.acf(y, nlags=1)[1]       # lag-1 autocorrelation (statsmodels)
        # log(auto_lag1) is undefined for auto_lag1 <= 0; the original emitted
        # a RuntimeWarning and produced a NaN psi that nanmean dropped anyway,
        # so skipping the window is value-equivalent and warning-free
        if auto_lag1 <= 0:
            continue
        sigma = np.std(y)
        psi_vals.append(sigma / np.sqrt(np.abs(np.log(auto_lag1))))
    return np.nanmean(psi_vals) if psi_vals else np.nan

def compute_nijsse(x, length=10):
    '''
    Compute the Nijsse metric for a timeseries of temperatures.

    input: x, inter-annual temperatures [list or 1-d array]
           length, window size (years) of each non-overlapping trend segment [int]
    output: the Nijsse metric - the standard deviation of the per-window
            temperature trends, each expressed as degrees per `length` years [float]
            (np.nan if the series is too short for a single window)

    * taken from Nijsse et al., 2019: https://doi.org/10.1038/s41558-019-0527-4
    '''
    # remove NaNs from the timeseries
    x = np.asarray(x, dtype=float)
    x = x[~np.isnan(x)]

    # too short for even one window: previously np.nanstd([]) emitted a
    # RuntimeWarning here; return NaN explicitly instead (same value)
    if len(x) <= length:
        return np.nan

    # the regressor is the same for every window - hoist it out of the loop
    years = np.arange(length)
    slopes = [
        length * linregress(years, x[i:i + length]).slope
        for i in range(0, len(x) - length, length)
    ]
    return np.nanstd(slopes)

regress out some of the volcanic influence in the temperature timeseries from 850-1850. We load each of the four volcanic forcings used by the PMIP3 and PMIP4 past1000/past2k experiments.

# evolv2k_v3 forcing
# spatially average the evolv2k SAOD field and restrict it to years 850-1850,
# then collapse each year to a single mean SAOD value before converting to forcing
evolv2k_ts = weighted_mean(xr.open_dataset('data/evolv2k.nc')).sel(time=slice(850,1850))
evolv2k_saod = []
i=850
while i <= 1850:
    # NOTE(review): slice(i, i+1) on a labeled time axis is inclusive of both
    # endpoints, so this averages years i and i+1 together - confirm this
    # 2-year smoothing is intended
    evolv2k_saod.append(float(evolv2k_ts.sel(time=slice(i, i+1)).mean(dim='time').aod550.values))
    i+=1
evolv2k_forcing = get_forcing(evolv2k_saod)

# Gao et al., (2008) forcing
# the 'gm' column is divided by 1.2e3 to convert it to SAOD before the
# forcing conversion (conversion factor presumably from Gao et al. - TODO confirm)
gao_2008_saod = np.divide(pd.read_csv('data/gao_2008.csv')['gm'].values.astype(float), 1.2*10**3)
gao_2008_forcing = get_forcing(gao_2008_saod)

# Crowley et al., (2000) forcing
# this file is used directly as a forcing profile - no SAOD-to-forcing conversion
crowley_2000_forcing = pd.read_csv('data/crowley_2000.txt', delimiter = '\t')['Vol.hl.cct'].values

# Crowley et al., (2008)
crowley_2008_saod = pd.read_csv('data/crowley_2008.txt', delimiter = '\t')['AOD'].values
crowley_2008_forcing = get_forcing(crowley_2008_saod)

generate the timeseries according to the following procedure:

  1. For each model, load the raw temperature timeseries.

  2. Next, “regress out” some of the volcanic influence.

  3. Compute the Cox metric for both the original and “volcanic influence removed” timeseries

  4. Compute the Nijsse metric for both the original and “volcanic influence removed” timeseries

Sanity check (from the notebook run): `len(df['FGOALS-gl'][150:])` evaluated to 1000, i.e. 1000 years remain after skipping the first 150.
df = pd.read_csv('data/ts.csv')
model_keys = df.keys()[2:]
# which volcanic forcing reconstruction each model in `model_keys` was run with,
# in the same order as `model_keys`
forcings = ['gao', 'gao', 'crowley_08', 'crowley_00',
            'evolv2k', 'crowley_08', 'gao', 'evolv2k',
            'crowley_08', 'gao', 'evolv2k', 'gao',
            'evolv2k', 'evolv2k', 'evolv2k', 'crowley_08',
            'evolv2k', 'evolv2k']
# lookup table of forcing profiles, so each model needs only one code path
# instead of one near-duplicate branch per forcing
forcing_profiles = {'gao': gao_2008_forcing,
                    'crowley_00': crowley_2000_forcing,
                    'crowley_08': crowley_2008_forcing,
                    'evolv2k': evolv2k_forcing}

ecs_vals = []
original_cox = []
rv_cox = []
original_nijsse = []
rv_nijsse = []

t = pd.read_csv('data/ecs.csv')

df_rv = {}

for i, model in enumerate(model_keys):
    ts = df[model].values
    ts_historical = ts[1001:]   # years after 1850
    ts_past1000 = ts[:1001]     # years 850-1850

    # HadCM3 is always regressed against the Crowley (2008) forcing,
    # regardless of its entry in `forcings`
    forcing_key = 'crowley_08' if model == 'HadCM3' else forcings[i]

    if forcing_key == 'crowley_00':
        # the Crowley (2000) forcing only covers the series from index 151
        # onward: regress that portion (past1000 remainder plus historical)
        # and keep the first 151 years unaltered in the saved timeseries
        reg = sm.OLS(ts[151:], sm.add_constant(crowley_2000_forcing)).fit()
        ts_rv = np.add(reg.resid, reg.params[0])    # residuals + intercept, used for the metrics
        ts_rv_full = np.append(ts[:151], ts_rv)     # full saved timeseries
    else:
        # regress the past1000 segment against its forcing; the metrics and
        # the saved timeseries both use residuals (shifted back by the
        # intercept) with the unaltered historical segment re-appended
        reg = sm.OLS(ts_past1000, sm.add_constant(forcing_profiles[forcing_key])).fit()
        ts_rv = np.append(np.add(reg.resid, reg.params[0]), ts_historical)
        ts_rv_full = ts_rv

    ecs_vals.append(t[t['model name'] == model]['ecs'].values[0])
    original_cox.append(compute_cox(ts))
    rv_cox.append(compute_cox(ts_rv))
    original_nijsse.append(compute_nijsse(ts))
    rv_nijsse.append(compute_nijsse(ts_rv))
    df_rv[model] = ts_rv_full
(Captured notebook output) /tmp/ipykernel_201421/985049880.py:75: RuntimeWarning: invalid value encountered in log — emitted by `np.log` in `compute_cox` when a window's lag-1 autocorrelation is non-positive; those windows produce NaN psi values that `np.nanmean` subsequently ignores.

save the emergent constraint relevant data in a file called ‘ec_data.csv’

# assemble the emergent-constraint dataset and write it out; use a fresh
# name instead of re-using `df`, which still holds the timeseries DataFrame
ec_data = {'model': model_keys, 'generation': t['generation'].values,
           'cox': original_cox, 'rv_cox': rv_cox,
           'nijsse': original_nijsse, 'rv_nijsse': rv_nijsse,
           'ecs': ecs_vals}
pd.DataFrame(ec_data).to_csv('data/ec_data.csv')

now, save the timeseries data for each model with the volcanic influence “regressed out”

# write the volcanic-influence-removed timeseries as a table, one column per model
ts_rv_table = pd.DataFrame(df_rv)
ts_rv_table.to_csv('data/ts_rv.csv')