Using the ERA5 O3 data in the WRF initial condition

Because of the COVID-19, CAM-chem output files aren’t available after June 2019.

I compared the WACCM output file and ERA5 reanalysis pressure level data and found that the upper tropospheric ozone is better for the ERA5 data.

As a result, I decided to directly replace the O3 variable in wrfinput* files by that of the ERA5 data.

Here’re the steps:

import numpy as np
import xarray as xr
import proplot as plot
from netCDF4 import Dataset
from wrf import getvar, ll_to_xy
import scipy.interpolate as interp

Read data

# read wrfinput file
ncfile = Dataset('./wrfinput/20200901/mozbc/wrfinput_d01', 'r+') # for modification
ds_wrf = xr.open_dataset('./wrfinput/20200901/mozbc/wrfinput_d01') # for reading data
# calculate the pressure levels (hPa)
p_wrf = (ds_wrf['P']+ds_wrf['PB']).isel(Time=0)/1e2
p_wrf.attrs['units'] = 'hPa'
# convert to ppbv
o3 = ds_wrf['o3']*1e3

# read era5 data
ds_era5 = xr.open_dataset('./era5/era5_20200901.nc')
era5_o3 = ds_era5['o3']*28.9644/47.9982*1e9

Correct the ERA5 data manually (optional)

I correct the ERA5 data based on the ozonesonde data:

# correct the ERA5 data manually
era5_o3 = xr.where((era5_o3.level<=500) & (era5_o3.level>=100) & (era5_o3-20>10), era5_o3-20, era5_o3)
era5_o3 = xr.where((era5_o3.level<=700) & (era5_o3.level>500) & (era5_o3-10>10), era5_o3-10, era5_o3)

Interpolation

Interpolating era5 data to wrf grids (lon/lat)

era5_o3_interp = era5_o3.interp(longitude=o3.XLONG.isel(Time=0), latitude=o3.XLAT.isel(Time=0))

## expand 1d to 3d
p_era5 = era5_o3_interp.level.expand_dims(['south_north', 'west_east'])
p_era5 = np.repeat(p_era5, repeats=o3.sizes['south_north'], axis=0)
p_era5 = np.repeat(p_era5, repeats=o3.sizes['west_east'], axis=1)
p_era5.attrs['units'] = 'hPa'

Check the result of interpolation:

f, axs = plot.subplots(ncols=2, share=0)

ax = axs[0]
m = ax.pcolormesh(era5_o3.longitude,
                  era5_o3.latitude,
                  era5_o3.isel(time=0, level=-1),
                  cmap='viridis')
ax.colorbar(m, label='O$_3$ (ppbv)', loc='r')
ax.format(title='Original')

ax = axs[1]
m = ax.pcolormesh(era5_o3_interp.longitude,
                  era5_o3_interp.latitude,
                  era5_o3_interp.isel(time=0, level=-1),
                  cmap='viridis')
ax.format(title='Interpolated')
ax.colorbar(m, label='O$_3$ (ppbv)', loc='r')

axs.format(xlim=(o3.XLONG.min(), o3.XLONG.max()),
           ylim=(o3.XLAT.min(), o3.XLAT.max()),
           xlabel='longitude',
           ylabel='latitude',)

overview

Interpolating to WRF pressure levels

Define the interpolation function used by xr.apply_ufunc:

def interp1d_np(data, x, xi):
    from scipy import interpolate
    f = interpolate.interp1d(x, data, fill_value='extrapolate')
    return f(xi)

Display the structue of each DataArray:

display(era5_o3, p_era5, p_wrf)
<xarray.DataArray (level: 37, time: 72, latitude: 201, longitude: 281)>
array([[[[2436.0215  , 2437.7124  , 2438.7883  , ..., 2450.778   ,
          2450.9316  , 2451.239   ],
         [2441.5552  , 2442.631   , 2443.246   , ..., 2456.158   ,
          2456.4653  , 2456.7725  ],
         [2446.0127  , 2447.0886  , 2447.0886  , ..., 2461.845   ,
          2462.1526  , 2462.4602  ],
         ...,
         [3067.625   , 3057.7874  , 3049.026   , ..., 3244.2405  ,
          3248.083   , 3255.9224  ],
         [3072.2366  , 3061.4768  , 3051.7925  , ..., 3257.9207  ,
          3262.9932  , 3269.449   ],
         [3075.7715  , 3064.397   , 3053.176   , ..., 3273.599   ,
          3278.3643  , 3284.8203  ]],

        [[2427.26    , 2429.2583  , 2431.2563  , ..., 2447.8574  ,
          2447.8574  , 2448.0112  ],
         [2431.8713  , 2433.716   , 2435.5603  , ..., 2451.3926  ,
          2451.3926  , 2451.3926  ],
         [2436.9438  , 2438.9421  , 2440.6328  , ..., 2454.7744  ,
          2454.467   , 2454.3132  ],
...
         [  26.428476,   26.428476,   26.428476, ...,   26.890043,
            26.890043,   26.736372],
         [  26.428476,   26.428476,   26.274803, ...,   27.043716,
            27.043716,   27.043716],
         [  26.428476,   26.428476,   26.274803, ...,   27.19739 ,
            27.19739 ,   27.19739 ]],

        [[  35.34426 ,   35.34426 ,   35.34426 , ...,   44.720516,
            44.720516,   44.720516],
         [  35.49793 ,   35.651604,   35.651604, ...,   44.720516,
            44.720516,   44.566837],
         [  35.80528 ,   35.95895 ,   35.80528 , ...,   44.720516,
            44.566837,   44.25949 ],
         ...,
         [  26.428476,   26.428476,   26.428476, ...,   26.736372,
            26.582697,   26.428476],
         [  26.428476,   26.428476,   26.274803, ...,   26.890043,
            26.890043,   26.890043],
         [  26.428476,   26.428476,   26.274803, ...,   27.043716,
            27.043716,   27.043716]]]], dtype=float32)
Coordinates:
  * level      (level) int32 1 2 3 5 7 10 20 30 ... 850 875 900 925 950 975 1000
  * longitude  (longitude) float32 90.0 90.25 90.5 90.75 ... 159.5 159.75 160.0
  * latitude   (latitude) float32 60.0 59.75 59.5 59.25 ... 10.5 10.25 10.0
  * time       (time) datetime64[ns] 2020-08-01 ... 2020-09-01T23:00:00
<xarray.DataArray 'level' (south_north: 119, west_east: 119, level: 37)>
array([[[   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        ...,
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000]],

       [[   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        ...,
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000]],

       [[   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        ...,
...
        ...,
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000]],

       [[   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        ...,
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000]],

       [[   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        ...,
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000],
        [   1,    2,    3, ...,  950,  975, 1000]]], dtype=int32)
Coordinates:
  * level    (level) int32 1 2 3 5 7 10 20 30 ... 850 875 900 925 950 975 1000
Dimensions without coordinates: south_north, west_east
Attributes:
    units:    hPa
<xarray.DataArray (bottom_top: 74, south_north: 119, west_east: 119)>
array([[[ 977.6088   ,  973.8247   ,  980.0365   , ..., 1005.25305  ,
         1005.3736   , 1005.49805  ],
        [ 972.2685   ,  972.64575  ,  980.2555   , ..., 1005.1616   ,
         1005.28357  , 1005.40784  ],
        [ 973.7658   ,  979.50134  ,  983.94495  , ..., 1005.05743  ,
         1005.1929   , 1005.32166  ],
        ...,
        [ 910.8054   ,  898.6346   ,  894.9356   , ..., 1013.1167   ,
         1012.9816   , 1012.9492   ],
        [ 903.1422   ,  885.46564  ,  880.11725  , ..., 1013.20825  ,
         1013.1019   , 1012.9691   ],
        [ 868.00275  ,  854.2058   ,  859.6906   , ..., 1009.3077   ,
         1013.17224  , 1013.0759   ]],

       [[ 970.978    ,  967.2171   ,  973.3924   , ...,  998.4201   ,
          998.54095  ,  998.6655   ],
        [ 965.6708   ,  966.0465   ,  973.6117   , ...,  998.33044  ,
          998.4526   ,  998.5758   ],
        [ 967.15784  ,  972.86304  ,  977.2757   , ...,  998.2269   ,
          998.3619   ,  998.4895   ],
...
        [  11.009878 ,   10.996195 ,   10.992074 , ...,   11.122887 ,
           11.122656 ,   11.1225195],
        [  11.0012455,   10.981459 ,   10.975507 , ...,   11.123066 ,
           11.122817 ,   11.1225195],
        [  10.961985 ,   10.946582 ,   10.95274  , ...,   11.118411 ,
           11.122959 ,   11.122741 ]],

       [[  10.351611 ,   10.350232 ,   10.352553 , ...,   10.362022 ,
           10.362007 ,   10.361997 ],
        [  10.349756 ,   10.349896 ,   10.352734 , ...,   10.361921 ,
           10.362008 ,   10.362052 ],
        [  10.350339 ,   10.352377 ,   10.3541   , ...,   10.361861 ,
           10.361966 ,   10.361929 ],
        ...,
        [  10.328756 ,   10.324332 ,   10.322899 , ...,   10.3655405,
           10.365465 ,   10.36542  ],
        [  10.325963 ,   10.319494 ,   10.317566 , ...,   10.3655205,
           10.3654995,   10.365434 ],
        [  10.313118 ,   10.308083 ,   10.31015  , ...,   10.364057 ,
           10.365552 ,   10.365443 ]]], dtype=float32)
Coordinates:
    XLAT     (south_north, west_east) float32 16.568985 16.612839 ... 45.305634
    XLONG    (south_north, west_east) float32 104.098694 104.3277 ... 138.15619
Dimensions without coordinates: bottom_top, south_north, west_east
Attributes:
    units:    hPa

It’s time to run the interpolation:

# Explanations are given in https://github.com/pydata/xarray/issues/3931
interped = xr.apply_ufunc(
    interp1d_np,
    era5_o3_interp,
    p_era5,
    p_wrf,
    input_core_dims=[['level'], ['level'], ["bottom_top"]],
    output_core_dims=[['bottom_top']],
    exclude_dims=set(("bottom_top",)),
    vectorize=True,
)

Reorgnizie interpolated data to WRF variable dimension:

# sort the dims and delete useless coordinates
interped_o3 = interped.sel(time='2020-09-01 00:00').transpose('bottom_top', ...).expand_dims('Time').drop_vars(['time', 'longitude', 'latitude'])
display(interped_o3)
<xarray.DataArray (Time: 1, bottom_top: 74, south_north: 119, west_east: 119)>
array([[[[  26.85544708,   26.89004326,   26.89004326, ...,
            27.85048673,   27.96454546,   27.97777195],
         [  26.79858192,   26.89004517,   26.90632874, ...,
            27.86999883,   28.03319995,   28.00973581],
         [  26.89004326,   26.91651859,   26.96341212, ...,
            27.75184335,   27.91883112,   28.01447156],
         ...,
         [  42.88559052,   42.90171678,   42.84873043, ...,
            43.76431619,   42.66415577,   42.48096888],
         [  43.05763403,   43.23619572,   42.85495261, ...,
            43.87452885,   42.7925106 ,   42.38859107],
         [  43.60155035,   43.34088363,   43.06596098, ...,
            44.04397503,   43.13010603,   42.49644742]],

        [[  26.86850858,   26.89004326,   26.89004326, ...,
            27.74865668,   27.90227851,   27.96283885],
         [  26.82568012,   26.89004517,   26.89004326, ...,
            27.74462751,   27.90841153,   27.95417291],
         [  26.89004326,   26.89004326,   26.90871055, ...,
            27.62621128,   27.79286227,   27.9352251 ],
...
         [7541.28326442, 7527.26233612, 7509.01665573, ...,
          7506.93546421, 7541.55757499, 7586.09060089],
         [7511.94835776, 7497.8423389 , 7482.09586978, ...,
          7479.4326681 , 7493.47807596, 7517.35630734],
         [7479.79887861, 7466.09814386, 7455.23409676, ...,
          7466.90025385, 7466.7015107 , 7469.38325193]],

        [[9694.82835713, 9693.7357371 , 9689.58353861, ...,
          9585.6528903 , 9590.35306113, 9595.75774179],
         [9686.9817295 , 9685.91018815, 9682.07826455, ...,
          9576.20928858, 9580.64839163, 9585.35674978],
         [9678.86203706, 9677.71921539, 9674.5757891 , ...,
          9566.05929144, 9569.49277165, 9574.19528187],
         ...,
         [7659.76051491, 7642.8204401 , 7622.65272498, ...,
          7648.34201255, 7683.81940009, 7730.1049812 ],
         [7626.72798026, 7609.10646871, 7591.36561556, ...,
          7622.32897339, 7635.5818719 , 7659.58415364],
         [7585.96803727, 7569.48853948, 7558.64050131, ...,
          7612.8093532 , 7611.19161405, 7612.15335775]]]])
Coordinates:
    XLAT     (south_north, west_east) float32 16.568985 16.612839 ... 45.305634
    XLONG    (south_north, west_east) float32 104.098694 104.3277 ... 138.15619
Dimensions without coordinates: Time, bottom_top, south_north, west_east

Check the surface O3 again:

f, axs = plot.subplots(ncols=2, share=0)

ax = axs[0]
m = ax.pcolormesh(era5_o3.longitude,
                  era5_o3.latitude,
                  era5_o3.isel(time=0, level=-1),
                  cmap='viridis',
                  vmin=20,
                  vmax=65)
ax.colorbar(m, label='O$_3$ (ppbv)', loc='r')
ax.format(title='Original')

ax = axs[1]
m = ax.pcolormesh(interped_o3.XLONG,
                  interped_o3.XLAT,
                  interped_o3.isel(Time=0, bottom_top=0),
                  cmap='viridis',
                  vmin=20,
                  vmax=65)
ax.format(title='Interpolated')
ax.colorbar(m, label='O$_3$ (ppbv)', loc='r')

axs.format(xlim=(o3.XLONG.min(), o3.XLONG.max()),
           ylim=(o3.XLAT.min(), o3.XLAT.max()),
           xlabel='longitude',
           ylabel='latitude',)

png

The interpolated result is different from the previous one, because the lowest level of ERA5 data is 1000 hPa while the WRF uses the lowest height for the defined eta level.

We can get the index of specific location and use it to pick the interpolated O3 profile.

# convert to index in wrf grid
x_y = ll_to_xy(ncfile, 32, 119)
x, y = x_y[0].values, x_y[1].values

Save and export data

Original surface ozone:

ds_wrf['o3'].isel(Time=0, bottom_top=0).plot(vmin=20*1e-3, vmax=65*1e-3)

waccm

Replace the era5 surface ozone:

ds_wrf['o3'] = interped_o3*1e-3 # back to ppmv
ds_wrf['o3'].isel(Time=0, bottom_top=0).plot(vmin=20*1e-3, vmax=65*1e-3)

era5

Because the coordinates in the attributes of variables are important for WRF, we should use netCDF4 instead of xarray to change the O3 value.

To make sure the structure and attributes of the new wrfinput is correct, it’s better to compare the head info using ncdump -h.

# ds_wrf.to_netcdf(path='./wrfinput/20200901/era5/wrfinput_d01',
#                  mode='w',
#                  engine='netcdf4')

# back up the original wrfinput file, please change the path by yourself
!cp ./wrfinput/20200901/mozbc/wrfinput_d01 ./wrfinput/20200901/mozbc/wrfinput_d01_bak

ncfile['o3'][:] = interped_o3*1e-3 # back to ppmv
ncfile.close()

# move the edited one to anywhere you like
!mv ./wrfinput/20200901/mozbc/wrfinput_d01 ./wrfinput/20200901/era5/wrfinput_d01

Comparison of O3 profiles

# check the O3 profile at one pixel
f, axs = plot.subplots(axwidth=4)

# o3_profile = era5_o3.sel(longitude=119, latitude=32, time='2020-09-01 00:00')
# legend_1 = axs.plot(o3_profile, o3_profile.level, label='Original')

interped_o3_profile = interped_o3.sel(west_east=x, south_north=y).isel(Time=0)
p_profile = p_wrf.sel(west_east=x, south_north=y)
legend_1 = axs.plot(interped_o3_profile,
                    p_profile,
                    label='interpolated ERA5 O3 profile',
                    linewidth=3)

wrf_o3_profile = o3.sel(west_east=x, south_north=y).isel(Time=0)
legend_2 = axs.plot(wrf_o3_profile, p_profile, label='orginal wrfinput')

new_wrf_o3_profile = ds_wrf['o3'].sel(west_east=x, south_north=y).isel(Time=0)*1e3 # ppbv
legend_3 = axs.plot(new_wrf_o3_profile,
                    p_profile,
                    label='updated wrfinput',
                    linewidth=1,
                    color='yellow5')

axs.legend([legend_1, legend_2, legend_3], loc='r', ncols=1)
axs.format(xlim=(0, 200),
           ylim=(1000, 100),
           xlabel='Pressure (hPa)',
           ylabel='O$_3$ (ppbv)',)

png

Version control

Version Action Time
1.0 Init 2020-10-27

Say something

Thank you

Your comment has been submitted and will be published once it has been approved.

Click here to see the pull request you generated.

OOPS!

Your comment has not been submitted. Please go back and try again. Thank You!

If this error persists, please open an issue by clicking here.