Overview
The NCEP/CPC 4km Global (60N – 60S) IR Dataset (MERG dataset http://mirador.gsfc.nasa.gov/collections/MERG__001.shtml) is an infrared brightness temperature (BT) satellite dataset created by the Climate Prediction Center (CPC), National Centers on Environmental Protection (NCEP) and the National Weather Service (NWS) from data collected by sensors sampling within the 10.2 and 12.5 μm spectral band on various geostationary satellite platforms. The data are representative of readings at the top of the atmosphere, and are already correct for errors such as the “zenith angle dependence”, but no intercalibration correction has been applied. In preparing the MERG dataset, the data have been regridded such that it exist on a rectangular latitude-longitude grid, and the time intervals are given at the hour and 30 minutes into the hour.
Here is a short tutorial of how to convert these files to a NETCDF format that can be ingested by OCW.The scripts provided here are very raw, but are hopefully documented sufficiently to accommodate your needs.
Needed:
*Python 2.7.4
Module dependences: Nio
*GrADS (We use OpenGrADS grads2 Version 2.0.1.oga.1)
*LATS4D
Steps
- Install GrADS (and LATS4D) according to their instructions
- Move all of the MERG raw files which were downloaded e.g. merg_2002080417_4km-pixel.Z or merg_2002080417_4km-pixel (if unzipped already) into one folder
- Edit preprocessing.py accordingly
Code Block | ||||||||
---|---|---|---|---|---|---|---|---|
| ||||||||
''' The scripts for processing MERG raw data #****************************************************************** ''' import fileinput import glob import os import re import string import subprocess import sys #----------------------- GLOBAL VARIABLES -------------------------- # --------------------- User defined variables --------------------- LATMIN = '-5.0' #min latitude; -ve values in the SH e.g. 5S = -5 LATMAX = '20.0' #max latitude; -ve values in the SH e.g. 5S = -5 LONMIN = '-30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 LONMAX = '30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 T_BB_MAX = '250' #warmest temp to allow in K T_BB_MIN = '180' #coolest temp to allow in K #------------------- End user defined Variables ------------------- #------------------------ End GLOBAL VARS ------------------------- def preprocessingMERG(MERGdirname): ''' Purpose:: Utility script for unzipping and converting the merg*.Z files from Mirador to NETCDF format. The files end up in a folder called mergNETCDF in the directory where the raw MERG data is NOTE: VERY RAW AND DIRTY Input:: Directory to the location of the raw MERG files, preferably zipped Output:: none Assumptions:: 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) have been installed on the system and the user can access 2 User can write files in location where script is being called 3 the files havent been unzipped ''' os.chdir((MERGdirname+'/')) imgFilename = '' #Just incase the X11 server is giving problems subprocess.call('export DISPLAY=:0.0', shell=True) #for files in glob.glob("*-pixel"): for files in glob.glob("*.Z"): fname = os.path.splitext(files)[0] #unzip it bash_cmd = 'gunzip ' + files subprocess.call(bash_cmd, shell=True) #determine the time from the filename ftime = re.search('\_(.*)\_',fname).group(1) yy = ftime[0:4] mm = ftime[4:6] day = ftime[6:8] hr = ftime [8:10] #TODO: must be something more pythonic! if mm=='01': mth = 'Jan' if mm == '02': mth = 'Feb' if mm == '03': mth = 'Mar' if mm == '04': mth = 'Apr' if mm == '05': mth = 'May' if mm == '06': mth = 'Jun' if mm == '07': mth = 'Jul' if mm == '08': mth = 'Aug' if mm == '09': mth = 'Sep' if mm == '10': mth = 'Oct' if mm == '11': mth = 'Nov' if mm == '12': mth = 'Dec' subprocess.call('rm merg.ctl', shell=True) subprocess.call('touch merg.ctl', shell=True) replaceExpDset = 'echo DSET ' + fname +' >> merg.ctl' replaceExpTdef = 'echo TDEF 99999 LINEAR '+hr+'z'+day+mth+yy +' 30mn' +' >> merg.ctl' subprocess.call(replaceExpDset, shell=True) subprocess.call('echo "OPTIONS yrev little_endian template" >> merg.ctl', shell=True) subprocess.call('echo "UNDEF 330" >> merg.ctl', shell=True) subprocess.call('echo "TITLE globally merged IR data" >> merg.ctl', shell=True) subprocess.call('echo "XDEF 9896 LINEAR 0.0182 0.036378335" >> merg.ctl', shell=True) subprocess.call('echo "YDEF 3298 LINEAR -59.982 0.036383683" >> merg.ctl', shell=True) subprocess.call('echo "ZDEF 01 LEVELS 1" >> merg.ctl', shell=True) subprocess.call(replaceExpTdef, shell=True) subprocess.call('echo "VARS 1" >> merg.ctl', shell=True) subprocess.call('echo "ch4 1 -1,40,1,-1 IR BT (add "75" to this value)" >> merg.ctl', shell=True) subprocess.call('echo "ENDVARS" >> merg.ctl', shell=True) #generate the lats4D command for GrADS lats4D = 'lats4d -v -q -lat ' + LATMIN +' ' + LATMAX + ' ' + '-lon ' + LONMIN + ' ' + LONMAX + ' -time '+hr+'Z'+day+mth+yy + ' -func @+75 ' + '-i merg.ctl' + ' -o ' + fname gradscmd = 'grads -lbc ' + '\'' +lats4D + '\'' #run grads and lats4d command subprocess.call(gradscmd, shell=True) #----- Generate GrADS images-------------- imgFilename = hr+'Z'+day+mth+yy+'.gif' tempMaskedImages(imgFilename) #-------End Generate GrADS images --------- #remove the raw data file to save some resources rmCmd= "rm " + fname subprocess.call(rmCmd, shell=True) #when all the files have been converted, mv the netcdf files subprocess.call('mkdir mergNETCDF', shell=True) subprocess.call('mv *.nc mergNETCDF', shell=True) #mv all images subprocess.call('mkdir mergImgs', shell=True) subprocess.call('mv *.gif mergImgs', shell=True) return #****************************************************************** def tempMaskedImages(imgFilename): ''' Purpose:: To generate temperature-masked images for a first pass verification Input:: filename for the gif file Output:: None - gif images for each file of IR temperature < T_BB_MAX are generated in folder called mergImgs Assumptions:: Same as for preprocessingMERG 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) have been installed on the system and the user can access 2 User can write files in location where script is being called 3 the files havent been unzipped ''' subprocess.call('rm tempMaskedImages.gs', shell=True) subprocess.call('touch tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'open merg.ctl''\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set mpdset hires''\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set lat ' +LATMIN + ' ' +LATMAX +'\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set lon ' +LONMIN + ' ' +LONMAX +'\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set cint 10''\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set cmax '+T_BB_MAX +'\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'set cmin '+T_BB_MIN +'\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'d ch4+75''\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'draw title Masked Temp @ '+imgFilename +'\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'printim '+imgFilename +' x1000 y800''\'" >> tempMaskedImages.gs', shell=True) subprocess.call('echo "''\'quit''\'" >> tempMaskedImages.gs', shell=True) gradscmd = 'grads -blc ' + '\'run tempMaskedImages.gs''\'' subprocess.call(gradscmd, shell=True) return #****************************************************************** |
- Provide the path to the data folder in controlProcessing.py
- Run controlProcessing.py
Code Block language python title controlProcessing.py linenumbers true collapse true ''' Program to run the preprocessing ''' import preprocessing #ensure preprocessing.py and this file are in the same folder def main(): #change path accordingly print "running ..." preprocessing.preprocessingMERG('/this/is/my_path') print "...outta here!" main()
The NETCDF files in the mergNETCDF file generated are now ready to be ingested into OCW.
- To ingest these files into OCW format, use the readMergData to open the files in OCW format. This step assumes that OCW installed.
Code Block | ||||||||
---|---|---|---|---|---|---|---|---|
| ||||||||
''' #Module to read MERG data in OCW format ''' import glob import itertools import Nio import numpy as np import numpy.ma as ma import sys #existing modules in services import process #----------------------- GLOBAL VARIABLES -------------------------- # --------------------- User defined variables --------------------- LATMIN = '-5.0' #min latitude; -ve values in the SH e.g. 5S = -5 LATMAX = '20.0' #max latitude; -ve values in the SH e.g. 5S = -5 LONMIN = '-30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 LONMAX = '30.0' #min longitude; -ve values in the WH e.g. 59.8W = -59.8 T_BB_MAX = 250 #warmest temp to allow #------------------- End user defined Variables ------------------- #------------------------ End GLOBAL VARS ------------------------- #****************************************************************** def readMergData(dirname): ''' Purpose:: Read MERG data into RCMES format Input:: Directory to the MERG files in NETCDF format Output:: A 3D masked array (t,lat,lon) with only the variables which meet the minimum temperature criteria for each frame **remove below** A dictionary of all the MERG data from the files in the directory given. The dictionary contains, a string representing the time (a datetime string), a tuple (lat,lon,value) representing only the data that meets the temperature requirements i.e. T_BB_MAX Assumptions:: The MERG data has been converted to NETCDF using LATS4D. See MERG utility function preprocessingMERG The data has the same lat/lon format ''' global LAT global LON # these strings are specific to the MERG data mergVarName = 'ch4' mergTimeVarName = 'time' mergLatVarName = 'latitude' mergLonVarName = 'longitude' filelistInstructions = dirname + '/*' filelist = glob.glob(filelistInstructions) #sat_img is the array that will contain all the masked frames mergImgs = [] #timelist of python time strings timelist = [] time2store = None filelist.sort() nfiles = len(filelist) # Crash nicely if there are no netcdf files if nfiles == 0: print 'Error: no files in this directory! Exiting elegantly' sys.exit() else: # Open the first file in the list to read in lats, lons and generate the grid for comparison tmp = Nio.open_file(filelist[0], format='nc') #clip the lat/lon grid according to user input if needed like #http://www.pyngl.ucar.edu/NioExtendedSelection.shtml #latsraw = tmp.variables[mergLatVarName][mergLatVarName+"|"+LATMIN+":"+LATMAX].astype('f2') #lonsraw = tmp.variables[mergLonVarName][mergLonVarName+"|"+LONMIN+":"+LONMAX].astype('f2') latsraw = tmp.variables[mergLatVarName][:].astype('f2') lonsraw = tmp.variables[mergLonVarName][:].astype('f2') lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. # convert to -180,180 if necessary LON, LAT = np.meshgrid(lonsraw, latsraw) #clean up latsraw =[] lonsraw = [] tmp.close for files in filelist: thisFile = Nio.open_file(files, format='nc') #mask the data and fill with zeros for later tempRaw = thisFile.variables[mergVarName][:].astype('int16') #clip the dataset according to user lat,long coordinates # tempRaw = thisFile.variables[mergVarName][mergLatVarName+"|"+LATMIN+":"+LATMAX \ # +" " +mergLonVarName+"|"+LONMIN+":"+LONMAX ].astype('int16') tempMask = ma.masked_array(tempRaw, mask=(tempRaw > T_BB_MAX), fill_value=0) #get the actual values that the mask returned tempMaskedValue = ma.zeros((tempRaw.shape)).astype('int16') for index, value in maenumerate(tempMask): time_index, lat_index, lon_index = index tempMaskedValue[time_index,lat_index, lon_index]=value #timesRaw = thisFile.variables[mergTimeVarName] #convert this time to a python datastring time2store, _ = process.getModelTimes(files, mergTimeVarName) print time2store #extend instead of append because getModelTimes returns a list already and we don't #want a list of list timelist.extend(time2store) #to get the actual masked data only mergImgs.extend(tempMaskedValue) thisFile.close mergImgs = ma.array(mergImgs) return mergImgs, timelist #****************************************************************** #****************************************************************** # # UTILITY SCRIPTS FOR MERG.PY # #****************************************************************** def maenumerate(mArray): ''' Purpose:: Utility script for returning the actual values from the masked array Taken from: http://stackoverflow.com/questions/8620798/numpy-ndenumerate-for-masked-arrays Input:: 1 variable mArray - the masked array returned from the ma.array() command Output:: 1 variable maskedValues - 3D (t,lat,lon), value of only masked values ''' mask = ~mArray.mask.ravel() for index, maskedValue in itertools.izip(np.ndenumerate(mArray), mask): if maskedValue: yield index |
I hope this helps!