# -*- coding: cp1252 -*-
# Script to analyse IGRA daily radiosonde data
# Copyright (C) 2013 Dr. Ronan Connolly
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
#
# ********************************************************************
#
# This script uses NOAA NCDC IGRA radiosonde data
# see http://www.ncdc.noaa.gov/oa/climate/igra/
# and Durre et al., 2006 (J. Clim.; Vol 19, pp53-68)
#
# Download the appropriate files from here:
# ftp://ftp.ncdc.noaa.gov/pub/data/igra/derived-v2/
# Then download the xxxxx.dat.gz file for your station from:
# ftp://ftp.ncdc.noaa.gov/pub/data/igra/derived-v2/data-por/
# and place in the input_folder
import matplotlib
matplotlib.use('Agg') # Use a non-interactive back-end to avoid plt.close() errors (non-critical) - this means you can't use plt.show()
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import stats
import gzip
input_folder="input_data"
# Change ID to the station ID corresponding to your station
ID="03953" # Valentia Observatory, Ireland
#ID="48698" # Changi International Airport, Singapore
#ID="71043" # Norman Wells, Canada
#ID="89642" # Dumont D'Urville, Antarctica 66.67S, 140.02E
# flags
# flag for generating daily plots:
# if you want daily T, D and wind vs P plots
# set to 1 for all data or set to year for specific year
# otherwise keep as 0
generate_daily_plots=0
use_wind=100 # max wind speed (m/s) shown in graphs, set to -1, if you don't want to use
R=8.3145 # ideal gas constant J/K/mol
std_pressure=101325 # standard pressure (Pa)
region1_boundaries=[2000,12000] # assume these pressures are all in heavy phase
region2_boundaries=[45000,85000] # assume these pressures are all in light phase
tropopause_region=[5000,105000]
month_names=["Annual","January","February","March","April",
"May", "June", "July", "August", "September",
"October","November", "December"]
leap_years=[1900, 1904, 1908, 1912, 1916, 1920, 1924, 1928, 1932, 1936, 1940,
1944, 1948, 1952, 1956, 1960, 1964, 1968, 1972, 1976, 1980, 1984,
1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020, 2024, 2028]
days_of_month_nly=[31,28,31,30,31,30,31,31,30,31,30,31]
days_of_month_ly=[31,29,31,30,31,30,31,31,30,31,30,31]
# number of radiosondes needed to calculate monthly means
num_points_needed_per_month=1
# histogram settings
max_bin=50000
min_bin=-50000
num_bins=1000
bin_size=(max_bin-min_bin)/num_bins
# Check derived-stations.txt for station details, if available
station_identified=0
if os.path.isfile("derived-stations.txt"):
station_list=open("derived-stations.txt",'r')
for k in station_list:
cur_country=k[0:2]
cur_ID=k[4:9]
cur_name=k[11:46]
cur_lat=k[47:53]
cur_lon=k[54:61]
cur_elev=k[62:66]
cur_begin=k[72:76]
cur_end=k[77:81]
if ID==cur_ID:
station_identified=1
station_country=cur_country
station_name=cur_name.strip()
station_lat=cur_lat
station_lon=cur_lon
station_elev=cur_elev
station_begin=cur_begin
station_end=cur_end
print "Analysing "+station_name
print "Latitude, longitude = "+station_lat+", "+station_lon
print "Elevation = "+station_elev+" m"
print "Record spans "+station_begin+"-"+station_end
station_list.close()
if station_identified==0:
print "Could not identify station from derived-stations.txt. Will just refer to station as "+ID
station_name=ID
else:
print "Cannot find derived-stations.txt. Will just refer to station as "+ID
station_name=ID
# Create output folders
if generate_daily_plots!=0:
graph_folder="output_graphs"
if not os.path.isdir(graph_folder):
os.mkdir(graph_folder)
if not os.path.isdir(graph_folder+"/"+ID):
os.mkdir(graph_folder+"/"+ID)
analysis_folder="output_phase_changes"
if not os.path.isdir(analysis_folder):
os.mkdir(analysis_folder)
output_folder=analysis_folder+"/"+ID
if not os.path.isdir(output_folder):
os.mkdir(output_folder)
# Open station data file
if not os.path.isdir(analysis_folder):
print "Cannot find input folder, i.e., "+input_folder+"\nStopping..."
raise SystemExit
IGRA_filename=input_folder+"/"+ID+".dat.gz"
if os.path.isfile(IGRA_filename):
print "Counting number of lines in file..."
num_lines_in_file = sum(1 for ln in gzip.open(IGRA_filename))
print str(num_lines_in_file)+" lines"
print "Reading "+IGRA_filename+"..."
IGRA=gzip.open(IGRA_filename,'rb')
else:
print "Could not open "+IGRA_filename
raise SystemExit
# Open output files
f_phase_changes_all=open(output_folder+"/all_"+ID+"_phase_changes.txt",'w')
f_phase_changes_monthly=open(output_folder+"/monthly_"+ID+"_phase_changes.txt",'w')
f_phase_changes_yearly=open(output_folder+"/yearly_"+ID+"_phase_changes.txt",'w')
# generate header information for output files
if station_identified==1:
f_phase_changes_all.write(station_name+"\tID : "+ID+"\nLatitude, longitude : "+station_lat+", "+station_lon+"\n")
f_phase_changes_monthly.write(station_name+"\tID : "+ID+"\nLatitude, longitude : "+station_lat+", "+station_lon+"\n")
f_phase_changes_yearly.write(station_name+"\tID : "+ID+"\nLatitude, longitude : "+station_lat+", "+station_lon+"\n")
f_phase_changes_all.write("Elevation : "+station_elev+" m\nRecord spans : "+station_begin+"-"+station_end+"\n")
f_phase_changes_monthly.write("Elevation : "+station_elev+" m\nRecord spans : "+station_begin+"-"+station_end+"\n")
f_phase_changes_yearly.write("Elevation : "+station_elev+" m\nRecord spans : "+station_begin+"-"+station_end+"\n")
else:
f_phase_changes_all.write("ID : "+ID+"\n")
f_phase_changes_monthly.write("ID : "+ID+"\n")
f_phase_changes_yearly.write("ID : "+ID+"\n")
f_phase_changes_all.write("Date\ta_1\tc_1\ta_2\t_c2\tP@phase\tT@phase\twind@phase\t")
f_phase_changes_all.write("P@ground\tT@ground\twind@ground\n")
f_phase_changes_monthly.write("Year\tMon\tP_phase\tT_phase\tP_ground\tT_ground\n")
f_phase_changes_yearly.write("Year\tP_phase\tT_phase\tP_ground\tT_ground\n")
# Read in data, line by line, because the data files are large
for line_number, line in enumerate(IGRA):
# Is this the first line in the file?
if line_number==0:
# initialize data for first sonde
ID=line[1:6]
year=int(line[6:10])
month=int(line[10:12])
day=int(line[12:14])
hour=int(line[14:16])
reltime=int(line[16:20])
numlevel=int(line[20:24])
# calculate date in decimal form, i.e., year + fraction of year
if year in leap_years:
length_of_year=366
else:
length_of_year=365
year_day=day-1
cur_month=1
while cur_monthmax_P:
max_P=P
T=float(temperature)/10 # temperature (K)
if Tmax_T:
max_T=T
D=round(P/(R*T),2) # molar density (mol/m3)
if Dmax_D:
max_D=D
cur_sonde[P]={}
cur_sonde[P]["T"]=T
cur_sonde[P]["D"]=D
u_wind=float(zonal_wind)/10
v_wind=float(meridional_wind)/10
if (u_wind>-999) and (v_wind>-999):
wind=round(np.sqrt((u_wind)*(u_wind)+(v_wind)*(v_wind)),2) # wind speed (m/s)
cur_sonde[P]["wind"]=wind
if wind>max_wind:
max_wind=wind
wind_string=str(wind)
else:
wind_string=""
if line[0:1]=="#" or line_number==num_lines_in_file-1:
# We've already established this isn't the first line
# Therefore, this is either a header line for a new sonde or the end of the file
# So, we carry out our analysis for last sonde
# Initialize our analysis lists and variables
ground_P=0
T_pressures=[]
wind_pressures=[]
D_values=[]
T_values=[]
wind_values=[]
region1_pressures=[]
region1_molar_densities=[]
region2_pressures=[]
region2_molar_densities=[]
# Assign D, T and P values to lists
for P in sorted(cur_sonde):
if cur_sonde[P].has_key("T"):
if P>ground_P:
ground_P=P
T_pressures.append(P)
D_values.append(cur_sonde[P]["D"])
T_values.append(cur_sonde[P]["T"])
# Check if data point is in the defined Region 1 or Region 2 ranges
if (P>region1_boundaries[0]) and (Pregion2_boundaries[0]) and (P2:
slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(region1_pressures,region1_molar_densities)
# plotting the line
region1_line=np.array(T_pressures)
line1 = (slope1*region1_line)+intercept1
fitted_regions[1]=1
f_phase_changes_all.write(str(round(slope1,6))+"\t"+str(round(intercept1,2))+"\t")
else:
f_phase_changes_all.write("\t\t")
# If there are enough data points in the Region 2 range, then calculate slope of linear fit
if len(region2_pressures)>2:
slope2, intercept2, r_value2, p_value2, std_err2 = stats.linregress(region2_pressures,region2_molar_densities)
# plotting the line
region2_line=np.array(T_pressures)
line2 = (slope2*region2_line)+intercept2
fitted_regions[2]=1
f_phase_changes_all.write(str(round(slope2,6))+"\t"+str(round(intercept2,2))+"\t")
else:
f_phase_changes_all.write("\t\t")
phase_change_identified=0
# Check if slopes have been calculated for both phases...
if fitted_regions[1]==1:
if fitted_regions[2]==1:
# Slopes 1 and 2 have been determined, therefore can calculate phase change
a=np.array([[-slope1,1],[-slope2,1]])
b=np.array([intercept1,intercept2])
phase_P,phase_D=np.linalg.solve(a,b) # phase change point
phase_change_x=[phase_P,phase_P]
phase_change_y=[0,1000]
# Check if phase change point occurred in atmosphere!
if phase_P>0:
f_phase_changes_all.write(str(int(phase_P))+"\t")
P_above_phase_for_T=100000
P_below_phase_for_T=0
P_above_phase_for_wind=100000
P_below_phase_for_wind=0
for P in sorted(cur_sonde):
if cur_sonde[P].has_key("T"):
if (P>=phase_P) and (PP_below_phase_for_T):
P_below_phase_for_T=P
if cur_sonde[P].has_key("wind"):
if (P>=phase_P) and (PP_below_phase_for_wind):
P_below_phase_for_wind=P
found_higher_and_lower_P=1
if P_above_phase_for_T==100000:
found_higher_and_lower_P=0
else:
T_above_phase_change=cur_sonde[P_above_phase_for_T]["T"]
if P_below_phase_for_T==0:
found_higher_and_lower_P=0
else:
T_below_phase_change=cur_sonde[P_below_phase_for_T]["T"]
if found_higher_and_lower_P==1:
if T_above_phase_change!=T_below_phase_change:
# We want to find T_c, i.e., T at the phase change, where
# P_above>=P_phase>=P_below
# Let us assume the three temperatures are linear with respect to P
# over this short region. Then we can use the equation of the line.
# first calculate the slope:
# m=(y_above-y_below)/(x_above-x_below)
m=(T_above_phase_change-T_below_phase_change)/(P_above_phase_for_T-P_below_phase_for_T)
# Then, since we know x_phase (i.e., "P_phase"), we can calculate y_phase (i.e., "T_phase")
# y_phase-y_below=m*(x_phase-x_below)
# y_phase=m*(x_phase-x_below)+y_below
T_at_phase_change=round(((phase_P-P_below_phase_for_T)*m)+T_below_phase_change,1)
else:
T_at_phase_change=T_above_phase_change
f_phase_changes_all.write(str(T_at_phase_change)+"\t")
# Calculate ln P/P_std and 1/T for determining delta H using van't Hoff equation
phase_changes_ln_P.append(round(np.log(phase_P/std_pressure),5)) # np.log = natural log, i.e., ln
phase_changes_reciprocal_T.append(round((1/T_at_phase_change),6))
# add values to monthly bins
monthly_P_phase.append(phase_P)
monthly_T_phase.append(T_at_phase_change)
monthly_P_ground.append(ground_P)
monthly_T_ground.append(cur_sonde[ground_P]["T"])
else:
f_phase_changes_all.write("\t")
if (P_above_phase_for_wind<100000) and (P_below_phase_for_wind>0):
wind_above_phase_change=cur_sonde[P_above_phase_for_wind]["wind"]
wind_below_phase_change=cur_sonde[P_below_phase_for_wind]["wind"]
# Using the same logic as for T_at_phase_change
if wind_above_phase_change!=wind_below_phase_change:
m=(wind_above_phase_change-wind_below_phase_change)/(P_above_phase_for_wind-P_below_phase_for_wind)
wind_at_phase_change=round(((phase_P-P_below_phase_for_wind)*m)+wind_below_phase_change,2)
else:
wind_at_phase_change=wind_above_phase_change
f_phase_changes_all.write(str(wind_at_phase_change)+"\t")
else:
f_phase_changes_all.write("\t")
f_phase_changes_all.write(str(ground_P)+"\t")
f_phase_changes_all.write(str(cur_sonde[ground_P]["T"])+"\t")
if cur_sonde[ground_P].has_key("wind"):
f_phase_changes_all.write(str(cur_sonde[ground_P]["wind"])+"\n")
else:
f_phase_changes_all.write("\n")
else:
f_phase_changes_all.write(str(round(phase_P,0))+"\n")
else:
# Slope 1 but not 2 has been determined
f_phase_changes_all.write("\n")
else:
# Slope 1 has not been determined
f_phase_changes_all.write("\n")
# If we want to generate daily plots, then do so...
if (generate_daily_plots==1) or (generate_daily_plots==cur_year):
# Make sure there is a folder for these plots
study_folder=graph_folder+"/"+ID+"/"+str(cur_year)
if not os.path.isdir(study_folder):
os.mkdir(study_folder)
plt.figure() # create blank figure canvas
# three subplots sharing x axis
f, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, sharey=False)
ax1.set_ylabel('Temperature (K)',fontsize=12, fontweight='bold')
ax1.set_ylim([min_T,max_T])
if (fitted_regions[1]==1) and (fitted_regions[2]==1):
ax1.plot(phase_change_x,phase_change_y, 'k--',linewidth=3)
ax1.plot(T_pressures, T_values,linestyle='-', color='red', linewidth=3)
ax1.plot(T_pressures, T_values,marker='o', markerfacecolor='grey',
markeredgecolor='black', linestyle='none', markersize=6)
ax2.set_ylabel('D (mol/m3)',fontsize=12, fontweight='bold')
ax2.set_ylim([min_D,max_D])
if (fitted_regions[1]==1) and (fitted_regions[2]==1):
ax2.plot(region1_line, line1, color='darkorange', linestyle='-',linewidth=2)
ax2.plot(region2_line, line2, color='green', linestyle='-',linewidth=2)
ax2.plot(phase_change_x,phase_change_y, 'k--',linewidth=3)
# ax2.plot(T_pressures,D_values,'ko', markersize=5)
ax2.plot(T_pressures,D_values, linestyle='none',
marker='o', markerfacecolor='grey',
markersize=6, markeredgecolor='black')
ax3.set_ylabel('Wind speed (m/s)',fontsize=12, fontweight='bold')
if use_wind>0:
ax3.set_ylim([0,use_wind])
else:
ax3.set_ylim([0,max_wind])
if (fitted_regions[1]==1) and (fitted_regions[2]==1):
ax3.plot(phase_change_x,phase_change_y, 'k--',linewidth=3)
ax3.plot(wind_pressures,wind_values,color='blue', linewidth=3,
linestyle='-', marker='o', markerfacecolor='grey',
markersize=6, markeredgecolor='black')
plt.xlabel('Pressure (Pa)',fontsize=12, fontweight='bold')
plt.xlim([min_P,max_P])
if hour<10:
hour_string="0"+str(hour)
else:
hour_string=str(hour)
sonde_title=station_name+". "+str(day)+" "+month_names[month]+", "+str(year)+". "+str(hour_string)+" UTC"
plt.suptitle(sonde_title, fontsize=14, fontweight='bold')
plt.savefig(study_folder+"/"+sonde_name+".jpg")
plt.close()
# Is it the end of a month? If so, carry out analysis for previous month
if line[0:1]!="#" or int(line[10:12])!=cur_month:
# either we've reached 1) end-of-file or 2) header for a new month
if len(monthly_P_phase)>=num_points_needed_per_month:
avg_P_t=int(np.mean(monthly_P_phase))
avg_P_g=int(np.mean(monthly_P_ground))
avg_T_t=round(np.mean(monthly_T_phase),2)
avg_T_g=round(np.mean(monthly_T_ground),2)
f_phase_changes_monthly.write(str(year)+"\t"+str(cur_month)+"\t")
f_phase_changes_monthly.write(str(avg_P_t)+"\t"+str(avg_T_t)+"\t"+str(avg_P_g)+"\t"+str(avg_T_g)+"\n")
yearly_P_phase.append(avg_P_t)
yearly_T_phase.append(avg_T_t)
yearly_P_ground.append(avg_P_g)
yearly_T_ground.append(avg_T_g)
else:
f_phase_changes_monthly.write(str(year)+"\t"+str(cur_month)+"\n")
monthly_T_phase=[]
monthly_P_phase=[]
monthly_T_ground=[]
monthly_P_ground=[]
cur_month=int(line[10:12])
# Is it the end of a year? If so, carry out analysis for previous year
if line[0:1]!="#" or int(line[6:10])!=cur_year:
# either we've reached 1) end-of-file or 2) header for a new year
if len(yearly_P_phase)==12:
avg_P_t=int(np.mean(yearly_P_phase))
avg_P_g=int(np.mean(yearly_P_ground))
avg_T_t=round(np.mean(yearly_T_phase),2)
avg_T_g=round(np.mean(yearly_T_ground),2)
f_phase_changes_yearly.write(str(year)+"\t")
f_phase_changes_yearly.write(str(avg_P_t)+"\t"+str(avg_T_t)+"\t"+str(avg_P_g)+"\t"+str(avg_T_g)+"\n")
else:
f_phase_changes_yearly.write(str(year)+"\n")
yearly_T_phase=[]
yearly_P_phase=[]
yearly_T_ground=[]
yearly_P_ground=[]
if line[0:1]=="#":
# This is a header line, so initialize data for latest sonde
ID=line[1:6]
year=int(line[6:10])
month=int(line[10:12])
day=int(line[12:14])
hour=int(line[14:16])
reltime=int(line[16:20])
numlevel=int(line[20:24])
# calculate date in decimal form, i.e., year + fraction of year
if year in leap_years:
length_of_year=366
else:
length_of_year=365
year_day=day-1
cur_month=1
while cur_month