# simple climatology on daily GHCN weather station data # data scraping script is a minor extension of Steve McIntyre's http://www.climateaudit.org/scripts/station/read.ghcnd.txt to include precipitation # read.ghcnd("StationName") returns a dataframe of TMAX, TMIN, TMEAN and PREC with valid data daycount number for each month read.ghcnd=function(id){ loc=file.path("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/all",paste(id,".dly",sep="")) test=download.file(loc,"temp.dat") fred=readLines("temp.dat") year=as.numeric(substr(fred,12,15));range(year); month=as.numeric(substr(fred,16,17));range(month); type0=substr(fred,18,21);unique(type0) #""PRCP" "TMAX" "TMIN" temp2=(type0=="TMAX");sum(temp2); # number of T_Max data lines temp3=(type0=="TMIN");sum(temp3) temp4=(type0=="PRCP");sum(temp4) time0=100*year+month; year1<-min(year):max(year) g<-function(x,y){g<-100*x+y;g} chron<-sort(c(outer(year1,1:12,g))); #generates vector of dates chron=data.frame(chron); # coerce chron to dataframe names(chron)="time"; widths0=c(21,rep(c(5,3),31)) y=cumsum(widths0) widths0=cbind(1+c(0,y[1:(length(y)-1)]),y ) nrow(widths0) #100 widths0=widths0[seq(2,62,2),] # scrape quality control flags etc from GHCN daily files #maximums x=fred[temp2]; #T_MAX data lines only X=array(NA,dim=c(sum(temp2),31) ); X=data.frame(X) f=function(i){ f=substr(x,widths0[i,1],widths0[i,2]);f} for(i in 1:31){X[,i]=f(i)} for(i in 1:31){X[,i]=as.numeric(X[,i])}; # X contains vector of daily T_MAX temp=(X== -9999);sum(temp);X[temp]=NA # replace missing data -999 with NA count=apply(!is.na(X),1,sum) # count is the number of good months xtime=time0[temp2]; # months which contain T_MAX data test=match(chron$time,xtime); temp=!is.na(test); # temp vector element is TRUE when days when data exists chron$count_max=NA;chron$tmax=NA; chron[temp,"count_max"]=count; chron[temp,"tmax"]= apply(X[test[temp],],1,mean,na.rm=TRUE) #minimums x=fred[temp3] X=array(NA,dim=c(sum(temp3),31) ); X=data.frame(X) f=function(i){ f=substr(x,widths0[i,1],widths0[i,2]);f} for(i in 1:31){X[,i]=f(i)} for(i in 1:31){X[,i]=as.numeric(X[,i])} temp=(X== -9999);sum(temp);X[temp]=NA count=apply(!is.na(X),1,sum) xtime=time0[temp3] test=match(chron$time,xtime);temp=!is.na(test) chron$count_min=NA;chron$tmin=NA chron[temp,"count_min"]=count;chron[temp,"tmin"]= apply(X[test[temp],],1,mean,na.rm=TRUE) chron[,c("tmax","tmin")]=round(chron[,c("tmax","tmin")]/10,2) #AVERAGES chron$tmean=apply(chron[,c("tmax","tmin")],1,mean) #precipitation x=fred[temp4]; X=array(NA,dim=c(sum(temp4),31) ); X=data.frame(X) f=function(i){ f=substr(x,widths0[i,1],widths0[i,2]);f} for(i in 1:31){X[,i]=f(i)} for(i in 1:31){X[,i]=as.numeric(X[,i])} temp=(X== -9999);sum(temp);X[temp]=NA count=apply(!is.na(X),1,sum) xtime=time0[temp4] test=match(chron$time,xtime);temp=!is.na(test) chron$count_prec=NA;chron$prec=NA chron[temp,"count_prec"]=count;chron[temp,"prec"]= apply(X[test[temp],],1,mean,na.rm=TRUE) read.ghcnd=chron read.ghcnd }; # generate longest available contiguous time-series of station data. This should automatically include the most recent data in most cases. getTMean.ts = function(gsn){ startyear <- trunc(gsn$time[1]/100); gsn.ts <-na.contiguous(ts(gsn$tmean, start=c(startyear,1),deltat=1/12)); getTMean.ts = gsn.ts getTMean.ts }; # mean temperature getTMax.ts = function(gsn){ startyear <- trunc(gsn$time[1]/100); gsn.ts <-na.contiguous(ts(gsn$tmax, start=c(startyear,1),deltat=1/12)); getTMax.ts = gsn.ts getTMax.ts }; # TMax getTMin.ts = function(gsn){ startyear <- trunc(gsn$time[1]/100); gsn.ts <-na.contiguous(ts(gsn$tmin, start=c(startyear,1),deltat=1/12)); getTMin.ts = gsn.ts getTMin.ts }; # TMin getPrec.ts = function(gsn){ startyear <- trunc(gsn$time[1]/100); gsn.ts <-na.contiguous(ts(gsn$prec, start=c(startyear,1),deltat=1/12)); getPrec.ts = gsn.ts getPrec.ts }; # Precipitation (mm/day) # seasonal analysis using stl loess getTrend.ts = function( gsn.ts){ seas=stl(gsn.ts,"per",t.window=151)$time.series[,2]; seas.ts = ts( seas, start=start(gsn.ts), deltat=1/12); seas.ts }; getResidual.ts = function( gsn.ts){ seas=stl(gsn.ts,"per",t.window=151)$time.series[,3]; seas.ts = ts( seas, start=start(gsn.ts), deltat=1/12); seas.ts }; #Example: Mean Monthly Temperature UCCLE Belgium: trend and residual #gsn1 <-read.ghcnd("BE000006447") #gsn1.ts <- getTMean.ts(gsn1) #plot(gsn1.ts, main="Mean Monthly Temperature UCCLE Belgium", ylab="o C",xlab="Year",font.axis=2,font.lab=2) #lines(getTrend.ts(gsn1.ts),col=2,lwd=2); #lines( #maxTrend <- max(getTrend.ts(gsn1.ts); #maxLine.ts <- ts( rep(maxTrend,length(gsn1.ts)), start= start(gsn1.ts), deltat=1/12) #plot(getResidual.ts(gsn1.ts), main="Monthly Temperature Anomaly UCCLE Belgium", ylab="o C",xlab="Year",font.axis=2,font.lab=2,col=3);