#ANALYZE TREE RING PRINCIPAL COMPONENTS # this uses the compilations of data described in "tree.collate" which describes collation of NGDC *.crn data into datasets corresponding to Mann lists #this script produces and save 32 graphics comparing verification and Mann PCs for proxies in directory "c:/climate/images/treepc" # it calculates and saves variance and correlation information in "relationlist" directory "c:/climate/data/articles/mbh98.treepc" #DIRECTORY INFORMATION base.in<-"c:/climate/data/tree" # this is base.out in "tree.collate" image.out<-"c:/climate/images/final" data.out<-"c:/climate/data/articles/final" base.out<-image.out #LOAD MANN DATA load("c:/climate/data/mann/proxy.tab") # loads proxy proxy0<-proxy load("c:/climate/data/prname.tab") #load prname - names and info on proxies load("c:/climate/data/mann/mwpproxy.tab") # loads mwp # this is mwp proxies #REGIONS ARE AS FOLLOWS: region<-c("Texas-Oklahoma (Stahle)","Texas-Mexico (Stahle)","North America ITRDB MBH98","South America","Australia","Western USA ITRDB MBH99","Western USA - 6 long MJ2003") n.region<-length(region) #lists of ITRDB identifications in each dataset were compiled from MBH98 data_supp and saved in several text-files recovered as follows: treelist<-list("stahle.texoke","stahle.texmex","mbh98","southamerica","australia","mbh99","mj2003") #this is a parameterization to keep track of periods in MBH period<-list(c(1400,1994),c(1400,1994),c(1400,1994),c(1400,1994),c(1400,1994),c(470,1991),c(470,1991)) pclim<-c(3,3,3,3,3,3,1) #keeps tracks of # to plot in output mannid<-list(c(69:71),c(72:80),c(84:92),c(93:95),c(96:99),c(10:12),10) nmann<-c(3,9,9,3,4,3,1)#of of PCs in each dataset #BETA-FUNCTION betam<-function(y,yest) { betam<-1-(sum((y-yest)^2))/sum(y^2); betam; } #SET UP EMPTY ARRAYS start.pc<-rep(list(c(NA,NA)),n.region) end.pc<-rep(list(c(NA,NA)),n.region) dates.mann<-rep(list(c(NA,NA)),n.region) variance.out<-array(c(rep(NA,12*n.region)),dim=c(n.region,12)) b<-c("total.maxp","recalc.maxp","mann.maxp","total.overlap","recalc.overlap","mann.overlap") b1<-paste("norm",b,sep=".") b2<-c(b,b1) variance.out<-data.frame(variance.out) dimnames(variance.out)[[2]]<-b2 relationlist<-NULL for (m in 1:5) { loc.out<-file.path(base.in,paste("tree",treelist[m],"data","txt",sep=".")) tree<-read.table(file=loc.out,sep="\t") tree.period<-tree[,1] #COMPARE TO MANN #EXTRACT APPLICABLE PCS FROM MANN PROXY COLLATIONS AND OBTAIN MAXIMAL NON-NA DATES if((m==6) | (m==7)) proxy<-mwp else proxy<-proxy0 pc.mann<-proxy[,mannid[[m]]] #this picks up applicable PCs in MBH dates<-tsp(pc.mann) #this is start.date and end.date in MBH compilation 1400,1994 for MBH98 and 1000,1991 for MBH99 temp<-as.matrix(!is.na(pc.mann)) #find start dates and end dates for MBH PCs start.mann.1<-as.vector(c(rep(NA,length(mannid[[m]])))) end.mann.1<-start.mann.1 for (i in 1:length(mannid[[m]])) { start.mann.1[i]<-min(c(period[[m]][1]:period[[m]][2])[temp[,i]]) end.mann.1[i]<-max(c(period[[m]][1]:period[[m]][2])[temp[,i]]) } start.mann.1 #1750 1750 1750 1750 for 5-Australia end.mann.1 #1980 1980 1980 1980 for 5-Australia start.mann<-max(start.mann.1) end.mann<-min(end.mann.1) mann.period<-c(start.mann:end.mann) if (nmann[m]==1) pc.mann<-as.matrix(pc.mann) pc.mann<-ts (pc.mann[((start.mann-dates[1]+1):(end.mann-dates[1]+1)),],start=start.mann,end=end.mann) tsp(pc.mann) #this is truncated to non-NA period #RELATE PERIODS #overlap is the period of overlap between the maximal Mann period and maximal tree period #the two temps are used to extract periods from series neatly overlap<-c(max(min(mann.period),min(tree.period)):min(max(mann.period),max(tree.period))) temp1<-!is.na( match(tree.period,overlap)) temp2<-!is.na( match(mann.period,overlap)) dates.tree<-c(min(tree[,1]),max(tree[,1])) dates.tree tree<-tree[,2:ncol(tree)] #removes year marker in column 1 #DO PC ANALYSIS #THIS IS DONE IN THE FOLLOWING 4 PERMUTATIONS TRYING TO DECODE AND REPLICATE MANN #1) #MAXIMAL PERIOD - I.E. THE PERIOD FOR WHICH ALL TREE SITES ARE AVAILABLE #OVERLAP PERIOD - I.E. THE PERIOD OF OVERLAP BETWEEN MANN PC CALCULATION AND MAXIMAL PERIOD #2) #BASE-1000 STANDARDIZATION. SITE INDEX NUMBERS ("CHRONOLOGIES") CONVENTIONALLY HAVE BASE 1000 IN *.CRN #(MEAN=0,SD=1) SITE-BY-SITE TRANSFORMATION. BY EXPERIMENTATION AND REPLICATION OF MBH99, THIS IS WHAT MBH DID. #BY EXACT REPLICATION OF MBH99 USING (MEAN=0,SD=1) THIS METHOD IS USED IN REPLICATION #MANN USUALLY HAS SEVERAL YEARS BETWEEN END OF MAXIMAL PERIOD AND 1980 IN WHICH HE HAS PLUGGED DATA #MANN'S START POINTS DON'T RECONCILE IN ANY OBVIOUS WAY WITH MAXIMAL PERIODS #PC ANALYSIS IS DONE ON BOTH BASE AND SD-TRANSFORMED DATASETS AND EXPLAINED VARIANCE COMPILED AND COLLATED IN "variance.out" #EXAMPLE DATA IN SCRIPT IS FOR AUSTRALIA m=5 tree<-scale(tree) tree<-ts(tree,start=dates.tree[1],end=dates.tree[2]) #hard to do variances in data.frame #BASE-1000 DATA FIRST (MBH IS THE BASE-0 DATA IN RUN A BIT BELOW) #FIRST COLLECT TOTAL VARIANCES variance.out[m,7]<-var(c(tree))*length(c(tree)); # [1] 5584.997 #info is for m=5 variance.out[m,10]<-var(c(tree[temp1,])) *length (c(tree[temp1,])) #[1] 3808.255 #A. COMPARE ON OVERLAP #A1. RECALCULATED SVD ON OVERLAP PERIOD w<-svd(tree[temp1,]) A<-w$u %*% diag(w$d) E<-t(w$v) k<-length(mannid[[m]]) #pick first length (mannid[[m]] components if(k>1) A<-A[,1:k] else A<-as.matrix(A[,1:k]) if(k>1) E<-E[1:k,] else E<-t(as.matrix(E[1:k,])) diff<-tree[temp1,]- A %*% E #dim 225x16 variance.out$norm.recalc.overlap[m]<-var(c(diff))*length(c(diff)) # var.recalc [1] 1-variance.out$norm.recalc.overlap[m]/variance.out[m,10] # [1] 0.5336284 - amount explained in 4 PCs #CALCULATE EOF FOR MANN IN TRUNCATED PERIOD if(k>1) A<-pc.mann[temp2,] else A<-as.matrix(pc.mann[temp2]) E<-array ( c ( rep(NA,k*ncol(tree)) ), dim=c(k,ncol(tree))) for (i in 1:ncol(tree)) { E[,i]<-qr.solve(A,tree[temp1,i],tol=1e-7) } diff<-tree[temp1,]- A %*% E #dim 225 x16 variance.out$norm.mann.overlap[m]<-var(c(diff))*length(c(diff)) # var.recalc [1] 1-variance.out$norm.mann.overlap[m]/variance.out[m,10] # [1] 0.4631811 - amount explained in 4 PCs #CALCULATE PC USING MANN EOF CALCULATED ABOVE IN BALANCE PERIOD E.mann<-E A.mann<-A A<-array ( c ( rep(NA,nrow(tree)*k) ), dim=c(nrow(tree),k)) A[temp1,]<-A.mann A[!temp1,]<-tree[!temp1,] %*% t(E.mann) for (j in c(1:nrow(tree))[!temp1]) { A[j,]<-qr.solve(t(E.mann),tree[j,],tol=1e-7) } diff<-tree- A %*% E #dim 225 x16 variance.out$norm.mann.maxp[m]<-var(c(diff))*length(c(diff)) # var.recalc [1] 1-variance.out$norm.mann.maxp[m]/variance.out[m,7] # [1] 0.4475386- amount explained in 4 PCs #DO SVD FOR MAXIMUM AVAILABLE PERIOD w<-svd(tree) A<-w$u %*% diag(w$d) E<-t(w$v) if (k>1) A<-A[,1:k] else A<-as.matrix(A[,1:k]) if (k>1) E<-E[1:k,] else E<-t(as.matrix(E[1:k,])) diff<-tree - A %*% E #dim 225x16 variance.out$norm.recalc.maxp[m]<-var(c(diff))*length(c(diff)) # var.recalc [1] 8606600 1-variance.out$norm.recalc.maxp[m]/variance.out[m,7] # [1] 0.5138065- amount explained in 4 PCs #CALCULATE BETA AND CORRELATION IN OVERLAP verify<-ts(A,start=dates.tree[1],end=dates.tree[2]) save(verify,file=file.path(data.out,paste("pcrecalc",m,"tab",sep="."))) mann<-pc.mann cor1temp<-NULL beta1temp<-NULL for (j in 1:k) { if (k>1) cor1temp<-c(cor1temp,cor(verify[,j][temp1],mann[,j][temp2])) else cor1temp<-c(cor1temp,cor(verify[temp1],mann[temp2])) if (k>1) beta1temp<-c(beta1temp,betam(verify[,j][temp1],mann[,j][temp2])) else beta1temp<-c(beta1temp,betam(verify[temp1],mann[temp2])) } relationlist<-c(relationlist,list(m,cor1temp)) mann<-proxy[,mannid[[m]]] #PLOT RPCs for (j in 1:k) { if (k>1) MBH<-mann[,j] if (k>1) RECALC<-verify[,j] if(cor1temp[j]<0) combine<-ts.union(-MBH,RECALC) else combine<-ts.union(MBH,RECALC) #if (k>1) combine<-ts.union(MBH,RECALC) else combine<-ts.union(verify,mann) #plot.ts(combine, main=paste( prname[mannid[[m]][j],3] )) diff<-scale(combine)[,1]-scale(combine)[,2] if(cor1temp[j]<0) combine<-ts.union(-MBH,RECALC,diff) else combine<-ts.union(MBH,RECALC,diff) if (m==6) plot.ts(combine, main=paste( prname[mannid[[3]][j],3] )) else plot.ts(combine, main=paste( prname[mannid[[m]][j],3] )) loc2<-file.path(base.out,paste("region",m,"pc",j,"png",sep=".")) png(file=loc2, bg="white",width= 800,height=600) if (m==6) plot.ts(combine, main=paste( prname[mannid[[3]][j],3] )) else plot.ts(combine, main=paste( prname[mannid[[m]][j],3] )) dev.off() } #Save RPCs for North America for side comparison if(m==3) compare1<-verify if(m==6) compare2<-verify } #m-iteration save(relationlist,file=file.path(data.out,"correlations.tab")) save(variance.out,file=file.path(data.out,"variance.tab")) write.table(variance.out, file=file.path(base.out,"variance.txt"),quote=FALSE,sep="\t") #VALUES FOR UNEXPLAINED VARIANCE #these values are used directly in explained variance table and analysis variance.out[1:5,7:12] # norm.total.maxp norm.recalc.maxp norm.mann.maxp norm.total.overlap norm.recalc.overlap norm.mann.overlap #1 3948.997 1525.7280 1913.981 3915.667 1510.3193 1897.681 #2 4340.996 745.0217 4106.992 4340.996 745.0217 4106.992 #3 81312.997 48146.5745 67647.271 52018.887 28789.7553 44541.798 #4 7272.998 4049.9940 4965.383 3912.119 1983.3039 2516.753 #5 5584.997 2715.3892 3085.495 3808.255 1776.0618 2044.343 pc.correlation<-array ( c(rep(NA,5*9)), dim=c(9,5) ) for (k in 1:5) { n<-length(relationlist[[2*k]]) pc.correlation[,k][1:n]<-relationlist[[2*k]] } write.table(pc.correlation,file=file.path(base.out,"pc.correlation.txt"),quote=FALSE,sep="\t") #NOT USED HERE #SIDE COMPARISON OF MBH99 and MBH98 - THIS IS VERIFICATION VERSION mbh98<-compare1 mbh99<-compare2 #PLOT COMPARISON for (j in 1:3) { fred<-ts.union(mbh99[,j],mbh98[,j]) plot.ts(fred,plot.type="multiple",main=paste("US PC",j,"Compare Verification"),font.main=6) loc2<-file.path(base.out,paste("us.compare.verify",j,"png",sep=".")) png(file=loc2, bg="white",width= 800,height=480) plot.ts(fred,plot.type="multiple",main=paste("US PC",j,"Comparison"),font.main=6) dev.off() } #SIDE COMPARISON OF MBH DATA load("c:/climate/data/mann/mwpproxy.tab") MBH99<-mwp[,mannid[[6]]][,1] MBH98<-proxy0[,mannid[[3]]][,1] RECALC.98<-verify[,1] combine<-ts.union(-MBH99,MBH98,RECALC.98) plot.ts(combine,main="US PC1",xlim=c(1000,1990)) loc2<-file.path(image.out,paste("us.compare.data",j,"png",sep=".")) png(file=loc2, bg="white",width= 800,height=600) plot.ts(combine,main="US PC1",xlim=c(1000,1990)) dev.off() mbh99<-ts(mbh99[(1000-469):(1980-469),],start=1000,end=1980) mbh98<-ts(mbh98[1:(1980-1399),],start=1400,end=1980) #for (j in 1:1) { #fred<-ts.union(-mbh99[,j],mbh98[,j]) plot.ts(fred,plot.type="multiple",main=paste("US PC",j,"Compare MBH"),font.main=6) cor.compare<-c(rep(NA,3)) for (j in 1:3) { cor.compare[j]<-cor(mbh98[,j],mbh99[(1400-999):(1980-999),j],use="pairwise.complete.obs") } cor.compare #[1] -0.45797874 0.35018607 -0.04918506 #SIDE COMPARISON OF MBH SPLICING load("c:/climate/data/mann/mbh99.nh.recon.tab") mbh99<-mbh99.nh.recon mbh99<-ts(mbh99,start=1000,end=1980) ts.plot(mbh99[,2]) load("c:/climate/data/mann/nhmann.tab") dim(nhmann) nhmann<-ts(nhmann,start=1400,end=1994) mbh98<-nhmann[,2] fred<-ts.union(mbh99[,2],mbh98) plot.ts(fred,plot.type="multiple",main=paste("MBH NH Comparison"),font.main=6) loc2<-file.path(image.out,paste("MBH.NH.compare.data","png",sep=".")) png(file=loc2, bg="white",width= 800,height=480) plot.ts(fred,plot.type="multiple",main=paste("MBH NH Comparison"),font.main=6) dev.off()