########################################################### # Sources of the package PLSS for the R version 2.12.2 under WINDOWS # release 20.69 06/06/2015 # modifications new MVcut function, remove the option 4 in the MAPLSS menu (Remove/Add main effects or interactions # release 20.68 29/03/2015 # modifications : adds the Hotelling function to detect outliers and plot the T2 ellipse on component scatterplots # release 20.67 05/10/2012 # modifications : the bug in MAPLSS in the menu for removing main effects and/or interactions # release 20.66 14/09/2012 # modifications : the bug in MAPLSS concerning plssinter and plss.plotinter . # release 20.64 29/04/2011 # modifications : the bug of the return() that now not accepts multi arguments. # release 20.61, 01/09/2008 # modifications : the bug in the multi-response classification case with interactions ############################################################ # REFERENCES # Durand, J.F., (2008), "La régression Partial Least-Squares boostée", revue MODULAD, 63-86. # # Durand, J.F., Roman, S. et Vivien M. (1998), "Guide de la régression PLS linéaire sous S-Plus". # Rapport de Recherche 98-06, Groupe de Biostatistique et d'Analyses des Systèmes, # ENSA.M-INRA-UM II. # Durand, J.F., (2000), "La régression Partial Least Squares Spline - PLSS - Guide d'utilisation # sous S-Plus". Rapport de Recherche 00-06, Groupe de Biostatistique et d'Analyses # des Systèmes, ENSA.M-INRA-UM II. # Durand, J.F., (2001), "Local Polynomial Additive Regression through PLS and Splines: PLSS", # Chemometrics and Intelligent Laboratory Systems, vol. 58, issue 2, 235-246. # Durand, J.F., (2002), "Elements de calcul matriciel et d'Analyse Factorielle de Données", cours # polycopié, Université Montpellier II. # Durand, J.F. and Lombardo, R., (2003), "Interactions terms in nonlinear PLS via additive spline # transformations", Between Data Science and Applied Data Analysis, Studies # in Classification, Data Analysis, and Knowledge Organization. Eds M.Schader, # W. Gaul and M. Vichi, Springer, 22-29. ########################################################### objetspls<-c("PLSL","plscalibration","bivarplot","pls" ,"plscv" ,"plscv.plot" ,"Dcenter" ,"Dcentred", "invgene" ,"Dproj", "Dcp" ,"Dvar" ,"WDop" ,"VQop" ,"lss", "plss" ,"plss.plot", "plsscv" , "Gnorm" ,"traceAtB", "tucker", "grad", "Projtuc" ,"Gscal" ,"traceAB" ,"Projn", "Codisjc", "discri" ,"codisj","Bsplinen","noeuds","selectvar","Mahalanobis", "MAPLSS","plssinter","plss.plotinter","surfaceinter","MVcut","f","mygraphics","simulMAPLSS", "maplss2","Bspline","simulMARS","Hotelling") ###### library(splines) ########################################################### PLSL<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=1,eps=1e-08,smooth="reg.spline",lambda=0.6, impres=T,graph=T,titlepar=T,prop=0.1,typedata=T,cexpar=0.7,pchpar=1,askpar=F,ptypar="m", colpar=1,matcol=1,matrow=1,bgpar="wheat",qual,names.qual) { # bgpar, chacter string of the background color. default is "wheat",try "transparent" "peachpuff", "lightblue", "gray", # "lightpink", "thistle", "tan"....;use colors() to choose. mapalette=c("black","red","green3","blue","cyan","magenta","chocolate3","gray","pink3","peru","orange","salmon","deepskyblue","yellow") palette(mapalette) # pie(rep(1,length(mapalette)),col=mapalette,radius=0.9) oldpar<-par(no.readonly = TRUE) par(ask=F,bg=bgpar,mfrow=c(1,1),bty="n",xaxt="n",yaxt="n") plot(0,0,type="n",bg=par("bg"),xlab="",ylab="",xlim=c(-4,4),ylim=c(-4,4)) text(0,2,"Linear Partial Least-Squares",cex=1.7,col="red") text(0,0,"Regression",cex=1.7,col="red") text(0,-2,"J.F. Durand, Montpellier 2 University",cex=1,col="blue") text(0,-3,"www.jf-durand-pls.com",cex=0.9,col="blue") par(oldpar) par(ask=askpar,bg=bgpar,xaxt="s",yaxt="s") X0<-as.matrix(X) if(!missing(Xtest))Xtest0<-as.matrix(Xtest) Y<-as.matrix(Y) Xvariables<-rep(T,ncol(X0)) repeat{ X<-X0[,Xvariables,drop=F] if(!missing(Xtest))Xtest<-Xtest0[,Xvariables] p<-ncol(X) q<-ncol(Y) cat("==========================================================\n") reponse<-menu(c("Bivariate Analysis","Cross Validation", "Generalized Cross Validation","Conversational PLS","Selection of the predictors (Remove/Add)"),title="'Linear PLS' (0 to exit)") cat("==========================================================\n") if(reponse==1) { if(q==1) menuvec<-c("Inside the X variables","Between X and Y") else menuvec<-c("Inside the X variables","Between X and Y","Inside the Y variables") cat("==========================================================\n") reponseb<-menu(menuvec,title=" What kind of relationship ?") cat("==========================================================\n") switch(reponseb, {Ybiv<-X;Xbiv<-X},{Ybiv<-Y;Xbiv<-X},{Ybiv<-Y;Xbiv<-Y}) if(reponseb!=0) { cat("Graphs or matrix of Correlations (g/c)?\n") plctY<-scan(quiet=T,"",character(),1) if((length(plctY)==0)|(plctY=="c")) { if(reponseb==1) print(round(Dvar(X,D=D,cor=T)$V,2)) if(reponseb==2) print(round(Dvar(cbind(X,Y),D=D,cor=T)$V,2)) if(reponseb==3) print(round(Dvar(Y,D=D,cor=T)$V,2)) } else {cat("\n") cat("Bivariate plots between X and Y variables.\n") cat("Try different polynomial spline smoothers, degree and knots\n") bivarplot(X=Xbiv,Y=Ybiv,matrow=matrow,matcol=matcol,smooth=smooth,lambda=lambda,degree=1,knots=0, titlepar=titlepar,typedata=typedata,cexpar=cexpar,pchpar=pchpar,ptypar=ptypar, colpar=colpar,askpar=askpar,bgpar=bgpar) } par(mfrow=c(1,1),pty="m") def.par <- par(no.readonly = TRUE) layout(matrix(c(3,4,5,6,1,2),2,3,byrow=TRUE), heights=c(2,10),widths= c(3,10,1), TRUE) switch(reponseb, {par(mar=c(4,4,0,3)); image(1:p,1:p,Dvar(X,D=D,cor=T)$V,xlab="predictors",ylab="predictors",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(X,D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.3,p),seq(-1.5,2,length=p),paste(format(1:p),dimnames(X)[[2]]),cex=cexpar); }, {par(mar=c(4,4,0,3)); image(1:(p+q),1:(p+q),Dvar(cbind(X,Y),D=D,cor=T)$V,xlab="predictors + responses",ylab="predictors + responses",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(cbind(X,Y),D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.8,p+q),seq(-1.5,2,length=p+q),paste(format(1:(p+q)),c(dimnames(X)[[2]],dimnames(Y)[[2]])), col=c(rep("blue",p),rep("magenta",q)),pos=4,cex=cexpar) }, {par(mar=c(4,4,0,3)); image(1:q,1:q,Dvar(Y,D=D,cor=T)$V,xlab="responses",ylab="responses",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(Y,D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.3,q),seq(-1.5,2,length=q),paste(format(1:q),dimnames(Y)[[2]]),cex=cexpar)} ) } par(def.par) }#endreponse1 #--------------------------------------------------------- if(reponse==4) { if(missing(Xtest)) plsresult<-pls(X,Y,standX=standX,standY=standY,A=A,eps=eps,splflag=F,impres=impres,graph=graph, askpar=askpar,cexpar=cexpar,,pchpar=pchpar,colpar=colpar,pty=ptypar,qual=qual,names.qual=names.qual) else { if(missing(Ytest)) plsresult<-pls(X,Y,Xtest=Xtest,standX=standX,standY=standY,A=A,eps=eps,splflag=F,impres=impres, graph=graph,askpar=askpar,cexpar=cexpar,pchpar=pchpar,colpar=colpar,pty=ptypar,qual=qual,names.qual=names.qual) else plsresult<-pls(X,Y,Xtest=Xtest,Ytest=Ytest,standX=standX,standY=standY,A=A,eps=eps,splflag=F, impres=impres,graph=graph,askpar=askpar,cexpar=cexpar,pchpar=pchpar,colpar=colpar,pty=ptypar,qual=qual,names.qual=names.qual) } A<-plsresult$A } #--------------------------------------------------------- if((reponse==2)|(reponse==3)) { cat("Enter the maximum number of components (dimensions) to explore \n") Aexplore<-scan(quiet=T,"",numeric(),1) if(reponse==2) i<-1 else i<-0 campaign<-matrix(0,1,Aexplore+2) if(reponse==2) dimnames(campaign)<-list(format(i),c("prop","Dim",format(1:Aexplore))) else dimnames(campaign)<-list(format(i),c("alpha","Dim",format(1:Aexplore))) if(reponse==2) { repeat{ plscvresul<-plscv(X,Y,standX=standX,standY=standY,A=Aexplore,prop=prop,GCV=0) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bgpar=bgpar) #browser() campaign[i,]<-c(prop,plscvresul$A,round(plscvresul[[1]],5)) cat("Another proportion of left-predicted observations,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if((repo!="y")&(repo!="Y"))break cat("Enter prop (< 0.5):") prop<-scan(quiet=T,"",numeric(),1) i<-i+1 campaign<-rbind(campaign,rep(0,Aexplore+2)) dimnames(campaign)[[1]][i]<-format(i) }#endrepeat matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="PRESS",type="n", main="CV experiments' summary",cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] #prop<-eval(parse("",text=dimnames(campaign)[[1]][exp0])) prop<-campaign[exp0,1] cat("Retained experiment values\n") cat("PRESS(",prop,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } } else { repeat {# repeat{ cat("GCV(k,alpha)= ASR/(1-alpha*k/n)^2\n") cat("where ASR=Average Squared Residuals, k=dimension\n") cat("Enter alpha (>0) the tuning parameter :\n") GCV<-scan(quiet=T,"",numeric(),1) if(GCV*Aexplore/nrow(X)>=1) cat("Try a smaller value!\n") else break } i<-i+1 plscvresul<-plscv(X,Y,standX=standX,standY=standY,A=Aexplore,prop=prop,GCV=GCV) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bgpar=bgpar) campaign<-rbind(campaign,matrix(c(GCV,plscvresul$A,round(plscvresul[[1]],5)),1,Aexplore+2)) if(i==1)campaign<-campaign[-1,,drop=F] dimnames(campaign)[[1]][i]<-format(i) #campaign[i,]<-c(GCV,plscvresul$A,round(plscvresul[[2]],5)) cat("Another GCV try,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break }#endrepeat if(reponse==3) {## matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="GCV",type="n", main="CV experiments' summary",cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] #prop<-eval(parse("",text=dimnames(campaign)[[1]][exp0])) GCV<-campaign[exp0,1] cat("Retained experiment values\n") cat("PRESS(",GCV,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } }## } A<-plscvresul$A } #--------------------------------------------------------- if(reponse==5) { Xvariables<-selectvar(X0,Xvariables=Xvariables) } #--------------------------------------------------------- if(reponse==0)break }#finrepeat #browser() Components=plsresult$TX dimnames(Components)[[1]]=dimnames(X)[[1]] invisible(return(list(Xvariables=Xvariables,plsresult=plsresult,Components=Components))) } ########################################################### PLSS<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=2,degree=1,knots=0,equiknots=F,eps=1e-8, listknots=listknots,smooth="reg.spline",lambda=0.6,interaction=NULL,listinteraction,qual,names.qual, ptypar="s",typedata=T,titlepar=T,pchpar=1,cexpar=0.7,nbpoints=100,colpar=0,askpar=F, matrow=1,matcol=1,prop=0.1,impres=T,bgpar="lavenderblush2") { # Bibliography : # J. F. Durand. "Local Polynomial Additive Regression through PLS and Splines: PLSS", # Chemometrics and Intelligent Laboratory Systems 58, 235-246, 2001. # library(splines) mapalette=c("black","red","green3","blue","cyan","magenta","gray","chocolate3","pink3","peru","orange","salmon","deepskyblue","yellow") palette(mapalette) # pie(rep(1,length(mapalette)),col=mapalette,radius=0.9) oldpar<-par(no.readonly = TRUE) par(ask=F,bg=bgpar,mfrow=c(1,1),bty="n",xaxt="n",yaxt="n") plot(0,0,type="n",bg=par("bg"),xlab="",ylab="",xlim=c(-4,4),ylim=c(-4,4)) text(0,2.5,"Partial Least-Squares",cex=2,col="red") text(0,0,"Splines : PLSS",cex=2,col="red") text(0,-2,"J.F. Durand, Montpellier 2 University",cex=1,col="blue") text(0,-3,"www.jf-durand-pls.com",cex=0.9,col="blue") par(oldpar) par(ask=askpar,bg=bgpar,xaxt="s",yaxt="s",mai=rep(0.75,4)) resulpls<-NULL X0<-as.matrix(X) degree0<-degree if(length(degree0)==1)degree0<-rep(degree,ncol(X0)) knots0<-knots if(length(knots0)==1)knots0<-rep(knots,ncol(X0)) equiknots0<-equiknots if(length(equiknots0)==1)equiknots0<-rep(equiknots,ncol(X0)) if(!missing(listknots)) listknots0<-listknots else { listknots0<-list(NULL) for(i in 1:(ncol(X0)-1)) listknots0<-c(listknots0,list(NULL)) } if(!missing(Xtest)) { if(is.null(dimnames(Xtest))){ if(length(Xtest)==dim(X)[2])Xtest0<-matrix(Xtest,1,length(Xtest)) dimnames(Xtest0)<-list(paste("x",1:(length(Xtest)/length(X)),sep=""),dimnames(X)[[2]]) } else Xtest0<-as.matrix(Xtest) } Y<-as.matrix(Y) Xvariables<-rep(T,ncol(X0)) repeat{ flag<-sum(Xvariables)==ncol(X0) X<-as.matrix(X0[,Xvariables,drop=F]) p<-ncol(X) q<-ncol(Y) if(length(degree0)>1)degree<-degree0[Xvariables] if(length(knots0)>1)knots<-knots0[Xvariables] if(length(equiknots0)>1)equiknots<-equiknots0[Xvariables] #if(!missing(listknots)) listknots<-listknots0[Xvariables] listknots<-listknots0[Xvariables] aa<-NULL for(i in 1:length(listknots))aa<-c(aa,listknots[[i]]) listknotsnullflag<-is.null(aa) if(!missing(Xtest))Xtest<-Xtest0[,Xvariables,drop=F] cat("==========================================================\n") if((smooth=="reg.spline")&(matrow+matcol==2)) menu1<-"Bivariate Analysis for Spline Inputs" else menu1<-"Bivariate Analysis" if(A>1) menu41<-"exploration and modeling" else menu41<-"modeling" if(missing(Xtest)) menu4<-paste("A look at graphics for ",menu41,sep="") else menu4<-paste("Prediction and a look at graphics for ",menu41,sep="") reponse<-menu(c(menu1,"Cross Validation", "Generalized Cross Validation",menu4,"Selection of the predictors (Remove/Add)"),title="'PLS Spline' (0 to exit)") cat("===========================================================\n") if((reponse==1)&flag) { if(q==1) menuvec<-c("Inside the X variables","Between X and Y") else menuvec<-c("Inside the X variables","Between X and Y","Inside the Y variables") cat("============================================================\n") reponseb<-menu(menuvec,title=" What kind of relationship?") cat("============================================================\n") switch(reponseb, {Ybiv<-X;Xbiv<-X},{Ybiv<-Y;Xbiv<-X},{Ybiv<-Y;Xbiv<-Y}) if(reponseb!=0) { cat("Graphs or matrix of Correlations (g/c)?\n") plctY<-scan(quiet=T,"",character(),1) if((length(plctY)==0)|(plctY=="c")) { if(reponseb==1) print(round(Dvar(X,D=D,cor=T)$V,2)) if(reponseb==2) print(round(Dvar(cbind(X,Y),D=D,cor=T)$V,2)) if(reponseb==3) print(round(Dvar(Y,D=D,cor=T)$V,2)) } else if(smooth=="reg.spline") { if(missing(listknots)|listknotsnullflag) resu<-bivarplot(X=Xbiv,Y=Ybiv,matrow=matrow,matcol=matcol,smooth=smooth,degree=degree,knots=knots, equiknots=equiknots,lambda=lambda,titlepar=titlepar,qual=qual,names.qual=names.qual, typedata=typedata,cexpar=cexpar,pchpar=pchpar,ptypar=ptypar,colpar=colpar,askpar=askpar,bgpar=bgpar,nbpoints=nbpoints) else resu<-bivarplot(X=Xbiv,Y=Ybiv,matrow=matrow,matcol=matcol,smooth=smooth,degree=degree,knots=knots, equiknots=equiknots,listknots=listknots,lambda=lambda,titlepar=titlepar,qual=qual,names.qual=names.qual, typedata=typedata,cexpar=cexpar,pchpar=pchpar,ptypar=ptypar,colpar=colpar,askpar=askpar,bgpar=bgpar,nbpoints=nbpoints) listknots0<-resu$listknots degree0<-resu$degree for(i in 1:ncol(X0))knots0[i]<-length(listknots0[[i]]) names(degree0)<-dimnames(X0)[[2]] names(listknots0)<-dimnames(X0)[[2]] names(knots0)<-dimnames(X0)[[2]] #browser() } else bivarplot(X=Xbiv,Y=Ybiv,matrow=matrow,matcol=matcol,smooth=smooth,lambda=lambda,titlepar=titlepar, typedata=typedata,cexpar=cexpar,pchpar=pchpar,ptypar=ptypar,colpar=colpar,askpar=askpar,bgpar=bgpar) par(mfrow=c(1,1),pty="m") #cat("Click on the plot to locate the legend !\n") def.par <- par(no.readonly = TRUE) layout(matrix(c(3,4,5,6,1,2),2,3,byrow=TRUE), heights=c(2,10),widths= c(3,10,1), TRUE) switch(reponseb, {par(mar=c(4,4,0,3)); image(1:p,1:p,Dvar(X,D=D,cor=T)$V,xlab="predictors",ylab="predictors",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(X,D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.3,p),seq(-1.5,2,length=p),paste(format(1:p),dimnames(X)[[2]]),cex=cexpar); }, {par(mar=c(4,4,0,3)); image(1:(p+q),1:(p+q),Dvar(cbind(X,Y),D=D,cor=T)$V,xlab="predictors + responses",ylab="predictors + responses",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(cbind(X,Y),D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.8,p+q),seq(-1.5,2,length=p+q),paste(format(1:(p+q)),c(dimnames(X)[[2]],dimnames(Y)[[2]])), col=c(rep("blue",p),rep("magenta",q)),cex=cexpar,pos=4)}, {par(mar=c(4,4,0,3)); image(1:q,1:q,Dvar(Y,D=D,cor=T)$V,xlab="responses",ylab="responses",col=topo.colors(64),cex=cexpar); box(); par(mar=c(4,0,0,1)); image(1,seq(min(Dvar(Y,D=D,cor=T)$V),1,length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64),xlab="",ylab="",cex=cexpar); box(); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"Var.",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"names",cex=cexpar+0.2,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(0,0,"Image of Correlations",cex=cexpar+0.5,col="red"); par(mar=c(0,0,0,0)); plot(0,0,type="n",xlab="",ylab="",axes=F); text(-0.2,0,"r",cex=cexpar+0.2,col="red"); text(-0.2,-0.5,"level",cex=cexpar+0.2,col="red"); plot(rep(0,5),-2:2,type="n",xlab="",ylab="",axes=F); text(rep(-0.3,q),seq(-1.5,2,length=q),paste(format(1:q),dimnames(Y)[[2]]),cex=cexpar)} ) } par(def.par) }#endreponse1 #--------------------------------------------------------- if(reponse==4) { if(missing(listknots)|listknotsnullflag) { if(missing(Xtest)) { resulpls<-plss(X=X,Y=Y,standX=standX,standY=standY,D=D,A=A,degree=degree,knots=knots,equiknots=equiknots,eps=eps,impres=F, interaction=interaction,listinteraction=listinteraction,colpar=colpar,cexpar=cexpar,titlepar=titlepar) cat("NUMERICAL AND GRAPHICAL ISSUES :\n") cat("\n") plss.plot(resulpls,ptypar=ptypar,typedata=typedata,titlepar=titlepar,pchpar=pchpar,cexpar=cexpar, nbpoints=nbpoints,colpar=colpar,askpar=askpar,qual=qual,names.qual=names.qual) } else { if(missing(Ytest)) resulpls<-plss(X=X,Y=Y,Xtest=Xtest,standX=standX,standY=standY,D=D,A=A,degree=degree,knots=knots, equiknots=equiknots,eps=eps,impres=T,interaction=interaction,listinteraction=listinteraction, colpar=colpar,cexpar=cexpar,titlepar=titlepar) else resulpls<-plss(X=X,Y=Y,Xtest=Xtest,Ytest=Ytest,standX=standX,standY=standY,D=D,A=A,degree=degree, knots=knots,equiknots=equiknots,eps=eps,impres=T,interaction=interaction,listinteraction=listinteraction,colpar=colpar,cexpar=cexpar,titlepar=titlepar) cat("NUMERICAL AND GRAPHICAL ISSUES :\n") cat("\n") plss.plot(resulpls,Xtest,ptypar=ptypar,typedata=typedata,titlepar=titlepar,pchpar=pchpar,cexpar=cexpar, nbpoints=nbpoints,colpar=colpar,askpar=askpar,qual=qual,names.qual=names.qual) }#endmissingXtest } else { if(missing(Xtest)) { resulpls<-plss(X=X,Y=Y,standX=standX,standY=standY,D=D,A=A,degree=degree,listknots=listknots, eps=eps,impres=F,interaction=interaction,listinteraction=listinteraction,colpar=colpar,cexpar=cexpar, titlepar=titlepar) if(resulpls$nomfichX=="NON4")return() cat("NUMERICAL AND GRAPHICAL ISSUES :\n") cat("\n") plss.plot(resulpls,ptypar=ptypar,typedata=typedata,titlepar=titlepar,pchpar=pchpar, cexpar=cexpar,nbpoints=nbpoints,colpar=colpar,askpar=askpar,qual=qual,names.qual=names.qual) } else { if(missing(Ytest)) resulpls<-plss(X=X,Y=Y,Xtest=Xtest,standX=standX,standY=standY,D=D,A=A,degree=degree,listknots= listknots,eps=eps,impres=T,interaction=interaction,listinteraction=listinteraction,colpar=colpar,cexpar=cexpar,titlepar=titlepar) else resulpls<-plss(X=X,Y=Y,Xtest=Xtest,Ytest=Ytest,standX=standX,standY=standY,D=D,A=A,degree=degree, listknots=listknots,eps=eps,impres=T,interaction=interaction,listinteraction=listinteraction,colpar=colpar,cexpar=cexpar,titlepar=titlepar) cat("NUMERICAL AND GRAPHICAL ISSUES :\n") cat("\n") plss.plot(resulpls,Xtest,ptypar=ptypar,typedata=typedata,titlepar=titlepar,pchpar=pchpar,cexpar=cexpar, nbpoints=nbpoints,colpar=colpar,askpar=askpar,qual=qual,names.qual=names.qual) }#endmissingXtest }#endifelsemissinglistknots A<-resulpls$axes } #--------------------------------------------------------- if((reponse==2)|(reponse==3)) { cat("Enter the maximum number of components (dimensions) to explore \n") Aexplore<-scan(quiet=T,"",numeric(),1) if(reponse==2) i<-1 else i<-0 campaign<-matrix(0,1,Aexplore+2) if(reponse==2) dimnames(campaign)<-list(format(i),c("prop","Dim",format(1:Aexplore))) else dimnames(campaign)<-list(format(i),c("alpha","Dim",format(1:Aexplore))) if(reponse==2) { repeat{ if(missing(listknots)|listknotsnullflag) plscvresul<-plsscv(X,Y,standX=standX,standY=standY,interaction=interaction,D=D,A=Aexplore, degree=degree,knots=knots,equiknots=equiknots,prop=prop,GCV=0) else plscvresul<-plsscv(X,Y,standX=standX,standY=standY,interaction=NULL,D=D,A=Aexplore, degree=degree,knots=knots,equiknots=equiknots,listknots=listknots,prop=prop,GCV=0) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bg=par("bg")) campaign[i,]<-c(prop,plscvresul$A,round(plscvresul[[1]],5)) cat("Another proportion of left-predicted observations,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if((repo!="y")&(repo!="Y"))break cat("Enter prop (< 0.5):") prop<-scan(quiet=T,"",numeric(),1) i<-i+1 campaign<-rbind(campaign,rep(0,Aexplore+2)) dimnames(campaign)[[1]][i]<-format(i) }#endrepeat matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="PRESS",type="n", main="CV experiments' summary",cex=cexpar,bg=par("bg"))) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] #prop<-eval(parse("",text=dimnames(campaign)[[1]][exp0])) prop<-campaign[exp0,1] cat("Retained experiment values\n") cat("PRESS(",prop,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } }#endreponse==2 else { repeat {# repeat{ cat("GCV(k,alpha)= ASR/(1-alpha*k/n)^2\n") cat("where ASR=Average Squared Residuals, k=dimension\n") cat("Enter alpha (>0) the tuning parameter :\n") GCV<-scan(quiet=T,"",numeric(),1) if(GCV*Aexplore/nrow(X)>=1) cat("Try a smaller value!\n") else break } i<-i+1 if(missing(listknots)|listknotsnullflag) plscvresul<-plsscv(X,Y,standX=standX,standY=standY,interaction=interaction,D=D,A=Aexplore, degree=degree,knots=knots,equiknots=equiknots,prop=prop,GCV=GCV) else plscvresul<-plsscv(X,Y,standX=standX,standY=standY,interaction=NULL,D=1,A=Aexplore, degree=degree,knots=knots,equiknots=equiknots,listknots=listknots,prop=prop,GCV=GCV) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bg=par("bg")) campaign<-rbind(campaign,matrix(c(GCV,plscvresul$A,round(plscvresul[[1]],5)),1,Aexplore+2)) if(i==1)campaign<-campaign[-1,,drop=F] dimnames(campaign)[[1]][i]<-format(i) #campaign[i,]<-c(GCV,plscvresul$A,round(plscvresul[[2]],5)) cat("Another GCV try,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break }#endrepeat if(reponse==3) {## matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="GCV",type="n", main="CV experiments' summary",cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] #prop<-eval(parse("",text=dimnames(campaign)[[1]][exp0])) GCV<-campaign[exp0,1] cat("Retained experiment values\n") cat("GCV(",GCV,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } }## }#end else <==> endreponse==3 A<-plscvresul$A } #--------------------------------------------------------- if(reponse==5) { Xvariables<-selectvar(X0,Xvariables=Xvariables) } #--------------------------------------------------------- if(reponse==0) { break} }#finrepeat #browser() if(is.null(resulpls)) invisible(return(list(Xvariables=Xvariables,degree=degree0,knots=knots0,equiknots=equiknots0,listknots=listknots0))) else { Components=resulpls$compX dimnames(Components)[[1]]=dimnames(X)[[1]] invisible(return(list(Xvariables=Xvariables,degree=degree0,knots=knots0,equiknots=equiknots0,listknots=listknots0,Components=Components,resulpls=resulpls))) } } ########################################################### plscalibration<-function(X,Y,spect=1:nrow(X),Xtest,Ytest,spectest,byscale=1,standX=F,standY=T,A=2, prop=0.1,thetapar=-60,phipar=30,rpar=10,steps=F,steporder=T,cexpar=0.7,askpar=F,ptypar="m",colpar=1, titlestring="Absorbance",matcol=3,matrow=1,titlepar=T,bgpar="peachpuff") { par(ask=askpar) oldpar<-par(no.readonly = TRUE) par(ask=F,bg=bgpar,mfrow=c(1,1),bty="n",xaxt="n",yaxt="n") plot(0,0,type="n",bg=par("bg"),xlab="",ylab="",xlim=c(-4,4),ylim=c(-4,4)) text(0,3.5,"Linear Partial Least-Squares",cex=1.7,col="red") text(0,1.5,"for Calibration in",cex=1.7,col="red") text(0,-0.5,"Near Infrared Spectroscopy",cex=1.7,col="red") text(0,-2,"J.F. Durand, Montpellier 2 University",cex=1,col="blue") text(0,-3,"www.jf-durand-pls.com",cex=0.9,col="blue") par(oldpar) par(ask=askpar,bg=bgpar,xaxt="s",yaxt="s") Xini<-as.matrix(X) Yini<-as.matrix(Y) X<-Xini[spect,,drop=F] Y<-Yini[spect,,drop=F] n<-nrow(X)# nombre de spectres effectivement traites pini<-ncol(X)# nombre de "temps" initiaux (peut etre trop nombreux) q<-ncol(Y) echini<-dimnames(X)[[2]] mode(echini)<-"numeric"# echini est le vecteur des "temps" initialement pris en compte #ech<-seq(min(echini),max(echini),by=byscale)# ech est le sous-vecteur de echini avec un "pas" egal a byscale nbcol<-seq(1,ncol(Xini),by=byscale) X<-X[,nbcol]# matrice des absorbances effectivement prise en compte ech<-dimnames(X)[[2]] mode(ech)<-"numeric"# ech est le vecteur des "temps" pris en compte p<-ncol(X)# nombre de "temps" effectivement traités #calcul du spectre moyen et des ecarts types moyen<-apply(X,2,mean) sigma<-sqrt(apply(X,2,var)) if(missing(Xtest)){ Xtest<-Xini if(missing(spectest)) spectest<-spect Xtest<-Xtest[spectest,nbcol,drop=F] } else { if(missing(spectest)) spectest<-1:nrow(Xtest) Xtest<-Xtest[spectest,nbcol,drop=F] } repeat { if(p>=1000)cat("Too many wavelegths (>1000). Use byscale = 2 ou 3...\n") cat("==========================================================\n") reponse<-menu(c("Looking at the spectra","Cross Validation","Generalized Cross Validation", "Conversational PLS"),title="PLS for 'Near Infrared Spectroscopy' (0 to exit)") cat("==========================================================\n") if(reponse==1) { par(mfrow=c(1,1),pty=ptypar) if(steporder){debut<-1 fin<-n pas<-1} else {debut<-n fin<-1 pas<--1} plot(ech,X[debut,],xlab="wavelengths",ylab=titlestring,xlim=range(echini),ylim=range(X),col=1,type="l",cex=cexpar) for(i in (debut+pas):fin) { if(steps){ cat(paste("spectrum",dimnames(X)[[1]][i-pas]," ",dimnames(Y)[[2]],"=",Y[i-pas],"\n")) cat("ENTER for the next") plctY<-scan(quiet=T,"",character(),n=1) } points(ech,X[i,],type="l",col=i) } if(steps){ cat(paste("spectrum",dimnames(X)[[1]][fin],"\n")) cat("ENTER to end") scan(quiet=T,"",character(),n=1) } #plot du spectre moyen et de la dispersion par(mfrow=c(matrow,matcol),pty=ptypar) moyen.rts<- moyen moyen.df2 <- diff(moyen.rts, 1,2) moyen.df1<- diff(moyen.rts, 1,1) ylimite<-range(cbind(moyen,moyen+2*sigma,moyen-2*sigma)) plot(ech,moyen,xlab="wavelengths",ylab="Mean (+ -) 2*Stand. Dev.",xlim=range(echini), ylim=ylimite,type="n",cex=cexpar,bg=par("bg")) points(ech,moyen,type="l",col=colpar) points(ech,moyen+2*sigma,col=colpar+2,type="l") points(ech,moyen-2*sigma,col=colpar+2,type="l") plot(supsmu(ech[-1],moyen.df1/(byscale/diff(range(echini)))),type="n",cex=cexpar,xlab="wavelengths",ylab="First Derivative") lines(supsmu(ech[-1],moyen.df1/(byscale/diff(range(echini)))),col=colpar+1) abline(h=0) plot(supsmu(ech[-(1:2)],moyen.df2/(byscale/diff(range(echini)))^2),type="n",cex=cexpar,xlab="wavelengths",ylab="Second Derivative") lines(supsmu(ech[-(1:2)],moyen.df2/(byscale/diff(range(echini)))^2),col=colpar+1) abline(h=0) #if((matrow!=1)|(matcol!=1))mtext(side=3,line=-2,cex=cexpar+0.5,outer=TRUE,titlestring) #plot du graphique matriciel (surface) par(mfrow=c(1,1),pty=ptypar) # persp.setup(lty=c(1,1,1), col=c(colpar+1,colpar+1,colpar+1), lwd=c(1,0,1)) persp(1:n,ech,X,xlab="samples",ylab="wavelengths",zlab=titlestring,theta=thetapar,phi=phipar,r=rpar,ticktype="detailed",cex=cexpar,col=colpar+1) # title(main = paste(titlestring,"Calibration Sample"),col=colpar+1) def.par <- par(no.readonly = TRUE) layout(matrix(c(3,4,1,2),2,2,byrow=TRUE), c(10,1), c(1,10), TRUE) #layout(matrix(c(1,2),1,2,byrow=T),widths=c(85,15)) par(mar=c(4,4,0,3)) image(ech,1:n,t(X),ylab="samples",xlab="wavelengths",col=topo.colors(64)) box() par(mar=c(4,0,0,1)) image(1,seq(min(X),max(X),length=64),t(matrix(1:64,64,1)),xaxt="n",yaxt="s",col=topo.colors(64), xlab="",ylab="",cex=cexpar) box() par(mar=c(0,0,0,0)) plot(0,0,type="n",xlab="",ylab="",axes=F) #text(0,0,paste(titlestring,"Calibration Sample"),cex=cexpar+0.5,col="red") par(mar=c(0,0,0,0)) plot(0,0,type="n",xlab="",ylab="",axes=F) text(-0.2,0,"Abso.",cex=cexpar+0.2,col="red") text(-0.2,-0.5,"Level",cex=cexpar+0.2,col="red") par(def.par) } #--------------------------------------------------------- if(reponse==4) {if(!missing(Ytest)) plsresult<-pls(X,Y,Xtest=Xtest,Ytest=Ytest[spectest,,drop=F],standX=standX,standY=standY,A=A,askpar=askpar, calibration=T,cexpar=cexpar,colpar=colpar,pty=ptypar) else plsresult<-pls(X,Y,Xtest=Xtest,standX=F,standY=standY,A=A,askpar=askpar,calibration=T, cexpar=cexpar,colpar=colpar,pty=ptypar) A<-plsresult$A } #--------------------------------------------------------- if((reponse==2)|(reponse==3)) { cat("Enter the maximum number of components (dimensions) to explore \n") Aexplore<-scan(quiet=T,"",numeric(),1) if(reponse==2) i<-1 else i<-0 campaign<-matrix(0,1,Aexplore+2) if(reponse==2) dimnames(campaign)<-list(format(i),c("prop","Dim",format(1:Aexplore))) else dimnames(campaign)<-list(format(i),c("alpha","Dim",format(1:Aexplore))) if(reponse==2) { repeat{ plscvresul<-plscv(X,Y,standX=F,standY=standY,A=Aexplore,prop=prop,GCV=0) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bg=par("bg")) campaign[i,]<-c(prop,plscvresul$A,round(plscvresul[[1]],5)) cat("Another proportion of left-predicted observations,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break cat("Enter prop (< 0.5):") prop<-scan(quiet=T,"",numeric(),1) i<-i+1 campaign<-rbind(campaign,rep(0,Aexplore+2)) dimnames(campaign)[[1]][i]<-format(i) }#endrepeat matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(prop,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="PRESS",type="n", main="CV experiments' summary",cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] prop<-campaign[exp0,1] cat("Retained experiment values\n") cat("PRESS(",prop,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } } else { repeat {# repeat{ cat("GCV(k,alpha)= ASR/(1-alpha*k/n)^2\n") cat("where ASR=Average Squared Residuals, k=dimension\n") cat("Enter alpha (>0) the tuning parameter:") GCV<-scan(quiet=T,"",numeric(),1) if(GCV*Aexplore/nrow(X)>=1) cat("Try a smaller value!\n") else break } i<-i+1 plscvresul<-plscv(X,Y,standX=F,standY=standY,A=Aexplore,prop=prop,GCV=GCV) plscvresul<-plscv.plot(plscvresul,colpar=colpar,cexpar=cexpar,titlepar=titlepar,bg=par("bg")) campaign<-rbind(campaign,matrix(c(GCV,plscvresul$A,round(plscvresul[[1]],5)),1,Aexplore+2)) if(i==1)campaign<-campaign[-1,,drop=F] dimnames(campaign)[[1]][i]<-format(i) cat("Another GCV try,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break }#endrepeat if(reponse==3) {## matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(GCV,plscvresul$A,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" campaign[(nrow(campaign)-1):nrow(campaign),2]<-NA if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab="GCV",type="n", main="CV experiments' summary",cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) plscvresul[[2]]<-campaign[exp0,3:(Aexplore+2)] plscvresul$A<-campaign[exp0,2] GCV<-campaign[exp0,1] cat("Retained experiment values\n") cat("GCV(",GCV,",",plscvresul$A,")=",plscvresul[[2]][plscvresul$A],"\n",sep="") } }## } A<-plscvresul$A } #--------------------------------------------------------- if(reponse==0)break }#endrepeat Components=plsresult$TX dimnames(Components)[[1]]=dimnames(X)[[1]] invisible(return(list(plsresult=plsresult,Components=Components))) } ########################################################### bivarplot<-function(X,Y=X,matrow=1,matcol=1,smooth="reg.spline",degree=1,knots=0, equiknots=F,listknots,lambda=0.6,titlepar=T,typedata=T,qual,names.qual, cexpar=0.7,pchpar=1,ptypar="s",colpar=1,askpar=T,bgpar="gray",nbpoints=100) { X<-as.matrix(X) Y<-as.matrix(Y) p<-ncol(X) q<-ncol(Y) n<-nrow(X) nods<-knots if(is.null(dimnames(X)))dimnames(X)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Y)))dimnames(Y)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(X)[[1]])==0) dimnames(X)[[1]]<- format(1:n) if(length(dimnames(Y)[[1]])==0) dimnames(Y)[[1]]<- format(1:n) if(length(dimnames(X)[[2]])==0) dimnames(X)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Y)[[2]])==0) {cat("Affect name(s) to the response(s)\n") prov<-as.vector(format(1:q)) for(i in 1:q) {cat(paste("response ",format(i),"\n")) prov[i]<-scan(quiet=T,what="", 1) } dimnames(Y)[[2]]<-prov } if(q>1){ cat("The number of the response? (<=",q,")") j <- scan(quiet=T,"", numeric(),1) } else j<-1 par(mfrow=c(matrow,matcol),pty=ptypar) par(ask=askpar) par(bg=bgpar) devtype<-sum(c(matrow,matcol)) if(smooth=="reg.spline"){ if(devtype==2){cat(paste(dimnames(Y)[[2]][j]," simple regressions by Least-Squares Splines:",sep=""),"\n") cat("----------\n") } knots<-list(NULL) for(i in 1:(ncol(X)-1))knots<-c(knots,c(list(NULL))) names(knots)<-dimnames(X)[[2]] knots2<-knots degree<-degree if(length(degree)==1)degree<-rep(degree,ncol(X)) if(length(nods)==1)nods<-rep(nods,ncol(X)) if(!missing(listknots))knots1<-listknots else knots1<-knots if((devtype>2)&!missing(names.qual)) { oldpar<-par(no.readonly = TRUE) par(bty="n",xaxt="n",yaxt="n") plot(c(-1,1),c(-1,1),xlab="",ylab="",type="n") legend(0,0,names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="n", text.col=colpar+(min(qual):max(qual))+1,cex=1,fill=colpar+(min(qual):max(qual))+1,xjust=0.5,yjust=0.5) par(bty="o",xaxt="s",yaxt="s") #par(oldpar) } } for(i in 1:p) {# Z<-sum((Y[,j]-X[,i])^2) if(Z>1e-16) { if(smooth!="reg.spline"){ if((matrow==1)&(matcol==1))plot(X[,i],Y[,j],type="n",xlab=dimnames(X)[[2]][i],ylab=dimnames(Y)[[2]][j], cex=cexpar,main=paste("R² = ",format(round(cor(X[,i],Y[,j])^2,4)),sep=""),bg=par("bg")) else plot(X[,i],Y[,j],type="n",xlab=dimnames(X)[[2]][i],ylab="",cex=cexpar,bg=par("bg")) if(!typedata)points(X[, i],Y[,j],pch=pchpar,cex=cexpar,col=colpar+3) else text(X[,i],Y[,j],dimnames(X)[[1]],cex=cexpar,col=colpar+3) } if(smooth=="reg.spline") {## if(!missing(listknots)) resu<-lss(X[,i,drop=F],Y[,j,drop=F],listknots=list(c(knots1[[i]])),degree=degree[i],knots=nods[i], equiknots=equiknots[i],fonct=T,askpar=F,cexpar=cexpar,colpar=colpar,ptypar=ptypar,nbpoints=nbpoints,qual=qual) else resu<-lss(X[,i,drop=F],Y[,j,drop=F],degree=degree[i],knots=nods[i], equiknots=equiknots[i],fonct=T,askpar=F,cexpar=cexpar,colpar=colpar,ptypar=ptypar,nbpoints=nbpoints,qual=qual) par(ask=askpar) #cat(paste(dimnames(X)[[2]][i],": GCV = ",format(round(resu$GCV,4))," , R² = ",format(round(resu$R2,4)),sep=""),"\n") if(titlepar)title(main=paste("GCV = ",format(round(resu$GCV,4))," , R² = ",format(round(resu$R2,4)),sep=""),cex.main=cexpar+0.3) if(!missing(names.qual)) { long<-length(names.qual) if(sum(c(matrow,matcol))==2) legend(max(X[,i])-diff(range(X[,i]))/7,max(Y[,j]),names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="o", text.col=colpar+(min(qual):max(qual))+1,cex=cexpar,fill=colpar+(min(qual):max(qual))+1) } if(devtype==2) {#11 repeat {### if(resu$knots!=0)knots[[i]]<-resu$intknots[[1]] repeat{ cat(paste(dimnames(X)[[2]][i],": GCV = ",format(round(resu$GCV,4))," , R² = ",format(round(resu$R2,4)),sep=""),"\n") cat("Try another degree and knots for",dimnames(X)[[2]][i],"(y,Y/n,N)?") repo<-scan(quiet=T,what=character(),n=1) if(length(repo)==0) {repo<-"n" break } if((repo=="y")|(repo=="Y"))break if((repo=="n")|(repo=="N"))break } if((repo=="y")|(repo=="Y")) {#y repeat{ cat("the degree :") toto<-scan(quiet=T,"", character(),n=1) cat("OK (y,Y/n,N) for ",toto," ") repon<-scan(quiet=T,what=character(),n=1) if(length(repon)==0)repon<-"y" if((repon=="y")|(repon=="Y")) if((length(toto)!=0)&&!is.na(as.numeric(toto)))break } degree[i]<-as.numeric(toto) repeat{ cat("the knots' locations, NULL if no knots :\n") toto<-scan(quiet=T,what=character()) if(length(toto)==0)cat("OK (y,Y/n,N) for ",format(knots[[i]])," ") else cat("OK (y,Y/n,N) for ",toto," ") repin<-scan(quiet=T,what=character(),n=1) if(length(repin)==0)repin<-"y" if((repin=="y")|(repin=="Y"))break } if((length(toto)==1)&&(toto!="NULL"))knots[[i]]<-eval(parse("",text=toto)) if((length(toto)==1)&&(toto=="NULL")) { knotsinter<-knots knots<-knots2 if(i>1) for(k in 1:(i-1))if(!is.null(knotsinter[[k]]))knots[[k]]<-knotsinter[[k]] } if(length(toto)>1) {#* titi<-rep(0,length(toto)) for(k in 1:length(toto)) titi[k]<-eval(parse("",text=toto[k])) knots[[i]]<-titi }#* resu<-lss(X[,i,drop=F],Y[,j,drop=F],listknots=list(c(knots[[i]])),degree=degree[i],fonct=T,askpar=F, cexpar=cexpar,colpar=colpar,ptypar=ptypar,nbpoints=nbpoints,qual=qual,D=1,impres=T) par(ask=askpar) if(titlepar)title(main=paste("GCV = ",format(round(resu$GCV,4))," , R² = ",format(round(resu$R2,4)),sep=""),cex.main=cexpar+0.3) if(!missing(names.qual)) { long<-length(names.qual) legend(max(X[,i])-diff(range(X[,i]))/7,max(Y[,j]),names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="o", text.col=colpar+(min(qual):max(qual))+1,cex=cexpar,fill=colpar+(min(qual):max(qual))+1) } #cat(paste(dimnames(X)[[2]][i],": GCV = ",format(round(resu$GCV,4))," , R² = ",format(round(resu$R2,4)),sep=""),"\n") #cat("Another try? ") }#y else break }### }#11 }## else{ switch(smooth,lowess=,lines(lowess(X[,i],Y[,j],f=lambda),col=colpar+1), supsmu=,lines(supsmu(X[,i],Y[,j],span=lambda),col=colpar+1), ksmooth=,lines(ksmooth(X[,i],Y[,j],bandwith=lambda,kernel="normal"),col=colpar+1), smooth.spline=,lines(smooth.spline(X[,i],Y[,j],spar=lambda),col=colpar+1), ) } } cat("----------\n") }# if(titlepar) if((matrow!=1)|(matcol!=1)){ if(smooth!="reg.spline") mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("(",dimnames(Y)[[2]][j]," , predictor) scatterplots smoothed by '", smooth,"(",lambda,")'",sep="")) } if(smooth=="reg.spline") return(list(smooth=smooth,degree=degree,listknots=knots)) else invisible(smooth) } ########################################################### pls<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=1,eps=1e-08,splflag=F,impres=T,graph=T, titlepar=T,ptypar="s",cexpar=0.8,pchpar=1,typedata=T,colpar=0.7,askpar=T,calibration=F,qual,names.qual) { # # Regression PLS lineaire # lecture des donnees if(impres==F)graph<-F nomfichX<-deparse(substitute(X)) nomfichY<-deparse(substitute(Y)) X<-as.matrix(X) Y<-as.matrix(Y) Yinitial<-Y n<-nrow(X) p<-ncol(X) q<-ncol(Y) if(!missing(qual)) { if(sum(dim(qual))>n) { indic=rep(0,n) indic=qual[,1] qual=indic } } if(is.null(dimnames(X)))dimnames(X)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Y)))dimnames(Y)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(X)[[1]])==0) dimnames(X)[[1]]<- format(1:n) if(length(dimnames(Y)[[1]])==0) dimnames(Y)[[1]]<- format(1:n) if(length(dimnames(X)[[2]])==0) dimnames(X)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Y)[[2]])==0) dimnames(Y)[[2]]<- paste("Y",1:q,sep="") # calcul de la metrique si D=1 # if(length(D)==1) D<-rep(1/n,n) else D<-as.vector(D) # centrage et reduction centX<-Dcentred(X,D=D) centY<-Dcentred(Y,D=D) if(standX) X<-centX$Xcr else X<-centX$Xc if(standY) Y<-centY$Xcr else Y<-centY$Xc if(!missing(Xtest)) { if(is.null(dimnames(Xtest)))dimnames(Xtest)<-list(format(1:nrow(Xtest)),dimnames(X)[[2]]) Xinitest<-sweep(Xtest, 2,centX$moy) if(standX) Xinitest<- sweep(Xinitest,2,sqrt(centX$var),FUN="/") dimnames(Xinitest)<-dimnames(Xtest) } if(!missing(Ytest)) { if(is.null(dimnames(Ytest)))dimnames(Ytest)<-list(format(1:nrow(Ytest)),dimnames(Y)[[2]]) Yinitest<-sweep(Ytest, 2,centY$moy) if(standY) Yinitest<-sweep(Yinitest,2,sqrt(centY$var),FUN="/") dimnames(Yinitest)<-dimnames(Ytest) } Xini<-as.matrix(X) Yini<-as.matrix(Y) varX<-centX$var varY<-centY$var SYX<-NULL # matrice de covariance entre Y et X inertotY<-sum(diag(VQop(Y,D=D))) inertotX<-sum(diag(VQop(X,D=D))) if(!splflag) VARX<-"VAR X" else VARX<-"VAR B" if(impres) { cat("__________________________________________________________________\n") cat(" - Linear PLS -\n") cat(paste("Total Variance of Y =",format(inertotY),"\n")) if(splflag) cat(paste("Total Variance of B =",format(inertotX),"\n")) else cat(paste("Total Variance of X =",format(inertotX),"\n")) } # initialisation w<-NULL # facteur de X cc<-NULL # facteur de Y tt<-NULL # composante de X u<-NULL # composante de Y RtX<-NULL # correlations entre t et X RtY<-NULL # correlations entre t et Y YH<-list(NULL) # modele de Y YR<-list(NULL) # residu de Y alpha <- matrix(0,nrow=p,ncol=1)# poids de X pour t alph <- list(NULL) # liste des alpha COMP<-matrix(0,nrow=p,ncol=p) # compteur pour le calcul des alpha BETA<-matrix(0,nrow=p,ncol=q) # coefficients du modele Y explique par X apres A etapes dimnames(BETA)<-list(dimnames(X)[[2]],dimnames(Y)[[2]]) BETALCR<-list(NULL) # liste des coeff du modele CR suivant le nb d'axes BETAL<-list(NULL) # liste des coeff du modele suivant le nb d'axes cov<-as.numeric(0) # covariance optimale covXY<-NULL # vecteur des covariances optimales VY<-NULL # vecteur des variances des composantes de Y VX<-NULL # vecteur des variances des composantes de X IEY<-NULL # vecteur des inerties de Y expliquees IEX<-NULL # vecteur des inerties de X expliquees R2<-rep(0,q) # vecteur des R2 R2c<-matrix(0,q,1) WX<-matrix(0,p,A) # matrice des facteurs de X CY<-matrix(0,q,A) # matrice des facteurs de Y TX<-NULL # matrice des composantes de X UY<-NULL # matrice des composantes de Y RU<-NULL # matrice de correlations entre les composantes de Y normc<-as.numeric(0) normw<-as.numeric(0) k<-0 inerexpY<-as.numeric(0) inerexpX<-as.numeric(0) varty<-NULL # debut boucle axe repeat { k<-k+1 if(impres) { cat("__________________________________________________________________\n") cat(paste("Dimension",format(k),"\n")) cat("\n") } XtDY<-Dcp(X,Y,D=D) if(p<=q) { vpvp<-eigen(XtDY%*%t(XtDY),symmetric=T) cov<-sqrt(vpvp$values[1]) if(p==1){tt<-X w<-matrix(1,1,1) } else{ w<-as.matrix(vpvp$vectors[,1]) tt<-X%*%w } cc<-t(XtDY)%*%w normc<-sqrt(sum(cc^2)) cc<-cc/normc u<-Y%*%cc } else { vpvp<-eigen(t(XtDY)%*%XtDY,symmetric=T) cov<-sqrt(vpvp$values[1]) if(q==1){u<-Y cc<-matrix(1,1,1) } else {cc<-as.matrix(vpvp$vectors[,1]) u<-Y%*%cc } w<-XtDY%*%cc normw<-sqrt(sum(w^2)) w<-w/normw tt<-X%*%w } #browser() vx<-as.numeric(Dcp(tt,D=D)) vy<-as.numeric(Dcp(u,D=D)) rtX<-as.matrix(Dcp(X,tt,D=D))/sqrt(vx) rtY<-as.matrix(Dcp(Y,tt,D=D))/sqrt(vx) if(!standX)rtX<-rtX/as.matrix(sqrt(varX)) if(!standY)rtY<-rtY/as.matrix(sqrt(varY)) r<-rtY/sqrt(vx) if(k>1)ruUY<-Dcp(Dvar(UY,D=D,cor=T)$U,Dvar(u,D=D,cor=T)$U,D=D) # stockage des differents objets covXY<-c(covXY,cov) VX<-c(VX,vx) VY<-c(VY,vy) TX<-cbind(TX,tt) UY<-cbind(UY,u) WX[,k]<-w CY[,k]<-cc RtX<-cbind(RtX,rtX) RtY<-cbind(RtY,rtY) if(k>1)RU<-c(RU,ruUY) # calcul des coefficients beta du modele ajuste if(k==1){alpha<-w alph[[1]]<-w } else { alpha<-w-COMP%*%Dcp(Xini,D=D)%*%w alph[[k]]<-alpha } COMP<-COMP + alpha%*%t(alpha)/vx BETA<-BETA+alpha%*%t(alpha)%*%Dcp(Xini,Y,D=D)/vx BETALCR[[k]]<-BETA BETAL[[k]]<-BETA for(j in 1:p) for(i in 1:q) { if((standX) & (standY)) BETAL[[k]][j,i]<-BETA[j,i]*sqrt(varY[i]/varX[j]) if((standX) & (! standY))BETAL[[k]][j,i]<-BETA[j,i]/sqrt(varX[j]) if((! standX) & (standY)) BETAL[[k]][j,i]<-BETA[j,i]*sqrt(varY[i]) if((! standX) & (! standY)) BETAL[[k]][j,i]<-BETA[j,i] } if(p==1)bidon<-t(as.matrix(centY$moy))-t(matrix(1,1,p)%*%centX$moy%*%BETAL[[k]]) else bidon<-t(as.matrix(centY$moy))-t(matrix(1,1,p)%*%diag(as.vector(centX$moy))%*%BETAL[[k]]) dimnames(bidon)<-list(dimnames(Y)[[2]],c("Const")) BETAL[[k]]<-cbind(bidon,t(BETAL[[k]])) BETALCR[[k]]<-t(BETALCR[[k]]) # calcul des nouveaux X et Y #browser() ProjX<-Dproj(tt,X,D=D,eps=1e-08) ProjY<-Dproj(tt,Y,D=D,eps=1e-08) inerexpY<-sum(diag(VQop(ProjY$Yhat,D=D))) inerexpX<-sum(diag(VQop(ProjX$Yhat,D=D))) R2<-R2+diag(VQop(ProjY$Yhat,D=D))/diag(VQop(Yini,D=D)) if(k==1)R2c[,1]<-R2 else R2c<-cbind(R2c,R2) IEX<-c(IEX,inerexpX) IEY<-c(IEY,inerexpY) X<-ProjX$Yres Y<-ProjY$Yres YH[[k]]<-ProjY$Yhat YR[[k]]<-Y sinerexpY<-sum(IEY) sinerexpX<-sum(IEX) if(impres) { cat(" cov(t,u)=",format(cov)," r(t,u)=",format(round(cov/sqrt(vx*vy),3))) cat(" stdev(t)=",format(sqrt(vx))," stdev(u)=",format(sqrt(vy)),"\n") cat("\n") affich<-matrix(round(diag(VQop(ProjY$Yhat,D=D))/diag(VQop(Yini,D=D)),3),1,q) affich<-cbind(affich,round(inerexpY/inertotY*100,3)) if(k==1){dimnames(affich)<-list(c("R2 part."),c(dimnames(Y)[[2]]," % VAR Y")) print(affich) varty<-c(varty,affich[1,q+1]) } else{affich1<-matrix(round(R2,3),1,q) affich1<-cbind(affich1,round(sinerexpY/inertotY*100,3)) affich<-rbind(affich,affich1) dimnames(affich)<-list(c("partial R2","cum. R2"),c(dimnames(Y)[[2]]," % VAR Y") ) print(affich) varty<-c(varty,affich[2,q+1]) } cat("\n") cat(paste("% ",VARX," accounted for by the comp. ="),inerexpX/inertotX*100,"\n") if(k>1)cat(paste("% explained ",VARX," ="),sinerexpX/inertotX*100,"\n") } # test d'arret de la boucle axes if(k==A) { if(impres)cat("_______________________________________________________________\n") if((impres)&((sinerexpX/inertotX*100!=100)&(A !=p))){cat("number of supplementary components ?") repeat{ Asup<-scan(quiet=T,"",numeric(),1) if((A+Asup <=p)&(A+Asup <=n))break else cat("a bit too large!!\n") } } else Asup<-0 if(Asup==0)break else {A<-A+Asup CYY<-matrix(0,q,Asup) CY<-cbind(CY,CYY) WXX<-matrix(0,p,Asup) WX<-cbind(WX,WXX) } } # fin boucle axes } # dimnames des fichiers dimnames(TX)<-list(paste((1:n),sep=""),paste("t",(1:A),sep="")) dimnames(UY)<-list(paste((1:n),sep=""),paste("u",(1:A),sep="")) dimnames(RtX)<-list(dimnames(X)[[2]],paste("t",(1:A),sep="")) dimnames(RtY)<-list(dimnames(Y)[[2]],paste("t",(1:A),sep="")) names(BETALCR)<-paste("Beta(",format(1:A),")",sep="") names(BETAL)<-paste("Beta(",format(1:A),")",sep="") if(impres){ if ((!splflag)&(!calibration)){ cat("_______________________________________________________________\n") if(standX & standY)choixx<-"On standardized variables" else choixx<-"On centered variables (standardized)" choix<-c(choixx,"On initial variables") reponse<-menu(choix,title="PLS models according to the dimension (0 to exit)") switch(reponse,print(BETALCR),print(BETAL),stop("Bad choice!")) } cat("_______________________________________________________________\n") if (!splflag){ if(!missing(Xtest)) { Tsup<-matrix(0,nrow(Xtest),A) for(i in 1:A)Tsup[,i]<-Tsup[,i]+as.matrix(Xinitest)%*%alph[[i]] repeat{ cat("\n") if(!missing(Ytest)) cat("Validation and prediction on the test sample ? (y/n)\n") else cat("prediction on the test sample ? (y/n)\n") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break else { if(!missing(Ytest)) { cat("VALIDATION :\n") cat("Mean Squared Errors of the standardized response(s) according to the dimension\n") affich1<-matrix(0,1,A) dimnames(affich1)<-list("MSE",format(1:A)) for(i in 1:A) affich1[1,i]<-round(mean(apply((Yinitest-as.matrix(Xinitest)%*%t(BETALCR[[i]]))^2,2,mean)),3) print(affich1) par(mfrow=c(1,1)) plot(ts(affich1[1,]),type="b",pch=pchpar,xlab="MODEL DIMENSION",ylab="MSE",cex=cexpar) if(titlepar)title(main=paste("Opt. MSE(",order(affich1[1,])[1],")=",affich1[1,order(affich1[1,])[1]])) cat("Choose the model dimension (<=",A,")\n") dimopt<-scan(quiet=T,"",numeric(),1) #if(p==1)bidon<-t(as.matrix(centY$moy))-t(matrix(1,1,p)%*%centX$moy%*%t(BETAL[[dimopt]][,-1,drop=F])) #else #bidon<-t(as.matrix(centY$moy))-t(matrix(1,1,p)%*%diag(as.vector(centX$moy))%*%t(BETAL[[dimopt]][,-1,drop=F])) # dimnames(bidon)<-list(dimnames(Y)[[2]],c("Const")) # BETAP<-cbind(bidon,BETAL[[dimopt]][,-1,drop=F]) #Yest<-as.matrix(cbind(matrix(1,nrow(Xtest),1),Xtest))%*%t(BETAP) Yest<-as.matrix(cbind(matrix(1,nrow(Xtest),1),Xtest))%*%t(BETAL[[dimopt]][,,drop=F]) dimnames(Yest)<-list(dimnames(Xtest)[[1]],paste("est.",dimnames(Y)[[2]],sep="")) Yerr<-Ytest-Yest dimnames(Yerr)<-list(dimnames(Ytest)[[1]],paste("err.",dimnames(Ytest)[[2]],sep="")) Yaff<-NULL for(i in 1:ncol(Ytest))Yaff<-cbind(Yaff,as.matrix(cbind(Yest[,i,drop=F],Yerr[,i,drop=F]))) cat("Prediction error Yerr = Ytest - Yest \n") print(Yaff) #browser() if(sum(Ytest==0)==0){ cat("Absolute relative error in % of Ytest: |Yerr|/|Ytest|*100\n") erreur100<-abs(Yerr)/abs(Ytest)*100 erreur100<-rbind(erreur100,apply(erreur100,2,mean),sqrt(apply(erreur100,2,var))) dimnames(erreur100)<-list(c(dimnames(Ytest)[[1]],"Mean","Stdv"),dimnames(Ytest)[[2]]) print(round(erreur100,2)) } cat("\n") cat(paste("MSE(",dimopt,")=",affich1[1,dimopt],sep=""),"\n") par(ask=askpar) cat("plot of estimated Ytest versus observed Ytest\n") if(q>1){ cat("How many plots per row (<=",q,")?\n") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?\n") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) par(pty="s") for(i in 1:q) { if(abs(sum(Yini^2)-sum(Yinitest^2))1) {#points(RtY[,axespar]) text(RtY[,axespar],dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } else { text(t(as.matrix(RtY[,axespar])),dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } }#cercle corr else {#else corr par(mfrow=c(1,1),pty="m") plot(rbind(Xcoord,Ycoord)[,axespar],xlab=paste("t",axespar[1]," (",round(Inertexpx[axespar[1]],2),"%VX) (",round(Inertexpy[axespar[1]],2),"%VY)"),ylab=paste("t",axespar[2]," (",round(Inertexpx[axespar[2]],2), "%VX) (",round(Inertexpy[axespar[2]],2),"%VY)"),type="n",cex=cexpar) abline(h=0) abline(v=0) #points(Xcoord[,axespar]) text(Xcoord[,axespar],dimnames(RtX)[[1]],col=colpar+3,cex=cexpar) if(q>1) {#points(Ycoord[,axespar]) text(Ycoord[,axespar],dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } else { text(t(Ycoord[,axespar]),dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } }#else corr }#fin else break }#finrepeat cat("_______________________________________________________________\n") repeat { cat("Representation of the observations (y/n) ?\n") plctY<-scan(quiet=T,"",character(),1) if( (length(plctY)!=0)&&((plctY=="Y")|(plctY=="y"))) { if(standX)Xcoord<-RtX else Xcoord<-diag(sqrt(varX))%*%RtX if(standY) { if(q!=1)Ycoord<-RtY else Ycoord<-as.matrix(RtY) } else { if(q!=1)Ycoord<-diag(sqrt(varY))%*%RtY else Ycoord<-as.matrix(RtY*sqrt(varY)) } cat("Horizontal axis number (<=",A,") ?\n") plctYh<-scan(quiet=T,"",numeric(),1) cat("Vertical axis number (<=",A,") ?\n") plctYv<-scan(quiet=T,"",numeric(),1) axespar<-c(plctYh,plctYv) cat("_______________________\n") repeat {#repeatmenu if(A!=1) resmenu<-menu(c("Usual components, pseudo-projection based representation (t)", "Projection based representation (t*)","Regression of t* onto t","All the preceding ones", "Projection based representation and Correlation circle"), title="Maps of the observations (0 to exit)") else resmenu<-menu(c("Usual Components, pseudo-projection based representation (t)", "Projection based representation (t*)","Regression of t* onto t","All the preceding ones", "Projection based representation of observations and variables"), title="Maps of the observations (0 to exit)") ########### if(resmenu==1){ par(mfrow=c(1,1),pty="m") if(!missing(Xtest)) plot(rbind(TX[,axespar],Tsup[,axespar,drop=F]),xlab=paste("t",axespar[1],sep=""),ylab=paste("t",axespar[2],sep=""),type="n",cex=cexpar) else plot(TX[,axespar],xlab=paste("t",axespar[1],sep=""),ylab=paste("t",axespar[2],sep=""),type="n",cex=cexpar) abline(h=0) abline(v=0) text(TX[,axespar],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) if(!missing(Xtest))text(Tsup[,axespar,drop=F],dimnames(Xinitest)[[1]],col=colpar+5,cex=cexpar) if(missing(qual)) { cat("Map of the observations colored according to the levels of a categorical variable? (y/n)\n") plto <- scan(quiet=T,"", character(), 1) if( (length(plto)!=0)&&((plto=="Y")|(plto=="y"))) { repeat{ cat("Enter the name of the vector or that of the matrix containing the column-variable\n") nom <- scan(quiet=T,"", character(), 1) matricenom<-as.matrix(get(nom,pos=1)) if(nrow(matricenom)==nrow(Xini))break else cat("misfit on the number of rows\n") } if(is.vector(get(nom,pos=1))) {dimnames(matricenom)<-list(dimnames(Xini)[[1]],c(nom)) numer<-1 variab<-get(nom,pos=1) } if((is.matrix(get(nom,pos=1)))|(is.data.frame(get(nom,pos=1)))) { cat(paste(dimnames(matricenom)[[2]],"(",1:ncol(matricenom),"),",sep=""),"\n") cat("enter the column number of integer values\n") numer <- scan(quiet=T,"",numeric(), 1) variab<-matricenom[,numer] cat("name of the variable : ",dimnames(matricenom)[[2]][numer],"\n") } cat("click to locate the top left corner of the legend\n") for(i in min(variab):max(variab)) text(TX[variab==i,axespar],dimnames(Xini)[[1]][variab==i],col=i+1,cex=cexpar) legend(locator(1),paste(dimnames(matricenom)[[2]][numer],min(variab):max(variab),sep=""), fill=(min(variab):max(variab))+1,cex=cexpar) } } else { variab<-qual if(missing(names.qual)){ cat("\n") cat("Before clicking to locate the top left corner of the legend\n") repeat{ cat("Enter the",max(qual),"levels' names\n") names.qual<-scan(quiet=T,what=character()) if(length(names.qual)==max(qual)-min(qual)+1)break else cat("not the right nb of levels' names,should be",max(qual)-min(qual)+1,"!\n") } } for(i in min(variab):max(variab)) text(TX[variab==i,axespar],dimnames(Xini)[[1]][variab==i],col=i+1,cex=cexpar) legend(locator(1),names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="o", text.col=min(variab):max(variab)+1,cex=cexpar,fill=(min(qual):max(qual))+1) } } ########### if(resmenu==2){ par(mfrow=c(1,1),pty="m") if(!missing(Xtest)) plot(rbind(Tstar[,axespar],Tstarsup[,axespar,drop=F]),xlab=paste("t *",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %) (r",axespar[1],"=",round(TXTstarcor[axespar[1]],3),")",sep=""),ylab=paste("t *",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %) (r",axespar[2],"=",round(TXTstarcor[axespar[2]],3),")",sep=""),type="n",cex=cexpar) else plot(Tstar[,axespar],xlab=paste("t *",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %) (r",axespar[1],"=",round(TXTstarcor[axespar[1]],3),")",sep=""),ylab=paste("t *",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %) (r",axespar[2],"=",round(TXTstarcor[axespar[2]],3),")",sep=""),type="n",cex=cexpar) text(Tstar[,axespar],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) if(!missing(Xtest))text(Tstarsup[,axespar,drop=F],dimnames(Xinitest)[[1]],col=colpar+5,cex=cexpar) abline(h=0) abline(v=0) cat("a coloured map of training samples according to the levels of a categorical variable? (y/n)\n") plto <- scan(quiet=T,"", character(), 1) if( (length(plto)!=0)&&((plto=="Y")|(plto=="y"))) { repeat{ cat("enter the name of the vector-variable or that of the matrix containing the column-variable\n") nom <- scan(quiet=T,"", character(), 1) matricenom<-as.matrix(get(nom,pos=1)) if(nrow(matricenom)==nrow(Xini))break else cat("misfit of the nb of rows\n") } if(is.vector(get(nom,pos=1))) {dimnames(matricenom)<-list(dimnames(Xini)[[1]],c(nom)) numer<-1 variab<-get(nom,pos=1) } if((is.matrix(get(nom,pos=1)))|(is.data.frame(get(nom,pos=1)))) { cat(paste(dimnames(matricenom)[[2]],"(",1:ncol(matricenom),"),",sep=""),"\n") cat("enter the nunber of the column of integer values\n") numer <- scan(quiet=T,"",numeric(), 1) variab<-matricenom[,numer] cat("name of the variable : ",dimnames(matricenom)[[2]][numer],"\n") } cat("click to locate the top left corner of the legend\n") for(i in min(variab):max(variab)) text(Tstar[variab==i,axespar],dimnames(Xini)[[1]][variab==i],col=i+1,cex=cexpar) legend(locator(1),paste(dimnames(matricenom)[[2]][numer],min(variab):max(variab),sep=""),fill=(min(variab):max(variab))+1,cex=cexpar) } } ########### if(resmenu==3){ par(mfrow=c(1,2),pty="s") plot(TX[,axespar[1]],Tstar[,axespar[1]],xlab=paste("t",axespar[1],sep=""),ylab=paste("t*",axespar[1],sep=""),main=paste("r",axespar[1]," = ",format(round(TXTstarcor[axespar[1]],3)),sep=""),type="n",cex=cexpar) text(TX[,axespar[1]],Tstar[,axespar[1]],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) abline(h=0) abline(v=0) abline(a=0,b=TXTstarcor[axespar[1]]*sqrt(diag(Dcp(Tstar,D=D)))[axespar[1]]/sqrt(diag(Dcp(TX,D=D)))[axespar[1]]) plot(TX[,axespar[2]],Tstar[,axespar[2]],xlab=paste("t",axespar[2],sep=""),ylab=paste("t*",axespar[2],sep=""),main=paste("r",axespar[2]," = ",format(round(TXTstarcor[axespar[2]],3)),sep=""),type="n",cex=cexpar) text(TX[,axespar[2]],Tstar[,axespar[2]],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) abline(h=0) abline(v=0) abline(a=0,b=TXTstarcor[axespar[2]]*sqrt(diag(Dcp(Tstar,D=D)))[axespar[2]]/sqrt(diag(Dcp(TX,D=D)))[axespar[2]]) } ########### if(resmenu==4){ par(mfrow=c(2,2),pty="m") if(!missing(Xtest)) plot(rbind(TX[,axespar],Tsup[,axespar,drop=F]),xlab=paste("t",axespar[1],sep=""),ylab=paste("t",axespar[2],sep=""),type="n",cex=cexpar) else plot(TX[,axespar],xlab=paste("t",axespar[1],sep=""),ylab=paste("t",axespar[2],sep=""),type="n",cex=cexpar) abline(h=0) abline(v=0) text(TX[,axespar],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) if(!missing(Xtest))text(Tsup[,axespar,drop=F],dimnames(Xinitest)[[1]],col=colpar+5,cex=cexpar) if(!missing(Xtest)) plot(rbind(Tstar[,axespar],Tstarsup[,axespar,drop=F]),xlab=paste("t *",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %)",sep=""),ylab=paste("t *",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %)",sep=""),type="n",cex=cexpar) else plot(Tstar[,axespar],xlab=paste("t*",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %)",sep=""),ylab=paste("t*",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %)",sep=""),type="n",cex=cexpar) text(Tstar[,axespar],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) if(!missing(Xtest))text(Tstarsup[,axespar,drop=F],dimnames(Xinitest)[[1]],col=colpar+5,cex=cexpar) abline(h=0) abline(v=0) plot(TX[,axespar[1]],Tstar[,axespar[1]],xlab=paste("t",axespar[1],sep=""),ylab=paste("t*",axespar[1],sep=""),main=paste("r",axespar[1]," = ",format(round(TXTstarcor[axespar[1]],3)),sep=""),type="n",cex=cexpar) text(TX[,axespar[1]],Tstar[,axespar[1]],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) abline(h=0) abline(v=0) abline(a=0,b=TXTstarcor[axespar[1]]*sqrt(diag(Dcp(Tstar,D=D)))[axespar[1]]/sqrt(diag(Dcp(TX,D=D)))[axespar[1]]) plot(TX[,axespar[2]],Tstar[,axespar[2]],xlab=paste("t",axespar[2],sep=""),ylab=paste("t*",axespar[2],sep=""),main=paste("r",axespar[2]," = ",format(round(TXTstarcor[axespar[2]],3)),sep=""),type="n",cex=cexpar) text(TX[,axespar[2]],Tstar[,axespar[2]],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) abline(h=0) abline(v=0) abline(a=0,b=TXTstarcor[axespar[2]]*sqrt(diag(Dcp(Tstar,D=D)))[axespar[2]]/sqrt(diag(Dcp(TX,D=D)))[axespar[2]]) } ########### if(resmenu==5){ par(mfrow=c(1,1),pty="s") if(!missing(Xtest)) plot(rbind(Tstar[,axespar],Tstarsup[,axespar,drop=F]),xlab=paste("t *",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %) (r",axespar[1],"=",round(TXTstarcor[axespar[1]],3),")",sep=""),ylab=paste("t *",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %) (r",axespar[2],"=",round(TXTstarcor[axespar[2]],3),")",sep=""),type="n",cex=cexpar) else plot(Tstar[,axespar],xlab=paste("t *",axespar[1]," (",round(vartstar[axespar[1]]/IPLS*100,2)," %) (r",axespar[1],"=",round(TXTstarcor[axespar[1]],3),")",sep=""),ylab=paste("t *",axespar[2]," (",round(vartstar[axespar[2]]/IPLS*100,2)," %) (r",axespar[2],"=",round(TXTstarcor[axespar[2]],3),")",sep=""),type="n",cex=cexpar) text(Tstar[,axespar],dimnames(Xini)[[1]],col=colpar+3,cex=cexpar) if(!missing(Xtest))text(Tstarsup[,axespar,drop=F],dimnames(Xinitest)[[1]],col=colpar+5,cex=cexpar) abline(h=0) abline(v=0) if(!missing(qual)) { variab<-qual if(missing(names.qual)){ cat("\n") cat("Before clicking to locate the top left corner of the legend\n") repeat{ cat("Enter the",max(qual),"levels' names\n") names.qual<-scan(quiet=T,what=character()) if(length(names.qual)==max(qual)-min(qual)+1)break else cat("not the right nb of levels' names,should be",max(qual)-min(qual)+1,"!\n") } } for(i in min(variab):max(variab)) text(Tstar[variab==i,axespar],dimnames(Xini)[[1]][variab==i],col=i+1,cex=cexpar) legend(locator(1),names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="o", text.col=min(variab):max(variab)+1,cex=cexpar,fill=(min(qual):max(qual))+1) } if(A!=1) { theta<-seq(0,20,.05) x<-cos(theta) y<-sin(theta) plot(x,y,type="l",xlab=paste("t",axespar[1]," (",round(Inertexpx[axespar[1]],2),"%IX) (", round(Inertexpy[axespar[1]],2),"%IY)"),ylab=paste("t",axespar[2]," (",round(Inertexpx[axespar[2]],2), "%IX) (",round(Inertexpy[axespar[2]],2),"%IY)"),cex=cexpar) abline(h=0) abline(v=0) text(RtX[,axespar],dimnames(RtX)[[1]],col=colpar+3,cex=cexpar) if(q>1) {#points(RtY[,axespar]) text(RtY[,axespar],dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } else { #points(t(as.matrix(RtY[,axespar]))) text(t(as.matrix(RtY[,axespar])),dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } }#endifA!=1 else { plot(rbind(Xcoord,Ycoord)[,axespar],xlab=paste("t",axespar[1]," (",round(Inertexpx[axespar[1]],2),"%VX) (",round(Inertexpy[axespar[1]],2),"%VY)"),ylab=paste("t",axespar[2]," (",round(Inertexpx[axespar[2]],2), "%VX) (",round(Inertexpy[axespar[2]],2),"%VY)"),type="n",cex=cexpar) abline(h=0) abline(v=0) #points(Xcoord[,axespar]) text(Xcoord[,axespar],dimnames(RtX)[[1]],col=colpar+3,cex=cexpar) if(q>1) {#points(Ycoord[,axespar]) text(Ycoord[,axespar],dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } else { #points(t(Ycoord[,axespar])) text(t(Ycoord[,axespar]),dimnames(RtY)[[1]],col=colpar+1,cex=cexpar) } } ########### } cat("_______________________\n") if(resmenu ==0)break }#endrepeatmenu }#fin else break }#finrepeat cat("_______________________________________________________________\n") repeat { cat("regression of Y onto t (y/n) ?\n") plcYt<-scan(quiet=T,"",character(),1) if( (length(plcYt)!=0)&&((plcYt=="Y")|(plcYt=="y"))) { cat("t number (<=",A,") ?\n") plcYtn<-scan(quiet=T,"",numeric(),1) for(j in 1:q) { par(mfrow=c(1,1),pty="s") axespar<-c(plcYtn) if(missing(Ytest)) plot(TX[,axespar],Yini[,j],xlab=paste("t",axespar[1]),ylab=dimnames(Y)[[2]][j],type="n") else plot(c(TX[,axespar],Tsup[,axespar]),c(Yini[,j],Yinitest[,j]),xlab=paste("t",axespar[1]),ylab=dimnames(Y)[[2]][j],type="n") text(TX[,axespar],Yini[,j],dimnames(X)[[1]],col=colpar+4,cex=cexpar) if(!missing(Ytest)) text(Tsup[,axespar,drop=F],Yinitest[,j],dimnames(Xtest)[[1]],col=colpar+5,cex=cexpar) abline(lm(Yini[,j]~TX[,axespar]),col=colpar+1) } } else break } repeat { if(!calibration)break cat("_______________________________________________________________\n") cat("plot of the BETA's according to the dimension (y/n) ?\n") pltctY<-scan(quiet=T,"",character(),1) if( (length(pltctY)!=0)&&((pltctY=="Y")|(pltctY=="y"))) { if(q>1) { cat("Number of Y-plots per row ? (<=",q,")\n") colo<-scan(quiet=T,"",numeric(),1) cat("Number of rows ?\n") lign<-scan(quiet=T,"",numeric(),1) par(mfrow=c(lign,colo),pty="m") } else par(mfrow=c(1,1),pty="m") echini<-dimnames(Xini)[[2]] mode(echini)<-"numeric"# echini est le vecteur des "temps" initialement pris en compte cat("dimension du modele (<=",A,") ?\n") dimmod<-scan(quiet=T,"",numeric(),1) for(j in 1:q){ plot(echini,BETALCR[[dimmod]][j,],type="n",ylab=paste("BETA , dimension ",dimmod,sep=""),xlab="wavelengths",main=dimnames(Yini)[[2]][j],cex=cexpar) points(echini,BETALCR[[dimmod]][j,],type="l",col=colpar+j) abline(h=0) } } else break } cat("_______________________________________________________________\n") repeat { if(standX==F)break cat("evolution of the BETA's according to the dimension (y/n) ?\n") plcYt<-scan(quiet=T,"",character(),1) if(length(plcYt)==0) plcYt<-"n" if(plcYt=="n")break else { if(q>1){ cat("one Individual plot per response or one plot for All the responses (I/A)\n") pltu<- scan(quiet=T,"", character(), 1) if((pltu=="I")|(pltu=="i")) par(mfrow=c(1,1),pty="s") else{ if(p<=20) cat("Nunber of plots per row ? (<=",q+1,", count 1 more plot for the legend)\n") else cat("Nunber of plots per row ? (<=",q,")\n") colo<-scan(quiet=T,"",numeric(),1) cat("Nunber of rows ?\n") lign<-scan(quiet=T,"",numeric(),1) par(mfrow=c(lign,colo),pty="s") } } else {par(mfrow=c(1,1)) pltu<-"I"} yco<-matrix(ncol=A,nrow=p,NA) for(j in 1:q) { if(p<=20){ if((pltu=="I")|(pltu=="i"))cat("Click on the plot to locate the legend !\n") else if(j==q)cat("Click on the plot to locate the legend !\n") } for(k in 1:A)yco[,k]<-BETALCR[[k]][j,] ts.plot(ts(t(yco)),gpars=list(xlab="MODEL DIM.",ylab=dimnames(Y)[[2]][j],type="n",cex=cexpar)) for(i in 1:p) { points(yco[i,],pch=i,cex=cexpar+0.4,col=i) points(yco[i,],type="l",lty=i,col=i) } if((pltu=="I")|(pltu=="i")){ if(p<=20){ if(p>5)legend(locator(1),dimnames(X)[[2]],pch=1:p,bty="n",cex=cexpar,col=1:p,ncol=2) else legend(locator(1),dimnames(X)[[2]],pch=1:p,bty="n",cex=cexpar,col=1:p) } } else { if((j==q)&(p<=20)) {plot(0,0,type="n",xlab="",main="LEGEND FOR PREDICTORS",ylab="",axes=F) if(p>5)legend(locator(1),dimnames(X)[[2]],pch=1:p,bty="n",cex=cexpar,col=1:p,ncol=2) else legend(locator(1),dimnames(X)[[2]],pch=1:p,bty="n",cex=cexpar,col=1:p) } } } } } cat("_______________________________________________________________\n") repeat { if(calibration)break cat("representation of the most influential predictors on a response (y/n) ?\n") plctY<- scan(quiet=T,"", character(), 1) if( (length(plctY)!=0)&&((plctY=="Y")|(plctY=="y"))) {if(q!=1){ cat("response number (<=",q,") ?\n") numres<-scan(quiet=T,"",numeric(),1) } else numres<-1 cat("model dimension (<=",A,") ?\n") dimmod<-scan(quiet=T,"",numeric(),1) #browser() invtx<-rev(order(rank((abs(BETALCR[[dimmod]][numres,]))))) ZZ<-BETALCR[[dimmod]] dimnames(ZZ)[[2]]<-paste(dimnames(ZZ)[[2]],"(",1:ncol(ZZ),")",sep="") print(round(abs(ZZ[numres,invtx]),3)) if(titlepar==F)par(mfrow=c(1,2),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,2),pty=ptypar) if(p<=10){ barplot(BETALCR[[dimmod]][numres,invtx],ylab="regression coeff.",names=dimnames(X)[[2]][invtx], xlab="predictors",density=20,space=1.4,col=1:p,cex.axis=cexpar,cex.names=cexpar) barplot(abs(BETALCR[[dimmod]][numres,invtx]),ylab="absolute values",names=dimnames(X)[[2]][invtx],xlab="predictors",density=20,space=1.4, col=1:p,cex.axis=cexpar,cex.names=cexpar) } else{ barplot(BETALCR[[dimmod]][numres,invtx],ylab="regression coeff.",names=format(invtx),xlab="predictors",density=20,space=1.4, col=1:p,cex.axis=cexpar,cex.names=cexpar) barplot(abs(BETALCR[[dimmod]][numres,invtx]),ylab="absolute values",names=format(invtx),xlab="predictors", density=20,space=1.4,col=1:p,legend=paste(invtx,dimnames(X)[[2]][invtx]),cex.axis=cexpar,cex.names=cexpar) } if(titlepar==T)mtext(side=3,line=-4,cex=cexpar+1,outer=TRUE,paste("PLS model for ",dimnames(Yini)[[2]][numres]," (dim ",dimmod,")",sep="")) cat("Number of retained predictors ?,(<=",p,") \n") nb<- scan(quiet=T,"", numeric(), 1) cat("How many plots per row ?,(<=",nb,") \n") pc<- scan(quiet=T,"", numeric(), 1) cat("How many rows ? \n") pl<-scan(quiet=T,"", numeric(), 1) par(oma=c(0,0,4,0),mfrow=c(pl,pc),pty="s") if(missing(Xtest)){ rangemin<-min(Xini*BETALCR[[dimmod]][numres,invtx[1]]) rangemax<-max(Xini*BETALCR[[dimmod]][numres,invtx[1]]) } else{ rangemin<-min(rbind(Xini,Xinitest)*BETALCR[[dimmod]][numres,invtx[1]]) rangemax<-max(rbind(Xini,Xinitest)*BETALCR[[dimmod]][numres,invtx[1]]) } for(i in invtx[1:nb]) { if(missing(Xtest)) plot(Xini[,i],Xini[,i]*BETALCR[[dimmod]][numres,i],ylim=c(rangemin,rangemax),xlab="",ylab="",type="n", main=paste(round(BETALCR[[dimmod]][numres,i],3),dimnames(Xini)[[2]][i]),cex=cexpar) else plot(c(Xini[,i],Xinitest[,i]),c(Xini[,i],Xinitest[,i])*BETALCR[[dimmod]][numres,i],ylim=c(rangemin,rangemax),xlab="",ylab="",type="n", main=paste(round(BETALCR[[dimmod]][numres,i],3),dimnames(Xini)[[2]][i]),cex=cexpar) if(missing(qual)) text(Xini[,i],Xini[,i]*BETALCR[[dimmod]][numres,i],dimnames(Xini)[[1]],col=colpar+2,cex=cexpar) else text(Xini[,i],Xini[,i]*BETALCR[[dimmod]][numres,i],dimnames(Xini)[[1]],col=qual+1,cex=cexpar) if(!missing(Xtest)) text(Xinitest[,i],Xinitest[,i]*BETALCR[[dimmod]][numres,i],dimnames(Xinitest)[[1]],col=colpar+2,cex=cexpar) abline(0,BETALCR[[dimmod]][numres,i],col=colpar+1) if(titlepar) mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' influence on ",dimnames(Yini)[[2]][numres], " (dim ",dimmod,")",sep="")) } } else break } cat("\n") cat("__________________________________________________________________\n") cat("plot of Yhat versus Y (y/n)?\n") plt3 <- scan(quiet=T,,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ par(mfrow=c(1,1)) repeat{ cat("Enter the dimension ?,(<=",A,") \n") pa<- scan(quiet=T,"", numeric(), 1) YHAT<-matrix(0,n,q) for(k in 1:pa){ YHAT<-YHAT+YH[[k]] } if(q>1){ cat("How many plots per row (<=",q,")?\n") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?\n") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) for(i in 1:q) { yy<-YHAT[,i]*sqrt(varY[i])+centY$moy[i] if(sum(Yinitial[,i]==0)==0) { titrepl<-"nean relative err (%) = " relativeerreur<-Dvar(abs(Yinitial[,i]-yy)/abs(Yinitial[,i])*100,D=D)$mean } else { relativeerreur<-sqrt(Dvar(Yinitial[,i]-yy,D=D)$var) titrepl<-" stdv err = " } if(titlepar) plot(Yinitial[,i],yy,xlab=dimnames(Yini)[[2]][i], ylab=paste("modeled ",dimnames(Yini)[[2]][i],",",pa," Dim.",sep=""), type="n",xlim=range(c(Yinitial[,i],yy)),ylim=range(c(Yinitial[,i],yy)), main=paste(titrepl,round(relativeerreur,2))) else plot(Yinitial[,i],yy,xlab=dimnames(Yini)[[2]][i], ylab=paste("modeled ",dimnames(Yini)[[2]][i],",",pa," Dim.",sep=""), type="n",xlim=range(c(Yinitial[,i],yy)),ylim=range(c(Yinitial[,i],yy))) if(!typedata)points(Yinitial[,i],yy,pch=pchpar,cex=cexpar) else text(Yinitial[,i],yy,dimnames(Yini)[[1]],col=colpar+3) abline(a=0,b=1,col=colpar+1) } cat("A different dimension ? (y/n)\n") plcYt<-scan(quiet=T,"",character(),1) if(length(plcYt)==0) plcYt<-"n" if(plcYt=="n")break }#repeat } cat("__________________________________________________________________\n") cat("\n") cat("Residual plots according to the dimensions? (y/n)\n") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { repeat{ cat("Enter the dimension ?,(<=",A,") \n") pa<- scan(quiet=T,"", numeric(), 1) if(q==1){cat("How many plots per row ? (<=",pa,")\n") repoc<-scan(quiet=T,"",numeric(),1) if(pa-repoc==0)repol<-1 else repol<-ceiling(pa/repoc) } else { cat("How many plots per row ? (<=",q,")\n") repoc<-scan(quiet=T,"",numeric(),1) cat("How many rows ? (<=",pa,")\n") repol<-scan(quiet=T,"",numeric(),1) } if(titlepar==F)par(mfrow=c(repol,repoc),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(repol,repoc),pty=ptypar) YHAT<-matrix(0,n,q) for(k in 1:pa){ #YHAT<-Xini%*%BETALCR[[k]] YHAT<-YHAT+YH[[k]] if(!missing(Ytest))YHtest<-as.matrix(Xinitest)%*%t(BETALCR[[k]]) for(i in 1:q) { if(missing(Ytest)){ if(k==1) variat<-range(Yini-YHAT) variat<-range(variat,range(Yini-YHAT)) plot(YHAT[,i],Yini[,i]-YHAT[,i],ylim=variat,xlab=paste(dimnames(Yini)[[2]][i]," , ",k,sep=""),ylab="",type="n",cex=cexpar) } else{ if(k==1) variat<-range(rbind(Yini,Yinitest)-rbind(YHAT,YHtest)) variat<-range(variat,range(rbind(Yini,Yinitest)-rbind(YHAT,YHtest))) plot(c(YHAT[,i],YHtest[,i]),c(Yini[,i]-YHAT[,i],Yinitest[,i]-YHtest[,i]),ylim=variat,xlab=paste(dimnames(Yini)[[2]][i]," , ",k,sep=""),ylab="",type="n",cex=cexpar) } if(!typedata)text(YHAT[,i],Yini[,i]-YHAT[,i],cex=cexpar,col=colpar+1) else text(YHAT[,i],Yini[,i]-YHAT[,i],dimnames(Yini)[[1]],cex=cexpar,col=colpar+1) if(!missing(Ytest)) { if(!typedata)text(YHtest[,i],Yinitest[,i]-YHtest[,i],cex=cexpar,col=colpar+2) else text(YHtest[,i],Yinitest[,i]-YHtest[,i],dimnames(Yinitest)[[1]],cex=cexpar,col=colpar+2) } } } if(titlepar==T)mtext(side=3,line=-1,cex=2,outer=TRUE,"Residuals with PLS dimensions") cat("A different dimension? (y/n)\n") plcYt<-scan(quiet=T,"",character(),1) if(length(plcYt)==0) plcYt<-"n" if(plcYt=="n")break } } cat("\n") cat("__________________________________________________________________\n") if(q==2){ cat(paste("Bidimensional plot (", dimnames(Yini)[[2]][1],",",dimnames(Yini)[[2]][2], ") of the fitted responses (y/n)? \n",sep="")) plt3 <- scan(quiet=T,,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ if(titlepar==F)par(mfrow=c(1,2),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,2),pty=ptypar) repeat{ cat("Enter the dimension ?,(<=",A,") \n") pa<- scan(quiet=T,"", numeric(), 1) YYY<-matrix(0,n,2) for(i in 1:pa)YYY<-YYY+YH[[i]] range1<-range(c(YYY[, 1],Yini[, 1])) range2<-range(c(YYY[, 2],Yini[, 2])) plot(YYY[, 1],YYY[,2],xlim=range1,ylim=range2,xlab=paste("fitted",dimnames(Yini)[[2]][1]),ylab=paste("fitted",dimnames(Yini)[[2]][2]),type="n",pch=pchpar,main="Reconstituted shape") if(!typedata)points(YYY[, 1],YYY[, 2],pch=pchpar,cex=cexpar) else text(YYY[, 1],YYY[, 2],dimnames(Yini)[[1]],col=colpar+3) plot(Yini[, 1],Yini[, 2],xlim=range1,ylim=range2,xlab=dimnames(Yini)[[2]][1],ylab=dimnames(Yini)[[2]][2],type="n",pch=pchpar,main="Original shape") if(!typedata)points(Yini[, 1],Yini[, 2],pch=pchpar,cex=cexpar) else text(Yini[, 1],Yini[, 2],dimnames(Yini)[[1]],col=colpar+3) if(titlepar==T)mtext(side=3,line=0,cex=2,outer=TRUE,paste("PLS (",pa," dim.)",sep="")) cat("A different dimension? (y/n)\n") repo<-scan(quiet=T,"",character(),1) if((length(repo)==0)|(repo!="y"))break } } } # fin des graphiques } #browser() invisible(return(list(nomfichX=nomfichX,Xini=Xini,nomfichY=nomfichY,Yini=Yini,covXY=covXY,A=A,VX=VX,VY=VY,TX=TX,UY=UY, WX=WX,CY=CY,IEY=IEY,IEX=IEX,YH=YH,YR=YR,RtX=RtX,RtY=RtY,BETALCR=BETALCR,BETAL=BETAL,alph=alph,R2c=R2c))) #return(nomfichX,Xini,nomfichY,Yini,covXY,A,VX,VY,TX,UY,WX,CY,IEY,IEX,YH,YR,RtX,RtY,BETALCR,BETAL,#alph,R2c,xlocator) } ######################################################## plscv<-function(X,Y,standX=T,standY=T,A=2,D=1,prop=0.1,GCV=2,impres=T) { # cross-validation sur PLS Xinitial<-as.matrix(X) Yinitial<-as.matrix(Y) n<-nrow(Xinitial) p<-ncol(Xinitial) q<-ncol(Yinitial) if(is.null(dimnames(Xinitial)))dimnames(Xinitial)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Yinitial)))dimnames(Yinitial)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(Xinitial)[[1]])==0) dimnames(Xinitial)[[1]]<- format(1:n) if(length(dimnames(Yinitial)[[1]])==0) dimnames(Yinitial)[[1]]<- format(1:n) if(length(dimnames(Xinitial)[[2]])==0) dimnames(Xinitial)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Yinitial)[[2]])==0) {cat("Enter the name of the response(s)\n") prov<-as.vector(format(1:q)) for(i in 1:q) {cat(paste("response ",format(i),"\n")) prov[i]<-scan(quiet=T,,what="", 1) } dimnames(Yinitial)[[2]]<-prov } # calcul de la metrique si D=1 if(length(D)==1) D<-rep(1/n,n) else D<-as.vector(D) # centrage et reduction centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) if(GCV==0) {#GCV==0 pp<-round(n*prop) if(pp==0)repeat{ cat("choose an another proportion of samples >",prop," !!\n") prop<- scan(quiet=T,"", numeric(), 1) pp<-round(n*prop) if(pp !=0)break } if(impres)cat(pp,"objects out\n") ppc<-floor(n/pp) PRESS<-array(0,c(ppc,q,A)) predict<-array(0,c(n,q,A)) for(i in 0:(ppc-1)) { ii<-(1:pp)+pp*i if(impres)cat(ii,"*") plsresul<-pls(Xinitial[-ii,],Yinitial[-ii,],A=A,standX=standX,standY=standY,impres=F,graph=F, cexpar=cexpar,colpar=colpar,titlepar=titlepar) for(k in 1:A){ PRESS[i+1,,k]<-PRESS[i+1,,k]+apply((Yini[ii,,drop=F]-Xini[ii,,drop=F]%*%t(plsresul$BETALCR[[k]]))^2,2,sum) predict[ii,,k]<-Xini[ii,,drop=F]%*%t(plsresul$BETALCR[[k]]) } } if(n%%pp!=0){ ii<-(1:(n%%pp))+pp*ppc if(impres)cat(ii,"*") plsresul<-pls(Xinitial[-ii,],Yinitial[-ii,],A=A,standX=standX,standY=standY,impres=F,graph=F,cexpar=cexpar,colpar=colpar,titlepar=titlepar) for(k in 1:A){ PRESS[ppc, ,k]<-PRESS[ppc, ,k]+apply((Yini[ii,,drop=F]-Xini[ii,,drop=F]%*%t(plsresul$BETALCR[[k]]))^2,2,sum) predict[ii,,k]<-Xini[ii,,drop=F]%*%t(plsresul$BETALCR[[k]]) } } if(impres)cat("\n") dimnames(PRESS)<-list(format(1:ppc),dimnames(Yinitial)[[2]],paste("Dim",1:A,sep="")) dimnames(predict)<-list(dimnames(Yinitial)[[1]],dimnames(Yinitial)[[2]],paste("Dim",1:A,sep="")) PRESStot<-apply(PRESS,3,sum)/n PRESSpar<-apply(PRESS,c(2,3),sum)/n invisible(return(list(Xinitial=Xinitial,Yinitial=Yinitial,Yini=Yini,A=A,PRESS=PRESS,PRESStot=PRESStot,PRESSpar=PRESSpar,predict=predict,prop=prop,GCV=GCV))) }#finGCV==0 else {#GCV!=0 plsresul<-pls(Xinitial,Yinitial,A=A,standX=standX,standY=standY,impres=F,graph=F) GCrit<-matrix(0,q,A) for(i in 1:q) { YHAT<-matrix(0,n,1) for(k in 1:A){ YHAT<-YHAT+plsresul$YH[[k]][,i] Yres<-Yini[,i]-YHAT #Average squared residual ASR<-sum(diag(as.matrix(VQop(Yres,D=D)))) GCrit[i,k]<-ASR/(1-k*GCV/n)^2 } dimnames(GCrit)<-list(dimnames(Yinitial)[[2]],paste("Dim",1:A,sep="")) } GCritot<-apply(GCrit,2,sum) #browser() invisible(list(Xinitial=Xinitial,Yinitial=Yinitial,Yini=Yini,A=A,predict=predict,GCV=GCV,GCrit=GCrit,GCritot=GCritot)) }#finGCV!=0 } ######################################################## plscv.plot<-function(plscvresul,D=1,cexpar=0.7,ptypar="s",colpar=1,titlepar=T,askpar=T,bgpar="gray") { # plots de la validation croisee n<-nrow(plscvresul$Yinitial) q<-ncol(plscvresul$Yinitial) par(ask=askpar) par(bg=bgpar) if(plscvresul$GCV==0) { if(q>1){ par(mfrow=c(1,2),pty=ptypar) if(titlepar) plot(plscvresul$PRESStot,type="n",xlab="Model Dim.",ylab="PRESS",main=paste("opt. Dim.",order(plscvresul$PRESStot)[1]," , ","PRESS =",round(min(plscvresul$PRESStot),4),"(",round(n*plscvresul$prop),"out )"), cex=cexpar,bg=par("bg")) else plot(plscvresul$PRESStot,type="n",xlab="Model Dim.",ylab="PRESS",cex=cexpar,bg=par("bg")) points(plscvresul$PRESStot,pch=10,cex=cexpar+0.7) points(plscvresul$PRESStot,type="l") if(titlepar) ts.plot(ts(t(plscvresul$PRESSpar)),gpars=list(xlab="Model Dim.",ylab="Partial PRESS", type="n",main=paste("PRESS/Responses","(",round(n*plscvresul$prop),"out )"),cex=cexpar)) else ts.plot(ts(t(plscvresul$PRESSpar)),gpars=list(xlab="Model Dim.",ylab="Partial PRESS",type="n",cex=cexpar,bg=par("bg"))) for(i in 1:q){ points(1:plscvresul$A,plscvresul$PRESSpar[i,],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:plscvresul$A,plscvresul$PRESSpar[i,],type="l",lty=i,col=colpar+i) } cat("Click to locate the top left corner of the legend\n") #legend(2,14,dimnames(plscvresul$Yinitial)[[2]],marks=1:q,cex=cexpar,ncol=ncolpar,bty="n",col= #colpar+(1:q)) legend(locator(1),dimnames(plscvresul$Yinitial)[[2]],pch=1:q,cex=cexpar, bty="n",x.intersp=.5,y.intersp=.5,col=colpar+(1:q)) } else{ par(mfrow=c(1,1),pty=ptypar) if(titlepar) plot(plscvresul$PRESStot,type="n",xlab="Model Dim." ,ylab="PRESS",main=paste("opt. Dim.",order(plscvresul$PRESStot)[1]," , ",dimnames(plscvresul$Yinitial)[[2]],"PRESS =",round(min(plscvresul$PRESStot),4),"(",round(n*plscvresul$prop),"out )"), cex=cexpar,bg=par("bg")) else plot(plscvresul$PRESStot,type="n",xlab="Model Dim." ,ylab="PRESS",cex=cexpar,bg=par("bg")) points(plscvresul$PRESStot,pch=10,cex=cexpar+0.7) points(plscvresul$PRESStot,type="l") } A<-1 cat("Observed Y-samples against CV predicted Y-samples along with the dimension\n") repeat{ cat("Choose the model dimension or 0 to exit (<=",plscvresul$A,")") dimension<-scan(quiet=T,"",numeric(),1) if(length(dimension)==0)break if(dimension!=0) {A<-dimension print(paste("PRESS(",plscvresul$prop,",",A,")=",round(plscvresul$PRESStot[A],4),sep="")) } else A<-A if(dimension==0)break #browser() if(q>1){ cat("How many plots per row (<=",q,")?") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) varmeany<-Dvar(plscvresul$Yinitial,D=D) for(j in 1:q){ yy<-plscvresul$predict[,j,dimension]*sqrt(varmeany$var[j])+varmeany$mean[j] #if(sum(plscvresul$Yinitial[,j]==0)>0) #{ #titreplot<-paste(dimnames(plscvresul$Yinitial)[[2]][j]," stdv err = ") #sigmapredict<-round(sqrt(Dvar(plscvresul$Yinitial[,j]-yy,D=D)$var),2) #} #else #{ #titreplot<-paste(dimnames(plscvresul$Yinitial)[[2]][j],"mean relative err in % = ") #sigmapredict<-round(Dvar(abs((plscvresul$Yinitial[,j]-yy)/plscvresul$Yinitial[,j])*100,D=D)$mean,2) #} if(titlepar) plot(plscvresul$Yinitial[,j],yy,xlab=paste("obs. ",dimnames(plscvresul$Yinitial)[[2]][j]), ylab=paste("est. ",dimnames(plscvresul$Yinitial)[[2]][j],", Dim. ",dimension), xlim=range(c(plscvresul$Yinitial[,j],yy)),ylim=range(c(plscvresul$Yinitial[,j],yy)), main=paste("PRESS = ",round(plscvresul$PRESSpar[j,A],4)," (",round(n*plscvresul$prop)," out)",sep=""),type="n",cex=cexpar,bg=par("bg")) else plot(plscvresul$Yinitial[,j],yy,xlab=paste("obs. ",dimnames(plscvresul$Yinitial)[[2]][j]), xlim=range(c(plscvresul$Yinitial[,j],yy)),ylim=range(c(plscvresul$Yinitial[,j],yy)), ylab=paste("est. ",dimnames(plscvresul$Yinitial)[[2]][j],", Dim. ",dimension),type="n",cex=cexpar,bg=par("bg")) abline(a=0,b=1,col=colpar+1) text(plscvresul$Yinitial[,j],yy,dimnames(plscvresul$Yinitial)[[1]],col=colpar+3,cex=cexpar) #cat(titreplot,sigmapredict,"\n") }#endforj=1:q }#endrepeat #browser() invisible(return(list(plscvresul$PRESStot,plscvresul$PRESSpar,A=A))) }#finGCV==0 else { if(q>1){ par(mfrow=c(1,2),pty=ptypar) #browser() if(titlepar) plot(plscvresul$GCritot,type="n",xlab="Model Dim.",ylab="GCV", main=paste("opt. Dim.",order(plscvresul$GCritot)[1],",","GCV=",round(min(plscvresul$GCritot),4), "(alpha=",plscvresul$GCV,")"),cex=cexpar,bg=par("bg")) else plot(plscvresul$GCritot,type="n",xlab="Model Dim.",ylab="GCV",cex=cexpar,bg=par("bg")) points(plscvresul$GCritot,pch=10,cex=cexpar+0.7) points(plscvresul$GCritot,type="l") if(titlepar) ts.plot(ts(t(plscvresul$GCrit)), gpars=list(xlab="Model Dim.",ylab="Partial GCV", type="n",main=paste("GCV/Responses"),cex=cexpar,bg=par("bg"))) else ts.plot(ts(t(plscvresul$GCrit)),gpars=list(xlab="Model Dim.",ylab="Partial GCV",type="n",cex=cexpar,bg=par("bg"))) for(i in 1:q){ points(1:plscvresul$A,plscvresul$GCrit[i,],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:plscvresul$A,plscvresul$GCrit[i,],type="l",lty=i,col=colpar+i) } cat("Click to locate the top left corner of the legend\n") legend(locator(1),dimnames(plscvresul$Yinitial)[[2]],pch=1:q,cex=cexpar, bty="n",x.intersp=.5,y.intersp=.5,col=colpar+(1:q)) } else { par(mfrow=c(1,1),pty=ptypar) if(titlepar) plot(plscvresul$GCritot,type="n",xlab="Model Dim.",ylab="GCV", main=paste("opt. Dim.",order(plscvresul$GCritot)[1],",",dimnames(plscvresul$Yinitial)[[2]], "GCV=",round(min(plscvresul$GCritot),4), "(alpha=",plscvresul$GCV,")"),cex=cexpar,bg=par("bg")) else plot(1:plscvresul$A,plscvresul$GCrit,xlab="Dim",ylab="GCV",type="n",cex=cexpar,bg=par("bg")) points(plscvresul$GCritot,pch=10,cex=cexpar+0.7) points(plscvresul$GCritot,type="l") } cat(paste("Dimension retained (<=",plscvresul$A,"), A=")) A<-scan(quiet=T,"",numeric(),1) #browser() print(paste("GCV(",plscvresul$GCV,",",A,")=",round(plscvresul$GCritot[A],4),sep="")) invisible(return(list(plscvresul$GCritot,plscvresul$GCrit,A=A))) } } ############################################## Dcenter<-function(X,D=1) #D-centrage de la matrice X { X<-as.matrix(X) if(all(!is.na(X))){ if(length(D)==1) mx<-apply(X,2,"mean") else {idx<-D*X mx<-apply(idx,2,"sum") } X<-sweep(X,2,mx) } else { mx<-rep(0,ncol(X)) if(length(D)==1){for(i in 1:ncol(X)) mx[i]<-mean(X[,i][!is.na(X[,i])])} else {idx<-D*X mx<-apply(idx,2,"sum") } X<-sweep(X,2,mx) } return(list(X=X,mx=mx)) } ########################################################### Dcentred<-function(X,D=1) { # D-centrage de la matrice X # # Entree # X matrice des variables # D metrique des poids # Sorties # Xc matrice deduite de X, D-centree # Xcr matrice deduite de X, D-centree et reduite # moy vecteur des D-moyennes # var vecteur des D-variances X<-as.matrix(X) n<-nrow(X) p<-ncol(X) n2<-n*n n1<-matrix(1,nrow=n,ncol=1) if(length(D)==1)D<-diag(rep(1/n,n),nrow=n) if(length(D)==n)D<-diag(D,nrow=n) if(length(D)==n2)D<-as.matrix(D) moy<-t(n1)%*%D%*%X Xc<-sweep(X,2,moy) var<-diag(t(Xc)%*%D%*%Xc) ect<-sqrt(var) Xcr<-sweep(Xc,2,ect,FUN="/") return(list(Xc=Xc,Xcr=Xcr,moy=moy,var=var)) } ############################################################## invgene<-function(A,eps=1e-06) # inverse generalisee de A { A<-as.matrix(A) # decomposition valeurs singulieres de A et calcul de son inverse dans RR valsin<-svd(A) diago<-valsin$d[valsin$d>eps] # browser() if(length(diago) == 0){RR<-matrix(0,ncol(A),nrow(A)) return(list(RR=RR))} if(length(diago)==1) RR <- as.matrix(valsin$v[, 1:length(diago)])%*%t(as.matrix(valsin$u[, 1:length(diago)]))/diago else RR<-valsin$v[,1:length(diago)]%*%diag(1/diago)%*%t(valsin$u[,1:length(diago)]) return(RR=RR) } ############################################################## Dproj<-function(X,Y,D=1,eps=1e-08,inv=F) # D-proj de Y sur X # sortie du residu Yres=Y-P Y de Yhat=P Y et des coef. beta # X X { X<-as.matrix(X) Y<-as.matrix(Y) if(inv) R<-solve(Dcp(X,D=D)) else R<-invgene(Dcp(X,D=D),eps=eps) H<-WDop(X,Q=R,D=D) Yhat<-H%*%Y Yres<-Y-Yhat beta<-R%*%Dcp(X,Y,D=D) return(list(H=H,Yhat=Yhat,Yres=Yres,R=R,beta=beta)) } ################################################################ Dcp<-function(X,Y=X,D=1) { # signification de Dcp : Diagonalcrossproduct, i.e., X'DY # D est un vecteur des poids, par defaut 1/nrow(X) Y<-as.matrix(Y) X<-as.matrix(X) if(length(D)==1) DD<-rep(1/nrow(X),nrow(X)) else DD<-as.vector(D) for(i in 1:nrow(X)) X[i,]<-DD[i]*X[i,] V<-crossprod(X,Y) dimnames(V)<-NULL return(V) } ################################################################ Dvar<-function(X,D=1,cor=F) # VALUE: # V: matrice des D-variances de X si cor=F, sinon matrice des D-correlations # U: matrice D-centree de X si cor=F, sinon matrice D-centree reduite # mean: vecteur des D-moyennes # var: vecteur des D-variances { mat<-FALSE if(is.matrix(X)) mat<-TRUE X<-as.matrix(X) centrage<-Dcenter(X,D=D) Y<-centrage$X mean<-centrage$mx V<-VQop(Y,D=D) var<-diag(V) if(cor==F){ U<-Y } else { ect<-sqrt(var) U<-sweep(Y,2,ect,FUN="/") V<-VQop(U,D=D) } if(mat){ dimnames(V)<-list(dimnames(X)[[2]],dimnames(X)[[2]]) dimnames(U)<-dimnames(X) } return(list(V=V,U=U,mean=mean,var=var)) } ################################################################# WDop<-function(X,Y=X,Q=1,D=1) { # X,Y vecteurs ou matrices n x p, par defaut Y=X; # operateur des produits scalaires entre individus de X et Y, XQY'D; # Q est la metrique de l'espace des individus (par defaut Q=Ipp);Q matrice # D est la metrique de l'espace des variables de Y:(par defaut 1/n Inn), # D vecteur X<-as.matrix(X) Y<-as.matrix(Y) if(length(D)==1) DD<-rep(1/nrow(Y),nrow(Y)) else DD<-as.vector(D) if(!is.matrix(Q)) W<-X%*%t(Y) else W<-X%*%Q%*%t(Y) for(i in 1:ncol(W)) W[,i]<-DD[i]*W[,i] return(W) } ################################################################## VQop<-function(X,Y=X,Q=1,D=1) { # operateur crossproduct generalise, i.e., X'DYQ; # Q matrice ou vecteur est une metrique sur l'espace des variables de Y, # par defaut Q=1 donne l'identite # si Q=k alors Q=kI # si Q=vecteur alors Q=diag(vecteur); # D est un vecteur, par defaut D=1 donne le vecteur 1/nrow(X); Y<-as.matrix(Y) X<-as.matrix(X) if(length(D)==1) DD<-rep(1/nrow(X),nrow(X)) else DD<-as.vector(D) for(i in 1:nrow(X)) X[i,]<-DD[i]*X[i,] if(! is.matrix(Q)) { QQ<-as.vector(Q) if(length(QQ)==1 & QQ[1]==1){ V<-crossprod(X,Y) return(V) } if(length(QQ)==1 & QQ[1]!=1) QQ<-rep(QQ,ncol(Y)) for(i in 1:ncol(Y)) Y[,i]<-Y[,i]*QQ[i] V<-crossprod(X,Y) return(V) } V<-crossprod(X,Y)%*%Q return(V) } ################################################################### lss<-function(X,Y,listknots,centerX=F,centerY=F,standX=F,standY=F,D=1,degree=1,knots=0,equiknots=F, invpar=T,fonct=F,impres=T,nbpoints=50,titlepar=F,qual,ptypar="s",pchpar=1,cexpar=0.7,typedata=T, colpar=0,ncolpar=2,askpar=T) { # version 10.30 programee par J.F. Durand # Regression sur codage spline des predicteurs # # Entrees # invpar boolean, when T the inverse of the B'B is computed, if F the Moore-Penrose inverse is used. # impres impression des resultats si T, si F rien # graph trace des graphiques si T, si F pas de trace # nomfichX<-deparse(substitute(X)) nomfichY<-deparse(substitute(Y)) Xinitial<-as.matrix(X) Yinitial<-as.matrix(Y) if(standX)centerX<-TRUE if(standY)centerY<-TRUE if(!centerX)standX<-FALSE if(!centerY)standY<-FALSE centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var n<-nrow(Xinitial) p<-ncol(Xinitial) q<-ncol(Yinitial) if(!missing(listknots)) { if(length(listknots)!=p){cat("nb incorrect de vecteurs de noeuds\n") return()} knots<-vector("numeric",p) for(i in 1:p)knots[i]<-length(listknots[[i]]) } if(is.null(dimnames(Xinitial)))dimnames(Xinitial)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Yinitial)))dimnames(Yinitial)<-list(format(1:n),paste("Y",1:q,sep="")) # calcul de la metrique si D=1 if(length(D)==1) DD<-rep(1/n,n) else DD<-as.vector(D) # centrage et reduction centX<-Dcentred(Xinitial,D=DD) centY<-Dcentred(Yinitial,D=DD) Xini<-Xinitial Yini<-Yinitial if(centerX)Xini<-as.matrix(centX$Xc) if(centerY)Yini<-as.matrix(centY$Xc) if(standX) Xini<-as.matrix(centX$Xcr) if(standY) Yini<-as.matrix(centY$Xcr) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) # calcul de la matrice de codage spline de Xini listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots if(missing(listknots)){ listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL))} else { for(i in 1:p){ listknotsn[[i]]<-listknots[[i]] if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknotsn[[i]]-meanX[i])/sqrt(varX[i]) } } BsplineX <- Bsplinen(Xini,ordre=degree+1,nbni=knots,noeudequi=equiknots, center=centerX,D=D,tt=listknotsn) itX <- BsplineX$v dimension<-BsplineX$ordre+BsplineX$nbni Bcod<-NULL for(i in 1:p)Bcod<-cbind(Bcod,itX[[i]][[1]]) ######## resullsspl<-Dproj(Bcod,Yini,D=DD,inv=invpar) ######## Yhat<-resullsspl$Yhat beta<-resullsspl$beta Yres<-resullsspl$Yres R2<-Dvar(Yhat,D=DD)$var/Dvar(Yini,D=DD)$var if(length(D)==1) { fit<-sum(Yres^2) diaginvBtB<-diag(resullsspl$R)/n sigma2<-fit/(n-sum(diag(resullsspl$H)))^2 intervconfbeta<-2*sqrt(diaginvBtB*sigma2) fit<-fit/n GCV=sigma2*n } else fit<-sum(diag(Dcp(Yres,D=DD))) if(impres){# if(fonct==F){ par(ask=askpar) cat("\n") cat("R2 \n") print(R2) cat("\n") cat("GCV = var(residuals)/(1-trace(H)/n)²\n") print(GCV) cat("\n") } pX<-p pY<-q AA<-list(NULL) ranget<-list(NULL) Transf<-list(NULL) for(j in 1:pY) { Transf[[j]]<-matrix(0,n,pX) dd<-1 AA[[j]]<-list(NULL) for(i in 1:pX) { AA[[j]][[i]]<-as.matrix(beta[dd:(dd+dimension[i]-1),j]) dd<-dd+dimension[i] #browser() Transf[[j]][,i]<-BsplineX$v[[i]][[1]]%*%AA[[j]][[i]] ranget[[j]][i]<-diff(range(Transf[[j]][,i])) } } if(fonct==F){ cat("\n") cat("The B-spline transformations of the",pX,"predictor(s) are now being computed \n") } xx<-list(NULL) spl<-list(NULL) tt<-list(NULL) ttint<-list(NULL) for(i in 1:pX){ if(fonct==F){ cat(i," ") } minXini<-min(Xini[, i]) maxXini<-max(Xini[, i]) xx[[i]]<-seq(minXini,maxXini,length=nbpoints) if(BsplineX$nbni[i]!=0){ tt[[i]]<-BsplineX$v[[i]][[2]] ttint[[i]]<-tt[[i]][(BsplineX$ordre[i]+1):(BsplineX$ordre[i]+BsplineX$nbni[i])] if(centerX) spl[[i]]<-sweep(Bsplinen(xx[[i]],ordre=BsplineX$ordre[i],nbni=BsplineX$nbni[i],D=D,tt=list(ttint[[i]]))$v[[1]][[1]],2,BsplineX$meansc[[i]]) else spl[[i]]<-Bsplinen(xx[[i]],ordre=BsplineX$ordre[i],nbni=BsplineX$nbni[i],D=D,tt=list(ttint[[i]]))$v[[1]][[1]] } else { if(centerX) spl[[i]]<-sweep(Bsplinen(xx[[i]],ordre=BsplineX$ordre[i],nbni=0,D=D)$v[[1]][[1]],2,BsplineX$meansc[[i]]) else spl[[i]]<-Bsplinen(xx[[i]],ordre=BsplineX$ordre[i],nbni=0,D=D)$v[[1]][[1]] } } if(fonct==F){#### cat("\n") cat("__________________________________________________________________\n") }#### yy<-list(NULL) noeudint<-list(NULL) if(p==1){#### for(j in 1:q){#* if((titlepar==F)|missing(qual))par(pty=ptypar) else par(oma=c(0,0,4,0),pty=ptypar) yy[[1]]<-spl[[1]]%*%AA[[j]][[1]] if(titlepar==T) matplot(Xini[,1],Yini[,j],ylim=range(c(Yini[,j],yy[[1]])),xlab=paste(dimnames(Xini)[[2]][j],sep=""), ylab="",cex=cexpar,main=paste("L-S splines for ",dimnames(Yini)[[2]][j],sep=""),type="n") else matplot(Xini[,1],Yini[,j],ylim=range(c(Yini[,j],yy[[1]])),xlab=paste(dimnames(Xini)[[2]][j],sep=""), ylab=dimnames(Yini)[[2]][j],cex=cexpar,type="n") points(xx[[1]],yy[[1]],type="l",col=colpar+2) rug(Xini[,1],ticksize=0.02,lwd=1.5,col="red") if(typedata) { if(missing(qual)) text(Xini[,1],Yini[,j],dimnames(Xini)[[j]],cex=cexpar,col=colpar+1) else for(i in (min(qual):max(qual)))text(Xini[qual==i,1],Yini[qual==i,j],dimnames(Xini)[[1]][qual==i],cex=cexpar,col=colpar+i+1) } else points(Xini[,1],Yini[,j],pch=pchpar,cex=cexpar) if(BsplineX$nbni[1]!=0){ noeudint[[1]]<-BsplineX$v[[1]][[2]][(BsplineX$v[[1]][[2]]!=min(BsplineX$v[[1]][[2]]))&(BsplineX$v[[1]][[2]]!=max(BsplineX$v[[1]][[2]]))] abline(v=noeudint[[1]],lty=2) } }#* }#### else{#### cat("\n") cat("additive influence of the predictors on a response? (y/n)") plt <- scan("", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { noeudint<-list(NULL) repeat {# if(pY==1)j<-1 else{ cat("The number of the response variable?,(<=",pY,")") j<- scan("", numeric(), 1) } if(titlepar==F)par(mfrow=c(1,1),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,1),pty=ptypar) if(pX<21) { xxf<-Transf[[j]][,1] catf<-rep(1,n) if(pX>1)for(i in 2:pX){xxf<-c(xxf,Transf[[j]][,i]) catf<-c(catf,rep(i,n)) } tecno24<-split(xxf,catf) boxplot(tecno24,sub="predictors",ylab="transf. dat.",cex=0.7) cat("Click to indicate the legend location\n") legend(locator(1),paste(1:p,dimnames(Xini)[[2]]),cex=cexpar,ncol=ncolpar,bty="n") if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,dimnames(Yini)[[2]][j]) } cat("\n") cat("Influence of the predictors on :",dimnames(Yini)[[2]][j]," \n") class<-NULL class<-matrix(0,1,pX) class[1,]<-round(rev(sort(ranget[[j]])),5) ordreinv<-rev(order(rank((ranget[[j]])),1:pX)) dimnames(class)<-list("",paste(dimnames(Xini)[[2]][ordreinv],"(",format(ordreinv),")",sep="")) cat("\n") cat("Range of the transformed predictors in descending order:\n") cat("\n") print(class) cat("\n") normedclass=class[1,]/class[1,1] if(any(normedclass<0.25)) { cat("------------\n") cat("Suggestion: remove the predictors (threshold 25%) :\n") cat(dimnames(class)[[2]][normedclass<0.25],"\n") cat("------------\n") } if(pX>50) barplot(class[1,],ylab="range of the transf. predictors",names=NULL,xlab="predictors",density=20,space=1.4) else {if(pX>10){ barplot(class[1,],ylab="range of the transf. predictors",names=format(ordreinv),xlab="predictors",density=20,space=1.4,col=colpar+(1:pX)) cat("Click to indicate the legend location\n") legend(locator(1),paste(ordreinv,dimnames(Xini)[[2]][ordreinv]),cex=cexpar,ncol=ncolpar,fill=colpar+(1:pX),bty="n") } else barplot(class[1,],ylab="range of the transf. predictors",names=dimnames(Xini)[[2]][ordreinv],xlab="predictors",density=20,space=1.4) } if((titlepar==T)) mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("influence of the predictors on",dimnames(Yini)[[2]][j])) cat("Coordinate function plots of the main influence predictors? (y/n)") plt <- scan("", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) {### cat("How many predictors?,(<=",pX,")") nb<- scan("", numeric(), 1) cat("How many plots on a row ?,(<=",nb,")") pc<- scan("", numeric(), 1) cat("How many row(s) ?") pl<-scan("", numeric(), 1) if(titlepar==F)par(mfrow=c(pl,pc),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(pl,pc),pty=ptypar) cat("Do you need curves, data or both? (c,d,b)") repeat{ plt<-scan("",character(),1) if(length(plt)==0){plt<-"d" break} if((plt=="c")|(plt=="d")|(plt=="b"))break } mintransf<-1e+18 maxtransf<-(-1e+18) yy<-list(NULL) for(i in ordreinv[1:nb]) { if(!(plt=="d")){ yy[[i]]<-spl[[i]]%*%AA[[j]][[i]] mintransf<-min(mintransf,min(yy[[i]])) maxtransf<-max(maxtransf,max(yy[[i]])) } else{mintransf<-min(mintransf,min(Transf[[j]][,i])) maxtransf<-max(maxtransf,max(Transf[[j]][,i])) } } for(i in ordreinv[1:nb]) { if(!(plt=="d")) { plot(xx[[i]],yy[[i]],ylim=c(mintransf,maxtransf),xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n",cex=cexpar) points(xx[[i]],yy[[i]],type="l",col=colpar+3) } else plot(Xini[,i],Transf[[j]][,i],ylim=c(mintransf,maxtransf),xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n") if(!(plt=="c")) { if(!typedata)points(Xini[, i],Transf[[j]][,i],pch=pchpar,cex=cexpar,col=colpar+7) else text(Xini[, i],Transf[[j]][,i],dimnames(Xini)[[1]],cex=cexpar,col=colpar+7) } noeudint[[i]]<-BsplineX$v[[i]][[2]][(BsplineX$v[[i]][[2]]!=min(BsplineX$v[[i]][[2]]))&(BsplineX$v[[i]][[2]]!=max(BsplineX$v[[i]][[2]]))] abline(v=noeudint[[i]],lty=2) } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("L-S splines for ",dimnames(Yini)[[2]][j],sep="")) #dev.off() }### cat("Another reponse ? (y/n)") repo<-scan("",character(),1) if((length(repo)==0)|(repo!="y"))break }#fin du repeat } }#### if(fonct==F){#### cat("\n") cat("__________________________________________________________________\n") cat("\n") cat("Plots of the response residuals ? (y/n)") plt <- scan("", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { if(titlepar==F)par(mfrow=c(1,pY),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,pY),pty=ptypar) variat<-range(Yini-Yhat) for(i in 1:pY) { sigmai<-sqrt(Dvar(Yini[,i]-Yhat[,i],D=D)$var) plot(Yhat[,i],Yini[,i]-Yhat[,i],ylim=c(min(variat[1],-2*sigmai),max(variat[2],2*sigmai)),xlab=paste("Fitted ",dimnames(Yinitial)[[2]][i],sep="") ,ylab="residuals",type="n",cex=cexpar) if(!typedata)points(Yhat[,i],Yini[,i]-Yhat[,i],pch=pchpar,cex=cexpar) else text(Yhat[,i],Yini[,i]-Yhat[,i],dimnames(Yinitial)[[1]],cex=cexpar,col=colpar+1) abline(h=0) abline(h=2*sigmai,lty=2) abline(h=-2*sigmai,lty=2) } if(titlepar==T)mtext(side=3,line=-1,cex=cexpar+0.7,outer=TRUE,"Residuals in Least-Squares splines") } cat("__________________________________________________________________\n") cat("plot of Yhat against Y (y/n)?") plt3 <- scan(,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ par(mfrow=c(1,1)) for(i in 1:pY) { plot(Yini[,i],Yhat[,i],xlab=dimnames(Yini)[[2]][i],ylab=paste("modeled ",dimnames(Yini)[[2]][i],sep=""),type="n",cex=cexpar) if(!typedata)points(Yini[,i],Yhat[,i],pch=pchpar,cex=cexpar) else text(Yini[,i],Yhat[,i],dimnames(Yini)[[1]],col=colpar+2) abline(a=0,b=1,col=colpar+3) } } cat("\n") cat("__________________________________________________________________\n") cat("\n") if(pY==2){ cat("Plot of the reconstituted responses (y/n)?") plt3 <- scan(,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ if(titlepar==F)par(mfrow=c(1,2),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,2),pty=ptypar) range1<-range(c(Yhat[, 1],Yini[, 1])) range2<-range(c(Yhat[, 2],Yini[, 2])) plot(Yhat[, 1],Yhat[,2],xlim=range1,ylim=range2,xlab=paste("fitted",dimnames(Yinitial)[[2]][1]),ylab=paste("fitted",dimnames(Yinitial)[[2]][2]),type="n",pch=pchpar,main="Reconstituted shape") if(!typedata)points(Yhat[, 1],Yhat[, 2],pch=pchpar,cex=cexpar) else text(Yhat[, 1],Yhat[, 2],dimnames(Yinitial)[[1]],col=colpar+(1:n)) plot(Yini[, 1],Yini[, 2],xlim=range1,ylim=range2,xlab=dimnames(Yinitial)[[2]][1],ylab=dimnames(Yini)[[2]][2],type="n",pch=pchpar,main="Original shape") if(!typedata)points(Yini[, 1],Yini[, 2],pch=pchpar,cex=cexpar) else text(Yini[, 1],Yini[, 2],dimnames(Yinitial)[[1]],col=colpar+(1:n)) if(titlepar==T)mtext(side=3,line=0,cex=2,outer=TRUE,"Least-Squares Splines") } } }#### }# #browser() if(impres&fonct)invisible(return(list(R2=R2,fit=fit,beta=beta,Bcod=Bcod,xx=xx,spl=spl,intknots=BsplineX$intknots,knots=BsplineX$nbni,GCV=GCV))) invisible( return(list(R2=R2,fit=fit,beta=beta,Bcod=Bcod,GCV=GCV))) } ####################################################### interactionij<-function(Bsplineresult,i,j) { Xij<-NULL for(k in 1:ncol(Bsplineresult$v[[i]][[1]])) Xij<-cbind(Xij,Bsplineresult$v[[i]][[1]][,k]*Bsplineresult$v[[j]][[1]]) return(Xij) } ####################################################### plss<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=2,degree=1,knots=0,equiknots=F,eps=1e-8,listknots, impres=T,interaction=NULL,listinteraction,cexpar=0.7,colpar=1,titlepar=T) { # version 9.6 programmee par JF Durand # Regression PLS sur codage spline des predicteurs # order<-degree +1 nomfichX<-deparse(substitute(X)) nomfichY<-deparse(substitute(Y)) Xinitial<-as.matrix(X) Yinitial<-as.matrix(Y) centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var if(!missing(Xtest)){ centrageXtest <- Dvar(Xtest, D = D, cor = F) meanXtest <- centrageXtest$mean varXtest <- centrageXtest$var } n<-nrow(Xinitial) p<-ncol(Xinitial) q<-ncol(Yinitial) if(!missing(listknots)) { { if(length(listknots)!=p){cat("nb incorrect de vecteurs de noeuds\n") return()} knots<-vector("numeric",p) for(i in 1:p) { knots[i]<-length(listknots[[i]]) if(knots[i]!=0){ rangeknots<-range(listknots[[i]]) rangevar<-range(Xinitial[,i]) if((rangeknots[1]<=rangevar[1])|(rangeknots[2]>=rangevar[2])) { cat("range of interior knots out of range of variable ",dimnames(Xinitial)[[2]][i],"!!!\n") return(list(nomfichX="NON4")) }} } } } if(is.null(dimnames(Xinitial)))dimnames(Xinitial)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Yinitial)))dimnames(Yinitial)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(Xinitial)[[1]])==0) dimnames(Xinitial)[[1]]<- format(1:n) if(length(dimnames(Yinitial)[[1]])==0) dimnames(Yinitial)[[1]]<- format(1:n) if(length(dimnames(Xinitial)[[2]])==0) dimnames(Xinitial)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Yinitial)[[2]])==0) {cat("Affect name(s) to response(s)\n") prov<-as.vector(format(1:q)) for(i in 1:q) {cat(paste("response ",format(i),"\n")) prov[i]<-scan(quiet=T,,what="", 1) } dimnames(Yinitial)[[2]]<-prov } # centrage et reduction centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(!missing(Xtest)) { Xinitest<-sweep(Xtest, 2,centX$moy) if(standX) Xinitest<- sweep(Xinitest,2,sqrt(centX$var),FUN="/") dimnames(Xinitest)<-dimnames(Xtest) } if(!missing(Ytest)) { Yinitest<-sweep(Ytest, 2,centY$moy) if(standY) Yinitest<-sweep(Yinitest,2,sqrt(centY$var),FUN="/") dimnames(Yinitest)<-dimnames(Ytest) } if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) # calcul de la matrice de codage spline de Xini listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots if(missing(listknots)){ listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) listknots<-listknotsn } else {if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) for(i in 1:p){ if(!is.null(listknots[[i]])){ #if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknots[[i]]-meanX[i])/sqrt(varX[i]) else listknotsn[[i]]<-listknots[[i]]-meanX[i] } } } BsplineX <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=T,D=D,tt=listknotsn) itX <- BsplineX$v dimension<-BsplineX$ordre+BsplineX$nbni Bcod<-NULL for(i in 1:p)Bcod<-cbind(Bcod,itX[[i]][[1]]) Bcodintcum<-NULL if((missing(listinteraction))&(length(interaction)!=0)) { BsplineXint <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) rint<-length(interaction) Bcodint<-list(NULL) Bcodintmean<-list(NULL) dimensionint<-matrix(0,rint,rint) namesint<-matrix("",rint,rint) namesint1<-NULL for(i in 1:(rint-1)) { Bcodint[[i]]<-list(NULL) for(j in (i+1):rint) { Bcodint[[i]][[j]]<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$X Bcodintmean[[i]][[j]]<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$mx Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]][[j]]) namesint[i,j]<-paste(dimnames(Xini)[[2]][interaction[i]],"*",dimnames(Xini)[[2]][interaction[j]],sep="") namesint1<-c(namesint1,namesint[i,j]) dimensionint[i,j]<-dimension[interaction[i]]*dimension[interaction[j]] } }} if(!missing(listinteraction)) { namesint1<-NULL dimensionint<-rep(0,length(listinteraction)) BsplineXint <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) Bcodint<-list(NULL) Bcodintmean<-list(NULL) for(i in 1:length(listinteraction)) { Bcodint[[i]]<-Dcenter(interactionij(BsplineXint,listinteraction[[i]][1],listinteraction[[i]][2]),D=D)$X dimensionint[i]<-ncol(Bcodint[[i]]) Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]]) namesint1<-c(namesint1,paste(dimnames(Xini)[[2]][listinteraction[[i]][1]],"*",dimnames(Xini)[[2]][listinteraction[[i]][2]],sep="")) Bcodintmean[[i]]<-Dcenter(interactionij(BsplineXint,listinteraction[[i]][1],listinteraction[[i]][2]),D=D)$mx } } #browser() #cat(svd(Bcod)$d) if(!missing(Xtest)){ BsplineXtest <- Bsplinen(rbind(Xinitest,apply(Xini,2,min),apply(Xini,2,max)),ordre=order, nbni=knots,center=F,D=D,tt=BsplineX$intknots) itXtest <- BsplineXtest$v Bcodtest<-NULL for(i in 1:p)Bcodtest<-cbind(Bcodtest,sweep(itXtest[[i]][[1]][1:nrow(Xtest),,drop=F],2,BsplineX$meansc[[i]])) #add interactions if any!!!! } if(impres){ cat("\n") if(length(interaction)==0) cat("PLSS : LINEAR PLS ON SPLINE CODED PREDICTORS\n") else cat("PLSS : LINEAR PLS ON SPLINE CODED PREDICTORS + INTERACTIONS\n") cat("\n") } ######################### if(missing(Xtest))resulpls<-pls(X=cbind(Bcod,Bcodintcum),Y=Yini,standX=F,standY=F,A=A,D=D, splflag=T,impres=impres,graph=F,eps=eps,cexpar=cexpar,colpar=colpar,titlepar=titlepar) else {if(missing(Ytest)) resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,standX=F,standY=F,A=A,D=D,splflag=T,impres=impres, graph=F,eps=eps,cexpar=cexpar,colpar=colpar,titlepar=titlepar) else resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,Ytest=Yinitest,standX=F,standY=F,A=A,D=D,splflag=T, impres=impres,graph=F,eps=eps,cexpar=cexpar,colpar=colpar,titlepar=titlepar) } ######################### cat("\n") alph<-resulpls$alph R2<-resulpls$R2c pX<-p pY<-q noeudint<-list(NULL) axes<-resulpls$A corX<-standX corY<-standY covXY<-resulpls$covXY alphaX<-NULL compX<-resulpls$TX compY<-resulpls$UY cX<-resulpls$WX cY<-resulpls$CY BETALCR<-resulpls$BETALCR YH<-resulpls$YH Yhat<-matrix(0,n,pY) dimnames(Yhat)<-dimnames(Yini) VY<-NULL for(i in 1:axes){ Yhat<-Yhat+YH[[i]] VY<-c(VY,sum(Dvar(YH[[i]],D=D)$var)) } AA<-list(NULL) ranget<-list(NULL) Transf<-list(NULL) if(length(interaction)!=0) { rangetint<-list(NULL) Transfint<-list(NULL) AAint<-list(NULL) } for(j in 1:pY) { Transf[[j]]<-matrix(0,n,pX) ranget[[j]]<-rep(0,pX) if(length(interaction)!=0){ Transfint[[j]]<-array(0,c(n,rint,rint)) AAint[[j]]<-list(NULL) rangetint[[j]]<-matrix(0,rint,rint) } dd<-1 AA[[j]]<-list(NULL) for(i in 1:pX) { AA[[j]][[i]]<-as.matrix(BETALCR[[axes]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transf[[j]][,i]<-BsplineX$v[[i]][[1]]%*%AA[[j]][[i]] ranget[[j]][i]<-diff(range(Transf[[j]][,i])) } names(ranget[[j]])<-dimnames(Xini)[[2]] #browser() if(length(interaction)!=0) { for(i in 1:(rint-1)) { AAint[[j]][[i]]<-list(NULL) for(k in (i+1):rint){ AAint[[j]][[i]][[k]]<-as.matrix(BETALCR[[axes]][j,dd:(dd+dimensionint[i,k]-1)]) dd<-dd+dimensionint[i,k] Transfint[[j]][,i,k]<-Bcodint[[i]][[k]]%*%AAint[[j]][[i]][[k]] rangetint[[j]][i,k]<-diff(range(Transfint[[j]][,i,k])) dimnames(rangetint[[j]])<-list(dimnames(Xini)[[2]][interaction],dimnames(Xini)[[2]][interaction]) } } }#endifinteraction }#endjloop if(length(interaction)==0) { corYtrans<-matrix(0,q,pX) dimnames(corYtrans)<-list(dimnames(Yini)[[2]],dimnames(Xini)[[2]]) } else { corYtrans<-matrix(0,q,pX+rint*(rint-1)/2) dimnames(corYtrans)<-list(dimnames(Yini)[[2]],c(dimnames(Xini)[[2]],namesint1)) } for(j in 1:q) corYtrans[j,1:pX]<-cor(cbind(Yini[,j],Transf[[j]]))[2:(1+pX),1] if(length(interaction)!=0) { for(j in 1:q) {compt<-1 for(i in 1:(rint-1)) for(k in (i+1):rint) { corYtrans[j,pX+compt]<-cor(Yini[,j],Transfint[[j]][,i,k]) compt<-compt+1 }} } corYtrans<-round(corYtrans,2) #browser() if(impres){ if((!missing(Xtest))&(missing(Ytest))){ repeat{ cat("\n") cat("Prediction of the test sample according to the dimension ? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break { cat("Choose the dimension (<=",axes,")") dimopt<-scan(quiet=T,"",numeric(),1) AAt<-list(NULL) Transftest<-list(NULL) for(j in 1:pY) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) dd<-1 AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transftest[[j]][,i]<-sweep(BsplineXtest$v[[i]][[1]][1:nrow(Xtest),,drop=F],2,BsplineX$meansc[[i]],FUN="-")%*%AAt[[j]][[i]] } } Yaju<-matrix(0,nrow(Xtest),q) dimnames(Yaju)<-list(dimnames(Xtest)[[1]],paste("est.",dimnames(Yini)[[2]],sep="")) for(j in 1:pY) for(i in 1:pX) Yaju[,j]<-Yaju[,j]+Transftest[[j]][,i] for(i in 1:pY){ if(standY)Yaju[,i]<-Yaju[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yaju[,i]+centY$moy[i] } print(Yaju) #browser() } #else break }##fin du repeat }#fin du if missing Xtest if((!missing(Xtest))&(!missing(Ytest))) { repeat{#2 cat("\n") cat("Validation of the model with the test sample according to the dimension ? (y/n)") repospe<-scan(quiet=T,"",character(),1) if((length(repospe)!=0)&&((repospe=="y")|(repospe=="Y"))) { Yerr<-as.list(1:axes) Yajust<-as.list(1:axes) ERRMOY<-vector("numeric",axes) for(dimopt in 1:axes) { Yerr[[dimopt]]<-matrix(0,nrow(Ytest),ncol(Ytest)) AAt<-list(NULL) Transftest<-list(NULL) for(j in 1:pY) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) dd<-1 AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transftest[[j]][,i]<-sweep(BsplineXtest$v[[i]][[1]][1:nrow(Xtest),,drop=F],2,BsplineX$meansc[[i]],FUN="-")%*%AAt[[j]][[i]] } } Yaju<-matrix(0,nrow(Ytest),q) dimnames(Yaju)<-list(dimnames(Ytest)[[1]],paste("est.",dimnames(Ytest)[[2]],sep="")) Yajustandar<-Yaju for(j in 1:pY) for(i in 1:pX) Yajustandar[,j]<-Yajustandar[,j]+Transftest[[j]][,i] #browser() for(i in 1:pY){ if(standY)Yaju[,i]<-Yajustandar[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yajustandar[,i]+centY$moy[i] } Yajust[[dimopt]]<-Yaju Yerr[[dimopt]]<- Yinitest-Yajustandar ERRMOY[dimopt]<-mean(apply((Yerr[[dimopt]])^2,2,mean)) }#fin du for sur dimopt cat("Mean Squared Errors of the standardized response(s) according to the dimension\n") errmoy<-matrix(ERRMOY,1,axes) dimnames(errmoy)<-list("MSE",1:axes) print(round(errmoy,7)) par(mfrow=c(1,1)) if(titlepar) plot(errmoy[1,],xlab="Model Dim.",ylab="MSE",type="l",main=paste("Opt. Dim. ",order(errmoy[1,])[1]," , MSE(",order(errmoy[1,])[1],") = ",round(errmoy[1,order(errmoy[1,])[1]],7),sep="")) else plot(errmoy[1,],xlab="Model Dim.",ylab="MSE on test data",type="l",sep="") points(errmoy[1,],pch=10,cex=cexpar+0.7) cat("Choose the dimension (<=",axes,")") dimoptopt<-scan(quiet=T,"",numeric(),1) cat("Estimated Responses (Yest) as well as Errors (Yerr = Ytest - Yest) \n") dimnames(Yerr[[dimoptopt]])[[2]]<-paste("err.",dimnames(Ytest)[[2]],sep="") Yaff<-NULL #browser() for(i in 1:ncol(Ytest))Yaff<-cbind(Yaff,as.matrix(cbind(Yajust[[dimoptopt]][,i,drop=F],Ytest[,i,drop=F]-Yajust[[dimoptopt]][,i,drop=F]))) print(round(Yaff,4)) if(sum(Ytest==0)==0){ cat("Relative Errors in %: |Yerr|/|Ytest|*100\n") erreur100<-abs(Ytest-Yajust[[dimoptopt]])/abs(Ytest)*100 erreur100<-rbind(erreur100,apply(erreur100,2,mean),sqrt(apply(erreur100,2,var))) dimnames(erreur100)<-list(c(dimnames(Ytest)[[1]],"Mean","Stdv"),dimnames(Ytest)[[2]]) print(round(erreur100,2)) } cat("\n") cat(paste("MSE(",dimoptopt,")=",round(errmoy[1,dimoptopt],4),sep=""),"\n") cat("plot of estimated versus observed Ytest\n") if(q>1){ cat("How many plots per row (<=",q,")?\n") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?\n") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) par(pty="s") for(i in 1:q) { if(titlepar) { if(sum(Ytest==0)==0){ if(abs(sum(Yini^2)-sum(Yinitest^2))1, des NA sinon . # center, boolleen indiquant le centrage # ordre . # nbni . # noeudequi . # intknots liste des vecteurs des noeuds interieurs # si center=T, sortie des vecteurs D-moyennes des colonnes de X (mx) # et de v[[.]][[1]] #________________________________________________________________________________ X <- as.matrix(X) #cat("Spline transformations of columns are now being computed...\n") #-------------------------------------------------------------------------------- #centrage eventuel de X if(center == T) { trans <- Dcenter(X, D = D) X <- trans$X mx <- trans$mx } #--------------------------------------------------------------------------------- ordre <- as.vector(ordre) if(length(ordre) == 1) ordre <- rep(ordre, ncol(X)) nbni <- as.vector(nbni) if(length(nbni) == 1) nbni <- rep(nbni, ncol(X)) noeudequi <- as.vector(noeudequi) if(length(noeudequi) == 1) noeudequi <- rep(noeudequi, ncol(X)) v <- list(NULL) if(missing(tt)) {tt <- list(NULL) if(ncol(X)>1)for(i in 1:(ncol(X)-1))tt<-c(tt,list(NULL)) } meansc <- list(NULL) intknots<-list(NULL) for(i in 1:ncol(X)) {## #cat("* ") if(diff(range(X[,i]))==0) { if(!is.null(dimnames(X)[[2]])) stop(paste("The variable '",dimnames(X)[[2]][i],"' is constant, remove it!!\n",sep="")) else stop(paste("The variable number ",i," is constant, remove it!!\n",sep="")) } nod<-noeuds(X[,i],nbn=nbni[i],ord=ordre[i],equi=noeudequi[i],tt=tt[[i]]) a<-vector("numeric",ordre[i]+nbni[i]) if((ordre[i]>1)&(!is.null(nod)))for(j in 1:(ordre[i]+nbni[i]))a[j]<-mean(nod[(j+1):(j+ordre[i]-1)]) else a<-rep(NA,ordre[i]+nbni[i]) intknots[[i]]<-nod[(ordre[i]+1):(ordre[i]+nbni[i])] if(ordre[i]!=1) { if(is.null(nod))v[i]<-list(list(bs(X[,i],knots=NULL,intercept=T,degree=ordre[i]-1),nod,a)) else v[i]<-list(list(bs(X[,i],knots=nod[(ordre[i]+1):(ordre[i]+nbni[i])],intercept=T,degree=ordre[i]-1),nod,a)) } else { if(is.null(nod))v[i]<-list(list(matrix(1,nrow(X),1),nod,a)) else { breakpoints=list(nod[(ordre[i]+1):(ordre[i]+nbni[i])]) if(sum(diff(breakpoints[[1]])>0) 1)for(j in 2:resulpls$axes)R2tab[j,]<-R2tab[j,]+R2tab[j-1,] print(R2tab) cat("______________________________________________________________________\n") cat("additive influence of the predictors on a response? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { noeudint<-list(NULL) repeat {# if(pY==1)j<-1 else{ cat("The number of the reponse variable?,(<=",pY,")") j<- scan(quiet=T,"", numeric(), 1) } if(titlepar==F)par(mfrow=c(1,1),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,1),pty=ptypar) if((length(interactionpar)==0)& is.null(resulpls$listinteraction)) { if(pX>1){ if(pX<31) { xxf<-resulpls$Transf[[j]][,1] catf<-rep(1,n) if(pX>1)for(i in 2:pX){xxf<-c(xxf,resulpls$Transf[[j]][,i]) catf<-c(catf,rep(i,n)) } tecno24<-split(xxf,catf) if(pX>30) boxplot(tecno24,sub="predictors",ylab="transf. data",cex=cexpar) else boxplot(tecno24,sub="",ylab="transf. data",cex=cexpar,names=dimnames(resulpls$Xini)[[2]]) if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,dimnames(resulpls$Yini)[[2]][j]) } cat("\n") cat("Influence of the predictors on the response:",dimnames(resulpls$Yini)[[2]][j]," \n") class<-NULL class<-matrix(0,1,pX) class[1,]<-round(rev(sort(resulpls$ranget[[j]])),5) ordreinv<-rev(order(rank((resulpls$ranget[[j]])),1:pX)) dimnames(class)<-list("",paste(dimnames(resulpls$Xini)[[2]][ordreinv],"(",format(ordreinv),")",sep="")) cat("\n") cat("Range of the transformed predictors in descending order:\n") #cat("\n") print(class) normedclass=class[1,]/class[1,1] if(any(normedclass<0.25)) { cat("------------\n") cat("Suggestion: remove the predictors (threshold 25%) :\n") cat(dimnames(class)[[2]][normedclass<0.25],"\n") cat("------------\n") } cat("A look at coordinate functions? (y/n)") plt1 <- scan(quiet=T,"", character(), 1) if( (length(plt1)!=0)&&((plt1=="y")|(plt1=="Y"))) plotflag<-T else plotflag<-F #cat("\n") if(pX>1){ if(pX<=30){ if(any(normedclass<0.25)) barplot(class[1,],xlab="range of the transf. predictors",names=dimnames(resulpls$Xini)[[2]][ordreinv], ylab="",density=20,space=1.4,horiz=T,col=c(rep("blue",ncol(class))[normedclass>=0.25],rep("red",ncol(class))[normedclass<0.25]),cex.names=cexpar,cex.axis=cexpar) else barplot(class[1,],xlab="range of the transf. predictors",names=dimnames(resulpls$Xini)[[2]][ordreinv], ylab="",density=20,space=1.4,horiz=T,col="blue",cex.names=cexpar,cex.axis=cexpar) } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' Influence on",dimnames(resulpls$Yini)[[2]][j],",",format(axes),"Dim.")) } if(plotflag){ cat("Ordered predictors? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) ordrepredict<-ordreinv else ordrepredict<-1:pX #{### cat("How many predictors?,(<=",pX,")") nb<- scan(quiet=T,"", numeric(), 1) cat("How many plots on a row ?,(<=",nb,")") pc<- scan(quiet=T,"", numeric(), 1) cat("How many row(s) ?") pl<-scan(quiet=T,"", numeric(), 1) }#endplotflag } else {pl<-1 pc<-1 nb<-1 ordrepredict<-1} if(plotflag){### if(titlepar==F)par(mfrow=c(pl,pc),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(pl,pc),pty=ptypar) cat("Do you need curves, data or both? (c,d,b)") repeat{ plt<-scan(quiet=T,"",character(),1) if(length(plt)==0){plt<-"d" break} if((plt=="c")|(plt=="d")|(plt=="b"))break } mintransf<-1e+18 maxtransf<-(-1e+18) for(i in ordrepredict[1:nb]) { if(!(plt=="d")){ A[[i]]<-resulpls$AA[[j]][[i]] yy[[i]]<-spl[[i]]%*%A[[i]] mintransf<-min(mintransf,min(yy[[i]])) maxtransf<-max(maxtransf,max(yy[[i]])) } else{mintransf<-min(mintransf,min(resulpls$Transf[[j]][,i])) maxtransf<-max(maxtransf,max(resulpls$Transf[[j]][,i])) } } for(i in ordrepredict[1:nb]) { if(pX==1){ if(pX!=1) plot(resulpls$Xini[,i],resulpls$Yini[,j],xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n",cex=cexpar) text(resulpls$Xini[,i],resulpls$Yini[,j],dimnames(Xini)[[1]],cex=cexpar,col=colpar+3) } if(!(plt=="d")){ if(pX!=1) plot(xx[[i]],yy[[i]],ylim=c(mintransf,maxtransf),xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n",cex=cexpar) points(xx[[i]],yy[[i]],type="l",col=colorpar+1) } else plot(Xini[,i],resulpls$Transf[[j]][,i],ylim=c(mintransf,maxtransf),xlab=dimnames(Xini)[[2]][i],ylab="",type="n") if(!(plt=="c")) { if(!typedata)points(Xini[, i],resulpls$Transf[[j]][,i],pch=pchpar,cex=cexpar,col=colorpar+3) else text(Xini[, i],resulpls$Transf[[j]][,i],dimnames(Xini)[[1]],cex=cexpar,col=colorpar+3) } if(resulpls$BsplineX$nbni[i]!=0){ noeudint[[i]]<-BsplineX$v[[i]][[2]][(BsplineX$v[[i]][[2]]!=min(BsplineX$v[[i]][[2]]))&(BsplineX$v[[i]][[2]]!=max(BsplineX$v[[i]][[2]]))] abline(v=noeudint[[i]],lty=2)} } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' influence on ",dimnames(Yini)[[2]][j]," (",axes," dim.)",sep="")) }### }#endofinternull else { cat("\n") cat("Influence of the predictors on the response:",dimnames(resulpls$Yini)[[2]][j]," \n") class<-NULL rangetint1<-NULL if(length(interactionpar)!=0) { for(i in 1:(rint-1)) for(k in (i+1):rint) rangetint1<-c(rangetint1,rangetint[[j]][i,k]) names(rangetint1)<-namesint1 } else rangetint1<-rangetint[[j]] #cat("titi\n") #browser() class<-matrix(0,1,sum(resulpls$Xvariables)+interactnumber) class[1,]<-round(rev(sort(c(resulpls$ranget[[j]],rangetint1))),5) ordreinv<-rev(order(rank(c(resulpls$ranget[[j]][resulpls$Xvariables],rangetint1)),1:(sum(resulpls$Xvariables)+interactnumber))) dimnames(class)<-list("",paste(c(dimnames(resulpls$Xini)[[2]][resulpls$Xvariables],namesint1)[ordreinv]," ")) cat("\n") cat("Range of the transformed predictors in descending order:\n") cat("\n") print(class) normedclass=class[1,]/class[1,1] if(any(normedclass<0.25)) { cat("------------\n") cat("Suggestion: remove the predictors (threshold 25%) :\n") cat(dimnames(class)[[2]][normedclass<0.25],"\n") cat("------------\n") } cat("A look at coordinate functions? (y/n)") plt1 <- scan(quiet=T,"", character(), 1) if( (length(plt1)!=0)&&((plt1=="y")|(plt1=="Y"))) plotflag<-T else plotflag<-F #cat("\n") if(sum(resulpls$Xvariables)+interactnumber<=30){ if(any(normedclass<0.25)) barplot(class[1,],xlab="range of the transf. predictors",names=dimnames(class)[[2]],ylab="", density=20,space=1.4,horiz=T,col=c(rep("blue",ncol(class))[normedclass>=0.25],rep("red",ncol(class))[normedclass<0.25]),cex.names=cexpar,cex.axis=cexpar) else barplot(class[1,],xlab="range of the transf. predictors",names=dimnames(class)[[2]],ylab="", density=20,space=1.4,horiz=T,col="blue",cex.names=cexpar,cex.axis=cexpar) if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' Influence on",dimnames(resulpls$Yini)[[2]][j],",",format(axes),"Dim.")) } if(plotflag){ cat("Coordinate function plots of the decreasing influential effects\n") ordrepredict<-ordreinv cat("How many predictors?,(<=",sum(resulpls$Xvariables)+interactnumber,")") nb<- scan(quiet=T,"", numeric(), 1) cat("How many plots on a row ?,(<=",nb,")") pc<- scan(quiet=T,"", numeric(), 1) cat("How many row(s) ?") pl<-scan(quiet=T,"", numeric(), 1) if(titlepar==F)par(mfrow=c(pl,pc),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(pl,pc),pty=ptypar) mintransf<-1e+18 maxtransf<-(-1e+18) grille<-matrix(0,nbpoints,nbpoints) for(i in ordrepredict[1:nb]) { if(i<=sum(resulpls$Xvariables)) { A[[((1:pX)[resulpls$Xvariables])[i]]]<-resulpls$AA[[j]][[((1:pX)[resulpls$Xvariables])[i]]] yy[[((1:pX)[resulpls$Xvariables])[i]]]<-spl[[((1:pX)[resulpls$Xvariables])[i]]]%*%A[[((1:pX)[resulpls$Xvariables])[i]]] mintransf<-min(mintransf,min(yy[[((1:pX)[resulpls$Xvariables])[i]]])) maxtransf<-max(maxtransf,max(yy[[((1:pX)[resulpls$Xvariables])[i]]])) }#endif i else {ii<-i-sum(resulpls$Xvariables) grille<-surfaceinter(splnc[[listinteraction[[ii]][1]]],splnc[[listinteraction[[ii]][2]]],resulpls$Bcodintmean[[ii]],resulpls$AAint[[j]][[ii]]) mintransf<-min(mintransf,min(grille)) maxtransf<-max(maxtransf,max(grille)) #browser() } }#endfor for(i in ordrepredict[1:nb]) { if(i<=sum(resulpls$Xvariables)) { plot(xx[[((1:pX)[resulpls$Xvariables])[i]]],yy[[((1:pX)[resulpls$Xvariables])[i]]],ylim=c(mintransf,maxtransf),xlab=paste(dimnames(Xini)[[2]][((1:pX)[resulpls$Xvariables])[i]],sep=""),ylab="",type="n",cex=cexpar) points(xx[[((1:pX)[resulpls$Xvariables])[i]]],yy[[((1:pX)[resulpls$Xvariables])[i]]],type="l",col=colorpar+1) text(Xini[, ((1:pX)[resulpls$Xvariables])[i]],resulpls$Transf[[j]][,((1:pX)[resulpls$Xvariables])[i]],dimnames(Xini)[[1]],cex=cexpar,col=colorpar+3) if(resulpls$BsplineX$nbni[((1:pX)[resulpls$Xvariables])[i]]!=0){ noeudint[[((1:pX)[resulpls$Xvariables])[i]]]<-BsplineX$v[[((1:pX)[resulpls$Xvariables])[i]]][[2]][(BsplineX$v[[((1:pX)[resulpls$Xvariables])[i]]][[2]]!=min(BsplineX$v[[((1:pX)[resulpls$Xvariables])[i]]][[2]]))&(BsplineX$v[[((1:pX)[resulpls$Xvariables])[i]]][[2]]!=max(BsplineX$v[[((1:pX)[resulpls$Xvariables])[i]]][[2]]))] abline(v=noeudint[[((1:pX)[resulpls$Xvariables])[i]]],lty=2)} }#endif i else {ii<-i-sum(resulpls$Xvariables) plot(1:10,type="n",xlab=paste(namesint1[ii],"interaction"),ylab="",axes=F,cex=cexpar) par(new=T) grille<-surfaceinter(splnc[[listinteraction[[ii]][1]]], splnc[[listinteraction[[ii]][2]]],resulpls$Bcodintmean[[ii]],resulpls$AAint[[j]][[ii]]) zout<-persp(x=xx[[listinteraction[[ii]][1]]],y=xx[[listinteraction[[ii]][2]]],z=grille, xlab=dimnames(Xini)[[2]][listinteraction[[ii]][1]],ylab=dimnames(Xini)[[2]][listinteraction[[ii]][2]], zlab="",zlim=c(mintransf,maxtransf),theta=thetapar,phi=phipar,r=rpar,cex=cexpar-0.1,col=colpar+2, ticktype="detailed") par(new=F) } }#endfor if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Main Effects and Interactions on ",dimnames(Yini)[[2]][j]," (",axes," Dim.)",sep="")) #browser() } }#endofinter if(ncol(Yini)>1){ cat("Another reponse ? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break } else break }#fin du repeat } } ################################################ plss.plot<-function(resulpls,Xtest,ptypar="s",typedata=T,titlepar=T,pchpar=1,cexpar=0.7, nbpoints=50,colpar=1,askpar=T,qual,names.qual) # # DESCRIPTION # give usual plots for interpreting the results of the PLSS method. # # ARGUMENTS # # resulpls S-PLUS object returned from the plss function. # Xtest test sample matrix, missing data are not allowed. The number of columns should # be the same as the number of columns of the predictor matrix for the plss # function. If Xtest is not missing, the Y-predicted matrix is Ypred. #VALUE # # no object except when Xtest is not missing. When Xtest is present, a list # containing the folllowing component: # Ypred Y predicted matrix. The number of columns is the same of the number of # columns of the response matrix for the plss function. The number of rows is # the same as in Xtest. { colorpar<-colpar Xini<-resulpls$Xini Yini<-resulpls$Yini pX<-ncol(Xini) pY<-ncol(Yini) n<-nrow(Xini) A<-list(NULL) noeudint<-list(NULL) axes<-resulpls$axes corX<-resulpls$corX corY<-resulpls$corY covXY<-resulpls$covXY compX<-resulpls$compX dimnames(compX)[[1]]=dimnames(Xini)[[1]] compY<-resulpls$compY cX<-resulpls$cX cY<-resulpls$cY Yhat<-resulpls$Yhat BsplineX<-resulpls$BsplineX itX <- BsplineX$v if(!missing(Xtest)){ Xinitest<-sweep(Xtest, 2,resulpls$meanX) if( sum(Dvar(resulpls$Xini,cor=F)$var)==ncol(Xtest)) Xinitest<- sweep(Xinitest,2,sqrt(resulpls$varX),FUN="/") dimnames(Xinitest)<-dimnames(Xtest) BsplineXtest <- Bsplinen(rbind(Xinitest,apply(resulpls$Xini,2,min),apply(resulpls$Xini,2,max)), ordre=resulpls$BsplineX$ordre,nbni=resulpls$BsplineX$nbni,center=F,D=D,tt=resulpls$BsplineX$intknots) itXtest <- BsplineXtest$v Bcodtest<-NULL for(i in 1:ncol(Xtest))Bcodtest<-cbind(Bcodtest,sweep(itXtest[[i]][[1]][1:nrow(Xtest),,drop=F],2,resulpls$BsplineX$meansc[[i]])) } if(!missing(qual)) { if(sum(dim(qual))>n) { indic=rep(0,n) indic=qual[,1] qual=indic } } dimension<-resulpls$dimension interactionpar<-resulpls$interaction if(length(interactionpar)!=0) { dimensionint<-resulpls$dimensionint rint<-length(interactionpar) rangetint<-resulpls$rangetint namesint1<-resulpls$namesint1 corYtrans<-resulpls$corYtrans } #browser() yy<-list(NULL) nom<-deparse(substitute(resulpls)) par(ask=askpar) varX<-resulpls$varX varX<-as.matrix(varX) varX1<-round(cbind(resulpls$meanX,sqrt(varX)),2) dimnames(varX1)<-list(dimnames(resulpls$Xini)[[2]],c(" mean","st.dv.")) varY<-resulpls$varY varY<-as.matrix(varY) varY1<-round(cbind(resulpls$meanY,sqrt(varY)),2) dimnames(varY1)<-list(dimnames(resulpls$Yini)[[2]],c(" mean","st.dv.")) cat(" General information on the data? (y/n)") general<- scan(quiet=T,"", character(), 1) if(length(general)==0)general<-"n" cat("\n") cat("__________________________________________________________________\n") cat("\n") if((general=="y")|(general=="Y")) { cat("\n") cat("__________________________________________________________________\n") cat("\n") cat(paste(" File of the PLSS results: ",nom,"\n")) cat("__________________________________________________________________\n") cat("\n") cat(paste(" 'X' data file:",resulpls$nomfichX,"\n")) cat(paste(format(n)," objects, ",format(pX)," variables","\n")) if(resulpls$corX)cat("The data are scaled\n") else cat("The data are centered\n") cat("------------------------------------------------------------------\n") cat(paste("'Y' data file:",resulpls$nomfichY,"\n")) cat(paste(format(n)," objects, ",format(pY)," variables","\n")) if(resulpls$corY)cat("The data are scaled\n") else cat("The data are centered\n") cat("------------------------------------------------------------------\n") cat("mean and stand. dev. of the X variables: \n") cat("\n") print(t(varX1)) cat("------------------------------------------------------------------\n") cat("mean and stand. dev. of the Y variables : \n") cat("\n") print(t(varY1)) cat("------------------------------------------------------------------\n") cat("Nature of the B-splines :\n") cat("\n") for(i in 1:pX){ if(resulpls$knots[[i]]!=0){ if(length(resulpls$listknots[[i]])==0) { if(resulpls$BsplineX$noeudequi[i])noeudqual<-", equally spaced\n" else noeudqual<-",quantiles\n" } else{ noeudqual<-", imposed\n" nombrenoeuds<-length(resulpls$listknots[[i]]) } } else{ nombrenoeuds<-0 noeudqual<-" NULL (polynomial)\n" } if(noeudqual==", imposed\n") cat(dimnames(resulpls$Xini)[[2]][i],":"," degree: ",resulpls$BsplineX$ordre[i]-1,"; knots: nb ",resulpls$knots[i],", location",format(resulpls$listknots[[i]]),"\n") else cat(dimnames(resulpls$Xini)[[2]][i],":"," degree: ",resulpls$BsplineX$ordre[i]-1,"; knots: nb ",resulpls$knots[i],", location",noeudqual) } cat("------------------------------------------------------------------\n") cat(paste("number of components : ",resulpls$axes,"\n")) cat("------------------------------------------------------------------\n") cat(paste("Optimal covariances of the (t,u): ")) cat(round(resulpls$covXY,3)) cat("\n") cat("______________________________________________________________________\n") } xx<-list(NULL) spl<-list(NULL) tt<-rep(list(NULL),pX) ttint<-rep(list(NULL),pX) for(i in 1:pX)if(resulpls$knots[i]!=0)tt[[i]]<-BsplineX$v[[i]][[2]] for(i in 1:pX)if(length(tt[[i]])!=0)ttint[[i]]<-tt[[i]][(BsplineX$ordre[i]+1):(BsplineX$ordre[i]+BsplineX$nbni[i])] for(i in 1:pX){ #cat(i," ") minXini<-min(Xini[, i]) maxXini<-max(Xini[, i]) xx[[i]]<-seq(minXini,maxXini,length=nbpoints) spl[[i]]<-sweep(Bsplinen(xx[[i]],ordre=BsplineX$ordre[i],nbni=BsplineX$nbni[i],D=D,tt=list(ttint[[i]]))$v[[1]][[1]],2,BsplineX$meansc[[i]]) varX<-Dvar(resulpls$compX,cor=T,D=resulpls$D) varY<-Dvar(resulpls$compY,cor=T,D=resulpls$D) varx<-Dvar(resulpls$compX,D=resulpls$D)$V vary<-Dvar(resulpls$compY,D=resulpls$D)$V CompXN<-varX$U CompYN<-varY$U } #cat("\n") inertreconst<-round(sum(diag(Dvar(resulpls$Yhat,D=resulpls$D)$V)),4) varYini<-Dvar(resulpls$Yini,D=resulpls$D)$V suminer<-round(sum(diag(varYini)),4) cat(paste("Y variance =",suminer,", reconstituted variance =",inertreconst,"\n")) valtab<-matrix(0,resulpls$axes,3) dimnames(valtab)<-list(format(1:resulpls$axes),c(" Y var."," % "," cumul. %")) valtab[,1]<-round(resulpls$VY,digits=4) valtab[,2]<-round(resulpls$VY/suminer*100,digits=2) for(i in 1:resulpls$axes)valtab[i,3]<-sum(valtab[1:i,2]) print(valtab) cat("______________________________________________________________________\n") cat("\n") cat("R2 of the responses on the t subspace dimensions\n") R2tab<-matrix(0,resulpls$axes,pY) dimnames(R2tab)<-list(paste(format(1:resulpls$axes)," "),dimnames(resulpls$Yini)[[2]]) for(k in 1:resulpls$axes) for(j in 1:pY) R2tab[k,j]<-round(as.numeric(Dcp(resulpls$YH[[k]][,j],D=resulpls$D))/varYini[j,j],3) if(resulpls$axes > 1)for(j in 2:resulpls$axes)R2tab[j,]<-R2tab[j,]+R2tab[j-1,] print(R2tab) if(sum(apply(resulpls$Yinitial,1,sum))==nrow(resulpls$Xini)) { cat("______________________________________________________________________\n") indicator=rep(0,nrow(resulpls$Xini)) for(i in 1:nrow(resulpls$Xini)) for(j in 1:ncol(resulpls$Yinitial)) if(resulpls$Yinitial[i,j]==1)indicator[i]=j #browser() #affectresul=discri(resulpls$compX,Xbool=resulpls$Yinitial) affectresul=discri(resulpls$compX,indicator,ResponseName=dimnames(resulpls$Yinitial)[[2]],graph=F) cat("\n") cat("table of real and predicted training groups :\n") print(affectresul$tableau) dimnames(resulpls$Xini)[[1]]=indicator qual=indicator names.qual=dimnames(Yini)[[2]] cat("Percentage of missclassified individuals :",round(100*(nrow(resulpls$Xini)-sum(diag(affectresul$tableau)))/nrow(resulpls$Xini),2),"%\n") } cat("______________________________________________________________________\n") cat("additive influence of the predictors on a component? (y/n)") plt <- scan(quiet=T,"", character(), 1) cat("\n") if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) {#1 repeat { cat("The number of the t component ?,(<=",axes,")") j<- scan(quiet=T,"", numeric(), 1) cat("......................................\n") if(titlepar==F)par(mfrow=c(1,1),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,1),pty=ptypar) tttj<-list(NULL) # liste des transformes des predicteurs expliquant additivement tj rangetttj<-rep(0,pX) dd<-1 for(i in 1:pX) { tttj[[i]]<-itX[[i]][[1]]%*%resulpls$alph[[j]][dd:(dd+dimension[i]-1),1] dd<-dd+dimension[i] rangetttj[i]<-max(tttj[[i]])-min(tttj[[i]]) } orderinv<-rev(order(rangetttj)) #cat("\n") classwTX<-matrix(0,1,pX) classwTX[1,]<-round(rev(sort(rangetttj)),4) dimnames(classwTX)<-list("",paste(dimnames(Xini)[[2]][orderinv],"(",format(orderinv),")",sep="")) #cat("\n") cat("Range of the transformed predictors in descending order:\n") #cat("\n") print(classwTX) #cat("\n") if(pX>1){ if(pX<=30){ barplot(classwTX[1,],xlab="range of the transf. predictors",horiz=T,names=dimnames(Xini)[[2]][orderinv], density=20,space=1.4,col=1:pX,cex.names=cexpar,cex.axis=cexpar) #legend(locator(1),paste(format(orderinv)," ",dimnames(resulpls$Xini)[[2]][orderinv]),col=1:pX,ncol=2,bty="n",cex=cexpar) if(titlepar==T)mtext(side=3,line=0,cex=2,outer=TRUE,paste("Contributions of the predictors to t",j,sep="")) }} if(pX>1){ cat("Coordinate function plots for t",j," :\n",sep="") cat("Ordered predictors? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) ordrepredict<-orderinv else ordrepredict<-1:pX #{### cat("How many predictors?,(<=",pX,")") nb<- scan(quiet=T,"", numeric(), 1) cat("How many plots on a row ?,(<=",nb,")") pc<- scan(quiet=T,"", numeric(), 1) cat("How many row(s) ?") pl<-scan(quiet=T,"", numeric(), 1) } else { pl<-1 pc<-1 nb<-1 ordrepredict<-1} if(titlepar==F)par(mfrow=c(pl,pc),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(pl,pc),pty=ptypar) cat("Do you need curves, data or both? (c,d,b)") repeat{ plt<-scan(quiet=T,"",character(),1) if(length(plt)==0){plt<-"d" break} if((plt=="c")|(plt=="d")|(plt=="b"))break } minTX<-1e+18 maxTX<-(-1e+18) for(i in ordrepredict[1:nb]){ if(!(plt=="d")){ if(i>1) { dd<-0 for(ii in 1:(i-1)) dd<-dd+dimension[ii] dd<-dd+1 } else {dd<-1} A[[i]]<-resulpls$alph[[j]][dd:(dd+dimension[i]-1),1,drop=F] yy[[i]]<-spl[[i]]%*%A[[i]] minTX<-min(minTX,min(yy[[i]])) maxTX<-max(maxTX,max(yy[[i]])) } else {minTX<-min(minTX,min(tttj[[i]])) maxTX<-max(maxTX,max(tttj[[i]])) } } #browser() for(i in ordrepredict[1:nb]) { if(!(plt=="d")) { plot(xx[[i]],yy[[i]],ylim=c(minTX,maxTX),xlab=dimnames(Xini)[[2]][i],ylab="", cex=cexpar,type="n") points(xx[[i]],yy[[i]],type="l",col=colorpar+1) } else plot(Xini[, i],tttj[[i]],ylim=c(minTX,maxTX), xlab=dimnames(Xini)[[2]][i],ylab="",type="n",cex=cexpar) if(!(plt=="c")) { if(!typedata)points(Xini[, i],tttj[[i]],pch=pchpar,cex=cexpar,col=colorpar+1) else { if(!is.null(qual)) text(Xini[, i],tttj[[i]],dimnames(Xini)[[1]],cex=cexpar,col=qual+1) else text(Xini[, i],tttj[[i]],dimnames(Xini)[[1]],cex=cexpar,col=colpar+1) } } if(resulpls$BsplineX$nbni[i]!=0){ noeudint[[i]]<-BsplineX$v[[i]][[2]][(BsplineX$v[[i]][[2]]!=min(BsplineX$v[[i]][[2]]))&(BsplineX$v[[i]][[2]]!=max(BsplineX$v[[i]][[2]]))] abline(v=noeudint[[i]],lty=2)} } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' influence on t",j,sep="")) #}### cat("......................................\n") cat("Another t component ? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break }#fin du repeat }#1 cat("\n") cat("__________________________________________________________________\n") cat("\n") if(axes>1){ cat("scatterplot of observations and correlations' circle? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="Y")|(plt=="y"))) { ah<-as.numeric(1) av<-as.numeric(2) par(mfrow=c(1,1),mar=c(5,5,4,2),pty="m") repeat { corYtah<-Dvar(cbind(resulpls$Yini,resulpls$compX[,ah]),cor=T,D=resulpls$D)$V[,pY+1] corYtav<-Dvar(cbind(resulpls$Yini,resulpls$compX[,av]),cor=T,D=resulpls$D)$V[,pY+1] if(!missing(Xtest)) { auxtest<-matrix(0,ncol(Bcodtest),length(resulpls$alph)) for(i in 1:length(resulpls$alph))auxtest[,i]<-resulpls$alph[[i]] compos<-rbind(compX,Bcodtest%*%auxtest) } else compos<-compX #browser() plot(compos[,ah],compos[,av],type="n",xlab=paste("t",format(ah),sep=""),ylab=paste("t",format(av),sep=""),cex=cexpar) abline(h=0) abline(v=0) if(length(dimnames(resulpls$Xini)[[1]])==0)text(compX[,ah],compX[,av],cex=cexpar,col=colorpar+1) else text(compX[,ah,drop=F],compX[,av,drop=F],cex=cexpar,dimnames(compX)[[1]],col=colorpar+1) if(!missing(Xtest)) text(compos[-(1:nrow(resulpls$Xini)),ah,drop=F],compos[-(1:nrow(resulpls$Xini)),av,drop=F],cex=cexpar,dimnames(Xtest)[[1]],col=colorpar+6) # cat("Map of the observations colored according to the levels of a categorical variable? (y/n)\n") # plto <- scan(quiet=T,"", character(), 1) # if( (length(plto)!=0)&&((plto=="Y")|(plto=="y"))) # { #repeat{ # cat("Enter the name of the vector or that of the matrix containing the column-variable\n") # nom <- scan(quiet=T,"", character(), 1) # matricenom<-as.matrix(get(nom,pos=1)) # if(nrow(matricenom)==nrow(Xini))break #else # cat("misfit on the number of rows\n") # } # if(is.vector(get(nom,pos=1))) # {dimnames(matricenom)<-list(dimnames(Xini)[[1]],c(nom)) # numer<-1 # variab<-get(nom,pos=1) # } # if((is.matrix(get(nom,pos=1)))|(is.data.frame(get(nom,pos=1)))) # { # cat(paste(dimnames(matricenom)[[2]],"(",1:ncol(matricenom),"),",sep=""),"\n") # cat("enter the column number of integer values\n") # numer <- scan(quiet=T,"",numeric(), 1) # variab<-matricenom[,numer] # cat("name of the variable : ",dimnames(matricenom)[[2]][numer],"\n") # } # cat("click to locate the top left corner of the legend\n") # for(i in min(variab):max(variab)) # text(compX[variab==i,c(ah,av)],dimnames(Xini)[[1]][variab==i],col=i+1,cex=cexpar) # legend(locator(1),paste(dimnames(matricenom)[[2]][numer],min(variab):max(variab),sep=""), # fill=(min(variab):max(variab))+1,cex=cexpar) # } if(sum(apply(resulpls$Yinitial,1,sum))==nrow(resulpls$Xini)) { cat("mark the badly affected training samples y/n") repo= scan(quiet=T,"", character(), 1) if( (length(repo)!=0)&&((repo=="Y")|(repo=="y"))) # browser() #points(resulpls$compX[affectresul$g!=affectresul$groupe,c(ah,av),drop=F],pch=pchpar,cex=cexpar+1.3) discri(compX,indicator,ti=ah,tj=av,cexpar=cexpar,graph=T,ResponseName=dimnames(resulpls$Yinitial)[[2]]) } else { if(!missing(qual)) { variab<-qual if(missing(names.qual)){ cat("\n") cat("Before clicking to locate the top left corner of the legend\n") repeat{ cat("Enter the",max(qual),"levels' names\n") names.qual<-scan(quiet=T,what=character()) if(length(names.qual)==(max(qual)-min(qual)+1))break else cat("not the right nb of levels' names,should be",max(qual)-min(qual)+1,"!\n") } } for(i in min(variab):max(variab)) text(resulpls$compX[variab==i,ah],resulpls$compX[variab==i,av],dimnames(resulpls$Xini)[[1]][variab==i],col=i+1,cex=cexpar) legend(locator(1),names.qual,x.intersp=cexpar,y.intersp=cexpar,bty="o", text.col=min(variab):max(variab)+1,cex=cexpar,fill=(min(qual):max(qual))+1) } } par(pty="s") cat("The circle of correlations (y/n)") repo= scan(quiet=T,"", character(), 1) if( (length(repo)!=0)&&((repo=="Y")|(repo=="y"))) { plot(cos(seq(0,2*pi,length=100)),sin(seq(0,2*pi,length=100)),type="l",xlab=paste("t",ah,sep=""),ylab=paste("t",av,sep=""),cex=cexpar) abline(h=0) abline(v=0) text(corYtah[1:pY],corYtav[1:pY],dimnames(resulpls$Yini)[[2]],cex=cexpar,col=colorpar+1) #arrows(rep(0,pY),rep(0,pY),corYtah[1:pY]*0.9,corYtav[1:pY]*0.9,col=colorpar+1,angle=15,length=0.15) } cat("\n") cat("Different components (y/n) ?") plt5 <- scan(quiet=T,"", character(), 1) if(length(plt5)==0)plt5<-"n" if(plt5!="y")break cat("first axis (horizontal) ?") ah <- scan(quiet=T,"", numeric(),1) cat("second axis (vertical) ?") av <- scan(quiet=T,"", numeric(),1) par(pty="m") } } cat("\n") cat("______________________________________________________________________\n") cat("\n") }#fin scatterplot cat("additive influence of the predictors on a response? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { if(pX==1) plotflag=T noeudint<-list(NULL) repeat {# if(pY==1)j<-1 else{ cat("The number of the reponse variable?,(<=",pY,")") j<- scan(quiet=T,"", numeric(), 1) } if(titlepar==F)par(mfrow=c(1,1),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,1),pty=ptypar) if(length(interactionpar)==0) { if(pX>1){ if(pX<31) { xxf<-resulpls$Transf[[j]][,1] catf<-rep(1,n) if(pX>1)for(i in 2:pX){xxf<-c(xxf,resulpls$Transf[[j]][,i]) catf<-c(catf,rep(i,n)) } tecno24<-split(xxf,catf) if(pX>30) boxplot(tecno24,sub="predictors",ylab="transf. data",cex=cexpar) else boxplot(tecno24,sub="",ylab="transf. data",cex=cexpar,names=dimnames(resulpls$Xini)[[2]]) if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,dimnames(resulpls$Yini)[[2]][j]) } cat("\n") cat("Influence of the predictors on the response:",dimnames(resulpls$Yini)[[2]][j]," \n") class<-NULL class<-matrix(0,1,pX) class[1,]<-round(rev(sort(resulpls$ranget[[j]])),5) ordreinv<-rev(order(rank((resulpls$ranget[[j]])),1:pX)) dimnames(class)<-list("",paste(dimnames(resulpls$Xini)[[2]][ordreinv],"(",format(ordreinv),")",sep="")) cat("\n") cat("Range of the transformed predictors in descending order:\n") cat("\n") print(class) normedclass=class[1,]/class[1,1] if(any(normedclass<0.25)) { cat("------------\n") cat("Suggestion: remove the predictors (threshold 25%) :\n") cat(dimnames(class)[[2]][normedclass<0.25],"\n") cat("------------\n") } cat("A look at coordinate functions? (y/n)") plt1 <- scan(quiet=T,"", character(), 1) if( (length(plt1)!=0)&&((plt1=="y")|(plt1=="Y"))) plotflag<-T else plotflag<-F #cat("\n") if(pX>1){ if(pX<=30){ if(any(normedclass<0.25)) barplot(class[1,],xlab="range of the transf. predictors", names=dimnames(resulpls$Xini)[[2]][ordreinv],ylab="", density=20,space=1.4,horiz=T,col=c(rep("blue",ncol(class))[normedclass>=0.25],rep("red",ncol(class))[normedclass<0.25]),cex.names=cexpar,cex.axis=cexpar) else barplot(class[1,],xlab="range of the transf. predictors", names=dimnames(resulpls$Xini)[[2]][ordreinv],ylab="", density=20,space=1.4,horiz=T,col="blue",cex.names=cexpar,cex.axis=cexpar) } if((titlepar==T)&(plotflag)) mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("influence of the predictors on",dimnames(resulpls$Yini)[[2]][j])) } if(plotflag){ cat("Ordered predictors? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) ordrepredict<-ordreinv else ordrepredict<-1:pX cat("How many predictors?,(<=",pX,")") nb<- scan(quiet=T,"", numeric(), 1) cat("How many plots on a row ?,(<=",nb,")") pc<- scan(quiet=T,"", numeric(), 1) cat("How many row(s) ?") pl<-scan(quiet=T,"", numeric(), 1) }#endplotflag } else {pl<-1 pc<-1 nb<-1 ordrepredict<-1} if(plotflag){### if(titlepar==F)par(pty=ptypar) else par(oma=c(0,0,4,0),pty=ptypar) if(pX>1)par(mfrow=c(pl,pc)) cat("Do you need curves, data or both? (c,d,b)") repeat{ plt<-scan(quiet=T,"",character(),1) if(length(plt)==0){plt<-"d" break} if((plt=="c")|(plt=="d")|(plt=="b"))break } mintransf<-1e+18 maxtransf<-(-1e+18) for(i in ordrepredict[1:nb]) { if(!(plt=="d")){ A[[i]]<-resulpls$AA[[j]][[i]] yy[[i]]<-spl[[i]]%*%A[[i]] mintransf<-min(mintransf,min(yy[[i]])) maxtransf<-max(maxtransf,max(yy[[i]])) } else{mintransf<-min(mintransf,min(resulpls$Transf[[j]][,i])) maxtransf<-max(maxtransf,max(resulpls$Transf[[j]][,i])) } } for(i in ordrepredict[1:nb]) { if(pX==1){ plot(resulpls$Xini[,i],resulpls$Yini[,j],xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n",cex=cexpar) text(resulpls$Xini[,i],resulpls$Yini[,j],dimnames(Xini)[[1]],cex=cexpar,col=colpar+2) } if(!(plt=="d")){ if(pX!=1) plot(xx[[i]],yy[[i]],ylim=c(mintransf,maxtransf),xlab=paste(dimnames(Xini)[[2]][i],sep=""),ylab="",type="n",cex=cexpar) points(xx[[i]],yy[[i]],type="l",col=colorpar+1) } else plot(Xini[,i],resulpls$Transf[[j]][,i],ylim=c(mintransf,maxtransf),xlab=dimnames(Xini)[[2]][i],ylab="",type="n") if(!(plt=="c")) { if(!typedata)points(Xini[, i],resulpls$Transf[[j]][,i],pch=pchpar,cex=cexpar,col=colorpar+3) else text(Xini[, i],resulpls$Transf[[j]][,i],dimnames(Xini)[[1]],cex=cexpar,col=colorpar+3) } if(resulpls$BsplineX$nbni[i]!=0){ noeudint[[i]]<-BsplineX$v[[i]][[2]][(BsplineX$v[[i]][[2]]!=min(BsplineX$v[[i]][[2]]))&(BsplineX$v[[i]][[2]]!=max(BsplineX$v[[i]][[2]]))] abline(v=noeudint[[i]],lty=2)} } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("Predictors' influence on ",dimnames(Yini)[[2]][j]," (",axes," dim.)",sep="")) #dev.off() }### }#endofinternull else { cat("\n") cat("Influence of the predictors on the response:",dimnames(resulpls$Yini)[[2]][j]," \n") cat("Correlation coefficients between",dimnames(resulpls$Yini)[[2]][j]," and the transformed predictors : \n") auxmatrix<-corYtrans[j,][rev(order(rank(abs(corYtrans[j,])),1:(pX+rint*(rint-1)/2)))] names(auxmatrix)<-dimnames(corYtrans)[[2]][rev(order(rank(corYtrans[j,,drop=F]),1:(pX+rint*(rint-1)/2)))] print(auxmatrix) class<-NULL rangetint1<-NULL for(i in 1:(rint-1)) for(k in (i+1):rint) rangetint1<-c(rangetint1,rangetint[[j]][i,k]) names(rangetint1)<-namesint1 class<-matrix(0,1,sum(Xvariables)+rint*(rint-1)/2) class[1,]<-round(rev(sort(c(resulpls$ranget[[j]],rangetint1))),5) ordreinv<-rev(order(rank(c(resulpls$ranget[[j]],rangetint1)),1:(pX+rint*(rint-1)/2))) dimnames(class)<-list("",paste(c(dimnames(resulpls$Xini)[[2]],namesint1)[],"(",format(),")",sep="")) cat("\n") cat("Range of the transformed predictors in descending order:\n") cat("\n") print(class) normedclass=class[1,]/class[1,1] if(any(normedclass<0.25)) { cat("------------\n") cat("Suggestion: remove the predictors (threshold 25%) :\n") cat(dimnames(class)[[2]][normedclass<0.25],"\n") cat("------------\n") } cat("\n") if(pX>1){ if(pX+rint*(rint-1)/2<=30){ barplot(class[1,],xlab="range of the transf. predictors",names=dimnames(class)[[2]],ylab="", density=20,space=1.4,horiz=T,cex=cexpar,col=1:pX,cex.names=cexpar,cex.axis=cexpar) } if(titlepar==T)mtext(side=3,line=0,cex=cexpar+0.7,outer=TRUE,paste("influence of the predictors on",dimnames(resulpls$Yini)[[2]][j])) } }#endofinter if(ncol(Yini)>1){ cat("Another reponse ? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break } else break }#fin du repeat } #cat("\n") cat("__________________________________________________________________\n") cat("plot of Yhat against Y (y/n)?") plt3 <- scan(quiet=T,,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ par(mfrow=c(1,1),pty=ptypar) repeat{ cat("How many dimensions ?,(<=",axes,")") pa<- scan(quiet=T,"", numeric(), 1) YHAT<-matrix(0,n,pY) for(k in 1:pa){ YHAT<-YHAT+resulpls$YH[[k]] } if(pY>1){ cat("How many plots per row (<=",pY,")?\n") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?\n") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) for(i in 1:pY) { yy<-YHAT[,i]*sqrt(resulpls$varY[i])+resulpls$meanY[i] if(sum(resulpls$Yinitial[,i]==0)==0) { titrepl<-"nean relative err (%) = " relativeerreur<-Dvar(abs(resulpls$Yinitial[,i]-yy)/abs(resulpls$Yinitial[,i])*100,D=D)$mean } else { relativeerreur<-sqrt(Dvar(resulpls$Yinitial[,i]-yy,D=resulpls$D)$var) titrepl<-"stdv err = " } plot(resulpls$Yinitial[,i],yy,xlab=dimnames(Yini)[[2]][i], ylab=paste("modeled ",dimnames(Yini)[[2]][i],",",pa," Dim.",sep=""), type="n",xlim=range(c(resulpls$Yinitial[,i],yy)),ylim=range(c(resulpls$Yinitial[,i],yy)), main=paste(titrepl,round(relativeerreur,2))) if(!typedata)points(resulpls$Yinitial[,i],yy,pch=pchpar,cex=cexpar) else text(resulpls$Yinitial[,i],yy,dimnames(Yini)[[1]],col=colorpar+3) abline(a=0,b=1,col=colorpar+1) } cat("Another axis number? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break }#repeat } cat("\n") cat("__________________________________________________________________\n") cat("\n") cat("Plots of the reponse residuals according to the number of components? (y/n)") plt <- scan(quiet=T,"", character(), 1) if( (length(plt)!=0)&&((plt=="y")|(plt=="Y"))) { repeat{ cat("How many axes ?,(<=",axes,")") pa<- scan(quiet=T,"", numeric(), 1) if(pY>1) { if(titlepar==F)par(mfrow=c(pa,pY),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(pa,pY),pty=ptypar) } else par(mfrow=c(pY,pa),pty=ptypar) YHAT<-matrix(0,n,pY) for(k in 1:pa){ YHAT<-YHAT+resulpls$YH[[k]] for(i in 1:pY) { if(k==1) variat<-range(Yini-YHAT) variat<-range(variat,range(Yini-YHAT)) plot(YHAT[,i],Yini[,i]-YHAT[,i],ylim=variat,xlab=paste("est. ",dimnames(Yini)[[2]][i],", ",k," dim",sep="") ,ylab="",type="n",cex=cexpar) if(!typedata)points(YHAT[,i],Yini[,i]-YHAT[,i],pch=pchpar,cex=cexpar) else text(YHAT[,i],Yini[,i]-YHAT[,i],dimnames(Yini)[[1]],cex=cexpar,col=colorpar+3) } } if(titlepar==T)mtext(side=3,line=-1,cex=cexpar+0.7,outer=TRUE,"Residuals according to PLSS dimensions") cat("Another axis number? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break } } cat("\n") cat("__________________________________________________________________\n") if(pY==2){ cat(paste("Plot of the reconstructed responses (", dimnames(Yini)[[2]][1],",",dimnames(Yini)[[2]][2],") (y/n)?",sep="")) plt3 <- scan(quiet=T,,what="", 1) if((length(plt3)!=0)&&(plt3=="y")){ if(titlepar==F)par(mfrow=c(1,2),pty=ptypar) else par(oma=c(0,0,4,0),mfrow=c(1,2),pty=ptypar) repeat{ cat("How many components ?,(<=",axes,")") pa<- scan(quiet=T,"", numeric(), 1) YYY<-matrix(0,n,2) for(i in 1:pa)YYY<-YYY+resulpls$YH[[i]] range1<-range(c(YYY[, 1],Yini[, 1])) range2<-range(c(YYY[, 2],Yini[, 2])) plot(YYY[, 1],YYY[,2],xlim=range1,ylim=range2,xlab=paste("fitted",dimnames(Yini)[[2]][1]),ylab=paste("fitted",dimnames(Yini)[[2]][2]),type="n",pch=pchpar,main="Reconstituted shape") if(!typedata)points(YYY[, 1],YYY[, 2],pch=pchpar,cex=cexpar) else text(YYY[, 1],YYY[, 2],dimnames(Yini)[[1]],col=colorpar+3,cex=cexpar) plot(Yini[, 1],Yini[, 2],xlim=range1,ylim=range2,xlab=dimnames(Yini)[[2]][1],ylab=dimnames(Yini)[[2]][2],type="n",pch=pchpar,main="Original shape") if(!typedata)points(Yini[, 1],Yini[, 2],pch=pchpar,cex=cexpar) else text(Yini[, 1],Yini[, 2],dimnames(Yini)[[1]],col=colorpar+3) if(titlepar==T)mtext(side=3,line=0,cex=2,outer=TRUE,paste("PLSS (",pa," dim.)",sep="")) cat("More components? (y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break } } } } ################################################ plsscv<-function(X,Y,standX=T,standY=T,interaction=NULL,D=1,A=2, degree=1,knots=0,equiknots=F,listknots,prop=0.1,GCV=2,impres=T) { # version 9.9.9 programmee par JF Durand # Regression PLS sur codage spline des predicteurs # order<-degree+1 nomfichX<-deparse(substitute(X)) nomfichY<-deparse(substitute(Y)) Xinitial<-as.matrix(X) Yinitial<-as.matrix(Y) centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var n<-nrow(Xinitial) p<-ncol(Xinitial) q<-ncol(Yinitial) if(!missing(listknots)) { if(length(listknots)!=p){cat("incorrect number of knot vectors\n") return()} knots<-vector("numeric",p) for(i in 1:p)knots[i]<-length(listknots[[i]]) } if(is.null(dimnames(Xinitial)))dimnames(Xinitial)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Yinitial)))dimnames(Yinitial)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(Xinitial)[[1]])==0) dimnames(Xinitial)[[1]]<- format(1:n) if(length(dimnames(Yinitial)[[1]])==0) dimnames(Yinitial)[[1]]<- format(1:n) if(length(dimnames(Xinitial)[[2]])==0) dimnames(Xinitial)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Yinitial)[[2]])==0) {cat("Enter the name of the response(s)\n") prov<-as.vector(format(1:q)) for(i in 1:q) {cat(paste("response ",format(i),"\n")) prov[i]<-scan(quiet=T,,what="", 1) } dimnames(Yinitial)[[2]]<-prov } # calcul de la metrique si D=1 if(length(D)==1) D<-rep(1/n,n) else D<-as.vector(D) # centrage et reduction centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots if(missing(listknots)){ listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) listknots<-listknotsn } else {if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) for(i in 1:p){ if(!is.null(listknots[[i]])){ listknotsn[[i]]<-listknots[[i]] #if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknotsn[[i]]-meanX[i])/sqrt(varX[i]) else listknotsn[[i]]<-listknots[[i]]-meanX[i] } } } BsplineX <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=T,D=D,tt=listknotsn) itX <- BsplineX$v Bcod<-NULL for(i in 1:p)Bcod<-cbind(Bcod,itX[[i]][[1]]) if(impres)cat("\n") Bcodintcum<-NULL if(length(interaction)!=0) { BsplineXint <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) rint<-length(interaction) Bcodint<-list(NULL) for(i in 1:(rint-1)) { Bcodint[[i]]<-list(NULL) for(j in (i+1):rint) { Bcodint[[i]][[j]]<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$X Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]][[j]]) } }} plscvresul<-plscv(cbind(Bcod,Bcodintcum),Yinitial,standX=F,standY=standY,A=A,D=D,prop=prop,GCV=GCV,impres=impres) if(GCV==0) { PRESS<-plscvresul$PRESS PRESStot<-plscvresul$PRESStot PRESSpar<-plscvresul$PRESSpar predict<-plscvresul$predict Yini<-plscvresul$Yini invisible(return(list(Xinitial=Xinitial,Yinitial=Yinitial,Yini=Yini,A=A,PRESS=PRESS,PRESStot=PRESStot,PRESSpar=PRESSpar,predict=predict,prop=prop,GCV=GCV))) } else { GCrit<-plscvresul$GCrit GCritot<-plscvresul$GCritot invisible(return(list(Xinitial=Xinitial,Yinitial=Yinitial,Yini=Yini,A=A,GCrit=GCrit,GCritot=GCritot,GCV=GCV))) } } ################################################# Gnorm<-function(G,X) # Generalized norm = sqrt(trace(X'GX)) # Sortie: U=GX et norm la norme de X au sens de G { X<-as.matrix(X) U<-matrix(0,nrow(X),ncol(X)) if(length(G)==1) U<-X/nrow(X) if(length(G)==nrow(X)) for(i in 1:ncol(X)) U[,i]<-X[,i]*G if(length(G)==nrow(X)*nrow(X)) U<-G%*%X n<-traceAtB(U,X) norm<-as.numeric(0) norm<-sqrt(n) return(list(norm=norm,U=U)) } ################################################## traceAtB<-function(A,B=A) { # A,B matrices, par defaut B=A; # sortie : tt = trace(A %*% t(B)) # A<-as.matrix(A) # B<-as.matrix(B) # u<-as.numeric(0) # for(i in 1:nrow(A)) # for(j in 1:ncol(A)) # u<-u+A[i,j]*B[i,j] # t<-as.numeric(u) tt<-sum(A*B) return(tt) } ################################################## tucker<-function(X,Y,D=1,QX=1,QY=1) # Analyse de Tucker inter-battery # Fournit les combinaisons lineaires X QX x # et Y QY y maximisant la covariance # X et Y matrices ayant le meme nb de lignes # si QX=1 QX est l'identite # si QY=1 QY est l'identite # si D=1 metrique des poids equi-repartis 1/nrow(X) # sorties: # x vecteur des combinaisons de X # y vecteur des combinaisons de Y # mu2 val propre la + grande # Y'DX { X<-as.matrix(X) Y<-as.matrix(Y) if((! is.matrix(QX))&(length(QX)!=1)) QX<-diag(QX) if((! is.matrix(QY))&(length(QY)!=1)) QY<-diag(QY) if(ncol(X)<=ncol(Y)){ bx<-TRUE A<-X B<-Y QA<-QX QB<-QY } else { bx<-FALSE A<-Y B<-X QA<-QY QB<-QX } U<-Dcp(B,A,D=D) # U=B'DA if(!is.matrix(QB)) W<-t(U)%*%U else W<-t(U)%*%QB%*%U if(is.matrix(QA)){L<-chol(QA) W<-t(L)%*%W%*%L } #browser() propre<-eigen(W) mu2<-propre$values[1] if(!is.matrix(propre$vectors))propre$vectors<-as.matrix(propre$vectors) a<-propre$vectors[,1] if(is.matrix(QA)) a<- solve(t(L))%*%a if(!is.matrix(QA)) b<-U%*%a else b<-U%*%QA%*%a mu2<-sqrt(mu2) b<-b/mu2 if(bx) { if(length(QA)==1)compX<-A%*%a else compX<-A%*%QA%*%a if(length(QB)==1)compY<-B%*%b else compY<-B%*%QB%*%b } else { if(length(QA)==1)compY<-A%*%a else compY<-A%*%QA%*%a if(length(QB)==1)compX<-B%*%b else compX<-B%*%QB%*%b } if(bx)sortie<-list(a,b,mu2,compX,compY) else sortie<-list(b,a,mu2,compX,compY) names(sortie[1])<-"x" names(sortie[2])<-"y" names(sortie[3])<-"cov(XY)" names(sortie[4])<-"compX" names(sortie[5])<-"compY" return(sortie) } ############################################### grad<-function(x,y,Y,itX,D=1,QX=1,QY=1) # gradient par rapport a PARX { x<-as.matrix(x) y<-as.matrix(y) z<-list(NULL) for(i in 1:length(x)) z[[i]]<-VQop(itX[[i]][[1]],Y,Q=QY,D=D) #w<-VQop(t(y),t(x),Q=QX,D=rep(1,length(x))) w<-y%*%t(x) if(is.matrix(QX))w<-w%*%QX g<-list(NULL) for(i in 1:length(x)) g[[i]]<-z[[i]]%*%w[,i] #browser() return(g) } ############################################### Projtuc<-function(c,Y,D=1) # D-proj de Y sur c # sortie du residu Y-PcY et de PcY { c<-as.matrix(c) R<-as.numeric(Dcp(c,D=D)) TT<-c%*%Dcp(c,Y,D=D)/R Z<-Y-TT return(list(Z=Z,TT=TT)) } ############################################### Gscal<-function(G,X,Y=X) # Generalized scal = trace(X'GY) # Sortie: U=GY et le prod scal de X et Y au sens de G { X<-as.matrix(X) Y<-as.matrix(Y) U<-matrix(0,nrow(X),ncol(Y)) if(length(G)==1) U<-Y/nrow(Y) if(length(G)==nrow(Y)) for(i in 1:ncol(Y)) U[,i]<-Y[,i]*G if(length(G)==nrow(X)*nrow(Y)) U<-G%*%Y scal<-traceAB(t(X),U) return(list(scal=scal,U=U)) } ############################################## traceAB<-function(A,B=A) { # A,B matrices, par defaut B=A; # sortie : u = trace(A %*% B) A<-as.matrix(A) B<-as.matrix(B) tt<-as.numeric(0) for(i in 1:nrow(A)) for(j in 1:ncol(A)) tt<-tt+A[i,j]*B[j,i] u<-as.numeric(tt) return(u) } ################################################ Projn<-function(c,Y) # D-proj de Y sur c # sortie du residu Y-PcY et de PcY { R<-invgene(crossprod(c)) Yhat<-c%*%R%*%t(c)%*%Y Yres<-Y-Yhat return(list(Yres=Yres,Yhat=Yhat)) } ################################################ Codisjc<-function(X) { # codage disjonctif complet de la matrice X # # Entree # X matrice (numerique) des modalites des variables qualitatives # Sorties # nbmodal vecteur des nombres de modalites de chaque variable # nbmodaltot nombre total de modalites # U matrice du codage # B matrice de Burt associee a U # X<-as.matrix(X) n<-nrow(X) p<-ncol(X) U<-NULL nbmodal<-NULL nommod<-NULL ylab<-NULL nbmodaltot<-as.numeric(0) for(j in 1:p) { nbmodal[j]<-max(X[,j]) nommod<-paste(dimnames(X)[[2]][j],(1:nbmodal[j]),sep="") nbmodaltot<-nbmodaltot+nbmodal[j] V<-matrix(0,n,nbmodal[j]) for(i in 1:n) V[i,X[i,j]]<-1 U<-cbind(U,V) ylab<-cbind(ylab,nommod) } B<-t(U)%*%U dimnames(U)<-list(dimnames(X)[[1]],ylab) dimnames(B)<-list(ylab,ylab) return(list(nbmodal=nbmodal,nbmodaltot=nbmodaltot,U=U,B=B)) } #################################################### codisj<-function(x) { x<-as.vector(x) nbmodal<-max(x) X<-matrix(0,length(x),nbmodal) for(i in 1:length(x)) X[i,x[i]]<-1 return(X) } ####################################################### surfaceinter<-function(Bspline1,Bspline2,moyenne,beta) { beta<-as.matrix(beta) Z<-matrix(0,nrow(Bspline1),nrow(Bspline2)) B1B2kro<-kronecker(Bspline1,Bspline2) compteur<-0 for(k in 1:nrow(Bspline1)) { Z[k,]<-sweep(B1B2kro[(compteur+1):(compteur+nrow(Bspline2)),],2,moyenne)%*%beta compteur<-compteur+nrow(Bspline2) } return(Z) } ####################################################### MAPLSS<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=2,degree=1,knots=0,equiknots=F,eps=1e-8,listknots, interaction=1:ncol(X),GCV=1,prop=0.1,colpar=1,typedata=T,titlepar=T,pchpar=1,cexpar=0.7, nbpoints=50,ptypar="s",askpar=T,PRESSprop=0.2,thetapar=-60,phipar=30,rpar=10,impres=F,bgpar="lightblue") { # Bibliography : # # J. F. Durand. "Local Polynomial Additive Regression through PLS and Splines: PLSS", # Chemometrics and Intelligent Laboratory Systems 58, 235-246, 2001. # # J. F. Durand and R. Lombardo. "Interactions terms in nonlinear PLS via additive spline # transformations". «Between Data Science and Applied Data Analysis», Studies in Classification, # Data Analysis, and Knowledge Organization. Eds M.Schader, W. Gaul and M. Vichi, Springer, 22-29, 2003 # library(splines) oldpar<-par(no.readonly = TRUE) par(ask=F,bg=bgpar,mfrow=c(1,1),bty="n",xaxt="n",yaxt="n") plot(0,0,type="n",bg=par("bg"),xlab="",ylab="",xlim=c(-4,4),ylim=c(-4,4)) text(0,1.5,"Partial Least-Squares",cex=2,col="red") text(0,3.5,"Multivariate Additive",cex=2,col="red") text(0,-0.5,"Splines : MAPLSS",cex=2,col="red") text(0,-2,"J.F. Durand, Montpellier 2 University",cex=1,col="blue") text(0,-3,"www.jf-durand-pls.com",cex=0.9,col="blue") par(oldpar) par(ask=askpar,bg=bgpar,xaxt="s",yaxt="s",mai=rep(0.75,4)) par(bg=bgpar) GCVnew=-1 interflag<-F p<-ncol(X) q<-ncol(Y) n<-nrow(X) resulini=NULL ordre<-degree +1 if(length(equiknots)==1)equiknots=rep(equiknots,p) if(length(knots)==1)knots=rep(knots,p) resulini<-NULL if(GCV==0) texto<-paste("PRESS(",prop,", . )",sep="") else texto<-paste("GCV(",GCV,", . )",sep="") X0<-as.matrix(X) Y<-as.matrix(Y) Xinitial=X0 Yinitial=Y centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var # centrage et reduction des matrices tests par rapport aux données d'aprentissage centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(!missing(Ytest)) { Yinitest<-sweep(Ytest, 2,centY$moy) if(standY) Yinitest<-sweep(Yinitest,2,sqrt(centY$var),FUN="/") dimnames(Yinitest)<-dimnames(Ytest) } if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) Xvariables<-rep(T,ncol(X)) interaction01<-rep(F,p) interaction01[interaction]<-T if(!missing(listknots)) { if(length(listknots)!=p){cat("incorrect number of knot vectors\n") return()} knots<-vector("numeric",p) for(i in 1:p)knots[i]<-length(listknots[[i]]) } listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots # if(missing(listknots)){ listknotsn<-list(NULL) # if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) # listknots<-listknotsn # } if(missing(listknots)) { listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) listknots<-listknotsn for(i in 1:p) { if(knots[i]!=0) { mi=min(Xinitial[,i]) ma=max(Xinitial[,i]) if(equiknots[i]){h<-(ma-mi)/(knots[i]+1) for(j in 1:knots[i]) listknots[[i]]<-c(listknots[[i]],mi+j*h) } else listknots[[i]]<- quantile(Xinitial[,i],seq(0,1,1/(knots[i]+1)))[2:(knots[i]+1)] } } }#fin missinglistknots if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) for(i in 1:p){ if(!is.null(listknots[[i]])){ #if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknots[[i]]-meanX[i])/sqrt(varX[i]) else listknotsn[[i]]<-listknots[[i]]-meanX[i] } } order=degree+1 BsplineXini <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=T,D=D,tt=listknotsn) BsplineXintini <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) itXini <- BsplineXini$v dimensionini<-BsplineXini$ordre+BsplineXini$nbni if(!missing(Xtest)) { predictexto=", External prediction" if(is.null(dimnames(Xtest))){ if(length(Xtest)==dim(X)[2])Xtest0<-matrix(Xtest,1,length(Xtest)) dimnames(Xtest0)<-list(paste("x",1:(length(Xtest)/length(X)),sep=""),dimnames(X)[[2]]) } else Xtest0<-as.matrix(Xtest) } else predictexto="" if(!missing(Xtest)) { Xinitest<-sweep(Xtest0, 2,centX$moy) if(standX) Xinitest<- sweep(Xinitest,2,sqrt(centX$var),FUN="/") dimnames(Xinitest)<-dimnames(Xtest0) BsplineXtest <- Bsplinen(rbind(Xinitest,apply(Xini,2,min),apply(Xini,2,max)),ordre=order, nbni=knots,center=F,D=D,tt=listknotsn) itXtest <- BsplineXtest$v } repeat { cat("==========================================================\n") reponse<-menu(c(paste(texto,"for pure main effects models (mandatory)"), "Automatic selection of interactions", paste("Validation, ANOVA plots",predictexto,sep="")), title="Multivariate Additive Partial Least-Squares Splines (0 to exit)") cat("==========================================================\n") if(reponse==0)break if((reponse==1)&(!interflag)) { cat("*************************************************\n") cat("1:",texto," for main effects models (mandatory)\n") cat("*************************************************\n") if(length(degree)>1)degree0<-degree[Xvariables] else degree0<-degree if(length(knots)>1)knots0<-knots[Xvariables] else knots0<-knots if(length(equiknots)>1)equiknots0<-equiknots[Xvariables] else equiknots0<-equiknots if(!missing(listknots)) listknots0<-listknots[Xvariables] ordre0<-degree0+1 cat("Number of dimensions to explore = ") Aexplore<-scan(quiet=T,"",numeric(),1) i<-1 exp0=1 crossvalmem=NULL if(GCV==0)Count=prop else Count=GCV repeat { if(missing(listknots)) { resul<-plsscv(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=Aexplore,degree=degree0,knots=knots0 ,equiknots=equiknots0,interaction=NULL,GCV=GCV,prop=prop) } else { resul<-plsscv(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=Aexplore,degree=degree0,knots=knots0 ,equiknots=equiknots0,listknots=listknots0,interaction=NULL,GCV=GCV,prop=prop) } crossval<-plscv.plot(resul,askpar=askpar,bgpar=bgpar) crossvalmem[[i]]=crossval if(GCV!=0) cat("Another tuning-parameter alpha,(y/n)") else cat("Another proportion of left-predicted observations,(y/n)") repo<-scan(quiet=T,"",character(),1) if(length(repo)==0)repo<-"n" if(repo!="y")break if(GCV!=0)cat("Enter alpha :") else cat("Enter prop (< 0.5):") titi<-scan(quiet=T,"",numeric(),1) if(GCV!=0) GCV<-titi else prop<-titi i<-i+1 Count=c(Count,titi) }#endrepeat campaign<-matrix(0,i,Aexplore+2) if(GCV==0) { dimnames(campaign)<-list(format(1:i),c("prop","Dim",format(1:Aexplore))) ylabel="PRESS" } else { dimnames(campaign)<-list(format(1:i),c("alpha","Dim",format(1:Aexplore))) ylabel="GCV" } for(j in 1:i) { campaign[j,1]=Count[j] campaign[j,2]=crossvalmem[[j]]$A campaign[j,3:(Aexplore+2)]=crossvalmem[[j]][[1]] } matrice<-campaign[,3:(Aexplore + 2),drop=F] campaignmean<-apply(matrice,2,"mean") campaignstdv<-sqrt(apply(matrice,2,"var")) campaign<-rbind(campaign,c(NA,NA,campaignmean)) #campaign[nrow(campaign),2]<-round(campaign[nrow(campaign),2]) dimnames(campaign)[[1]][nrow(campaign)]<-"mean" campaign<-rbind(campaign,c(NA,NA,campaignmean + 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean+2sdv" campaign<-rbind(campaign,c(NA,NA,campaignmean - 2*campaignstdv)) dimnames(campaign)[[1]][nrow(campaign)]<-"mean-2sdv" if(nrow(campaign)>4){ par(mfrow=c(1,1)) ts.plot(ts(t(campaign[,3:(Aexplore+2)])),gpars=list(xlab="Model Dim.",ylab=ylabel,type="n", main=paste(ylabel," experiments' summary",sep=""),cex=cexpar)) for(i in 1:(nrow(campaign)-2)){ points(1:Aexplore,campaign[i,3:(Aexplore+2)],pch=i,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[i,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) } points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],pch=i+1,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign)-1,3:(Aexplore+2)],type="l",lty=i,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],pch=i+2,cex=cexpar+0.7,col=colpar+i) points(1:Aexplore,campaign[nrow(campaign),3:(Aexplore+2)],type="l",lty=i,col=colpar+i) cat("Click to locate the top left corner of the legend\n") legend(locator(1),c(campaign[1:(nrow(campaign)-3),1],"mean","mean+2sdv","mean-2sdv"),pch=1:nrow(campaign),bty="o",col=c(colpar+(1:(nrow(campaign)-2)), colpar+rep((nrow(campaign)-2),2)),cex=cexpar,ncol=max(nrow(campaign)%/%5,1), text.col=c(colpar+(1:(nrow(campaign)-2)),colpar+rep((nrow(campaign)-2),2))) print(campaign[1:(nrow(campaign)-2),]) cat("Choose the best experiment row number ( 1<= # <=",nrow(campaign)-3,")") exp0<-scan(quiet=T,"",numeric(),1) crossval<-crossvalmem[[exp0]] #prop<-eval(parse("",text=dimnames(campaign)[[1]][exp0])) if(GCV==0)prop<-campaign[exp0,1] else GCV=campaign[exp0,1] cat("Retained experiment values\n") cat(ylabel,"(",campaign[exp0,1],",",crossval$A,")=",crossval[[1]][crossval$A],"\n",sep="") } if(GCV==0) texto<-paste("PRESS(",campaign[exp0,1],", . )",sep="") else texto<-paste("GCV(",campaign[exp0,1],", . )",sep="") cat(texto," values according to Dimensions\n") PRESSmat<-rbind(crossval[[2]],crossval[[1]]) if(GCV==0)dimnames(PRESSmat)[[1]][q+1]<-"TOTAL PRESS" else { dimnames(PRESSmat)[[1]][q+1]<-"TOTAL GCV" } print(round(PRESSmat,4)) A<-crossval$A PRESScounter<-crossval[[1]][A] if(missing(listknots)) { resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,knots=knots0, equiknots=equiknots0,eps=eps,interaction=NULL,impres=F) } else { resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,knots=knots0, equiknots=equiknots0,eps=eps,listknots=listknots0,interaction=NULL,impres=F) } R2mat<-resulini$R2 R2mat<-rbind(R2mat,cumsum(resulini$VY/sum(Dvar(resulini$Yini)$var)*100)) dimnames(R2mat)<-list(c(dimnames(Y)[[2]],"% TOTAL Y-VAR"),paste("Dim",1:resulini$axes,sep="")) cat("R2 coefficients according to Dimensions\n") print(round(R2mat,4)) Bcod<-resulini$Bcod BsplineXint <- Bsplinen(resulini$Xini,ordre=ordre0,nbni=knots0,noeudequi=equiknots0, center=F,D=D,tt=resulini$listknotsn) }#endreponse=1 if((reponse==2)&(!interflag)) { rint<-sum(interaction01) if(GCV>0) How="GCV" else How="CV, be patient..." cat("*************************************************\n") cat(paste("2.1: Evaluating Separately the",rint*(rint-1)/2,"Possible Interactions by",How,"\n")) cat("*************************************************\n") cat("Number of Dimensions to explore = ") Aexplore<-scan(quiet=T,"",numeric(),1) Bcodintcum<-NULL crittot<-0 namescrittot<-NULL PRESScritcum<-NULL if(rint==1) {cat("No possible interactions, ncol(X)=1 !!") break } intermatrix<-matrix(0,rint*(rint-1)/2,8) cat("All", rint*(rint-1)/2, "possible bivariate interactions are tested. Please WAIT....\n") count<-1 for(i in 1:(rint-1)) { for(j in (i+1):rint) { cat(count,",") Bcodint<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$X Bcodintcum<-cbind(Bcod,Bcodint) resulpls<-pls(X=cbind(Bcod,Bcodintcum),Y=resulini$Yini,standX=F,standY=F,A=Aexplore,D=D,splflag=T,impres=F,graph=F,eps=eps) #crit<-round((resulpls$R2c[A]-resulini$R2[A])/resulini$R2[A],7) #intermatrix[count,1:3]<-c(interaction[i],interaction[j],crit) crossval<-plscv(X=cbind(Bcod,Bcodintcum),Y,standX=F,standY=standY,prop=prop,A=Aexplore,GCV=GCV,impres=F) if(GCV==0){ PRESSmin<-min(crossval$PRESStot) Ami<-order(crossval$PRESStot)[1] } else { PRESSmin<-min(crossval$GCritot) Ami<-order(crossval$GCritot)[1] } PRESScrit<-(PRESScounter-PRESSmin)/PRESScounter PRESScritcum<-c(PRESScritcum,PRESScrit) crit<-round((resulpls$R2c[Ami]-resulini$R2[A])/resulini$R2[A],7) intermatrix[count,1:3]<-c(interaction[i],interaction[j],crit) crittot<-c(crittot,crit+PRESScrit) namescrittot<-c(namescrittot,paste(dimnames(resulini$Xini)[[2]][interaction[i]],"*",dimnames(resulini$Xini)[[2]] [interaction[j]],sep="")) intermatrix[count,4:6]<-c(PRESScrit,crit+PRESScrit,Ami) count<-count+1 #browser() } } cat("\n") crittot<-crittot[-1] names(crittot)<-namescrittot if(GCV==0) dimnames(intermatrix)<-list(namescrittot,c("i","j",paste("R2CRIT(",format(A),")",sep=""), "PRESSCRIT","TOTCRIT","A","PRESS","%rel.PRESSgain")) else dimnames(intermatrix)<-list(namescrittot,c("i","j",paste("R2CRIT(",format(A),")",sep=""), "PRESSCRIT","TOTCRIT","A","GCV","%rel.GCVgain")) revorder<-rev(order(crittot)) orderinteract<-rev(sort(crittot)) selectintermatrix<-intermatrix[revorder[1:nrow(intermatrix)],,drop=F] if(impres)print(selectintermatrix[,1:6]) par(mfrow=c(1,1)) barplot(orderinteract,ylab=paste("TOTCRIT = R2CRIT +",texto,"CRIT"), xlab="candidate interactions",col=2:nrow(intermatrix),cex.names=cexpar,cex.axis=cexpar) abline(v=sum(selectintermatrix[,5]>0)*1.225,col=colpar+1) i<-1 listinteraction<-list(NULL) Bcodintcum<-Bcod currentintermatrix<-matrix(0,nrow=1,ncol=8) CVseq<-NULL cat("*************************************************\n") cat("2.2: Incorporating interactions step by step : \n") cat("*************************************************\n") if(GCV==0) cat(paste("Reference : Main effects PRESS(",prop,",",A,") = ",round(PRESScounter,7),sep=""),"\n") else cat(paste("Reference : Main effects GCV(",GCV,",",A,") = ",round(PRESScounter,7),sep=""),"\n") cat("-------------------------------------------------\n") PRESScounter0<-PRESScounter repeat { cat("candidate ",i," : ",dimnames(selectintermatrix)[[1]][i]) if(selectintermatrix[i,5]<0){ i<-i-1 cat(" REFUSED.\n") break} listinteraction[[i]]<-selectintermatrix[i,1:2] Bcodintcum<-cbind(Bcodintcum, Dcenter(interactionij(BsplineXint,selectintermatrix[i,1],selectintermatrix[i,2]),D=D)$X) crossval<-plscv(X=Bcodintcum,Y,standX=F,standY=standY,GCV=GCV,prop=prop,A=Aexplore,impres=F) if(GCV==0)PRESSmin<-min(crossval$PRESStot) else PRESSmin<-min(crossval$GCritot) gain<-(PRESScounter-PRESSmin)/PRESScounter if(gain=0)) GCV=GCVnew break } #**************************************** if(reponse==2) { if(length(degree0)==1)degree0=rep(degree0,ncol(X)) if(length(knots0)==1)knots0=rep(knots0,ncol(X)) if(!interflag){ cat("no interaction!\n") if(missing(listknots)) resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0[Xvariables],knots=knots0[Xvariables],equiknots=equiknots0[Xvariables],impres=F) else resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0[Xvariables],listknots=listknots0[Xvariables],impres=F) } else { #cat("la 1, MAPLSS\n") #browser() numberinteract<-c(length(listinteraction)) if(missing(listknots)) resulini<-plssinter(X,Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,knots=knots0 ,equiknots=equiknots0,listinteraction=listinteraction,impres=F,Xvariables=Xvariables) else resulini<-plssinter(X,Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,listknots=listknots0, listinteraction=listinteraction,impres=F,Xvariables=Xvariables) cat("Remaining main effects + interactions\n") cat(dimnames(X)[[2]][Xvariables],dimnames(selectintermatrix)[[1]][1:length(listinteraction)] [intervariables],"\n") } #cat("la 2, MAPLSS\n") #browser() if(interflag) { #browser() plss.plotinter(resulini,colpar=colpar,typedata=typedata,titlepar=titlepar,pchpar=pchpar, cexpar=cexpar,nbpoints=nbpoints,ptypar=ptypar,askpar=askpar,thetapar=thetapar,phipar=phipar,rpar=rpar) } else plss.plot(resulini,colpar=colpar,typedata=typedata,titlepar=titlepar,pchpar=pchpar, cexpar=cexpar,nbpoints=nbpoints,ptypar=ptypar,askpar=askpar) } #****************************************finreponse1 if(reponse==1) { repeat{ cat("Choose Cross-Validation (1) or Generalized Cross-Validation (2)? (1/2)") choix<-scan(quiet=T,what=character(),n=1) if(length(choix)==0)choix="4" if((choix=="1")|(choix=="2"))break } if(choix=="1"){ GCVnew=0 cat(paste("Different proportion than",prop,"of left-predicted observations? (y/n)")) choose=scan(quiet=T,what=character(),n=1) if(length(choose)==0)choose="n" if((choose=="y")|(choose=="Y")) {cat("prop=") prop<-scan(quiet=T,what=numeric(),n=1) } texto<-paste("PRESS(",prop,", . )",sep="") cat("Cross-Validation, ",round(n*prop)," out at a time ( prop=",prop,")\n") } else { cat(paste("Different positive parameter than",GCV,"to penalize the MSE? (y/n)")) choose=scan(quiet=T,what=character(),n=1) if(length(choose)==0)choose="n" if((choose=="y")|(choose=="Y")) {repeat{ cat("alpha=") GCVnew<-scan(quiet=T,what=numeric(),n=1) if((GCVnew>0))break }} else GCVnew=GCV texto<-paste("GCV(",GCVnew,", . )",sep="") cat("Generalized Cross-Validation, alpha=",GCVnew,"\n") } Bcod<-NULL if(sum(Xvariables)!=0) { if(length(degree)>1)degree<-degree0[Xvariables] if(length(knots)>1)knots<-knots0[Xvariables] if(length(equiknots)>1)equiknots<-equiknots0[Xvariables] if(!missing(listknots)) {knots0<-NULL for(i in 1:ncol(X))knots0[i]<-length(listknots0[[i]]) } BsplineX <- Bsplinen(resulini$Xini[,Xvariables],ordre=degree0+1,nbni=knots0,noeudequi=equiknots0, center=T,D=D,tt=resulini$listknotsn[Xvariables]) itX <- BsplineX$v for(i in 1:ncol(X[,Xvariables,drop=F]))Bcod<-cbind(Bcod,itX[[i]][[1]]) } Bcodintcum<-NULL if(interflag){ if(sum(intervariables)!=0) { if(!missing(listknots)) {knots0<-NULL for(i in 1:ncol(X))knots0[i]<-length(listknots0[[i]]) } BsplineXint <- Bsplinen(resulini$Xini,ordre=degree0+1,nbni=knots0,noeudequi=equiknots0,center=F,D=D, tt=resulini$listknotsn) rint1<-length(intervariables) Bcodint<-list(NULL) AAA<-selectintermatrix[(1:rint1)[intervariables],1:2,drop=F] for(i in 1:nrow(AAA)) { Bcodint[[i]]<-Dcenter(interactionij(BsplineXint,AAA[i,1],AAA[i,2]),D=D)$X Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]]) } } } cat("Number of dimensions to explore = ") Aexplore<-scan(quiet=T,"",numeric(),1) plscvresul<-plscv(cbind(Bcod,Bcodintcum),Y,standX=F,standY=standY,A=Aexplore,D=D,prop=prop,GCV=GCVnew,impres=impres) plscvresul<-plscv.plot(plscvresul,bgpar=bgpar) A<-plscvresul$A } #****************************************finreponse2 if(reponse==3) { if(!missing(Xtest)) { #predicteurs if(sum(Xvariables)!=0) { cat("selected main effects\n") cat(dimnames(Xtest)[[2]][Xvariables],"\n") } else cat("no main effect\n") if(interflag) { cat("selected interactions\n") for(i in 1:length(listinteraction)) cat(" ",paste(dimnames(Xtest)[[2]][listinteraction[[i]][1]],"*",dimnames(Xtest)[[2]][listinteraction[[i]][2]],sep="")) } else cat("no interaction\n") cat("\n") if(interflag|(sum(Xvariables)!=0)) { Bcod<-NULL Bcodtest<-NULL Bcodintcum=NULL Bcodtestintcum=NULL #------------------------------ # calcul du codage des effets principaux (apprentissage et test) if(sum(Xvariables)!=0) { preds=(1:p)[Xvariables] pX=sum(Xvariables) for(i in preds) { Bcod<-cbind(Bcod,itXini[[i]][[1]]) Bcodtest<-cbind(Bcodtest,sweep(itXtest[[i]][[1]][1:nrow(Xtest),,drop=F],2,BsplineXini$meansc[[i]])) } }#end if(sum(Xvariables)!=0) #------------------------------ #codage des interactions (apprentissage et test) if(interflag) { namesint1<-NULL dimensionint<-rep(0,length(listinteraction)) Bcodint<-list(NULL) Bcodintmean<-list(NULL) Bcodtestint<-list(NULL) for(i in 1:length(listinteraction)) { centredinteract=Dcenter(interactionij(BsplineXintini,listinteraction[[i]][1],listinteraction[[i]][2]),D=D) Bcodint[[i]]<-centredinteract$X Bcodintmean[[i]]<-centredinteract$mx dimensionint[i]<-ncol(Bcodint[[i]]) Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]]) namesint1<-c(namesint1,paste(dimnames(Xini)[[2]][listinteraction[[i]][1]],"*",dimnames(Xini)[[2]][listinteraction[[i]][2]],sep="")) Bcodtestint[[i]]<-sweep(interactionij(BsplineXtest,listinteraction[[i]][1],listinteraction[[i]][2])[1:nrow(Xtest),],2,Bcodintmean[[i]]) Bcodtestintcum<-cbind(Bcodtestintcum,Bcodtestint[[i]]) } }#fin de interflag #------------------------------ #codage de l'apprentissage Bcod=cbind(Bcod,Bcodintcum) #codage du test Bcodtest=cbind(Bcodtest,Bcodtestintcum) #------------------------------ # pls linéaire sur les codages if(missing(Ytest)) resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,standX=F,standY=F,A=A,D=D,splflag=T,impres=impres, graph=F,eps=eps,cexpar=cexpar,colpar=colpar,titlepar=titlepar) else resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,Ytest=Yinitest,standX=F,standY=F,A=Aexplore,D=D,splflag=T, impres=impres,graph=F,eps=eps,cexpar=cexpar,colpar=colpar,titlepar=titlepar) #------------------------------ pY<-q axes<-resulpls$A BETALCR<-resulpls$BETALCR YH<-resulpls$YH Yhat<-matrix(0,n,q) dimnames(Yhat)<-dimnames(Yini) VY<-NULL for(i in 1:axes){ Yhat<-Yhat+YH[[i]] VY<-c(VY,sum(Dvar(YH[[i]],D=D)$var)) } if((!missing(Xtest))&(missing(Ytest))){ cat("Prediction of the test sample with",axes,"dimensions\n") dimopt<-axes AAt<-list(NULL) Transftest<-list(NULL) Transftestint=list(NULL) AAtint=list(NULL) for(j in 1:q) { dd<-1 if(sum(Xvariables)!=0) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimensionini[preds[i]]-1)]) Transftest[[j]][,i]<-Bcodtest[,dd:(dd+dimensionini[preds[i]]-1)]%*%AAt[[j]][[i]] dd<-dd+dimensionini[preds[i]] } }#endif sum(Xvariables)!=0 if(interflag) { Transftestint[[j]]<-matrix(0,nrow(Xtest),length(listinteraction)) AAtint[[j]]<-list(NULL) for(i in 1:length(listinteraction)) { AAtint[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimensionint[i]-1)]) dd<-dd+dimensionint[i] Transftestint[[j]][,i]<-Bcodtestint[[i]]%*%AAtint[[j]][[i]] } }#endif interflag } #browser() Yaju<-matrix(0,nrow(Xtest),q) dimnames(Yaju)<-list(dimnames(Xtest)[[1]],paste("est.",dimnames(Yini)[[2]],sep="")) for(j in 1:q) { if(sum(Xvariables)!=0) { for(i in 1:pX) Yaju[,j]<-Yaju[,j]+Transftest[[j]][,i] } if(interflag) { for(i in 1:length(listinteraction)) Yaju[,j]<-Yaju[,j]+Transftestint[[j]][,i] } } for(i in 1:q){ if(standY)Yaju[,i]<-Yaju[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yaju[,i]+centY$moy[i] } print(Yaju) #browser() }#fin du if missing Xtest if((!missing(Xtest))&(!missing(Ytest))) { #repeat{#2 axes=Aexplore cat("Validation of the model with the test sample according to",axes,"dimensions\n") #{ Yerr<-as.list(1:axes) Yajust<-as.list(1:axes) ERRMOY<-vector("numeric",axes) for(dimopt in 1:axes) { Yerr[[dimopt]]<-matrix(0,nrow(Ytest),ncol(Ytest)) AAt<-list(NULL) Transftest<-list(NULL) Transftestint=list(NULL) AAtint=list(NULL) for(j in 1:q) { dd<-1 if(sum(Xvariables)!=0) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimensionini[preds[i]]-1)]) Transftest[[j]][,i]<-Bcodtest[,dd:(dd+dimensionini[preds[i]]-1)]%*%AAt[[j]][[i]] dd<-dd+dimensionini[preds[i]] } }#endif sum(Xvariables)!=0 if(interflag) { Transftestint[[j]]<-matrix(0,nrow(Xtest),length(listinteraction)) AAtint[[j]]<-list(NULL) for(i in 1:length(listinteraction)) { AAtint[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimensionint[i]-1)]) dd<-dd+dimensionint[i] Transftestint[[j]][,i]<-Bcodtestint[[i]]%*%AAtint[[j]][[i]] } }#endif interflag } #browser() Yaju<-matrix(0,nrow(Xtest),q) dimnames(Yaju)<-list(dimnames(Xtest)[[1]],paste("est.",dimnames(Yini)[[2]],sep="")) for(j in 1:q) { if(sum(Xvariables)!=0) { for(i in 1:pX) Yaju[,j]<-Yaju[,j]+Transftest[[j]][,i] } if(interflag) { for(i in 1:length(listinteraction)) Yaju[,j]<-Yaju[,j]+Transftestint[[j]][,i] } } for(i in 1:q){ if(standY)Yaju[,i]<-Yaju[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yaju[,i]+centY$moy[i] } Yajust[[dimopt]]<-Yaju Yerr[[dimopt]]<- Ytest-Yaju ERRMOY[dimopt]<-mean(apply((Yerr[[dimopt]])^2,2,mean)) }#fin du for sur dimopt cat("Mean Squared Error of the response(s) according to the dimension\n") errmoy<-matrix(ERRMOY,1,axes) dimnames(errmoy)<-list("MSE",1:axes) print(round(errmoy,7)) par(mfrow=c(1,1)) if(titlepar) plot(errmoy[1,],xlab="Model Dim.",ylab="MSE",type="l",main=paste("Opt. Dim. ",order(errmoy[1,])[1]," , MSE(",order(errmoy[1,])[1],") = ",round(errmoy[1,order(errmoy[1,])[1]],7),sep="")) else plot(errmoy[1,],xlab="Model Dim.",ylab="MSE on test data",type="l",sep="") points(errmoy[1,],pch=10,cex=cexpar+0.7) cat("Choose the dimension (<=",axes,")") dimoptopt<-scan(quiet=T,"",numeric(),1) A=dimoptopt cat("Estimated Responses (Yest) as well as Errors (Yerr = Ytest - Yest) \n") dimnames(Yerr[[dimoptopt]])[[2]]<-paste("err.",dimnames(Ytest)[[2]],sep="") Yaff<-NULL for(i in 1:ncol(Ytest))Yaff<-cbind(Yaff,as.matrix(cbind(Yajust[[dimoptopt]][,i,drop=F],Yerr[[dimoptopt]][,i,drop=F]))) print(round(Yaff,4)) if(sum(Ytest==0)==0){ cat("Relative Errors in %: |Yerr|/|Ytest|*100\n") erreur100<-abs(Yerr[[dimoptopt]])/abs(Ytest)*100 erreur100<-rbind(erreur100,apply(erreur100,2,mean),sqrt(apply(erreur100,2,var))) dimnames(erreur100)<-list(c(dimnames(Ytest)[[1]],"Mean","Stdv"),dimnames(Ytest)[[2]]) print(round(erreur100,2)) } cat(paste("MSE(",dimoptopt,")=",round(errmoy[1,dimoptopt],4),sep=""),"\n") cat("plot of estimated versus observed Ytest\n") if(q>1){ cat("How many plots per row (<=",q,")?\n") colplots<-scan(quiet=T,"",numeric(),1) cat("How many rows?\n") rowplots<-scan(quiet=T,"",numeric(),1) par(mfrow=c(rowplots,colplots)) } else par(mfrow=c(1,1)) par(pty="s") for(i in 1:q) { if(titlepar) { if(sum(Ytest==0)==0){ if(abs(sum(Yini^2)-sum(Yinitest^2))1e-06] if(length(diago)==1)R<-matrix(1/diago,1,1) else R<-valsin$u[,1:length(diago)]%*%diag(1/diago)%*%t(valsin$u[,1:length(diago)]) #_______________________________________________________________________________ return(R) } #################################################### plssinter<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=2,degree=1,knots=0,equiknots=F, eps=1e-8,listknots,impres=T,interaction=NULL,listinteraction,Xvariables=1:ncol(X)) { # version 9.6 programmee par JF Durand # Regression PLS sur codage spline des predicteurs # order<-degree +1 nomfichX<-deparse(substitute(X)) nomfichY<-deparse(substitute(Y)) Xinitial<-as.matrix(X) Yinitial<-as.matrix(Y) centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var if(!missing(Xtest)){ centrageXtest <- Dvar(Xtest, D = D, cor = F) meanXtest <- centrageXtest$mean varXtest <- centrageXtest$var } n<-nrow(Xinitial) p<-ncol(Xinitial) q<-ncol(Yinitial) if(!missing(listknots)) { if(length(listknots)!=p){cat("nb incorrect de vecteurs de noeuds\n") return()} knots<-vector("numeric",p) for(i in 1:p)knots[i]<-length(listknots[[i]]) } if(is.null(dimnames(Xinitial)))dimnames(Xinitial)<-list(format(1:n),paste("X",1:p,sep="")) if(is.null(dimnames(Yinitial)))dimnames(Yinitial)<-list(format(1:n),paste("Y",1:q,sep="")) if(length(dimnames(Xinitial)[[1]])==0) dimnames(Xinitial)[[1]]<- format(1:n) if(length(dimnames(Yinitial)[[1]])==0) dimnames(Yinitial)[[1]]<- format(1:n) if(length(dimnames(Xinitial)[[2]])==0) dimnames(Xinitial)[[2]]<- paste("X",1:p,sep="") if(length(dimnames(Yinitial)[[2]])==0) {cat("Affect name(s) to response(s)\n") prov<-as.vector(format(1:q)) for(i in 1:q) {cat(paste("response ",format(i),"\n")) prov[i]<-scan(quiet=T,,what="", 1) } dimnames(Yinitial)[[2]]<-prov } # centrage et reduction centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(!missing(Xtest)) { Xinitest<-sweep(Xtest, 2,centX$moy) if(standX) Xinitest<- sweep(Xinitest,2,sqrt(centX$var),FUN="/") dimnames(Xinitest)<-dimnames(Xtest) } if(!missing(Ytest)) { Yinitest<-sweep(Ytest, 2,centY$moy) if(standY) Yinitest<-sweep(Yinitest,2,sqrt(centY$var),FUN="/") dimnames(Yinitest)<-dimnames(Ytest) } if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) # calcul de la matrice de codage spline de Xini listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots if(missing(listknots)){ listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) listknots<-listknotsn } else {if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) for(i in 1:p){ if(!is.null(listknots[[i]])){ #if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknots[[i]]-meanX[i])/sqrt(varX[i]) else listknotsn[[i]]<-listknots[[i]]-meanX[i] } } } #cat("la bas plssinter\n") #browser() BsplineX <- Bsplinen(Xini[,Xvariables],ordre=order[Xvariables],nbni=knots[Xvariables],noeudequi=equiknots[Xvariables], center=T,D=D,tt=listknotsn[Xvariables]) itX <- BsplineX$v dimension<-BsplineX$ordre+BsplineX$nbni Bcod<-NULL if(sum(Xvariables)!=0) for(i in (1:sum(Xvariables)))Bcod<-cbind(Bcod,itX[[i]][[1]]) Bcodintcum<-NULL if((missing(listinteraction))&(length(interaction)!=0)) { BsplineXint <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) rint<-length(interaction) Bcodint<-list(NULL) Bcodintmean<-list(NULL) dimensionint<-matrix(0,rint,rint) namesint<-matrix("",rint,rint) namesint1<-NULL for(i in 1:(rint-1)) { Bcodint[[i]]<-list(NULL) for(j in (i+1):rint) { Bcodint[[i]][[j]]<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$X Bcodintmean[[i]][[j]]<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$mx Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]][[j]]) namesint[i,j]<-paste(dimnames(Xini)[[2]][interaction[i]],"*",dimnames(Xini)[[2]][interaction[j]],sep="") namesint1<-c(namesint1,namesint[i,j]) dimensionint[i,j]<-dimension[interaction[i]]*dimension[interaction[j]] } }} if((!missing(listinteraction))&(length(listinteraction)!=0)) { namesint1<-NULL dimensionint<-rep(0,length(listinteraction)) BsplineXint <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) Bcodint<-list(NULL) Bcodintmean<-list(NULL) for(i in 1:length(listinteraction)) { Bcodint[[i]]<-Dcenter(interactionij(BsplineXint,listinteraction[[i]][1],listinteraction[[i]][2]),D=D)$X dimensionint[i]<-ncol(Bcodint[[i]]) Bcodintcum<-cbind(Bcodintcum,Bcodint[[i]]) namesint1<-c(namesint1,paste(dimnames(Xini)[[2]][listinteraction[[i]][1]],"*",dimnames(Xini)[[2]][listinteraction[[i]][2]],sep="")) Bcodintmean[[i]]<-Dcenter(interactionij(BsplineXint,listinteraction[[i]][1],listinteraction[[i]][2]),D=D)$mx } } if(!missing(Xtest)){ BsplineXtest <- Bsplinen(rbind(Xinitest[,Xvariables],apply(Xini[,Xvariables],2,min),apply(Xini[,Xvariables],2,max)),ordre=order[Xvariables],nbni=knots[Xvariables],center=F,D=D,tt=BsplineX$intknots[Xvariables]) itXtest <- BsplineXtest$v Bcodtest<-NULL for(i in (1:length(Xvariables)))Bcodtest<-cbind(Bcodtest,sweep(itXtest[[i]][[1]][1:nrow(Xtest),,drop=F],2,BsplineX$meansc[[i]])) #add interactions if any!!!! } if(impres){ cat("\n") if((length(interaction)==0)& (missing(listinteraction))) cat("PLSS : PLS LINEAIRE SUR LE CODAGE SPLINE DES PREDICTEURS\n") else cat("PLSS : PLS LINEAIRE SUR LE CODAGE SPLINE DES PREDICTEURS + INTERACTIONS\n") cat("\n") } ######################### if(missing(Xtest)) { resulpls<-pls(X=cbind(Bcod,Bcodintcum),Y=Yini,standX=F,standY=F,A=A,D=D,splflag=T,impres=impres,graph=F,eps=eps) } else {if(missing(Ytest)) resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,standX=F,standY=F,A=A,D=D,splflag=T,impres=impres,graph=F,eps=eps) else resulpls<-pls(X=Bcod,Y=Yini,Xtest=Bcodtest,Ytest=Yinitest,standX=F,standY=F,A=A,D=D,splflag=T,impres=impres,graph=F,eps=eps) } ######################### if(impres)cat("\n") alph<-resulpls$alph R2<-resulpls$R2c pX<-p pY<-q noeudint<-list(NULL) axes<-resulpls$A corX<-standX corY<-standY covXY<-resulpls$covXY alphaX<-NULL compX<-resulpls$TX compY<-resulpls$UY cX<-resulpls$WX cY<-resulpls$CY BETALCR<-resulpls$BETALCR YH<-resulpls$YH Yhat<-matrix(0,n,pY) dimnames(Yhat)<-dimnames(Yini) VY<-NULL for(i in 1:axes){ Yhat<-Yhat+YH[[i]] VY<-c(VY,sum(Dvar(YH[[i]],D=D)$var)) } AA<-list(NULL) ranget<-list(NULL) Transf<-list(NULL) if((length(interaction)!=0)| !missing(listinteraction)) { rangetint<-list(NULL) Transfint<-list(NULL) AAint<-list(NULL) } for(j in 1:pY) { Transf[[j]]<-matrix(0,n,pX) if(sum(Xvariables)!=0) { ranget[[j]]<-rep(0,sum(Xvariables)) names(ranget[[j]])<-dimnames(Xini)[[2]][Xvariables] } if((length(interaction)!=0)& missing(listinteraction)){ Transfint[[j]]<-array(0,c(n,rint,rint)) AAint[[j]]<-list(NULL) rangetint[[j]]<-matrix(0,rint,rint) } if(!missing(listinteraction)){ Transfint[[j]]<-matrix(0,n,length(listinteraction)) AAint[[j]]<-list(NULL) rangetint[[j]]<-rep(0,length(listinteraction)) } dd<-1 if(sum(Xvariables)!=0) { AA[[j]]<-list(NULL) compteur=NULL #browser() for(i in 1:sum(Xvariables)) { AA[[j]][[i]]<-as.matrix(BETALCR[[axes]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transf[[j]][,i]<-BsplineX$v[[i]][[1]]%*%AA[[j]][[i]] # browser() compteur<-c(compteur,diff(range(Transf[[j]][,i]))) } ranget[[j]]=compteur } if((length(interaction)!=0)& missing(listinteraction)) { for(i in 1:(rint-1)) { AAint[[j]][[i]]<-list(NULL) for(k in (i+1):rint){ AAint[[j]][[i]][[k]]<-as.matrix(BETALCR[[axes]][j,dd:(dd+dimensionint[i,k]-1)]) dd<-dd+dimensionint[i,k] Transfint[[j]][,i,k]<-Bcodint[[i]][[k]]%*%AAint[[j]][[i]][[k]] rangetint[[j]][i,k]<-diff(range(Transfint[[j]][,i,k])) dimnames(rangetint[[j]])<-list(dimnames(Xini)[[2]][interaction],dimnames(Xini)[[2]][interaction]) }} #browser() }#endif if(!missing(listinteraction)) { for(i in 1:length(listinteraction)) { #browser() AAint[[j]][[i]]<-as.matrix(BETALCR[[axes]][j,dd:(dd+dimensionint[i]-1)]) dd<-dd+dimensionint[i] Transfint[[j]][,i]<-Bcodint[[i]]%*%AAint[[j]][[i]] rangetint[[j]][i]<-diff(range(Transfint[[j]][,i])) }#endfor names(rangetint[[j]])<-namesint1 #browser() }#endif }#endjloop #browser() if(impres){ if((!missing(Xtest))&(missing(Ytest))){ repeat{ cat("\n") cat("Prediction pour l'echantillon test par le modele spline additif suivant la dimension ? (o/n)") repo<-scan(quiet=T,"",character(),1) if((length(repo)==0)|(repo!="o"))break else { cat("Choisissez la dimension du modele (<=",axes,")") dimopt<-scan(quiet=T,"",numeric(),1) AAt<-list(NULL) Transftest<-list(NULL) for(j in 1:pY) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) dd<-1 AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transftest[[j]][,i]<-BsplineXtest$v[[i]][[1]][1:nrow(Xtest),,drop=F]%*%AAt[[j]][[i]] } } Yaju<-matrix(0,nrow(Ytest),q) dimnames(Yaju)<-list(dimnames(Ytest)[[1]],paste("est.",dimnames(Ytest)[[2]],sep="")) for(j in 1:pY) for(i in 1:pX) Yaju[,j]<-Yaju[,j]+Transftest[[j]][,i] for(i in 1:pY){ if(standY)Yaju[,i]<-Yaju[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yaju[,i]+centY$moy[i] } print(Yaju) #browser() } }##fin du repeat }#fin du if missing Xtest if((!missing(Xtest))&(!missing(Ytest))) { repeat{#2 cat("\n") cat("Validation par l'echantillon test suivant la dimension ? (o/n)") repo<-scan(quiet=T,"",character(),1) if((length(repo)==0)|(repo!="o"))break else { Yerr<-as.list(1:axes) Yajust<-as.list(1:axes) ERRMOY<-vector("numeric",axes) for(dimopt in 1:axes) { Yerr[[dimopt]]<-matrix(0,nrow(Ytest),ncol(Ytest)) AAt<-list(NULL) Transftest<-list(NULL) for(j in 1:pY) { Transftest[[j]]<-matrix(0,nrow(Xtest),pX) dd<-1 AAt[[j]]<-list(NULL) for(i in 1:pX) { AAt[[j]][[i]]<-as.matrix(BETALCR[[dimopt]][j,dd:(dd+dimension[i]-1)]) dd<-dd+dimension[i] Transftest[[j]][,i]<-BsplineXtest$v[[i]][[1]][1:nrow(Xtest),,drop=F]%*%AAt[[j]][[i]] } } Yaju<-matrix(0,nrow(Ytest),q) dimnames(Yaju)<-list(dimnames(Ytest)[[1]],paste("est.",dimnames(Ytest)[[2]],sep="")) for(j in 1:pY) for(i in 1:pX) Yaju[,j]<-Yaju[,j]+Transftest[[j]][,i] for(i in 1:pY){ if(standY)Yaju[,i]<-Yaju[,i]*sqrt(centY$var[i])+centY$moy[i] else Yaju[,i]<-Yaju[,i]+centY$moy[i] } Yajust[[dimopt]]<-Yaju Yerr[[dimopt]]<- Ytest-Yaju ERRMOY[dimopt]<-mean(sqrt(apply(Yerr[[dimopt]]^2,2,mean))) }#fin du for if(ncol(Ytest)==1)cat("Erreur moyenne de la reponse suivant la dimension\n") else cat("Moyenne des erreurs suivant la dimension\n") errmoy<-matrix(ERRMOY,1,axes) dimnames(errmoy)<-list("MSE",1:axes) print(round(errmoy,3)) cat("Choisissez la dimension du modele (<=",axes,")") dimoptopt<-scan(quiet=T,"",numeric(),1) cat("Reponses estimees et erreurs de prediction Yerr = Ytest - Yest \n") dimnames(Yerr[[dimoptopt]])[[2]]<-paste("err.",dimnames(Ytest)[[2]],sep="") Yaff<-NULL for(i in 1:ncol(Ytest))Yaff<-cbind(Yaff,cbind(Yajust[[dimoptopt]][,i,drop=F],Yerr[[dimoptopt]][,i,drop=F])) print(round(Yaff,4)) } }#fin du repeat2 }#fin du if((!missing(Xtest))&(!missing(Ytest))) }#fin de impres if(length(knots)==1)knots<-rep(knots,pX) if((!missing(Xtest))&(!missing(Ytest))) invisible(return(list(nomfichX=nomfichX, axes=axes, Xinitial=Xinitial, meanX=meanX, varX=varX, corX=corX,Xini=Xini,covXY=covXY, cX=cX,compX=compX,BsplineX=BsplineX,Bcod=Bcod,Transf=Transf,AA=AA,ranget=ranget,D=D,nomfichY=nomfichY,Yinitial=Yinitial, dimension=dimension,alph=alph, meanY=meanY,varY=varY,VY=VY,corY=corY,Yini=Yini,cY=cY,compY=compY,Yhat=Yhat,BETALCR=BETALCR,YH=YH,listknots=listknots,listknotsn=listknotsn,R2=R2,knots=knots,Yerr=Yerr,Xvariables=Xvariables))) else { if((length(interaction)==0)& missing(listinteraction)) invisible(return(list(nomfichX=nomfichX, axes=axes, Xinitial=Xinitial, meanX=meanX, varX=varX, corX=corX,Xini=Xini,covXY=covXY, cX=cX,compX=compX,BsplineX=BsplineX,Bcod=Bcod,Transf=Transf,AA=AA,ranget=ranget,D=D,nomfichY=nomfichY,Yinitial=Yinitial, dimension=dimension,alph=alph, meanY=meanY,varY=varY,VY=VY,corY=corY,Yini=Yini,cY=cY,compY=compY,Yhat=Yhat,BETALCR=BETALCR,YH=YH,listknots=listknots,listknotsn=listknotsn,R2=R2,knots=knots,interaction=interaction,Xvariables=Xvariables))) else { if(missing(listinteraction)) invisible(return(list(nomfichX=nomfichX, axes=axes, Xinitial=Xinitial, meanX=meanX, varX=varX, corX=corX,Xini=Xini,covXY=covXY, cX=cX,compX=compX,BsplineX=BsplineX,Bcod=Bcod,Transf=Transf,AA=AA,ranget=ranget,D=D,nomfichY=nomfichY,Yinitial=Yinitial, dimension=dimension,alph=alph, meanY=meanY,varY=varY,VY=VY,corY=corY,Yini=Yini,cY=cY,compY=compY,Yhat=Yhat,BETALCR=BETALCR,YH=YH,listknots=listknots,listknotsn=listknotsn,R2=R2,knots=knots,interaction=interaction,Bcodint=Bcodint,dimensionint=dimensionint,namesint=namesint, namesint1=namesint1,Transfint=Transfint,rangetint=rangetint,AAint=AAint,Bcodintmean=Bcodintmean,Xvariables=Xvariables))) else invisible(return(list(nomfichX=nomfichX, axes=axes, Xinitial=Xinitial, meanX=meanX, varX=varX, corX=corX,Xini=Xini,covXY=covXY, cX=cX,compX=compX,BsplineX=BsplineX,Bcod=Bcod,Transf=Transf,AA=AA,ranget=ranget,D=D,nomfichY=nomfichY,Yinitial=Yinitial, dimension=dimension,alph=alph, meanY=meanY,varY=varY,VY=VY,corY=corY,Yini=Yini,cY=cY,compY=compY,Yhat=Yhat,BETALCR=BETALCR,YH=YH,listknots=listknots,listknotsn=listknotsn,R2=R2,knots=knots,Bcodint=Bcodint,dimensionint=dimensionint, namesint1=namesint1,Transfint=Transfint,rangetint=rangetint,AAint=AAint,Bcodintmean=Bcodintmean,listinteraction=listinteraction,Xvariables=Xvariables))) } } } #################################################### MVcut<-function(X,qual,nbmod=3,equibreaks=F,breaks,labelsquant,labelsqual,graph=F,cexpar=1,labpar=c(5,7,7)) { # "M"ulti "V"ariables "cut"ing of quantitative variables # codage disjonctif complet de variables quantitatives et qualitatives placées en dernier #ENTREES # X matrice de variables quantitatives + d'éventuelles variables qualitatives (booleennes ou entières) #qual, entier numero de la première colonne qualitative (les autres suivent) #nbmod vecteur entiers, nombre de modalités de chaque variable quantitative #equibreaks vecteur booleen, si T séparateurs internes equidistants si F aux nbmod quantiles #breaks liste, liste des vecteurs de separateurs interieurs a chaque variable quantitative #labelsquant liste des noms des modalités des variables quantitatives #labelsqual liste des noms des modalités des variables qualitatives # labpar defaults to c(5,7,7), that is c(x,y,len) where x and y give the (approximate) number of tickmarks on the x and y axes # graph, T or F, if T, one can choose graphically the nb of levels and the location of break points when breaks is missing #SORTIES #X dataframe, tableau d'entiers des modalites #U matrice du codage disjonctif complet #nbmod vecteur du nb de modalités #breaks liste des vecteurs des séparateurs intérieurs + le max de la variable quantitavive #labels liste des noms des modalités des variables #EXEMPLE # # stateafcm<-MVcut(cbind(state.x77,Region=codes(state.region)),qual=ncol(state.x77)+1,labelsqual=sort(levels(state.region))) # ou bien # stateafcm<-MVcut(cbind(state.x77,Region=as.numeric(state.region)),qual=ncol(state.x77)+1,labelsqual=levels(state.region)) # afc(stateafcm,AFCM=T) # pour l'afcm du tableau des var quantitatives + la var qualitative des 4 modalités région # # par(pty="m",lab=labpar) X<-as.matrix(X) n<-nrow(X) pX<-ncol(X) if(!missing(qual)){ if(qual>1){ Xquant<-X[,1:(qual-1),drop=F] dimnames(Xquant)[[2]]=dimnames(X)[[2]][1:(qual-1)] } else { Xquant<-NULL U<-NULL } Xqual<-X[,qual:pX,drop=F] } else { Xquant<-X Xqual<-NULL } if(is.matrix(Xquant)) {# p<-ncol(Xquant) if(length(nbmod)==1) nbmod<-rep(nbmod,p) if(length(equibreaks)==1)equibreaks<-rep(equibreaks,p) if(missing(breaks)) { breaks<-rep(list(NULL),p) for(i in 1:p) { if(equibreaks[i]) {etend<-range(Xquant[,i]) hh<-diff(etend)/nbmod[i] for(j in 1:(nbmod[i])) breaks[[i]][j]<-etend[1]+j*hh } else { breaks[[i]]<-quantile(Xquant[,i],probs=(1:nbmod[i])/nbmod[i]) } endbreaks=breaks[[i]][nbmod[i]] if(graph) { titre="breaks : " for(j in 1:(nbmod[i]-1)) titre=c(titre,as.character(round(breaks[[i]][j],2))) titi=cut(Xquant[,i],breaks=c(min(Xquant[,i])-1,breaks[[i]]),labels=F) plot(Xquant[,i],ylab=dimnames(X)[[2]][i],type="n") text(Xquant[,i],dimnames(Xquant)[[1]],cex=cexpar,col=titi+1) abline(h=breaks[[i]][1:(nbmod[i]-1)],lty=6,lwd=2) title(main=titre) repeat{ cat(paste(dimnames(X)[[2]][i]," :\n")) cat(paste("nunber of levels : ",nbmod[i],"\n")) cat("Change or not that number ? (y/n)") repo1<-scan(quiet=T,what=character(),n=1) if(length(repo1)==0) repo1="n" if((repo1=="y")|(repo1=="Y")) {#y1 cat("nb of levels for ",dimnames(X)[[2]][i], " : ") nbmod[i]=scan(quiet=T,what=numeric(),n=1) cat("\n") }#y1 end if(repo1=="n") {cat("Try new",nbmod[i]-1 ,"break points for ",dimnames(X)[[2]][i],"(y,Y/n,N)?") repo<-scan(quiet=T,what=character(),n=1) } else { if(nbmod[i]==2) cat("Try new",nbmod[i]-1 ,"break point for ",dimnames(X)[[2]][i]) else cat("Try new",nbmod[i]-1 ,"break points for ",dimnames(X)[[2]][i],"\n") repo="y" } if(length(repo)==0)repo="n" if((repo=="y")|(repo=="Y")) {#y # repeat{ if(nbmod[i]>2)cat("the breaks :\n") toto<-scan(quiet=T,"", numeric(),n=nbmod[i]) #cat("OK (y,Y/n,N) for ",toto," ") #repon<-scan(quiet=T,what=character(),n=1) #if(length(repon)==0)repon<-"y" #if((repon=="y")|(repon=="Y")) break #if(length(toto)==nbmod[i])break # } breaks[[i]]<-c(toto,endbreaks) titre="breaks : " for(j in 1:(nbmod[i]-1)) titre=c(titre,as.character(round(breaks[[i]][j],2))) titi=cut(Xquant[,i],breaks=c(min(Xquant[,i])-1,breaks[[i]]),labels=F) plot(Xquant[,i],ylab=dimnames(X)[[2]][i],type="n") text(Xquant[,i],dimnames(Xquant)[[1]],cex=cexpar,col=titi+1) abline(h=breaks[[i]][1:(nbmod[i]-1)],lty=4,lwd=2) title(titre) #browser() }#end y if(repo=="n") { cat("------------------------------\n") break } }#end repeat }#end graph }# end for }#end missing breaks else {nbmod<-rep(0,p) for(i in 1:p) {breaks[[i]]<-c(breaks[[i]],max(Xquant[,i])) nbmod[i]<-length(breaks[[i]]) } } #browser() if(missing(labelsquant)){ labelsquant<-rep(list(NULL),p) for(i in 1:p) labelsquant[[i]]<-paste(dimnames(Xquant)[[2]][i],format(1:nbmod[i]),sep="") } mat<-Xquant for(i in 1:p) mat[,i]<-cut(Xquant[,i],breaks=c(min(Xquant[,i])-1,breaks[[i]]),labels=F) codage<-Codisjc(mat) X<-mat U<-codage$U }# if(is.matrix(Xqual)) {## cat("variables Entières ou Codage disjonctif complet?(E ou C)\n") pltc <- scan(quiet=T,"", character(), 1) if((pltc=="E")|(pltc=="e")) {codage1<-Codisjc(Xqual) V<-codage1$U if(missing(labelsqual)) labelsqual<-codage1$labels else dimnames(V)[[2]]<-labelsqual U<-cbind(U,V) nbmodqual<-codage1$nbmod } else { pqual<-sum(Xqual[1,]) if(pqual==1)cat("rentrez le nombre de modalités de cette variable\n") else cat("Rentrez le nombre de modalités pour chacune des",pqual,"variables\n") nbmodqual<-scan(quiet=T,"",numeric(),pqual) U<-cbind(U,Xqual) if(missing(labelsqual)) labelsqual<-dimnames(Xqual)[[2]] } }## #B<-t(U)%*%U if((is.matrix(Xquant))&(is.matrix(Xqual))) { if((pltc=="E")|(pltc=="e")) X<-cbind(mat,Xqual) labels<-c(labelsquant,labelsqual) nbmod<-c(nbmod,nbmodqual) } if((!is.matrix(Xquant))&(is.matrix(Xqual))) { labels<-labelsqual nbmod<-nbmodqual } if((is.matrix(Xquant))&(!is.matrix(Xqual))) labels<-labelsquant X<-as.data.frame(X) if(is.matrix(Xquant)) { for(i in 1:dim(Xquant)[2]) for(j in 1:nbmod[i]) { if(sum(U[,j])==0) { cat("the variable ",dimnames(Xquant)[[2]][i]," is of ",j,"th empty level!\n") return() } } return(list(X=X,U=U,nbmod=nbmod,labels=labels,breaks=breaks)) } else return(list(U=U,nbmod=nbmod,labels=labels)) } #################################################### f<-function(signal="",n=100,p=1,stdv=1,seedpar=20) { # FUNCTION of X to generate (X,Y) training data bases for multivariate responses # INPUTS: # signal, vector of character strings of the signal as a function of X # examples : signal="10*sin(pi*X[,1]*X[,2])+20*(X[,3]-0.5)^2+10*X[,4]+5*X[,5]" # signal=c("10*sin(pi*X[,1]*X[,2])","15*exp(-(X[,4]+2*X[,5]))") # n, number of observations # p, number of predictors # stdv, vector of standard deviations of the normal errors from the signal # seedpar, positive integer, the seed of the random numbers # OUTPUTS # X, matrix nxp of the sample predictors (uniform on [0,1]) # Y, matrix nxlength(signal) of the responses Y_i = f_i(X) +eps_i # Ysignal, matrix nxlength(signal) of the responses Y_i = f_i(X) #example : # f2<-"10*sin(pi*X[,1]*X[,2])+20*(X[,3]-0.5)^2+10*X[,4]+5*X[,5]" # MAPLSS(f(f2,100,10)$X,f(f1,100,10)$Y,degree=c(1,1,2,1,1,1),listknots=list(0.5,0.5,0.5,NULL,NULL)) set.seed(seedpar) X<-matrix(runif(p*n,0,1),n,p) dimnames(X)<-list(1:n,paste("X",1:p,sep="")) Y<-matrix(0,n,length(signal)) Ysignal=matrix(0,n,length(signal)) if(length(signal)!=length(stdv))stdv=rep(stdv[1],length(signal)) for(i in 1:length(signal)) { eps<-rnorm(n,sd=stdv[i]) Y[,i]<-eval(parse("",text=signal[i])) if(is.null(Y[,i]))Y[,i]<-0 Ysignal[,i]=Y[,i] Y[,i]<-Y[,i]+eps } if(length(signal)>1) { dimnames(Y)<-list(1:n,paste("Y",1:length(signal),sep="")) dimnames(Ysignal)<-list(1:n,paste("signal",1:length(signal),sep="")) } else { dimnames(Y)<-list(1:n,"Y") dimnames(Ysignal)<-list(1:n,"signal") } return(list(X=X,Y=Y,Ysignal=Ysignal)) } ################################################################### simulMAPLSS<-function(p,N,Ntest=N,signal,sigmaN=1,nbiter=1,seedpargap=nbiter,GCV=2,standX=T,standY=T,D=1,A=8, degree=1,knots=0,equiknots=F,listknots,prop=0.1,PRESSprop=0.2,interaction=1:p) { # the function that computes MSE's with MAPLSS on simulated training-test datasets created by the function f() # INPUTS # p, nb of predictors # N, nb of the training samples # Ntest, nb of test samples, default=N # nbiter, integer, the number of simulations, default 1 # seedpargap, integer, default=nbiter, the difference between both random seeds for generating through f() # the training X-data and the testing X data # (at the ith simulation, seed for training = i, seed for testing = i + seedpargap) # signal, character string, text of the signal as a function of X[,1],.....,X[,p] # sigmaN, real positive, default 1, the stdev of the additive normal error # PRESSprop, real positive in [0,1], default 0.2, the PRESS/GCV threshold for rejecting # an interaction candidate # GCV, real positive, default=2, the tuning GCV parameter in [0,4[, if 0, natural CV is processed. # prop, real positive in [0,1], the proportion of out samples in CV (active when GCV=0). # A, integer, default 8, the max number of dimensions to explore # degree, knots, equiknots, listknots, the spline parameters of the PLSS function # OUTPUTS # simul, a list of nbiter detailed results of the simulations # Aexplore, integer equal to A # trainingsamples, equal to N # testsamples, equal to Ntest # seedpargap # NBsimul, equal to nbiter # threshold, equal to PRESSprop*100 # durationHMS, vector of the duration c(Hour,Minutes,Seconds) # MSE, vector of nbiter optimal (according to dimension) MSE values # interactionflag, boolean vector of nbiter T/F detected interactions (not necessarily the good ones) # have a look at simul for the detected interactions. # EXAMPLE #> f1="10*sin(pi*X[,1]*X[,2])+20*(X[,3]-0.5)^2+10*X[,4]+5*X[,5]" #>try1=simulMAPLSS(10,100,signal=f1,nbiter=100,degree=c(1,1,2,1,1,1,1,1,1,1),listknots=list(0.5,0.5,0.5,NULL,NULL,NULL,NULL,NULL,NULL,NULL)) #> try1$durationHMS # H M S # 0 45 50 #> summary(try1$MSE) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.2563 0.3982 0.4614 0.5015 0.5541 1.0560 Time=c("debut","fin") Time[1]=date() library(splines) options(warn=-1) step<-list(NULL) resul<-list(NULL) for(i in 1:nbiter) { datasets=f(signal=signal,n=N,p=p,stdv=sigmaN,seedpar=i) matX=datasets$X y=datasets$Y ysignal=datasets$Ysignal #generation of test sample data datasetstest=f(signal=signal,n=Ntest,p=p,stdv=sigmaN,seedpar=i+seedpargap) matXtest=datasetstest$X ysignaltest=datasetstest$Ysignal if(missing(listknots)) { step[[i]]<-maplss2(matX,y,Xtest=matXtest,Ytest=ysignaltest,standX=standX,standY=standY,GCV=GCV,D=D,A=A, degree=degree,knots=knots,equiknots=equiknots,eps=eps,interaction=interaction, prop=prop,colpar=colpar,typedata=typedata, titlepar=titlepar,pchpar=pchpar,cexpar=cexpar,nbpoints=nbpoints,ptypar=ptypar, askpar=askpar,PRESSprop=PRESSprop) } else {step[[i]]<-maplss2(matX,y,Xtest=matXtest,Ytest=ysignaltest,standX=standX,standY=standY,GCV=GCV,D=D,A=A, degree=degree,knots=knots,equiknots=equiknots,listknots=listknots,eps=eps,interaction=interaction, prop=prop,colpar=colpar,typedata=typedata, titlepar=titlepar,pchpar=pchpar,cexpar=cexpar,nbpoints=nbpoints,ptypar=ptypar, askpar=askpar,PRESSprop=PRESSprop) } resul[[i]]<-step[[i]] } Time[2]=date() names(Time)=c("start","stop") Time1=Time[1] Time2=Time[2] T1J=substr(Time1,09,10) T2J=substr(Time2,09,10) mode(T2J)="real" mode(T1J)="real" T1H=substr(Time1,12,13) T2H=substr(Time2,12,13) mode(T2H)="real" mode(T1H)="real" T2H=T2H+24*(T2J-T1J) T1H=T1H*3600 T2H=T2H*3600 T1M=substr(Time1,15,16) T2M=substr(Time2,15,16) mode(T2M)="real" mode(T1M)="real" T1H=T1H+T1M*60 T2H=T2H+T2M*60 T1S=substr(Time1,18,19) T2S=substr(Time2,18,19) mode(T2S)="real" mode(T1S)="real" T1H=T1H+T1S T2H=T2H+T2S Lasting=T2H-T1H LastingH=Lasting%/%3600 LastingM=(Lasting-LastingH*3600)%/%60 LastingS=Lasting%%60 cat("Duration =",LastingH,"h",LastingM,"mn",LastingS,"s ~~~~~~",Lasting/nbiter,"seconds/simulation\n") durationHMS=c(LastingH,LastingM,LastingS) names(durationHMS)=c("H","M","S") #browser() MSE=NULL interactionflag=NULL for(i in 1:nbiter) { MSE=c(MSE,resul[[i]]$MSE[resul[[i]]$Aopt]) interactionflag=c(interactionflag,resul[[i]]$interactionflag[1]) } cat("summary of the MSE's for",nbiter,"simulations\n") print(summary(MSE)) return(list(simul=resul,Aexplore=A,trainingsamples=N,seedpargap=seedpargap,testsamples=Ntest,NBsimul=nbiter,threshold=PRESSprop*100,GCV=GCV,durationHMS=durationHMS,MSE=MSE,interactionflag=interactionflag)) } #################################################### maplss2<-function(X,Y,Xtest,Ytest,standX=T,standY=T,D=1,A=2,degree=1,knots=0,equiknots=F,eps=1e-8,listknots, interaction=1:ncol(X),GCV=2,prop=0.1,colpar=1,typedata=T,titlepar=T,pchpar=1,cexpar=0.7, nbpoints=50,ptypar="s",askpar=F,PRESSprop=0.2,thetapar=-60,phipar=30,rpar=10,impres=F,bgpar="lightblue") { # Bibliography : # # J. F. Durand. "Local Polynomial Additive Regression through PLS and Splines: PLSS", # Chemometrics and Intelligent Laboratory Systems 58, 235-246, 2001. # # J. F. Durand and R. Lombardo. "Interactions terms in nonlinear PLS via additive spline # transformations". «Between Data Science and Applied Data Analysis», Studies in Classification, # Data Analysis, and Knowledge Organization. Eds M.Schader, W. Gaul and M. Vichi, Springer, 22-29, 2003 # interflag<-F p<-ncol(X) q<-ncol(Y) n<-nrow(X) ordre<-degree +1 resulini<-NULL if(GCV==0) texto<-paste("PRESS(",prop,", . )",sep="") else texto<-paste("GCV(",GCV,", . )",sep="") X0<-as.matrix(X) Y<-as.matrix(Y) Xinitial=X0 Yinitial=Y centrageX <- Dvar(Xinitial, D = D, cor = F) meanX <- centrageX$mean varX <- centrageX$var centrageY <- Dvar(Yinitial, D = D, cor = F) meanY <- centrageY$mean varY <- centrageY$var # centrage et reduction des matrices tests par rapport aux données d'aprentissage centX<-Dcentred(Xinitial,D=D) centY<-Dcentred(Yinitial,D=D) if(!missing(Ytest)) { Yinitest<-sweep(Ytest, 2,centY$moy) if(standY) Yinitest<-sweep(Yinitest,2,sqrt(centY$var),FUN="/") dimnames(Yinitest)<-dimnames(Ytest) } if(standX) Xini<-as.matrix(centX$Xcr) else Xini<-as.matrix(centX$Xc) if(standY) Yini<-as.matrix(centY$Xcr) else Yini<-as.matrix(centY$Xc) dimnames(Xini)<-dimnames(Xinitial) dimnames(Yini)<-dimnames(Yinitial) Xvariables<-rep(T,ncol(X)) interaction01<-rep(F,p) interaction01[interaction]<-T if(!missing(listknots)) { if(length(listknots)!=p){cat("incorrect number of knot vectors\n") return()} knots<-vector("numeric",p) for(i in 1:p)knots[i]<-length(listknots[[i]]) } listknotsn<-list(NULL)#list of centered (possibly scaled) interior knots if(missing(listknots)){ listknotsn<-list(NULL) if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) listknots<-listknotsn } else {if(p>1) for(i in 1:(p-1))listknotsn<-c(listknotsn,list(NULL)) for(i in 1:p){ if(!is.null(listknots[[i]])){ #if(centerX)listknotsn[[i]]<-listknots[[i]]-meanX[i] if(standX)listknotsn[[i]]<-(listknots[[i]]-meanX[i])/sqrt(varX[i]) else listknotsn[[i]]<-listknots[[i]]-meanX[i] } } } order=degree+1 BsplineXini <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=T,D=D,tt=listknotsn) BsplineXintini <- Bsplinen(Xini,ordre=order,nbni=knots,noeudequi=equiknots, center=F,D=D,tt=listknotsn) itXini <- BsplineXini$v dimensionini<-BsplineXini$ordre+BsplineXini$nbni if(!missing(Xtest)) { predictexto=", External prediction" if(is.null(dimnames(Xtest))){ if(length(Xtest)==dim(X)[2])Xtest0<-matrix(Xtest,1,length(Xtest)) dimnames(Xtest0)<-list(paste("x",1:(length(Xtest)/length(X)),sep=""),dimnames(X)[[2]]) } else Xtest0<-as.matrix(Xtest) } else predictexto="" if(!missing(Xtest)) { Xinitest<-sweep(Xtest0, 2,centX$moy) if(standX) Xinitest<- sweep(Xinitest,2,sqrt(centX$var),FUN="/") dimnames(Xinitest)<-dimnames(Xtest0) BsplineXtest <- Bsplinen(rbind(Xinitest,apply(Xini,2,min),apply(Xini,2,max)),ordre=order, nbni=knots,center=F,D=D,tt=listknotsn) itXtest <- BsplineXtest$v } #repeat #{ #cat("==========================================================\n") #reponse<-menu(c(paste(texto,"for pure main effects models (mandatory)"),"Automatic selection of interactions",paste("Validate the dimension, Look at the model",predictexto,sep=""), "Prune main effects and interactions"),title="Multivariate Additive Partial Least-Squares Splines (0 to exit)") #cat("==========================================================\n") #if(reponse==0)break #if((reponse==1)&(!interflag)) #{ #cat("*************************************************\n") #cat("1:",texto," for main effects models (mandatory)\n") #cat("*************************************************\n") if(length(degree)>1)degree0<-degree[Xvariables] else degree0<-degree if(length(knots)>1)knots0<-knots[Xvariables] else knots0<-knots if(length(equiknots)>1)equiknots0<-equiknots[Xvariables] else equiknots0<-equiknots if(!missing(listknots)) listknots0<-listknots[Xvariables] ordre0<-degree0+1 #cat("Number of dimensions to explore?\n") #Aexplore<-scan(quiet=T,"",numeric(),1) Aexplore=A i<-1 exp0=1 crossvalmem=NULL if(GCV==0)Count=prop else Count=GCV #repeat #{ if(missing(listknots)) { resul<-plsscv(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=Aexplore,degree=degree0,knots=knots0 ,equiknots=equiknots0,interaction=NULL,GCV=GCV,prop=prop,impres=F) } else { resul<-plsscv(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=Aexplore,degree=degree0,knots=knots0 ,equiknots=equiknots0,listknots=listknots0,interaction=NULL,GCV=GCV,prop=prop,impres=F) } if(GCV==0){ A=order(resul$PRESStot)[1] PRESScounter<-resul$PRESStot[A] } else { A<-order(resul$GCrit)[1] PRESScounter<-resul$GCrit[A] } if(missing(listknots)) { resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,knots=knots0, equiknots=equiknots0,eps=eps,interaction=NULL,impres=F) } else { resulini<-plss(X[,Xvariables],Y,standX=standX,standY=standY,D=D,A=A,degree=degree0,knots=knots0, equiknots=equiknots0,eps=eps,listknots=listknots0,interaction=NULL,impres=F) } R2mat<-resulini$R2 R2mat<-rbind(R2mat,cumsum(resulini$VY/sum(Dvar(resulini$Yini)$var)*100)) dimnames(R2mat)<-list(c(dimnames(Y)[[2]],"% TOTAL Y-VAR"),paste("Dim",1:resulini$axes,sep="")) #cat("R2 coefficients according to Dimensions\n") #print(round(R2mat,4)) Bcod<-resulini$Bcod BsplineXint <- Bsplinen(resulini$Xini,ordre=ordre0,nbni=knots0,noeudequi=equiknots0, center=F,D=D,tt=resulini$listknotsn) #}#endreponse=1 #if((reponse==2)&(!interflag)) #{ rint<-sum(interaction01) #cat("*************************************************\n") #cat("2.1: Evaluating Separately the",rint*(rint-1)/2,"Possible Interactions : \n") #cat("*************************************************\n") #cat("Number of Dimensions to explore?\n") #Aexplore<-scan(quiet=T,"",numeric(),1) Bcodintcum<-NULL crittot<-0 namescrittot<-NULL PRESScritcum<-NULL if(rint==1) {cat("No possible interactions, ncol(X)=1 !!") break } intermatrix<-matrix(0,rint*(rint-1)/2,8) #cat("All", rint*(rint-1)/2, "possible bivariate interactions are tested. Please WAIT....\n") count<-1 for(i in 1:(rint-1)) { for(j in (i+1):rint) { #cat(count,",") Bcodint<-Dcenter(interactionij(BsplineXint,interaction[i],interaction[j]),D=D)$X Bcodintcum<-cbind(Bcod,Bcodint) resulpls<-pls(X=cbind(Bcod,Bcodintcum),Y=resulini$Yini,standX=F,standY=F,A=Aexplore,D=D,splflag=T,impres=F,graph=F,eps=eps) #crit<-round((resulpls$R2c[A]-resulini$R2[A])/resulini$R2[A],7) #intermatrix[count,1:3]<-c(interaction[i],interaction[j],crit) crossval<-plscv(X=cbind(Bcod,Bcodintcum),Y,standX=F,standY=standY,prop=prop,A=Aexplore,GCV=GCV,impres=F) if(GCV==0){ PRESSmin<-min(crossval$PRESStot) Ami<-order(crossval$PRESStot)[1] } else { PRESSmin<-min(crossval$GCritot) Ami<-order(crossval$GCritot)[1] } PRESScrit<-(PRESScounter-PRESSmin)/PRESScounter PRESScritcum<-c(PRESScritcum,PRESScrit) crit<-round((resulpls$R2c[Ami]-resulini$R2[A])/resulini$R2[A],7) intermatrix[count,1:3]<-c(interaction[i],interaction[j],crit) crittot<-c(crittot,crit+PRESScrit) namescrittot<-c(namescrittot,paste(dimnames(resulini$Xini)[[2]][interaction[i]],"*",dimnames(resulini$Xini)[[2]] [interaction[j]],sep="")) intermatrix[count,4:6]<-c(PRESScrit,crit+PRESScrit,Ami) count<-count+1 #browser() } } #cat("\n") crittot<-crittot[-1] names(crittot)<-namescrittot if(GCV==0) dimnames(intermatrix)<-list(namescrittot,c("i","j",paste("R2CRIT(",format(A),")",sep=""), "PRESSCRIT","TOTCRIT","A","PRESS","%rel.PRESSgain")) else dimnames(intermatrix)<-list(namescrittot,c("i","j",paste("R2CRIT(",format(A),")",sep=""), "PRESSCRIT","TOTCRIT","A","GCV","%rel.GCVgain")) revorder<-rev(order(crittot)) orderinteract<-rev(sort(crittot)) selectintermatrix<-intermatrix[revorder[1:nrow(intermatrix)],,drop=F] #if(impres)print(selectintermatrix[,1:6]) #browser() #par(mfrow=c(1,1)) #barplot(orderinteract,ylab=paste("TOTCRIT = R2CRIT +",texto,"CRIT"),xlab="candidate interactions",col=2:nrow(intermatrix),histo=T,cex.names=cexpar,cex.axis=cexpar)abline(v=sum(selectintermatrix[,5]>0)*1.225,col=colpar+1) i<-1 listinteraction<-list(NULL) Bcodintcum<-Bcod currentintermatrix<-matrix(0,nrow=1,ncol=8) CVseq<-NULL #cat("*************************************************\n") #cat("2.2: Incorporating interactions step by step : \n") #cat("*************************************************\n") #if(GCV==0) #cat(paste("Reference : Main effects PRESS(",prop,",",A,") = ",round(PRESScounter,7),sep=""),"\n") #else #cat(paste("Reference : Main effects GCV(",GCV,",",A,") = ",round(PRESScounter,7),sep=""),"\n") #cat("-------------------------------------------------\n") PRESScounter0<-PRESScounter repeat { #cat("candidate ",i," : ",dimnames(selectintermatrix)[[1]][i]) if(selectintermatrix[i,5]<0){ i<-i-1 #cat(" REFUSED.\n") break} listinteraction[[i]]<-selectintermatrix[i,1:2] Bcodintcum<-cbind(Bcodintcum, Dcenter(interactionij(BsplineXint,selectintermatrix[i,1],selectintermatrix[i,2]),D=D)$X) crossval<-plscv(X=Bcodintcum,Y,standX=F,standY=standY,GCV=GCV,prop=prop,A=Aexplore,impres=F) if(GCV==0)PRESSmin<-min(crossval$PRESStot) else PRESSmin<-min(crossval$GCritot) gain<-(PRESScounter-PRESSmin)/PRESScounter if(gain f1="10*sin(pi*X[,1]*X[,2])+20*(X[,3]-0.5)^2+10*X[,4]+5*X[,5]" #> try1=simulMARS(10,100,signal=f1,nbiter=100) #> try1$durationHMS # Duration = 0 h 0 mn 2 s ~~~~~~ 0.02 seconds/simulation #> summary(try1$MSE) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.3968 0.8199 1.1880 1.4630 1.8430 7.4470 #(results with simulMAPLSS) # H M S # 0 45 50 #> summary(try1$MSE) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 0.2563 0.3982 0.4614 0.5015 0.5541 1.0560 Time=c("debut","fin") Time[1]=date() library(mda) options(warn=-1) step<-list(NULL) resul<-list(NULL) MSE=rep(0,nbiter) for(i in 1:nbiter) { datasets=f(signal=signal,n=N,p=p,stdv=sigmaN,seedpar=i) matX=datasets$X y=datasets$Y ysignal=datasets$Ysignal #generation of test sample data datasetstest=f(signal=signal,n=Ntest,p=p,stdv=sigmaN,seedpar=i+seedpargap) matXtest=datasetstest$X ysignaltest=datasetstest$Ysignal resul[[i]]=mars(matX,y,degree=degree,penalty=penalty) pred.mars=predict(resul[[i]],matXtest) MSE[i]=sum((pred.mars-ysignaltest)^2)/length(ysignaltest) } Time[2]=date() names(Time)=c("start","stop") Time1=Time[1] Time2=Time[2] T1J=substr(Time1,09,10) T2J=substr(Time2,09,10) mode(T2J)="real" mode(T1J)="real" T1H=substr(Time1,12,13) T2H=substr(Time2,12,13) mode(T2H)="real" mode(T1H)="real" T2H=T2H+24*(T2J-T1J) T1H=T1H*3600 T2H=T2H*3600 T1M=substr(Time1,15,16) T2M=substr(Time2,15,16) mode(T2M)="real" mode(T1M)="real" T1H=T1H+T1M*60 T2H=T2H+T2M*60 T1S=substr(Time1,18,19) T2S=substr(Time2,18,19) mode(T2S)="real" mode(T1S)="real" T1H=T1H+T1S T2H=T2H+T2S Lasting=T2H-T1H LastingH=Lasting%/%3600 LastingM=(Lasting-LastingH*3600)%/%60 LastingS=Lasting%%60 cat("Duration =",LastingH,"h",LastingM,"mn",LastingS,"s ~~~~~~",Lasting/nbiter,"seconds/simulation\n") durationHMS=c(LastingH,LastingM,LastingS) names(durationHMS)=c("H","M","S") #browser() cat("summary of the MSE's for",nbiter,"simulations\n") print(summary(MSE)) return(list(simul=resul,trainingsamples=N,seedpargap=seedpargap,testsamples=Ntest,NBsimul=nbiter,durationHMS=durationHMS,MSE=MSE)) } #################################################### Bspline=function(X,beta,degree=1,knots=0,equiknots=T,listknots,matrow=1,matcol=1, nbpoints=150,cexpar=0.7,bgpar="white",askpar=T,newplot=T,data=T,graph=T,titlepar=T,colorpar=F) # Bspline is used compute the B matrix, when graph=T, to display B-splines functions and secondarily the regression spline function # inputs # X, vector or matrix whose values are to be transformed by B-splines. If ncol(X)>1, select one column to display curves # beta, vector of the weights, could be missing # graph=T the graphs of the B-splines are displayed for the selected predictor # newplot, T displays one Bspline per plot, F, all Bsplines displayed on a unique plot # data, T displays the transformed data # titlepar, T displays the beta values at the top of the plot, F, no title. # colorpar, F curves in black, T B-spline curves colored when newplot=F # outputs: # the coding matrix B and the spline inputs for a vector or a matrix X, the beta values if any { X=as.matrix(X) columnlist2=NULL if(is.null(dimnames(X))) dimnames(X)=list(format(1:nrow(X)),paste("X",1:ncol(X),sep="")) if(!missing(listknots))knots=length(listknots[[1]]) BB=Bsplinen(X,ordre=degree+1,nbni=knots,noeudequi=equiknots,center=F,D=D,tt=listknots) B=BB$v[[1]][[1]] if(!missing(beta)&&(length(beta)!=dim(B)[2]))cat("length of beta not available!\n") if(missing(beta)|((!missing(beta))&&(length(beta)!=dim(B)[2]))) { cat("To plot the regression spline, enter the",dim(BB$v[[1]][[1]])[2]," beta weights, if not enter RETURN\n") beta=scan(quiet=T,"",numeric(),n=dim(BB$v[[1]][[1]])[2]) } if(graph&(length(beta)!=0))cat("beta = ",beta,"\n") titi="" for(i in 1:length(beta))titi=paste(titi,beta[i]) #degree=BB$ordre-1 #knots=BB$nbni #equiknots=BB$noeudequi listknots=BB$intknots if(degree[1]!=0) dimnames(B)=list(dimnames(X)[[1]],paste("B1*",format(1:ncol(BB$v[[1]][[1]])),sep="")) else dimnames(B)=list(dimnames(X)[[1]],paste(dimnames(X)[[2]][1],format(1:ncol(BB$v[[1]][[1]])),sep="")) if(ncol(X)>1) { for(i in 2:ncol(X)) { if(BB$ordre[i]!=1) dimnames(BB$v[[i]][[1]])[[2]]=paste("B",i,"*",1:ncol(BB$v[[i]][[1]]),sep="") else dimnames(BB$v[[i]][[1]])[[2]]=paste(dimnames(X)[[2]][i],1:ncol(BB$v[[i]][[1]]),sep="") B=cbind(B,BB$v[[i]][[1]]) }} if(graph) { par(ask=askpar) if(ncol(X)>1) {cat(paste("The variable column number (<=",ncol(X),")")) column=scan(quiet=T,"", numeric(), 1) } else column=1 BX=Bsplinen(X[,column,drop=F],ordre=(degree+1)[column],nbni=knots[column],noeudequi=equiknots[column],center=F,D=D,tt=listknots[column]) x=seq(min(X[,column]),max(X[,column]),length=nbpoints) bbx=Bsplinen(x,ordre=(degree+1)[column],nbni=knots[column],noeudequi=equiknots[column],center=F,D=D,tt=listknots[column]) bx=bbx$v[[1]][[1]] #browser() par(mfrow=c(matrow,matcol),bg=bgpar) for(i in 1:dim(BX$v[[1]][[1]])[2]) { if(newplot)titre=paste("B",i,sep="") else if(i==1)titre="" if((i>1)&(newplot==F))par(new=T) if(colorpar)plot(x,bx[,i],xlab="",ylab="",ylim=c(0,1),main=titre,type="l",col=i) else plot(x,bx[,i],xlab="",ylab="",ylim=c(0,1),main=titre,type="l") rug(X[,column],ticksize=0.02,lwd=1.5,col="red") if(data)text(X[,column],BX$v[[1]][[1]][,i],dimnames(B)[[1]],col="red",cex=cexpar) if(knots!=0)abline(v=BX$intknots[[1]],lty=2) } if(length(beta)!=0) { if(titlepar) plot(x,bx%*%matrix(beta,length(beta),1),type="l",xlab=dimnames(X)[[2]][column],ylab="Spline",main=paste("beta=",titi)) else plot(x,bx%*%matrix(beta,length(beta),1),type="l",xlab=dimnames(X)[[2]][column],ylab="Spline") if(knots!=0)abline(v=BX$intknots[[1]],lty=2) rug(X[,column],ticksize=0.02,lwd=1.5,col="red") if(data)text(X[,column],BX$v[[1]][[1]]%*%matrix(beta,length(beta),1),dimnames(B)[[1]],col="red",cex=cexpar) } }#endifplots if(length(beta)!=0) return(list(B=B,degree=degree,knots=knots,equiknots=equiknots,listknots=listknots,beta=beta)) else return(list(B=B,degree=degree,knots=knots,equiknots=equiknots,listknots=listknots)) } ########################################################## Hotelling<-function(X,H=ncol(X),FSthreshold=0.95,ellipse=T,ii=1,jj=2,nbpoints=100,cexpar=0.8,titlepar=T) { ###### #INPUTS #X is the matrix of components #H the selected optimal number of components (optimal PRESS-dimension),defaults to ncol(X) #FSthreshold is the Fisher-Snedecor confidence level, defaults to 0.95 #ellipse, boolean, T for displaying the FSthreshold-Hotelling ellipse on the t(ii),t(jj) scatterplot #nbpoints defaults to 100 used to draw the ellipse with 100 points. #ii and jj, integers (smaller than H) indicating what ii,jj pair of components to display, default to 1 and 2. # OUTPUTS #T2 is the n by H matrix of the Hotelling T^2 of the n observations computed using components 1, 1&2,...,1&2&...&H #threshold is the vector of length H giving the F-S thresholds when using 1,2,...,H components #outliers is a 0/1 vector of length n, 1 indicating that the observation is an H-outlier ###### X=as.matrix(X) n<-nrow(X) outliers<-rep(0,n) # outliers calculated on H components, if a value is 1 the observation is an H-outlier T2<-matrix(0,nrow=n,ncol=H) dimnames(T2)=list(dimnames(X)[[1]],seq(1,H)) threshold=rep(0,H) #Hotelling's T2 statistic based on H components #### for(i in 1:H){ S<-var(X[,1:i])# variance, division by n-1 Sinv<-solve(as.matrix(S)) XSX<-(n/(n-1))*X[,1:i]%*%Sinv%*%t(X[,1:i]) T2[,i]<-diag(XSX) threshold[i]<-(i*(n^2-1)/(n*(n-i)))*qf(FSthreshold,i,n-i,lower.tail=T) # Obtain upper threshold percentile for Hotelling's T2 based on selected components }#end for #### #comparison with F critical: H-outliers for(i in 1:n){ if((T2[i,H])>=threshold[H]) outliers[i]<-1 }# end for # Detecting and printing eventual H-outliers ######## if(sum(outliers)>0) { out=(1:nrow(X))[outliers==1] print(paste("Outliers with",H,"components")) print(dimnames(X)[[1]][out]) cat("\n") } ######## #displaying the ellipse on the t(ii), t(jj) component's scatterplot if(ellipse) { par(pty="m",mfrow=c(1,1)) variance=apply(X,2,var)# variance, division by n-1 quadratic=variance[c(ii,jj)]*threshold[2] angles=seq(0, 2*pi, length.out=nbpoints) Xell=sqrt(quadratic[1])*cos(angles) Yell=sqrt(quadratic[2])*sin(angles) plot(c(X[,ii],Xell),c(X[,jj],Yell),type="n",xlab=paste("t",ii),ylab=paste("t",jj),cex=cexpar) abline(h=0,v=0) if(titlepar) title(main="Hotelling Ellipse T2 95%") points(Xell,Yell,type="l") text(X[,ii],X[,jj],dimnames(X)[[1]],cex=cexpar) }#end if ellipse ####### return(list(Components=X,T2=T2,H=H,threshold=threshold,FSthreshold=FSthreshold,outliers=outliers)) } ########################################################## discri<-function(X,g,Y,ResponseName="Y",imin=1,imax=nrow(Y),D=1, graph=T,ti=1,tj=2,cexpar=1,askpar=T) { # entrees: # X matrice dont les lignes sont les coordonnees de points(matrix of components) # g vecteur des modalites des groupes ou matrice du codage disjonctif complet # Y matrice des vecteurs lignes des coordonnees des points a affecter; ncol(Y)=ncol(X) # si Y manque, alors Y=X. # imin et imax indices des individus de Y a affecter. # D vecteur des poids des individus. # objectif: a quel groupe (a quelle modalite de g) affecter les lignes de Y ? # sortie : liste de composantes: # La matrice des centres de gravite des groupes, A. # La metrique de Mahalanobis de X, V. # Le vecteur des numero des groupes d'affectation des individus de Y, # groupe. # Si Y=X, (Y manquant) alors: # Le tableau croisant les groupes reels avec les groupes affectes, tableau. #-------------------------------------------------------------------- if(missing(Y)){ Y<-as.matrix(X) indicmiss=T } if(length(dim(g))>1) { if(dim(g)[2]==1) { indicator=rep(0,dim(g)[1]) indicator=g[,1] } if(sum(g)==dim(X)[1]) { indicator=rep(0,nrow(X)) for(i in 1:nrow(X)) for(j in 1:ncol(g)) if(g[i,j]==1)indicator[i]=j } g=indicator } # A matrice des coordonnees des centres de gravites # A est max(g) x ncol(X) ng<-max(g) codg<-codisj(g) G<-Mahalanobis(codg,D) if(length(D)==1) A<-G%*%t(codg)%*%X/nrow(X) else { XC<-D*X A<-G%*%t(codg)%*%XC } V<-Mahalanobis(X,D) #browser() groupe<-rep(0,nrow(Y)) for(j in imin:imax) { x<-Y[j,] U<-NULL for(i in 1:ng) U<-rbind(U,x) U<-(U-A) UU<-U%*%V aa<-NULL for(i in 1:ng) aa[i]<-sum(UU[i,]*U[i,]) distmin<-min(aa) i<-0 repeat {i<-i+1 if(aa[i]==distmin) break } groupe[j]<-i } tableau<-matrix(0,ng,ng) if(indicmiss) { for(i in 1:nrow(X)) tableau[groupe[i],g[i]]<-tableau[groupe[i],g[i]]+1 dimnames(tableau)<-list(paste("predicted",format(1:ng)),paste("real",format(1:ng))) if(graph) { cat("click on the plot to locate the legend\n") par(mfrow=c(1,1),pty="m",ask=askpar) if(length(ResponseName)==1) ResponseLevel=paste(ResponseName,1:max(g)) else {if(length(ResponseName)==max(g)) ResponseLevel=ResponseName else ResponseLevel=paste("Y",1:max(g)) } notgood=g!=groupe plot(X[,c(ti,tj)],type="n",xlab=dimnames(X)[[2]][ti],ylab=dimnames(X)[[2]][tj]) text(X[,c(ti,tj)],dimnames(X)[[1]],cex=cexpar,col=g+1) abline(h=0,v=0) points(A,pch=8,cex=cexpar*2,col=(min(g):max(g))+1) points(X[notgood,c(ti,tj)],pch=1,cex=cexpar*2.5) legend(locator(1),ResponseLevel,cex=cexpar,fill=(min(g):max(g))+1) } return(list(A=A,V=V,groupe=groupe,tableau=tableau)) } else return(list(A=A,V=V,groupe=groupe)) } ####################################################