# 2021/06/03 ######################################################################### # R commands for the # # # # "SourcesPls2090R.txt" package # ######################################################################### # New Hotelling Ellipses on the PLSL Regression on the Cornell Data : #------------------------------------------------------------------------ # To display a family of T2 ellipses on the same scatterplot initialized at T2=0.95 # one has to use alternately Hotelling() and HotellingEllipse() # cornell PLSL results (3 components retained) cornellresult=PLSL(cornell[,1:7],cornell[,8,drop=F]) names(cornellresult) [1] "Xvariables" "plsresult" "Components" # Initializing the plot with defaults: FSthreshold=0.95, ii=1 and jj=2 # The first 95%-ellipse is drawn. Hotelling(cornellresult$Components,axislabel=list("Dim 1","Dim 2","Dim 3")) # Next ellipse at 0.80 HOTcornell80=Hotelling(cornellresult$Components,FSthreshold=0.80) [1] "Outliers with components 1 and 2 ,T2=80% :" [1] "4" HotellingEllipse(HOTcornell80,colpar="violet") ######################################################################### # Modifications (release 2080) in the PLSL Steps for the One-Response-Case. # Adjusted R2 is computed. #------------------------------------------------------------------------ PLSL(juiceX[,1:6],juiceY[,1,drop=F],bgpar="white") # the optimal dimension is 2 (default prop=0.1, 2 out), PRESS(0.1,2)=0.2672 __________________________________________________________________ - Linear PLS - Total Variance of X = 6 Variance of Heavy = 1 , y0 = Heavy __________________________________________________________________ Dimension 1 cov(t1,y0)= 1.532 r(t1,y0)= 0.893 stdev(t1)= 1.716 stdev(y0)= 1 Heavy % of VAR R2 part. 0.797 79.717 .................. adjusted R2 for Heavy with dim. 1 and 6 predictors : R2_adjust= 0.725 .................. % of VAR X accounted for by the current comp. 
= 49.269 __________________________________________________________________ Dimension 2 cov(t2,y1)= 0.099 r(t2,y1)= 0.216 stdev(t2)=1.013 stdev(y1)= 0.45 Heavy % of VAR partial R2 0.010 0.950 cum. R2 0.807 80.667 .................. adjusted R2 for Heavy with dim. 2 and 6 predictors : R2_adjust= 0.739 .................. % of VAR X accounted for by the current comp. = 21.267 % of VAR X accounted for by all the comp. = 70.536 _______________________________________________________________ number of supplementary components ?1: 4 _______________________________________________________________ Dimension 6 cov(t6,y5)= 0.025 r(t6,y5)= 0.592 stdev(t6)=0.107 stdev(y5)= 0.392 Heavy % of VAR partial R2 0.054 5.377 cum. R2 0.900 90.016 .................. adjusted R2 for Heavy with dim. 6 and 6 predictors : R2_adjust= 0.865 .................. % of VAR X accounted for by the current comp. = 0.191 % of VAR X accounted for by all the comp. = 100 ¤¤¤¤¤ The 6 dimensional PLS regression becomes the OLS regression ¤¤¤¤¤ _______________________________________________________________ # Notice that the largest dimension 6 (the rank of X) gives: PLS model = OLS model # Optimal Dim. $Beta2 Intercept COND SiO2 Na K Ca Mg Heavy 7.728 0.002 -0.03 0.016 -0.046 0.007 0.033 #Largest Dim. $Beta6 Intercept COND SiO2 Na K Ca Mg Heavy 6.16 0.03 -0.046 -0.084 0.014 -0.073 -0.051 #--------- Checking that Dim 6 provides PLS=OLS ------------------ attach(juiceX) attach(juiceY) summary(lm(Heavy~COND+SiO2+Na+K+Ca+Mg)) Call: lm(formula = Heavy ~ COND + SiO2 + Na + K + Ca + Mg) Residuals: Min 1Q Median 3Q Max -2.7500 -0.7007 0.1290 0.8468 2.3331 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.160368 0.781616 7.882 4.47e-07 *** COND 0.030081 0.007777 3.868 0.00123 ** SiO2 -0.045545 0.042522 -1.071 0.29910 Na -0.084456 0.034200 -2.469 0.02443 * K 0.013509 0.149198 0.091 0.92891 Ca -0.073056 0.020452 -3.572 0.00235 ** Mg -0.051383 0.044329 -1.159 0.26243 --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 1.52 on 17 degrees of freedom Multiple R-squared: 0.9002, Adjusted R-squared: 0.8649 F-statistic: 25.55 on 6 and 17 DF, p-value: 1.29e-07 ######################################################################### # MVcut on the orange juice data #------------------------------------------------------------------------ # the graph option allows to modify graphically the break points that default to quantiles juicelevel=MVcut(juice,graph=T) ########################################################################## # Linear PLS calibration on TECATOR Data plscalibration(meatX,meatY[,1,drop=F],spect=1:172,Ytest=meatY[,1,drop=F], spectest=173:215,matrow=3,matcol=1,titlestring="Absorbance Calibration Sample",askpar=T) #training samples 1:172, test samples 173:215 plscalibration(meatX,meatY[,1,drop=F],spect=1:172,Ytest=meatY[,1,drop=F], spectest=173:215,matrow=3,matcol=1,titlestring="Calibration Sample", steps=T,steporder=F,askpar=T) #absorbance spectra are step by step sequentially displayed in descending order ######################################################################### # PLSS to model satisfaction of Sanitary Services in the hospital of Aversa #------------------------------------------------------------------------- # Preparing the Data #------------------------------------------------------------------------- # transform the data text file into a numeric R-matrix (in fact a dataframe) aversaX=dget("aversaX.txt") attach(aversaX) aversaY=dget("aversaY.txt") aversaY3=dget("aversaY3.txt") #---------------------------------------------- # construct a binary indicator matrix of the 3 levels of the new response indicatorY3=Bspline(aversaY3,degree=0,knots=2,equiknots=T,graph=T) #------------------------------------------------------------------ # First try of PLSS to prune the additive model try1=PLSS(aversaX,indicatorY3,degree=0,knots=2,equiknots=T,prop=0.2) PRESS(0.1,2)=2.1851 
#------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.1 Range of the transformed predictors in descending order: Cras3(17) Cris3(14) CRas1(15) E2(20) R2(10) R1( 9) 0.2259 0.21248 0.20183 0.18996 0.18706 0.16806 T1( 6) CRis1(12) E1(19) T2( 7) Cras4(18) CRis2(13) 0.16357 0.16254 0.16237 0.15882 0.1522 0.1315 Cras2(16) R3(11) T3( 8) TARIFFA( 5) ETA( 1) 0.1239 0.09853 0.09708 0.03156 0.0285 TITOLODISTUDIO( 2) PESO( 3) GD( 4) 0.02428 0.02009 0.00972 #------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.3 Range of the transformed predictors in descending order: T1( 6) Cras3(17) T2( 7) E1(19) CRas1(15) Cris3(14) 0.2088 0.17204 0.17058 0.16444 0.16086 0.1585 CRis1(12) E2(20) T3( 8) R2(10) Cras4(18) R1( 9) 0.15219 0.15108 0.14676 0.13358 0.13209 0.12213 CRis2(13) Cras2(16) R3(11) TITOLODISTUDIO( 2) TARIFFA( 5) 0.10456 0.08816 0.08441 0.06658 0.03628 PESO( 3) ETA( 1) GD( 4) 0.03171 0.00989 0.0067 #------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.2 Range of the transformed predictors in descending order: T1( 6) T2( 7) Cras3(17) CRas1(15) Cris3(14) E2(20) 0.24834 0.21189 0.1981 0.18286 0.17856 0.1781 CRis1(12) R1( 9) E1(19) T3( 8) Cras4(18) 0.162 0.14852 0.1444 0.14088 0.1132 TITOLODISTUDIO( 2) CRis2(13) R2(10) TARIFFA( 5) PESO( 3) 0.08438 0.0829 0.07198 0.05963 0.04653 R3(11) Cras2(16) ETA( 1) GD( 4) 0.03916 0.03872 0.02404 0.00703 #------------------------------------------------------------------ #Remove the predictors :1,2,3,4,5,11,13,16 PLSS(aversaX[,c(6:10,12,14:15,17:20)],indicatorY3,degree=0,knots=2,equiknots=T,prop=0.2,qual=Cras3,names.qual=paste("kindness",1:5)) PRESS(0.2,2)=2.1288 #------------------------------------------------------------------ additive influence of the predictors on the t components? 
(y/n)1: y The number of the t component ?,(<= 2 )1: 1 Range of the transformed predictors in descending order: Cras3( 9) E1(11) Cris3( 7) E2(12) R2( 5) CRas1( 8) CRis1( 6) T1( 1) Cras4(10) T2( 2) R1( 4) T3( 3) 0.4555 0.455 0.4303 0.4197 0.416 0.3911 0.3867 0.3753 0.3593 0.3505 0.3235 0.2997 The number of the t component ?,(<= 2 )1: 2 Range of the transformed predictors in descending order: Cras3( 9) Cris3( 7) CRas1( 8) T1( 1) E2(12) R1( 4) T2( 2) CRis1( 6) E1(11) T3( 3) Cras4(10) R2( 5) 0.5099 0.4787 0.4641 0.4635 0.397 0.39 0.3889 0.3702 0.3101 0.2966 0.284 0.2271 #------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.1 Range of the transformed predictors in descending order: Cras3( 9) Cris3( 7) CRas1( 8) R2( 5) E2(12) E1(11) 0.25404 0.23984 0.23379 0.21789 0.21491 0.18851 R1( 4) CRis1( 6) T1( 1) Cras4(10) T2( 2) T3( 3) 0.18734 0.1816 0.17876 0.17709 0.17572 0.11101 #------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.2 Range of the transformed predictors in descending order: T1( 1) T2( 2) Cras3( 9) CRas1( 8) Cris3( 7) E2(12) 0.25451 0.21812 0.20594 0.18917 0.18781 0.18571 CRis1( 6) R1( 4) E1(11) T3( 3) Cras4(10) R2( 5) 0.16676 0.15165 0.14992 0.14765 0.11661 0.07521 #------------------------------------------------------------------ Influence of the predictors on the response: Glob.Satisf.3 Range of the transformed predictors in descending order: T1( 1) E1(11) Cras3( 9) T2( 2) CRas1( 8) E2(12) 0.22078 0.18983 0.18346 0.17973 0.17576 0.17493 Cris3( 7) CRis1( 6) T3( 3) R2( 5) Cras4(10) R1( 4) 0.17007 0.1631 0.15965 0.15906 0.14438 0.13175 #------------------------------------------------------------------ # Classification of the ranks of the Sanitary Services GS1 GS2 GS3 T1 9 1 1 T2 11 2 4 T3 12 10 9 R1 7 8 12 R2 4 12 10 CRAS1 3 4 5 CRAS3 1 3 3 CRAS4 10 11 11 CRIS1 8 7 8 CRIS3 2 5 7 E1 6 9 2 E2 5 6 6 
########################################################################## # Main Effects Additive PLSS on the ORANGE JUICE Data #------------------------------------------------------------------------- # The building model stage #------------------------------------------------------------------------- try0<-PLSS(juicX,juicY[,1,drop=F],matrow=3,matcol=3) #default spline parameters for all predictors: degree = 1, knots = 0 try1<-PLSS(juicX,juicY[,1,drop=F],matrow=3,matcol=3,degree=2,knots=2) #spline parameters for all predictors: degree = 2, 2 knots at quantiles try2<-PLSS(juicX,juicY[,1,drop=F],matrow=3,matcol=3,degree=2,knots=2,equiknots=T) #spline parameters for all predictors: degree = 2, 2 equally spaced knots # by default, equiknots=F for knots at quantiles localknots<-list(c(400,1600),c(10,20,40),c(10,40),c(2.5,5),c(160,400),c(40,110), c(4,11,30),c(400,1700),c(100,300,500),c(600,2600)) try3<-PLSS(juicX,juicY[,1,drop=F],matrow=3,matcol=3,degree=2,listknots=localknots) # spline parameters : degree = 2, individual knots for each predictor # try3 is a list of 5 objects: # $Xvariables is the boolean indicator of the retained predictors in the additive model # $degree is the vector of the degrees # $knots is the vector of the number of knots # $equiknots is the boolean indicator for equally spaced knots (T) or at knots-quantiles # $listknots is the list of the locations of knots (when non null, knots and equiknots not available) #------------------------------------------------------------------------- # The final selection of predictors, using MENU 5 #------------------------------------------------------------------------- > try3 $Xvariables [1] FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE [10] FALSE $degree [1] 2 2 2 2 2 2 2 2 2 2 $knots [1] 0 0 0 0 0 0 0 0 0 0 $equiknots [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [10] FALSE $listknots $listknots[[1]] [1] 400 1600 $listknots[[2]] [1] 10 20 40 $listknots[[3]] [1] 10 40 $listknots[[4]] [1] 2.5 
5.0 $listknots[[5]] [1] 160 400 $listknots[[6]] [1] 40 110 $listknots[[7]] [1] 4 11 30 $listknots[[8]] [1] 400 1700 $listknots[[9]] [1] 100 300 500 $listknots[[10]] [1] 600 2600 #------------------------------------------------------------------------- # Results of the final PLSS model #------------------------------------------------------------------------- PLSS(juicX[,try3$Xvariables],juicY[,1,drop=F],degree=2,listknots=localknots[try3$Xvariables]) # Selected predictors : dimnames(juicX)[[2]][try3$Xvariables] [1] "Ca" "Mg" "SO4" "HCO3" # Optimal dimension : 2 # degree : 2 # knots location : localknots[try3$Xvariables] # Optimal PRESS : "PRESS(0.1,2)=0.1157" with "prop=0.1" the proportion of observations out # Optimal GCV : "GCV(1.8,2)=0.1144" with "alpha=1.8" the tuning GCV parameter ########################################################################## # MAPLSS for bivariate interactions on the ORANGE JUICE Data #------------------------------------------------------------------------- # use the GCV=1.8 value calibrated to give similar PRESS PLSS results MAPLSS(juicX,juicY[,1,drop=F],degree=2,listknots=localknots,GCV=1.8,interaction=c(5,6,8,9)) # interaction: vector of predictor numbers for candidate interactions, by # default, 1:ncol(X),all bivariate interactions are examined MAPLSS(juicX,juicY[,1,drop=F],degree=2,listknots=localknots,GCV=1.8) # NO INTERACTION DETECTED. ########################################################################## # MAPLSS with bivariate interactions on simulated Data # #------------------------------------------------------------------------- f2<-"10*sin(pi*X[,1]*X[,2])+20*(X[,3]-0.5)^2+10*X[,4]+5*X[,5]" experiment1<-f(f2,n=100,p=10,seedpar=20) # The signal does not depend on the 5 last predictors. All possible 45 # interactions are candidates. 
Xtrain<-experiment1$X Ytrain<-experiment1$Y experiment2<-f(f2,n=100,p=10,seedpar=120) Xtest<-experiment2$X signaltest<-experiment2$Ysignal MAPLSS(X=Xtrain,Y=Ytrain,Xtest=Xtest,Ytest=signaltest,degree=c(1,1,2,1,1,1, 1,1,1,1),listknots=list(0.5,0.5,0.5,NULL,NULL,NULL,NULL,NULL,NULL,NULL)) # THE TRUE INTERACTION X1*X2 IS DETECTED (relative gain in GCV= 53.96%) : 2.2: Incorporating interactions step by step : ------------------------------------------------- Reference : Main effects GCV(1,3) = 0.114791 ------------------------------------------------- candidate 1 : X1*X2 ACCEPTED. at 20 % relative GCV gain i j A GCV %rel.GCVgain X1*X2 1 2 6 0.04967867 56.72 candidate 2 : X2*X4 NOT ACCEPTED. at 20 % relative GCV gain i j A GCV %rel.GCVgain X2*X4 2 4 6 0.04581933 7.77 Continue anyway exploring (y/n)?1: n ------------------------------------------------- Range of the transformed predictors in descending order: X4 X1*X2 X3 X5 X1 X2 X10 X7 X9 1.75962 1.31994 0.95828 0.95438 0.79419 0.76246 0.14731 0.08212 0.0694 X8 X6 0.06561 0.05034 # One can now prune the 7 lowest ANOVA terms and retain only the first 4. Because Xtest # is not missing, external prediction is present in the third item of the menu. --------------------------------------- Validation, ANOVA plots, External prediction, 0 to exit 1: Validation 2: ANOVA plots 3: External prediction Selection : 3 --------------------------------------- selected main effects X3 X4 X5 selected interactions X1*X2 Validation of the model with the test sample according to 10 dimensions Mean Squared Error of the response(s) according to the dimension 1 2 3 4 5 6 7 8 9 10 MSE 5.27 2.004 0.77 0.609 0.49 0.473 0.392 0.383 0.38 0.383 Choose the dimension (<= 10 )1: 8 MSE(8)=0.383 ##########################################################################