# Show the working directory; the relative CSV path passed to read.csv() below
# is resolved against it.
getwd()
## [1] "/Users/willdrake/Documents/Data Science Primer"
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(descr)
library(sjPlot)
library(ggcorrplot)

# Load the player-level World Cup 2022 statistics, overriding the file's own
# header with the 19 labels below. Later code refers to the multi-word labels
# in dotted form (e.g. Shots.on.Target).
WC2022 <- read.csv(
  file = "QACDATAWC2022.csv",
  col.names = c(
    "Player", "Position", "Squad", "Age", "Born",
    "Goals", "Shots", "Shots on Target", "Average Shot Distance",
    "Free Kicks Made", "Penalty Kicks Made", "Penalty Kicks Attempted",
    "Touches", "Tackles", "Tackles Won", "Blocks", "Interceptions",
    "Passes Completed", "Passes Attempted"
  )
)

# Assign every player the rank of their squad. A squad's position in the
# vector below IS its rank (Argentina = 1, ..., Qatar = 32), so one match()
# lookup replaces 32 nested ifelse() calls. Squads that are missing or not
# listed get NA, exactly as before, and as.numeric() keeps the double type
# that the ifelse() ladder produced.
WC2022$Team.Rank <- as.numeric(match(
  WC2022$Squad,
  c(
    "Argentina", "France", "Croatia", "Morocco",
    "Netherlands", "England", "Brazil", "Portugal",
    "Japan", "Senegal", "Australia", "Switzerland",
    "Spain", "United States", "Poland", "Korea Republic",
    "Germany", "Ecuador", "Cameroon", "Uruguay",
    "Tunisia", "Mexico", "Belgium", "Ghana",
    "Saudi Arabia", "Iran", "Costa Rica", "Denmark",
    "Serbia", "Wales", "Canada", "Qatar"
  )
))
# Keep only the first two characters of every position label. substring() is
# vectorized, so no row-by-row loop is needed; this also avoids the
# `1:nrow()` pitfall, which iterates over c(1, 0) when the data frame is empty.
WC2022$Position <- substring(WC2022$Position, 1, 2)

# Analysis table: Position (column 2) plus every column from the sixth one
# through the last, restricted to rows without any missing value.
Subset1 <- na.omit(WC2022[, c(2, 6:ncol(WC2022))])

# Standardise every column except Position, then label each row of the
# resulting matrix with the player's position.
Subset1_Scale <- Subset1 %>%
  select(-c("Position")) %>%
  scale()
rownames(Subset1_Scale) <- Subset1$Position

# Within-cluster sum of squares plot used to choose the number of clusters.
fviz_nbclust(Subset1_Scale, FUNcluster = kmeans, method = "wss")

# Partition the standardised players into three clusters, keeping the best of
# 20 random starts. kmeans() draws its starting centres at random, so the seed
# is fixed first: without it the cluster numbering (and possibly the
# assignments) can change between runs, which makes the printed results and
# every downstream table impossible to reproduce.
# NOTE(review): output pasted below this call came from an unseeded run, so
# its cluster numbers may be permuted relative to a seeded rerun.
set.seed(2022)
output <- kmeans(Subset1_Scale, centers = 3, nstart = 20)
print(output)
## K-means clustering with 3 clusters of sizes 91, 321, 16
## 
## Cluster means:
##        Goals       Shots Shots.on.Target Average.Shot.Distance Free.Kicks.Made
## 1 -0.1445601  0.07926315     0.004141754            0.10869628      0.01542088
## 2 -0.1011214 -0.14702650    -0.112514891           -0.04105078     -0.11835060
## 3  2.8509330  2.49890996     2.233773776            0.20537126      2.28670268
##   Penalty.Kicks.Made Penalty.Kicks.Attempted    Touches    Tackles Tackles.Won
## 1         -0.1490460              -0.1606747  1.4143279  1.2590552   1.1494660
## 2         -0.1373561              -0.1234168 -0.4137715 -0.3347281  -0.3034328
## 3          3.6034065               3.3898875  0.2573018 -0.4453945  -0.4499669
##        Blocks Interceptions Passes.Completed Passes.Attempted  Team.Rank
## 1  1.02292402     1.1264147       1.38969869       1.39934763 -0.7458276
## 2 -0.29296210    -0.2940783      -0.39486264      -0.40163821  0.2383738
## 3  0.05967175    -0.5065374       0.01802045       0.09907699 -0.5404811
## 
## Clustering vector:
## MF DF MF FW MF FW DF MF FW DF DF DF FW MF MF FW DF MF DF MF MF DF DF FW FW FW 
##  2  2  2  2  2  2  1  1  2  2  1  1  2  2  2  3  2  1  2  2  1  2  2  2  2  2 
## FW FW DF MF FW FW MF DF DF FW DF FW DF MF MF MF MF MF FW MF DF FW FW DF DF DF 
##  2  2  2  2  2  2  2  2  2  2  2  2  1  1  2  2  1  2  2  2  1  1  2  2  2  2 
## MF MF FW MF MF MF MF DF FW DF FW MF MF DF DF FW MF FW DF DF MF MF DF MF FW FW 
##  1  2  2  2  2  1  2  2  2  1  2  2  1  2  2  2  3  2  2  1  2  2  2  2  2  2 
## DF DF DF FW MF DF MF MF FW MF MF FW DF DF MF FW DF DF FW FW FW DF FW FW DF FW 
##  1  1  2  2  2  2  2  2  2  2  1  1  2  2  2  2  2  1  2  2  2  2  2  2  2  2 
## DF DF DF FW FW MF MF FW DF MF MF FW DF FW FW FW FW MF FW MF MF MF MF DF FW FW 
##  1  2  2  2  2  1  2  2  2  2  2  2  2  2  2  3  2  1  2  1  2  2  1  2  2  2 
## FW DF MF MF DF MF MF FW DF FW DF FW MF MF MF MF FW MF MF DF FW MF MF MF DF MF 
##  1  2  2  2  2  2  2  3  2  2  2  2  2  2  2  2  2  1  2  2  2  2  2  3  1  2 
## DF FW DF MF MF FW MF MF MF MF DF FW FW FW MF FW MF MF MF MF MF MF DF MF MF MF 
##  1  2  2  2  2  2  2  2  2  2  1  2  2  2  2  2  2  2  2  2  1  2  2  2  2  2 
## DF FW FW MF FW DF FW DF MF FW FW FW DF FW MF FW MF MF DF MF FW MF DF MF FW DF 
##  2  2  2  2  2  2  2  2  1  2  2  2  1  2  2  3  2  2  2  2  2  1  2  2  2  1 
## MF DF DF DF MF FW MF MF FW DF MF DF DF FW DF DF FW MF FW MF FW DF DF FW FW MF 
##  1  2  1  1  1  2  1  2  2  2  2  2  1  2  2  2  2  2  3  2  2  2  1  2  2  2 
## MF FW DF MF MF DF DF FW FW FW DF MF FW DF FW FW FW MF FW MF DF MF FW DF DF FW 
##  1  2  1  2  2  1  1  2  2  2  2  2  2  1  3  2  2  2  2  2  2  2  3  2  1  2 
## MF DF DF FW MF FW DF MF FW DF DF DF FW MF FW DF MF MF MF FW FW MF MF DF FW FW 
##  2  1  2  2  2  2  2  1  2  1  2  2  2  1  2  2  2  2  2  2  2  1  2  1  2  2 
## MF MF DF MF MF MF FW FW DF FW MF FW MF DF MF MF DF MF MF MF MF MF FW DF FW MF 
##  1  3  2  2  2  2  2  2  1  1  2  2  2  1  2  1  2  2  2  1  1  1  2  2  2  2 
## DF FW FW DF FW MF MF DF DF DF FW MF DF FW MF FW FW DF FW DF FW DF DF MF FW DF 
##  1  1  2  2  2  2  2  2  2  2  2  1  2  2  2  1  3  1  2  2  2  2  1  2  3  2 
## DF MF DF FW MF DF DF FW FW FW FW MF FW MF DF FW DF FW MF FW DF FW MF DF MF MF 
##  1  2  1  2  2  2  2  2  2  2  2  2  3  2  2  2  2  2  2  2  1  2  1  1  1  2 
## FW MF MF FW DF DF MF DF FW DF FW DF FW MF DF DF MF DF FW MF MF FW FW MF DF FW 
##  2  2  2  2  1  1  2  2  2  1  2  2  2  2  1  2  2  2  3  1  2  2  2  2  2  3 
## MF DF FW DF FW MF DF DF FW MF FW DF MF FW FW DF FW FW DF FW DF FW MF MF MF MF 
##  2  1  2  1  3  1  1  1  2  2  2  2  2  2  2  2  2  2  1  2  2  2  2  2  2  1 
## DF FW MF DF FW DF MF MF MF DF DF FW 
##  2  2  1  1  2  2  2  2  2  1  2  1 
## 
## Within cluster sum of squares by cluster:
## [1] 1242.5010 2099.0900  865.3604
##  (between_SS / total_SS =  34.3 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Cluster label of every retained player (the vector is named by position).
kmcluster <- output$cluster

# Draw the fitted clusters, passing the feature columns (everything in
# Subset1 except Position) as the data argument.
Subset1 %>%
  select(-c("Position")) %>%
  fviz_cluster(object = output, data = .)

# Attach the cluster label to the analysis table.
test_df <- cbind(Subset1, cluster = kmcluster)

# Share of each position that falls into each cluster (every row sums to 1).
prop.table(
  table(test_df$Position, test_df$cluster),
  margin = 1
)
##     
##               1          2          3
##   DF 0.36923077 0.63076923 0.00000000
##   FW 0.04861111 0.86111111 0.09027778
##   MF 0.23376623 0.74675325 0.01948052
# One simple regression of Team.Rank on each individual statistic
# (lm1 .. lm14), followed by a multiple regression on all of them (lm15).
lm1 <- lm(Team.Rank ~ Goals, data = Subset1)
lm2 <- lm(Team.Rank ~ Shots, data = Subset1)
lm3 <- lm(Team.Rank ~ Shots.on.Target, data = Subset1)
lm4 <- lm(Team.Rank ~ Average.Shot.Distance, data = Subset1)
lm5 <- lm(Team.Rank ~ Free.Kicks.Made, data = Subset1)
lm6 <- lm(Team.Rank ~ Penalty.Kicks.Made, data = Subset1)
lm7 <- lm(Team.Rank ~ Penalty.Kicks.Attempted, data = Subset1)
lm8 <- lm(Team.Rank ~ Touches, data = Subset1)
lm9 <- lm(Team.Rank ~ Tackles, data = Subset1)
lm10 <- lm(Team.Rank ~ Tackles.Won, data = Subset1)
lm11 <- lm(Team.Rank ~ Blocks, data = Subset1)
lm12 <- lm(Team.Rank ~ Interceptions, data = Subset1)
lm13 <- lm(Team.Rank ~ Passes.Completed, data = Subset1)
lm14 <- lm(Team.Rank ~ Passes.Attempted, data = Subset1)

# Full model. Penalty.Kicks.Made used to be listed twice here; R silently
# collapses repeated terms, so removing the duplicate leaves the fit unchanged
# and only makes the formula say what is actually estimated.
lm15 <- lm(
  Team.Rank ~ Goals + Shots + Shots.on.Target + Average.Shot.Distance +
    Free.Kicks.Made + Penalty.Kicks.Made + Penalty.Kicks.Attempted +
    Touches + Tackles + Tackles.Won + Blocks + Interceptions +
    Passes.Completed + Passes.Attempted,
  data = Subset1
)

# Side-by-side regression table for the fifteen Team.Rank models, one labelled
# column group per model, with each model's AIC added.
tab_model(
  lm1, lm2, lm3, lm4, lm5,
  lm6, lm7, lm8, lm9, lm10,
  lm11, lm12, lm13, lm14, lm15,
  dv.labels = c(
    "model1", "model2", "model3", "model4", "model5",
    "model6", "model7", "model8", "model9", "Model10",
    "Model11", "Model12", "Model13", "Model14", "Model15"
  ),
  show.aic = TRUE
)
##   model1 model2 model3 model4 model5 model6 model7 model8 model9 Model10 Model11 Model12 Model13 Model14 Model15
## Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
## (Intercept) 15.98 15.04 – 16.92 <0.001 17.33 16.12 – 18.55 <0.001 16.67 15.62 – 17.71 <0.001 12.52 10.25 – 14.79 <0.001 15.31 14.41 – 16.21 <0.001 15.20 14.32 – 16.08 <0.001 15.17 14.29 – 16.05 <0.001 19.33 18.05 – 20.60 <0.001 17.87 16.69 – 19.04 <0.001 17.47 16.37 – 18.57 <0.001 17.49 16.24 – 18.73 <0.001 17.08 15.96 – 18.21 <0.001 18.40 17.24 – 19.56 <0.001 18.76 17.54 – 19.97 <0.001 18.05 15.53 – 20.58 <0.001
## Goals -2.26 -3.27 – -1.25 <0.001 -1.34 -2.99 – 0.30 0.109
## Shots -0.67 -0.94 – -0.41 <0.001 -0.18 -0.70 – 0.33 0.483
## Shots on Target -1.42 -1.97 – -0.87 <0.001 -0.49 -1.58 – 0.61 0.383
## Average Shot Distance 0.15 0.03 – 0.26 0.017 0.15 0.03 – 0.26 0.011
## Free Kicks Made -2.00 -4.10 – 0.09 0.061 -1.17 -3.38 – 1.03 0.296
## Penalty Kicks Made -2.98 -6.25 – 0.28 0.073 -5.09 -14.62 – 4.44 0.294
## Penalty Kicks Attempted -1.65 -4.26 – 0.96 0.214 6.84 -0.62 – 14.29 0.072
## Touches -0.03 -0.04 – -0.02 <0.001 -0.05 -0.17 – 0.07 0.420
## Tackles -0.73 -0.94 – -0.51 <0.001 0.03 -0.59 – 0.65 0.926
## Tackles Won -1.07 -1.39 – -0.75 <0.001 -0.58 -1.38 – 0.23 0.158
## Blocks -0.88 -1.22 – -0.55 <0.001 0.06 -0.40 – 0.52 0.799
## Interceptions -1.04 -1.43 – -0.65 <0.001 -0.14 -0.64 – 0.37 0.602
## Passes Completed -0.03 -0.04 – -0.03 <0.001 -0.08 -0.17 – 0.01 0.083
## Passes Attempted -0.03 -0.04 – -0.02 <0.001 0.10 -0.07 – 0.27 0.231
## Observations 428 428 428 428 428 428 428 428 428 428 428 428 428 428 428
## R2 / R2 adjusted 0.044 / 0.041 0.057 / 0.055 0.057 / 0.055 0.013 / 0.011 0.008 / 0.006 0.008 / 0.005 0.004 / 0.001 0.144 / 0.142 0.093 / 0.091 0.092 / 0.090 0.059 / 0.057 0.062 / 0.060 0.128 / 0.126 0.132 / 0.129 0.229 / 0.203
## AIC 3097.461 3091.485 3091.257 3110.804 3113.026 3113.327 3114.995 3050.204 3074.809 3075.389 3090.476 3089.283 3057.996 3056.200 3031.007
# Feature-only table for the correlation plot: drop the outcome (Team.Rank)
# and the Position label.
Subset1B <- select(Subset1, -c(Team.Rank, Position))
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Correlation plot of the player statistics in Subset1B (Team.Rank and
# Position were removed when Subset1B was built).
corPlot(Subset1B)

# Treat Position as a categorical predictor for the models below.
Subset1[["Position"]] <- as.factor(Subset1[["Position"]])

# Shooting statistics by position, with FW as the reference level.
linear1 <- lm(Goals ~ relevel(Position, ref = "FW"), data = Subset1)
linear2 <- lm(Shots ~ relevel(Position, ref = "FW"), data = Subset1)
linear3 <- lm(Shots.on.Target ~ relevel(Position, ref = "FW"), data = Subset1)
linear4 <- lm(
  Average.Shot.Distance ~ relevel(Position, ref = "FW"),
  data = Subset1
)

# Defensive statistics by position, using the factor's default reference
# level.
linear5 <- lm(Tackles ~ Position, data = Subset1)
linear6 <- lm(Tackles.Won ~ Position, data = Subset1)
linear7 <- lm(Blocks ~ Position, data = Subset1)
linear8 <- lm(Interceptions ~ Position, data = Subset1)

# Passing and touch statistics by position, with MF as the reference level.
linear9 <- lm(Passes.Completed ~ relevel(Position, ref = "MF"), data = Subset1)
linear10 <- lm(Passes.Attempted ~ relevel(Position, ref = "MF"), data = Subset1)
linear11 <- lm(Touches ~ relevel(Position, ref = "MF"), data = Subset1)

# Coefficients and fit statistics for Goals by Position (FW is the reference level).
summary(linear1)
## 
## Call:
## lm(formula = Goals ~ relevel(Position, ref = "FW"), data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7986 -0.2273 -0.1538  0.2014  7.2014 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      0.79861    0.06656  11.998  < 2e-16 ***
## relevel(Position, ref = "FW")DF -0.64476    0.09663  -6.672 7.86e-11 ***
## relevel(Position, ref = "FW")MF -0.57134    0.09259  -6.171 1.59e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7987 on 425 degrees of freedom
## Multiple R-squared:  0.1153, Adjusted R-squared:  0.1112 
## F-statistic:  27.7 on 2 and 425 DF,  p-value: 4.9e-12
# Coefficients and fit statistics for Shots by Position (FW is the reference level).
summary(linear2)
## 
## Call:
## lm(formula = Shots ~ relevel(Position, ref = "FW"), data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0139 -1.8896 -0.8896  0.9923 23.9861 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       5.0139     0.2499  20.063  < 2e-16 ***
## relevel(Position, ref = "FW")DF  -3.0062     0.3628  -8.286 1.55e-15 ***
## relevel(Position, ref = "FW")MF  -2.1243     0.3476  -6.111 2.25e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.999 on 425 degrees of freedom
## Multiple R-squared:  0.1483, Adjusted R-squared:  0.1443 
## F-statistic:    37 on 2 and 425 DF,  p-value: 1.538e-15
# Coefficients and fit statistics for Shots.on.Target by Position (FW is the reference level).
summary(linear3)
## 
## Call:
## lm(formula = Shots.on.Target ~ relevel(Position, ref = "FW"), 
##     data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9167 -0.9167 -0.3923  0.6077 11.0833 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       1.9167     0.1181  16.226  < 2e-16 ***
## relevel(Position, ref = "FW")DF  -1.5244     0.1715  -8.889  < 2e-16 ***
## relevel(Position, ref = "FW")MF  -0.9426     0.1643  -5.737 1.83e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.417 on 425 degrees of freedom
## Multiple R-squared:  0.1607, Adjusted R-squared:  0.1568 
## F-statistic: 40.69 on 2 and 425 DF,  p-value: < 2.2e-16
# Coefficients and fit statistics for Average.Shot.Distance by Position (FW is the reference level).
summary(linear4)
## 
## Call:
## lm(formula = Average.Shot.Distance ~ relevel(Position, ref = "FW"), 
##     data = Subset1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.8617  -5.2647  -0.7651   4.9264  30.5264 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      16.0736     0.5861  27.427  < 2e-16 ***
## relevel(Position, ref = "FW")DF   0.1949     0.8508   0.229    0.819    
## relevel(Position, ref = "FW")MF   4.2881     0.8152   5.260 2.29e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.033 on 425 degrees of freedom
## Multiple R-squared:  0.07637,    Adjusted R-squared:  0.07203 
## F-statistic: 17.57 on 2 and 425 DF,  p-value: 4.655e-08
# Coefficients and fit statistics for Tackles by Position (default reference level of the factor).
summary(linear5)
## 
## Call:
## lm(formula = Tackles ~ Position, data = Subset1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.023 -2.111 -1.023  1.539 20.977 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.0231     0.3203  15.684  < 2e-16 ***
## PositionFW   -2.9120     0.4418  -6.591  1.3e-10 ***
## PositionMF   -0.5620     0.4349  -1.292    0.197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.652 on 425 degrees of freedom
## Multiple R-squared:  0.106,  Adjusted R-squared:  0.1018 
## F-statistic: 25.19 on 2 and 425 DF,  p-value: 4.593e-11
# Coefficients and fit statistics for Tackles.Won by Position (default reference level of the factor).
summary(linear6)
## 
## Call:
## lm(formula = Tackles.Won ~ Position, data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9538 -1.5844 -0.5844  1.0462 14.4156 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9538     0.2181  13.545  < 2e-16 ***
## PositionFW   -1.7594     0.3008  -5.849  9.9e-09 ***
## PositionMF   -0.3694     0.2962  -1.247    0.213    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.487 on 425 degrees of freedom
## Multiple R-squared:  0.08429,    Adjusted R-squared:  0.07998 
## F-statistic: 19.56 on 2 and 425 DF,  p-value: 7.472e-09
# Coefficients and fit statistics for Blocks by Position (default reference level of the factor).
summary(linear7)
## 
## Call:
## lm(formula = Blocks ~ Position, data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3615 -1.8750 -0.3615  1.1250 10.0195 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.3615     0.2151  15.627  < 2e-16 ***
## PositionFW   -1.4865     0.2967  -5.010 8.01e-07 ***
## PositionMF   -0.3810     0.2921  -1.304    0.193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.453 on 425 degrees of freedom
## Multiple R-squared:  0.06123,    Adjusted R-squared:  0.05682 
## F-statistic: 13.86 on 2 and 425 DF,  p-value: 1.474e-06
# Coefficients and fit statistics for Interceptions by Position (default reference level of the factor).
summary(linear8)
## 
## Call:
## lm(formula = Interceptions ~ Position, data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8769 -1.1948 -0.7639  0.8052 11.8052 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8769     0.1764  16.305  < 2e-16 ***
## PositionFW   -2.1130     0.2434  -8.681  < 2e-16 ***
## PositionMF   -0.6821     0.2396  -2.847  0.00463 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.012 on 425 degrees of freedom
## Multiple R-squared:  0.1581, Adjusted R-squared:  0.1542 
## F-statistic: 39.92 on 2 and 425 DF,  p-value: < 2.2e-16
# Coefficients and fit statistics for Passes.Completed by Position (MF is the reference level).
summary(linear9)
## 
## Call:
## lm(formula = Passes.Completed ~ relevel(Position, ref = "MF"), 
##     data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -146.60  -51.43  -18.51   23.96  488.40 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      103.182      7.155  14.421  < 2e-16 ***
## relevel(Position, ref = "MF")DF   50.418     10.575   4.768 2.57e-06 ***
## relevel(Position, ref = "MF")FW  -56.342     10.293  -5.474 7.54e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 88.79 on 425 degrees of freedom
## Multiple R-squared:  0.1892, Adjusted R-squared:  0.1854 
## F-statistic: 49.59 on 2 and 425 DF,  p-value: < 2.2e-16
# Coefficients and fit statistics for Passes.Attempted by Position (MF is the reference level).
summary(linear10)
## 
## Call:
## lm(formula = Passes.Attempted ~ relevel(Position, ref = "MF"), 
##     data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -171.85  -59.67  -20.86   30.66  506.15 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      125.357      8.088  15.499  < 2e-16 ***
## relevel(Position, ref = "MF")DF   57.497     11.954   4.810 2.10e-06 ***
## relevel(Position, ref = "MF")FW  -60.690     11.635  -5.216 2.86e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 100.4 on 425 degrees of freedom
## Multiple R-squared:  0.1826, Adjusted R-squared:  0.1788 
## F-statistic: 47.47 on 2 and 425 DF,  p-value: < 2.2e-16
# Coefficients and fit statistics for Touches by Position (MF is the reference level).
summary(linear11)
## 
## Call:
## lm(formula = Touches ~ relevel(Position, ref = "MF"), data = Subset1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -198.31  -69.63  -21.29   42.59  498.69 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      151.286      9.011  16.788  < 2e-16 ***
## relevel(Position, ref = "MF")DF   61.022     13.319   4.581 6.08e-06 ***
## relevel(Position, ref = "MF")FW  -61.876     12.963  -4.773 2.50e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 111.8 on 425 degrees of freedom
## Multiple R-squared:  0.1628, Adjusted R-squared:  0.1588 
## F-statistic: 41.31 on 2 and 425 DF,  p-value: < 2.2e-16
# Attach player names and positions to the standardised features.
Subset1_Scale1 <- as.data.frame(Subset1_Scale)

# Subset1_Scale has one row per row of Subset1, in the same order, and
# na.omit() kept the original WC2022 row labels on Subset1. Looking the players
# up through those labels guarantees the identifying columns line up with the
# scaled rows. The previous na.omit(WC2022) could drop extra rows when a column
# that is not part of Subset1 (Player, Squad, Age, Born) is missing, which
# makes cbind() fail or silently recycle names.
WC2022_2 <- WC2022[rownames(Subset1), , drop = FALSE]
stopifnot(nrow(WC2022_2) == nrow(Subset1_Scale1))

Subset1_Scale1 <- cbind(
  Players = WC2022_2$Player,
  Position = WC2022_2$Position,
  Subset1_Scale1
)

# Forwards only.
ALL_FW <- dplyr::filter(Subset1_Scale1, Position == "FW")

# Composite forward score on the standardised scale: goals, shots and shots on
# target count in favour, average shot distance counts against.
ALL_FW$BestFW <- with(
  ALL_FW,
  Goals + Shots + Shots.on.Target - Average.Shot.Distance
)

# Highest composite score among forwards.
max(ALL_FW$BestFW)
## [1] 23.37355
# Names of the five forwards with the highest BestFW score, best first.
ALL_FW %>%
  group_by(Players) %>%
  arrange(desc(BestFW)) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups:   Players [5]
##   Players       
##   <chr>         
## 1 Kylian Mbappé 
## 2 Lionel Messi  
## 3 Olivier Giroud
## 4 Julián Álvarez
## 5 Marc Rashford
# Midfielders only.
ALL_MF <- dplyr::filter(Subset1_Scale1, Position == "MF")

# Composite midfielder score on the standardised scale: passes completed,
# passes attempted and touches.
ALL_MF$BestMF <- with(ALL_MF, Passes.Completed + Passes.Attempted + Touches)

# Names of the five midfielders with the highest BestMF score, best first.
ALL_MF %>%
  group_by(Players) %>%
  arrange(desc(BestMF)) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups:   Players [5]
##   Players            
##   <chr>              
## 1 Rodrigo Paul       
## 2 Luka Modrić        
## 3 Marcelo Brozović   
## 4 Aurélien Tchouaméni
## 5 Enzo Fernández
# Defenders only.
ALL_DF <- dplyr::filter(Subset1_Scale1, Position == "DF")

# Composite defender score on the standardised scale: defensive actions
# (tackles, tackles won, blocks, interceptions) plus passing and touches.
ALL_DF$BestDF <- with(
  ALL_DF,
  Tackles + Tackles.Won + Blocks + Interceptions +
    Passes.Completed + Passes.Attempted + Touches
)

# Names of the five defenders with the highest BestDF score, best first.
ALL_DF %>%
  group_by(Players) %>%
  arrange(desc(BestDF)) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups:   Players [5]
##   Players         
##   <chr>           
## 1 Achraf Hakimi   
## 2 Nicolás Otamendi
## 3 Joško Gvardiol  
## 4 Rodri           
## 5 Theo Hernández