# Print the current working directory; the relative CSV path passed to
# read.csv() further down is resolved against it.
getwd()
## [1] "/Users/willdrake/Documents/Data Science Primer"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(descr)
library(sjPlot)
library(ggcorrplot)
# Load the player-level statistics file and replace its header with the labels
# below. The labels contain spaces; read.csv() keeps check.names = TRUE by
# default, so the resulting column names use dots (e.g. Shots.on.Target).
WC2022 <- read.csv(
  file = "QACDATAWC2022.csv",
  col.names = c(
    "Player", "Position", "Squad", "Age", "Born",
    "Goals", "Shots", "Shots on Target", "Average Shot Distance",
    "Free Kicks Made", "Penalty Kicks Made", "Penalty Kicks Attempted",
    "Touches", "Tackles", "Tackles Won", "Blocks", "Interceptions",
    "Passes Completed", "Passes Attempted"
  )
)
# Assign every player the rank of their squad (1 = Argentina ... 32 = Qatar).
# NOTE(review): the ordering looks like the final tournament standing --
# confirm against the data source.
# The rank is simply the position of the squad name in the ordered vector
# below; squads that are not listed (or are NA) get NA, exactly as the previous
# nested ifelse() chain did. as.numeric() keeps the column a double, matching
# the type ifelse() produced.
WC2022$Team.Rank <- local({
  squads_by_rank <- c(
    "Argentina", "France", "Croatia", "Morocco",
    "Netherlands", "England", "Brazil", "Portugal",
    "Japan", "Senegal", "Australia", "Switzerland",
    "Spain", "United States", "Poland", "Korea Republic",
    "Germany", "Ecuador", "Cameroon", "Uruguay",
    "Tunisia", "Mexico", "Belgium", "Ghana",
    "Saudi Arabia", "Iran", "Costa Rica", "Denmark",
    "Serbia", "Wales", "Canada", "Qatar"
  )
  as.numeric(match(WC2022$Squad, squads_by_rank))
})
# Keep only the first two characters of each position code. substring() is
# vectorised, so no row loop is needed; this also behaves correctly on a
# zero-row data frame, where 1:nrow() would have iterated over c(1, 0).
WC2022$Position <- substring(WC2022$Position, 1, 2)
# Keep column 2 (Position in the read.csv column list) plus column 6 through
# the last column, and drop every row that has a missing value in any of them.
Subset1 <- na.omit(WC2022[, c(2, 6:ncol(WC2022))])

# Standardise every column except Position (scale() centres and scales each
# column) and label the rows of the resulting matrix with the position code.
Subset1_Scale <- Subset1 %>%
  select(-c("Position")) %>%
  scale()
rownames(Subset1_Scale) <- Subset1$Position

# Plot the within-cluster sum of squares for a range of k-means cluster counts.
fviz_nbclust(Subset1_Scale, FUNcluster = kmeans, method = "wss")

# k-means picks its starting centres at random, so fix the RNG seed to make
# the fitted clusters (and their arbitrary 1/2/3 labels) reproducible from run
# to run. Cluster numbering may differ from output recorded in earlier,
# unseeded runs.
set.seed(123)

# Fit 3 clusters on the standardised statistics; nstart = 20 tries 20 random
# starts and keeps the best solution.
output <- kmeans(Subset1_Scale, centers = 3, nstart = 20)
print(output)
## K-means clustering with 3 clusters of sizes 91, 321, 16
##
## Cluster means:
## Goals Shots Shots.on.Target Average.Shot.Distance Free.Kicks.Made
## 1 -0.1445601 0.07926315 0.004141754 0.10869628 0.01542088
## 2 -0.1011214 -0.14702650 -0.112514891 -0.04105078 -0.11835060
## 3 2.8509330 2.49890996 2.233773776 0.20537126 2.28670268
## Penalty.Kicks.Made Penalty.Kicks.Attempted Touches Tackles Tackles.Won
## 1 -0.1490460 -0.1606747 1.4143279 1.2590552 1.1494660
## 2 -0.1373561 -0.1234168 -0.4137715 -0.3347281 -0.3034328
## 3 3.6034065 3.3898875 0.2573018 -0.4453945 -0.4499669
## Blocks Interceptions Passes.Completed Passes.Attempted Team.Rank
## 1 1.02292402 1.1264147 1.38969869 1.39934763 -0.7458276
## 2 -0.29296210 -0.2940783 -0.39486264 -0.40163821 0.2383738
## 3 0.05967175 -0.5065374 0.01802045 0.09907699 -0.5404811
##
## Clustering vector:
## MF DF MF FW MF FW DF MF FW DF DF DF FW MF MF FW DF MF DF MF MF DF DF FW FW FW
## 2 2 2 2 2 2 1 1 2 2 1 1 2 2 2 3 2 1 2 2 1 2 2 2 2 2
## FW FW DF MF FW FW MF DF DF FW DF FW DF MF MF MF MF MF FW MF DF FW FW DF DF DF
## 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 1 2 2 2 1 1 2 2 2 2
## MF MF FW MF MF MF MF DF FW DF FW MF MF DF DF FW MF FW DF DF MF MF DF MF FW FW
## 1 2 2 2 2 1 2 2 2 1 2 2 1 2 2 2 3 2 2 1 2 2 2 2 2 2
## DF DF DF FW MF DF MF MF FW MF MF FW DF DF MF FW DF DF FW FW FW DF FW FW DF FW
## 1 1 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2
## DF DF DF FW FW MF MF FW DF MF MF FW DF FW FW FW FW MF FW MF MF MF MF DF FW FW
## 1 2 2 2 2 1 2 2 2 2 2 2 2 2 2 3 2 1 2 1 2 2 1 2 2 2
## FW DF MF MF DF MF MF FW DF FW DF FW MF MF MF MF FW MF MF DF FW MF MF MF DF MF
## 1 2 2 2 2 2 2 3 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 3 1 2
## DF FW DF MF MF FW MF MF MF MF DF FW FW FW MF FW MF MF MF MF MF MF DF MF MF MF
## 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2
## DF FW FW MF FW DF FW DF MF FW FW FW DF FW MF FW MF MF DF MF FW MF DF MF FW DF
## 2 2 2 2 2 2 2 2 1 2 2 2 1 2 2 3 2 2 2 2 2 1 2 2 2 1
## MF DF DF DF MF FW MF MF FW DF MF DF DF FW DF DF FW MF FW MF FW DF DF FW FW MF
## 1 2 1 1 1 2 1 2 2 2 2 2 1 2 2 2 2 2 3 2 2 2 1 2 2 2
## MF FW DF MF MF DF DF FW FW FW DF MF FW DF FW FW FW MF FW MF DF MF FW DF DF FW
## 1 2 1 2 2 1 1 2 2 2 2 2 2 1 3 2 2 2 2 2 2 2 3 2 1 2
## MF DF DF FW MF FW DF MF FW DF DF DF FW MF FW DF MF MF MF FW FW MF MF DF FW FW
## 2 1 2 2 2 2 2 1 2 1 2 2 2 1 2 2 2 2 2 2 2 1 2 1 2 2
## MF MF DF MF MF MF FW FW DF FW MF FW MF DF MF MF DF MF MF MF MF MF FW DF FW MF
## 1 3 2 2 2 2 2 2 1 1 2 2 2 1 2 1 2 2 2 1 1 1 2 2 2 2
## DF FW FW DF FW MF MF DF DF DF FW MF DF FW MF FW FW DF FW DF FW DF DF MF FW DF
## 1 1 2 2 2 2 2 2 2 2 2 1 2 2 2 1 3 1 2 2 2 2 1 2 3 2
## DF MF DF FW MF DF DF FW FW FW FW MF FW MF DF FW DF FW MF FW DF FW MF DF MF MF
## 1 2 1 2 2 2 2 2 2 2 2 2 3 2 2 2 2 2 2 2 1 2 1 1 1 2
## FW MF MF FW DF DF MF DF FW DF FW DF FW MF DF DF MF DF FW MF MF FW FW MF DF FW
## 2 2 2 2 1 1 2 2 2 1 2 2 2 2 1 2 2 2 3 1 2 2 2 2 2 3
## MF DF FW DF FW MF DF DF FW MF FW DF MF FW FW DF FW FW DF FW DF FW MF MF MF MF
## 2 1 2 1 3 1 1 1 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1
## DF FW MF DF FW DF MF MF MF DF DF FW
## 2 2 1 1 2 2 2 2 2 1 2 1
##
## Within cluster sum of squares by cluster:
## [1] 1242.5010 2099.0900 865.3604
## (between_SS / total_SS = 34.3 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Cluster label assigned to each row by the k-means fit.
kmcluster <- output$cluster

# Plot the fitted clusters using the same columns that were clustered
# (everything in Subset1 except Position).
fviz_cluster(object = output, data = select(Subset1, -c("Position")))

# Append the cluster label to the data and show, for each position (rows),
# the share of its players falling in each cluster (row proportions).
test_df <- cbind(Subset1, cluster = kmcluster)
table(test_df[["Position"]], test_df[["cluster"]]) %>%
  prop.table(margin = 1)
##
## 1 2 3
## DF 0.36923077 0.63076923 0.00000000
## FW 0.04861111 0.86111111 0.09027778
## MF 0.23376623 0.74675325 0.01948052
# Regress Team.Rank (the 1-32 squad rank assigned earlier in this script) on
# each statistic separately (lm1-lm14), then on all statistics together (lm15).
lm1 <- lm(formula = Team.Rank ~ Goals, data = Subset1)
lm2 <- lm(formula = Team.Rank ~ Shots, data = Subset1)
lm3 <- lm(formula = Team.Rank ~ Shots.on.Target, data = Subset1)
lm4 <- lm(formula = Team.Rank ~ Average.Shot.Distance, data = Subset1)
lm5 <- lm(formula = Team.Rank ~ Free.Kicks.Made, data = Subset1)
lm6 <- lm(formula = Team.Rank ~ Penalty.Kicks.Made, data = Subset1)
lm7 <- lm(formula = Team.Rank ~ Penalty.Kicks.Attempted, data = Subset1)
lm8 <- lm(formula = Team.Rank ~ Touches, data = Subset1)
lm9 <- lm(formula = Team.Rank ~ Tackles, data = Subset1)
lm10 <- lm(formula = Team.Rank ~ Tackles.Won, data = Subset1)
lm11 <- lm(formula = Team.Rank ~ Blocks, data = Subset1)
lm12 <- lm(formula = Team.Rank ~ Interceptions, data = Subset1)
lm13 <- lm(formula = Team.Rank ~ Passes.Completed, data = Subset1)
lm14 <- lm(formula = Team.Rank ~ Passes.Attempted, data = Subset1)

# Full model. Penalty.Kicks.Made used to be listed twice; R collapses repeated
# formula terms, so removing the duplicate leaves the fit unchanged.
lm15 <- lm(
  formula = Team.Rank ~ Goals + Shots + Shots.on.Target +
    Average.Shot.Distance + Free.Kicks.Made + Penalty.Kicks.Made +
    Penalty.Kicks.Attempted + Touches + Tackles + Tackles.Won + Blocks +
    Interceptions + Passes.Completed + Passes.Attempted,
  data = Subset1
)

# Side-by-side table of all fifteen models, including AIC.
tab_model(
  lm1, lm2, lm3, lm4, lm5, lm6, lm7, lm8, lm9, lm10, lm11, lm12, lm13, lm14,
  lm15,
  dv.labels = c(
    "model1", "model2", "model3", "model4", "model5", "model6", "model7",
    "model8", "model9", "Model10", "Model11", "Model12", "Model13", "Model14",
    "Model15"
  ),
  show.aic = TRUE
)
|
model1
|
model2
|
model3
|
model4
|
model5
|
model6
|
model7
|
model8
|
model9
|
Model10
|
Model11
|
Model12
|
Model13
|
Model14
|
Model15
|
Predictors
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
Estimates
|
CI
|
p
|
(Intercept)
|
15.98
|
15.04 – 16.92
|
<0.001
|
17.33
|
16.12 – 18.55
|
<0.001
|
16.67
|
15.62 – 17.71
|
<0.001
|
12.52
|
10.25 – 14.79
|
<0.001
|
15.31
|
14.41 – 16.21
|
<0.001
|
15.20
|
14.32 – 16.08
|
<0.001
|
15.17
|
14.29 – 16.05
|
<0.001
|
19.33
|
18.05 – 20.60
|
<0.001
|
17.87
|
16.69 – 19.04
|
<0.001
|
17.47
|
16.37 – 18.57
|
<0.001
|
17.49
|
16.24 – 18.73
|
<0.001
|
17.08
|
15.96 – 18.21
|
<0.001
|
18.40
|
17.24 – 19.56
|
<0.001
|
18.76
|
17.54 – 19.97
|
<0.001
|
18.05
|
15.53 – 20.58
|
<0.001
|
Goals
|
-2.26
|
-3.27 – -1.25
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-1.34
|
-2.99 – 0.30
|
0.109
|
Shots
|
|
|
|
-0.67
|
-0.94 – -0.41
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.18
|
-0.70 – 0.33
|
0.483
|
Shots on Target
|
|
|
|
|
|
|
-1.42
|
-1.97 – -0.87
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.49
|
-1.58 – 0.61
|
0.383
|
Average Shot Distance
|
|
|
|
|
|
|
|
|
|
0.15
|
0.03 – 0.26
|
0.017
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0.15
|
0.03 – 0.26
|
0.011
|
Free Kicks Made
|
|
|
|
|
|
|
|
|
|
|
|
|
-2.00
|
-4.10 – 0.09
|
0.061
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-1.17
|
-3.38 – 1.03
|
0.296
|
Penalty Kicks Made
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-2.98
|
-6.25 – 0.28
|
0.073
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-5.09
|
-14.62 – 4.44
|
0.294
|
Penalty Kicks Attempted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-1.65
|
-4.26 – 0.96
|
0.214
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6.84
|
-0.62 – 14.29
|
0.072
|
Touches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.03
|
-0.04 – -0.02
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.05
|
-0.17 – 0.07
|
0.420
|
Tackles
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.73
|
-0.94 – -0.51
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0.03
|
-0.59 – 0.65
|
0.926
|
Tackles Won
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-1.07
|
-1.39 – -0.75
|
<0.001
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.58
|
-1.38 – 0.23
|
0.158
|
Blocks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.88
|
-1.22 – -0.55
|
<0.001
|
|
|
|
|
|
|
|
|
|
0.06
|
-0.40 – 0.52
|
0.799
|
Interceptions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-1.04
|
-1.43 – -0.65
|
<0.001
|
|
|
|
|
|
|
-0.14
|
-0.64 – 0.37
|
0.602
|
Passes Completed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.03
|
-0.04 – -0.03
|
<0.001
|
|
|
|
-0.08
|
-0.17 – 0.01
|
0.083
|
Passes Attempted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-0.03
|
-0.04 – -0.02
|
<0.001
|
0.10
|
-0.07 – 0.27
|
0.231
|
Observations
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
428
|
R2 / R2 adjusted
|
0.044 / 0.041
|
0.057 / 0.055
|
0.057 / 0.055
|
0.013 / 0.011
|
0.008 / 0.006
|
0.008 / 0.005
|
0.004 / 0.001
|
0.144 / 0.142
|
0.093 / 0.091
|
0.092 / 0.090
|
0.059 / 0.057
|
0.062 / 0.060
|
0.128 / 0.126
|
0.132 / 0.129
|
0.229 / 0.203
|
AIC
|
3097.461
|
3091.485
|
3091.257
|
3110.804
|
3113.026
|
3113.327
|
3114.995
|
3050.204
|
3074.809
|
3075.389
|
3090.476
|
3089.283
|
3057.996
|
3056.200
|
3031.007
|
# Drop the rank and the position label, leaving only the performance columns.
Subset1B <- select(Subset1, -Team.Rank, -Position)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Plot the correlations among the remaining columns.
corPlot(Subset1B)

# Treat the two-letter position code as a categorical predictor.
Subset1$Position <- as.factor(Subset1$Position)

# Shooting statistics by position, with FW as the reference level.
linear1 <- lm(formula = Goals ~ relevel(Position, ref = "FW"), data = Subset1)
linear2 <- lm(formula = Shots ~ relevel(Position, ref = "FW"), data = Subset1)
linear3 <- lm(
  formula = Shots.on.Target ~ relevel(Position, ref = "FW"),
  data = Subset1
)
linear4 <- lm(
  formula = Average.Shot.Distance ~ relevel(Position, ref = "FW"),
  data = Subset1
)

# Defensive statistics by position, using the factor's default reference level.
linear5 <- lm(formula = Tackles ~ Position, data = Subset1)
linear6 <- lm(formula = Tackles.Won ~ Position, data = Subset1)
linear7 <- lm(formula = Blocks ~ Position, data = Subset1)
linear8 <- lm(formula = Interceptions ~ Position, data = Subset1)

# Passing and touch statistics by position, with MF as the reference level.
linear9 <- lm(
  formula = Passes.Completed ~ relevel(Position, ref = "MF"),
  data = Subset1
)
linear10 <- lm(
  formula = Passes.Attempted ~ relevel(Position, ref = "MF"),
  data = Subset1
)
linear11 <- lm(
  formula = Touches ~ relevel(Position, ref = "MF"),
  data = Subset1
)

# Coefficients and fit statistics for the Goals model.
summary(linear1)
##
## Call:
## lm(formula = Goals ~ relevel(Position, ref = "FW"), data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7986 -0.2273 -0.1538 0.2014 7.2014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.79861 0.06656 11.998 < 2e-16 ***
## relevel(Position, ref = "FW")DF -0.64476 0.09663 -6.672 7.86e-11 ***
## relevel(Position, ref = "FW")MF -0.57134 0.09259 -6.171 1.59e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7987 on 425 degrees of freedom
## Multiple R-squared: 0.1153, Adjusted R-squared: 0.1112
## F-statistic: 27.7 on 2 and 425 DF, p-value: 4.9e-12
# Coefficients and fit statistics for Shots by position (FW as reference level).
summary(linear2)
##
## Call:
## lm(formula = Shots ~ relevel(Position, ref = "FW"), data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0139 -1.8896 -0.8896 0.9923 23.9861
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0139 0.2499 20.063 < 2e-16 ***
## relevel(Position, ref = "FW")DF -3.0062 0.3628 -8.286 1.55e-15 ***
## relevel(Position, ref = "FW")MF -2.1243 0.3476 -6.111 2.25e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.999 on 425 degrees of freedom
## Multiple R-squared: 0.1483, Adjusted R-squared: 0.1443
## F-statistic: 37 on 2 and 425 DF, p-value: 1.538e-15
# Coefficients and fit statistics for Shots.on.Target by position (FW as
# reference level).
summary(linear3)
##
## Call:
## lm(formula = Shots.on.Target ~ relevel(Position, ref = "FW"),
## data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9167 -0.9167 -0.3923 0.6077 11.0833
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.9167 0.1181 16.226 < 2e-16 ***
## relevel(Position, ref = "FW")DF -1.5244 0.1715 -8.889 < 2e-16 ***
## relevel(Position, ref = "FW")MF -0.9426 0.1643 -5.737 1.83e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.417 on 425 degrees of freedom
## Multiple R-squared: 0.1607, Adjusted R-squared: 0.1568
## F-statistic: 40.69 on 2 and 425 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for Average.Shot.Distance by position (FW as
# reference level).
summary(linear4)
##
## Call:
## lm(formula = Average.Shot.Distance ~ relevel(Position, ref = "FW"),
## data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.8617 -5.2647 -0.7651 4.9264 30.5264
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.0736 0.5861 27.427 < 2e-16 ***
## relevel(Position, ref = "FW")DF 0.1949 0.8508 0.229 0.819
## relevel(Position, ref = "FW")MF 4.2881 0.8152 5.260 2.29e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.033 on 425 degrees of freedom
## Multiple R-squared: 0.07637, Adjusted R-squared: 0.07203
## F-statistic: 17.57 on 2 and 425 DF, p-value: 4.655e-08
# Coefficients and fit statistics for Tackles by position (default reference
# level of the Position factor).
summary(linear5)
##
## Call:
## lm(formula = Tackles ~ Position, data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.023 -2.111 -1.023 1.539 20.977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.0231 0.3203 15.684 < 2e-16 ***
## PositionFW -2.9120 0.4418 -6.591 1.3e-10 ***
## PositionMF -0.5620 0.4349 -1.292 0.197
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.652 on 425 degrees of freedom
## Multiple R-squared: 0.106, Adjusted R-squared: 0.1018
## F-statistic: 25.19 on 2 and 425 DF, p-value: 4.593e-11
# Coefficients and fit statistics for Tackles.Won by position (default
# reference level of the Position factor).
summary(linear6)
##
## Call:
## lm(formula = Tackles.Won ~ Position, data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9538 -1.5844 -0.5844 1.0462 14.4156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9538 0.2181 13.545 < 2e-16 ***
## PositionFW -1.7594 0.3008 -5.849 9.9e-09 ***
## PositionMF -0.3694 0.2962 -1.247 0.213
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.487 on 425 degrees of freedom
## Multiple R-squared: 0.08429, Adjusted R-squared: 0.07998
## F-statistic: 19.56 on 2 and 425 DF, p-value: 7.472e-09
# Coefficients and fit statistics for Blocks by position (default reference
# level of the Position factor).
summary(linear7)
##
## Call:
## lm(formula = Blocks ~ Position, data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3615 -1.8750 -0.3615 1.1250 10.0195
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3615 0.2151 15.627 < 2e-16 ***
## PositionFW -1.4865 0.2967 -5.010 8.01e-07 ***
## PositionMF -0.3810 0.2921 -1.304 0.193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.453 on 425 degrees of freedom
## Multiple R-squared: 0.06123, Adjusted R-squared: 0.05682
## F-statistic: 13.86 on 2 and 425 DF, p-value: 1.474e-06
# Coefficients and fit statistics for Interceptions by position (default
# reference level of the Position factor).
summary(linear8)
##
## Call:
## lm(formula = Interceptions ~ Position, data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8769 -1.1948 -0.7639 0.8052 11.8052
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8769 0.1764 16.305 < 2e-16 ***
## PositionFW -2.1130 0.2434 -8.681 < 2e-16 ***
## PositionMF -0.6821 0.2396 -2.847 0.00463 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.012 on 425 degrees of freedom
## Multiple R-squared: 0.1581, Adjusted R-squared: 0.1542
## F-statistic: 39.92 on 2 and 425 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for Passes.Completed by position (MF as
# reference level).
summary(linear9)
##
## Call:
## lm(formula = Passes.Completed ~ relevel(Position, ref = "MF"),
## data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -146.60 -51.43 -18.51 23.96 488.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 103.182 7.155 14.421 < 2e-16 ***
## relevel(Position, ref = "MF")DF 50.418 10.575 4.768 2.57e-06 ***
## relevel(Position, ref = "MF")FW -56.342 10.293 -5.474 7.54e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 88.79 on 425 degrees of freedom
## Multiple R-squared: 0.1892, Adjusted R-squared: 0.1854
## F-statistic: 49.59 on 2 and 425 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for Passes.Attempted by position (MF as
# reference level).
summary(linear10)
##
## Call:
## lm(formula = Passes.Attempted ~ relevel(Position, ref = "MF"),
## data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -171.85 -59.67 -20.86 30.66 506.15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.357 8.088 15.499 < 2e-16 ***
## relevel(Position, ref = "MF")DF 57.497 11.954 4.810 2.10e-06 ***
## relevel(Position, ref = "MF")FW -60.690 11.635 -5.216 2.86e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 100.4 on 425 degrees of freedom
## Multiple R-squared: 0.1826, Adjusted R-squared: 0.1788
## F-statistic: 47.47 on 2 and 425 DF, p-value: < 2.2e-16
# Coefficients and fit statistics for Touches by position (MF as reference
# level).
summary(linear11)
##
## Call:
## lm(formula = Touches ~ relevel(Position, ref = "MF"), data = Subset1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -198.31 -69.63 -21.29 42.59 498.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 151.286 9.011 16.788 < 2e-16 ***
## relevel(Position, ref = "MF")DF 61.022 13.319 4.581 6.08e-06 ***
## relevel(Position, ref = "MF")FW -61.876 12.963 -4.773 2.50e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 111.8 on 425 degrees of freedom
## Multiple R-squared: 0.1628, Adjusted R-squared: 0.1588
## F-statistic: 41.31 on 2 and 425 DF, p-value: < 2.2e-16
# Build a data frame of the standardised statistics with the player name and
# position attached as the first two columns.
WC2022_2 <- na.omit(WC2022)
Subset1_Scale1 <- local({
  scaled_stats <- as.data.frame(Subset1_Scale)
  # Subset1_Scale was built from Subset1, whose rows are those that survived
  # na.omit() on a *subset* of the columns. Look the labels up by Subset1's row
  # names (na.omit() preserves them) rather than by na.omit(WC2022), which
  # would also drop rows with NAs in columns that were never clustered and so
  # could misalign players and statistics.
  kept_players <- WC2022[rownames(Subset1), c("Player", "Position")]
  stopifnot(nrow(kept_players) == nrow(scaled_stats))
  cbind(kept_players$Player, kept_players$Position, scaled_stats)
})
names(Subset1_Scale1)[1] <- "Players"
names(Subset1_Scale1)[2] <- "Position"

# Forwards only: score = sum of the standardised Goals, Shots and
# Shots.on.Target minus the standardised Average.Shot.Distance.
ALL_FW <- dplyr::filter(Subset1_Scale1, Position == "FW")
ALL_FW$BestFW <- ALL_FW$Goals + ALL_FW$Shots + ALL_FW$Shots.on.Target -
  ALL_FW$Average.Shot.Distance
max(ALL_FW$BestFW)
## [1] 23.37355
# Names of the five forwards with the highest score.
ALL_FW %>%
  group_by(Players) %>%
  arrange(-BestFW) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups: Players [5]
## Players
## <chr>
## 1 Kylian Mbappé
## 2 Lionel Messi
## 3 Olivier Giroud
## 4 Julián Álvarez
## 5 Marc Rashford
# Midfielders only: score = sum of the standardised Passes.Completed,
# Passes.Attempted and Touches.
ALL_MF <- Subset1_Scale1 %>%
  dplyr::filter(Position == "MF")
ALL_MF$BestMF <- with(ALL_MF, Passes.Completed + Passes.Attempted + Touches)
# Names of the five midfielders with the highest score.
ALL_MF %>%
  group_by(Players) %>%
  arrange(desc(BestMF)) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups: Players [5]
## Players
## <chr>
## 1 Rodrigo Paul
## 2 Luka Modrić
## 3 Marcelo Brozović
## 4 Aurélien Tchouaméni
## 5 Enzo Fernández
# Defenders only: score = sum of the standardised Tackles, Tackles.Won, Blocks,
# Interceptions, Passes.Completed, Passes.Attempted and Touches.
ALL_DF <- Subset1_Scale1 %>%
  dplyr::filter(Position == "DF")
ALL_DF$BestDF <- with(
  ALL_DF,
  Tackles + Tackles.Won + Blocks + Interceptions +
    Passes.Completed + Passes.Attempted + Touches
)
# Names of the five defenders with the highest score.
ALL_DF %>%
  group_by(Players) %>%
  arrange(desc(BestDF)) %>%
  head(5) %>%
  select(Players)
## # A tibble: 5 × 1
## # Groups: Players [5]
## Players
## <chr>
## 1 Achraf Hakimi
## 2 Nicolás Otamendi
## 3 Joško Gvardiol
## 4 Rodri
## 5 Theo Hernández