library(tidyverse)
library(janitor)
library(gt)
library(PerformanceAnalytics)
library(plotly)
library(kableExtra)
library(ggrepel)
library(factoextra)
Análise por Componentes Principais - PCA
Base de Dados
# Selecionar variáveis quantitativas
<- mtcars |>
df select (mpg, disp, hp, drat, wt, qsec) |>
# Fazer o z-score (padronizacao)
scale() |> data.frame()
Correlações
chart.Correlation(df, histogram = TRUE, method = "pearson")
::plot_correlation(df) DataExplorer
PCA
Análise de Componentes Principais
<- prcomp(df)
afpc summary(afpc)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6
Standard deviation 2.0463 1.0715 0.57737 0.39289 0.3533 0.22799
Proportion of Variance 0.6979 0.1913 0.05556 0.02573 0.0208 0.00866
Cumulative Proportion 0.6979 0.8892 0.94481 0.97054 0.9913 1.00000
# Atributos:
# sdev: corresponde à raiz quadrada dos eigenvalues, ou seja, os desvios-padrão dos componentes principais.
# rotation: corresponde à matriz de tamanho jxj de eigenvectors, em que j
# representa a quantidade de variáveis da base de dados.
# center: médias de cada variável utilizadas para após a padronização.
# scale: desvios-padrão de cada variável utilizadas para a padronização.
data.frame(afpc$rotation) |>
mutate(var = names(df)) |>
pivot_longer(cols = !var) |>
mutate(var = factor(var)) |>
ggplot(aes(x = var, y = value, fill = var)) +
geom_col() +
facet_wrap(~name) +
labs(x = NULL, y = NULL, fill = "Legenda:")
ggplotly(
fviz_eig(X = afpc,
ggtheme = theme_bw(),
barcolor = "dodgerblue4",
barfill = "dodgerblue4",
linecolor = "darkgoldenrod3")
)
Cargas Fatoriais e Comunalidades
# Extraindo as Cargas Fatoriais
<- sum((afpc$sdev ^ 2) > 1) #número de variáveis presentes na base de dados com carga maior que 1
k <- afpc$rotation[, 1:k] %*% diag(afpc$sdev[1:k])
cargas_fatoriais
# Visualizando as cargas fatoriais
data.frame(cargas_fatoriais) %>%
rename(F1 = X1,
F2 = X2) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
F1 | F2 | |
---|---|---|
mpg | -0.9386100 | -0.0628714 |
disp | 0.9536542 | 0.0649896 |
hp | 0.8714293 | -0.3873213 |
drat | -0.7511939 | -0.4677369 |
wt | 0.8975495 | 0.3209513 |
qsec | -0.5173734 | 0.8173924 |
#Visualizando as Comunalidades
data.frame(rowSums(cargas_fatoriais ^ 2)) %>%
rename(comunalidades = 1) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
comunalidades | |
---|---|
mpg | 0.8849415 |
disp | 0.9136799 |
hp | 0.9094068 |
drat | 0.7830701 |
wt | 0.9086048 |
qsec | 0.9358055 |
# Relatório das cargas fatoriais e das comunalidades
data.frame(cargas_fatoriais) %>%
rename(F1 = X1,
F2 = X2) %>%
mutate(Comunalidades = rowSums(cargas_fatoriais ^ 2)) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
F1 | F2 | Comunalidades | |
---|---|---|---|
mpg | -0.9386100 | -0.0628714 | 0.8849415 |
disp | 0.9536542 | 0.0649896 | 0.9136799 |
hp | 0.8714293 | -0.3873213 | 0.9094068 |
drat | -0.7511939 | -0.4677369 | 0.7830701 |
wt | 0.8975495 | 0.3209513 | 0.9086048 |
qsec | -0.5173734 | 0.8173924 | 0.9358055 |
# Plotagem das Cargas Fatoriais
data.frame(cargas_fatoriais) %>%
ggplot(aes(x = X1, y = X2)) +
geom_point(color = "dodgerblue4") +
geom_hline(yintercept = 0, color = "darkgoldenrod3", linetype = "dashed") +
geom_vline(xintercept = 0, color = "darkgoldenrod3", linetype = "dashed") +
geom_text_repel(label = row.names(cargas_fatoriais)) +
labs(x = paste("F1", paste0("(",
round(summary(afpc)$importance[2,1] * 100,
digits = 2),
"%)")),
y = paste("F2", paste0("(",
round(summary(afpc)$importance[2,2] * 100,
digits = 2),
"%)"))) +
theme_bw()
Note
Para o cálculo das coordenadas no gráfico usamos (Cargas Fatorias) Scores Fatorias * Desv Padrão:
Por exemplo, calculando a coordenada das variáveis “mpg” e “wt”
#Scores fatorias* o desvio padrão.
#Exemplo com "mpg"
<- afpc$rotation["mpg", 1] * afpc$sdev[1]
Dim1_mpg <- afpc$rotation["mpg", 2] * afpc$sdev[2]
Dim2_mpg
paste0("Dim1= ", round(Dim1_mpg,3), ": Dim2= ", round(Dim2_mpg,2) )
[1] "Dim1= -0.939: Dim2= -0.06"
#Exemplo com "wt"
<- afpc$rotation["wt", 1] * afpc$sdev[1]
Dim1_wt <- afpc$rotation["wt", 2] * afpc$sdev[2]
Dim2_wt
paste0("Dim1= ", round(Dim1_wt,3), ": Dim2= ", round(Dim2_wt,2) )
[1] "Dim1= 0.898: Dim2= 0.32"
fviz_pca_biplot(afpc)
Scores Fatoriais
# Scores Fatoriais
<- t(afpc$rotation)/afpc$sdev
scores_fatoriais colnames(scores_fatoriais) <- colnames(df)
scores_fatoriais
mpg disp hp drat wt qsec
PC1 -0.2241512 0.22774394 0.2081077 -0.17939402 0.2143455 -0.1235549
PC2 -0.0547607 0.05660566 -0.3373549 -0.40739654 0.2795470 0.7119448
PC3 0.3373784 -0.16780224 -0.2531053 -1.38644346 -0.7235598 -0.5898997
PC4 -1.9905415 -1.52720257 -0.3131144 -0.05750395 -0.2656826 -0.1086348
PC5 0.3146458 -0.83401953 2.2808409 -0.40697924 -0.6515060 1.1942189
PC6 -1.5461096 2.49249695 -0.2092904 0.49466253 -3.0372769 1.0593866
%>%
scores_fatoriais t() %>%
data.frame() %>%
rename(PC1 = 1,
PC2 = 2) %>%
select(PC1, PC2) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
PC1 | PC2 | |
---|---|---|
mpg | -0.2241512 | -0.0547607 |
disp | 0.2277439 | 0.0566057 |
hp | 0.2081077 | -0.3373549 |
drat | -0.1793940 | -0.4073965 |
wt | 0.2143455 | 0.2795470 |
qsec | -0.1235549 | 0.7119448 |
Ranking
# Proposta da construção de um ranking ------------------------------------
#Assumindo-se apenas o F1 e F2 como indicadores, calculam-se os scores
#fatorias
<- scores_fatoriais[1,]
score_D1 score_D1
mpg disp hp drat wt qsec
-0.2241512 0.2277439 0.2081077 -0.1793940 0.2143455 -0.1235549
<- scores_fatoriais[2,]
score_D2 score_D2
mpg disp hp drat wt qsec
-0.05476070 0.05660566 -0.33735491 -0.40739654 0.27954697 0.71194477
#Estabelecendo o ranking dos indicadores assumido
<- map2_dfr(df, score_D1, ~ .x*.y) |>
F1 cbind(as_tibble(rownames(df))) |>
column_to_rownames("value")
<- map2_dfr(df, score_D2, ~ .x*.y) |>
F2 cbind(as_tibble(rownames(df))) |>
column_to_rownames("value")
#Na construção de rankings no R, devemos efetuar a multiplicação por -1,
#visto que os scores fatoriais das observações mais fortes são, por padrão,
#apresentados acompanhados do sinal de menos.
<- data.frame(F1) %>%
F1 mutate(fator1 = rowSums(.) * -1)
%>%
F1 kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
mpg | disp | hp | drat | wt | qsec | fator1 | |
---|---|---|---|---|---|---|---|
Mazda RX4 | -0.0338210 | -0.1299552 | -0.1113569 | -0.1018086 | -0.1308364 | 0.0960226 | 0.4117555 |
Mazda RX4 Wag | -0.0338210 | -0.1299552 | -0.1113569 | -0.1018086 | -0.0749749 | 0.0573024 | 0.3946142 |
Datsun 710 | -0.1007657 | -0.2255080 | -0.1629567 | -0.0850327 | -0.1965558 | -0.0526352 | 0.8234541 |
Hornet 4 Drive | -0.0486976 | 0.0501250 | -0.1113569 | 0.1733157 | -0.0004929 | -0.1100241 | 0.0471308 |
Hornet Sportabout | 0.0517194 | 0.2375554 | 0.0859364 | 0.1498295 | 0.0487967 | 0.0573024 | -0.6311398 |
Valiant | 0.0740343 | -0.0105142 | -0.1265333 | 0.2806813 | 0.0531780 | -0.1639557 | -0.1068902 |
Duster 360 | 0.2153620 | 0.2375554 | 0.2984062 | 0.1296984 | 0.0772751 | 0.1388913 | -1.0971885 |
Merc 240D | -0.1602721 | -0.1543947 | -0.2570505 | -0.0313499 | -0.0059695 | -0.1487442 | 0.7577809 |
Merc 230 | -0.1007657 | -0.1652362 | -0.1568862 | -0.1085189 | -0.0147321 | -0.3492594 | 0.8953985 |
Merc 280 | 0.0331237 | -0.1159898 | -0.0718982 | -0.1085189 | 0.0487967 | -0.0312009 | 0.2456875 |
Merc 280C | 0.0851918 | -0.1159898 | -0.0718982 | -0.1085189 | 0.0487967 | -0.0726867 | 0.2351053 |
Merc 450SE | 0.1372599 | 0.0828335 | 0.1011128 | 0.1766709 | 0.1868074 | 0.0310280 | -0.7157124 |
Merc 450SL | 0.1037875 | 0.0828335 | 0.1011128 | 0.1766709 | 0.1123254 | 0.0171994 | -0.5939295 |
Merc 450SLC | 0.1818897 | 0.0828335 | 0.1011128 | 0.1766709 | 0.1232787 | -0.0104579 | -0.6553276 |
Cadillac Fleetwood | 0.3604088 | 0.4433614 | 0.1769949 | 0.2236433 | 0.4453037 | -0.0090750 | -1.6406371 |
Lincoln Continental | 0.3604088 | 0.4213107 | 0.2073477 | 0.2001571 | 0.4834210 | 0.0019879 | -1.6746333 |
Chrysler Imperial | 0.2004854 | 0.3845597 | 0.2528770 | 0.1229881 | 0.4661149 | 0.0296451 | -1.4566702 |
Fiat 128 | -0.4578041 | -0.2793483 | -0.2449093 | -0.1622017 | -0.2228436 | -0.1120983 | 1.4792053 |
Honda Civic | -0.3834211 | -0.2848609 | -0.2874033 | -0.4473915 | -0.3509964 | -0.0464123 | 1.8004855 |
Toyota Corolla | -0.5135913 | -0.2933137 | -0.2479446 | -0.2091741 | -0.3028022 | -0.1418299 | 1.7086558 |
Toyota Corona | -0.0524168 | -0.2032736 | -0.1508156 | -0.0347051 | -0.1647914 | -0.1494357 | 0.7554381 |
Dodge Challenger | 0.1707322 | 0.1603782 | 0.0100544 | 0.2806813 | 0.0663218 | 0.0676739 | -0.7558417 |
AMC Javelin | 0.1818897 | 0.1346525 | 0.0100544 | 0.1498295 | 0.0477013 | 0.0379423 | -0.5620696 |
Camaro Z28 | 0.2525535 | 0.2191799 | 0.2984062 | -0.0447706 | 0.1364225 | 0.1686229 | -1.0304144 |
Pontiac Firebird | 0.0331237 | 0.3110576 | 0.0859364 | 0.1733157 | 0.1375179 | 0.0552281 | -0.7961793 |
Fiat X1-9 | -0.2681274 | -0.2787970 | -0.2449093 | -0.1622017 | -0.2808957 | -0.0726867 | 1.3076179 |
Porsche 914-2 | -0.2197785 | -0.2029061 | -0.1690273 | -0.2796328 | -0.2359874 | 0.0794282 | 1.0279039 |
Lotus Europa | -0.3834211 | -0.2492124 | -0.1022511 | -0.0581913 | -0.3733410 | 0.0655996 | 1.1008173 |
Ford Pantera L | 0.1595748 | 0.2210174 | 0.3560766 | -0.2091741 | -0.0103508 | 0.2315432 | -0.7486870 |
Ferrari Dino | 0.0145279 | -0.1575185 | 0.0859364 | -0.0078637 | -0.0979767 | 0.1624000 | 0.0004945 |
Maserati Bora | 0.1893280 | 0.1291398 | 0.5715816 | 0.0189777 | 0.0772751 | 0.2246288 | -1.2109310 |
Volvo 142E | -0.0486976 | -0.2016198 | -0.1143922 | -0.1722672 | -0.0957860 | -0.0519438 | 0.6847066 |
<- data.frame(F2) %>%
F2 mutate(fator2 = rowSums(.) * -1)
%>%
F2 kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
mpg | disp | hp | drat | wt | qsec | fator2 | |
---|---|---|---|---|---|---|---|
Mazda RX4 | -0.0082626 | -0.0323003 | 0.1805162 | -0.2312031 | -0.1706354 | -0.5532987 | 0.8151838 |
Mazda RX4 Wag | -0.0082626 | -0.0323003 | 0.1805162 | -0.2312031 | -0.0977814 | -0.3301863 | 0.5192175 |
Datsun 710 | -0.0246173 | -0.0560499 | 0.2641625 | -0.1931058 | -0.2563459 | 0.3032933 | -0.0373370 |
Hornet 4 Drive | -0.0118969 | 0.0124585 | 0.1805162 | 0.3935929 | -0.0006428 | 0.6339777 | -1.2080056 |
Hornet Sportabout | 0.0126352 | 0.0590443 | -0.1393081 | 0.3402567 | 0.0636401 | -0.3301863 | -0.0060818 |
Valiant | 0.0180868 | -0.0026133 | 0.2051181 | 0.6374158 | 0.0693541 | 0.9447413 | -1.8721027 |
Duster 360 | 0.0526135 | 0.0590443 | -0.4837342 | 0.2945399 | 0.1007813 | -0.8003159 | 0.7770711 |
Merc 240D | -0.0391549 | -0.0383747 | 0.4166941 | -0.0711944 | -0.0077854 | 0.8570900 | -1.1172748 |
Merc 230 | -0.0246173 | -0.0410694 | 0.2543218 | -0.2464420 | -0.0192134 | 2.0124931 | -1.9354727 |
Merc 280 | 0.0080922 | -0.0288292 | 0.1165513 | -0.2464420 | 0.0636401 | 0.1797847 | -0.0927971 |
Merc 280C | 0.0208126 | -0.0288292 | 0.1165513 | -0.2464420 | 0.0636401 | 0.4188336 | -0.3445663 |
Merc 450SE | 0.0335329 | 0.0205882 | -0.1639099 | 0.4012124 | 0.2436321 | -0.1787887 | -0.3562671 |
Merc 450SL | 0.0253556 | 0.0205882 | -0.1639099 | 0.4012124 | 0.1464936 | -0.0991057 | -0.3306341 |
Merc 450SLC | 0.0444361 | 0.0205882 | -0.1639099 | 0.4012124 | 0.1607786 | 0.0602602 | -0.5233657 |
Cadillac Fleetwood | 0.0880488 | 0.1101973 | -0.2869193 | 0.5078849 | 0.5807602 | 0.0522920 | -1.0522638 |
Lincoln Continental | 0.0880488 | 0.1047166 | -0.3361230 | 0.4545487 | 0.6304723 | -0.0114544 | -0.9302089 |
Chrysler Imperial | 0.0489791 | 0.0955821 | -0.4099286 | 0.2793010 | 0.6079018 | -0.1708204 | -0.4510151 |
Fiat 128 | -0.1118427 | -0.0694319 | 0.3970126 | -0.3683535 | -0.2906301 | 0.6459301 | -0.2026846 |
Honda Civic | -0.0936707 | -0.0708021 | 0.4658978 | -1.0160079 | -0.4577656 | 0.2674360 | 0.9049125 |
Toyota Corolla | -0.1254716 | -0.0729030 | 0.4019330 | -0.4750260 | -0.3949112 | 0.8172485 | -0.1508697 |
Toyota Corona | -0.0128055 | -0.0505236 | 0.2444810 | -0.0788138 | -0.2149191 | 0.8610741 | -0.7484931 |
Dodge Challenger | 0.0417103 | 0.0398619 | -0.0162987 | 0.6374158 | 0.0864962 | -0.3899486 | -0.3992369 |
AMC Javelin | 0.0444361 | 0.0334678 | -0.0162987 | 0.3402567 | 0.0622115 | -0.2186302 | -0.2454433 |
Camaro Z28 | 0.0616995 | 0.0544771 | -0.4837342 | -0.1016722 | 0.1779207 | -0.9716343 | 1.2629434 |
Pontiac Firebird | 0.0080922 | 0.0773132 | -0.1393081 | 0.3935929 | 0.1793493 | -0.3182339 | -0.2008057 |
Fiat X1-9 | -0.0655042 | -0.0692949 | 0.3970126 | -0.3683535 | -0.3663410 | 0.4188336 | 0.0536473 |
Porsche 914-2 | -0.0536924 | -0.0504322 | 0.2740033 | -0.6350347 | -0.3077722 | -0.4576791 | 1.2306073 |
Lotus Europa | -0.0936707 | -0.0619416 | 0.1657551 | -0.1321501 | -0.4869072 | -0.3779961 | 0.9869106 |
Ford Pantera L | 0.0389845 | 0.0549338 | -0.5772213 | -0.4750260 | -0.0134994 | -1.3341918 | 2.3060202 |
Ferrari Dino | 0.0035492 | -0.0391512 | -0.1393081 | -0.0178581 | -0.1277801 | -0.9357769 | 1.2563252 |
Maserati Bora | 0.0462533 | 0.0320976 | -0.9265678 | 0.0430976 | 0.1007813 | -1.2943503 | 1.9986883 |
Volvo 142E | -0.0118969 | -0.0501125 | 0.1854366 | -0.3912119 | -0.1249231 | 0.2993092 | 0.0933987 |
#Importando o data frame original
<- mtcars
df_orig
# Importando as colunas de fatores F1 e F2
"Fator1"] <- F1$fator1
df_orig["Fator2"] <- F2$fator2
df_orig[
#Criando um ranking pela soma ponderada dos fatores por sua variância
#compartilhada:
#Calculando a variância compartilhada
<- (afpc$sdev ^ 2/sum(afpc$sdev ^ 2))
var_compartilhada var_compartilhada
[1] 0.697899413 0.191352020 0.055559444 0.025726757 0.020799335 0.008663031
<- df_orig %>%
df_orig mutate(pontuacao = Fator1 * var_compartilhada[1] +
* var_compartilhada[2])
Fator2
# Visualizando o ranking final
%>%
df_orig arrange(desc(pontuacao)) %>%
kable() %>%
kable_styling(bootstrap_options = "striped",
full_width = T,
font_size = 12)
mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | Fator1 | Fator2 | pontuacao | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Honda Civic | 30.4 | 4 | 75.7 | 52 | 4.93 | 1.615 | 18.52 | 1 | 1 | 4 | 2 | 1.8004855 | 0.9049125 | 1.4297146 |
Toyota Corolla | 33.9 | 4 | 71.1 | 65 | 4.22 | 1.835 | 19.90 | 1 | 1 | 4 | 1 | 1.7086558 | -0.1508697 | 1.1636007 |
Fiat 128 | 32.4 | 4 | 78.7 | 66 | 4.08 | 2.200 | 19.47 | 1 | 1 | 4 | 1 | 1.4792053 | -0.2026846 | 0.9935524 |
Lotus Europa | 30.4 | 4 | 95.1 | 113 | 3.77 | 1.513 | 16.90 | 1 | 1 | 5 | 2 | 1.1008173 | 0.9869106 | 0.9571071 |
Porsche 914-2 | 26.0 | 4 | 120.3 | 91 | 4.43 | 2.140 | 16.70 | 0 | 1 | 5 | 2 | 1.0279039 | 1.2306073 | 0.9528527 |
Fiat X1-9 | 27.3 | 4 | 79.0 | 66 | 4.08 | 1.935 | 18.90 | 1 | 1 | 4 | 1 | 1.3076179 | 0.0536473 | 0.9228513 |
Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 | 0.8234541 | -0.0373370 | 0.5675436 |
Volvo 142E | 21.4 | 4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 | 0.6847066 | 0.0933987 | 0.4957284 |
Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 | 0.4117555 | 0.8151838 | 0.4433510 |
Toyota Corona | 21.5 | 4 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 | 0.7554381 | -0.7484931 | 0.3839941 |
Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 | 0.3946142 | 0.5192175 | 0.3747543 |
Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 | 0.7577809 | -1.1172748 | 0.3150620 |
Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 | 0.8953985 | -1.9354727 | 0.2545415 |
Ferrari Dino | 19.7 | 6 | 145.0 | 175 | 3.62 | 2.770 | 15.50 | 0 | 1 | 5 | 6 | 0.0004945 | 1.2563252 | 0.2407455 |
Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 | 0.2456875 | -0.0927971 | 0.1537082 |
Merc 280C | 17.8 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 | 0.2351053 | -0.3445663 | 0.0981464 |
Ford Pantera L | 15.8 | 8 | 351.0 | 264 | 4.22 | 3.170 | 14.50 | 0 | 1 | 5 | 4 | -0.7486870 | 2.3060202 | -0.0812466 |
Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 | 0.0471308 | -1.2080056 | -0.1982618 |
Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 | -0.1068902 | -1.8721027 | -0.4328293 |
AMC Javelin | 15.2 | 8 | 304.0 | 150 | 3.15 | 3.435 | 17.30 | 0 | 0 | 3 | 2 | -0.5620696 | -0.2454433 | -0.4392341 |
Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 | -0.6311398 | -0.0060818 | -0.4416359 |
Maserati Bora | 15.0 | 8 | 301.0 | 335 | 3.54 | 3.570 | 14.60 | 0 | 1 | 5 | 8 | -1.2109310 | 1.9986883 | -0.4626550 |
Camaro Z28 | 13.3 | 8 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 | -1.0304144 | 1.2629434 | -0.4774588 |
Merc 450SL | 17.3 | 8 | 275.8 | 180 | 3.07 | 3.730 | 17.60 | 0 | 0 | 3 | 3 | -0.5939295 | -0.3306341 | -0.4777705 |
Merc 450SLC | 15.2 | 8 | 275.8 | 180 | 3.07 | 3.780 | 18.00 | 0 | 0 | 3 | 3 | -0.6553276 | -0.5233657 | -0.5574998 |
Merc 450SE | 16.4 | 8 | 275.8 | 180 | 3.07 | 4.070 | 17.40 | 0 | 0 | 3 | 3 | -0.7157124 | -0.3562671 | -0.5676677 |
Pontiac Firebird | 19.2 | 8 | 400.0 | 175 | 3.08 | 3.845 | 17.05 | 0 | 0 | 3 | 2 | -0.7961793 | -0.2008057 | -0.5940777 |
Dodge Challenger | 15.5 | 8 | 318.0 | 150 | 2.76 | 3.520 | 16.87 | 0 | 0 | 3 | 2 | -0.7558417 | -0.3992369 | -0.6038963 |
Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 | -1.0971885 | 0.7770711 | -0.6170330 |
Chrysler Imperial | 14.7 | 8 | 440.0 | 230 | 3.23 | 5.345 | 17.42 | 0 | 0 | 3 | 4 | -1.4566702 | -0.4510151 | -1.1029119 |
Cadillac Fleetwood | 10.4 | 8 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 | -1.6406371 | -1.0522638 | -1.3463525 |
Lincoln Continental | 10.4 | 8 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 | -1.6746333 | -0.9302089 | -1.3467229 |
#Visualizando o ranking
|>
df_orig rownames_to_column(var = "modelo") |>
mutate (modelo = as_factor(modelo)) |>
ggplot(aes(
x = fct_reorder(modelo, pontuacao),
y = pontuacao))+
coord_flip()+
geom_col(aes(fill = pontuacao))+
scale_fill_gradient(low = "darkslategray", high = "green")
Apendice - Cluster
Clusterizando pelos scores_fatorias:
<- hclust(dist(afpc$x), method = "ward.D2")
mt_cluster plot(mt_cluster)
<- cutree(mt_cluster, k = 3)
mt_cluster_k3 plot(mt_cluster)
rect.hclust(mt_cluster, k = 3, border = "red")
Cluster pelos 3 primeiros grupos
<- data.frame(afpc$x, "cluster" = factor(mt_cluster_k3)) mt_grupos
Visualizando os clusteres dos scores:
|>
mt_grupos ggplot(aes(x=PC1, y=PC2)) +
geom_text_repel(aes(label = rownames(mt_grupos))) +
theme_classic() +
geom_hline(yintercept = 0, color = "gray70") +
geom_vline(xintercept = 0, color = "gray70") +
geom_point(aes(color = cluster), alpha = 0.55, size = 3) +
xlab("PC1") +
ylab("PC2") +
xlim(-5, 6) +
ggtitle("PCA do MTCARS")
<- tribble(~A, ~B, ~C,
A 1, 2, 3,
4, 1, 2,
5, 4, 8)
|> summarise() A
# A tibble: 1 × 0
Estudo Teórico:
Eigen Values
#Matriz de estudo
<- tribble(~A, ~B, ~C,
A 1, 2, 4,
4, 1, 2,
5, 4, 8)
#Matriz padronizada
<- as_tibble(scale(A))
Az
#Matriz de correlação (rho)
<- (cor(Az))
rho
#eigen values da matriz (rho) - Somando dá o número de variáveis
<- eigen(rho)
ev <- 1:sum(ev$values)
componentes_principais
# Outra maneira de ver os eigen values é a variância compartilhada entre os componentes.
<- ev$values/sum(ev$values)
var_compartilhada <- cumsum(var_compartilhada)
var_compartilhada_cum
<- tibble(pc = paste0("PC", componentes_principais),
rel_eigen ev = ev$values,
vc = var_compartilhada,
vc_cum = var_compartilhada_cum)
rel_eigen
# A tibble: 3 × 4
pc ev vc vc_cum
<chr> <dbl> <dbl> <dbl>
1 PC1 2.28e+ 0 7.59e- 1 0.759
2 PC2 7.24e- 1 2.41e- 1 1
3 PC3 5.55e-17 1.85e-17 1
Visualizando:
%>%
rel_eigen ggplot(aes(x = componentes_principais,
y = var_compartilhada,
group = 1,
label = paste0(round(var_compartilhada * 100,
digits = 2), "%"))) +
geom_col(fill = "dodgerblue4", color = "black") +
geom_line(color = "darkgoldenrod3",
size = 1.2) +
geom_point(size = 2) +
geom_text(size = 3, vjust = 2, color = "white") +
labs(x = "Componentes Principais",
y = "Variância Compartilhada") +
theme_bw()
Eigen Vectors:
Enquanto os eigen values representava o variância capturada em cada componente, os eigen vector mostram o peso que cada variável tem em cada componente:
data.frame(ev$vectors) %>%
rename(PC1 = X1, PC2 = X2, PC3 = X3) %>%
mutate(var = names(Az[1:3])) %>%
pivot_longer(cols = !"var") %>%
mutate(var = factor(var)) %>%
ggplot(aes(x = var, y = value, fill = var)) +
geom_bar(stat = "identity", color = "black") +
facet_wrap(~name) +
labs(x = NULL, y = NULL, fill = "Legenda:") +
scale_fill_viridis_d() +
theme_bw()
# Determinados os eigenvectors, podemos confirmar sua ligação direta com os
# eigenvalues aferidos, da seguinte maneira:
# Estabelecendo a matriz diagonal de eigenvalues (L2)
<- diag(ev$values)
L2 L2
[,1] [,2] [,3]
[1,] 2.27566 0.0000000 0.000000e+00
[2,] 0.00000 0.7243401 0.000000e+00
[3,] 0.00000 0.0000000 5.551115e-17
# Assim, com os eigenvectors calculados, podemos provar que V'.rho.V = L2
<- t(ev$vectors) %*% rho %*% ev$vectors
prova_01 round(x = prova_01,digits = 14)
[,1] [,2] [,3]
[1,] 2.27566 0.0000000 0
[2,] 0.00000 0.7243401 0
[3,] 0.00000 0.0000000 0