Probstat Final Exam (UAS)¶
EFA¶
Data Preparation¶
In [1]:
import pandas as pd
columns = [
    "Aplikasi sering menampilkan konten yang sesuai dengan minat saya.",  # AE1
    "Saya sering mendapatkan rekomendasi postingan atau video.",  # AE2
    "Fitur autoplay membuat saya terus menonton konten.",  # AE3
    "Notifikasi sering membuat saya kembali membuka aplikasi.",  # AE4
    "Saya merasa aplikasi sangat baik dalam mempersonalisasi konten.",  # AE5
    "Waktu layar harian saya di media sosial cukup tinggi.",  # PE1
    "Saya membuka media sosial berkali-kali dalam sehari.",  # PE2
    "Saya menghabiskan waktu lama dalam satu sesi scrolling.",  # PE3
    "Saya mengecek media sosial secara kebiasaan.",  # PE4
    "Setelah mulai scrolling, saya sulit berhenti.",  # PE5
    "Saya merasa lelah secara mental setelah scrolling lama.",  # DF1
    "Konsentrasi saya menurun setelah menggunakan media sosial.",  # DF2
    "Konten negatif meningkatkan rasa cemas saya.",  # DF3
    "Scrolling larut malam mengganggu tidur saya.",  # DF4
    "Saya merasa kesal atau lelah setelah terlalu lama menggunakan media sosial.",  # DF5
    "Saya membuka media sosial tanpa berpikir.",  # IU1
    "Saya mengecek ponsel tanpa alasan jelas.",  # IU2
    "Saya merasa gelisah ketika tidak bisa mengakses media sosial.",  # IU3
    "Saya kesulitan mengontrol kebiasaan scrolling saya.",  # IU4
    "Saya menggunakan scrolling untuk menghindari tugas atau tanggung jawab.",  # IU5
]
shortened_columns = [
    "AE1",  # The app often shows content that matches my interests.
    "AE2",  # I often get post or video recommendations.
    "AE3",  # The autoplay feature keeps me watching content.
    "AE4",  # Notifications often make me reopen the app.
    "AE5",  # I feel the app is very good at personalizing content.
    "PE1",  # My daily screen time on social media is quite high.
    "PE2",  # I open social media many times a day.
    "PE3",  # I spend a long time in a single scrolling session.
    "PE4",  # I check social media out of habit.
    "PE5",  # Once I start scrolling, it is hard to stop.
    "DF1",  # I feel mentally tired after long scrolling.
    "DF2",  # My concentration drops after using social media.
    "DF3",  # Negative content increases my anxiety.
    "DF4",  # Late-night scrolling disrupts my sleep.
    "DF5",  # I feel irritable or tired after using social media too long.
    "IU1",  # I open social media without thinking.
    "IU2",  # I check my phone for no clear reason.
    "IU3",  # I feel restless when I cannot access social media.
    "IU4",  # I struggle to control my scrolling habit.
    "IU5",  # I use scrolling to avoid tasks or responsibilities.
]
mapped_columns = dict(zip(columns, shortened_columns))
df = pd.read_csv("data.csv")[columns].rename(columns=mapped_columns)
print(df.head())
   AE1  AE2  AE3  AE4  AE5  PE1  PE2  PE3  PE4  PE5  DF1  DF2  DF3  DF4  DF5  \
0    3    3    3    3    3    3    3    3    3    3    3    3    3    3    3
1    5    5    5    2    5    4    5    5    5    5    5    4    4    5    5
2    5    5    1    1    5    1    5    1    5    5    1    5    5    5    1
3    4    4    4    4    4    4    3    4    4    4    4    3    4    3    4
4    3    3    3    3    3    3    3    3    3    3    3    3    3    3    3

   IU1  IU2  IU3  IU4  IU5
0    3    3    3    3    3
1    3    3    5    5    5
2    1    1    1    1    1
3    4    4    4    4    4
4    3    3    3    3    3
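As a quick sanity check before the adequacy tests, it helps to confirm the sample size and that no responses are missing. The two-line check below is illustrative and was not part of the original notebook:

# Sanity check (illustrative): number of respondents/items and missing responses.
print(df.shape)
print(df.isna().sum().sum(), "missing values")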
Testing Data Suitability for Exploratory Factor Analysis (EFA)¶
In [2]:
from factor_analyzer.factor_analyzer import calculate_kmo, calculate_bartlett_sphericity
kmo_all, kmo_model = calculate_kmo(df)
print(f"KMO Model: {kmo_model}\n")
chi_square_value, p_value = calculate_bartlett_sphericity(df)
print(f"Bartlett's Test\nChi-Square: {chi_square_value}\np-value: {p_value}")
KMO Model: 0.5422426485444672

Bartlett's Test
Chi-Square: 469.57367662907563
p-value: 1.1492952292878107e-25
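A KMO of about 0.54 clears the 0.5 floor usually treated as the minimum acceptable for factor analysis but sits below the often-recommended 0.6, so sampling adequacy is marginal. Bartlett's test is clearly significant (p < 0.001), indicating the correlation matrix is not an identity matrix, so factor extraction can proceed.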
Factor Extraction Using Principal Axis Factoring (PAF)¶
In [3]:
from factor_analyzer import FactorAnalyzer
fa = FactorAnalyzer(rotation=None)  # note: FactorAnalyzer extracts with method="minres" by default; pass method="principal" for strict PAF
fa.fit(df)
eigenvalues, _ = fa.get_eigenvalues()
print(f"Eigenvalues: {eigenvalues}")
Eigenvalues: [7.53869336 3.15181626 2.57648467 1.52836706 1.18951401 0.81788869 0.72583431 0.61773111 0.42444872 0.35746078 0.28440267 0.21952092 0.17494287 0.12704772 0.09713793 0.06818905 0.05050328 0.026496 0.01548844 0.00803212]
Determining the Number of Factors¶
In [4]:
import matplotlib.pyplot as plt
plt.plot(range(1, len(eigenvalues) + 1), eigenvalues, marker="o")
plt.title("Scree Plot")
plt.xlabel("Factors")
plt.ylabel("Eigenvalues")
plt.show()
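The scree plot (eigenvalues against factor number) is what motivates retaining three factors in the next step. For comparison, the Kaiser criterion (eigenvalue > 1) alone would retain five factors, since five of the eigenvalues printed above exceed 1. A minimal check of that rule, added here for illustration:

# Kaiser criterion (illustrative): count eigenvalues greater than 1
# (5 here, versus the 3 factors suggested by the scree plot's elbow).
n_kaiser = int((eigenvalues > 1).sum())
print(f"Factors with eigenvalue > 1: {n_kaiser}")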
Factor Rotation and Evaluating Factor Loadings¶
In [5]:
fa = FactorAnalyzer(rotation="varimax", n_factors=3)
fa.fit(df)
factor_loadings = pd.DataFrame(
    fa.loadings_,
    columns=["Factor 1", "Factor 2", "Factor 3"],
    index=df.columns,
)
print(factor_loadings)
     Factor 1  Factor 2  Factor 3
AE1 -0.011047  0.800300  0.181785
AE2  0.058655  0.719559 -0.068026
AE3  0.069883  0.028497  0.686960
AE4  0.193769  0.037247  0.723839
AE5 -0.097684  0.737522  0.156248
PE1  0.705539  0.257549  0.242947
PE2  0.483122  0.842333 -0.069255
PE3  0.434521  0.399817  0.536871
PE4  0.492928  0.612245  0.211371
PE5  0.489805  0.467536  0.203912
DF1  0.193417  0.173129  0.732688
DF2  0.038070  0.517966  0.499749
DF3 -0.271809  0.519422  0.541725
DF4  0.469166  0.495133  0.119520
DF5  0.137849  0.058899  0.705001
IU1  0.748417  0.025168 -0.110192
IU2  0.855393 -0.078035 -0.025692
IU3  0.718472 -0.041228  0.383872
IU4  0.727780  0.113522  0.454467
IU5  0.552754  0.098727  0.385518
Interpreting and Naming the Factors¶
In [6]:
# Factor 1: Impulsive / Habitual Use
# PE1 0.705539
# IU1 0.748417
# IU2 0.855393
# IU3 0.718472
# IU4 0.727780
# IU5 0.552754
#
# Factor 2: Algorithmic Reinforcement / High Engagement
# AE1 0.800300
# AE2 0.719559
# AE5 0.737522
# PE2 0.842333
# PE4 0.612245
# DF2 0.517966
#
# Factor 3: Digital Fatigue / Cognitive–Emotional Strain
# AE4 0.723839
# PE3 0.536871
# DF1 0.732688
# DF3 0.541725
# DF5 0.705001
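As a cross-check of the manual grouping above, each item can be assigned to the factor with its largest absolute loading. This heuristic is illustrative, not the selection rule used here: it would, for example, place AE3 (0.687) under Factor 3, while the grouping above leaves out AE3 as well as the cross-loading items PE5 and DF4.

# Heuristic cross-check (not the selection rule used above): assign each item
# to the factor on which it loads most strongly in absolute value.
dominant_factor = factor_loadings.abs().idxmax(axis=1)
print(dominant_factor)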
Additional Information¶
Communalities¶
In [7]:
communalities = pd.DataFrame(
    fa.get_communalities(), index=df.columns, columns=["Communality"]
)
print(communalities)
     Communality
AE1     0.673648
AE2     0.525833
AE3     0.477610
AE4     0.562876
AE5     0.577895
PE1     0.623140
PE2     0.947727
PE3     0.636893
PE4     0.662500
PE5     0.500078
DF1     0.604215
DF2     0.519487
DF3     0.637145
DF4     0.479558
DF5     0.519497
IU1     0.572904
IU2     0.738446
IU3     0.665260
IU4     0.749092
IU5     0.463908
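Most items have communalities above 0.5, meaning the three factors reproduce more than half of each item's variance; AE3, DF4, and IU5 fall slightly below that mark, while PE2 stands out at 0.95.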
Factor Scores¶
In [8]:
factor_scores = fa.transform(df)
factor_scores_df = pd.DataFrame(
    factor_scores, columns=["Factor 1", "Factor 2", "Factor 3"]
)
print(factor_scores_df.head())
   Factor 1  Factor 2  Factor 3
0 -0.209038 -1.162801  0.088975
1  0.651179  1.112795  0.646581
2 -1.861857  2.117487 -1.860610
3  0.232958 -0.990855  1.219262
4 -0.209038 -1.162801  0.088975
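Each respondent now has a score on each of the three factors. Note that rows 0 and 4 receive identical scores because their item responses are identical (all 3s in df.head() above).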
Evaluating Factor Reliability¶
In [9]:
def cronbach_alpha(df):
    # Cronbach's alpha: k / (k - 1) * (1 - sum(item variances) / variance(total score))
    item_variance = df.var(ddof=1, axis=0)
    total_variance = df.sum(axis=1).var(ddof=1)
    n_items = df.shape[1]
    alpha = (n_items / (n_items - 1)) * (1 - item_variance.sum() / total_variance)
    return alpha
faktor_1_items = ["PE1", "IU1", "IU2", "IU3", "IU4", "IU5"]
faktor_2_items = ["AE1", "AE2", "AE5", "PE2", "PE4", "DF2"]
faktor_3_items = ["AE4", "PE3", "DF1", "DF3", "DF5"]

alpha_faktor_1 = cronbach_alpha(df[faktor_1_items])
print(f"Cronbach's Alpha for Factor 1: {alpha_faktor_1:.2f}")
alpha_faktor_2 = cronbach_alpha(df[faktor_2_items])
print(f"Cronbach's Alpha for Factor 2: {alpha_faktor_2:.2f}")
alpha_faktor_3 = cronbach_alpha(df[faktor_3_items])
print(f"Cronbach's Alpha for Factor 3: {alpha_faktor_3:.2f}")
Cronbach's Alpha for Factor 1: 0.88
Cronbach's Alpha for Factor 2: 0.85
Cronbach's Alpha for Factor 3: 0.83
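All three coefficients are well above the conventional 0.70 threshold, indicating good internal consistency within each factor.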
In [10]:
import numpy as np

def ave(loadings):
    # Average Variance Extracted: the mean of the squared standardized loadings
    return np.mean(np.square(loadings))

ave_faktor_1 = ave(factor_loadings.loc[faktor_1_items, "Factor 1"])
ave_faktor_2 = ave(factor_loadings.loc[faktor_2_items, "Factor 2"])
ave_faktor_3 = ave(factor_loadings.loc[faktor_3_items, "Factor 3"])
print(f"AVE for factor 1: {ave_faktor_1:.4f}")
print(f"AVE for factor 2: {ave_faktor_2:.4f}")
print(f"AVE for factor 3: {ave_faktor_3:.4f}")
AVE for factor 1: 0.5235
AVE for factor 2: 0.5091
AVE for factor 3: 0.4279
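Factors 1 and 2 clear the conventional 0.50 AVE benchmark for convergent validity; Factor 3 (0.43) falls slightly short, meaning its items on average share less than half of their variance with the factor.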
In [11]:
df_scores = pd.DataFrame(factor_scores, columns=["Factor_1", "Factor_2", "Factor_3"])
correlation_matrix = df_scores.corr()
print("\n=== Inter-Factor Correlation Matrix ===")
print(correlation_matrix.to_string())
correlation_squared = correlation_matrix**2
ave_values = {
    "Factor_1": ave_faktor_1,
    "Factor_2": ave_faktor_2,
    "Factor_3": ave_faktor_3,
}
print("\n=== Average Variance Extracted (AVE) ===")
for factor, ave_value in ave_values.items():  # "ave_value", not "ave", to avoid shadowing the ave() function above
    print(f"{factor}: {ave_value:.3f}")
print("\n=== Discriminant Validity ===")
for factor, ave_value in ave_values.items():
    for other_factor in ave_values:
        if factor != other_factor:
            # Fornell-Larcker criterion: AVE must exceed the squared correlation
            r_squared = correlation_squared.loc[factor, other_factor]
            print(f"{factor} vs {other_factor}:")
            print(f"  R²: {r_squared:.3f}")
            print(f"  AVE ({factor}): {ave_value:.3f}")
            if r_squared < ave_value:
                print("  Discriminant validity satisfied ✔️")
            else:
                print("  Discriminant validity not satisfied ❌")
=== Inter-Factor Correlation Matrix ===
Factor_1 Factor_2 Factor_3
Factor_1 1.000000 0.039229 0.004695
Factor_2 0.039229 1.000000 -0.028065
Factor_3 0.004695 -0.028065 1.000000
=== Average Variance Extracted (AVE) ===
Factor_1: 0.524
Factor_2: 0.509
Factor_3: 0.428
=== Discriminant Validity ===
Factor_1 vs Factor_2:
  R²: 0.002
  AVE (Factor_1): 0.524
  Discriminant validity satisfied ✔️
Factor_1 vs Factor_3:
  R²: 0.000
  AVE (Factor_1): 0.524
  Discriminant validity satisfied ✔️
Factor_2 vs Factor_1:
  R²: 0.002
  AVE (Factor_2): 0.509
  Discriminant validity satisfied ✔️
Factor_2 vs Factor_3:
  R²: 0.001
  AVE (Factor_2): 0.509
  Discriminant validity satisfied ✔️
Factor_3 vs Factor_1:
  R²: 0.000
  AVE (Factor_3): 0.428
  Discriminant validity satisfied ✔️
Factor_3 vs Factor_2:
  R²: 0.001
  AVE (Factor_3): 0.428
  Discriminant validity satisfied ✔️
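Every squared inter-factor correlation is far below the corresponding AVE, so the Fornell-Larcker criterion is met for all pairs. This is unsurprising: varimax is an orthogonal rotation, so the estimated factor scores are close to uncorrelated by construction.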
Explained Variance¶
In [12]:
eigenvalues = fa.get_eigenvalues()[0]  # eigenvalues of the original correlation matrix (unaffected by rotation)
explained_variance = eigenvalues / eigenvalues.sum() * 100
explained_variance_df = pd.DataFrame(
    {
        "Factor": [f"Factor {i + 1}" for i in range(len(eigenvalues))],
        "Eigenvalue": eigenvalues,
        "Explained Variance (%)": explained_variance,
        "Cumulative Explained Variance (%)": explained_variance.cumsum(),
    }
)
print("\n=== Explained Variance ===")
print(explained_variance_df.to_string(index=False))
=== Explained Variance ===
   Factor  Eigenvalue  Explained Variance (%)  Cumulative Explained Variance (%)
 Factor 1    7.538693               37.693467                          37.693467
 Factor 2    3.151816               15.759081                          53.452548
 Factor 3    2.576485               12.882423                          66.334971
 Factor 4    1.528367                7.641835                          73.976807
 Factor 5    1.189514                5.947570                          79.924377
 Factor 6    0.817889                4.089443                          84.013820
 Factor 7    0.725834                3.629172                          87.642992
 Factor 8    0.617731                3.088656                          90.731647
 Factor 9    0.424449                2.122244                          92.853891
Factor 10    0.357461                1.787304                          94.641195
Factor 11    0.284403                1.422013                          96.063208
Factor 12    0.219521                1.097605                          97.160813
Factor 13    0.174943                0.874714                          98.035527
Factor 14    0.127048                0.635239                          98.670766
Factor 15    0.097138                0.485690                          99.156455
Factor 16    0.068189                0.340945                          99.497401
Factor 17    0.050503                0.252516                          99.749917
Factor 18    0.026496                0.132480                          99.882397
Factor 19    0.015488                0.077442                          99.959839
Factor 20    0.008032                0.040161                         100.000000
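The three retained factors together explain about 66.3% of the total variance; every factor beyond the third contributes less than 8%.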