UAS Probstat¶

EFA¶

Persiapan Data¶

In [1]:
import pandas as pd

# Full questionnaire items (exact CSV headers) mapped to their short item
# codes (AE/PE/DF/IU groups, five items each). Keeping this as a single dict
# avoids the two parallel lists drifting out of sync.
COLUMN_CODES = {
    "Aplikasi sering menampilkan konten yang sesuai dengan minat saya.": "AE1",
    "Saya sering mendapatkan rekomendasi postingan atau video.": "AE2",
    "Fitur autoplay membuat saya terus menonton konten.": "AE3",
    "Notifikasi sering membuat saya kembali membuka aplikasi.": "AE4",
    "Saya merasa aplikasi sangat baik dalam mempersonalisasi konten.": "AE5",
    "Waktu layar harian saya di media sosial cukup tinggi.": "PE1",
    "Saya membuka media sosial berkali-kali dalam sehari.": "PE2",
    "Saya menghabiskan waktu lama dalam satu sesi scrolling.": "PE3",
    "Saya mengecek media sosial secara kebiasaan.": "PE4",
    "Setelah mulai scrolling, saya sulit berhenti.": "PE5",
    "Saya merasa lelah secara mental setelah scrolling lama.": "DF1",
    "Konsentrasi saya menurun setelah menggunakan media sosial.": "DF2",
    "Konten negatif meningkatkan rasa cemas saya.": "DF3",
    "Scrolling larut malam mengganggu tidur saya.": "DF4",
    "Saya merasa kesal atau lelah setelah terlalu lama menggunakan media sosial.": "DF5",
    "Saya membuka media sosial tanpa berpikir.": "IU1",
    "Saya mengecek ponsel tanpa alasan jelas.": "IU2",
    "Saya merasa gelisah ketika tidak bisa mengakses media sosial.": "IU3",
    "Saya kesulitan mengontrol kebiasaan scrolling saya.": "IU4",
    "Saya menggunakan scrolling untuk menghindari tugas atau tanggung jawab.": "IU5",
}

# Same names as before so later cells keep working.
columns = list(COLUMN_CODES)
shortened_columns = list(COLUMN_CODES.values())
mapped_columns = dict(COLUMN_CODES)

# Keep only the questionnaire columns and rename them to the short codes.
df = pd.read_csv("data.csv")[columns].rename(columns=mapped_columns)

print(df.head())
   AE1  AE2  AE3  AE4  AE5  PE1  PE2  PE3  PE4  PE5  DF1  DF2  DF3  DF4  DF5  \
0    3    3    3    3    3    3    3    3    3    3    3    3    3    3    3   
1    5    5    5    2    5    4    5    5    5    5    5    4    4    5    5   
2    5    5    1    1    5    1    5    1    5    5    1    5    5    5    1   
3    4    4    4    4    4    4    3    4    4    4    4    3    4    3    4   
4    3    3    3    3    3    3    3    3    3    3    3    3    3    3    3   

   IU1  IU2  IU3  IU4  IU5  
0    3    3    3    3    3  
1    3    3    5    5    5  
2    1    1    1    1    1  
3    4    4    4    4    4  
4    3    3    3    3    3  

Menguji Kelayakan Data untuk Exploratory Factor Analysis (EFA)¶

In [2]:
from factor_analyzer.factor_analyzer import calculate_kmo, calculate_bartlett_sphericity

# Sampling adequacy: Kaiser-Meyer-Olkin measure (per-item and overall).
kmo_all, kmo_model = calculate_kmo(df)
print(f"KMO Model: {kmo_model}\n")

# Bartlett's test of sphericity: a small p-value indicates the correlation
# matrix differs from identity, so factoring is meaningful.
chi_square_value, p_value = calculate_bartlett_sphericity(df)
# Fixed typo in the printed label: "Barlett's" -> "Bartlett's".
print(f"Bartlett's Test\nChi-Square: {chi_square_value}\np-value: {p_value}")
KMO Model: 0.5422426485444672

Bartlett's Test
Chi-Square: 469.57367662907563
p-value: 1.1492952292878107e-25
/home/fadhilkholaf/Projects/college/probstat/uas2/.venv/lib/python3.14/site-packages/factor_analyzer/utils.py:244: UserWarning: The inverse of the variance-covariance matrix was calculated using the Moore-Penrose generalized matrix inversion, due to its determinant being at or very close to zero.
  warnings.warn(

Ekstraksi Faktor Menggunakan Principal Axis Factoring (PAF)¶

In [3]:
from factor_analyzer import FactorAnalyzer

# Unrotated fit, used only to obtain eigenvalues for deciding how many
# factors to retain.
fa = FactorAnalyzer(rotation=None)
fa.fit(df)

eigenvalues = fa.get_eigenvalues()[0]
print(f"Eigenvalues: {eigenvalues}")
Eigenvalues: [7.53869336 3.15181626 2.57648467 1.52836706 1.18951401 0.81788869
 0.72583431 0.61773111 0.42444872 0.35746078 0.28440267 0.21952092
 0.17494287 0.12704772 0.09713793 0.06818905 0.05050328 0.026496
 0.01548844 0.00803212]
/home/fadhilkholaf/Projects/college/probstat/uas2/.venv/lib/python3.14/site-packages/sklearn/utils/deprecation.py:132: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

Menentukan Jumlah Faktor¶

In [4]:
import matplotlib.pyplot as plt

# Scree plot of the eigenvalues computed above.
fig, ax = plt.subplots()
ax.plot(range(1, len(eigenvalues) + 1), eigenvalues, marker="o")
ax.set_title("Scree Plot")
ax.set_xlabel("Factors")
ax.set_ylabel("Eigenvalues")
plt.show()
No description has been provided for this image

Rotasi Faktor dan Mengevaluasi Loading Faktor¶

In [5]:
# Re-fit with the retained number of factors and a varimax rotation.
fa = FactorAnalyzer(rotation="varimax", n_factors=3)
fa.fit(df)

# Rotated loadings, one row per item, one column per factor.
factor_names = ["Factor 1", "Factor 2", "Factor 3"]
factor_loadings = pd.DataFrame(
    fa.loadings_,
    index=df.columns,
    columns=factor_names,
)
print(factor_loadings)
     Factor 1  Factor 2  Factor 3
AE1 -0.011047  0.800300  0.181785
AE2  0.058655  0.719559 -0.068026
AE3  0.069883  0.028497  0.686960
AE4  0.193769  0.037247  0.723839
AE5 -0.097684  0.737522  0.156248
PE1  0.705539  0.257549  0.242947
PE2  0.483122  0.842333 -0.069255
PE3  0.434521  0.399817  0.536871
PE4  0.492928  0.612245  0.211371
PE5  0.489805  0.467536  0.203912
DF1  0.193417  0.173129  0.732688
DF2  0.038070  0.517966  0.499749
DF3 -0.271809  0.519422  0.541725
DF4  0.469166  0.495133  0.119520
DF5  0.137849  0.058899  0.705001
IU1  0.748417  0.025168 -0.110192
IU2  0.855393 -0.078035 -0.025692
IU3  0.718472 -0.041228  0.383872
IU4  0.727780  0.113522  0.454467
IU5  0.552754  0.098727  0.385518
/home/fadhilkholaf/Projects/college/probstat/uas2/.venv/lib/python3.14/site-packages/sklearn/utils/deprecation.py:132: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

Menafsirkan dan Memberi Nama Faktor¶

In [6]:
# Factor naming, based on the dominant rotated loadings printed in the
# previous cell (items listed with the loading on their assigned factor).
#
# Faktor 1: Impulsive / Habitual Use
# PE1  0.705539
# IU1  0.748417
# IU2  0.855393
# IU3  0.718472
# IU4  0.727780
# IU5  0.552754
# 
# Faktor 2: Algorithmic Reinforcement / High Engagement
# AE1  0.800300
# AE2  0.719559
# AE5  0.737522
# PE2  0.842333
# PE4  0.612245
# DF2  0.517966
# 
# Faktor 3: Digital Fatigue / Cognitive–Emotional Strain
# AE4  0.723839
# PE3  0.536871
# DF1  0.732688
# DF3  0.541725
# DF5  0.705001
#
# NOTE(review): AE3, PE5 and DF4 are not assigned to any factor here —
# presumably dropped for low/cross loadings; confirm this was intentional.

Informasi Tambahan¶

Komunaliti¶

In [7]:
# Communality per item: the share of its variance explained by the retained
# factors. Fixed typo in the column label: "Communalitiy" -> "Communality".
communalities = pd.DataFrame(
    fa.get_communalities(), index=df.columns, columns=["Communality"]
)
print(communalities)
     Communality
AE1      0.673648
AE2      0.525833
AE3      0.477610
AE4      0.562876
AE5      0.577895
PE1      0.623140
PE2      0.947727
PE3      0.636893
PE4      0.662500
PE5      0.500078
DF1      0.604215
DF2      0.519487
DF3      0.637145
DF4      0.479558
DF5      0.519497
IU1      0.572904
IU2      0.738446
IU3      0.665260
IU4      0.749092
IU5      0.463908

Skor Faktor¶

In [8]:
# Per-respondent factor scores from the fitted (rotated) model.
factor_scores = fa.transform(df)

factor_scores_df = pd.DataFrame(
    factor_scores,
    columns=[f"Factor {i}" for i in range(1, 4)],
)
print(factor_scores_df.head())
   Factor 1  Factor 2  Factor 3
0 -0.209038 -1.162801  0.088975
1  0.651179  1.112795  0.646581
2 -1.861857  2.117487 -1.860610
3  0.232958 -0.990855  1.219262
4 -0.209038 -1.162801  0.088975
/home/fadhilkholaf/Projects/college/probstat/uas2/.venv/lib/python3.14/site-packages/sklearn/utils/deprecation.py:132: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

Mengevaluasi Keandalan Faktor¶

In [9]:
import pandas as pd
import numpy as np


def cronbach_alpha(df):
    """Cronbach's alpha (internal-consistency reliability) for a set of items.

    alpha = k / (k - 1) * (1 - sum(item variances) / variance(total score)),
    where k is the number of items (columns) and variances use ddof=1.
    """
    k = df.shape[1]
    sum_item_var = df.var(ddof=1, axis=0).sum()
    total_score_var = df.sum(axis=1).var(ddof=1)
    return (k / (k - 1)) * (1 - sum_item_var / total_score_var)


# Item subsets per factor (matching the naming cell above); compact lists
# instead of one item per line, same variables and output as before.
faktor_1 = df[["PE1", "IU1", "IU2", "IU3", "IU4", "IU5"]]
alpha_faktor_1 = cronbach_alpha(faktor_1)
print(f"Cronbach's Alpha untuk Faktor 1: {alpha_faktor_1:.2f}")

faktor_2 = df[["AE1", "AE2", "AE5", "PE2", "PE4", "DF2"]]
alpha_faktor_2 = cronbach_alpha(faktor_2)
print(f"Cronbach's Alpha untuk Faktor 2: {alpha_faktor_2:.2f}")

faktor_3 = df[["AE4", "PE3", "DF1", "DF3", "DF5"]]
alpha_faktor_3 = cronbach_alpha(faktor_3)
print(f"Cronbach's Alpha untuk Faktor 3: {alpha_faktor_3:.2f}")
Cronbach's Alpha untuk Faktor 1: 0.88
Cronbach's Alpha untuk Faktor 2: 0.85
Cronbach's Alpha untuk Faktor 3: 0.83
In [10]:
import numpy as np


def ave(loadings):
    """Average Variance Extracted: the mean of the squared factor loadings."""
    squared = np.square(loadings)
    return np.mean(squared)


# Items retained per factor (same assignment as the naming cell above).
# Replaces the copy-pasted per-item .loc calls with a single list selection:
# factor_loadings.loc[items, col] returns the whole loading vector at once.
FACTOR_1_ITEMS = ["PE1", "IU1", "IU2", "IU3", "IU4", "IU5"]
FACTOR_2_ITEMS = ["AE1", "AE2", "AE5", "PE2", "PE4", "DF2"]
FACTOR_3_ITEMS = ["AE4", "PE3", "DF1", "DF3", "DF5"]

ave_faktor_1 = ave(factor_loadings.loc[FACTOR_1_ITEMS, "Factor 1"])
ave_faktor_2 = ave(factor_loadings.loc[FACTOR_2_ITEMS, "Factor 2"])
ave_faktor_3 = ave(factor_loadings.loc[FACTOR_3_ITEMS, "Factor 3"])

print(f"AVE untuk faktor 1: {ave_faktor_1:.4f}")
print(f"AVE untuk faktor 2: {ave_faktor_2:.4f}")
print(f"AVE untuk faktor 3: {ave_faktor_3:.4f}")
AVE untuk faktor 1: 0.5235
AVE untuk faktor 2: 0.5091
AVE untuk faktor 3: 0.4279
In [11]:
# NOTE(review): `data` below is built but never used in this cell or any
# later visible cell — kept unchanged for compatibility; consider removing.
data = {
    "Factor_1": [
        factor_loadings.loc["PE1", "Factor 1"],
        factor_loadings.loc["IU1", "Factor 1"],
        factor_loadings.loc["IU2", "Factor 1"],
        factor_loadings.loc["IU3", "Factor 1"],
        factor_loadings.loc["IU4", "Factor 1"],
        factor_loadings.loc["IU5", "Factor 1"],
    ],
    "Factor_2": [
        factor_loadings.loc["AE1", "Factor 2"],
        factor_loadings.loc["AE2", "Factor 2"],
        factor_loadings.loc["AE5", "Factor 2"],
        factor_loadings.loc["PE2", "Factor 2"],
        factor_loadings.loc["PE4", "Factor 2"],
        factor_loadings.loc["DF2", "Factor 2"],
    ],
    "Factor_3": [
        factor_loadings.loc["AE4", "Factor 3"],
        factor_loadings.loc["PE3", "Factor 3"],
        factor_loadings.loc["DF1", "Factor 3"],
        factor_loadings.loc["DF3", "Factor 3"],
        factor_loadings.loc["DF5", "Factor 3"],
    ],
}

df_scores = pd.DataFrame(factor_scores, columns=["Factor_1", "Factor_2", "Factor_3"])

correlation_matrix = df_scores.corr()

print("\n=== Matriks Korelasi Antar Faktor ===")
print(correlation_matrix.to_string())

# Squared correlations = shared variance between each pair of factor scores.
correlation_squared = correlation_matrix**2

ave_values = {
    "Factor_1": ave_faktor_1,
    "Factor_2": ave_faktor_2,
    "Factor_3": ave_faktor_3,
}

print("\n=== Average Variance Extracted (AVE) ===")
# Bug fix: the loop variable was named `ave`, which shadowed (and on re-run
# clobbered) the ave() function defined earlier in the notebook.
for factor, ave_value in ave_values.items():
    print(f"{factor}: {ave_value:.3f}")

print("\n=== Validitas Diskriminan ===")
# Discriminant validity: a factor's AVE should exceed its squared correlation
# with every other factor (Fornell-Larcker criterion).
for factor, ave_value in ave_values.items():
    for other_factor in ave_values:
        if factor != other_factor:
            r_squared = correlation_squared.loc[factor, other_factor]
            print(f"{factor} vs {other_factor}:")
            print(f"  R²: {r_squared:.3f}")
            print(f"  AVE ({factor}): {ave_value:.3f}")
            if r_squared < ave_value:
                print("  Validitas diskriminan terpenuhi ✔️")
            else:
                print("  Validitas diskriminan tidak terpenuhi ❌")
=== Matriks Korelasi Antar Faktor ===
          Factor_1  Factor_2  Factor_3
Factor_1  1.000000  0.039229  0.004695
Factor_2  0.039229  1.000000 -0.028065
Factor_3  0.004695 -0.028065  1.000000

=== Average Variance Extracted (AVE) ===
Factor_1: 0.524
Factor_2: 0.509
Factor_3: 0.428

=== Validitas Diskriminan ===
Factor_1 vs Factor_2:
  R²: 0.002
  AVE (Factor_1): 0.524
  Validitas diskriminan terpenuhi ✔️
Factor_1 vs Factor_3:
  R²: 0.000
  AVE (Factor_1): 0.524
  Validitas diskriminan terpenuhi ✔️
Factor_2 vs Factor_1:
  R²: 0.002
  AVE (Factor_2): 0.509
  Validitas diskriminan terpenuhi ✔️
Factor_2 vs Factor_3:
  R²: 0.001
  AVE (Factor_2): 0.509
  Validitas diskriminan terpenuhi ✔️
Factor_3 vs Factor_1:
  R²: 0.000
  AVE (Factor_3): 0.428
  Validitas diskriminan terpenuhi ✔️
Factor_3 vs Factor_2:
  R²: 0.001
  AVE (Factor_3): 0.428
  Validitas diskriminan terpenuhi ✔️

Explained Variance¶

In [12]:
# Variance explained per factor, from the eigenvalues of the fitted model.
eigenvalues = fa.get_eigenvalues()[0]
explained_variance = eigenvalues / eigenvalues.sum() * 100

factor_labels = [f"Factor {idx}" for idx in range(1, len(eigenvalues) + 1)]
explained_variance_df = pd.DataFrame(
    {
        "Factor": factor_labels,
        "Eigenvalue": eigenvalues,
        "Explained Variance (%)": explained_variance,
        "Cumulative Explained Variance (%)": explained_variance.cumsum(),
    }
)

print("\n=== Explained Variance ===")
print(explained_variance_df.to_string(index=False))
=== Explained Variance ===
   Factor  Eigenvalue  Explained Variance (%)  Cumulative Explained Variance (%)
 Factor 1    7.538693               37.693467                          37.693467
 Factor 2    3.151816               15.759081                          53.452548
 Factor 3    2.576485               12.882423                          66.334971
 Factor 4    1.528367                7.641835                          73.976807
 Factor 5    1.189514                5.947570                          79.924377
 Factor 6    0.817889                4.089443                          84.013820
 Factor 7    0.725834                3.629172                          87.642992
 Factor 8    0.617731                3.088656                          90.731647
 Factor 9    0.424449                2.122244                          92.853891
Factor 10    0.357461                1.787304                          94.641195
Factor 11    0.284403                1.422013                          96.063208
Factor 12    0.219521                1.097605                          97.160813
Factor 13    0.174943                0.874714                          98.035527
Factor 14    0.127048                0.635239                          98.670766
Factor 15    0.097138                0.485690                          99.156455
Factor 16    0.068189                0.340945                          99.497401
Factor 17    0.050503                0.252516                          99.749917
Factor 18    0.026496                0.132480                          99.882397
Factor 19    0.015488                0.077442                          99.959839
Factor 20    0.008032                0.040161                         100.000000