library(tidyverse)
library(Matrix)
library(broom)
Fisher Scoring dan IRLS untuk GLM dengan variabel respon biner
Package
Fisher Scoring untuk GLM
Formula Fisher Scoring yang digunakan untuk mendapatkan penduga bagi koefisien GLM adalah sebagai berikut:
\[ \boldsymbol{\beta}_{(p+1) \times 1}^{(i+1)}=\boldsymbol{\beta}_{(p+1) \times 1}^{(i)} + \left[\boldsymbol{\mathcal{I}}_{p \times p }^{(i)} \right]^{-1} \boldsymbol{S}_{(p+1) \times 1}^{(i)} \]
dengan
\[ \boldsymbol{S}_{(p+1) \times 1}^{(i)}= \boldsymbol{X}^{t}_{ (p+1) \times n} \boldsymbol{W}_{n \times n} ^{(i)} (\boldsymbol{D}_{n \times n}^{(i)})^{-1} (\boldsymbol{y}_{n \times 1}-\boldsymbol{\mu}_{n \times 1}^{(i)}) \] dan
\[ \boldsymbol{\mathcal{I}}_{p \times p }^{(i)}=\boldsymbol{X}^{t}_{ (p+1) \times n} \boldsymbol{W}_{n \times n} ^{(i)} \boldsymbol{X}_{n \times (p+1)} \]
Algoritme Fisher Scoring dapat dituliskan sebagai berikut:
- Tentukan perkiraan awal nilai optimal \(\boldsymbol{\beta}_{(p+1) \times 1}^{(0)}\) dan stopping criterion
- Tentukan Score function \(\boldsymbol{S}_{(p+1) \times 1}^{(0)}\) dan Expected Fisher Information \(\boldsymbol{\mathcal{I}}_{p \times p }^{(0)}\).
- Hitung \(\boldsymbol{\beta}_{(p+1) \times 1}^{(1)}=\boldsymbol{\beta}_{(p+1) \times 1}^{(0)} + \left[\boldsymbol{\mathcal{I}}_{p \times p }^{(0)} \right]^{-1} \boldsymbol{S}_{(p+1) \times 1}^{(0)}\), sehingga diperoleh perkiraan nilai optimal \(\boldsymbol{\beta}_{(p+1) \times 1}^{(1)}\)
- Lakukan Langkah 5 sampai stopping criterion terpenuhi
- Hitung \(\boldsymbol{\beta}_{(p+1) \times 1}^{(i+1)}=\boldsymbol{\beta}_{(p+1) \times 1}^{(i)} + \left[\boldsymbol{\mathcal{I}}_{p \times p }^{(i)} \right]^{-1} \boldsymbol{S}_{(p+1) \times 1}^{(i)}\) untuk iterasi \(i=1,2,\ldots\)
- Saat stopping criterion terpenuhi maka \(\boldsymbol{\beta}_{(p+1) \times 1}^{(i+1)}\) merupakan nilai penduga bagi parameter \(\boldsymbol{\beta}_{(p+1) \times 1}\)
- Ragam dari \(\boldsymbol{\beta}_{(p+1) \times 1}\) diperoleh dengan $Var(_{(p+1)})=^{-1} $saat stopping criterion terpenuhi
Iteratively reweighted least squares (IRLS) untuk GLM
IRLS untuk GLM merupakan hasil reformulasi dari Fisher Scoring untuk GLM. Berikut proses penurunanya
\[ \begin{aligned} \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} &= \boldsymbol{\beta}_{((π+1)Γ1)}^{(i)}+[\boldsymbol{\mathcal{I}}_{p\times p}^{(i)} ]^{β1} \boldsymbol{S}_{(π+1)Γ1}^{(i)} \\ \boldsymbol{\mathcal{I}}_{p\times p}^{(i)} \boldsymbol{\beta}_{(π+1)Γ1}^{(π+1)} &= \boldsymbol{\mathcal{I}}_{p\times p}^{(i)} \boldsymbol{\beta}_{((π+1)Γ1)}^{(i)}+ \boldsymbol{S}_{(π+1)Γ1}^{(i)} \end{aligned} \]
kemudiann dengan mensubtitusikan \(\boldsymbol{\mathcal{I}}_{p\times p}^{(i)}\) dan \(\boldsymbol{S}_{(π+1)Γ1}^{(i)}\) sesuai dengan definisi yang ada di fisher scoring, didapat
\[ \begin{aligned} \left\{ \boldsymbol{X}_{(π+1) \times n }^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\} \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} &= \left\{ \boldsymbol{X}_{(π+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\} \boldsymbol{\beta}_{((π+1)Γ1)}^{(i)}+ \left[ \boldsymbol{X}_{(π+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} (\boldsymbol{D}_{n \times n}^{^{(i)}})^{-1} (\boldsymbol{y}_{n \times 1}β\boldsymbol{\mu}_{πΓ1}^{(i)}) \right] \\ &= \boldsymbol{X}_{(π+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \left[\boldsymbol{X}_{(π+1) \times n}^{t}\boldsymbol{\beta}_{((π+1)Γ1)}^{(i)} + (\boldsymbol{D}_{n \times n}^{^{(i)}})^{-1} (\boldsymbol{y}_{n \times 1}β\boldsymbol{\mu}_{πΓ1}^{(i)}) \right] \\ &\text{ misal } \boldsymbol{z}_{n \times1}=\boldsymbol{X}_{(π+1) \times n}^{t}\boldsymbol{\beta}_{((π+1)Γ1)}^{(i)} + (\boldsymbol{D}_{n \times n}^{^{(i)}})^{-1} (\boldsymbol{y}_{n \times 1}β\boldsymbol{\mu}_{πΓ1}^{(i)}) \\ &=\boldsymbol{X}_{(p+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \end{aligned} \]
maka diperoleh persamaan normal (normal equation) sebagai berikut:
\[ \begin{aligned} \left\{ \boldsymbol{X}_{(π+1) \times n }^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\} \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} &= \boldsymbol{X}_{ (p+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \\ \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} &= \left\{ \boldsymbol{X}_{ (p+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{ (p+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \end{aligned} \]
Kemudian, solusi dari persamaan normal diatas adalah
\[ \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} = \left\{ \boldsymbol{X}_{(π+1) \times n }^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{(π+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \]
jadi, formulasi untuk IRLS adalah
\[ \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} = \left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{n \times (p+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \]
Algoritme Fisher Scoring dapat dituliskan sebagai berikut:
- Tentukan perkiraan awal nilai optimal \(\boldsymbol{\beta}_{(p+1) \times 1}^{(0)}\) dan stopping criterion
- Tentukan \(\boldsymbol{W}_{n \times n}^{(0)}\) dan \(\boldsymbol{z}_{n \times1}^{(0)}\).
- Hitung \(\boldsymbol{\beta}_{(p+1)Γ1}^{(1)} = \left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(0)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{n \times (p+1)}^{t} \boldsymbol{W}_{n \times n}^{(0)} \boldsymbol{z}_{n \times1}^{(0)}\), sehingga diperoleh perkiraan nilai optimal \(\boldsymbol{\beta}_{(p+1) \times 1}^{(1)}\)
- Lakukan Langkah 5 sampai stopping criterion terpenuhi
- Hitung \(\boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} = \left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{n \times (p+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)}\) untuk iterasi \(i=1,2,\ldots\)
- Saat stopping criterion terpenuhi maka \(\boldsymbol{\beta}_{(p+1) \times 1}^{(i+1)}\) merupakan nilai penduga bagi parameter \(\boldsymbol{\beta}_{(p+1) \times 1}\)
- Ragam dari \(\boldsymbol{\beta}_{(p+1) \times 1}\) diperoleh dengan \(Var(\boldsymbol{\beta}_{(p+1)})=\left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1}\)saat stopping criterion terpenuhi
Data
No | Variabel | Keterangan |
---|---|---|
1 | age | umur nasabah(numeric) |
2 | job | jenis pekerjaan nasabah(categorical:"admin.","unknown","unemployed","management","housemaid","entrepreneur","student","blue-collar","self-employed","retired","technician","services") |
3 | marital | status perkawinan nasabah (categorical: "married","divorced","single") |
4 | education | tingkat pendidikan(categorical: "unknown","secondary","primary","tertiary") |
5 | default | apakah nasabah memiliki kredit yang macet? (binary: "yes","no") |
6 | balance | saldo tahunan rata-rata dalam euros (numeric) |
7 | housing | apakah nasabah memiliki cicilan rumah? (binary: "yes","no") |
8 | loan | apakah nasabah memiliki pinjaman pribadi? (binary: "yes","no") |
9 | contact | perangkat telekomunikasi (categorical: "unknown","telephone","cellular") |
10 | duration | durasi panggilan terakhir, dalam detik (numeric) |
11 | pdays | banyaknya hari yang telah berlalu setelah klien terakhir kali dihubungi dari campaign sebelumnya (angka, -1 berarti klien tidak dihubungi sebelumnya) |
12 | previous | banyaknya komunikasi yang dilakukan sebelum campaign ini(numeric) |
13 | poutcome | hasil dari promosi campaign sebelumnya (categorical: "unknown","other","failure","success") |
14 | subscribed | response variable (desired target),apakah nasabah telah berlangganan deposito berjangka? (binary: "yes","no") |
Data bisa didownload di link berikut
Import Data
<- read.csv("bank_marketing.csv",stringsAsFactors = TRUE) bank_marketing
glimpse(bank_marketing)
Rows: 45,211
Columns: 14
$ subscribed <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, no, no,β¦
$ age <int> 58, 44, 33, 47, 33, 35, 28, 42, 58, 43, 41, 29, 53, 58, 57,β¦
$ job <fct> management, technician, entrepreneur, blue-collar, unknown,β¦
$ marital <fct> married, single, married, married, single, married, single,β¦
$ education <fct> tertiary, secondary, secondary, unknown, unknown, tertiary,β¦
$ default <fct> no, no, no, no, no, no, no, yes, no, no, no, no, no, no, noβ¦
$ balance <int> 2143, 29, 2, 1506, 1, 231, 447, 2, 121, 593, 270, 390, 6, 7β¦
$ housing <fct> yes, yes, yes, yes, no, yes, yes, yes, yes, yes, yes, yes, β¦
$ loan <fct> no, no, yes, no, no, no, yes, no, no, no, no, no, no, no, nβ¦
$ contact <fct> unknown, unknown, unknown, unknown, unknown, unknown, unknoβ¦
$ duration <int> 261, 151, 76, 92, 198, 139, 217, 380, 50, 55, 222, 137, 517β¦
$ pdays <int> -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,β¦
$ previous <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,β¦
$ poutcome <fct> unknown, unknown, unknown, unknown, unknown, unknown, unknoβ¦
Fisher Scoring dan IRLS untuk Binomial GLM
Misal Binomial GLM didefinisikan sebagai
\[ \boldsymbol{\pi} = \frac{\exp{( \boldsymbol{X}\boldsymbol{\beta} )}}{1+\exp{( \boldsymbol{X}\boldsymbol{\beta} )}} \] atau
\[ \text{logit}(\boldsymbol{\pi}) = \log{ \left( \frac{\boldsymbol{\pi}}{1-\boldsymbol{\pi}} \right)}= \boldsymbol{X}\boldsymbol{\beta} \] dengan
\[ \boldsymbol{\pi}=P(\boldsymbol{y}=1) \]
Untuk ilustrasi penggunaan Fisher-scoring dan IRLS kita akan menggunakan variabel prediktor balance
, age
, housing
dan marital
set.seed(2045)
<- bank_marketing %>%
bank_marketing_mini select(subscribed,balance,age,housing,marital) %>%
slice_sample(n=250,by = subscribed)
glimpse(bank_marketing_mini)
Rows: 500
Columns: 5
$ subscribed <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, no, no,β¦
$ balance <int> 354, 5838, 39, -123, 1150, 263, -11, 1733, 14, 416, 2, 0, 4β¦
$ age <int> 29, 56, 55, 35, 33, 40, 51, 33, 50, 34, 26, 27, 32, 23, 45,β¦
$ housing <fct> yes, no, no, yes, yes, yes, no, yes, yes, no, yes, yes, yesβ¦
$ marital <fct> single, married, married, single, single, single, married, β¦
Kemudian kita akan ekstrak matriks modelnya \(\boldsymbol{X}_{n \times (p+1)}\)
<- model.matrix(subscribed~balance+age+housing+marital,data = bank_marketing_mini)
X #dimensi matrix X
dim(X)
[1] 500 6
dan kemudian untuk \(\boldsymbol{y}_{n\times 1}\), kita misalkan kategori yes
sebagai 1 dan 0 adalah untuk kategori no
<- ifelse(bank_marketing_mini$subscribed=="yes",1,0)
y # ukuran variabel respon
length(y)
[1] 500
# banyaknya nilai 1 dan 0
table(y)
y
0 1
250 250
berikut overview dari matriks \(\boldsymbol{X}_{n \times (p+1)}\)
1:10,] X[
(Intercept) balance age housingyes maritalmarried maritalsingle
1 1 354 29 1 0 1
2 1 5838 56 0 1 0
3 1 39 55 0 1 0
4 1 -123 35 1 0 1
5 1 1150 33 1 0 1
6 1 263 40 1 0 1
7 1 -11 51 0 1 0
8 1 1733 33 1 0 1
9 1 14 50 1 1 0
10 1 416 34 0 0 1
Binomial sebagai distribusi keluarga eksponensial
Selanjutnya kita akan mendefinisikan matriks-matriks lain yang dibutuhkan untuk fisher scoring dan IRLS. Pertama-tama kita ingat terlebih dahulu tentang distribusi binomail sebagai distribusi keluarga eksponensial.
Distribusi Binomial yang digunakan untuk GLM adalah \(Y \sim\frac{\text{Binomial}(m,\pi)}{m}\). Ingat bahwa, Cumulant function \(b(\theta)\) untuk \(Y \sim\frac{\text{Binomial}(m,\pi)}{m}\) adalah
\[ b(\theta) = \log{[1 + \exp{(\theta})]} \]
dan parameter dispersi \(a(\phi)\)
\[ a(\phi)=\frac{1}{m} \]
Kemudian, \(\mu\) dan \(Var(Y)\) dari \(Y\frac{\text{Binomial}(m,\pi)}{m}\) sebagai keluarga eksponensial adalah
\[ \begin{aligned} \mu=E(Y) &= \frac{d b(\theta)}{d\theta} \\ &=\frac{\exp{(\theta)}}{1+\exp{(\theta)}} \end{aligned} \]
dan
$$Var(Y) &= Var() a()
&= a()
&=
$$
Ingat bahwa koneksi (pemisalan) \(\theta\) dengan parameter \(\pi\) adalah sebagai berikut
\[ \theta = \log{ \left( \frac{\pi}{1-\pi} \right)} \]
Jika kita subtitusi ke dalam \(\mu\) dan \(Var(Y)\)
\[ \begin{aligned} \mu &= \frac{\exp{(\theta)}}{1+\exp{(\theta)}} \\ &= \frac{ \left( \frac{\pi}{1-\pi} \right) }{1 + \left( \frac{\pi}{1-\pi} \right)} \\ &= \pi \end{aligned} \]
\[ \begin{aligned} Var(Y) &= \frac{\exp{(\theta)}}{[1+\exp{(\theta)}]^{2}} \frac{1}{m} \\ &= \frac{ \left( \frac{\pi}{1-\pi} \right) }{ \left[1 + \left( \frac{\pi}{1-\pi} \right) \right]^{2}} \frac{1}{m} \\ &= \pi (1-\pi) \frac{1}{m} \\ &= \frac{\pi (1-\pi)}{m} \end{aligned} \]
jadi \(\mu=\pi\) dan \(Var(Y)=\frac{\pi (1-\pi)}{m}\). Kemudian, fungsi hubung (link function) yang digunakan adalah fungsi hubung kanonik (canonical link function) yaitu
\[ \eta=\text{logit}(\mu)=\text{logit}(\pi)=\log{\left( \frac{\pi}{1-\pi} \right)} \]
jika kita ubah menjadi fungsi dari \(\eta\)
\[ g^{-1}(\eta)=\mu=\pi = \frac{\exp{(\eta)}}{1+\exp{(\eta)}} \]
Mendefiniskan matriks untuk Fisher Scoring dan IRLS
Kemudian kita akan dapatkan matriks \(\boldsymbol{W}_{n \times n}\) yang didefinisikan
\[ \boldsymbol{W}_{n \times n} = \begin{bmatrix} \frac{\left(\frac{d \mu_{1}}{d\eta_{1}} \right)^{2}}{Var(y_{1})} & 0 & \ldots & 0 \\ 0 & \frac{\left(\frac{d \mu_{2}}{d\eta_{2}} \right)^{2}}{Var(y_{2})} & \ldots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \frac{\left(\frac{d \mu_{n}}{d\eta_{2}} \right)^{2}}{Var(y_{n})} \end{bmatrix} \]
elemen-elemen diagonalnya sebagai berikut:
\[ \begin{aligned} \frac{d \mu_{i}}{d\eta_{i}} =\frac{d \pi_{i}}{d\eta_{i}} &= \frac{d \left( \frac{\exp{(\eta_i)}}{1+\exp{(\eta_i)}} \right)}{d\eta_{i}} \\ &= \frac{\exp{(\eta_i)}}{[1+\exp{(\eta_i)}]^{2}} \\ &= \frac{ \left( \frac{\pi_i}{1-\pi_i} \right) }{ \left[1 + \left( \frac{\pi_i}{1-\pi_i} \right) \right]^{2}}\\ &= \pi_i (1-\pi_i) \end{aligned} \]
dan
\[ Var(y_{i})= \frac{\pi_i (1-\pi_i)}{m_i} \]
sehingga
\[ \frac{\left(\frac{d \mu_{i}}{d\eta_{i}} \right)^{2}}{Var(y_{i})} = \frac{[\pi_i (1-\pi_i)]^{2}}{\frac{\pi_i (1-\pi_i)}{m_i}} = m_i\pi_i (1-\pi_i) \]
Jadi matriks \(\boldsymbol{W}_{n \times n}\) adalah
\[ \boldsymbol{W}_{n \times n}= \begin{bmatrix} m_{1}\pi_1 (1-\pi_1) & 0 & \ldots & 0 \\ 0 & m_{2}\pi_2 (1-\pi_2) & \ldots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & m_{n}\pi_n (1-\pi_n) \end{bmatrix} \]
kita tuliskan matriks \(\boldsymbol{W}_{n \times n}\) dalam sintaks R
<- function(m,pi){
W <- m * pi * ( 1- pi )
wi Diagonal(x=as.numeric(wi))
}
misalkan \(m_1=m_2=m_3=1\) dan \(\pi_{1}=0.5,\pi_{2}=0.6,\pi_{3}=0.7\) maka \(\boldsymbol{W}_{n \times n}\) adalah
W(m=c(1,1,1),pi=c(0.5,0.6,0.7))
3 x 3 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3]
[1,] 0.25 . .
[2,] . 0.24 .
[3,] . . 0.21
Selanjutnya kita akan dapatkan matriks \(D_{n \times n}\) yang didefinisikan
\[ \boldsymbol{D}_{n \times n} = \begin{bmatrix} \frac{d \mu_{1}}{d\eta_{1}} & 0 & \ldots & 0 \\ 0 & \frac{d \mu_{2}}{d\eta_{2}} & \ldots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \frac{d \mu_{n}}{d\eta_{n}} \end{bmatrix} \]
elemen-elemen diagonalnya sebagai berikut:
\[ \begin{aligned} \frac{d \mu_{i}}{d\eta_{i}} =\frac{d \pi_{i}}{d\eta_{i}} &= \frac{d \left( \frac{\exp{(\eta_i)}}{1+\exp{(\eta_i)}} \right)}{d\eta_{i}} \\ &= \frac{\exp{(\eta_i)}}{[1+\exp{(\eta_i)}]^{2}} \\ &= \frac{ \left( \frac{\pi_i}{1-\pi_i} \right) }{ \left[1 + \left( \frac{\pi_i}{1-\pi_i} \right) \right]^{2}}\\ &= \pi_i (1-\pi_i) \end{aligned} \]
Jadi matriks \(\boldsymbol{D}_{n \times n}\) adalah
\[ \boldsymbol{D}_{n \times n}= \begin{bmatrix} \pi_1 (1-\pi_1) & 0 & \ldots & 0 \\ 0 & \pi_2 (1-\pi_2) & \ldots & 0 \\ \vdots & \vdots & \ddots & \vdots \\ 0 & 0 & 0 & \pi_n (1-\pi_n) \end{bmatrix} \]
kita tuliskan matriks \(\boldsymbol{D}_{n \times n}\) dalam sintaks R
<- function(pi){
D <- pi * ( 1- pi )
di Diagonal(x=as.numeric(di))
}
misalkan \(\pi_{1}=0.5,\pi_{2}=0.6,\pi_{3}=0.7\) maka \(\boldsymbol{D}_{n \times n}\) adalah
D(pi=c(0.5,0.6,0.7))
3 x 3 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3]
[1,] 0.25 . .
[2,] . 0.24 .
[3,] . . 0.21
Selanjutnya kita akan dapatkan matriks \(\boldsymbol{\mu}_{n \times 1}\) yang didefinisikan
\[ \boldsymbol{\mu}_{n \times 1}=g^{-1}(\boldsymbol{X}\boldsymbol{\beta})=\frac{\exp{(\boldsymbol{X}_{n \times (p+1)}\boldsymbol{\beta}_{(p+1)})}}{1+\exp{(\boldsymbol{X}_{n \times (p+1)}\boldsymbol{\beta}_{(p+1)})}} \]
<- function(X,beta){
mu exp(X %*% beta) / ( 1+exp( X%*% beta) )
}
misalkan \(\boldsymbol{X}_{n \times (p+1)}\) berasal dari data bank_marketing_mini
dan \(\boldsymbol{\beta}_{(p+1)}=[0.230 0.001 \space 0.002 \space -0.769 \space -0.166 \space 0.109]^{t}\) maka \(\boldsymbol{\mu}_{n \times 1}\) adalah
# memeriksa dimensi
dim( mu(X=X,beta = c(0.230,0.001,0.002,-0.769,-0.166,0.109) ) )
[1] 500 1
# menampilkan 10 elemen pertama
mu(X=X,beta = c(0.230,0.001,0.002,-0.769,-0.166,0.109) )[1:10]
[1] 0.4955001 0.9975617 0.5530496 0.3815440 0.6869718 0.4782637 0.5386726
[8] 0.7972185 0.3564054 0.6948728
Fisher Scoring
matriks \(\boldsymbol{S}_{(p+1) \times 1}\) adalah
\[ \boldsymbol{S}_{(p+1) \times 1}= \boldsymbol{X}^{t}_{ (p+1) \times n} \boldsymbol{W}_{n \times n} (\boldsymbol{D}_{n \times n})^{-1} (\boldsymbol{y}_{n \times 1}-\boldsymbol{\mu}_{n \times 1}) \]
kita tuliskan matriks \(\boldsymbol{S}_{(p+1) \times 1}\) dalam sintaks R
<- function(X,W,D,y,mu){
S <- chol2inv(chol(D))
inv_D crossprod( X, W %*% inv_D %*% (y-mu) )
}
misalkan \(\boldsymbol{W}_{n \times n} ,\boldsymbol{D}_{n \times n}\) dan \(\boldsymbol{\mu}_{n \times 1}\) adalah sebagai berikut:
set.seed(2045)
<- W(m = 1,pi = rep(0.5,nrow(X)))
W_coba dim(W_coba)
[1] 500 500
1:5,1:5] W_coba[
5 x 5 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3] [,4] [,5]
[1,] 0.25 . . . .
[2,] . 0.25 . . .
[3,] . . 0.25 . .
[4,] . . . 0.25 .
[5,] . . . . 0.25
<- D(pi = rep(0.5,nrow(X)))
D_coba dim(D_coba)
[1] 500 500
1:5,1:5] D_coba[
5 x 5 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3] [,4] [,5]
[1,] 0.25 . . . .
[2,] . 0.25 . . .
[3,] . . 0.25 . .
[4,] . . . 0.25 .
[5,] . . . . 0.25
<- as.matrix( runif(n = nrow(X),min = 0,max = 1 ) )
mu_coba dim(mu_coba)
[1] 500 1
head(mu_coba)
[,1]
[1,] 0.77550659
[2,] 0.94264755
[3,] 0.47241345
[4,] 0.08665263
[5,] 0.63646300
[6,] 0.42453500
S(X = X,W = W_coba,D = D_coba,y = y,mu = mu_coba)
6 x 1 Matrix of class "dgeMatrix"
[,1]
(Intercept) -5.12588
balance 64379.67806
age -132.00826
housingyes -27.00537
maritalmarried -9.45959
maritalsingle 5.37219
matriks \(\boldsymbol{\mathcal{I}}_{p \times p}\) adalah
\[ \boldsymbol{\mathcal{I}}_{p \times p }=\boldsymbol{X}_{ (p+1) \times n} \boldsymbol{W}_{n \times n} \boldsymbol{X}_{n \times (p+1)} \]
kita tuliskan matriks \(\boldsymbol{\mathcal{I}}_{p \times p }\) dalam sintaks R
<- function(X,W){
I_fs crossprod(X,W %*% X)
}
set.seed(2045)
<- W(m = 1,pi = rep(0.5,nrow(X)))
W_coba dim(W_coba)
[1] 500 500
1:5,1:5] W_coba[
5 x 5 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3] [,4] [,5]
[1,] 0.25 . . . .
[2,] . 0.25 . . .
[3,] . . 0.25 . .
[4,] . . . 0.25 .
[5,] . . . . 0.25
I_fs(X = X,W = W_coba)[1:5,1:5]
5 x 5 Matrix of class "dgeMatrix"
(Intercept) balance age housingyes maritalmarried
(Intercept) 125.0 1.901442e+05 5160.00 58.00 68.50
balance 190144.2 1.821748e+09 8400186.75 59549.75 101843.00
age 5160.0 8.400187e+06 230156.50 2279.50 3062.25
housingyes 58.0 5.954975e+04 2279.50 58.00 31.00
maritalmarried 68.5 1.018430e+05 3062.25 31.00 68.50
Supaya lebih mudah, kita kumpulkan semua definisi matriks yang sudah dituliskan ke program R dibawah ini
<- function(m,pi){
W <- m * pi * ( 1- pi )
wi Diagonal(x=as.numeric(wi))
}
<- function(pi){
D <- pi * ( 1- pi )
di Diagonal(x=as.numeric(di))
}
<- function(X,beta){
mu exp(X %*% beta) / ( 1+exp( X%*% beta) )
}
<- function(X,W,D,y,mu){
S <- chol2inv(chol(D))
inv_D crossprod( X, W %*% inv_D %*% (y-mu) )
}
<- function(X,W){
I_fs crossprod(X,W %*% X)
}
Selanjutnya kita akan mulai menuliskan program R untuk iterasi fisher scoring
<- function(beta_old,beta_new,tol) {
stop_criterion <- as.matrix(abs(beta_new-beta_old))
error <- all(error < tol)
status return(list(error=as.data.frame(t(error)),status=status))
}
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.51,-0.91),tol = 1e-5)
$error
V1 V2
1 0.01 0.01
$status
[1] FALSE
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.50001,-0.90001),tol = 1e-5)
$error
V1 V2
1 1e-05 1e-05
$status
[1] TRUE
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.5001,-0.90001),tol = 1e-5)
$error
V1 V2
1 1e-04 1e-05
$status
[1] FALSE
berikut adalah nilai awal dari \(\boldsymbol{\beta}_{(p+1)}\), nilai toleransi, iterasi maksimum dan nilai dari \(m_i\). Khusus untuk \(m_i=1\) karena respon subscribed
adalah respon distribusi bernouli.
<- rep(0,ncol(X))
beta_new names(beta_new) <- colnames(X)
beta_new
(Intercept) balance age housingyes maritalmarried
0 0 0 0 0
maritalsingle
0
<- 1000
max_iter <- 1
mi
<- 1e-15 tolerance
Ingat Formula Fisher Scoring yang digunakan adalah sebagai berikut:
\[ \boldsymbol{\beta}_{(p+1) \times 1}^{(i+1)}=\boldsymbol{\beta}_{(p+1) \times 1}^{(i)} + \left[\boldsymbol{\mathcal{I}}_{p \times p }^{(i)} \right]^{-1} \boldsymbol{S}_{(p+1) \times 1}^{(i)} \]
for(i in 1:max_iter){
#menyimpan beta lama
<- beta_new
beta_old
<- mu(X = X,beta = beta_new)
mu_new <- W(m = mi,pi = mu_new)
W_new <- D(pi = mu_new)
D_new
<- chol2inv(chol( I_fs(X = X,W = W_new) ))
I_inv
#formulasi fisher scoring
<- beta_new +I_inv %*% S(X=X,W = W_new,D = D_new,y =y ,mu = mu_new)
beta_new
#menghitung stopping criterion
<- stop_criterion(
stop_criterion_iter beta_old = beta_old,
beta_new = beta_new,
tol = tolerance)
#iterasi berhenti jika sudah memenuhi stopping criterion
if(stop_criterion_iter$status) break
}
Saat Convergent maka kita mendapatkan
\[ Var(\hat{\boldsymbol{\beta}}_{(p+1) \times 1})= \left[\boldsymbol{\mathcal{I}}_{p \times p } \right]^{-1} \] kemudian standard error dari \(\hat{\boldsymbol{\beta}}_{(p+1) \times 1}\) adalah akar dari diagonal utama \(\left[\boldsymbol{\mathcal{I}}_{p \times p } \right]^{-1}\).
<- list(iter_convergence=i,coefficients=as.numeric(beta_new),var_beta=I_inv)
result_fs $iter_convergence result_fs
[1] 31
$var_beta result_fs
6 x 6 Matrix of class "dpoMatrix"
(Intercept) balance age housingyes
(Intercept) 2.743637e-01 -5.891964e-07 -4.050466e-03 -3.584211e-02
balance -5.891964e-07 1.308069e-09 -3.003060e-08 6.255880e-07
age -4.050466e-03 -3.003060e-08 8.377425e-05 3.146135e-04
housingyes -3.584211e-02 6.255880e-07 3.146135e-04 3.568298e-02
maritalmarried -7.623728e-02 -2.022522e-07 2.006810e-04 5.139815e-03
maritalsingle -1.230458e-01 -4.027429e-07 1.169965e-03 8.360438e-03
maritalmarried maritalsingle
(Intercept) -7.623728e-02 -1.230458e-01
balance -2.022522e-07 -4.027429e-07
age 2.006810e-04 1.169965e-03
housingyes 5.139815e-03 8.360438e-03
maritalmarried 8.060487e-02 6.751034e-02
maritalsingle 6.751034e-02 1.072151e-01
<- data.frame(variables=rownames(beta_new),
summary_fs Estimate=round(result_fs$coefficients,10),
Std_Err=round(sqrt(diag(as.matrix(result_fs$var_beta))),10),row.names = NULL) %>%
mutate(z=Estimate / Std_Err, p_value=round(2*pnorm(q=abs(z),lower.tail = F),3))
summary_fs
IRLS
matriks \(\boldsymbol{z}\) adalah
\[ \boldsymbol{z}_{n \times 1}=\boldsymbol{X}_{ n \times (p+1) }\boldsymbol{\beta}_{((π+1)Γ1)} + (\boldsymbol{D}_{n \times n})^{-1} (\boldsymbol{y}_{n \times 1}β\boldsymbol{\mu}_{πΓ1}) \]
kita tuliskan matriks \(\boldsymbol{z}\) dalam sintaks R
<- function(X,beta,D,y,mu){
z <- chol2inv(chol(D))
inv_D %*% beta + inv_D %*% (y-mu)
X }
misalkan \(\boldsymbol{W}_{n \times n} ,\boldsymbol{\beta}_{((π+1)Γ1)},\boldsymbol{D}_{n \times n}\) dan \(\boldsymbol{\mu}_{n \times 1}\) adalah sebagai berikut:
set.seed(2045)
<- W(m = 1,pi = rep(0.5,nrow(X)))
W_coba dim(W_coba)
[1] 500 500
1:5,1:5] W_coba[
5 x 5 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3] [,4] [,5]
[1,] 0.25 . . . .
[2,] . 0.25 . . .
[3,] . . 0.25 . .
[4,] . . . 0.25 .
[5,] . . . . 0.25
<- c(0.230,0.001,0.002,-0.769,-0.166,0.109)
beta_coba beta_coba
[1] 0.230 0.001 0.002 -0.769 -0.166 0.109
<- D(pi = rep(0.5,nrow(X)))
D_coba dim(mu_coba)
[1] 500 1
1:5,1:5] D_coba[
5 x 5 diagonal matrix of class "ddiMatrix"
[,1] [,2] [,3] [,4] [,5]
[1,] 0.25 . . . .
[2,] . 0.25 . . .
[3,] . . 0.25 . .
[4,] . . . 0.25 .
[5,] . . . . 0.25
<- as.matrix( runif(n = nrow(X),min = 0,max = 1 ) )
mu_coba dim(mu_coba)
[1] 500 1
head(mu_coba)
[,1]
[1,] 0.77550659
[2,] 0.94264755
[3,] 0.47241345
[4,] 0.08665263
[5,] 0.63646300
[6,] 0.42453500
<- z(X = X,beta = beta_coba,D = D_coba,y = y,mu = mu_coba)
z_coba dim(z_coba)
[1] 500 1
1:10] z_coba[
[1] -3.1200264 2.2434098 -1.6766538 -0.8296105 -1.7598520 -1.7851400
[7] -3.2987687 -1.1895575 -1.5743409 0.1808609
Supaya lebih mudah, kita kumpulkan semua definisi matriks yang sudah dituliskan ke program R dibawah ini
<- function(X,beta){
mu exp(X %*% beta) / ( 1+exp( X%*% beta) )
}
<- function(m,pi){
W <- m * pi * ( 1- pi )
wi Diagonal(x=as.numeric(wi))
}
<- function(pi){
D <- pi * ( 1- pi )
di Diagonal(x=as.numeric(di))
}
<- function(X,beta,D,y,mu){
z <- chol2inv(chol(D))
inv_D %*% beta + inv_D %*% (y-mu)
X }
Selanjutnya kita akan mulai menuliskan program R untuk iterasi IRLS
<- function(beta_old,beta_new,tol) {
stop_criterion <- as.matrix(abs(beta_new-beta_old))
error <- all(error < tol)
status return(list(error=as.data.frame(t(error)),status=status))
}
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.51,-0.91),tol = 1e-5)
$error
V1 V2
1 0.01 0.01
$status
[1] FALSE
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.50001,-0.90001),tol = 1e-5)
$error
V1 V2
1 1e-05 1e-05
$status
[1] TRUE
stop_criterion(beta_old = c(0.5,-0.9),beta_new = c(0.5001,-0.90001),tol = 1e-5)
$error
V1 V2
1 1e-04 1e-05
$status
[1] FALSE
berikut adalah nilai awal dari \(\boldsymbol{\beta}_{(p+1)}\) nilai toleransi, iterasi maksimum dan nilai dari \(m_i\). Khusus untuk \(m_i=1\) karena respon subscribed
adalah respon distribusi bernouli.
<- rep(0,ncol(X))
beta_new names(beta_new) <- colnames(X)
beta_new
(Intercept) balance age housingyes maritalmarried
0 0 0 0 0
maritalsingle
0
<- 1000
max_iter <- 1
mi
<- 1e-15 tolerance
Ingat Formula IRLS yang digunakan adalah sebagai berikut:
\[ \boldsymbol{\beta}_{(p+1)Γ1}^{(π+1)} = \left\{ \boldsymbol{X}_{(π+1) \times n }^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \boldsymbol{X}_{(π+1) \times n}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{z}_{n \times1}^{(i)} \]
for(i in 1:max_iter){
#menyimpan beta lama
<- beta_new
beta_old
<- mu(X = X,beta = beta_new)
mu_new <- W(m = mi,pi = mu_new)
W_new <- D(pi = mu_new)
D_new
<- z(X = X,beta = beta_new,D = D_new,y = y,mu = mu_new)
z_new <- chol2inv(chol(crossprod(X,W_new %*% X)))
inv_XWX
#formulasi fisher scoring
<- inv_XWX %*% crossprod(X,W_new %*% z_new)
beta_new
#menghitung stopping criterion
<- stop_criterion(
stop_criterion_iter beta_old = beta_old,
beta_new = beta_new,
tol = tolerance)
#iterasi berhenti jika sudah memenuhi stopping criterion
if(stop_criterion_iter$status) break
}
Saat Convergent maka kita mendapatkan
\[ Var(\hat{\boldsymbol{\beta}}_{(p+1) \times 1})= \left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1} \]
kemudian standard error dari \(\hat{\boldsymbol{\beta}}_{(p+1) \times 1}\) adalah akar dari diagonal utama \(\left\{ \boldsymbol{X}_{n \times (π+1)}^{t} \boldsymbol{W}_{n \times n}^{(i)} \boldsymbol{X}_{n \times (π+1)} \right\}^{-1}\).
<- list(iter_convergence=i,coefficients=as.numeric(beta_new),var_beta=inv_XWX)
result_irls $iter_convergence result_irls
[1] 33
$var_beta result_irls
6 x 6 Matrix of class "dpoMatrix"
(Intercept) balance age housingyes
(Intercept) 2.743637e-01 -5.891964e-07 -4.050466e-03 -3.584211e-02
balance -5.891964e-07 1.308069e-09 -3.003060e-08 6.255880e-07
age -4.050466e-03 -3.003060e-08 8.377425e-05 3.146135e-04
housingyes -3.584211e-02 6.255880e-07 3.146135e-04 3.568298e-02
maritalmarried -7.623728e-02 -2.022522e-07 2.006810e-04 5.139815e-03
maritalsingle -1.230458e-01 -4.027429e-07 1.169965e-03 8.360438e-03
maritalmarried maritalsingle
(Intercept) -7.623728e-02 -1.230458e-01
balance -2.022522e-07 -4.027429e-07
age 2.006810e-04 1.169965e-03
housingyes 5.139815e-03 8.360438e-03
maritalmarried 8.060487e-02 6.751034e-02
maritalsingle 6.751034e-02 1.072151e-01
<- data.frame(variables=rownames(beta_new),
summary_irls Estimate=round(result_irls$coefficients,10),
Std_Err=round(sqrt(diag(as.matrix(result_irls$var_beta))),10),row.names = NULL) %>%
mutate(z=Estimate / Std_Err, p_value=round(2*pnorm(q=abs(z),lower.tail = F),3))
summary_irls
Default fungsi R
<- glm(formula = subscribed~.,
mod1 data = bank_marketing_mini,
family = binomial(link = "logit"))
tidy(mod1) %>%
mutate(across(where(is.numeric),function(x) round(x,7)))