Sigmoid 함수 (logistic function)
$$\mathrm{sigmoid}(x)=\frac{1}{1+e^{-x}}$$
Sigmoid 함수 미분
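$$
\begin{aligned}
\frac{d}{dx}\,\mathrm{sigmoid}(x) &= \frac{d}{dx}\left(1+e^{-x}\right)^{-1} \\
&= (-1)\frac{1}{(1+e^{-x})^{2}}\,\frac{d}{dx}\left(1+e^{-x}\right) \\
&= (-1)\frac{1}{(1+e^{-x})^{2}}\left(0+e^{-x}\right)\frac{d}{dx}(-x) \\
&= (-1)\frac{1}{(1+e^{-x})^{2}}\,e^{-x}\,(-1) \\
&= \frac{e^{-x}}{(1+e^{-x})^{2}} \\
&= \frac{1+e^{-x}-1}{(1+e^{-x})^{2}} \\
&= \frac{1+e^{-x}}{(1+e^{-x})^{2}}-\frac{1}{(1+e^{-x})^{2}} \\
&= \frac{1}{1+e^{-x}}-\frac{1}{(1+e^{-x})^{2}} \\
&= \frac{1}{1+e^{-x}}\left(1-\frac{1}{1+e^{-x}}\right) \\
&= \mathrm{sigmoid}(x)\left(1-\mathrm{sigmoid}(x)\right)
\end{aligned}
$$

$$\sigma'(x)=\sigma(x)\left(1-\sigma(x)\right)$$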
Cost 함수
$$\mathrm{Cost}\left(h_\theta(x),y\right)=-y\log\left(h_\theta(x)\right)-(1-y)\log\left(1-h_\theta(x)\right)$$
전체 Cost 함수
$$J(\theta)=-\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\left(h_\theta(x^{(i)})\right)+\left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right]$$
Cost 함수 미분
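$$
\begin{aligned}
\frac{\partial}{\partial\theta_j}J(\theta) &= \frac{\partial}{\partial\theta_j}\,\frac{-1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log\left(h_\theta(x^{(i)})\right)+\left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\frac{\partial}{\partial\theta_j}\log\left(h_\theta(x^{(i)})\right)+\left(1-y^{(i)}\right)\frac{\partial}{\partial\theta_j}\log\left(1-h_\theta(x^{(i)})\right)\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[\frac{y^{(i)}\frac{\partial}{\partial\theta_j}h_\theta(x^{(i)})}{h_\theta(x^{(i)})}+\frac{\left(1-y^{(i)}\right)\frac{\partial}{\partial\theta_j}\left(1-h_\theta(x^{(i)})\right)}{1-h_\theta(x^{(i)})}\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[\frac{y^{(i)}\frac{\partial}{\partial\theta_j}\sigma(\theta^{T}x^{(i)})}{h_\theta(x^{(i)})}+\frac{\left(1-y^{(i)}\right)\frac{\partial}{\partial\theta_j}\left(1-\sigma(\theta^{T}x^{(i)})\right)}{1-h_\theta(x^{(i)})}\right] \qquad \left(h_\theta(x)=\sigma(\theta^{T}x)\right) \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[\frac{y^{(i)}\,\sigma(\theta^{T}x^{(i)})\left(1-\sigma(\theta^{T}x^{(i)})\right)\frac{\partial}{\partial\theta_j}\theta^{T}x^{(i)}}{h_\theta(x^{(i)})}-\frac{\left(1-y^{(i)}\right)\sigma(\theta^{T}x^{(i)})\left(1-\sigma(\theta^{T}x^{(i)})\right)\frac{\partial}{\partial\theta_j}\theta^{T}x^{(i)}}{1-h_\theta(x^{(i)})}\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[\frac{y^{(i)}\,h_\theta(x^{(i)})\left(1-h_\theta(x^{(i)})\right)\frac{\partial}{\partial\theta_j}\theta^{T}x^{(i)}}{h_\theta(x^{(i)})}-\frac{\left(1-y^{(i)}\right)h_\theta(x^{(i)})\left(1-h_\theta(x^{(i)})\right)\frac{\partial}{\partial\theta_j}\theta^{T}x^{(i)}}{1-h_\theta(x^{(i)})}\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\left(1-h_\theta(x^{(i)})\right)x_j^{(i)}-\left(1-y^{(i)}\right)h_\theta(x^{(i)})\,x_j^{(i)}\right] \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\left(1-h_\theta(x^{(i)})\right)-\left(1-y^{(i)}\right)h_\theta(x^{(i)})\right]x_j^{(i)} \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}-y^{(i)}h_\theta(x^{(i)})-h_\theta(x^{(i)})+y^{(i)}h_\theta(x^{(i)})\right]x_j^{(i)} \\
&= -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}-h_\theta(x^{(i)})\right]x_j^{(i)} \\
&= \frac{1}{m}\sum_{i=1}^{m}\left[h_\theta(x^{(i)})-y^{(i)}\right]x_j^{(i)}
\end{aligned}
$$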
Gradient Descent
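$$\text{Repeat}\ \left\{\ \theta_j := \theta_j-\alpha\,\frac{\partial}{\partial\theta_j}J(\theta)\ \right\}$$
↓
$$\text{Repeat}\ \left\{\ \theta_j := \theta_j-\frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}\ \right\}$$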