$$ \nabla E(\mathbf{w}) = \frac{\partial E(\mathbf{w})}{\partial \mathbf{w}} = \begin{bmatrix} \frac{\partial E(\mathbf{w})}{\partial w_0} \\ \frac{\partial E(\mathbf{w})}{\partial w_1} \end{bmatrix} $$
$$ \begin{aligned} E(\mathbf{w}) &= -\sum_n \left( t_n \ln \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) + (1-t_n) \ln \left( 1 \red{-} \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \right) \\ \sigma'(a) &= \sigma(a) \left(1 - \sigma(a) \right) = \sigma(a) \sigma(-a) \\ \frac{\partial E(\mathbf{w})}{\partial \mathbf{w}} &= - \sum_n \left( \frac{t_n}{\sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right)} \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \left( 1 - \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \hat{\mathbf{x}}_n \red{-} \frac{(1 - t_n) \left( 1 - \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right)}{1 - \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right)} \hat{\mathbf{x}}_n \right) \\ &= -\sum_n \left( t_n \left( 1 - \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \hat{\mathbf{x}}_n - (1 - t_n) \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \hat{\mathbf{x}}_n \right) \\ &= -\sum_n \left( t_n \hat{\mathbf{x}}_n - t_n \hat{\mathbf{x}}_n \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) - \hat{\mathbf{x}}_n \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) + t_n \hat{\mathbf{x}}_n \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \\ &= -\sum_n \left( \left( t_n - \sigma \left( \hat{\mathbf{x}}^T_n \mathbf{w} \right) \right) \hat{\mathbf{x}}_n \right) \end{aligned} $$