
1. Logistic Regression


Hypothesis

\[H(X) = \frac{1}{1 + e^{-XW}} \]
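As a quick illustration (not part of the lab code), here is a minimal NumPy sketch of this hypothesis, with made-up X and W:

import numpy as np

def hypothesis(X, W):
    # H(X) = 1 / (1 + exp(-XW)): the sigmoid of the linear score XW
    return 1.0 / (1.0 + np.exp(-X @ W))

X = np.array([[1.0, 2.0, 1.0],
              [2.0, 0.0, 3.0]])    # 2 instances, 3 features (toy values)
W = np.array([[0.5], [-0.2], [0.1]])
print(hypothesis(X, W))            # every output is squashed into (0, 1)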

Cost Function

\[Cost(W) = \frac{1}{m}\sum_{i=1}^{m} c(H(x_i),y_i) \]


Cross Entropy in Logistic Classification

\(c(H(x),y) = \begin{cases} -\log(H(x)) & y = 1 \\ -\log(1 - H(x)) & y = 0 \end{cases} = -y\log(H(x)) - (1 - y)\log(1 - H(x))\)
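To see that the piecewise form and the compact form agree, here is a small NumPy check with assumed predictions h and labels y:

import numpy as np

h = np.array([0.9, 0.2, 0.7])   # H(x) for three instances (assumed values)
y = np.array([1.0, 0.0, 1.0])   # ground-truth labels

# compact form: -y*log(H(x)) - (1-y)*log(1-H(x))
c = -y * np.log(h) - (1 - y) * np.log(1 - h)
print(c)          # ~[0.105, 0.223, 0.357]: each entry matches its piecewise case
print(c.mean())   # averaging over the m instances gives Cost(W)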


Minimizing cost

\[W_{new} = W_{old} - \alpha\frac{\partial}{\partial W} Cost(W) \]
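Below is a minimal tf.GradientTape sketch of this update rule on toy binary data (x_bin, y_bin, and the shapes are assumptions for illustration, not from the lab):

import tensorflow as tf

x_bin = tf.constant([[1.0], [2.0], [3.0], [4.0]])   # toy inputs
y_bin = tf.constant([[0.0], [0.0], [1.0], [1.0]])   # toy binary labels
W = tf.Variable(tf.zeros([1, 1]))
b = tf.Variable(tf.zeros([1]))
alpha = 0.1                                         # learning rate

for step in range(1000):
    with tf.GradientTape() as tape:
        H = tf.sigmoid(tf.matmul(x_bin, W) + b)
        cost = tf.reduce_mean(-y_bin * tf.math.log(H) - (1 - y_bin) * tf.math.log(1 - H))
    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(alpha * W_grad)                    # W_new = W_old - alpha * dCost/dW
    b.assign_sub(alpha * b_grad)

print(W.numpy(), b.numpy())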



2. Multinomial Classification


Softmax Function (Hypothesis)

\[S(y_i) = \frac{e^{y_i}}{\sum_{j=1}^{n} e^{y_j}} \] \[ n: \text{number of classes} \] \[ i: i\text{-th class} \]
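For reference, a small NumPy sketch of the softmax on an arbitrary logit vector y (the values are made up):

import numpy as np

def softmax(y):
    # S(y_i) = exp(y_i) / sum_j exp(y_j); subtracting max(y) avoids overflow in exp()
    e = np.exp(y - np.max(y))
    return e / e.sum()

y = np.array([2.0, 1.0, 0.1])
print(softmax(y))        # ~[0.659, 0.242, 0.099]: a probability for each of the n classes
print(softmax(y).sum())  # 1.0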

Cost Function

\[ Cost(W) = \frac{1}{m} \sum_{i=1}^{m} D(S(X_iW + b), L_i) \] \[ m: \text{number of instances} \] \[ i: i\text{-th instance} \]


Cross Entropy in Multinomial Classification

\[ D(S, L) = -\sum_{j=1}^{n} L_j \log(S(y_j)) \] \[ n: \text{number of classes} \] \[ j: j\text{-th class} \]
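As a worked example of D(S, L) (and of the cost above being its average), here is a NumPy sketch with assumed softmax outputs S and one-hot labels L for two instances of a 3-class problem:

import numpy as np

S = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.3, 0.6]])   # softmax outputs per instance (assumed)
L = np.array([[1.0, 0.0, 0.0],
              [0.0, 0.0, 1.0]])   # one-hot labels

D = -np.sum(L * np.log(S), axis = 1)   # D(S, L) per instance
print(D)          # ~[0.357, 0.511]: only -log of the probability of the true class contributes
print(D.mean())   # averaging over the m instances gives Cost(W) from the formula above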


Minimizing Cost

\[W_{new} = W_{old} - \alpha\frac{\partial}{\partial W} Cost(W) \]




3. softmax_classifier_TF2.x

import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

print('TensorFlow Version: %s' % (tf.__version__))

# Data
# x_data shape: [8, 4]
x_data = [[1, 2, 1, 1],
          [2, 1, 3, 2],
          [3, 1, 3, 4],
          [4, 1, 5, 5],
          [1, 7, 5, 5],
          [1, 2, 5, 6],
          [1, 6, 6, 6],
          [1, 7, 7, 7]]

# y_data shape: [8, 3]
y_data = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]


x_data = np.array(x_data, dtype = np.float32)
y_data = np.array(y_data, dtype = np.float32)

nb_classes = 3

# Weights
tf.random.set_seed(2020)
W = tf.Variable(tf.random.normal([4, nb_classes], mean = 0.0)) # W must be [4, 3], since x_data is [8, 4] and y_data is [8, 3]
b = tf.Variable(tf.random.normal([nb_classes], mean = 0.0)) # the bias must have a shape that can be broadcast-added to [8, 3]

print('# Weights: \n', W.numpy(), '\n\n# Bias: \n', b.numpy())

# learning_rate
learning_rate = 0.01


# Softmax Function
def softmax(x):
    sm = tf.nn.softmax(tf.matmul(x, W) + b)  # use the x argument, not the global x_data
    return sm


# Training
for i in range(10000 + 1):
    with tf.GradientTape() as tape:
        sm = softmax(x_data)
        cost = tf.reduce_mean(-tf.reduce_sum(y_data * tf.math.log(sm), axis = 1))

    # compute gradients outside the tape context, then apply W_new = W_old - learning_rate * grad
    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(learning_rate * W_grad)
    b.assign_sub(learning_rate * b_grad)

    if i % 1000 == 0:
        print(">>> #%s \n Weights: \n%s \n Bias: \n%s \n cost: %s\n" % (i, W.numpy(), b.numpy(), cost.numpy()))


# Predict
predicted = tf.argmax(softmax(x_data), axis = 1)
real = tf.argmax(y_data, axis = 1)

def acc(predicted, real):
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, real), dtype = tf.float32))
    return accuracy

accuracy = acc(predicted, real).numpy()
print("Accuracy: %s" % accuracy)

print("# Predicted: \n", predicted.numpy(), "\n# real.numpy: \n", real.numpy())

4. ML Lab 6-1-softmax_classifier_Keras


# Lab 6 Softmax Classifier
import tensorflow as tf
import numpy as np

x_raw = [[1, 2, 1, 1],
          [2, 1, 3, 2],
          [3, 1, 3, 4],
          [4, 1, 5, 5],
          [1, 7, 5, 5],
          [1, 2, 5, 6],
          [1, 6, 6, 6],
          [1, 7, 7, 7]]

# y data expressed with one-hot encoding
y_raw = [[0, 0, 1],
          [0, 0, 1],
          [0, 0, 1],
          [0, 1, 0],
          [0, 1, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 0, 0]]

x_data = np.array(x_raw, dtype = np.float32)
y_data = np.array(y_raw, dtype = np.float32)

# the number of columns of y is the number of labels, i.e. the number of classes
nb_classes = 3

tf.model = tf.keras.Sequential()
tf.model.add(tf.keras.layers.Dense(input_dim = 4, units = nb_classes, use_bias = True)) # use_bias is True by default. / input_dim: sets the number of input neurons. / units: sets the number of output neurons.

# use softmax activations: softmax = exp(logits) / reduce_sum(exp(logits), dim)
tf.model.add(tf.keras.layers.Activation('softmax'))

# use loss == categorical_crossentropy
tf.model.compile(loss = 'categorical_crossentropy', optimizer = tf.keras.optimizers.SGD(learning_rate = 0.1), metrics = ['accuracy'])
tf.model.summary()

history = tf.model.fit(x_data, y_data, epochs = 2000)

print('--------------')
# Testing & One-hot encoding
a = tf.model.predict(np.array([[1, 11, 7, 9]]))
print(a, tf.keras.backend.eval(tf.argmax(a, axis=1))) # argmax == one-hot encoding

print('--------------')
b = tf.model.predict(np.array([[1, 3, 4, 3]]))
print(b, tf.keras.backend.eval(tf.argmax(b, axis=1)))

print('--------------')
# or use argmax embedded method, predict_classes
c = tf.model.predict(np.array([[1, 1, 0, 1]]))
# c_onehot = tf.model.predict_classes(np.array([[1, 1, 0, 1]])) # wait, predict_classes is gone since TF 2.6? seriously? use predict() + argmax instead
c_onehot = tf.model.predict(np.array([[1, 1, 0, 1]]), verbose=0)
predicted = c_onehot.argmax(axis=-1)
print(c, predicted)

print('--------------')
all_pred = tf.model.predict(np.array([[1, 11, 7, 9], [1, 3, 4, 3], [1, 1, 0, 1]]))
all_onehot = all_pred.argmax(axis = 1)   # no need to call predict twice on the same inputs
print(all_pred, all_onehot)

References

Lecture - 모두를 위한 딥러닝 (Deep Learning for Everyone), Lab 6
정우일 blog - Multinomial Classification