Mxnet/Training and Inference
Linear Regression
Original article
Full source
import mxnet as mx
import numpy as np

# Training data
train_data = np.random.uniform(0, 1, [100, 2])
train_label = np.array([train_data[i][0] + 2 * train_data[i][1] for i in range(100)])
batch_size = 1

# Evaluation data
eval_data = np.array([[7, 2], [6, 10], [12, 2]])
eval_label = np.array([11, 26, 16])

train_iter = mx.io.NDArrayIter(train_data, train_label, batch_size, shuffle=True,
                               label_name='lin_reg_label')  # (1)
eval_iter = mx.io.NDArrayIter(eval_data, eval_label, batch_size, shuffle=False)

X = mx.sym.Variable('data')
Y = mx.symbol.Variable('lin_reg_label')  # (2)
fully_connected_layer = mx.sym.FullyConnected(data=X, name='fc1', num_hidden=1)
lro = mx.sym.LinearRegressionOutput(data=fully_connected_layer, label=Y, name="lro")  # (3)

model = mx.mod.Module(
    symbol=lro,
    data_names=['data'],
    label_names=['lin_reg_label']  # network structure (4)
)

mx.viz.plot_network(symbol=lro)

# (5)
model.fit(train_iter, eval_iter,
          optimizer_params={'learning_rate': 0.005, 'momentum': 0.9},
          num_epoch=1000,
          batch_end_callback=mx.callback.Speedometer(batch_size, 2))

model.predict(eval_iter).asnumpy()

metric = mx.metric.MSE()
model.score(eval_iter, metric)

eval_data = np.array([[7, 2], [6, 10], [12, 2]])
eval_label = np.array([11.1, 26.1, 16.1])  # adding 0.1 to each of the values
eval_iter = mx.io.NDArrayIter(eval_data, eval_label, batch_size, shuffle=False)
model.score(eval_iter, metric)
(3) mx.sym.LinearRegressionOutput computes the L2 loss.
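To make "L2 loss" concrete, here is a rough sketch of the quantity being minimized (the values and the exact batch normalization MXNet applies internally are assumptions for illustration, not something this page states):

import numpy as np

pred = np.array([10.9, 26.2, 15.8])   # hypothetical network outputs
label = np.array([11.0, 26.0, 16.0])  # targets
print(np.mean((pred - label) ** 2))   # squared error averaged over the batch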
(1), (2): the name 'lin_reg_label' used for the label symbol must match the label_name passed to NDArrayIter: train_iter = mx.io.NDArrayIter(..., label_name='lin_reg_label').
In other words, the names on the input side have to agree. The name at (4) has to match as well, so the same name ends up appearing three times, as the sketch below isolates.
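A minimal sketch of just those three places (the array shapes and sizes are placeholders, not taken from the tutorial):

import mxnet as mx
import numpy as np

label_name = 'lin_reg_label'
# (1) the iterator must be told the label name
data_iter = mx.io.NDArrayIter(np.zeros((4, 2)), np.zeros(4), batch_size=1,
                              label_name=label_name)
# (2) the symbol that receives the labels must carry the same name
Y = mx.sym.Variable(label_name)
net = mx.sym.LinearRegressionOutput(
    data=mx.sym.FullyConnected(data=mx.sym.Variable('data'), num_hidden=1),
    label=Y)
# (4) the Module must list the same name in label_names
model = mx.mod.Module(symbol=net, data_names=['data'], label_names=[label_name])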
(5) In model.fit you can pass mx.callback.Speedometer as the batch_end_callback.
Speedometer(batch_size, frequent=50, auto_reset=True)
: logs every frequent (here 50) batches and, with auto_reset=True, resets the metric after logging. The following has to be run first for the output to show up on stdout:
import logging
logging.getLogger().setLevel(logging.DEBUG)
Running it gives:
>>> # Print training speed and evaluation metrics every ten batches. Batch size is one.
>>> module.fit(iterator, num_epoch=n_epoch,
...            batch_end_callback=mx.callback.Speedometer(1, 10))
Epoch[0] Batch [10] Speed: 1910.41 samples/sec Train-accuracy=0.200000
Epoch[0] Batch [20] Speed: 1764.83 samples/sec Train-accuracy=0.400000
Epoch[0] Batch [30] Speed: 1740.59 samples/sec Train-accuracy=0.500000
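Speedometer is only one possible batch_end_callback; any callable works. A hedged sketch of a hand-rolled callback (the exact attribute names on the parameter object are assumptions based on how Speedometer itself uses it):

def log_every_10_batches(param):
    # param carries the current epoch, batch index, and running eval metric
    if param.nbatch % 10 == 0:
        print('epoch %d, batch %d, %s'
              % (param.epoch, param.nbatch, param.eval_metric.get_name_value()))

# model.fit(train_iter, eval_iter, num_epoch=1000,
#           batch_end_callback=log_every_10_batches)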
Handwritten Digit Recognition
Full source
import mxnet as mx

mnist = mx.test_utils.get_mnist()
batch_size = 100
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)

data = mx.sym.var('data')
# Flatten the data from 4-D shape into 2-D (batch_size, num_channel*width*height)
data = mx.sym.flatten(data=data)
# The first fully-connected layer and the corresponding activation function
fc1 = mx.sym.FullyConnected(data=data, num_hidden=128)
act1 = mx.sym.Activation(data=fc1, act_type="relu")
# The second fully-connected layer and the corresponding activation function
fc2 = mx.sym.FullyConnected(data=act1, num_hidden=64)
act2 = mx.sym.Activation(data=fc2, act_type="relu")
# MNIST has 10 classes
fc3 = mx.sym.FullyConnected(data=act2, num_hidden=10)
# Softmax with cross entropy loss
mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')

import logging
logging.getLogger().setLevel(logging.DEBUG)  # logging to stdout

# create a trainable module on CPU
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
mlp_model.fit(train_iter,                                # train data
              eval_data=val_iter,                        # validation data
              optimizer='sgd',                           # use SGD to train
              optimizer_params={'learning_rate': 0.1},   # use fixed learning rate
              eval_metric='acc',                         # report accuracy during training
              batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # output progress every 100 batches
              num_epoch=10)                              # train for at most 10 dataset passes

test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = mlp_model.predict(test_iter)
assert prob.shape == (10000, 10)

test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
# predict accuracy of mlp
acc = mx.metric.Accuracy()
mlp_model.score(test_iter, acc)
print(acc)
assert acc.get()[1] > 0.96

data = mx.sym.var('data')
# first conv layer
conv1 = mx.sym.Convolution(data=data, kernel=(5, 5), num_filter=20)
tanh1 = mx.sym.Activation(data=conv1, act_type="tanh")
pool1 = mx.sym.Pooling(data=tanh1, pool_type="max", kernel=(2, 2), stride=(2, 2))
# second conv layer
conv2 = mx.sym.Convolution(data=pool1, kernel=(5, 5), num_filter=50)
tanh2 = mx.sym.Activation(data=conv2, act_type="tanh")
pool2 = mx.sym.Pooling(data=tanh2, pool_type="max", kernel=(2, 2), stride=(2, 2))
# first fullc layer
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type="tanh")
# second fullc
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)
# softmax loss
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')

# create a trainable module (the original tutorial uses GPU 0; here it runs on CPU)
lenet_model = mx.mod.Module(symbol=lenet, context=mx.cpu())
# train with the same settings as the MLP above
lenet_model.fit(train_iter,
                eval_data=val_iter,
                optimizer='sgd',
                optimizer_params={'learning_rate': 0.1},
                eval_metric='acc',
                batch_end_callback=mx.callback.Speedometer(batch_size, 100),
                num_epoch=10)

test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = lenet_model.predict(test_iter)
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
# predict accuracy for lenet
acc = mx.metric.Accuracy()
lenet_model.score(test_iter, acc)
print(acc)
assert acc.get()[1] > 0.98
Loading data
mx.test_utils.get_mnist()
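For reference, a sketch of what get_mnist() returns: a dict of numpy arrays whose shapes the code above relies on (the scaling of pixel values to [0, 1] is an assumption here):

import mxnet as mx

mnist = mx.test_utils.get_mnist()
print(mnist['train_data'].shape)   # (60000, 1, 28, 28), pixel values scaled to [0, 1]
print(mnist['train_label'].shape)  # (60000,)
print(mnist['test_data'].shape)    # (10000, 1, 28, 28)
print(mnist['test_label'].shape)   # (10000,)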
Everything after this point is trivial.