[PyTorch] MLP Practice
Multilayer Perceptron (MLP)
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
%matplotlib inline
%config InlineBackend.figure_format='retina'
print ("PyTorch version:[%s]."%(torch.__version__))
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print ("device:[%s]."%(device))
PyTorch version:[1.10.0+cu111].
device:[cpu].
Dataset
60,000 training samples
10,000 test samples
from torchvision import datasets,transforms
mnist_train = datasets.MNIST(root='./data/',train=True,transform=transforms.ToTensor(),download=True)
mnist_test = datasets.MNIST(root='./data/',train=False,transform=transforms.ToTensor(),download=True)
print ("mnist_train:\n",mnist_train,"\n")
print ("mnist_test:\n",mnist_test,"\n")
print ("Done.")
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz
0%| | 0/9912422 [00:00<?, ?it/s]
Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
0%| | 0/28881 [00:00<?, ?it/s]
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
0%| | 0/1648877 [00:00<?, ?it/s]
Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
0%| | 0/4542 [00:00<?, ?it/s]
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw
mnist_train:
Dataset MNIST
Number of datapoints: 60000
Root location: ./data/
Split: Train
StandardTransform
Transform: ToTensor()
mnist_test:
Dataset MNIST
Number of datapoints: 10000
Root location: ./data/
Split: Test
StandardTransform
Transform: ToTensor()
Done.
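As a quick check (a minimal sketch, not part of the original notebook): indexing the dataset returns an (image, label) pair, and with ToTensor the image is a [1, 28, 28] float tensor scaled to [0, 1].
# Minimal sketch: inspect a single sample (uses mnist_train/mnist_test from above)
img, label = mnist_train[0]                 # ToTensor -> float tensor in [0, 1]
print(img.shape, img.dtype, label)          # torch.Size([1, 28, 28]) torch.float32 and an int label
print(len(mnist_train), len(mnist_test))    # 60000 10000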
Data iterator
Use a DataLoader for SGD (mini-batch training).
Performance varies with the batch size.
What exactly does iterating over a DataLoader give? It is an iterable that yields (input, label) mini-batches; see the sketch after the output below.
BATCH_SIZE = 128
train_iter = torch.utils.data.DataLoader(mnist_train,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
test_iter = torch.utils.data.DataLoader(mnist_test,batch_size=BATCH_SIZE,shuffle=True,num_workers=1)
print ("Done.")
list(train_iter)[0][0].shape
Done.
torch.Size([128, 1, 28, 28])
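To make the iterator note concrete, here is a minimal sketch (using the train_iter defined above): each pass through the DataLoader yields one mini-batch of inputs and labels.
# Minimal sketch: fetch one mini-batch from the DataLoader
batch_in, batch_out = next(iter(train_iter))
print(batch_in.shape)    # torch.Size([128, 1, 28, 28]) - images
print(batch_out.shape)   # torch.Size([128])            - integer labels 0..9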
Define the MLP model
Initialize nn.Module through the superclass constructor (super().__init__()).
forward(): the input goes through lin_1, then ReLU, then the second linear layer lin_2.
No activation is needed after the last layer because it outputs logits; nn.CrossEntropyLoss applies log-softmax internally (see the sketch after the code below).
class MultiLayerPerceptronClass(nn.Module):
    """
    Multilayer Perceptron (MLP) Class
    """
    def __init__(self,name='mlp',xdim=784,hdim=256,ydim=10):
        super(MultiLayerPerceptronClass,self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim
        self.lin_1 = nn.Linear(self.xdim,self.hdim)
        self.lin_2 = nn.Linear(self.hdim,self.ydim)
        self.init_param() # initialize parameters
    def init_param(self):
        nn.init.kaiming_normal_(self.lin_1.weight)
        nn.init.zeros_(self.lin_1.bias)
        nn.init.kaiming_normal_(self.lin_2.weight)
        nn.init.zeros_(self.lin_2.bias)
    def forward(self,x):
        net = x
        net = self.lin_1(net)
        net = F.relu(net)
        net = self.lin_2(net)
        return net
M = MultiLayerPerceptronClass(name='mlp',xdim=784,hdim=256,ydim=10).to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(M.parameters(),lr=1e-3) # tell the optimizer which parameters to update
print ("Done.")
Done.
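On the "logits" note above: nn.CrossEntropyLoss applies log-softmax internally, which is why the model's last layer has no activation. A minimal sketch with hypothetical values:
# Minimal sketch: CrossEntropyLoss on raw logits == log_softmax + NLLLoss
logits = torch.randn(2, 10)                              # stand-in for lin_2 output
target = torch.tensor([3, 7])                            # class indices
ce  = nn.CrossEntropyLoss()(logits, target)
nll = nn.NLLLoss()(F.log_softmax(logits, dim=1), target)
print(torch.allclose(ce, nll))                           # True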
Simple forward Path of the MLP Model
Assume a mini-batch of 2 samples, each flattened to 784 values.
.to(device) moves the tensors to the GPU or CPU so the model and data live on the same device.
M(x_torch) works even without calling forward explicitly, because nn.Module's __call__ invokes forward automatically (see the sketch after the output below).
x_numpy = np.random.rand(2,784)
x_torch = torch.from_numpy(x_numpy).float().to(device)
y_torch = M.forward(x_torch) # forward path
y_numpy = y_torch.detach().cpu().numpy() # torch tensor to numpy array
print ("x_numpy:\n",x_numpy)
print ("x_torch:\n",x_torch)
print ("y_torch:\n",y_torch)
print ("y_numpy:\n",y_numpy)
x_numpy:
[[0.52800155 0.58338899 0.96121876 ... 0.20249647 0.52669656 0.07227875]
[0.15828359 0.18153112 0.23636639 ... 0.43557073 0.29804247 0.57536225]]
x_torch:
tensor([[0.5280, 0.5834, 0.9612, ..., 0.2025, 0.5267, 0.0723],
[0.1583, 0.1815, 0.2364, ..., 0.4356, 0.2980, 0.5754]])
y_torch:
tensor([[-0.3729, -0.1477, 0.6461, -0.5641, -1.5517, 1.4930, -1.3116, -0.5282,
0.3516, -0.5646],
[-1.0711, -0.2764, 1.1477, -0.8116, -0.3308, 0.5411, -0.4587, -0.1054,
0.3442, -0.3024]], grad_fn=<AddmmBackward0>)
y_numpy:
[[-0.37288374 -0.14774457 0.64612365 -0.56407714 -1.5517001 1.493022
-1.3115766 -0.5282446 0.3516426 -0.5645925 ]
[-1.0710508 -0.27635956 1.1476835 -0.8116034 -0.33080995 0.541132
-0.4586879 -0.10544484 0.34417093 -0.30238646]]
Check Parameters
What exactly does enumerate do? It wraps an iterable and yields (index, item) pairs, here (p_idx, (param_name, param)).
np.set_printoptions(precision=3)
n_param = 0
for p_idx,(param_name,param) in enumerate(M.named_parameters()):
    param_numpy = param.detach().cpu().numpy()
    n_param += len(param_numpy.reshape(-1))
    print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
    print ("    val:%s"%(param_numpy.reshape(-1)[:5]))
print ("Total number of parameters:[%s]."%(format(n_param,',d')))
[0] name:[lin_1.weight] shape:[(256, 784)].
val:[-0.006 0.026 -0.06 -0.011 -0.068]
[1] name:[lin_1.bias] shape:[(256,)].
val:[0. 0. 0. 0. 0.]
[2] name:[lin_2.weight] shape:[(10, 256)].
val:[ 0.091 -0.084 0.043 0.053 -0.023]
[3] name:[lin_2.bias] shape:[(10,)].
val:[0. 0. 0. 0. 0.]
Total number of parameters:[203,530].
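The 203,530 total can be checked by hand (a small sketch with the same layer sizes as above): each nn.Linear(in_dim, out_dim) has in_dim*out_dim weights plus out_dim biases.
# Minimal sketch: parameter count of the MLP above
n_lin1 = 784*256 + 256   # lin_1 weights + bias = 200,960
n_lin2 = 256*10 + 10     # lin_2 weights + bias = 2,570
print(n_lin1 + n_lin2)   # 203530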
Evaluation Function
What is model_pred? It is the model output for the batch: a (batch_size, 10) tensor of logits (see the sketch after the code below).
def func_eval(model,data_iter,device):
    with torch.no_grad():
        model.eval() # evaluate (affects DropOut and BN)
        n_total,n_correct = 0,0
        for batch_in,batch_out in data_iter:
            y_trgt = batch_out.to(device)
            model_pred = model(batch_in.view(-1,28*28).to(device)) # model forward
            _,y_pred = torch.max(model_pred.data,1) # argmax over the logits gives the predicted label
            n_correct += (y_pred == y_trgt).sum().item() # number of correct predictions
            n_total += batch_in.size(0)
        val_accr = (n_correct/n_total)
        model.train() # back to train mode
    return val_accr
print ("Done")
Done
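On the model_pred note: model_pred is the model output for the batch, a (batch_size, 10) tensor of logits, and torch.max(..., 1) returns both the maximum value and its index along the class dimension; the index is the predicted label. A minimal sketch with hypothetical logits:
# Minimal sketch: reading the predicted label off the logits
model_pred = torch.tensor([[0.1, 2.5, -0.3],
                           [1.2, 0.0,  3.1]])  # hypothetical (batch=2, classes=3)
max_vals, y_pred = torch.max(model_pred, 1)    # max over dim 1 (classes)
print(y_pred)                                  # tensor([1, 2])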
Initial Evaluation
Accuracy is low at first (about 10%, chance level for 10 classes) because the weights are randomly initialized.
M.init_param() # initialize parameters
train_accr = func_eval(M,train_iter,device)
test_accr = func_eval(M,test_iter,device)
print ("train_accr:[%.3f] test_accr:[%.3f]."%(train_accr,test_accr))
train_accr:[0.110] test_accr:[0.112].
Train
print ("Start training.")
M.init_param() # initialize parameters
M.train()
EPOCHS,print_every = 10,1
for epoch in range(EPOCHS):
loss_val_sum = 0
for batch_in,batch_out in train_iter:
# Forward path
y_pred = M.forward(batch_in.view(-1, 28*28).to(device))#batch 를 넣어서 view 로 reshape
loss_out = loss(y_pred,batch_out.to(device))
# Update
optm.zero_grad()# reset gradient
loss_out.backward()# backpropagate
optm.step()# optimizer update
loss_val_sum += loss_out
loss_val_avg = loss_val_sum/len(train_iter)
# Print
if ((epoch%print_every)==0) or (epoch==(EPOCHS-1)):
train_accr = func_eval(M,train_iter,device)
test_accr = func_eval(M,test_iter,device)
print ("epoch:[%d] loss:[%.3f] train_accr:[%.3f] test_accr:[%.3f]."%
(epoch,loss_val_avg,train_accr,test_accr))
print ("Done")
Start training.
epoch:[0] loss:[0.306] train_accr:[0.955] test_accr:[0.951].
epoch:[1] loss:[0.135] train_accr:[0.970] test_accr:[0.962].
epoch:[2] loss:[0.091] train_accr:[0.982] test_accr:[0.972].
epoch:[3] loss:[0.067] train_accr:[0.986] test_accr:[0.974].
epoch:[4] loss:[0.051] train_accr:[0.990] test_accr:[0.976].
epoch:[5] loss:[0.040] train_accr:[0.992] test_accr:[0.978].
epoch:[6] loss:[0.031] train_accr:[0.994] test_accr:[0.978].
epoch:[7] loss:[0.025] train_accr:[0.995] test_accr:[0.979].
epoch:[8] loss:[0.020] train_accr:[0.998] test_accr:[0.979].
epoch:[9] loss:[0.017] train_accr:[0.998] test_accr:[0.980].
Done
Test
n_sample = 25
sample_indices = np.random.choice(len(mnist_test.targets), n_sample, replace=False)
test_x = mnist_test.data[sample_indices]
test_y = mnist_test.targets[sample_indices]
with torch.no_grad():
    y_pred = M.forward(test_x.view(-1, 28*28).type(torch.float).to(device)/255.)
y_pred = y_pred.argmax(axis=1)
plt.figure(figsize=(10,10))
for idx in range(n_sample):
    plt.subplot(5, 5, idx+1)
    plt.imshow(test_x[idx], cmap='gray')
    plt.axis('off')
    plt.title("Pred:%d, Label:%d"%(y_pred[idx],test_y[idx]))
plt.show()
print ("Done")
Backpropagation vs. gradient descent: loss_out.backward() runs backpropagation to compute the gradients, while optm.step() performs the gradient-based update (here Adam) that actually changes the parameters.
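As a minimal sketch (plain SGD on one toy parameter, not the Adam optimizer used above), the two steps can be written out by hand:
# Minimal sketch: backprop computes the gradient, gradient descent applies it
w = torch.tensor([1.0], requires_grad=True)
toy_loss = (3.0*w - 6.0).pow(2).mean()   # toy loss, minimized at w = 2
toy_loss.backward()                      # backpropagation: fills w.grad (here -18)
with torch.no_grad():
    w -= 0.01 * w.grad                   # gradient-descent step: w becomes 1.18
    w.grad.zero_()
print(w)                                 # tensor([1.1800], requires_grad=True)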