ネットワークの構成を変えると、結果がどのように変わるか実験してみました。
FacialEmotions seed1
System test 1位(Psyho)と2位(wleite)
d = dropout
c = convolutional layer
b = batch normalization
r = rectified linear unit (ReLU)
m = max pooling
l = Linear layer (a.k.a. fully-connected layer)
s = softmax
convolutional layer を 3 回使います。
Batch Normalizationを使いました。
Batch Normalization を使う位置を変えました。
dropout を使いました。
dropout を2回以上使いました。
source code (cbrmDcbrmDcbrmDlbrDls)
FacialEmotions seed1
System test 1位(Psyho)と2位(wleite)
参加者 | sse |
Psyho | 82.6467632504 |
wleite | 80.3573356044 |
d = dropout
c = convolutional layer
b = batch normalization
r = rectified linear unit (ReLU)
m = max pooling
l = Linear layer (a.k.a. fully-connected layer)
s = softmax
convolutional layer を 3 回使います。
net | epoch | loss | accuracy | sse |
crmcrmcrmlrls | 10 | 0.862057242 | 0.6888581587 | 84.8293236042 |
crmcrmcrmlrls | 20 | 0.2330729473 | 0.9241406559 | 110.2754166642 |
Batch Normalizationを使いました。
net | epoch | loss | accuracy | sse |
cbrm cbrm cbrm lbr ls | 10 | 0.0292957636 | 1 | 98.5088547936 |
Batch Normalization を使う位置を変えました。
net | epoch | loss | accuracy | sse |
crbmcrbmcrbmlrbls | 10 | 0.0161193058 | 1 | 103.3351983911 |
crmbcrmbcrmblbrls | 10 | 0.0428612025 | 0.9990122481 | 104.5428854816 |
dropout を使いました。
net | epoch | loss | accuracy | sse |
d cbrm cbrm cbrm lbr ls | 10 | 0.6779491677 | 0.7741999208 | 127.6396549389 |
cdbrm cbrm cbrm lbr ls | 10 | 0.1860714287 | 0.9697747926 | 97.3982535937 |
cbdrm cbrm cbrm lbr ls | 10 | 0.1261851785 | 0.9891347294 | 90.293485705 |
cbrdm cbrm cbrm lbr ls | 10 | 0.1165383348 | 0.9877518768 | 135.9137072744 |
cbrm d cbrm cbrm lbr ls | 10 | 0.3982799557 | 0.8860134335 | 89.3255575127 |
cbrm cdbrm cbrm lbr ls | 10 | 0.3108963729 | 0.9205847492 | 87.1665944558 |
cbrm cbdrm cbrm lbr ls | 10 | 0.2976720617 | 0.9265112604 | 87.8945815325 |
cbrm cbrdm cbrm lbr ls | 10 | 0.2908983695 | 0.9274990119 | 97.0035103791 |
cbrm cbrm d cbrm lbr ls | 10 | 0.6466208026 | 0.7740023707 | 82.3041648362 |
cbrm cbrm d cbrm lbr ls | 10 | 0.6593907256 | 0.7710391151 | 87.7033221745 |
cbrm cbrm cdbrm lbr ls | 10 | 0.5000952218 | 0.8356380879 | 85.98265144 |
cbrm cbrm cbdrm lbr ls | 10 | 0.4267725744 | 0.8735677599 | 83.6281797147 |
cbrm cbrm cbrdm lbr ls | 10 | 0.4398758164 | 0.8619122878 | 87.4801027678 |
cbrm cbrm cbrm d lbr ls | 10 | 0.7070338922 | 0.7443698141 | 85.7758809568 |
cbrm cbrm cbrm d lbr ls | 10 | 0.7025089265 | 0.7459502175 | 83.1445387195 |
cbrm cbrm cbrm ldbr ls | 10 | 0.6557939961 | 0.7773607272 | 98.114469585 |
cbrm cbrm cbrm lbdr ls | 10 | 0.44960894 | 0.8561833265 | 105.3589899718 |
cbrm cbrm cbrm lbr d ls | 10 | 0.4411115036 | 0.8555906757 | 84.4414017709 |
cbrm cbrm cbrm lbr l b s | 10 | 0.350525047 | 0.9998024496 | 107.6956465493 |
dropout を2回以上使いました。
net | epoch | loss | accuracy | sse |
cbrmDcbrmcbrmlbrDls | 20 | 0.4687651145 | 0.8354405375 | 88.8934547861 |
cbrmDcbrmcbrmDlbrls | 20 | 0.646407447 | 0.7635322005 | 83.6677099091 |
cbrmDcbrmDcbrmlbrls | 20 | 0.4206087752 | 0.8668510471 | 88.9427621938 |
cbrmcbrmDcbrmlbrDls | 20 | 0.697579882 | 0.7500987754 | 81.9197008873 |
cbrmcbrmDcbrmDlbrls | 20 | 0.7615070114 | 0.7354800472 | 78.8430071664 |
cbrmcbrmcbrmDlbrDls | 20 | 0.7329212929 | 0.7412090082 | 83.007588472 |
cbrmDcbrmDcbrmDlbrls | 20 | 0.8634045921 | 0.6959699726 | 79.3253190373 |
cbrmDcbrmDcbrmlbrDls | 20 | 0.8133866381 | 0.7044646383 | 79.0648046938 |
cbrmDcbrmcbrmDlbrDls | 20 | 0.9094222649 | 0.6716712761 | 80.0929658934 |
cbrmcbrmDcbrmDlbrDls | 20 | 0.9502801996 | 0.66851047 | 78.9084275277 |
cbrmDcbrmDcbrmDlbrDls | 20 | 1.037415473 | 0.6309758988 | 79.860401207 |
source code (cbrmDcbrmDcbrmDlbrDls)
#coding: utf-8
import numpy as np
import sys
import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
import time
import pickle
import os.path
class Net(chainer.Chain):
    """Three-block CNN for 7-class facial emotion classification.

    Layer string (see the experiment notes): cbrmDcbrmDcbrmDlbrDls, i.e.
    three conv->batchnorm->ReLU->maxpool->dropout blocks, then
    linear->batchnorm->ReLU->dropout, then a final linear layer.

    NOTE(review): written against the pre-v2 Chainer API
    (BatchNormalization takes ``test=``, F.dropout takes ``train=``) --
    this will not run unchanged on Chainer v2+.
    """

    def __init__(self, class_labels=7):
        # class_labels: size of the output layer (7 emotion classes).
        super(Net, self).__init__(
            conv1=L.Convolution2D( 3, 64, 3, stride=1, pad=1),
            bn1=L.BatchNormalization(64),
            conv2=L.Convolution2D( 64, 64, 3, stride=1, pad=1),
            bn2=L.BatchNormalization(64),
            conv3=L.Convolution2D( 64, 64, 3, stride=1, pad=1),
            bn3=L.BatchNormalization(64),
            fc1=L.Linear(None, 64, nobias=True),
            bn_fc1=L.BatchNormalization(64),
            fc2=L.Linear(None, class_labels, nobias=True),
            # bn_fc2 is registered but its use in __call__ is commented out.
            bn_fc2=L.BatchNormalization(class_labels)
        )

    def __call__(self, x, y, train=True):
        """Forward pass.

        x: image batch; conv1 expects 3 input channels (N, 3, H, W).
        y: int class labels (only used when train is True; may be None
           otherwise -- see FacialEmotions.testing).
        Returns (softmax cross-entropy loss, accuracy) when train is True,
        otherwise the softmax class probabilities.
        """
        # Block 1: conv -> BN -> ReLU -> 2x2 max pool -> dropout.
        h = self.conv1(x)
        h = self.bn1(h, test=not train)
        h = F.relu(h)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.5, train=train)
        # Block 2.
        h = self.conv2(h)
        h = self.bn2(h, test=not train)
        h = F.relu(h)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.5, train=train)
        # Block 3.
        h = self.conv3(h)
        h = self.bn3(h, test=not train)
        h = F.relu(h)
        h = F.max_pooling_2d(h, ksize=2, stride=2)
        h = F.dropout(h, ratio=0.5, train=train)
        # Fully-connected head: linear -> BN -> ReLU -> dropout -> linear.
        h = self.fc1(h)
        h = self.bn_fc1(h, test=not train)
        h = F.relu(h)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.fc2(h)
        # Experiment leftovers: the trailing BN/ReLU/dropout hurt results.
        # h = self.bn_fc2(h, test=not train)
        # h = F.relu(h)
        # h = F.dropout(h, ratio=0.5, train=train)
        if train:
            return F.softmax_cross_entropy(h, y), F.accuracy(h, y)
        else:
            return F.softmax(h)
class FacialEmotions:
    """Marathon-match harness: collect training images, train Net, predict.

    Protocol: training() is called once per training example and returns 0
    to keep receiving data or 1 to stop early; testing() lazily triggers
    training2() (or loads a pickled model) on its first call, then returns
    per-class softmax scores for a single image.
    """

    def __init__(self):
        self.startTime = time.perf_counter()
        # self.useModel = True
        self.useModel = False        # True: load a pickled model instead of training
        # self.train_size10 = True
        self.train_size10 = False    # True: stop after 10 examples (debug run)
        self.train = []              # preprocessed (3, size, size) image arrays
        self.train_result = []       # one integer emotion label per image
        self.train_index = 0
        self.test = []
        self.test_index = 0
        self.batchsize = 128
        self.useSmallImage = True    # halve the 250x250 input by 2x2 averaging
        # self.useSmallImage = False
        self.size = 64               # side length of the centred crop fed to the net
        self.model = Net()
        self.folder = "./python/cbrmDcbrmDcbrmDlbrDls/"
        # pkl_file = open(self.folder + "FacialEmotions.pkl", "rb")
        # self.model = pickle.load(pkl_file)
        # print("load model", file=sys.stderr)
        # sys.stderr.flush()

    def newImage(self, img):
        """Decode a flat 250x250 RGB image, optionally downscale, centre-crop."""
        three = self.oneDimensionToThreeDimension(img)
        if self.useSmallImage == True:
            three = self.smallImage(three)
        startR = (len(three[0]) // 2) - (self.size // 2)
        startC = (len(three[0][0]) // 2) - (self.size // 2)
        return self.cropImage(three, startR, startC, self.size, self.size)

    def cropImage(self, img, startR, startC, sizeR, sizeC):
        """Return a float copy of img[:, startR:startR+sizeR, startC:startC+sizeC]."""
        # Vectorized: replaces the original triple pixel loop.
        img = np.asarray(img)
        window = img[:, startR:startR + sizeR, startC:startC + sizeC]
        return np.array(window, dtype=np.float64)

    def smallImage(self, img):
        """Downscale by 2 via 2x2 block averaging (odd trailing row/col dropped)."""
        img = np.asarray(img, dtype=np.float64)
        sizeR = img.shape[1] // 2
        sizeC = img.shape[2] // 2
        blocks = img[:, :2 * sizeR, :2 * sizeC]
        # Average each 2x2 block -- same arithmetic as the original loops.
        return (blocks[:, 0::2, 0::2] + blocks[:, 0::2, 1::2]
                + blocks[:, 1::2, 0::2] + blocks[:, 1::2, 1::2]) / 4

    def oneDimensionToThreeDimension(self, img):
        """Unpack a flat list of 0xRRGGBB ints into a (3, 250, 250) float array."""
        packed = np.asarray(img, dtype=np.int64).reshape(250, 250)
        three = np.zeros((3, 250, 250))
        three[0] = (packed >> 16) & 255  # red
        three[1] = (packed >> 8) & 255   # green
        three[2] = packed & 255          # blue
        return three

    def flipImage(self, img):
        """Return a horizontally mirrored float copy (augmentation; currently unused)."""
        return np.array(np.asarray(img)[:, :, ::-1], dtype=np.float64)

    def training(self, img, emot):
        """Consume one training example; return 1 to stop receiving data, else 0."""
        if self.useModel == True:
            return 1
        elif os.path.isfile("./python/training_seed1.pkl"):
            # Reuse preprocessed training data cached by a previous run.
            with open("./python/training_seed1.pkl", "rb") as pkl_file:
                self.train = pickle.load(pkl_file)
            with open("./python/training_result_seed1.pkl", "rb") as pkl_file:
                self.train_result = pickle.load(pkl_file)
            print("load training data", file=sys.stderr)
            sys.stderr.flush()
            return 1
        if self.train_index % 100 == 0:
            print(self.train_index, file=sys.stderr)
        image = self.newImage(img)
        self.train.append(image)
        # self.train.append(self.flipImage(image))
        # Collapse the 7 per-emotion values into one label.
        # NOTE(review): assumes emot is effectively one-hot -- confirm with caller.
        emotion = sum(i * emot[i] for i in range(7))
        self.train_result.append(emotion)
        self.train_index += 1
        if self.train_size10 == True and self.train_index == 10:
            return 1
        return 0

    def _elapsed_hms(self):
        """Seconds since startTime, split into (hours, minutes, seconds) ints."""
        elapse = int(time.perf_counter() - self.startTime)
        return elapse // 3600, (elapse % 3600) // 60, elapse % 60

    def training2(self):
        """Train the network on the accumulated data; checkpoint every epoch."""
        if os.path.isfile("./python/training_seed1.pkl") == False:
            # Cache the preprocessed data so later runs skip decoding.
            with open("./python/training_seed1.pkl", "wb") as f:
                pickle.dump(self.train, f, -1)
            with open("./python/training_result_seed1.pkl", "wb") as f:
                pickle.dump(self.train_result, f, -1)
            print("save training data", file=sys.stderr)
            sys.stderr.flush()
        self.train = np.array(self.train, dtype=np.float32)
        self.train_result = np.array(self.train_result, dtype=np.int32)
        self.train /= 255.0  # scale pixels to [0, 1]
        optimizer = optimizers.Adam()
        optimizer.setup(self.model)
        batchsize = self.batchsize
        n_epoch = 20
        x_train = self.train
        y_train = self.train_result
        N = len(y_train)
        print("start, time: {}:{}:{}".format(*self._elapsed_hms()), file=sys.stderr)
        sys.stderr.flush()
        for epoch in range(1, n_epoch + 1):
            perm = np.random.permutation(N)
            sum_loss = 0.0
            sum_accuracy = 0.0
            for i in range(0, N, batchsize):
                x_batch = np.asarray(x_train[perm[i:i + batchsize]])
                y_batch = np.asarray(y_train[perm[i:i + batchsize]])
                optimizer.zero_grads()  # pre-v2 Chainer API
                loss, accuracy = self.model(x_batch, y_batch, train=True)
                loss.backward()
                optimizer.update()
                sum_loss += float(loss.data) * len(y_batch)
                sum_accuracy += float(accuracy.data) * len(y_batch)
            # Per-epoch checkpoint so a good epoch can be recovered later.
            with open(self.folder + "FacialEmotions" + str(epoch) + ".pkl", "wb") as f:
                pickle.dump(self.model.to_cpu(), f, -1)
            print("save model", file=sys.stderr)
            sys.stderr.flush()
            print("epoch: {}, time: {}:{}:{}, loss: {}, accuracy: {}".format(
                epoch, *self._elapsed_hms(), sum_loss / N, sum_accuracy / N),
                file=sys.stderr)
            sys.stderr.flush()
        with open(self.folder + "FacialEmotions.pkl", "wb") as f:
            pickle.dump(self.model.to_cpu(), f, -1)
        print("save model", file=sys.stderr)
        sys.stderr.flush()

    def testing(self, img, *args):
        """Predict per-class scores for one image; train/load model on first call."""
        if self.test_index % 10 == 0:
            print(self.test_index, file=sys.stderr)
            sys.stderr.flush()
        if self.useModel == True and self.test_index == 0:
            with open(self.folder + "FacialEmotions.pkl", "rb") as pkl_file:
                self.model = pickle.load(pkl_file)
            print("load model", file=sys.stderr)
            sys.stderr.flush()
        elif self.test_index == 0:
            self.training2()
        self.test_index += 1
        batch = np.array([self.newImage(img)], dtype=np.float32)
        batch /= 255.0
        prediction = self.model(batch, None, train=False)
        return prediction.data[0]
if __name__ == "__main__":
    # Judge I/O protocol: one integer per line on stdin, answers on stdout.
    solver = FacialEmotions()
    n_train = int(input())
    for _ in range(n_train):
        pixel_count = int(input())
        pixels = [int(input()) for _ in range(pixel_count)]
        emotion_scores = [float(input()) for _ in range(7)]
        status = solver.training(pixels, emotion_scores)
        print(status)
        sys.stdout.flush()
        if status == 1:
            # Solver signalled it needs no more training data.
            break
    n_test = int(input())
    for _ in range(n_test):
        pixel_count = int(input())
        pixels = [int(input()) for _ in range(pixel_count)]
        for score in solver.testing(pixels):
            print(score)
        sys.stdout.flush()
コメント