ネットワークの構成を変えると、結果がどのように変わるか実験してみました。

FacialEmotions seed1

System test 1位(Psyho)と2位(wleite)




sse
Psyho:  82.6467632504
wleite: 80.3573356044



d = dropout
c = convolutional layer
b = batch normalization
r = rectified linear unit (ReLU)
m = max pooling
l = Linear layer (a.k.a. fully-connected layer)
s = softmax

convolutional layer を 3 回使います。
net epoch loss accuracy sse
crmcrmcrmlrls 10 0.862057242 0.6888581587 84.8293236042
crmcrmcrmlrls 20 0.2330729473 0.9241406559 110.2754166642


Batch Normalizationを使いました。
net epoch loss accuracy sse
cbrm cbrm cbrm lbr ls 10 0.0292957636 1 98.5088547936
 
Batch Normalization を使う位置を変えました。
net epoch loss accuracy sse
crbmcrbmcrbmlrbls 10 0.0161193058 1 103.3351983911
crmbcrmbcrmblbrls 10 0.0428612025 0.9990122481 104.5428854816

dropout を使いました。
net epoch loss accuracy sse
d cbrm cbrm cbrm lbr ls 10 0.6779491677 0.7741999208 127.6396549389
cdbrm cbrm cbrm lbr ls 10 0.1860714287 0.9697747926 97.3982535937
cbdrm cbrm cbrm lbr ls 10 0.1261851785 0.9891347294 90.293485705
cbrdm cbrm cbrm lbr ls 10 0.1165383348 0.9877518768 135.9137072744
cbrm d cbrm cbrm lbr ls 10 0.3982799557 0.8860134335 89.3255575127
cbrm cdbrm cbrm lbr ls 10 0.3108963729 0.9205847492 87.1665944558
cbrm cbdrm cbrm lbr ls 10 0.2976720617 0.9265112604 87.8945815325
cbrm cbrdm cbrm lbr ls 10 0.2908983695 0.9274990119 97.0035103791
cbrm cbrm d cbrm lbr ls 10 0.6466208026 0.7740023707 82.3041648362
cbrm cbrm d cbrm lbr ls 10 0.6593907256 0.7710391151 87.7033221745
cbrm cbrm cdbrm lbr ls 10 0.5000952218 0.8356380879 85.98265144
cbrm cbrm cbdrm lbr ls 10 0.4267725744 0.8735677599 83.6281797147
cbrm cbrm cbrdm lbr ls 10 0.4398758164 0.8619122878 87.4801027678
cbrm cbrm cbrm d lbr ls 10 0.7070338922 0.7443698141 85.7758809568
cbrm cbrm cbrm d lbr ls 10 0.7025089265 0.7459502175 83.1445387195
cbrm cbrm cbrm ldbr ls 10 0.6557939961 0.7773607272 98.114469585
cbrm cbrm cbrm lbdr ls 10 0.44960894 0.8561833265 105.3589899718
cbrm cbrm cbrm lbr d ls 10 0.4411115036 0.8555906757 84.4414017709
cbrm cbrm cbrm lbr l b s 10 0.350525047 0.9998024496 107.6956465493

dropout を2回以上使いました。

net epoch loss accuracy sse
cbrmDcbrmcbrmlbrDls 20 0.4687651145 0.8354405375 88.8934547861
cbrmDcbrmcbrmDlbrls 20 0.646407447 0.7635322005 83.6677099091
cbrmDcbrmDcbrmlbrls 20 0.4206087752 0.8668510471 88.9427621938
cbrmcbrmDcbrmlbrDls 20 0.697579882 0.7500987754 81.9197008873
cbrmcbrmDcbrmDlbrls 20 0.7615070114 0.7354800472 78.8430071664
cbrmcbrmcbrmDlbrDls 20 0.7329212929 0.7412090082 83.007588472
cbrmDcbrmDcbrmDlbrls 20 0.8634045921 0.6959699726 79.3253190373
cbrmDcbrmDcbrmlbrDls 20 0.8133866381 0.7044646383 79.0648046938
cbrmDcbrmcbrmDlbrDls 20 0.9094222649 0.6716712761 80.0929658934
cbrmcbrmDcbrmDlbrDls 20 0.9502801996 0.66851047 78.9084275277
cbrmDcbrmDcbrmDlbrDls 20 1.037415473 0.6309758988 79.860401207





source code (cbrmDcbrmDcbrmDlbrDls)


#coding: utf-8
import numpy as np
import sys
import chainer
from chainer import cuda
import chainer.functions as F
import chainer.links as L
from chainer import optimizers
import time
import pickle
import os.path


class Net(chainer.Chain):
    """CNN for 7-class facial-emotion classification.

    Layout "cbrmDcbrmDcbrmDlbrDls" from the experiment notes:
    three (conv -> BN -> ReLU -> max-pool -> dropout) blocks, then
    FC -> BN -> ReLU -> dropout -> FC -> softmax.

    Uses the pre-Chainer-2.0 API (``test=`` / ``train=`` keyword
    arguments on BatchNormalization and dropout).
    """

    def __init__(self, class_labels=7):
        super(Net, self).__init__(
            conv1=L.Convolution2D(3, 64, 3, stride=1, pad=1),
            bn1=L.BatchNormalization(64),
            conv2=L.Convolution2D(64, 64, 3, stride=1, pad=1),
            bn2=L.BatchNormalization(64),
            conv3=L.Convolution2D(64, 64, 3, stride=1, pad=1),
            bn3=L.BatchNormalization(64),

            fc1=L.Linear(None, 64, nobias=True),
            bn_fc1=L.BatchNormalization(64),
            fc2=L.Linear(None, class_labels, nobias=True),
            bn_fc2=L.BatchNormalization(class_labels)
        )

    def __call__(self, x, y, train=True):
        """Return (loss, accuracy) when train=True, else softmax probabilities.

        y is ignored when train=False, so callers may pass None then.
        """
        h = x
        # Three identical conv blocks: conv -> BN -> ReLU -> 2x2 pool -> dropout.
        conv_blocks = ((self.conv1, self.bn1),
                       (self.conv2, self.bn2),
                       (self.conv3, self.bn3))
        for conv, bn in conv_blocks:
            h = conv(h)
            h = bn(h, test=not train)
            h = F.relu(h)
            h = F.max_pooling_2d(h, ksize=2, stride=2)
            h = F.dropout(h, ratio=0.5, train=train)

        # Fully-connected head.
        h = self.fc1(h)
        h = self.bn_fc1(h, test=not train)
        h = F.relu(h)
        h = F.dropout(h, ratio=0.5, train=train)
        h = self.fc2(h)
        # bn_fc2 stays registered but unused, matching the original network
        # (its use after fc2 was experimented with and commented out).

        if train:
            return F.softmax_cross_entropy(h, y), F.accuracy(h, y)
        return F.softmax(h)

class FacialEmotions:
    """Harness for the FacialEmotions marathon-match protocol.

    Collects training images via training(), trains the Net model once
    (lazily, on the first testing() call), then predicts a 7-element
    emotion-probability vector per test image.
    """

    def __init__(self):
        self.startTime = time.perf_counter()

        # When True, skip training entirely and load a pickled model instead.
        # self.useModel = True
        self.useModel = False

        # When True, stop collecting after 10 training images (quick debug run).
        # self.train_size10 = True
        self.train_size10 = False

        self.train = []          # list of (3, size, size) float arrays
        self.train_result = []   # list of emotion labels (0..6)
        self.train_index = 0
        self.test = []
        self.test_index = 0

        self.batchsize = 128
        # Halve the 250x250 input via 2x2 averaging before taking the crop.
        self.useSmallImage = True
        # self.useSmallImage = False
        self.size = 64           # side length of the centre crop fed to the net
        self.model = Net()

        self.folder = "./python/cbrmDcbrmDcbrmDlbrDls/"

        # pkl_file = open(self.folder + "FacialEmotions.pkl", "rb")
        # self.model = pickle.load(pkl_file)
        # print("load model", file=sys.stderr)
        # sys.stderr.flush()

    def newImage(self, img):
        """Convert a flat packed-RGB image into a centred (3, size, size) crop."""
        three = self.oneDimensionToThreeDimension(img)

        if self.useSmallImage:
            three = self.smallImage(three)

        startR = (len(three[0]) // 2) - (self.size // 2)
        startC = (len(three[0][0]) // 2) - (self.size // 2)
        return self.cropImage(three, startR, startC, self.size, self.size)

    def cropImage(self, img, startR, startC, sizeR, sizeC):
        """Return the (3, sizeR, sizeC) float sub-image starting at (startR, startC).

        Vectorized slice instead of the original per-pixel copy loops.
        """
        a = np.asarray(img)
        return a[:, startR:startR + sizeR, startC:startC + sizeC].astype(np.float64)

    def smallImage(self, img):
        """Downsample by 2 in each spatial dimension via 2x2 box averaging."""
        a = np.asarray(img, dtype=np.float64)
        sizeR = a.shape[1] // 2
        sizeC = a.shape[2] // 2
        # Average each non-overlapping 2x2 block in one vectorized pass
        # (same arithmetic as the original quadruple loop).
        a = a[:, :2 * sizeR, :2 * sizeC]
        return a.reshape(3, sizeR, 2, sizeC, 2).mean(axis=(2, 4))

    def oneDimensionToThreeDimension(self, img):
        """Unpack a flat list of 250*250 packed 0xRRGGBB ints into (3, 250, 250)."""
        packed = np.asarray(img, dtype=np.int64).reshape(250, 250)
        three = np.zeros((3, 250, 250))
        three[0] = (packed >> 16) & 255  # red
        three[1] = (packed >> 8) & 255   # green
        three[2] = packed & 255          # blue
        return three

    def flipImage(self, img):
        """Return a horizontally mirrored float copy of a (3, R, C) image."""
        return np.asarray(img, dtype=np.float64)[:, :, ::-1].copy()

    def training(self, img, emot):
        """Receive one training example.

        Returns 1 to stop receiving examples (model/cache available or debug
        cap reached), 0 to request the next example.
        """
        if self.useModel:
            return 1

        # Reuse cached training data pickled by an earlier run.
        # NOTE(review): pickle.load executes arbitrary code; only safe because
        # these cache files are produced locally by training2().
        elif os.path.isfile("./python/training_seed1.pkl"):
            with open("./python/training_seed1.pkl", "rb") as pkl_file:
                self.train = pickle.load(pkl_file)
            with open("./python/training_result_seed1.pkl", "rb") as pkl_file:
                self.train_result = pickle.load(pkl_file)
            print("load training data", file=sys.stderr)
            sys.stderr.flush()
            return 1

        if self.train_index % 100 == 0:
            print(self.train_index, file=sys.stderr)

        image = self.newImage(img)
        self.train.append(image)
        # self.train.append(self.flipImage(image))

        # emot is a one-hot-like weight vector over 7 emotions; recover the
        # label as the weighted index.
        emotion = 0
        for i in range(7):
            emotion += i * emot[i]
        self.train_result.append(emotion)
        # self.train_result.append(emotion)

        self.train_index += 1

        if self.train_size10 and self.train_index == 10:
            return 1

        return 0

    def training2(self):
        """Train the model on all collected examples (20 Adam epochs)."""
        # Cache the raw training data so later runs can skip collection.
        if not os.path.isfile("./python/training_seed1.pkl"):
            with open("./python/training_seed1.pkl", "wb") as f:
                pickle.dump(self.train, f, -1)
            with open("./python/training_result_seed1.pkl", "wb") as f:
                pickle.dump(self.train_result, f, -1)
            print("save training data", file=sys.stderr)
            sys.stderr.flush()

        self.train = np.array(self.train, dtype=np.float32)
        self.train_result = np.array(self.train_result, dtype=np.int32)

        # Scale pixel values from [0, 255] into [0, 1].
        self.train /= 255.0

        optimizer = optimizers.Adam()
        optimizer.setup(self.model)

        batchsize = self.batchsize
        n_epoch = 20

        x_train = self.train
        y_train = self.train_result
        N = len(y_train)

        elapse = int(time.perf_counter() - self.startTime)
        print("start, time: {}:{}:{}".format(elapse // (60 * 60), (elapse % (60 * 60)) // 60, elapse % 60), file=sys.stderr)
        sys.stderr.flush()
        for epoch in range(1, n_epoch + 1):

            perm = np.random.permutation(N)
            sum_loss = 0
            sum_accuracy = 0
            for i in range(0, N, batchsize):
                x_batch = np.asarray(x_train[perm[i:i + batchsize]])
                y_batch = np.asarray(y_train[perm[i:i + batchsize]])

                optimizer.zero_grads()  # pre-Chainer-2.0 optimizer API
                loss, accuracy = self.model(x_batch, y_batch, train=True)
                loss.backward()
                optimizer.update()
                sum_loss += float(loss.data) * len(y_batch)
                sum_accuracy += float(accuracy.data) * len(y_batch)

            # Snapshot the model after every epoch.
            with open(self.folder + "FacialEmotions" + str(epoch) + ".pkl", "wb") as f:
                pickle.dump(self.model.to_cpu(), f, -1)
            print("save model", file=sys.stderr)
            sys.stderr.flush()

            elapse = int(time.perf_counter() - self.startTime)
            print("epoch: {}, time: {}:{}:{}, loss: {}, accuracy: {}".format(epoch, elapse // (60 * 60), (elapse % (60 * 60)) // 60, elapse % 60, sum_loss / N, sum_accuracy / N), file=sys.stderr)
            sys.stderr.flush()

        with open(self.folder + "FacialEmotions.pkl", "wb") as f:
            pickle.dump(self.model.to_cpu(), f, -1)
        print("save model", file=sys.stderr)
        sys.stderr.flush()

    def testing(self, img, *args):
        """Predict the 7 emotion probabilities for one flat image.

        On the first call, either loads a pickled model (useModel=True) or
        trains one from the collected data.
        """
        if self.test_index % 10 == 0:
            print(self.test_index, file=sys.stderr)
            sys.stderr.flush()

        if self.useModel and self.test_index == 0:
            # NOTE(review): pickle.load runs arbitrary code; model file is trusted.
            with open(self.folder + "FacialEmotions.pkl", "rb") as pkl_file:
                self.model = pickle.load(pkl_file)
            print("load model", file=sys.stderr)
            sys.stderr.flush()

        elif self.test_index == 0:
            self.training2()

        self.test_index += 1

        newImg = self.newImage(img)

        test = np.array([newImg], dtype=np.float32)
        test /= 255.0

        prediction = self.model(test, None, train=False)

        return prediction.data[0]

if __name__ == "__main__":

    fe = FacialEmotions()

    # Training phase: N examples, each a flat image of S packed-RGB ints
    # followed by 7 emotion scores. Echo the handshake value after every
    # example; 1 tells the judge to stop sending training data.
    N = int(input())
    for _ in range(N):
        S = int(input())
        imageData = [int(input()) for _ in range(S)]
        emotions = [float(input()) for _ in range(7)]
        ret = fe.training(imageData, emotions)
        print(ret)
        sys.stdout.flush()
        if ret == 1:
            break

    # Testing phase: M images; print 7 probability values per image.
    M = int(input())
    for _ in range(M):
        S = int(input())
        imageData = [int(input()) for _ in range(S)]
        for emotion in fe.testing(imageData):
            print(emotion)
            sys.stdout.flush()