EvbCFfp1XB

problem and my answer.


yowaさんのスコアだと、0.905位。上位から4%ほど悪いらしい。

Approach
  • コインが200以下で始まった時、即終了。(1回くらいnotePlayしても良かったかも?)
  • コインが最初の25%以下になったら終了。(kelly criterion より多く賭けているとき終了というのを入れていた。そして、終了の時のコインの枚数を見ると6枚とかで、kelly criterionより全然少なく賭けていることがわかった。そしたら、最初のコインの枚数から大きく減るはずがないので、これを入れておいた。)
  • notePlay する確率 1から0に線形に減らす。(乱数使いたくなかった。)
    • notePlay することにしても、期待値+ボーナスが最も良いマシーンが40回以上 notePlay してたら、 quickPlay に変える。
    • quickPlay することにしても、期待値が1以下なら notePlay に変える。
  • リールは重複除去して覚える。(Testerで調べると、長さが30のリールでも、重複除去してもだいたい28〜30パターン残ったので、これで充分だと思った。)
  • ボーナス = 2 * Math.pow((40 - 選んだマシーンの現在のnotePlayの回数) / 40, 2) (UCBから変えた。)

強化学習したかった。



source code

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;

public class BrokenSlotMachines {
private XorShift rng = new XorShift(System.nanoTime());

private int coins;
private int maxTime;
private int noteTime;
private int numMachines;

private int startCoins;

private ArrayList<Character>[][] wheels;
private HashSet<String>[][] wheelPatternSet;

private int[] countNote;
private double[] expected;

public int playSlots(int coins, int maxTime, int noteTime, int numMachines) {
init(coins, maxTime, noteTime, numMachines);

solve();

Utils.debug("coins", this.coins);
return 0;
}

private void solve() {
for (int time = 0; time < maxTime;) {
if (coins <= 0.25 * startCoins) {
break;
}

calculateExpected();

int bestMachine = -1;

boolean isExplore = isExplore(time);
if (isExplore) {

double[] evPlusBonus = new double[numMachines];
for (int i = 0; i < evPlusBonus.length; i++) {
evPlusBonus[i] = expected[i] + bonus(i);
}
bestMachine = argMax(evPlusBonus);
if (countNote[bestMachine] >= 40) {
isExplore = false;
bestMachine = argMax(expected);
}

} else {
bestMachine = argMax(expected);
if (expected[bestMachine] < 1) {

isExplore = true;
double[] evPlusBonus = new double[numMachines];
for (int i = 0; i < evPlusBonus.length; i++) {
evPlusBonus[i] = expected[i] + bonus(i);
}
bestMachine = argMax(evPlusBonus);
}
}

if (coins <= 200) {
if (expected[argMax(expected)] > 1.1) {

} else {
break;
}
}

coins--;

int value;
String[] notePlay;
boolean useNote = isExplore;
useNote &= time + noteTime < maxTime;
if (useNote) {
notePlay = PlaySlots.notePlay(bestMachine, 1);
value = Integer.parseInt(notePlay[0]);
for (int c = 0; c < 3; c++) {
StringBuilder pattern = new StringBuilder();
for (int r = 0; r < 3; r++) {
pattern.append(notePlay[1].charAt(r * 3 + c));
}
if (wheelPatternSet[bestMachine][c].add(pattern.toString())) {
for (int r = 0; r < 3; r++) {
wheels[bestMachine][c].add(notePlay[1].charAt(r * 3 + c));
}
}
}
countNote[bestMachine]++;
} else {
value = PlaySlots.quickPlay(bestMachine, 1);
}
coins += value;
if (useNote) {
time += noteTime;
} else {
time++;
}
}

calculateExpected();

Utils.debug("expected", toString(expected));
}

private String[] toString(double[] expected2) {
String[] s = new String[expected2.length];
for (int i = 0; i < s.length; i++) {
s[i] = String.format("%.2f", expected2[i]);
}
return s;
}

private static int argMax(double[] values) {
int maxIndex = -1;
double max = -1e99;
for (int i = 0; i < values.length; i++) {
if (values[i] > max) {
max = values[i];
maxIndex = i;
}
}
return maxIndex;
}

private boolean isExplore(int time) {
double nextDouble = rng.nextDouble();
double range = updateRange(1.0, (double) 0, (double) time, (double) 0, (double) (maxTime - 1), 1.0);
return nextDouble < range;
}

private double bonus(int machine) {
if (countNote[machine] > 40) {
return 0;
}
return updateRange(2.0, 0, countNote[machine], 0, 40, 2.0);
}

private double updateRange(double startRange, double endRange, double time, double startTime, double endTime, double exp) {
if (time > endTime) {
return 0;
}
return endRange + (startRange - endRange) * Math.pow((endTime - time) / (endTime - startTime), exp);
}

private int count(ArrayList<Character> s, char c) {
int ret = 0;
for (int i = 0; i < s.size(); i++) {
if (s.get(i) == c)
ret++;
}
return ret;
}

private void calculateExpected() {
int[] payouts = new int[numMachines];
expected = new double[numMachines];
for (int i = 0; i < numMachines; i++) {
payouts[i] += count(wheels[i][0], 'A') * count(wheels[i][1], 'A') * count(wheels[i][2], 'A') * 1000;
payouts[i] += count(wheels[i][0], 'B') * count(wheels[i][1], 'B') * count(wheels[i][2], 'B') * 200;
payouts[i] += count(wheels[i][0], 'C') * count(wheels[i][1], 'C') * count(wheels[i][2], 'C') * 100;
payouts[i] += count(wheels[i][0], 'D') * count(wheels[i][1], 'D') * count(wheels[i][2], 'D') * 50;
payouts[i] += count(wheels[i][0], 'E') * count(wheels[i][1], 'E') * count(wheels[i][2], 'E') * 20;
payouts[i] += count(wheels[i][0], 'F') * count(wheels[i][1], 'F') * count(wheels[i][2], 'F') * 10;
payouts[i] += count(wheels[i][0], 'G') * count(wheels[i][1], 'G') * count(wheels[i][2], 'G') * 5;
if (payouts[i] == 0) {
expected[i] = 0;
continue;
}
expected[i] = 1.0 * payouts[i] / wheels[i][0].size() / wheels[i][1].size() / wheels[i][2].size();
}
}

private void init(int coins, int maxTime, int noteTime, int numMachines) {
this.coins = coins;
this.startCoins = coins;
this.maxTime = maxTime;
this.noteTime = noteTime;
this.numMachines = numMachines;
wheels = new ArrayList[numMachines][3];
for (int i = 0; i < wheels.length; i++) {
for (int j = 0; j < 3; j++) {
wheels[i][j] = new ArrayList<Character>();
}
}

wheelPatternSet = new HashSet[numMachines][3];
for (int i = 0; i < wheelPatternSet.length; i++) {
for (int j = 0; j < 3; j++) {
wheelPatternSet[i][j] = new HashSet<String>();
}
}

countNote = new int[numMachines];
Utils.debug("coins", coins, "maxTime", maxTime, "noteTime", noteTime, "numMachines", numMachines);
}

public static void main(String[] args) {
try {
BR.br = new BufferedReader(new InputStreamReader(System.in));

int coins = BR.readInt();
int maxTime = BR.readInt();
int noteTime = BR.readInt();
int numMachines = BR.readInt();

BrokenSlotMachines pw = new BrokenSlotMachines();
int ret = pw.playSlots(coins, maxTime, noteTime, numMachines);

System.out.println(ret);
System.out.flush();
} catch (Exception e) {
e.printStackTrace();
}
}
}

final class Utils {
private Utils() {
}

public static final void debug(Object... o) {
System.err.println(toString(o));
}

public static final String toString(Object... o) {
return Arrays.deepToString(o);
}

}

class XorShift {
private int w = 88675123;
private int x = 123456789;
private int y = 362436069;
private int z = 521288629;

public XorShift(long l) {
x = (int) l;
}

public int nextInt() {
final int t = x ^ (x << 11);
x = y;
y = z;
z = w;
w = w ^ (w >>> 19) ^ (t ^ (t >>> 8));
return w;
}

public long nextLong() {
return ((long) nextInt() << 32) ^ (long) nextInt();
}

public double nextDouble() {
return (nextInt() >>> 1) * 4.6566128730773926E-10;
}

public int nextInt(int n) {
return (int) (n * nextDouble());
}

}



Rank 61
Score 9991409128


Approach 行、列をランダムに、高さを小さい方から調べて良さそうなのに変えるSA。スコアは差の絶対値より差の2乗の方が良かった。


source code

https://future-contest-2018-qual.contest.atcoder.jp/submissions/2104810

このページのトップヘ