Minmax algorithm chooses the worst move for some depths
I have to code a player for the FloodWars, the game on Android. So far I succeeded in coding a minmax and a material evaluator, which is straightforward.
My problem is that on a specific table, on odd depths greater than 9, it gives the worst move. Here is the table:
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
@@**..**@@.#@@.##
The colors are replaced by: '@', '#', '+', '.', '*'
. Here is the output on specified depths:
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
On all depths smaller than 11 it gives the expected output:
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
Here is some pieces of the code, the important ones:
int minmaxMin(int lvl, board *b)
{
if(lvl == 0)
{
return evalM(b, g_me, g_he);
}
board cpB;
moves M = getMoves(b, g_me, g_he);
int score = 1000000;
int i;
for(i = 0; i < 3; i++)
{
copyBoard(b, &cpB);
memset(was, 0, MAX_L * MAX_C);
floodfill_ch(&cpB, g_he.l, g_he.c, b->d[g_he.l][g_he.c], M.m[i]);
score = min(score, minmaxMax(lvl - 1, &cpB));
}
return score;
}
int minmaxMax(int lvl, board *b)
{
if(lvl == 0)
{
return evalM(b, g_me, g_he);
}
board cpB;
moves M = getMoves(b, g_me, g_he);
int score = -1000000;
for(int i = 0; i < 3; i++)
{
copyBoard(b, &cpB);
memset(was, 0, MAX_L * MAX_C);
floodfill_ch(&cpB, g_me.l, g_me.c, b->d[g_me.l][g_me.c], M.m[i]);
score = max(score, minmaxMin(lvl - 1, &cpB));
}
return score;
}
I swapped the arguments of the evalM
, how @WeatherVane suggested, but nothing had happened at all. I printed on stdout the result of the child of the root and the result(scores) were different, but the program chose the worst one.
Here is the debug screen for program with arguments swapped:
scr: 0:
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
scr: 0:
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
scr: -26:
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
And this is the output without arguments being swapped:
scr: 39:
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
++**..**@@.#@@.##
scr: 26:
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
..**..**@@.#@@.##
scr: 39:
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
****..**@@.#@@.##
The score is the score marked on the children of the root.
Here is the entire code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define DEBUG
#ifndef DEBUG
#define fin stdin
#define fout stdout
#else
FILE *fin, *fout, *tty;
#endif
#define MAX_L 50
#define MAX_C 50
typedef struct
{
char d[MAX_L][MAX_C];
int l, c;
}board;
typedef struct
{
int l, c;
}player;
typedef struct
{
char m[3];
}moves;
int max(int a, int b)
{
return a > b ? a : b;
}
int min(int a, int b)
{
return a < b ? a : b;
}
//utilities
void readBoard(FILE *fin, board *b, player* me, player* he);
void printBoard(FILE *fout, board *b);
//eval
int evalM(board* b, player me, player he);
//search
int minmaxMin(int lvl, board *b);
int minmaxMax(int lvl, board *b);
int negamax(int lvl, board *b, char col);
int alphabetaMin(int lvl, board *bo, int a, int b);
int alphabetaMax(int lvl, board *bo, int a, int b);
void floodfill_ch(board *b, int l, int c, char from, char to);
player g_me, g_he;
char was[MAX_L][MAX_C];
//moves
moves getMoves(board *b, player me, player he)
{
char pM [] = {'@', '#', '+', '.', '*'};
moves M;
int i, j = 0;
//printf("inside getMoves (%c, %c): ", b->d[me.l][me.c], b->d[he.l][he.c]);
for(i = 0; i < 5; i++)
if(pM[i] != b->d[me.l][me.c] && pM[i] != b->d[he.l][he.c])
M.m[j++] = pM[i]/*, printf("'%c' ", pM[i])*/;
//printf("n");
return M;
}
void copyBoard(board *src, board *dst)
{
dst -> l = src -> l;
dst -> c = src -> c;
memcpy(dst -> d, src -> d, MAX_C * MAX_L);
}
int main()
{
#ifdef DEBUG
fin = fopen("in", "r");
fout = fopen("out", "w");
tty = fopen("debugyline", "w");
#endif
board currentState;
board cpB;
board winner;
player me, he;
int score;
readBoard(fin, ¤tState, &me, &he);
g_me = me, g_he = he;
printf("reported: (%d %d) (%d %d)n", me.l, me.c, he.l, he.c);
printf("reported score: %dn", evalM(¤tState, me, he));
printBoard(stdout, ¤tState);
moves M = getMoves(¤tState, g_me, g_he);
printf("possible moves: '%c' '%c' '%c';n", M.m[0], M.m[1], M.m[2]);
score = -1000000;
for(int i = 0; i < 3; i++)
{
copyBoard(¤tState, &cpB);
memset(was, 0, MAX_L * MAX_C);
floodfill_ch(&cpB, g_me.l, g_me.c, cpB.d[g_me.l][g_me.c], M.m[i]);
int x = minmaxMin(10, &cpB);
#ifdef DEBUG
fprintf(stdout, " scr: %d:n", x, evalM(&cpB, g_he, g_me));
printBoard(stdout, &cpB);
fprintf(stdout, "nnnnn");
#endif
if(x > score)
{
score = x;
copyBoard(&cpB, &winner);
}
}
printf("nn");
printf("minimax scored at: %dn", score);
printBoard(stdout, &winner);
#ifdef DEBUG
fclose(fin);
fclose(fout);
fclose(tty);
#endif
return 0;
}
//search
//--
void floodfill_ch(board *b, int l, int c, char from, char to)
{
if(l >= 0 && l < b->l && c >= 0 && c < b->c && !was[l][c] && b->d[l][c] == from)
{
b->d[l][c] = to;
was[l][c] = 1;
char ldir[] = {0, 0, -1, 1},
cdir[] = {1, -1, 0, 0};
for(int i = 0; i < 4; i++)
floodfill_ch(b, l + ldir[i], c + cdir[i], from, to);
}
}
//--
//minmax (mm123)
int minmaxMin(int lvl, board *b)
{
if(lvl == 0)
{
#ifdef DEBUG
fprintf(tty, "scr = %d: n", evalM(b, g_he, g_me));
//printBoard(tty, b);
//fprintf(tty, "nnnnn");
#endif
return evalM(b, g_he, g_me);
}
board cpB;
moves M = getMoves(b, g_me, g_he);
int score = 1000000;
int i;
for(i = 0; i < 3; i++)
{
copyBoard(b, &cpB);
memset(was, 0, MAX_L * MAX_C);
floodfill_ch(&cpB, g_he.l, g_he.c, b->d[g_he.l][g_he.c], M.m[i]);
score = min(score, minmaxMax(lvl - 1, &cpB));
}
return score;
}
int minmaxMax(int lvl, board *b)
{
if(lvl == 0)
{
#ifdef DEBUG
fprintf(tty, "scr = %d: n", evalM(b, g_he, g_me));
printBoard(tty, b);
fprintf(tty, "nnnnn");
#endif
return evalM(b, g_me, g_he);
}
board cpB;
moves M = getMoves(b, g_me, g_he);
int score = -1000000;
for(int i = 0; i < 3; i++)
{
copyBoard(b, &cpB);
memset(was, 0, MAX_L * MAX_C);
floodfill_ch(&cpB, g_me.l, g_me.c, b->d[g_me.l][g_me.c], M.m[i]);
/*
#ifdef DEBUG
fprintf(tty, "%d:n", lvl - 1);
printBoard(tty, &cpB);
fprintf(tty, "nnnnn");
#endif
*/
score = max(score, minmaxMin(lvl - 1, &cpB));
}
return score;
}
// eval (ev456)
int sum;
void floodfill(board *b, int l, int c, char src)
{
if(l >= 0 && l < b->l && c >= 0 && c < b->c && !was[l][c] && b->d[l][c] == src)
{
sum ++;
was[l][c] = 1;
char ldir[] = {0, 0, -1, 1},
cdir[] = {1, -1, 0, 0};
for(int i = 0; i < 4; i++)
floodfill(b, l + ldir[i], c + cdir[i], src);
}
}
int evalM(board* b, player me, player he)
{
int s_me, s_he;
sum = 0;
memset(was, 0, MAX_L * MAX_C);
floodfill(b, me.l, me.c, b->d[me.l][me.c]);
s_me = sum;
sum = 0;
memset(was, 0, MAX_L * MAX_C);
floodfill(b, he.l, he.c, b->d[he.l][he.c]);
s_he = sum;
return s_me - s_he;
}
// I/O => board (iob1234)
void readBoard(FILE *fin, board *b, player* me, player* he)
{
int l, c;
char ch, t;
player pJ, pS;
t = fgetc(fin);
fscanf(fin," ");
ch = fgetc(fin);
l = 0;
while(ch != EOF && ch != 'n')
{
c = 0;
while(ch != 'n' && ch != ' ' && ch != EOF)
{
b->d[l][c] = ch;
ch = fgetc(fin);
c++;
}
ch = fgetc(fin);
l++;
}
b->l = l;
b->c = c;
pJ.l = l-1, pJ.c = 0;
pS.l = 0, pS.c = c-1;
if(t == 'J')
*me = pJ, *he = pS;
else if(t == 'S')
*me = pS, *he = pJ;
#ifdef DEBUG
else printf("debug: (error) type of player broken; exiting...n"), exit(-1);
#endif
}
void printBoard(FILE *fout, board *b)
{
int l, c;
for(l = 0; l < b->l; l++)
{
fprintf(fout, " ");
for(c = 0; c < b->c; c++)
fprintf(fout, "%c", b->d[l][c]);
fprintf(fout, "n");
}
}
The call for minmax algorithm is minmaxMin(10, &cpB);
上一篇: C ++ Negamax alpha
下一篇: Minmax算法选择一些深度的最差移动