2013-06-26 44 views
0

为了提供上下文,我正在通过Programming Praxis Bingo Challenge工作,并希望看看我能够让代码运行得有多快。随机数组优化提示

static void fisher_yates(T& source) { 
    const size_t len = source.size(); 
    for(size_t i = 1; i < len;++i) { 
     std::swap(source[i],source[rand() % (i+1)]); 
    } 
} 
std::array<int,25> generate_table() { 
    std::array<int,25> bingo_grid; 
    for(int i = 0 ; i < 25;++i) { 
     switch(i) { 
     case 0: case 1: case 2: case 3: case 4: 
      bingo_grid[i] = rand() % 15 + 1; 
      break; 
     case 5: case 6: case 7: case 8: case 9: 
      bingo_grid[i] = rand() % 15 + 16; 
      break; 
     case 10: case 11: case 12: case 13: case 14: 
      bingo_grid[i] = rand() % 15 + 31; 
      break; 
     case 15: case 16: case 17: case 18: case 19: 
      bingo_grid[i] = rand() % 15 + 46; 
      break; 
     case 20: case 21: case 22: case 23: case 24: 
      bingo_grid[i] = rand() % 15 + 61; 
      break; 
     } 
    } 
    bingo_grid[12] = 0; 
    return bingo_grid; 
} 

bool is_bingoed(const std::array<int,25>& grid) { 
    // Check columns 
    if(grid[0] == 0) { 
     if(grid[1] == 0 && grid[2] == 0 && grid[3] == 0 && grid[4] == 0) 
      return true; 
     if(grid[0] == 0 && grid[6] == 0 && grid[18] == 0 && grid[24] == 0) 
      return true; 
     if(grid[5] == 0 && grid[10] == 0 && grid[15] == 0 && grid[20] == 0) 
      return true; 
    } 
    if(grid[1] == 0) { 
     if(grid[6] == 0 && grid[11] == 0 && grid[16] == 0 && grid[21] == 0) 
      return true; 
    } 
    if(grid[2] == 0) { 
     if(grid[7] == 0 && grid[17] == 0 && grid[22] == 0) 
      return true; 
    } 
    if(grid[3] == 0) { 
     if(grid[8] == 0 && grid[13] == 0 && grid[18] == 0 && grid[23] == 0) 
      return true; 
    } 
    if(grid[4] == 0) { 
     if(grid[9] == 0 && grid[14] == 0 && grid[19] == 0 && grid[24] == 0) 
      return true; 
     if(grid[8] == 0 && grid[16] == 0 && grid[21] == 0) 
      return true; 
    } 
    if(grid[6] == 0) { 
     if(grid[6] == 0 && grid[7] == 0 && grid[8] == 0 && grid[9] == 0) 
      return true; 
    } 
    if(grid[12] == 0) { 
     if(grid[10] == 0 && grid[11] == 0 && grid[13] == 0 && grid[14] == 0) 
      return true; 
    } 
    if(grid[18] == 0) { 
     if(grid[15] == 0 && grid[16] == 0 && grid[17] == 0 && grid[19] == 0) 
      return true; 
    } 
    return false; 
} 

static bool mark_card(const int card,std::array<int,25>& bingo_grid) { 
    for(auto &i : bingo_grid) 
     if(card == i) { 
      i = 0; 
      return true; 
     } 
     return false; 
} 

int play_game() { 
    // Bingo is 5 columns, each column(n) is random permutation of 1-15*n 
    // Fisher-Yates to generate random permutations 

    // Create 500 playing cards 
    const int max = 500; 
    std::vector<std::array<int,25>> bingo_cards; 
    bingo_cards.reserve(max); 
    for(int i = 0; i<max;++i) { 
     bingo_cards.push_back(generate_table()); 
     //display_bingo(bingo_cards[i]); 
    } 
    // Random shuffle 75 cards 
    auto iter = boost::counting_range(1,76); 
    std::vector<int> cards(std::begin(iter),std::end(iter)); 
    fisher_yates(cards); 
    bool is_finished = false; 
    int counter = 0; 
    for(auto card : cards) { 
     for(auto& playing_card : bingo_cards) { 
      if(mark_card(card,playing_card)) { 
       //display_bingo(playing_card); 
       if(is_bingoed(playing_card)) 
        return counter; 
      } 
     } 
     counter++; 
    } 
    return counter; 
} 
int bingo() { 
    srand(time(NULL)); 
    int total = 0; 
    for(int i = 0 ; i < 10000;i++) { 
     total+=play_game(); 
    } 
    boost::singleton_pool<boost::pool_allocator_tag, sizeof(int)>::release_memory(); 
    return total/10000; 
} 

原始版本使用boost :: multi_array来表示网格。分析后,我将其更改为一个std ::数组,这让我大大加快了速度。然后,我使用fisher_yates shuffle移动生成宾果卡,使用随机数生成器。 最后,我改变了is_bingoed测试函数,以减少每次调用的检查次数,以加速游戏结束检查。

这一切都有所帮助。现在,如果我分析此代码,generate_table函数占用72%的时间,mark_card()为18%,is_bingoed()约为6%。我在寻找提示,看看可以做些什么来提高速度。我首先想到的是is_bingoed()是使用SSE内在函数与0进行比较(也许使用XOR?),但我对generate_table()或mark_car()没有任何想法。这更多的是为了好玩而自我挑战,但是想知道别人怎么看?

目前的时机是它2GHz的Q6660需要4.6S(低于原来35秒)

+0

当你剖析你的代码''generate_table'在'rand'中花费了多少百分比? –

+0

大约40%花在兰特() – Ronnie

回答

1

只是集中于最昂贵的功能,generate_table,可以简化这部分代码,使之少枝,这可能有所帮助:

for(int i = 0 ; i < 25;++i) { 
    switch(i) { 
    case 0: case 1: case 2: case 3: case 4: 
     bingo_grid[i] = rand() % 15 + 1; 
     break; 
    case 5: case 6: case 7: case 8: case 9: 
     bingo_grid[i] = rand() % 15 + 16; 
     break; 
    case 10: case 11: case 12: case 13: case 14: 
     bingo_grid[i] = rand() % 15 + 31; 
     break; 
    case 15: case 16: case 17: case 18: case 19: 
     bingo_grid[i] = rand() % 15 + 46; 
     break; 
    case 20: case 21: case 22: case 23: case 24: 
     bingo_grid[i] = rand() % 15 + 61; 
     break; 
    } 
} 

eg

for(int i = 0 ; i < 25;++i) { 
    int r = rand() % 15 + 1; 
    bingo_grid[i] = r + (i/5) * 15; 
} 

除此之外,我会看一个更快的rand(),并看看你是否可以摆脱除法和模。

在另一个注释中,您的算法可能有缺陷,因为在bingo_grid中没有任何可以防止重复的数字。

+0

我同意兰德()的重复数字 - 谢谢。 – Ronnie

0

改变is_bingoed()方法使用SSE指令(使用Agner Fog's库)和Paul的r generate_table()的时间减少到仅仅1.05s。并使用Intel's fast_rand()函数将其降至0.38s。 所以我想我会粘贴代码更改为其他谁可能感兴趣。

static unsigned int g_seed; 


//Used to seed the generator. 
inline void fast_srand(int seed) 
{ 
    g_seed = seed; 
} 


//fastrand routine returns one integer, similar output value range as C lib. 
inline int fastrand() 
{ 
    g_seed = (214013*g_seed+2531011); 
    return (g_seed>>16)&0x7FFF; 

} 
bool is_bingoed(const std::array<int,25>& grid) { 
    // Check columns 
    Vec8i vec(grid[0],grid[1],grid[2],grid[3],grid[4],0,0,0); 
    Vec4i vec2(grid[6],grid[18],grid[24],0); 
    Vec4i vec3(grid[5],grid[10],grid[15],20); 
    Vec8i vec4(grid[1],grid[6],grid[11],grid[16],grid[21],0,0,0); 
    Vec4i vec5(grid[2],grid[7],grid[17],grid[22]); 
    Vec8i vec6(grid[3],grid[8],grid[13],grid[18],grid[23],0,0,0); 
    Vec8i vec7(grid[4],grid[9],grid[14],grid[19],grid[24],0,0,0); 
    Vec4i vec8(grid[8],grid[16],grid[21],grid[4]); 
    Vec4i vec9(grid[6],grid[7],grid[8],grid[9]); 
    Vec8i vec10(grid[12],grid[10],grid[11],grid[13],grid[14],0,0,0); 
    Vec8i vec11(grid[18],grid[15],grid[16],grid[17],grid[19],0,0,0); 
    if(horizontal_and(vec) && horizontal_and(vec2) && horizontal_and(vec3) && horizontal_and(vec4) && 
     horizontal_and(vec5) && horizontal_and(vec6) && horizontal_and(vec7) && horizontal_and(vec8)) { 
      return false; 
    } 
    if(horizontal_and(vec9) && horizontal_and(vec10) && horizontal_and(vec11)) { 
      return false; 
    } 
    return true; 
}