我只是看了看源代码,但不幸的是(除非,希望我错了),他们似乎并没有让你就位访问特定位块的const & unsigned long
。如果他们这样做了,那么你可以执行模板递归,并且有效地比较每个unsigned long
而不是无符号长整数中的每一位。
毕竟,如果A < B
,那么不仅应该每个最高有效位a <= b
,也是每个最重要的块A[i] <= B[i]
。
我讨厌这么说,但我可能会用C++ 11的std::array
递归递归。如果你有权访问这些块,那么你可以做一个模板递归函数来做到这一点很容易(因为你确实知道你知道元编程),所以给编译器一个很好的机会来优化。
总而言之,这不是一个好的答案,但那就是我会做的。顺便说一下,这是一个很好的问题。
===========
编辑
这应该时间三种方法:将一个使用最新的upvotes,我描述区块策略和模板递归变种。我用bitsets填充矢量,然后使用指定的比较器函子重复排序。
快乐黑客!
输出我的电脑上:
RUNTIMES:
compiled g++ -std=c++11 -Wall -g test.cpp
std::bitset 4530000 (6000000 original in OP)
Block-by-block 900000
Template recursive 730000
compiled g++ -std=c++11 -Wall -g -O3 test.cpp
RUNTIMES:
std::bitset 700000 (740000 original in OP)
Block-by-block 470000
Template recursive 530000
C++代码11:
#include <iostream>
#include <bitset>
#include <algorithm>
#include <time.h>
/* Existing answer. Note that I've flipped the order of bit significance to match my own */
template<std::size_t N>
class BitByBitComparator
{
public:
bool operator()(const std::bitset<N>& x, const std::bitset<N>& y) const
{
for (int i = 0; i < N; ++i) {
if (x[i]^y[i]) return y[i];
}
return false;
}
};
/* New simple bit set class (note: mostly untested). Also note bad
design: should only allow read access via immutable facade. */
template<std::size_t N>
class SimpleBitSet
{
public:
static const int BLOCK_SIZE = 64;
static const int LOG_BLOCK_SIZE = 6;
static constexpr int NUM_BLOCKS = N >> LOG_BLOCK_SIZE;
std::array<unsigned long int, NUM_BLOCKS> allBlocks;
SimpleBitSet()
{
allBlocks.fill(0);
}
void addItem(int itemIndex)
{
// TODO: can do faster
int blockIndex = itemIndex >> LOG_BLOCK_SIZE;
unsigned long int & block = allBlocks[blockIndex];
int indexWithinBlock = itemIndex % BLOCK_SIZE;
block |= (0x8000000000000000 >> indexWithinBlock);
}
bool getItem(int itemIndex) const
{
int blockIndex = itemIndex >> LOG_BLOCK_SIZE;
unsigned long int block = allBlocks[blockIndex];
int indexWithinBlock = itemIndex % BLOCK_SIZE;
return bool((block << indexWithinBlock) & 0x8000000000000000);
}
};
/* New comparator type 1: block-by-block. */
template<std::size_t N>
class BlockByBlockComparator
{
public:
bool operator()(const SimpleBitSet<N>& x, const SimpleBitSet<N>& y) const
{
return ArrayCompare(x.allBlocks, y.allBlocks);
}
template <std::size_t S>
bool ArrayCompare(const std::array<unsigned long int, S> & lhs, const std::array<unsigned long int, S> & rhs) const
{
for (int i=0; i<S; ++i)
{
unsigned long int lhsBlock = lhs[i];
unsigned long int rhsBlock = rhs[i];
if (lhsBlock < rhsBlock) return true;
if (lhsBlock > rhsBlock) return false;
}
return false;
}
};
/* New comparator type 2: template recursive block-by-block. */
template <std::size_t I, std::size_t S>
class TemplateRecursiveArrayCompare;
template <std::size_t S>
class TemplateRecursiveArrayCompare<S, S>
{
public:
bool operator()(const std::array<unsigned long int, S> & lhs, const std::array<unsigned long int, S> & rhs) const
{
return false;
}
};
template <std::size_t I, std::size_t S>
class TemplateRecursiveArrayCompare
{
public:
bool operator()(const std::array<unsigned long int, S> & lhs, const std::array<unsigned long int, S> & rhs) const
{
unsigned long int lhsBlock = lhs[I];
unsigned long int rhsBlock = rhs[I];
if (lhsBlock < rhsBlock) return true;
if (lhsBlock > rhsBlock) return false;
return TemplateRecursiveArrayCompare<I+1, S>()(lhs, rhs);
}
};
template<std::size_t N>
class TemplateRecursiveBlockByBlockComparator
{
public:
bool operator()(const SimpleBitSet<N>& x, const SimpleBitSet<N>& y) const
{
return TemplateRecursiveArrayCompare<x.NUM_BLOCKS, x.NUM_BLOCKS>()(x.allBlocks, y.allBlocks);
}
};
/* Construction, timing, and verification code */
int main()
{
srand(0);
const int BITSET_SIZE = 4096;
std::cout << "Constructing..." << std::endl;
// Fill a vector with random bitsets
const int NUMBER_TO_PROCESS = 10000;
const int SAMPLES_TO_FILL = BITSET_SIZE;
std::vector<std::bitset<BITSET_SIZE> > allBitSets(NUMBER_TO_PROCESS);
std::vector<SimpleBitSet<BITSET_SIZE> > allSimpleBitSets(NUMBER_TO_PROCESS);
for (int k=0; k<NUMBER_TO_PROCESS; ++k)
{
std::bitset<BITSET_SIZE> bs;
SimpleBitSet<BITSET_SIZE> homemadeBs;
for (int j=0; j<SAMPLES_TO_FILL; ++j)
{
int indexToAdd = rand()%BITSET_SIZE;
bs[indexToAdd] = true;
homemadeBs.addItem(indexToAdd);
}
allBitSets[k] = bs;
allSimpleBitSets[k] = homemadeBs;
}
clock_t t1,t2,t3,t4;
t1=clock();
std::cout << "Sorting using bit-by-bit compare and std::bitset..." << std::endl;
const int NUMBER_REPS = 100;
for (int rep = 0; rep<NUMBER_REPS; ++rep)
{
auto tempCopy = allBitSets;
std::sort(tempCopy.begin(), tempCopy.end(), BitByBitComparator<BITSET_SIZE>());
}
t2=clock();
std::cout << "Sorting block-by-block using SimpleBitSet..." << std::endl;
for (int rep = 0; rep<NUMBER_REPS; ++rep)
{
auto tempCopy = allSimpleBitSets;
std::sort(tempCopy.begin(), tempCopy.end(), BlockByBlockComparator<BITSET_SIZE>());
}
t3=clock();
std::cout << "Sorting block-by-block w/ template recursion using SimpleBitSet..." << std::endl;
for (int rep = 0; rep<NUMBER_REPS; ++rep)
{
auto tempCopy = allSimpleBitSets;
std::sort(tempCopy.begin(), tempCopy.end(), TemplateRecursiveBlockByBlockComparator<BITSET_SIZE>());
}
t4=clock();
std::cout << std::endl << "RUNTIMES:" << std::endl;
std::cout << "\tstd::bitset \t" << t2-t1 << std::endl;
std::cout << "\tBlock-by-block \t" << t3-t2 << std::endl;
std::cout << "\tTemplate recursive \t" << t4-t3 << std::endl;
std::cout << std::endl;
std::cout << "Checking result... ";
std::sort(allBitSets.begin(), allBitSets.end(), BitByBitComparator<BITSET_SIZE>());
auto copy = allSimpleBitSets;
std::sort(allSimpleBitSets.begin(), allSimpleBitSets.end(), BlockByBlockComparator<BITSET_SIZE>());
std::sort(copy.begin(), copy.end(), TemplateRecursiveBlockByBlockComparator<BITSET_SIZE>());
for (int k=0; k<NUMBER_TO_PROCESS; ++k)
{
auto stdBitSet = allBitSets[k];
auto blockBitSet = allSimpleBitSets[k];
auto tempRecBlockBitSet = allSimpleBitSets[k];
for (int j=0; j<BITSET_SIZE; ++j)
if (stdBitSet[j] != blockBitSet.getItem(j) || blockBitSet.getItem(j) != tempRecBlockBitSet.getItem(j))
std::cerr << "error: sorted order does not match" << std::endl;
}
std::cout << "success" << std::endl;
return 0;
}
关于你的想法无关分支使用正确的bitshift:这将创建大量的中间对象,并且to_ullong将必须检查移位后的值是否适合每个检查的“无符号long long”,从而创建相当的s头顶上。我怀疑它会更快,但只有基准可以证明这一点。 –
复制std :: bitset的代码,重命名它,给它一种访问某个单词的方法。 –
@brianbeuning如果你正在复制代码,你可以简单地提供一个可以访问内部的'operator <'。 –