2011-08-10 63 views
0

我被要求在C++中重新实现散列。散列将用于集合。我拥有所有的代码。我只需要rehash功能。 rehash函数的目标是根据数据增加散列表的大小;所以,如果数据超过表的一半大小,应该增加表的大小(基于一些素数值),并在其中重新分配元素。我无法更改公共界面,但我可以更改私人部分。在C++中重新实现散列,rehash()方法中的问题

#ifndef HASHSET_H 
#define HASHSET_H 

// 
//  Hash table implementation of set 
//   (open addressing, quadratic hashing) 


#include <algorithm> 
#include <functional> 
#include <iterator> 
#include <vector> 
#include <iostream> 
#include <cassert> 


enum HashStatus { Occupied, Empty, Deleted }; 

template <class T> 
struct HashEntry 
{ 
    T data; 
    unsigned hash; 
    HashStatus info; 

    HashEntry(): info(Empty) {} 
    HashEntry(const T& v, unsigned h, HashStatus status) 
    : data(v), hash(h), info(status) {} 

private: 
    // Forbid copying of HashEntries 
    HashEntry(const HashEntry<T>&) {} 
    void operator=(const HashEntry<T>&) {} 
    // (If you think you need to copy these, you should consider 
    // copying pointers to them instead.) 
}; 


template <class T, class HashFun, class CompareEQ=std::equal_to<T> > 
class hash_set 
{ 
public: 
    typedef ptrdiff_t size_type; 
    typedef T key_type; 
    typedef T value_type; 


    hash_set(); 
    hash_set (const hash_set<T,HashFun,CompareEQ>&); 
    ~hash_set(); 

    const hash_set<T,HashFun,CompareEQ>& 
    operator= (const hash_set<T,HashFun,CompareEQ>&); 

    bool empty() const {return theSize == 0;} 
    size_type size() const {return theSize;} 

    void insert (const T& element); 

    unsigned count (const T& element) const; 

    void erase (const T& element); 

    void clear(); 


    class const_iterator 
    { 
    public: 
    typedef std::bidirectional_iterator_tag iterator_category; 
    typedef T       value_type; 
    typedef ptrdiff_t     difference_type; 
    typedef const T*     pointer; 
    typedef const T&     reference; 


    const_iterator& operator ++() 
     {++current; advance(); return *this;} 
    const_iterator operator ++(int) 
     { 
    const_iterator clone = this; 
    ++current; 
    advance(); 
    return clone; 
     } 

    const_iterator & operator --() 
     {--current; regress(); return *this;} 
    const_iterator operator --(int) 
     {const_iterator clone = this; 
     --current; 
     regress(); 
     return clone;} 

    reference operator *() { return (*current)->data; } 
    pointer operator ->() { return &((*current)->data); } 

    bool operator == (const const_iterator & rhs) const 
     { return current == rhs.current; } 
    bool operator != (const const_iterator & rhs) const 
     { return current != rhs.current; } 

    protected: 
    typename std::vector<HashEntry<T>*>::const_iterator current; 
    const std::vector<HashEntry<T>*>* v; 

    const_iterator (typename std::vector<HashEntry<T>*>::const_iterator p, 
      const std::vector<HashEntry<T>*>& theVector) 
     : current(p), v(&theVector) {} 

    void advance() 
     { 
    while (current != v->end() 
      && ((*current) == 0 
      || (*current)->info == Deleted 
      || (*current)->info == Empty)) 
     ++current; 
     } 

    void regress() 
     { 
    while (current != v->begin() 
      && ((*current) == 0 
      || (*current)->info == Deleted 
      || (*current)->info == Empty)) 
     --current; 
     } 

    friend class hash_set<T,HashFun,CompareEQ>; 
    }; 

    typedef const_iterator iterator; 

    const_iterator begin() const 
    { 
     const_iterator i = const_iterator(table.begin(), table); 
     i.advance(); 
     return i; 
    } 
    const_iterator end() const {return const_iterator(table.end(), table);} 

    iterator begin() 
    {iterator i = iterator(table.begin(), table); 
    i.advance(); 
    return i; 
    } 
    iterator end() {return iterator(table.end(), table);} 

    const_iterator find (const T& element) const; 
    iterator find (const T& element); 

    void printStats(std::ostream& out) const 
    { 
    using namespace std; 
    out << "size: " << theSize << " hSize: " << hSize << endl; 
    }  
private: 
    static unsigned primes[13]; 


    unsigned find (const T& element, unsigned h0) const; 
    void rehash(); 

    std::vector<HashEntry<T>*> table; 
    int theSize; 
    unsigned hSize; 
    unsigned hSize_index; 
    HashFun hash; 
    CompareEQ compare; 
}; 

template <class T, class Hash1, class Hash2, class Equals1, class Equals2> 
bool operator== (const hash_set<T,Hash1,Equals1>& left, 
     const hash_set<T,Hash2,Equals2>& right) 
{ 
    if (left.size() == right.size()) 
    { 
     for (typename hash_set<T,Hash1,Equals1>::const_iterator p = left.begin(); 
     p != left.end(); ++p) 
    { 
     if (right.count(*p) == 0) 
     return false; 
    } 
     return true; 
    } 
    else 
    return false; 
} 



// Table of prime numbers: Use these for table sizes so that 
// quadratic probing will succeed. 
template <class T, class HashFun, class CompareEQ> 
unsigned hash_set<T,HashFun,CompareEQ>::primes[13] = 
{5, 13, 31, 61, 127, 521, 1279, 2281, 3217, 
9689, 19937, 44497, 86243}; 





template <class T, class HashFun, class CompareEQ> 
hash_set<T,HashFun,CompareEQ>:: 
hash_set(): theSize(0), hSize_index(0) 
{ 
    // hash table size is the hsize_Index_th prime number in the primes table 
    hSize = primes[hSize_index]; 
    table.resize(hSize); 

    // Fill table with null pointers to strt with 
    fill (table.begin(), table.end(), (HashEntry<T>*)0); 
} 

template <class T, class HashFun, class CompareEQ> 
hash_set<T,HashFun,CompareEQ>:: 
    hash_set (const hash_set<T,HashFun,CompareEQ>& hs) 
    : hSize(hs.hSize), hSize_index(hs.hSize_index), theSize(0) 
{ 
    table.resize(hSize); 
    for (unsigned i = 0; i < hSize; ++i) 
    if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied)) 
     insert (hs.table[i]->data); 
} 

template <class T, class HashFun, class CompareEQ> 
hash_set<T,HashFun,CompareEQ>:: 
    ~hash_set() 
{ 
    clear(); 
} 


template <class T, class HashFun, class CompareEQ> 
const hash_set<T,HashFun,CompareEQ>& 
hash_set<T,HashFun,CompareEQ>:: 
    operator= (const hash_set<T,HashFun,CompareEQ>& hs) 
{ 
    if (this != &hs) 
    { 
     clear(); 
     for (unsigned i = 0; i < hs.hSize; ++i) 
     if ((hs.table[i] != 0) && (hs.table[i]->info == Occupied)) 
      insert (hs.table[i]->data); 
    } 
    return *this; 
} 


template <class T, class HashFun, class CompareEQ> 
void hash_set<T,HashFun,CompareEQ>:: 
insert (const T& element) 
{ 
    unsigned h0 = hash(element); 

    // Is this item already in the table? 
    unsigned h = find(element, h0); 
    if (h == hSize) 
    { 
     // No. Find an empty or deleted slot to put it into. 
     unsigned count = 0; 
     h = h0 % hSize; 
     while (table[h] != 0 
     && table[h]->info == Occupied 
     && count < hSize) 
    { 
     ++count; 
     h = (h0 + count*count) % hSize; 
    } 

     assert (count < hSize); // could not insert (table filled?) 

     delete table[h]; 
     table[h] = new HashEntry<T>(element, h0, Occupied); 
     ++theSize; 
    } 
    else 
    { // Yes. Replace the data field. 
     table[h]->data = element; 
    } 

    // Is the table half full or more? If so, increase the table size 
    // and redistribute the data. 
    if (2*theSize >= hSize) 
    rehash(); 
} 


template <class T, class HashFun, class CompareEQ> 
void hash_set<T,HashFun,CompareEQ>:: 
rehash() 
{ 
    //*** It should increase the size of 
    //*** the hash table to primes[hSize_index+1], redistributing the 
    //*** entries already in the table to their new positions. 
} 


template <class T, class HashFun, class CompareEQ> 
unsigned hash_set<T,HashFun,CompareEQ>:: 
count (const T& element) const 
{ 
    unsigned h0 = hash(element); 
    unsigned h = find(element, h0); 
    return (h != hSize) ? 1 : 0; 
} 




template <class T, class HashFun, class CompareEQ> 
void hash_set<T,HashFun,CompareEQ>:: 
erase (const T& element) 
{ 
    unsigned h0 = hash(element); 
    unsigned h = find(element, h0); 
    if (h != hSize) 
    { 
     table[h]->info = Deleted; 
     --theSize; 
    } 
} 


template <class T, class HashFun, class CompareEQ> 
void hash_set<T,HashFun,CompareEQ>:: 
clear() 
{ 
    for (unsigned i = 0; i < hSize; ++i) 
    { 
     delete table[i]; 
     table[i] = 0; 
    } 
    theSize = 0; 
} 



template <class T, class HashFun, class CompareEQ> 
unsigned hash_set<T,HashFun,CompareEQ>:: 
find (const T& element, unsigned h0) const 
{ 
    unsigned h = h0 % hSize; 
    unsigned count = 0; 

    // Search for element, stopping at any empty slot 
    // or when we've searched the whole table 
    while (table[h] != 0 && 
    (table[h]->info == Deleted || 
     (table[h]->info == Occupied 
     && (!compare(table[h]->data, element)))) 
    && count < hSize) 
    { 
     ++count; 
     h = (h0 + count*count) % hSize; 
    } 

    if (count >= hSize 
     || table[h] == 0 
     || table[h]->info == Empty) 
    return hSize; // didn't find element 
    else 
    return h;  // found it 
} 


template <class T, class HashFun, class CompareEQ> 
typename hash_set<T,HashFun,CompareEQ>::const_iterator 
hash_set<T,HashFun,CompareEQ>:: 
find (const T& element) const 
{ 
    unsigned h0 = hash(element); 
    unsigned h = find(element, h0); 
    if (h == hSize) 
    return end(); 
    else 
    return const_iterator(table.begin()+h, table); 
} 


template <class T, class HashFun, class CompareEQ> 
typename hash_set<T,HashFun,CompareEQ>::iterator 
hash_set<T,HashFun,CompareEQ>:: 
find (const T& element) 
{ 
    unsigned h0 = hash(element); 
    unsigned h = find(element, h0); 
    if (h == hSize) 
    return end(); 
    else 
    return iterator(table.begin()+h, table); 
} 





#endif 
+0

这里有问题吗?您需要使用您描述的方法完全按照您描述的方式编写rehash函数的主体。 –

+0

是的,我知道。我的问题是,当我调整矢量大小并且我想重新分配时,我会得到每个元素的两个瞬间。我不知道如何重新分配。如果我想在调整大小之前创建另一个副本,然后插入该副本的元素形式,则看起来会出现循环问题。我不知道如何重新分配,我应该在什么时候调用调整大小? – justin

回答

0

您创建所需大小的从后插入HashEntry s转换前在其相应的索引的新载体,然后迭代过电流table。 我注意到这个类在成员HashEntry::hash中保留了完整的散列码,所以不需要重新计算对象哈希值。 然后,您需要用新矢量替换table

请记住使用std::vector::swap来避免不必要的幕后分配和std::vector中的复制。

除了在hSizehSize_index的管家,我认为就是这样。

有一种重新分配现有的table内的条目的方式,我想你可能会想知道这一点。我不确定这样做有什么好处。

+0

PS:它看起来像你的'hash_set'是碰撞脆弱的。我没有看到任何方法来存储所有在表格中“击中”相同索引的条目。在大多数应用程序中,这是一个致命的缺陷,并且考虑到您调整表格的大小会与顺序有关。再次,这通常是致命的。 – Persixty