MurMurHash3有没有德尔福的实现？

是否有MurMurHash 3的Delphi实现？我试着自己实现它，但我的实现实际上是MurMurHash2慢。这是正常的吗？有没有其他的实现？MurMurHash3有没有德尔福的实现？

这是我的：

function MurMur3_32(const S: AnsiString; const Seed: LongWord=$9747b28c): LongWord; 
const 
    c1 = $cc9e2d51; 
    c2 = $1b873593; 
    r1 = 15; 
    r2 = 13; 
    m = 5; 
    n = $e6546b64; 
var 
    hash: LongWord; 
    len: LongWord; 
    k, k2: LongWord; 
    data: Integer; 
begin 
    Hash := Seed; 

    len := Length(S); 

    //The default seed, $9747b28c, is from the original C library 

    // Initialize the hash to a 'random' value 
    hash := seed xor len; 

    // Mix 4 bytes at a time into the hash 
    data := 1; 

    while(len >= 4) do 
    begin 
     k := PLongWord(@S[data])^; 

     k := k*c1; 
     k := (k shl r1) or (k shr (32 - r1)); 
     k := k*c2; 


     hash := hash xor k; 
     hash := ((hash shl r2) or (hash shr (32 - r2))) * m + n; 

     Inc(Data, 4); 
     Dec(len, 4); 
    end; 

    k2 := 0; 

    { Handle the last few bytes of the input array 
      S: ... $69 $18 $2f 
    } 
    Assert(len <= 3); 
    if len = 3 then 
     k2 := k2 xor (LongWord(s[data+2]) shl 16); 
    if len >= 2 then 
     k2 := k2 xor (LongWord(s[data+1]) shl 8); 
    if len >= 1 then 
    begin 
     k2 := k2 xor (LongWord(s[data])); 
     k2 := k2 * c1; 
     k2 := (k2 shl r1) or (k2 shr (32 - r1)); 
     k2 := k2 * c2; 
     hash := hash xor k2; 
    end; 

    // Do a few final mixes of the hash to ensure the last few 
    // bytes are well-incorporated. 
    len := Length(S); 

    hash := hash xor len; 
    hash := hash xor (hash shr 16); 
    hash := hash * $85ebca6b; 
    hash := hash xor (hash shr 13); 
    hash := hash * $c2b2ae35; 
    hash := hash xor (hash shr 16); 


    Result := hash; 

end;

声明：我不知道，如果Seed值是正确的。

来源

2015-06-19 Alex

这个问题可能是关闭的话题和不清楚你问什么。我的主要评论虽然会想知道为什么你想要在字符串中存储二进制数据？使用一个字节数组。 –

我为了尝试实现而使用了字符串......每个人都适应它以满足他的需求。我需要散列字符串......你对这个问题不了解吗？ – Alex

我理解这个问题，但是由于你问其他图书馆并且不清楚（技术术语），因此我理解这个问题，因为你提出了一个模糊的“可以改进”的问题，因此它是脱离主题的。同样，为什么把二进制数据放入一个字符串？如果字符串进行编码转换会发生什么？ –

我在https://github.com/JBontes/FastCode

在我FastDefaults单元中的murmurhash3实现下面是Murmurhash3的源代码：

{$pointermath on} 
function MurmurHash3(const [ref] HashData; Len: integer; Seed: integer = 0): integer; 
const 
    c1 = $CC9E2D51; 
    c2 = $1B873593; 
    r1 = 15; 
    r2 = 13; 
    m = 5; 
    n = $E6546B64; 
    f1 = $85EBCA6B; 
    f2 = $C2B2AE35; 
{$IFDEF purepascal} 
var 
    i, Len2: integer; 
    k: cardinal; 
    remaining: cardinal; 
    Data: PCardinal; 
label 
    case1, case2, case3, final; 
begin 
    Result:= Seed; 
    Data:= @HashData; 
    for i:= 0 to (Len shr 2) - 1 do begin 
    k:= Data[i]; 
    k:= k * c1; 
    k:= (k shl r1) or (k shr (32 - r1)); 
    k:= k * c2; 
    Result:= Result xor k; 
    Result:= (Result shl r2) or (Result shr (32 - r2)); 
    Result:= Result * m + n; 
    end; {for i} 
    Len2:= Len; 
    remaining:= 0; 
    case Len and $3 of 
    1: goto case1; 
    2: goto case2; 
    3: goto case3; 
    else goto final; 
    end; 
case3: 
    dec(Len2); 
    inc(remaining, PByte(Data)[Len2] shl 16); 
case2: 
    dec(Len2); 
    inc(remaining, PByte(Data)[Len2] shl 8); 
case1: 
    dec(Len2); 
    inc(remaining, PByte(Data)[Len2]); 
    remaining:= remaining * c1; 
    remaining:= (remaining shl r1) or (remaining shr (32 - r1)); 
    remaining:= remaining * c2; 
    Result:= Result xor remaining; 
final: 
    Result:= Result xor Len; 

    Result:= Result xor (Result shr 16); 
    Result:= Result * f1; 
    Result:= Result xor (Result shr 13); 
    Result:= Result * f2; 
    Result:= Result xor (Result shr 16); 
end; 
{$ELSE} 
{$REGION 'asm'} 
{$IFDEF CPUx86} 
    asm 
    push EBX 
    push EDI 
    push ESI 
    xchg ECX,EDX 
    //EAX = data 
    //ECX = count in bytes 
    //EDX = seed 
    mov ESI,ECX 
    shr ECX,2 
    jz @remaining_bytes 
    @loop: 
    mov EDI,[EAX] 
    imul EDI,EDI,c1 
    rol EDI,r1 
    imul EDI,EDI,c2 
    xor EDX,EDI 
    rol EDX,r2 
    lea EDX,[EDX*4+EDX+n] 
    lea EAX,[EAX+4] 
    dec ECX 
    jnz @loop 
    @remaining_bytes: 
    mov ECX,ESI 
    and ECX,$3 
    jz @finalization 
    xor EBX,EBX 
    dec ECX 
    mov BL,byte ptr [EAX+ECX] 
    jz @process_remaining 
    shl EBX,8 
    dec ECX 
    mov BL,byte ptr [EAX+ECX] 
    jz @process_remaining 
    shl EBX,8 
    mov BL,byte ptr [EAX] 
    @process_remaining: 
    imul EBX,EBX,c1 
    rol EBX,r1 
    imul EBX,EBX,c2 
    xor EDX,EBX 
    @finalization: 
    xor EDX,ESI 
    mov EAX,EDX 
    shr EDX,16 
    xor EDX,EAX 
    imul EDX,EDX,f1 
    mov EAX,EDX 
    shr EDX,13 
    xor EDX,EAX 
    imul EDX,EDX,f2 
    mov EAX,EDX 
    shr EDX,16 
    xor EAX,EDX 
    pop ESI 
    pop EDI 
    pop EBX 
end; 
{$ENDIF} 
{$IFDEF CPUx64} 
asm 
    push RBX 
    push RDI 
    push RSI 
    mov RAX,RCX 
    mov RCX,RDX 
    mov RDX,R8 
    //RAX = data 
    //RCX = count in bytes 
    //RDX = seed 
    mov ESI,ECX 
    shr ECX,2 
    jz @remaining_bytes 
@loop: 
    mov EDI, dword ptr [RAX] 
    imul EDI,EDI,c1 
    rol EDI,r1 
    imul EDI,EDI,c2 
    xor EDX,EDI 
    rol EDX,r2 
    lea EDX,dword ptr [EDX*4+EDX+n] // *5 + n 
    lea RAX,qword ptr [RAX+4] 
    dec ECX 
    jnz @loop 
@remaining_bytes: 
    mov ECX,ESI 
    and ECX,$3 
    jz @finalization 
    xor RBX,RBX 
    dec ECX 
    mov BL,byte ptr [RAX+RCX] 
    jz @process_remaining 
    shl EBX,8 
    dec ECX 
    mov BL,byte ptr [RAX+RCX] 
    jz @process_remaining 
    shl EBX,8 
    mov BL,byte ptr [RAX] 
@process_remaining: 
    imul EBX,EBX,c1 
    rol EBX,r1 
    imul EBX,EBX,c2 
    xor EDX,EBX 
@finalization: 
    xor EDX,ESI 
    mov EAX,EDX 
    shr EDX,16 
    xor EDX,EAX 
    imul EDX,EDX,f1 
    mov EAX,EDX 
    shr EDX,13 
    xor EDX,EAX 
    imul EDX,EDX,f2 
    mov EAX,EDX 
    shr EDX,16 
    xor EAX,EDX 
    pop RSI 
    pop RDI 
    pop RBX 
end; 
{$ENDIF} 
{$ENDREGION} 
{$ENDIF}

的后藤的到位，以模拟C'S fallthough开关/ case语句。
这种代码更容易在asm中编写，它比asm Delphi生成的寄存器使用更好。

为什么你的代码慢
他只是把我看到的一个问题是在这里：

Project42.dpr.54: while(len >= 4) do //a for loop is faster. 
00420E17 83FE04   cmp esi,$04 
00420E1A 7242    jb $00420e5e 

//This translates into inefficient code 
Project42.dpr.56: k := PLongWord(@S[data])^; 
00420E1C 8B0424   mov eax,[esp] 
00420E1F 8D4438FF   lea eax,[eax+edi-$01] 
00420E23 8B00    mov eax,[eax]

三个间接内存引用和read is misaligned！
查看更多信息：Purpose of memory alignment没关系接受的答案，请参阅第二个答案。
双字的读取应该总是发生在双字边界上。
@pointer^技巧使编译器增加了一个额外的间接级别（以及额外的往返内存（Oops））。
使用{$pointermath on}并将指针作为数组寻址。

整数！=指针
而且它的使用integer存储指针错误。
它将在X64中破解。 改为使用NativeUInt。

在我的版本的等效代码转换为：

Project42.dpr.128: Data:= @HashData; 
00420FCD 89442404   mov [esp+$04],eax 
Project42.dpr.129: for i:= 0 to (Len shr 2) - 1 do begin 
00420FD1 8B1C24   mov ebx,[esp] 
00420FD4 C1EB02   shr ebx,$02 
00420FD7 4B    dec ebx 
00420FD8 85DB    test ebx,ebx 
00420FDA 7C3E    jl $0042101a 
00420FDC 43    inc ebx 
00420FDD 33D2    xor edx,edx 
////------------------------------------------/// 
Project42.dpr.130: k:= Data[i]; 
00420FDF 8B442404   mov eax,[esp+$04] 
00420FE3 8B0490   mov eax,[eax+edx*4]

注意，有没有少不必要的间接内存引用和读取正确对齐。

当然，asm版本稍微好一些，但asm和pascal版本之间的差异应该小于两个Pascal版本之间的差异。

这是我认为你的表现被浪费的地方。
错位读取在X86上吃了很多（浪费）的周期。
在其他处理器上，速度变慢甚至更糟。

与代码
限制您的实现字符串的其他问题是不好的事情。
如果我想散列记录怎么办？
不要强制二进制数据字符串。
字符串不适合二进制数据。
只需使用一个无类型的参数和指针。

种子值
我不认为这是一个正确的种子价值。我了解它的种子就在那里，所以你可以把呼叫连接到Murmurhash。
您可以使用第一个散列的输出作为第二个运行的种子。
通过这种方式，您可以输入2个变量并获得相同的输出，就像您一次处理这两个变量一样。

让我知道他们如何执行您的代码。

来源

2015-06-19 20:25:44 Johan

感谢您的精彩解释！您的ASM代码非常快！感谢分享！ PS：我注意到pascal代码中有一个错误...您错过了初始化“剩余”变量。非常感谢你！ – Alex

FWIW整数保持32位指针正常。除非启用范围/溢出检查，否则签名或未签名并不重要。 –

@亚历山大，是的，不客气。剩下的好地方 – Johan

MurMurHash3有没有德尔福的实现？

回答

相关问题