2010-03-16 27 views
1

我有下面的代码,需要去掉所有非字母数字字符。它不是在德尔福2009年工作德尔福2009年 - 去掉字符串中的非字母数字

unit Unit2; 
//Used information from 
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in 

interface 

uses 
    Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, 
    Dialogs, StdCtrls; 
Type 
    TExplodeArray = Array Of String; 

    TForm2 = class(TForm) 
    Memo1: TMemo; 
    ListBox1: TListBox; 
    Button1: TButton; 
    procedure Button1Click(Sender: TObject); 
    private 
    { Private declarations } 
    public 
    { Public declarations } 
    Function Explode (Const cSeparator, vString : String) : TExplodeArray; 
    Function Implode (Const cSeparator : String; Const cArray : TExplodeArray) : String; 
    Function StripHTML (S : String) : String; 
    function allwords(data:string):integer; 
    end; 

var 
    Form2: TForm2; 
    allword, allphrase: TExplodeArray; 

implementation 
{$R *.dfm} 
Function TForm2.StripHTML (S : String) : String; 
Var 
    TagBegin, TagEnd, TagLength : Integer; 
Begin 
    TagBegin := Pos ('<', S);  // search position of first < 

    While (TagBegin > 0) Do 
      Begin // while there is a < in S 
      TagEnd := Pos ('>', S);    // find the matching > 
      TagLength := TagEnd - TagBegin + 1; 
      Delete (S, TagBegin, TagLength);  // delete the tag 
      TagBegin := Pos ('<', S);   // search for next < 
      End; 

    Result := S;     // give the result 
End; 
Function TForm2.Implode (Const cSeparator : String; Const cArray : TExplodeArray) : String; 
Var 
    i : Integer; 
Begin 
    Result := ''; 
    For i := 0 To Length (cArray) - 1 Do 
      Begin 
      Result := Result + cSeparator + cArray [i]; 
      End; 
    System.Delete (Result, 1, Length (cSeparator)); 
End; 

Function TForm2.Explode (Const cSeparator, vString : String) : TExplodeArray; 
Var 
    i : Integer; 
    S : String; 
Begin 
    S := vString; 
    SetLength (Result, 0); 
    i := 0; 
    While Pos (cSeparator, S) > 0 Do 
      Begin 
      SetLength (Result, Length (Result) + 1); 
      Result[i] := Copy (S, 1, Pos (cSeparator, S) - 1); 
      Inc (i); 
      S := Copy (S, Pos (cSeparator, S) + Length (cSeparator), Length (S)); 
      End; 
    SetLength (Result, Length (Result) + 1); 
    Result[i] := Copy (S, 1, Length (S)); 
End; 
//Copied from JclStrings 
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString; 
var 
    Source, Dest: PChar; 
begin 
    SetLength(Result, Length(S)); 
    UniqueString(Result); 
    Source := PChar(S); 
    Dest := PChar(Result); 
    while (Source <> nil) and (Source^ <> #0) do 
    begin 
    if Source^ in Chars then 
    begin 
     Dest^ := Source^; 
     Inc(Dest); 
    end; 
    Inc(Source); 
    end; 
    SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar)); 
end; 
function ReplaceNewlines(const AValue: string): string; 
var 
    SrcPtr, DestPtr: PChar; 
begin 
    SrcPtr := PChar(AValue); 
    SetLength(Result, Length(AValue)); 
    DestPtr := PChar(Result); 
    while SrcPtr <> {greater than less than} #0 do begin 
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin 
     DestPtr[0] := '\'; 
     DestPtr[1] := 't'; 
     Inc(SrcPtr); 
     Inc(DestPtr); 
    end else 
     DestPtr[0] := SrcPtr[0]; 
    Inc(SrcPtr); 
    Inc(DestPtr); 
    end; 
    SetLength(Result, DestPtr - PChar(Result)); 
end; 
function StripNonAlphaNumeric(const AValue: string): string; 
var 
    SrcPtr, DestPtr: PChar; 
begin 
    SrcPtr := PChar(AValue); 
    SetLength(Result, Length(AValue)); 
    DestPtr := PChar(Result); 
    while SrcPtr <> #0 do begin 
    if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin 
     DestPtr[0] := SrcPtr[0]; 
     Inc(DestPtr); 
    end; 
    Inc(SrcPtr); 
    end; 
    SetLength(Result, DestPtr - PChar(Result)); 
end; 
function TForm2.allwords(data:string):integer; 
var i:integer; 
begin 
    listbox1.Items.add(data); 
    data:= StripHTML (data); 
    listbox1.Items.add(data); 
    ////////////////////////////////////////////////////////////// 
    data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']); 
    // Strips out everything data comes back blank in Delphi 2009 
    ////////////////////////////////////////////////////////////// 
    listbox1.Items.add(data); 
    data := stringreplace(data,' ',' ', [rfReplaceAll, rfIgnoreCase]); 
    //Replace two spaces with one. 
    listbox1.Items.add(data); 
    allword:= explode(' ',data); 
{ // Converting the following PHP code to Delphi 
    $text = ereg_replace("[^[:alnum:]]", " ", $text); 
    while(strpos($text,' ')!==false) $text = ereg_replace(" ", " ", $text); 
    $text=$string=strtolower($text); 
    $text=explode(" ",$text); 
    return count($text); 
} 
for I := 0 to Length(allword) - 1 do 
listbox1.Items.Add(allword[i]); 
end; 
procedure TForm2.Button1Click(Sender: TObject); 
begin 
//[^[:alnum:]] 

allwords(memo1.Text); 
end; 

end. 

我还会怎么做呢?

回答

1

Uses StrUtils; //StuffString 

var 
    Regex: TPerlRegEx; 
    I:Integer; 
begin 
Regex := TPerlRegEx.Create(nil); 
Regex.RegEx := '[^[:alnum:]]'; 
Regex.Options := [preMultiLine]; 
Regex.Subject := data; 
if Regex.Match then begin 
    repeat 
    data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' '); 
    until not Regex.MatchAgain; 
end; 
1

想到最简单的解决方案是定义一个正则表达式,该正则表达式返回输入字符串减去其中的任何非字母字符。

+0

是的,我打算使用RegEx [^ [:alnum:]]和TPerlRegEx,但并不真正知道如何正确使用它。 [^ [:alnum:]]可以很好地工作。 – Brad

+0

好吧,我用RegEx的建议,并在数小时后算出来! – Brad

1

这是一段时间,因为我用Delphi做了很多 - 第5版是我的操场。

默认情况下,它不是Delphi 2009的主要功能之一,现在它始终是Unicode。

这对试图逐字处理字符的任何内容都有影响。它可能是你问题的根源吗?