要将采用特定 ANSI 编码的文件加载为 Unicode 字符串(UTF-16 LE),请使用 MultiByteToWideChar 函数:
{ Import of the Win32 MultiByteToWideChar function from kernel32.dll.
  CodePage       - code page of the source bytes.
  dwFlags        - conversion flags (callers in this file pass 0).
  lpMultiByteStr - source bytes; cchMultiByte is their length in bytes.
  lpWideCharStr  - destination buffer; cchWideChar is its size in characters.
                   Passing cchWideChar = 0 makes the function return the
                   required size instead of converting.
  Returns the number of characters written (or required); 0 on failure. }
function MultiByteToWideChar(
  CodePage: UINT; dwFlags: DWORD; const lpMultiByteStr: AnsiString;
  cchMultiByte: Integer; lpWideCharStr: string; cchWideChar: Integer): Integer;
  // Inno Setup import syntax is 'FunctionName@DllName CallingConvention'.
  external 'MultiByteToWideChar@kernel32.dll stdcall';
{ Reads FileName as raw bytes and converts them from code page CP into the
  Unicode string S.
  Returns False when the file cannot be read (S is left untouched) or when
  the bytes cannot be converted from CP (S is set to an empty string).
  An empty file yields an empty S and True. }
function LoadStringFromFileInCP(FileName: string; var S: string; CP: Integer): Boolean;
var
  Ansi: AnsiString;
  Len: Integer;
begin
  Result := LoadStringFromFile(FileName, Ansi);
  if not Result then
    Exit;

  // Nothing to convert; MultiByteToWideChar treats a zero-length input as an
  // error, so handle the empty file here instead of calling it.
  if Length(Ansi) = 0 then
  begin
    S := '';
    Exit;
  end;

  // First call with cchWideChar = 0 only reports the required length.
  Len := MultiByteToWideChar(CP, 0, Ansi, Length(Ansi), S, 0);
  if Len > 0 then
  begin
    SetLength(S, Len);
    // Second call performs the conversion; 0 signals failure.
    Result := MultiByteToWideChar(CP, 0, Ansi, Length(Ansi), S, Len) > 0;
  end
  else
    Result := False;

  // Do not hand a partially filled buffer back to the caller.
  if not Result then
    S := '';
end;
{ Loads FileName through LoadStringFromFileInCP using code page CP and, on
  success, assigns the loaded text to Strings.Text.
  Returns the result of LoadStringFromFileInCP; Strings is not modified when
  that call returns False. }
function LoadStringsFromFileInCP(
  FileName: string; Strings: TStrings; CP: Integer): Boolean;
var
  Content: string;
begin
  if LoadStringFromFileInCP(FileName, Content, CP) then
  begin
    Strings.Text := Content;
    Result := True;
  end
  else
    Result := False;
end;
(请注意,我使用 TStrings 而不是 TArrayOfString 来存储字符串/行的集合,因为 TStrings 更易于使用。)
要将 Unicode 字符串转换回 ANSI,请使用 WideCharToMultiByte 函数:
{ Import of the Win32 WideCharToMultiByte function from kernel32.dll.
  CodePage       - code page of the produced bytes.
  dwFlags        - conversion flags (callers in this file pass 0).
  lpWideCharStr  - source string; cchWideChar is its length in characters.
  lpMultiByteStr - destination buffer; cchMultiByte is its size in bytes.
                   Passing cchMultiByte = 0 makes the function return the
                   required size instead of converting.
  lpDefaultCharFake, lpUsedDefaultCharFake - pointer parameters of the API,
                   declared as Integer so that callers can pass 0 for them.
  Returns the number of bytes written (or required); 0 on failure. }
function WideCharToMultiByte(CodePage: UINT; dwFlags: DWORD;
  lpWideCharStr: string; cchWideChar: Integer; lpMultiByteStr: AnsiString;
  cchMultiByte: Integer; lpDefaultCharFake: Integer;
  lpUsedDefaultCharFake: Integer): Integer;
  // Inno Setup import syntax is 'FunctionName@DllName CallingConvention'.
  external 'WideCharToMultiByte@kernel32.dll stdcall';
{ Converts the Unicode string S to code page CP and writes the resulting
  bytes to FileName, replacing any existing content (Append = False).
  Returns False when the conversion fails (the file is then not touched) or
  when SaveStringToFile fails. An empty S writes an empty file. }
function SaveStringToFileInCP(FileName: string; S: string; CP: Integer): Boolean;
var
  Ansi: AnsiString;
  Len: Integer;
begin
  Ansi := '';

  // WideCharToMultiByte treats a zero-length input as an error, so only
  // convert when there is something to convert.
  if Length(S) > 0 then
  begin
    // First call with cchMultiByte = 0 only reports the required byte count.
    Len := WideCharToMultiByte(CP, 0, S, Length(S), Ansi, 0, 0, 0);
    if Len <= 0 then
    begin
      // Conversion failed; do not overwrite the file with empty content.
      Result := False;
      Exit;
    end;

    SetLength(Ansi, Len);
    // Second call performs the conversion; 0 signals failure.
    if WideCharToMultiByte(CP, 0, S, Length(S), Ansi, Len, 0, 0) <= 0 then
    begin
      Result := False;
      Exit;
    end;
  end;

  Result := SaveStringToFile(FileName, Ansi, False);
end;
{ Writes the text held by Strings (Strings.Text) to FileName in code page CP
  by delegating to SaveStringToFileInCP, and returns that call's result. }
function SaveStringsToFileInCP(
  FileName: string; Strings: TStrings; CP: Integer): Boolean;
var
  Joined: string;
begin
  Joined := Strings.Text;
  Result := SaveStringToFileInCP(FileName, Joined, CP);
end;
可以像下面这样使用这些函数:
{ Usage example: loads 'korean.txt' using code page 51949, shows every line
  in a message box, then writes the lines to 'korean_out.txt' in the same
  code page. }
const
  CP_EUC_KOREAN = 51949;
var
  I: Integer;
  Strings: TStrings;
begin
  Strings := TStringList.Create;
  try
    if LoadStringsFromFileInCP('korean.txt', Strings, CP_EUC_KOREAN) then
    begin
      for I := 0 to Strings.Count - 1 do
      begin
        MsgBox(Strings[I], mbInformation, MB_OK);
      end;
    end;
    SaveStringsToFileInCP('korean_out.txt', Strings, CP_EUC_KOREAN);
  finally
    // Release the list even if loading, displaying or saving raises.
    Strings.Free;
  end;
end;
在我的纯英文系统上可以正常工作:

@MartinPrikryl 当然。如果我之前没有说清楚:该文件是 ANSI 编码的,我需要将其内容加载为 Unicode,这样既能看到正确的字符,又能把所有内容都当作 string 处理(摆脱 AnsiString <-> string 转换的麻烦),然后再将整个数组转换回去,以便保存回文件。 –
@MartinPrikryl,嗯,不是……它实际上是 CP51949(EUC-KR)。用 CP949 也许也可以,但我没有试过。未经编辑的原始文件是 CP51949。 –