2017-09-19 95 views
1

我正在用Go解析一个来自Google API的CSV文件,该文件使用UTF-16编码。下面的代码尝试读取一条记录(跳过标题行)并打印该记录,但输出的结果很奇怪,如下所示(解析CSV文件时出现奇怪的输出):

, v=/09/20 00:35:42 k=Smartfren Andromax AD681H 

我想这可能与UTF-16编码有关,但不清楚具体细节。代码如下:

package main

import (
    "encoding/csv" 
    "io" 
    "log" 
    "net/http" 
    "strings" 
) 

// url is the address of the CSV file that main downloads and parses.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv" 

// main fetches the CSV file at url and logs the first record that has at
// least four fields, using the trimmed fourth field as the key and the
// trimmed second field as the value. The response body is handed to the
// CSV reader as-is, without any character-set conversion.
func main() {
	response, err := http.Get(url)
	if err != nil {
		return
	}
	defer response.Body.Close()

	csvReader := csv.NewReader(response.Body)
	csvReader.FieldsPerRecord = -1 // rows may have any number of fields
	csvReader.LazyQuotes = true

	// Consume the header row; its return values are ignored.
	csvReader.Read()

	devices := make(map[string]string)
	for {
		row, readErr := csvReader.Read()
		switch {
		case readErr == io.EOF:
			return
		case readErr != nil:
			log.Println(readErr)
			continue
		case len(row) < 4:
			// Too short to contain both columns of interest.
			continue
		}

		key := strings.TrimSpace(row[3])
		devices[key] = strings.TrimSpace(row[1])
		for k, v := range devices {
			log.Printf("k=%s, v=%s\n", k, v)
		}
		// Only the first usable record is wanted.
		return
	}
}

回答

3

正如您所怀疑的,输入数据必须先从UTF-16编码的字节流转换为UTF-8编码的字节流。您可以使用Go子仓库中的包golang.org/x/text/encoding/unicode来完成这一转换:

package main 

import (
    "encoding/csv" 
    "io" 
    "log" 
    "net/http" 
    "strings" 

    "golang.org/x/text/encoding/unicode" 
) 

// url is the address of the CSV file that main downloads and parses.
var url = "http://storage.googleapis.com/play_public/supported_devices.csv" 

// main downloads the CSV file at url, converts it from UTF-16 to UTF-8,
// and logs the first data record that has at least four fields. The key is
// the whitespace-trimmed fourth field and the value is the trimmed second
// field. Any failure is logged before main returns.
func main() {
	resp, err := http.Get(url)
	if err != nil {
		log.Printf("fetching %s: %v", url, err)
		return
	}
	defer resp.Body.Close()

	// A non-200 response would be an error page, not the CSV payload.
	if resp.StatusCode != http.StatusOK {
		log.Printf("fetching %s: unexpected status %s", url, resp.Status)
		return
	}

	// Treat the stream as little-endian UTF-16 unless a byte order mark at
	// the start says otherwise, and expose it as a UTF-8 reader.
	dec := unicode.UTF16(unicode.LittleEndian, unicode.UseBOM).NewDecoder()
	reader := dec.Reader(resp.Body)

	r := csv.NewReader(reader)
	r.LazyQuotes = true
	r.FieldsPerRecord = -1 // rows may have any number of fields

	// Skip the header row, but do not carry on if even that cannot be read.
	if _, err := r.Read(); err != nil {
		log.Printf("reading header: %v", err)
		return
	}

	m := make(map[string]string)
	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			// A malformed row can be skipped. Any other error comes from
			// the underlying stream and may repeat on every call, so
			// retrying could loop forever.
			if _, ok := err.(*csv.ParseError); !ok {
				log.Printf("reading record: %v", err)
				return
			}
			log.Println(err)
			continue
		}
		if len(record) < 4 {
			continue
		}

		m[strings.TrimSpace(record[3])] = strings.TrimSpace(record[1])
		for k, v := range m {
			log.Printf("k=%s, v=%s\n", k, v)
		}
		// Only the first usable record is printed.
		break
	}
}