2016-05-28 39 views
-1

我最近用golang重新实现了我的项目。该项目是用C++实现的。当我完成代码并进行性能测试时。我对结果感到震惊。当我用C++查询数据库时,可以在5分钟内得到1.3亿行数据。但是用golang,差不多45分钟。但是,当我从项目中分离代码并构建代码片段时,它在2分钟内完成。为什么他们有这么大的差异表现结果?巨大的性能差异查询MySQL数据库与相同的golang片段

我的代码片段: https://gist.github.com/pyanfield/2651d23311901b33c5723b7de2364148

package main 

import (
    "database/sql" 
    "fmt" 
    "runtime" 
    "strconv" 
    "time" 

    _ "github.com/go-sql-driver/mysql" 
) 

func main() { 
    runtime.GOMAXPROCS(runtime.NumCPU()) 
    // defer profile.Start(profile.CPUProfile, profile.ProfilePath(".")).Stop() 
    dbRead, err := connectDB("test:[email protected](127.0.0.1:3306)/test_oltp?charset=utf8&readTimeout=600s&writeTimeout=600s") 
    if err != nil { 
     fmt.Printf("Error happend when connecting to DB. %s\n", err.Error()) 
     return 
    } 
    defer dbRead.Close() 
    dbRead.SetMaxIdleConns(0) 
    dbRead.SetMaxOpenConns(100) 

    query := fmt.Sprintf("WHERE company_id in (11,22,33,44,55,66,77,88,99,00,111,222,333,4444,555,666,777,888,999)") 

    relations := getRelations(dbRead, query) 
} 
func connectDB(addr string) (*sql.DB, error) { 
    db, err := sql.Open("mysql", addr) 
    if err != nil { 
     return nil, err 
    } 
    if err = db.Ping(); err != nil { 
     return nil, err 
    } 
    return db, nil 
} 

type Relation struct { 
    childId int64 
    parentId int64 
} 

func getRelations(db *sql.DB, where string)[]Relation { 
    begin := time.Now() 

    var err error 
    var rows *sql.Rows 
    query := fmt.Sprintf("SELECT `child_id`, `parent_id` FROM `test_relations` %s", where) 
    rows, err = db.Query(query) 
    if err != nil { 
     fmt.Println("query error:", err.Error()) 
     return nil 
    } 
    defer rows.Close() 

    columns, err := rows.Columns() 
    buffer := make([]sql.RawBytes, len(columns)) 
    scanArgs := make([]interface{}, len(buffer)) 
    for i := range scanArgs { 
     scanArgs[i] = &buffer[i] 
    } 

    relations := []Relation{} 
    relation := Relation{} 
    for rows.Next() { 
     if err = rows.Scan(scanArgs...); err != nil { 
      fmt.Println("scan:", err.Error()) 
      return nil 
     } 
     relation.parentId, _ = strconv.ParseInt(string(buffer[1]), 10, 64) 
     relation.childId, _ = strconv.ParseInt(string(buffer[0]), 10, 64) 

     relations = append(relations, relation) 
    } 

    if err = rows.Err(); err != nil { 
     fmt.Println("next error:", err.Error()) 
     return nil 
    } 
    fmt.Printf(">>> getRelations cost: %s\n", time.Since(begin).String()) 
    // output :>>> getRelations cost:1m45.791047s 
    return relations 
    // len(relations): 131123541 
} 

更新: 我走的版本是1.6。该CPU的profile我是如下: 代码片段轮廓TOP20:

75.67s of 96.82s total (78.16%) 
Dropped 109 nodes (cum <= 0.48s) 
Showing top 20 nodes out of 82 (cum >= 12.04s) 
     flat flat% sum%  cum cum% 
    11.85s 12.24% 12.24%  11.85s 12.24% runtime.memmove 
    10.28s 10.62% 22.86%  20.01s 20.67% runtime.mallocgc 
    5.82s 6.01% 28.87%  5.82s 6.01% strconv.ParseUint 
    5.79s 5.98% 34.85%  5.79s 5.98% runtime.futex 
    3.42s 3.53% 38.38%  10.28s 10.62% github.com/go-sql-driver/mysql.(*buffer).readNext 
    3.42s 3.53% 41.91%  6.38s 6.59% runtime.scang 
    3.37s 3.48% 45.39%  36.97s 38.18% github.com/go-sql-driver/mysql.(*textRows).readRow 
    3.37s 3.48% 48.87%  3.37s 3.48% runtime.memclr 
    3.20s 3.31% 52.18%  3.20s 3.31% runtime.heapBitsSetType 
    3.02s 3.12% 55.30%  7.36s 7.60% database/sql.convertAssign 
    2.96s 3.06% 58.36%  3.02s 3.12% runtime.(*mspan).sweep.func1 
    2.53s 2.61% 60.97%  2.53s 2.61% runtime._ExternalCode 
    2.39s 2.47% 63.44%  2.96s 3.06% runtime.readgstatus 
    2.24s 2.31% 65.75%  8.06s 8.32% strconv.ParseInt 
    2.21s 2.28% 68.03%  5.24s 5.41% runtime.heapBitsSweepSpan 
    2.15s 2.22% 70.25%  7.68s 7.93% runtime.rawstring 
    2.06s 2.13% 72.38%  3.18s 3.28% github.com/go-sql-driver/mysql.readLengthEncodedString 
    1.95s 2.01% 74.40%  12.23s 12.63% github.com/go-sql-driver/mysql.(*mysqlConn).readPacket 
    1.83s 1.89% 76.29%  79.42s 82.03% main.Relations 
    1.81s 1.87% 78.16%  12.04s 12.44% runtime.slicebytetostring 

项目CPU轮廓TOP20:

 (pprof) top20 
38.71mins of 42.82mins total (90.40%) 
Dropped 334 nodes (cum <= 0.21mins) 
Showing top 20 nodes out of 76 (cum >= 1.35mins) 
     flat flat% sum%  cum cum% 
12.02mins 28.07% 28.07% 12.48mins 29.15% runtime.addspecial 
    5.95mins 13.89% 41.96% 15.08mins 35.21% runtime.pcvalue 
    5.26mins 12.29% 54.25% 5.26mins 12.29% runtime.readvarint 
    2.60mins 6.08% 60.32% 7.87mins 18.37% runtime.step 
    1.98mins 4.62% 64.94% 19.45mins 45.43% runtime.gentraceback 
    1.65mins 3.86% 68.80% 1.65mins 3.86% runtime/internal/atomic.Xchg 
    1.57mins 3.66% 72.46% 2.93mins 6.84% runtime.(*mspan).sweep 
    1.52mins 3.54% 76.01% 1.78mins 4.15% runtime.findfunc 
    1.41mins 3.30% 79.31% 1.42mins 3.31% runtime.markrootSpans 
    1.13mins 2.64% 81.95% 1.13mins 2.64% runtime.(*fixalloc).alloc 
    0.64mins 1.50% 83.45% 0.64mins 1.50% runtime.duffcopy 
    0.46mins 1.08% 84.53% 0.46mins 1.08% runtime.findmoduledatap 
    0.44mins 1.02% 85.55% 0.44mins 1.02% runtime.fastrand1 
    0.42mins 0.97% 86.52% 15.49mins 36.18% runtime.funcspdelta 
    0.38mins 0.89% 87.41% 36.02mins 84.13% runtime.mallocgc 
    0.30mins 0.7% 88.12% 0.78mins 1.83% runtime.scanobject 
    0.26mins 0.6% 88.72% 0.32mins 0.74% runtime.stkbucket 
    0.26mins 0.6% 89.32% 0.26mins 0.6% runtime.memmove 
    0.23mins 0.55% 89.86% 0.23mins 0.55% runtime.heapBitsForObject 
    0.23mins 0.53% 90.40% 1.35mins 3.15% runtime.lock 
+0

如果您将'relations:= [] Relation {}'更改为'relations:= make([] Relation,0,131123541)'(仅测试一个理论),您使用的是哪种版本的Go? – OneOfOne

+0

Go版本是1.6。就像你提到的那样,我尝试过,但结果并没有明显改善。 – pyanfield

+0

一定要计时两次 - 第一次由于I/O而变慢;由于缓存第二次可能会更快。 –

回答

1

我得到了我的答案,并希望分享它。这是我的错误造成的。有时候,我试图添加内存配置文件并在我的init方法中设置runtime. MemProfileRate=1。但我忘了将它重置为合理的价值。每当我检查我的代码时,我都忽略了这种方法。从我的项目中删除这个设置后,它恢复正常,花费近5到6分钟来查询这130M数据。速度非常接近C++版本。我的建议是,当你设置runtime.MemProfileRate=1时请小心谨慎,除非你确定要这样做,并且记得将其重置。

0

Golang很可能运行DB查询处理多个并行的代码段单独。您的完整应用程序几乎可以肯定地将其中一些内核用于其他事情。

处理所有130M行的循环似乎是可能的罪魁祸首。

如果你想测试这个理论,试着在代码片段中设置max procs为1。

+0

感谢您的意见。我用'runtime.GOMAXPROCS(1)'再次测试了它。它花费近2分25秒。 – pyanfield