AWS S3 performance using the Node.js SDK

2014-10-10

I am trying to push my program's upload (and download) performance to the limit. Uploading a 256MB file with the AWS command-line interface gets me about 1000Mbps, but with the following program, which uses the AWS S3 Node.js SDK, the upload is stuck at around 600Mbps.
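For reference, the CLI benchmark would have been something along these lines (the exact command, bucket, and key are not shown in the question, so this invocation is an assumption):

aws s3 cp ./file-256mb s3://my-bucket/some-key 

The Node.js upload program: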

if (process.argv.length < 7) { 
    console.log ("usage: " + process.argv [0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>") 
    return -1 
} 

var config = process.argv[2] 
var region = process.argv[3] 
var bucketName = process.argv[4] 
var key = process.argv[5] 
var file = process.argv[6] 

var multipartMap = { Parts: [] } 
var uploadStartTime // = new Date() 
var partSize = 1024 * 1024 * 8   // at least 5MB, specified by amazon 
var partNum 
var multipartParams = { 
    Bucket: bucketName, 
    Key: key, 
    ContentType: "binary", 
    StorageClass: "REDUCED_REDUNDANCY", 
} 
var part = 0 
var maxRetry = 3 

var fs = require ('fs') 
var aws = require ('aws-sdk') 

// upload a single part, retrying up to maxRetry times on failure 
function upload (bucket, multipart, partParams, trial) { 
    trial = trial || 1; 
    bucket.uploadPart (partParams, function (err, data) { 
     if (err) { 
      console.log ("failed: ", err) 
      if (trial < maxRetry) { 
       console.log ("retrying part: ", partParams.PartNumber) 
       upload (bucket, multipart, partParams, trial + 1) 
      } else { 
       console.log ("failed: ", err, " unable to upload part: ", partParams.PartNumber) 
      } 
      return; 
     } 
     // in aws-sdk v2 the callback's `this` is the AWS.Response, so 
     // this.request.params holds the parameters this part was sent with 
     multipartMap.Parts[this.request.params.PartNumber - 1] = { 
      ETag: data.ETag, 
      PartNumber: Number (this.request.params.PartNumber) 
     } 

     // parts still outstanding; the last one to finish completes the upload 
     if (--partNum > 0) return; 

     var doneParams = { 
      Bucket: bucketName, 
      Key: key, 
      MultipartUpload: multipartMap, 
      UploadId: multipart.UploadId 
     } 

     console.log ("success") 
     bucket.completeMultipartUpload (doneParams, function (err, data){ 
      if (err) { 
       console.log("An error occurred while completing the multipart upload"); 
       console.log(err); 
      } else { 
       var delta = (new Date() - uploadStartTime)/1000; 
       console.log('Completed upload in', delta, 'seconds'); 
       console.log('Final upload data:', data); 
      } 
     }) 
    }) 
} 

var kickoffTime = new Date() 
aws.config.loadFromPath (config) 
aws.config.region = region 

var bucket = new aws.S3 ({params: {Bucket: bucketName}}) 

console.log ("filename: ", file) 
var buffer = fs.readFileSync (file) // read the whole file into memory 
partNum = Math.ceil (buffer.length/partSize) // number of parts 
var totalPart = partNum 

uploadStartTime = new Date() 
bucket.createMultipartUpload (multipartParams, function (err, multipart) { 
    if (err) { 
     console.log ("cannot create multipart upload: ", err) 
     return -1 
    } 

    for (var i = 0; i < buffer.length; i += partSize) { 
     ++part 
     var end = Math.min (i + partSize, buffer.length) 
     var body = buffer.slice (i, end) 
     var partParams = { 
      Body: body, 
      Bucket: bucketName, 
      Key: key, 
      PartNumber: String (part), 
      UploadId: multipart.UploadId, 
      ContentLength: end - i 
     } 

     upload (bucket, multipart, partParams); 
    } 
}) 
// createMultipartUpload is asynchronous, so this measures only the 
// synchronous setup time, not the upload itself 
var kickoffTimeDelta = (new Date() - kickoffTime)/1000 
console.log ("Kickoff time: ", kickoffTimeDelta) 

This program will not work for empty files, but please ignore that case. The above program was written with reference to this. The download side is the following program:

if (process.argv.length < 7) { 
    console.log ("usage: " + process.argv[0] + " " + process.argv[1] + " <config> <region> <bucket> <key> <file>") 
    return -1 
} 

var config = process.argv[2] 
var region = process.argv[3] 
var bucketName = process.argv[4] 
var key = process.argv[5] 
var file = process.argv[6] 

var fs = require ('fs') 
var aws = require ('aws-sdk') 
fs.readFile (config, "utf8", function (err, configFile) { 
    if (err) { 
     console.log ("Config file cannot be read: ", err) 
     return -1 
    } 
    aws.config = JSON.parse (configFile) 
    aws.config.region = region 

    var bucket = new aws.S3 ({params: {Bucket: bucketName}}) 

    // ensure the bucket exists; the callback ignores any error, so the 
    // download is attempted either way 
    bucket.createBucket (function() { 
     var data = {Key: key} 
     bucket.getObject (data, function (err, fileData) { 
      if (err) { 
       console.log ("Error downloading data: ", err) 
      } else { 
       fs.writeFile (file, fileData.Body, function (err) { 
        if (err) { 
         console.log ("Error writing data: ", err) 
        } else { 
         console.log ("Successfully downloaded!") 
        } 
       }) 
      } 
     }) 
    }) 
}) 

I am new to Node.js and the AWS SDK. Is there anything I am missing that would achieve a better throughput rate? Thanks.

Answer


Hmm... I have a clarifying question, but not the reputation to post it.

How many requests per second are you seeing at each end? If you are hitting S3 with more than 100 requests per second, you can get better performance by randomizing the beginning of your key names.

See this article for an explanation and some suggestions: http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html

Basically, if a bunch of files under one key prefix (subdirectory) start with the same characters, you can overwhelm the index partition... so for high-volume read/write operations, randomized key names can improve performance.
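A minimal sketch of that idea in Node.js, assuming a short hash-derived prefix fits your key layout (the helper name and prefix length here are made up for illustration):

var crypto = require ('crypto') 

// prefix each key with a few hex characters derived from the key itself, so 
// that otherwise-adjacent key names spread across S3's index partitions 
function randomizedKey (key) { 
    var prefix = crypto.createHash ('md5').update (key).digest ('hex').slice (0, 4) 
    return prefix + '-' + key 
} 

// e.g. randomizedKey ("uploads/2014/file.bin") might return "9f3a-uploads/2014/file.bin"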


Thanks, that will be a useful resource, but right now I mostly care about getting the best performance when putting a small number of files to S3 at a time. It may become more relevant later, once the system scales out to handle a large volume of requests. – 2014-10-12 16:16:31