自定义MultipartFormDataStreamProvider上传后通过WebApi从SQL下载大文件

这是我之前提过的一个问题的后续问题，之前那个问题被认为过于宽泛。 Previous Question 自定义MultipartFormDataStreamProvider上传后通过WebApi从SQL下载大文件

在那个问题中，我解释说我需要通过将块存储为单独的行来将大文件（1-3GB）上载到数据库。我通过重写MultipartFormDataStreamProvider.GetStream方法来做到这一点。该方法返回了将缓冲区块写入数据库的自定义流。

问题是，重写后的GetStream方法会把整个请求（包括请求头）都写入数据库。它确实在内存占用保持平稳的情况下成功写入了数据，但当我下载文件时，下载到的内容中除了文件内容之外还包含所有的头信息，因此文件无法打开。

有没有办法在重写的GetStream方法中，只把文件内容写入数据库，而不写入头信息？

API

[HttpPost]
    [Route("file")]
    [ValidateMimeMultipartContentFilter]
    // Receives a multipart/form-data upload and hands each body part to the
    // custom stream provider. Returns 200 OK when the multipart read completes,
    // or 500 Internal Server Error when the read faults or is cancelled.
    public Task<HttpResponseMessage> PostFormData()
    {
        var provider = new CustomMultipartFormDataStreamProvider();

        // Read the form data and return an async task.
        var task = Request.Content.ReadAsMultipartAsync(provider).ContinueWith<HttpResponseMessage>(t =>
        {
            if (t.IsFaulted)
            {
                // The error response must be returned; otherwise the caller
                // receives 200 OK even though the upload failed.
                return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, t.Exception);
            }

            if (t.IsCanceled)
            {
                // A cancelled task carries no exception, so report a message instead.
                return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, "The upload was canceled before it completed.");
            }

            return Request.CreateResponse(HttpStatusCode.OK);
        });

        return task;
    }

    [HttpGet]
    [Route("file/{id}")]
    // Streams the stored file identified by <id> back to the caller.
    // Returns 400 Bad Request when no identifier is supplied; otherwise a 200 OK
    // response whose body is pushed chunk by chunk from the database.
    public async Task<HttpResponseMessage> GetFile(string id)
    {
        // Guard restored: without it the trailing BadRequest return sat outside
        // the method body and the snippet could not compile.
        if (string.IsNullOrEmpty(id))
        {
            return new HttpResponseMessage(HttpStatusCode.BadRequest);
        }

        var result = new HttpResponseMessage()
        {
            // PushStreamContent invokes the callback once the response stream is
            // available, so the file is never materialized in memory here.
            Content = new PushStreamContent(async (outputStream, httpContent, transportContext) =>
            {
                await WriteDataChunksFromDBToStream(outputStream, httpContent, transportContext, id);
            }),
            StatusCode = HttpStatusCode.OK
        };

        result.Content.Headers.ContentType = new MediaTypeHeaderValue("application/zipx");
        result.Content.Headers.ContentDisposition = new ContentDispositionHeaderValue("attachment") { FileName = "test response.zipx" };

        return result;
    }

    // Copies every chunk returned by the "ReadAttachmentChunks" stored procedure
    // for the given identifier into the response stream, then closes that stream.
    // httpContent and transportContext are accepted but not used here.
    private async Task WriteDataChunksFromDBToStream(Stream responseStream, HttpContent httpContent, TransportContext transportContext, string fileIdentifier)
    {
        // Ordinal of the result-set column read as the binary chunk.
        const int contentColumn = 3;

        try
        {
            var connectionString = System.Configuration.ConfigurationManager.ConnectionStrings["TestDB"].ConnectionString;

            using (var connection = new SqlConnection(connectionString))
            {
                await connection.OpenAsync();

                using (var command = new SqlCommand("ReadAttachmentChunks", connection))
                {
                    command.CommandType = System.Data.CommandType.StoredProcedure;
                    command.Parameters.Add(new SqlParameter("@Identifier", fileIdentifier));

                    // Read data back from db in async call to avoid OutOfMemoryException when sending file back to user
                    using (var chunkReader = await command.ExecuteReaderAsync(CommandBehavior.SequentialAccess))
                    {
                        while (await chunkReader.ReadAsync())
                        {
                            // Rows without content contribute nothing to the response.
                            if (await chunkReader.IsDBNullAsync(contentColumn))
                            {
                                continue;
                            }

                            using (var chunk = chunkReader.GetStream(contentColumn))
                            {
                                // Asynchronously copy the stream from the server to the response stream
                                await chunk.CopyToAsync(responseStream);
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            // PushStreamContent requires the responseStream to be closed
            // for signaling it that you have finished writing the response.
            if (responseStream != null)
            {
                responseStream.Dispose();
            }
        }
    }

自定义MultipartFormDataStreamProvider的GetStream方法实现

public override Stream GetStream(HttpContent parent, HttpContentHeaders headers)
    {
        // Chooses the destination stream for one multipart body part:
        //  - parts with a file name get a CustomSqlStream (file upload);
        //  - parts without a file name get a MemoryStream (plain form field).
        // Throws InvalidOperationException when the part has no Content-Disposition header.

        // For form data, Content-Disposition header is a requirement
        ContentDispositionHeaderValue contentDisposition = headers.ContentDisposition;
        if (contentDisposition == null)
        {
            throw new InvalidOperationException("Did not find required 'Content-Disposition' header field in MIME multipart body part..");
        }

        if (String.IsNullOrEmpty(contentDisposition.FileName))
        {
            // We will post process this as form data
            _isFormData.Add(true);

            // If no filename parameter was found in the Content-Disposition header then return a memory stream.
            return new MemoryStream();
        }

        var identifier = Guid.NewGuid().ToString();

        // The header value arrives wrapped in quotes; strip them for every file
        // name, not only for names that contain a client-side path.
        var fileName = contentDisposition.FileName.Replace("\"", "");

        // Some clients send the full local path; keep only the last segment.
        var lastSeparator = fileName.LastIndexOf('\\');
        if (lastSeparator >= 0)
        {
            fileName = fileName.Substring(lastSeparator + 1);
        }

        // We won't post process files as form data
        _isFormData.Add(false);

        var stream = new CustomSqlStream();
        stream.Filename = fileName;
        stream.Identifier = identifier;
        // A part is not required to carry Content-Type; fall back to a generic
        // binary type instead of dereferencing null.
        stream.ContentType = headers.ContentType != null ? headers.ContentType.MediaType : "application/octet-stream";
        stream.Description = _formData["description"] ?? "";

        return stream;
        // NOTE(review): the matching #region is not visible in this snippet; the directive is kept as-is.
        #endregion
    }

CustomSqlStream流的Write方法实现

public override void Write(byte[] buffer, int offset, int count)
    {
        // Persists one chunk of the stream to the database through the
        // "WriteAttachmentChunk" stored procedure, then signals _dataAddedEvent.
        //
        // Only the 'count' bytes starting at 'offset' belong to this write. The
        // rest of 'buffer' is whatever the caller's buffer happens to hold, so
        // sending the whole array stores bytes that are not part of the file.
        var chunk = new byte[count];
        Buffer.BlockCopy(buffer, offset, chunk, 0, count);

        //write buffer to database
        using (var myConn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["TestDB"].ConnectionString))
        using (var myCmd = new SqlCommand("WriteAttachmentChunk", myConn))
        {
            myCmd.CommandType = System.Data.CommandType.StoredProcedure;
            myCmd.Parameters.Add(new SqlParameter("@Content", chunk));

            myConn.Open();
            myCmd.ExecuteNonQuery();
            // No explicit Close: disposing the connection at the end of the using block closes it.
        }

        ((ManualResetEvent)_dataAddedEvent).Set();
    }

“ReadAttachmentChunks”存储过程从数据库中取出与该文件对应的行，并按它们插入数据库的时间排序。因此，代码的工作方式是把这些块取回，然后异步写入PushStreamContent，返回给用户。

所以我的问题是：

有没有办法只写入所上传文件的内容，而不把头信息和内容一起写入？

任何帮助将不胜感激。谢谢。

来源

2016-11-15 JakeHova

我终于弄明白了。我把写入过程过度复杂化了，而大部分的困扰都出在这里。下面是我最初问题的解决方案：

为了让.NET不在内存中缓冲文件（这样才能处理大文件上传），首先需要重写WebHostBufferPolicySelector，使其不对你的控制器的输入流进行缓冲，然后替换BufferPolicySelector。

// Buffer policy that turns off input-stream buffering for requests routed to
// the "upload" controller and leaves every other decision to the base class
// or to the default (buffered) behavior.
public class NoBufferPolicySelector : WebHostBufferPolicySelector
{
    // Returns false only when hostContext is an HttpContextBase whose route
    // data names the "upload" controller (culture-invariant, case-insensitive);
    // returns true in every other case.
    public override bool UseBufferedInputStream(object hostContext)
    {
        var httpContext = hostContext as HttpContextBase;
        if (httpContext == null)
        {
            return true;
        }

        object controllerName = httpContext.Request.RequestContext.RouteData.Values["controller"];
        if (controllerName == null)
        {
            return true;
        }

        bool isUploadController = string.Equals(controllerName.ToString(), "upload", StringComparison.InvariantCultureIgnoreCase);
        return !isUploadController;
    }

    // Output buffering is left entirely to the base implementation.
    public override bool UseBufferedOutputStream(HttpResponseMessage response)
    {
        return base.UseBufferedOutputStream(response);
    }
}

然后替换BufferPolicySelector：

// Replace the registered IHostBufferPolicySelector service with a NoBufferPolicySelector instance.
GlobalConfiguration.Configuration.Services.Replace(typeof(IHostBufferPolicySelector), new NoBufferPolicySelector());

然后，为了避免将文件流写入磁盘的默认行为，您需要提供一个改为写入数据库的流提供程序（stream provider）。为此，您继承MultipartStreamProvider并重写GetStream方法，使其返回一个会写入数据库的流。

public override Stream GetStream(HttpContent parent, HttpContentHeaders headers)
    {
        // Chooses the destination stream for one multipart body part:
        //  - parts with a file name get a CustomSqlStream, after a parent
        //    container record is written through WriteLargeFileContainer;
        //  - all other parts get a MemoryStream and are treated as form data.

        // For form data, Content-Disposition header is a requirement
        ContentDispositionHeaderValue contentDisposition = headers.ContentDisposition;
        if (contentDisposition == null || String.IsNullOrEmpty(contentDisposition.FileName))
        {
            // We will post process this as form data
            _isFormData.Add(true);

            // If no filename parameter was found in the Content-Disposition header then return a memory stream.
            return new MemoryStream();
        }

        // We won't post process files as form data
        _isFormData.Add(false);

        //create unique identifier for this file upload
        var identifier = Guid.NewGuid();

        // The header value arrives wrapped in quotes; strip them for every file
        // name, not only for names that contain a client-side path.
        var fullFileName = contentDisposition.FileName.Replace("\"", "");

        // Some clients send the full local path; keep only the last segment.
        var fileName = fullFileName;
        var lastSeparator = fileName.LastIndexOf('\\');
        if (lastSeparator >= 0)
        {
            fileName = fileName.Substring(lastSeparator + 1);
        }

        // The boundary lives on the parent (multipart) content type, which may be absent.
        var boundary = "";
        var parentContentType = parent.Headers.ContentType;
        if (parentContentType != null)
        {
            var boundaryObj = parentContentType.Parameters.SingleOrDefault(a => a.Name == "boundary");
            if (boundaryObj != null && boundaryObj.Value != null)
            {
                boundary = boundaryObj.Value;
            }
        }

        // A part is not required to carry Content-Type; fall back to a generic
        // binary type instead of dereferencing null.
        var mediaType = headers.ContentType != null ? headers.ContentType.MediaType : "application/octet-stream";

        //write parent container for the file chunks that are being stored
        WriteLargeFileContainer(fileName, identifier, mediaType, boundary);

        //create an instance of the custom stream that will write the chunks to the database
        var stream = new CustomSqlStream();
        stream.Filename = fileName;
        stream.FullFilename = fullFileName;
        stream.Identifier = identifier.ToString();
        stream.ContentType = mediaType;
        stream.Boundary = boundary;

        return stream;
    }

您创建的自定义流需要继承Stream并重写Write方法。这正是我把问题想复杂的地方：我以为需要自己解析通过buffer参数传入的边界头信息，但实际上框架已经通过offset和count参数替你完成了这件事。

public override void Write(byte[] buffer, int offset, int count)
    {
        // Extract exactly the slice [offset, offset + count) of the incoming
        // buffer and hand that copy to WriteData.
        //no boundary is inluded in buffer
        var payload = new byte[count];
        Array.Copy(buffer, offset, payload, 0, count);
        WriteData(payload);
    }

接下来，只需把它接入上传和下载的API方法即可。上传：

// Receives a multipart upload and hands each body part to the large-file
// stream provider. Returns 200 OK when the multipart read completes, or
// 500 Internal Server Error when the read faults or is cancelled.
public Task<HttpResponseMessage> PostFormData()
    {
        var provider = new CustomMultipartLargeFileStreamProvider();

        // Read the form data and return an async task.
        var task = Request.Content.ReadAsMultipartAsync(provider).ContinueWith<HttpResponseMessage>(t =>
        {
            if (t.IsFaulted)
            {
                // The error response must be returned; otherwise the caller
                // receives 200 OK even though the upload failed.
                return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, t.Exception);
            }

            if (t.IsCanceled)
            {
                // A cancelled task carries no exception, so report a message instead.
                return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, "The upload was canceled before it completed.");
            }

            return Request.CreateResponse(HttpStatusCode.OK);
        });

        return task;
    }

对于下载，为了保持较低的内存占用，我利用PushStreamContent把数据块推送回给用户：

[HttpGet] 
    [Route("file/{id}")] 
    public async Task<HttpResponseMessage> GetFile(string id) 
    { 
     string mimeType = string.Empty; 
     string filename = string.Empty; 
     if (!string.IsNullOrEmpty(id)) 
     { 
      //get the headers for the file being sent back to the user 
      using (var myConn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PortalBetaConnectionString"].ConnectionString)) 
      { 
       using (var myCmd = new SqlCommand("ReadLargeFileInfo", myConn)) 
       { 
        myCmd.CommandType = System.Data.CommandType.StoredProcedure; 

        var pIdentifier = new SqlParameter("@Identifier", id); 

        myCmd.Parameters.Add(pIdentifier); 

        myConn.Open(); 

        var dataReader = myCmd.ExecuteReader(); 

        if (dataReader.HasRows) 
        { 
         while (dataReader.Read()) 
         { 
          mimeType = dataReader.GetString(0); 
          filename = dataReader.GetString(1); 
         } 
        } 
       } 
      } 


      var result = new HttpResponseMessage() 
      { 
       Content = new PushStreamContent(async (outputStream, httpContent, transportContext) => 
       { 
        //pull the data back from the db and stream the data back to the user 
        await WriteDataChunksFromDBToStream(outputStream, httpContent, transportContext, id); 
       }), 
       StatusCode = HttpStatusCode.OK 
      }; 

      result.Content.Headers.ContentType = new MediaTypeHeaderValue(mimeType);// "application/octet-stream"); 
      result.Content.Headers.ContentDisposition = new ContentDispositionHeaderValue("attachment") { FileName = filename }; 

      return result; 
     } 

     return new HttpResponseMessage(HttpStatusCode.BadRequest); 
    } 

    // Copies every chunk returned by the "ReadAttachmentChunks" stored procedure
    // for the given identifier into the response stream, then closes that stream.
    // httpContent and transportContext are accepted but not used here.
    private async Task WriteDataChunksFromDBToStream(Stream responseStream, HttpContent httpContent, TransportContext transportContext, string fileIdentifier)
    {
        // Ordinal of the result-set column read as the binary chunk.
        const int contentColumn = 0;

        try
        {
            var connectionString = System.Configuration.ConfigurationManager.ConnectionStrings["PortalBetaConnectionString"].ConnectionString;

            using (var connection = new SqlConnection(connectionString))
            {
                await connection.OpenAsync();

                //stored proc to pull the data back from the db
                using (var command = new SqlCommand("ReadAttachmentChunks", connection))
                {
                    command.CommandType = System.Data.CommandType.StoredProcedure;
                    command.Parameters.Add(new SqlParameter("@Identifier", fileIdentifier));

                    // The reader needs to be executed with the SequentialAccess behavior to enable network streaming
                    // Otherwise ReadAsync will buffer the entire BLOB into memory which can cause scalability issues or even OutOfMemoryExceptions
                    using (var chunkReader = await command.ExecuteReaderAsync(CommandBehavior.SequentialAccess))
                    {
                        while (await chunkReader.ReadAsync())
                        {
                            // Skip rows whose binary column is null.
                            if (await chunkReader.IsDBNullAsync(contentColumn))
                            {
                                continue;
                            }

                            //read the binary data of the file into a stream
                            using (var chunk = chunkReader.GetStream(contentColumn))
                            {
                                // Asynchronously copy the stream from the server to the response stream
                                await chunk.CopyToAsync(responseStream);
                                await chunk.FlushAsync();
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            // PushStreamContent requires the responseStream to be closed
            // for signaling it that you have finished writing the response.
            if (responseStream != null)
            {
                responseStream.Dispose();
            }
        }
    }

来源

2016-11-23 14:00:01 JakeHova

呃，这很麻烦。处理上传时，您必须确保：

将头部与内容部分分开——您必须遵循HTTP相关RFC文档的要求。
允许分块传输
当然，内容部分（除非您正在传输文本）将被二进制编码为字符串。
允许压缩的传输，即GZIP或DEFLATE。
也许 - 只是也许 - 考虑到编码（ASCII，Unicode，UTF8等）。

如果不把上面这些都考虑到，你就无法真正确保持久化到数据库中的信息是正确的。对于后面几项，所有说明该如何处理的元数据都位于头信息中的某处，因此这并不是一件可以随手带过的小事。

来源

2016-11-15 19:42:29

是不是讨厌的，因为有一个更好的方法来做到这一点，或者因为你一般不这样做？每次我以前做过大文件传输时，我都会将其写入磁盘并锁定文件存储区，所以我从来没有处理过这样的事情，所以请原谅我，如果我所做的只是愚蠢的。需求是我需要将大文件存储在数据库中（无法使用FileStream），我需要在它到达之前对其进行加密，同时保持较低的内存占用。这个分块的想法是我能想到的唯一方法。 – JakeHova

麻烦的地方在于，你无法利用框架里现成的东西来替你完成所有这些脏活。也许你可以在NuGet上找一个第三方的HTTP客户端库，查看它的源代码，看看它在拆解上传流时是如何施展魔法的。 –

看来我现在唯一的问题是，请求头被包含在了我写入数据库的内容里。我尝试过各种手工剥离头信息的方法，利用边界值来检测头信息的结束位置，但是 1）它对非文本文件不起作用；2）对于一个本应很简单的问题来说，这种解决方案显得既取巧又脆弱。有没有办法在写出数据之前去掉头信息？ – JakeHova

自定义MultipartFormDataStreamProvider上传后通过WebApi从SQL下载大文件

回答

相关问题