2017-06-30 46 views
1

我需要从一个包含 400GB 文档的 Alfresco 站点下载所有文档。下面的代码只适用于生成较小的 zip 文件(大约 1GB),文件再大就会占用过多内存。我不想把整个 ZipOutputStream 保存在内存中;我希望内存只用于当前正在复制到 zip 文件的那一个文档,或者使用一个每处理一个文档就被覆盖一次的临时文件。

(标题:如何在 Java 中创建大小为 400 GB 的 zip 文件)

这种问题的最佳做法是什么?

下面这段代码由我的 main 方法调用:

FolderImpl sitoFolder = (FolderImpl) cmisObject;

// Depth -1: per the CMIS API this requests every level below the site folder.
List<Tree<FileableCmisObject>> sitoFolderDescendants = sitoFolder.getDescendants(-1);

// NOTE: createZipFILE returns the complete archive as a byte[], so the whole zip
// lives on the heap before anything is written to disk. That is what limits this
// approach to small archives.
byte[] zipFile = createZipFILE(sitoFolderDescendants);
String rootPath = cartella_download_file;
File dir = new File(rootPath + File.separator);
if (!dir.exists()) {
    dir.mkdirs();
}
Date date = new Date();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
String stringDate = sdf.format(date);
// Target name: <yyyy-MM-dd>_<site folder name without whitespace>.zip
String nameZipFile = sitoFolder.getName().replaceAll("\\s","");
File serverFile = new File(dir.getAbsolutePath() + File.separator + stringDate+"_"+nameZipFile+".zip");
BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(new FileOutputStream(serverFile));
try {
    IOUtils.write(zipFile, bufferedOutputStream);
} finally {
    // Always release the file handle, even when the write fails part-way.
    bufferedOutputStream.close();
}

/**
 * Builds a zip archive from the given descendant trees and returns it as a byte array.
 *
 * <p>The archive is assembled entirely in a {@link ByteArrayOutputStream}, so the whole
 * zip is held in memory until this method returns.
 *
 * @param list the trees to traverse; each one is handed to {@code traverseTree}
 * @return the bytes of the finished zip, or an empty array if finishing the archive
 *         failed with an {@link IOException} (the error is logged, not rethrown)
 * @throws IOException declared for interface compatibility; I/O failures seen here are logged
 */
private byte[] createZipFILE(List<Tree<FileableCmisObject>> list) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ZipOutputStream zos = new ZipOutputStream(baos);
    try {
        ReportDocument reportDocument = new ReportDocument();
        for (Tree<FileableCmisObject> aList : list) {
            traverseTree(aList, zos, reportDocument);
        }
        // close() writes the central directory; only afterwards is the buffer a valid zip.
        zos.close();
        return baos.toByteArray();
    } catch (IOException exc) {
        reportLog.error(exc.getMessage());
        return new byte[0];
    } finally {
        // Releases the deflater when traversal aborts with a runtime exception.
        // After a successful close() above this second call does nothing.
        try {
            zos.close();
        } catch (IOException ignored) {
            // Best effort: the result (or the original failure) has already been decided.
        }
    }
}

/**
 * Walks the children of {@code tree} depth-first and adds them to the zip.
 *
 * <p>Documents are added through {@code addToZipFile}. Folders are added through
 * {@code addToZipFolder} only when the repository reports zero children for them.
 * An {@link IOException} raised while adding one item is logged and the walk continues.
 *
 * @param tree           node whose children are processed
 * @param zos            zip stream receiving the entries
 * @param reportDocument passed through unchanged to the recursive calls
 */
private void traverseTree(Tree<FileableCmisObject> tree, ZipOutputStream zos, ReportDocument reportDocument) {
    for (Tree<FileableCmisObject> node : tree.getChildren()) {
        FileableCmisObject item = node.getItem();
        try {
            if (CmisUtil.isDocument(item)) {
                addToZipFile((Document) item, zos);
            } else if (CmisUtil.isFolder(item)
                    && ((Folder) item).getChildren().getTotalNumItems() == 0) {
                addToZipFolder((Folder) item, zos);
            }
        } catch (IOException ioExc) {
            appLog.error(ioExc.getMessage());
        }
        // Descend regardless of what kind of item this node held.
        traverseTree(node, zos, reportDocument);
    }
}

/**
 * Copies the content of one document into the zip.
 *
 * <p>The entry name is the document's first path with {@code sito_export_path} removed.
 *
 * @param document document whose content stream is copied
 * @param zos      zip stream receiving the entry
 * @throws IOException if reading the content or writing the entry fails
 */
private void addToZipFile(Document document, ZipOutputStream zos) throws IOException {
    // Resolve the entry name before opening the stream, so a failure here cannot leak it.
    String path = document.getPaths().get(0).replace(sito_export_path, "");
    InputStream inputStream = document.getContentStream().getStream();
    try {
        zos.putNextEntry(new ZipEntry(path));
        IOUtils.copy(inputStream, zos, 1024);
        zos.closeEntry();
    } finally {
        // Close the content stream even when the copy fails part-way.
        inputStream.close();
    }
}

/**
 * Adds an empty folder to the zip as an entry whose name ends with {@code "/"}.
 *
 * <p>The entry name is the folder's first path with {@code sito_export_path} removed.
 *
 * @param folder folder to record
 * @param zos    zip stream receiving the entry
 * @throws IOException if the entry cannot be written
 */
private void addToZipFolder(Folder folder, ZipOutputStream zos) throws IOException {
    String path = folder.getPaths().get(0).replace(sito_export_path, "");
    zos.putNextEntry(new ZipEntry(path.concat("/")));
    // Close the entry explicitly, as addToZipFile does, instead of relying on the
    // next putNextEntry()/close() call to do it implicitly.
    zos.closeEntry();
}
+5

首先,确保您的ZIP库支持64位ZIP文件。总的来说,这听起来像一个坏主意。 –

+3

为什么你可能想在一个zip文件中放入近半个太字节?对我来说,这似乎是一个可怕的想法,不仅从服务器负载的角度来看,而且在下载或传输它时会带来一些带宽和时间。 –

+0

因为这是一项需求……而且这个过程由夜间批处理调用,只会被执行一次。

回答

2

我解决了它。我首先在服务器上创建了一个目录,然后直接在这个目录下创建了zip文件。

错误在于:先把所有文件保存到 ByteArrayOutputStream 中,然后才写入 zip 文件。

// Write the archive straight to a file on disk instead of collecting it in a
// ByteArrayOutputStream first.
File serverFile = new File(dir.getAbsolutePath() + File.separator + stringDate+"_"+nameZipFile+".zip"); 
FileOutputStream fileOutputStream = new FileOutputStream(serverFile); 
ZipArchiveOutputStream zos = new ZipArchiveOutputStream(fileOutputStream); 
// Each descendant tree is walked and its items are written to the zip stream.
for (Tree<FileableCmisObject> aList : sitoFolderDescendants) { 
    traverseTree(aList, zos, reportDocument); 
} 
// NOTE(review): this close() is skipped if traverseTree throws; the enclosing
// try/finally is not shown in this fragment - confirm it releases the stream.
zos.close(); 

在 finally 块中,我关闭了 FileOutputStream。然后我修改了辅助方法,改用 ZipArchiveOutputStream 和 ZipArchiveEntry:

/**
 * Records an empty folder in the archive as an entry whose name ends with {@code "/"}.
 *
 * <p>The entry name is the folder's first path with {@code sito_export_path} removed.
 *
 * @param folder folder to record
 * @param zos    archive stream receiving the entry
 * @throws IOException if the entry cannot be written
 */
private void addToZipFolder(Folder folder, ZipArchiveOutputStream zos) throws IOException {
    String relativePath = folder.getPaths().get(0).replace(sito_export_path, "");
    ZipArchiveEntry directoryEntry = new ZipArchiveEntry(relativePath + "/");
    appLog.info("aggiungo cartella vuota "+folder.getName()+" al file zip");
    // An entry with no payload: open it and close it immediately.
    zos.putArchiveEntry(directoryEntry);
    zos.closeArchiveEntry();
}

/**
 * Copies the content of one document into the archive through a small fixed buffer.
 *
 * <p>The entry name is the document's first path with {@code sito_export_path} removed.
 *
 * @param document document whose content stream is copied
 * @param zos      archive stream receiving the entry
 * @throws IOException if reading the content or writing the entry fails
 */
private void addToZipFile(Document document, ZipArchiveOutputStream zos) throws IOException {
    String path = document.getPaths().get(0).replace(sito_export_path, "");
    ZipArchiveEntry entry = new ZipArchiveEntry(path);
    // The CMIS API reports -1 when the length is unknown, and setSize() rejects
    // negative values, so only declare the size when the repository knows it.
    // NOTE(review): without a declared size, very large entries may need an explicit
    // Zip64 mode on the archive stream - confirm against the commons-compress docs.
    long length = document.getContentStreamLength();
    if (length >= 0) {
        entry.setSize(length);
    }

    InputStream inputStream = document.getContentStream().getStream();
    try {
        zos.putArchiveEntry(entry);
        byte[] buffer = new byte[1024];
        int nRead;
        while ((nRead = inputStream.read(buffer, 0, buffer.length)) > 0) {
            zos.write(buffer, 0, nRead);
        }
        zos.closeArchiveEntry();
    } finally {
        // Close the content stream even when the copy fails part-way.
        inputStream.close();
    }
}
1

其实我曾经为 Alfresco 3.4.d 版本实现过下载 zip 的功能,使用的是下面的代码。我没有用 GB 级别的文件测试过,因为我没有那么多数据,不过这可能对你有帮助。

这是Java Backed WebScript。

/**
 * Web script that packs the requested nodes into a zip archive and writes the archive
 * to the response.
 *
 * <p>The archive is first written to a temporary file and then copied to the response
 * stream, so memory use is bounded by {@link #BUFFER_SIZE} rather than by the archive size.
 */
public class ZipContents extends AbstractWebScript {
    private static final Log logger = LogFactory.getLog(ZipContents.class);

    /** Size in bytes of the buffer used for every stream copy. */
    private static final int BUFFER_SIZE = 1024;

    private static final String MIMETYPE_ZIP = "application/zip";
    private static final String TEMP_FILE_PREFIX = "alf";
    private static final String ZIP_EXTENSION = ".zip";

    private ContentService contentService;
    private NodeService nodeService;
    private NamespaceService namespaceService;
    private DictionaryService dictionaryService;
    private StoreRef storeRef;
    private String encoding;

    public void setNodeService(NodeService nodeService) {
        this.nodeService = nodeService;
    }

    public void setContentService(ContentService contentService) {
        this.contentService = contentService;
    }

    public void setNamespaceService(NamespaceService namespaceService) {
        this.namespaceService = namespaceService;
    }

    public void setDictionaryService(DictionaryService dictionaryService) {
        this.dictionaryService = dictionaryService;
    }

    /** Sets the store that the node ids from the request are resolved against. */
    public void setStoreUrl(String url) {
        this.storeRef = new StoreRef(url);
    }

    /** Sets the character encoding applied to the zip archive stream. */
    public void setEncoding(String encoding) {
        this.encoding = encoding;
    }

    /**
     * Handles the request: validates the parameters, sets the download headers and
     * streams the zip to the response.
     *
     * <p>Required request parameters: {@code nodes} (comma-separated node ids),
     * {@code filename} (download name without extension) and {@code noaccent}
     * ({@code "true"} to strip non-ASCII characters from entry names).
     *
     * @throws WebScriptException with status 400 if a parameter is missing or zipping fails
     * @throws IOException        if the response stream cannot be obtained
     */
    public void execute(WebScriptRequest req, WebScriptResponse res) throws IOException {
        String nodes = requireParameter(req, "nodes");

        List<String> nodeIds = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(nodes, ",");
        while (tokenizer.hasMoreTokens()) {
            nodeIds.add(tokenizer.nextToken());
        }

        String filename = requireParameter(req, "filename");
        String noaccentStr = requireParameter(req, "noaccent");

        try {
            // The name comes from the request: drop quotes and line breaks so it cannot
            // break out of the quoted header value or inject additional headers.
            String safeName = unAccent(filename).replaceAll("[\\r\\n\"]", "");

            res.setContentType(MIMETYPE_ZIP);
            res.setHeader("Content-Transfer-Encoding", "binary");
            res.addHeader("Content-Disposition", "attachment;filename=\"" + safeName + ZIP_EXTENSION + "\"");

            res.setHeader("Cache-Control", "must-revalidate, post-check=0, pre-check=0");
            res.setHeader("Pragma", "public");
            res.setHeader("Expires", "0");

            // parseBoolean has the same semantics as the deprecated new Boolean(String).
            createZipFile(nodeIds, res.getOutputStream(), Boolean.parseBoolean(noaccentStr));
        } catch (RuntimeException e) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        }
    }

    /**
     * Zips the given nodes into a temporary file and copies that file to {@code os}.
     *
     * <p>Nothing is written to {@code os} unless the archive was built completely, and
     * the temporary file is removed in every case. Does nothing if {@code nodeIds} is
     * {@code null} or empty.
     *
     * @param nodeIds  ids of the nodes to add, resolved against the configured store
     * @param os       destination for the finished archive
     * @param noaccent whether to strip non-ASCII characters from entry names
     * @throws WebScriptException with status 400 if building or copying the archive fails
     */
    public void createZipFile(List<String> nodeIds, OutputStream os, boolean noaccent) throws IOException {
        if (nodeIds == null || nodeIds.isEmpty()) {
            return;
        }

        File zip = null;
        try {
            zip = TempFileProvider.createTempFile(TEMP_FILE_PREFIX, ZIP_EXTENSION);
            writeArchive(nodeIds, zip, noaccent);
            copyFile(zip, os);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        } finally {
            // try and delete the temporary file
            if (zip != null) {
                zip.delete();
            }
        }
    }

    /**
     * Adds {@code node} to the archive, recursing into folders.
     *
     * <p>File links are replaced by their destination, unless the destination is missing
     * or lives in the same parent as the link (which would produce a duplicate entry).
     * Content nodes become file entries, empty folders become directory entries,
     * non-empty folders are descended into, and every other type is logged and skipped.
     *
     * @param node     node to add
     * @param out      archive stream receiving the entries
     * @param noaccent whether to strip non-ASCII characters from the node name
     * @param path     archive path of the parent, or {@code ""} at the top level
     * @throws IOException if reading content or writing an entry fails
     */
    public void addToZip(NodeRef node, ZipArchiveOutputStream out, boolean noaccent, String path) throws IOException {
        QName nodeQnameType = this.nodeService.getType(node);

        // Special case : links
        if (this.dictionaryService.isSubClass(nodeQnameType, ApplicationModel.TYPE_FILELINK)) {
            NodeRef linkDestinationNode = (NodeRef) nodeService.getProperty(node, ContentModel.PROP_LINK_DESTINATION);
            if (linkDestinationNode == null) {
                return;
            }

            // Duplicate entry: check if link is not in the same space of the
            // link destination
            if (nodeService.getPrimaryParent(node).getParentRef().equals(nodeService.getPrimaryParent(linkDestinationNode).getParentRef())) {
                return;
            }

            nodeQnameType = this.nodeService.getType(linkDestinationNode);
            node = linkDestinationNode;
        }

        String nodeName = (String) nodeService.getProperty(node, ContentModel.PROP_NAME);
        nodeName = noaccent ? unAccent(nodeName) : nodeName;
        String entryPath = path.isEmpty() ? nodeName : path + '/' + nodeName;

        if (this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_CONTENT)) {
            ContentReader reader = contentService.getReader(node, ContentModel.PROP_CONTENT);
            if (reader == null) {
                logger.warn("Could not read : " + nodeName + "content");
                return;
            }
            addFileEntry(node, reader, out, entryPath);
        } else if (this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_FOLDER)
                && !this.dictionaryService.isSubClass(nodeQnameType, ContentModel.TYPE_SYSTEM_FOLDER)) {
            List<ChildAssociationRef> children = nodeService.getChildAssocs(node);
            if (children.isEmpty()) {
                // Only empty folders need their own entry; others appear through their children.
                ZipArchiveEntry entry = new ZipArchiveEntry(entryPath + '/');
                entry.setSize(0);
                applyModifiedTime(entry, node);
                out.putArchiveEntry(entry);
                out.closeArchiveEntry();
            } else {
                for (ChildAssociationRef childAssoc : children) {
                    addToZip(childAssoc.getChildRef(), out, noaccent, entryPath);
                }
            }
        } else {
            logger.info("Unmanaged type: " + nodeQnameType.getPrefixedQName(this.namespaceService) + ", filename: " + nodeName);
        }
    }

    /**
     * Removes every non-ASCII character from {@code s} after decomposing it, so that
     * accented letters keep their base letter.
     *
     * <p>ZipEntry() does not convert filenames from Unicode to platform (waiting
     * Java 7) http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4244499
     *
     * <p>NOTE(review): the three-argument {@code normalize} overload is presumably
     * ICU4J's {@code Normalizer}; confirm against the file's imports.
     *
     * @param s text to convert
     * @return {@code s} reduced to ASCII characters
     */
    public static String unAccent(String s) {
        String temp = Normalizer.normalize(s, Normalizer.NFD, 0);
        return temp.replaceAll("[^\\p{ASCII}]", "");
    }

    /** Returns the named request parameter, or fails with status 400 if it is missing or empty. */
    private static String requireParameter(WebScriptRequest req, String name) {
        String value = req.getParameter(name);
        if (value == null || value.length() == 0) {
            throw new WebScriptException(HttpServletResponse.SC_BAD_REQUEST, name);
        }
        return value;
    }

    /** Writes an archive containing all {@code nodeIds} to {@code zip}, closing every stream it opens. */
    private void writeArchive(List<String> nodeIds, File zip, boolean noaccent) throws IOException {
        FileOutputStream stream = new FileOutputStream(zip);
        ZipArchiveOutputStream out = new ZipArchiveOutputStream(new BufferedOutputStream(stream));
        boolean finished = false;
        try {
            out.setEncoding(encoding);
            out.setMethod(ZipArchiveOutputStream.DEFLATED);
            out.setLevel(Deflater.BEST_COMPRESSION);

            if (logger.isDebugEnabled()) {
                logger.debug("Using encoding '" + encoding + "' for zip file.");
            }

            for (String nodeId : nodeIds) {
                addToZip(new NodeRef(storeRef, nodeId), out, noaccent, "");
            }

            // Success path: let close() report any error while finishing the archive.
            out.close();
            finished = true;
        } finally {
            if (!finished) {
                // Failure path: release resources without masking the original exception.
                IOUtils.closeQuietly(out);
            }
            IOUtils.closeQuietly(stream);
        }
    }

    /** Writes one content node as a file entry named {@code entryPath}. */
    private void addFileEntry(NodeRef node, ContentReader reader, ZipArchiveOutputStream out, String entryPath)
            throws IOException {
        ZipArchiveEntry entry = new ZipArchiveEntry(entryPath);
        applyModifiedTime(entry, node);
        entry.setSize(reader.getSize());

        InputStream is = reader.getContentInputStream();
        try {
            out.putArchiveEntry(entry);
            copy(is, out);
            out.closeArchiveEntry();
        } finally {
            IOUtils.closeQuietly(is);
        }
    }

    /** Stamps {@code entry} with the node's modified date, if the node has one. */
    private void applyModifiedTime(ZipArchiveEntry entry, NodeRef node) {
        Date modified = (Date) nodeService.getProperty(node, ContentModel.PROP_MODIFIED);
        if (modified != null) {
            entry.setTime(modified.getTime());
        }
    }

    /** Copies the whole of {@code source} to {@code target}; {@code target} is left open. */
    private static void copyFile(File source, OutputStream target) throws IOException {
        InputStream in = new FileInputStream(source);
        try {
            copy(in, target);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /** Copies {@code in} to {@code out} in {@link #BUFFER_SIZE} chunks; closes neither stream. */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        int len;
        while ((len = in.read(buffer, 0, buffer.length)) > 0) {
            out.write(buffer, 0, len);
        }
    }

}
相关问题