2016-02-23 56 views
0

当我尝试通过 HtmlUnit 访问 txt 文件时,出现以下 Java 堆内存错误(OutOfMemoryError: Java heap space)。

代码:

public class LogFileAutomation { 
private static Logger logger = Logger.getLogger(LogFileAutomation.class); 

/**
 * Entry point: loads the configuration, logs in through the "PWChange" form
 * and downloads today's log files from the status page and from every
 * configured URL.
 *
 * <p>The configuration is read from {@code H:\config.properties} when that
 * file can be opened, otherwise from {@code config.properties} on the
 * classpath. The {@code version} property selects which set of keys is used
 * ({@code "1"} selects the old set, anything else the new set). The
 * folder-path and log-href lists must contain one entry for the status page
 * followed by one entry per URL.
 *
 * @param args
 *            not used.
 */
public static void main(String[] args) {
    Properties prop = new Properties();
    InputStream input = null;
    try {
        input = new FileInputStream("H:\\config.properties");
    } catch (Exception e) {
        logger.error("Error in loading the external config file", e);
    }
    try {
        String readURL = null;
        String readfolderPath = null;
        String readlogFileHref = null;
        String loginURL = null;

        if (input == null) {
            System.out.println("Loading config file from classPath..");
            input = LogFileAutomation.class.getClassLoader()
                    .getResourceAsStream("config.properties");
        } else {
            System.out.println("Loading config file from external Path..");
        }
        if (input == null) {
            // getResourceAsStream returns null when the resource is absent;
            // loading from a null stream would only fail with an NPE.
            String problem = "config.properties was not found on the external path or on the classpath.";
            System.out.println(problem);
            logger.error(problem);
            return;
        }
        // load a properties file
        prop.load(input);
        String version = prop.getProperty("version");
        if (version == null) {
            String problem = "Property 'version' is missing from config.properties.";
            System.out.println(problem);
            logger.error(problem);
            return;
        }

        // get the property value
        if ("1".equals(version)) {
            System.out.println("getting into version 1");
            readURL = prop.getProperty("URLS");
            readfolderPath = prop.getProperty("folderPath");
            readlogFileHref = prop.getProperty("logFileHref");
            loginURL = prop.getProperty("oldLogin");
        } else {
            System.out.println("getting into version 2");
            readURL = prop.getProperty("NEWURLS");
            readfolderPath = prop.getProperty("newfolderPath");
            readlogFileHref = prop.getProperty("newlogFileHref");
            loginURL = prop.getProperty("newLogin");
        }
        if (readURL == null || readfolderPath == null
                || readlogFileHref == null || loginURL == null) {
            String problem = "config.properties is missing a URL, folder path, log href or login entry for version "
                    + version;
            System.out.println(problem);
            logger.error(problem);
            return;
        }
        String URLS[] = readURL.split(",");
        String folderPath[] = readfolderPath.split(",");
        String logFileHref[] = readlogFileHref.split(",");

        // Index 0 of folderPath/logFileHref belongs to the status page and
        // index r + 1 to URLS[r]; check this before asking for credentials
        // so a short list does not fail half way through the run.
        int requiredEntries = URLS.length + 1;
        if (folderPath.length < requiredEntries
                || logFileHref.length < requiredEntries) {
            String problem = "Config mismatch: " + URLS.length
                    + " URL(s) need " + requiredEntries
                    + " folder path and log href entries, but found "
                    + folderPath.length + " and " + logFileHref.length;
            System.out.println(problem);
            logger.error(problem);
            return;
        }

        final WebClient webClient = new WebClient();
        webClient.setJavaScriptTimeout(10000);

        System.out.println("Please enter the Login details below.");

        // Getting user details. The Scanner is deliberately left open:
        // closing it would also close System.in.
        System.out.println("Username:");
        Scanner scanner = new Scanner(System.in);
        String username = scanner.next();
        String password = new LogFileAutomation().getPassword();

        HtmlPage page = webClient.getPage(loginURL);

        HtmlForm form = page.getFormByName("PWChange");

        HtmlSubmitInput button = form.getInputByName("BUTTON");
        HtmlTextInput usernameHTMLInput = form.getInputByName("USER");

        // Fill in the password and the user name.
        form.getInputByName("PASSWORD").setValueAttribute(password.trim());
        usernameHTMLInput.setValueAttribute(username);

        webClient.setRefreshHandler(new ThreadedRefreshHandler());
        // NOTE(review): this disables certificate validation and is set
        // only after the login page was fetched - confirm both are intended.
        webClient.getOptions().setUseInsecureSSL(true);
        HtmlPage page2 = button.click();

        DomNodeList<DomElement> statusTable = page2
                .getElementsByTagName("table");

        String pageTitle = page2.getTitleText();

        // Still being on the password-services page means the login failed.
        if (!pageTitle.equals("Corporate Systems ESS Password Services")) {
            logPageContent(webClient, statusTable, logFileHref[0],
                    folderPath[0], version);

            for (int r = 0; r < URLS.length; r++) {
                HtmlPage page1 = webClient.getPage(URLS[r]);

                DomNodeList<DomElement> logTable = page1
                        .getElementsByTagName("table");

                logPageContent(webClient, logTable, logFileHref[r + 1],
                        folderPath[r + 1], version);
            }
        } else {
            System.out
                    .println("Please enter correct username and password..");
        }
    } catch (IOException e) {
        System.out.println("IOException occured in Main method: " + e);
        saveLogException(e, "IOException occured in Main method: ");
    } catch (FailingHttpStatusCodeException se) {
        System.out
                .println("FailingHttpStatusCodeException occured in Main method: "
                        + se.getMessage());
        saveLogException(se,
                "FailingHttpStatusCodeException occured in Main method: ");
    } catch (Exception e) {
        System.out.println("Generic exception occured in Main method: "
                + e.getMessage());
        saveLogException(e, "Generic exception occured in Main method: ");
    } finally {
        if (input != null) {
            try {
                input.close();
            } catch (IOException e) {
                System.out
                        .println("IOException occured in finally try catch Main method: "
                                + e.getMessage());
                saveLogException(e,
                        "IOException occured in finally try catch Main method: ");
            }
        }
    }

}

/**
 * Scans the given tables for log files modified today and downloads them.
 *
 * <p>For every table row with at least two cells, the first space-separated
 * token of the second cell is compared with today's date formatted as
 * {@code dd-MMM-yyyy} in the "EST" time zone. When it matches, every
 * {@code <a>} element directly inside any cell of that row whose text has a
 * wanted prefix is fetched from {@code logFileHref + href} and saved through
 * {@code exportFile}. Afterwards the SystemOut/SystemErr logs of two
 * hard-coded portal hosts are fetched when {@code folderPath} names one of
 * them.
 *
 * <p>NOTE(review): {@code webClient.getPage} materialises the whole response
 * as a {@code TextPage}; a very large log can exhaust the heap at that call.
 * Streaming the response to disk would avoid this.
 *
 * @param webClient
 *            - client used for the downloads.
 * @param logTable
 *            - the {@code <table>} elements of the log page.
 * @param logFileHref
 *            - prefix that is prepended to each link's href to form the
 *            download URL.
 * @param folderPath
 *            - folder the files are written to.
 * @param version
 *            - configuration version; {@code "1"} excludes the "id"
 *            prefix, any other value includes it.
 */
private static void logPageContent(WebClient webClient,
        DomNodeList<DomElement> logTable, String logFileHref,
        String folderPath, String version) {
    System.out.println("version"+version);
    try {
        // Today's date does not change per cell, so format it once.
        // NOTE(review): month names follow the JVM default locale and "EST"
        // is used as written - confirm both match what the server prints.
        SimpleDateFormat df = new SimpleDateFormat("dd-MMM-yyyy");
        df.setTimeZone(TimeZone.getTimeZone("EST"));
        String currentDate = df.format(new Date());

        for (int k = 0; k < logTable.size(); k++) {
            DomNodeList<HtmlElement> rows = logTable.get(k)
                    .getElementsByTagName("tr");

            for (int u = 0; u < rows.size(); u++) {
                DomNodeList<HtmlElement> cells = rows.get(u)
                        .getElementsByTagName("td");

                // The modification date lives in the second cell; rows
                // without one (headers, spacers) cannot match.
                if (cells.size() < 2) {
                    continue;
                }
                String fileModifiedDate[] = cells.get(1).getTextContent()
                        .split(" ");
                if (fileModifiedDate.length == 0
                        || !fileModifiedDate[0].equals(currentDate)) {
                    continue;
                }

                for (int i = 0; i < cells.size(); i++) {
                    for (DomElement link : cells.get(i).getChildElements()) {
                        if (!link.getTagName().equals("a")
                                || link.getAttributeNode("href") == null) {
                            continue;
                        }
                        String linkText = link.getTextContent();
                        if (!isWantedLogName(linkText, version)) {
                            continue;
                        }
                        String url = logFileHref
                                + link.getAttributeNode("href").getNodeValue();
                        TextPage page1 = webClient.getPage(url);
                        System.out.println("Downlaoding: " + url);
                        exportFile(page1, linkText, folderPath);
                    }
                }
            }
        }

        // These fixed logs do not depend on the table being scanned, so they
        // are fetched once (still only when the page had a table) instead
        // of once per table.
        if (!logTable.isEmpty()) {
            if (folderPath.contains("metintugpar0635")) {
                TextPage page1 = webClient
                        .getPage("https://****/view/ezspprodportal1/ezspPROD/WebSphere_Portal/SystemOut.log");
                exportFile(page1, "SystemOut.log", folderPath);
                TextPage page2 = webClient
                        .getPage("https://****/view/ezspprodportal1/ezspPROD/WebSphere_Portal/SystemErr.log");
                exportFile(page2, "SystemErr.log", folderPath);
            }
            if (folderPath.contains("metintugpar0660")) {
                TextPage page1 = webClient
                        .getPage("https://*****/view/ezspprodportal2/ezspPROD/WebSphere_Portal2/SystemOut.log");
                exportFile(page1, "SystemOut.log", folderPath);
                TextPage page2 = webClient
                        .getPage("https://*****/view/ezspprodportal2/ezspPROD/WebSphere_Portal2/SystemErr.log");
                exportFile(page2, "SystemErr.log", folderPath);
            }
        }
    } catch (IOException se) {
        System.out.println("IOException occured in logPageContent method: "
                + se.getMessage());
        saveLogException(se,
                "IOException occured in logPageContent method: ");
    } catch (Exception e) {
        System.out
                .println("Generic exception occured in logPageContent method: "
                        + e.getMessage());
        saveLogException(e,
                "Generic exception occured in logPageContent method: ");
    }
}

/**
 * Tells whether a link text names a log file that should be downloaded.
 *
 * <p>Only the first two characters are compared: "sp", "na" and "im" (the
 * start of "impaqsws2RISC") must match exactly, "sy" matches in any case,
 * and "id" is accepted only when {@code version} is not {@code "1"}. Text
 * shorter than two characters never matches.
 */
private static boolean isWantedLogName(String linkText, String version) {
    if (linkText == null || linkText.length() < 2) {
        return false;
    }
    String head = linkText.substring(0, 2);
    String lowerHead = linkText.toLowerCase().substring(0, 2);
    if (head.equals("sp") || lowerHead.equals("sy") || head.equals("na")
            || head.equals("im")) {
        return true;
    }
    return !"1".equals(version) && head.equals("id");
}

/**
 * Writes the text of a downloaded page to {@code <folderPath>/<name>.txt}.
 *
 * <p>The target folder is created when it does not exist. A trailing
 * {@code .log} is removed from the file name before {@code .txt} is
 * appended, and an existing file of the same name is overwritten. Failures
 * are printed and logged here rather than propagated.
 *
 * @param page1
 *            - page whose text content is saved.
 * @param filname
 *            - name of the target file; {@code null} falls back to "logs".
 * @param folderPath
 *            - folder the file is written to.
 * @throws IOException
 *             declared for callers; every exception raised inside is
 *             caught and logged by this method.
 */
private static void exportFile(TextPage page1, String filname,
        String folderPath) throws IOException {
    try {
        if (filname == null) {
            filname = "logs";
        }
        File dir = new File(folderPath);
        if (!dir.exists()) {
            System.out.println("creating directory: " + folderPath);
            if (dir.mkdirs()) {
                System.out.println("DIR got created.");
            } else if (!dir.isDirectory()) {
                // mkdirs also returns false when the folder appeared in the
                // meantime, so only a still-missing folder is an error.
                throw new IOException("Could not create directory: "
                        + folderPath);
            }
        }
        if (filname.endsWith(".log")) {
            filname = filname.substring(0, filname.length() - 4);
        }
        // Let File join the parts instead of hard-coding "\\".
        File file = new File(dir, filname + ".txt");

        // FileWriter creates (or truncates) the file itself; the finally
        // block closes the writer even when the write fails.
        BufferedWriter bw = new BufferedWriter(new FileWriter(
                file.getAbsoluteFile()));
        try {
            bw.write(page1.getContent());
        } finally {
            bw.close();
        }

        System.out.println("Done dowloading the file : " + filname);
    } catch (SecurityException se) {
        System.out
                .println("Security exception occured in exportFile method: "
                        + se.getMessage());
        saveLogException(se,
                "Security exception occured in exportFile method: ");
    } catch (Exception e) {
        System.out
                .println("Generic exception occured in exportFile method: "
                        + e.getMessage());
        saveLogException(e,
                "Generic exception occured in exportFile method: ");
    }

}

/**
 * Records a failure through the class logger at error level.
 *
 * @param e
 *            - the exception that was caught.
 * @param errorMessage
 *            - text logged together with the exception.
 */
private static void saveLogException(final Exception e,
        final String errorMessage) {
    logger.error(errorMessage, e);
}

/**
 * Prompts for a password on the system console without echoing it.
 *
 * <p>Terminates the JVM with a non-zero status when no console is attached
 * (for example when input is redirected), because the password cannot be
 * read safely in that case.
 *
 * @return the entered password, or an empty string when the console
 *         reached end of input before anything was read.
 */
public String getPassword() {
    Console console = System.console();
    if (console == null) {
        System.out.println("Couldn't get Console instance");
        // This is a failure, so do not report success to the caller's shell.
        System.exit(1);
    }

    char passwordArray[] = console.readPassword("Password: ");
    if (passwordArray == null) {
        // readPassword returns null at end of stream.
        return "";
    }
    String password = new String(passwordArray);
    // Do not keep the raw characters around longer than needed.
    java.util.Arrays.fill(passwordArray, '\0');
    return password;
}

}

错误:

Exception in thread "main" java.lang.OutOfMemoryError: Java heap space 
    at java.util.Arrays.copyOf(Unknown Source) 
    at java.lang.AbstractStringBuilder.expandCapacity(Unknown Source) 
    at java.lang.AbstractStringBuilder.append(Unknown Source) 
    at java.lang.StringBuilder.append(Unknown Source) 
    at org.apache.commons.io.output.StringBuilderWriter.write(StringBuilderW 
    riter.java:138) 
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1681) 
    at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1659) 
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:1636) 
    at org.apache.commons.io.IOUtils.copy(IOUtils.java:1611) 
    at org.apache.commons.io.IOUtils.toString(IOUtils.java:668) 
    at com.gargoylesoftware.htmlunit.WebResponse.getContentAsString(WebRespo 
    nse.java:224) 
    at com.gargoylesoftware.htmlunit.WebResponse.getContentAsString(WebRespo 
    nse.java:195) 
    at com.gargoylesoftware.htmlunit.TextPage.<init>(TextPage.java:42) 
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createTextPage(Defau 
    ltPageCreator.java:304) 
    at com.gargoylesoftware.htmlunit.DefaultPageCreator.createPage(DefaultPa 
    geCreator.java:170) 
    at com.gargoylesoftware.htmlunit.WebClient.loadWebResponseInto(WebClient 
    .java:468) 
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:342) 
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:407) 
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:392) 
    at LogFileAutomation.logPageContent(LogFileAutomation.java:289) 

回答

0

看来你尝试读取的日志文件中有一个太大,无法全部加载到内存中。

TextPage page1 = webClient 
        .getPage("https://****/view/ezspprodportal1/ezspPROD/WebSphere_Portal/SystemOut.log");