2011-04-09 39 views
1

我想使用SPARQL查询链接电影数据库的本地版本。该文件采用N-Triples格式,其大小约为450mb。我正在使用servlet来实现。现在,当我通过查询,大约需要超过五分钟的servlet来处理它,并在年底,我得到以下异常:无法查询链接电影数据库的本地版本

type Exception report 

message 

description The server encountered an internal error() that prevented it from fulfilling this request. 

exception 

javax.servlet.ServletException: Servlet execution threw an exception 


root cause 

java.lang.OutOfMemoryError: Java heap space 
    java.util.Arrays.copyOfRange(Arrays.java:3209) 
    java.lang.String.<init>(String.java:215) 
    java.lang.StringBuilder.toString(StringBuilder.java:430) 
    org.openjena.riot.tokens.TokenizerText.allBetween(TokenizerText.java:732) 
    org.openjena.riot.tokens.TokenizerText.parseToken(TokenizerText.java:152) 
    org.openjena.riot.tokens.TokenizerText.hasNext(TokenizerText.java:69) 
    org.openjena.atlas.iterator.PeekIterator.fill(PeekIterator.java:37) 
    org.openjena.atlas.iterator.PeekIterator.next(PeekIterator.java:77) 
    org.openjena.riot.lang.LangBase.nextToken(LangBase.java:145) 
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:59) 
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:21) 
    org.openjena.riot.lang.LangNTuple.runParser(LangNTuple.java:58) 
    org.openjena.riot.lang.LangBase.parse(LangBase.java:75) 
    org.openjena.riot.system.JenaReaderNTriples2.readWorker(JenaReaderNTriples2.java:28) 
    org.openjena.riot.system.JenaReaderRIOT.readImpl(JenaReaderRIOT.java:124) 
    org.openjena.riot.system.JenaReaderRIOT.read(JenaReaderRIOT.java:79) 
    com.hp.hpl.jena.rdf.model.impl.ModelCom.read(ModelCom.java:226) 
    com.hp.hpl.jena.util.FileManager.readModelWorker(FileManager.java:395) 
    com.hp.hpl.jena.util.FileManager.loadModelWorker(FileManager.java:299) 
    com.hp.hpl.jena.util.FileManager.loadModel(FileManager.java:250) 
    ServletExample.runQuery(ServletExample.java:92) 
    ServletExample.doGet(ServletExample.java:62) 
    javax.servlet.http.HttpServlet.service(HttpServlet.java:627) 
    javax.servlet.http.HttpServlet.service(HttpServlet.java:729) 


note The full stack trace of the root cause is available in the Apache Tomcat/5.5.31 logs. 

我的代码是:

import java.io.IOException; 
import java.io.PrintWriter; 

import javax.servlet.ServletException; 
import javax.servlet.http.*; 

import com.hp.hpl.jena.query.*; 
import com.hp.hpl.jena.rdf.model.*; 
import com.hp.hpl.jena.util.FileManager; 

public class ServletExample 
    extends HttpServlet 
{ 
    /***********************************/ 
    /* Constants      */ 
    /***********************************/ 

    private static final long serialVersionUID = 1L; 

    public static final String SPARQL_ENDPOINT = "http://data.linkedmdb.org/sparql"; 

    public static final String QUERY ="PREFIX m: <http://data.linkedmdb.org/resource/movie/>" 
+"SELECT DISTINCT ?actorName WHERE {"+ 
"?dir1 m:director_name \"Sofia Coppola\"."+ 
"?dir2 m:director_name \"Francis Ford Coppola\"."+ 
"?dir1film m:director ?dir1;"+ 
"m:actor ?actor."+ 
"?dir2film m:director ?dir2;"+ 
"m:actor ?actor."+ 
"?actor m:actor_name ?actorName."+ 
"}"; 
     /*"PREFIX m: <http://data.linkedmdb.org/resource/movie/>\n" + 
      "SELECT DISTINCT ?actorName WHERE {\n" + 
      " ?dir1  m:director_name %dir_name_1%.\n" + 
      " ?dir2  m:director_name %dir_name_2%.\n" + 
      " ?dir1film m:director ?dir1;\n" + 
      "   m:actor ?actor.\n" + 
      " ?dir2film m:director ?dir2;\n" + 
      "   m:actor ?actor.\n" + 
      " ?actor m:actor_name ?actorName.\n" + 
      "}\n" + 
      "";*/ 

    private static final String HEADER = "<html>\n" + 
      "  <head>\n" + 
      "  <title>results</title>\n" + 
      "   <link href=\"simple.css\" type=\"text/css\" rel=\"stylesheet\" />\n" + 
      "  </head>\n" + 
      "  <body>\n" + 
      ""; 

    private static final String FOOTER = "</body></html>"; 

    /** 
    * Respond to HTTP GET request. Will need to be mounted against some URL 
    * pattern in web.xml 
    */ 
    @Override 
    protected void doGet(HttpServletRequest req, HttpServletResponse resp) 
     throws ServletException, IOException 
    { 
     String dir1 = req.getParameter("dir1");//"Sofia"; 
     String dir2 = req.getParameter("dir2");//"Francis Ford Coppola"; 
     //String dir1 = "Sofia"; 
     //String dir2 = "Francis Ford Coppola"; 
     if (dir1 == null || dir2 == null || dir1.isEmpty() || dir2.isEmpty()) { 
      noInput(resp); 
     } 
     else { 
      runQuery(resp, dir1, dir2); 
     } 
    } 

    protected void noInput(HttpServletResponse resp) 
     throws IOException 
    { 
     header(resp); 
     resp.getWriter().println("<p>Please select director names as query params <code>dir1</code> and <code>dir2</code></p>"); 
     footer(resp); 
    } 

    protected void footer(HttpServletResponse resp) throws IOException { 
     resp.getWriter().println(FOOTER); 
    } 

    protected void header(HttpServletResponse resp) throws IOException { 
     resp.getWriter().println(HEADER); 
    } 

    protected void runQuery(HttpServletResponse resp, String dir1, String dir2) 
     throws IOException 
    { 
     PrintWriter out = resp.getWriter(); 

     // Set up the query 
    //  String q = QUERY.replace("%dir_name_1%", "\"" + dir1 + "\"") 
    //     .replace("%dir_name_2%", "\"" + dir2 + "\""); 
     String q=QUERY; 
     Query query = QueryFactory.create(q) ; 
     Model model = FileManager.get().loadModel("e:\\applications\\linkedmdb-18-05-2009-dump\\dump.nt"); 
     // QueryExecution qexec = QueryExecutionFactory.sparqlService(SPARQL_ENDPOINT, query); 

     //com.hp.hpl.jena.query.Query query = QueryFactory.create(QUERY); 
     QueryExecution qexec = QueryExecutionFactory.create(query, model); 
     // perform the query 
     ResultSet results = qexec.execSelect(); 

     // generate the output 
     header(resp); 
     if (!results.hasNext()) { 
      out.println("<p>No results, sorry.</p>"); 
     } 
     else { 
      out.println("<h1>Results</h1>"); 
      while (results.hasNext()) { 
       QuerySolution qs = results.next(); 
       String actorName = qs.getLiteral("actorName").getLexicalForm(); 
       out.println(String.format("<div>Actor named: %s</div>", actorName)); 
      } 
     } 
     footer(resp); 
    } 
} 

有有什么办法来解决这个异常?

回答

1

看来你使用耶拿/ RIOT加载在内存中的所有数据。据我所知,LinkedIMDB足够大,可以为您提供这种方法的问题。你正在做的是把所有的数据库带到内存中。

增加您的JVM堆可能是一个可行的解决方案,但如果你的数据还在不断增加也不会扩大。

正确的解决办法是去而设计的这种规模的数据集的耶拿的其他配置。它们是:

  1. Jena SDB,它使用关系数据库作为后端。
  2. Jena TDB,它采用了基于B树索引,以加快查询本地Java存储。它比(1)更好地缩放。

(可选)您可以选择可扩展的RDF数据库,例如4store并通过Jena ARQ查询您的数据。这个解决方案是迄今为止可以扩展并且性能更好的解决方案。

0

Java虚拟机(JVM)中的堆内存不足。要么增加JVM可用的堆内存量,要么设计软件以使用较少的内存,例如以较小的块处理这些内容。

要增加堆内存,这些参数添加到您的servlet容器的服务器或应用服务器的启动脚本,地方执行的java二进制文件。这告诉JVM,它最多可以使用512兆内存,如果这是不够的,尝试用较大的值:

-Xmx512m 

这是很难说如何提高软件使用更少的内存,而不会看到实际码。

+0

你能准确地知道它在日食helios中的位置吗?我无法找到它 – ProgramME 2011-04-09 14:02:45

+0

如何启动应用程序服务器?一个地方可能是运行 - >运行配置 - > VM参数。 – 2011-04-09 14:04:29

+0

的是,我得到相同exception.i加入你右击input.html说(文件开始用的applcation,然后运行配置和VM arguments.i我张贴的代码 – ProgramME 2011-04-09 14:19:01