2015-08-18 109 views
1

我将把我的数据从MongoDB传递到Neo4j。 因此,我用.csv导出了我的MongoDB文档。正如你可以阅读here我有一个阵列制服的问题。 所以我写了一个Java程序来解决这个问题。
这里是MongoDB中导出的.csv(注意不同的约均匀阵列):使文件读取速度更快

_id,official_name,common_name,country,started_by.day,started_by.month,started_by.year,championship,stadium.name,stadium.capacity,palmares.first_prize,palmares.second_prize,palmares.third_prize,palmares.fourth_prize,average_age,squad_value,foreigners,uniform 
0,yaDIXxLAOV,WWYWLqPcYM,QsVwiNmeGl,7,9,1479,oYKGgstIMv,qskcxizCkd,8560,10,25,9,29,16,58,6,"[""first_colour"",""second_colour"",""third_colour""]" 

下面是它必须如何在Neo4j的导入:

_id,official_name,common_name,country,started_by.day,started_by.month,started_by.year,championship,stadium.name,stadium.capacity,palmares.first_prize,palmares.second_prize,palmares.third_prize,palmares.fourth_prize,average_age,squad_value,foreigners,uniform.0,uniform.1,uniform.2 
0,yaDIXxLAOV,WWYWLqPcYM,QsVwiNmeGl,7,9,1479,oYKGgstIMv,qskcxizCkd,8560,10,25,9,29,16,58,6,first_colour,second_colour,third_colour 

我的代码工作,但我必须转换.csv文件的500k行和程序它太慢(它仍然工作20分钟后:/):

import java.io.BufferedReader; 
import java.io.File; 
import java.io.FileReader; 
import java.io.IOException; 
import java.io.PrintWriter; 

public class ConvertireCSV { 

    public static void main(String[] args) throws IOException { 

     FileReader f; 
     f=new FileReader("output.csv"); 

     BufferedReader b; 
     b=new BufferedReader(f); 

     String firstLine= b.readLine(); 
     int uniform = firstLine.indexOf("uniform"); 
     firstLine=firstLine.substring(0, uniform); 
     firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n"; 

     String line=""; 
     String csv=""; 

     while(true) { 
      line=b.readLine(); 
      if(line==null) 
       break; 
      int u = line.indexOf("\"["); 
      line=line.substring(0, u); 
      line=line + "first_colour,second_colour,third_colour \n"; 
      csv=csv+line;     
     } 

     File file = new File("outputForNeo4j.csv"); 

     if(file.createNewFile()) { 
      PrintWriter pw = new PrintWriter(file); 
      pw.println(firstLine + csv); 
      System.out.println("New file \"outputForNeo4j.csv\" created."); 
      pw.flush(); 
      pw.close(); 
     } 
    } 
} 

我该如何加快速度?

+0

在第一个快速视图,我会说,检查是否有足够的堆,我会使用StringBuilder而不是字符串。您应该尝试找到哪部分代码太慢 – alacambra

+0

这应该是代码审查SE tbh – Sammaye

+1

也许您会有兴趣与我们一起讨论有关mongodb - > neo4j docs collector的反馈意见吗?让我知道neo4j.com的michael –

回答

2

好了一些基本的方法来改善你的代码:

  1. 确保您的变量得到所需的最小范围。如果循环外部不需要line,请不要在循环外声明它。
  2. 简单字符串的连接通常很慢。使用StringBuilder加快速度。
  3. 为什么你要缓冲字符串呢?看起来像是浪费记忆。只需打开输出流到您的目标文件并在处理它们时将这些行写入新文件。

例子:

我不认为你需要在第一点的例子。 对于第二种情况可能是这样的:

... 
StringBuilder csv = new StringBuilder(); 
while(true) { 
    ... 
    csv.append(line); 
} 
... 
if(file.createNewFile()) { 
    ... 
    pw.println(firstLine + csv.toString()); 
    ... 
} 

对于第三点重写会稍微更广泛:

public static void main(String[] args) throws IOException { 
    FileReader f; 
    f=new FileReader("output.csv"); 

    BufferedReader b; 
    b=new BufferedReader(f); 

    String firstLine= b.readLine(); 
    int uniform = firstLine.indexOf("uniform"); 
    firstLine=firstLine.substring(0, uniform); 
    firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n"; 

    File file = new File("outputForNeo4j.csv"); 
    if(!file.createNewFile()) { 
     // all work would be for nothing! Bailing out. 
     return; 
    } 

    PrintWriter pw = new PrintWriter(file); 
    pw.print(firstLine); 

    while(true) { 
     String line=b.readLine(); 
     if(line==null) 
      break; 
     int u = line.indexOf("\"["); 
     line=line.substring(0, u); 
     line=line + "first_colour,second_colour,third_colour \n"; 
     pw.print(line);     
    } 

    System.out.println("New file \"outputForNeo4j.csv\" created."); 
    pw.flush(); 
    pw.close(); 
    b.close() 
} 
1
csv=csv+line; 

字符串连接是昂贵的操作。我会建议使用bufferedWriter。 是这样的:

FileReader f; 
    f=new FileReader("output.csv"); 

    BufferedReader b; 
    BufferedWriter out; 
    b=new BufferedReader(f); 
    try{ 
     out = new BufferedWriter(new FileWriter("outputForNeo4j.csv")); 
    } catch(Exception e){ 
     //cannot create file 
    } 
    System.out.println("New file \"outputForNeo4j.csv\" created."); 

    String firstLine= b.readLine(); 
    int uniform = firstLine.indexOf("uniform"); 
    firstLine=firstLine.substring(0, uniform); 
    firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n"; 

    String line=""; 
    String csv=""; 

    out.write(firstLine); 

    while(true) { 
     line=b.readLine(); 
     if(line==null) 
      break; 
     int u = line.indexOf("\"["); 
     line=line.substring(0, u); 
     line=line + "first_colour,second_colour,third_colour \n"; 
     out.write(line);    
    }   
     out.flush(); 

    } 
1

结果:
TEST0:运行:241次迭代,avarage milis = 246
test1:运行:249次迭代,avarage milis = 118
test2:运行:269次迭代,avarage milis = 5
test3:运行:241次迭代NS,avarage milis = 2

import java.io.*; 
import java.lang.reflect.InvocationTargetException; 
import java.lang.reflect.Method; 
import java.util.Random; 

public class Tester { 

private static final String filePath = "c:\\bigFile.txt"; 
//private static final String filePath = "c:\\bigfileNewLine.txt"; 

private static final int numOfMethods = 4; 
private static final int numOfIter = 1000; 
public Tester() throws NoSuchMethodException { 
    System.out.println("Tester.Tester"); 

    int[] milisArr = new int [numOfMethods]; 
    int[] actualRun = new int [numOfMethods]; 

    Random rnd = new Random(System.currentTimeMillis()); 
    Long startMs = 0l, endMs = 0l; 
    Method[] method = new Method[numOfMethods]; 
    for (int i = 0; i < numOfMethods; i++) 
     method[i] = this.getClass().getMethod("test" + i); 

    int testCount = 0; 
    while (testCount++ < numOfIter) { 
     int testMethod = rnd.nextInt(numOfMethods); 
     Method m = method[testMethod]; 
     try { 
      System.gc(); 
      startMs = System.currentTimeMillis(); 
      String retval = (String) m.invoke(null); 
      endMs = System.currentTimeMillis(); 

     } catch (IllegalAccessException e) { 
      e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 
     } catch (InvocationTargetException e) { 
      e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 
     } 
     milisArr[testMethod] += (endMs - startMs); 
     actualRun[testMethod]++; 
     System.out.println("Test name: " + m.getName() + " testCount=" + testCount + " Of " + numOfIter + " iteration, Total time :" + (endMs - startMs)/1000.0 + " seconds"); 
    } 

    System.out.println("Test Summery :"); 
    for (int i = 0; i < numOfMethods; i++) 
     System.out.println("test" + i + " : Runs: " + actualRun[i] + " iterations ,avarage milis = " + milisArr[i]/numOfIter); 

} 


public static String test0() throws IOException { 
    InputStream file = getInputStream(); 
    StringBuffer textBuffer = new StringBuffer(); 
    int c; 
    while ((c = file.read()) != -1) 
     textBuffer.append((char) c); 


    file.close(); 
    return textBuffer.toString(); 
} 

public static String test1() throws IOException { 
    Reader reader = new FileReader(new File(filePath)); 
    BufferedReader br = new BufferedReader(reader); 
    String line = br.readLine(); 
    String result = line; 
    while (line != null) { 
     line = br.readLine(); 
     if (line == null) { 
     } else { 
      result = result + "\n" + line; 
     } 
    } 
    br.close(); 
    reader.close(); 
    return result; 
} 


public static String test2() throws IOException { 
    byte[] buf = new byte[1024]; 
    int l; 

    InputStream is = getInputStream(); 
    StringBuffer tmpBuf = new StringBuffer(); 
    while ((l = is.read(buf)) != -1) { 
     tmpBuf.append(new String(buf, 0, l)); 
    } 

    is.close(); 
    return tmpBuf.toString(); 
} 

public static String test3() throws IOException { 
    File source = new File(filePath); 
    final DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(source))); 
    final byte[] buffer = new byte[(int) source.length()]; 
    dis.readFully(buffer); 
    dis.close(); 
    return new String(buffer, "UTF-8"); 
} 

private static InputStream getInputStream() { 
    try { 
     return new FileInputStream(filePath); 
    } catch (FileNotFoundException e) { 
     e.printStackTrace(); 
     return null; 
    } 
} 

public static void main(String[] args) { 
    try { 
     new Tester(); 
    } catch (NoSuchMethodException e) { 
     e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates. 
    } 
} 

}