2013-07-21 50 views
0

我有一个用 Java 编写的简单程序(标题:Java - 在多个 txt 文件中进行字符串比较),它的功能是:

  • 解析 txt 文件
  • 将日期时间转换为 Unix 时间戳
  • 每一秒只保留一行数值

下面是代码。txt 文件都是这种格式:

21/03/2013 04:18:23 6890 6830 6850 6770 6830 6400 6630 6710 6770 6850 35024 34976 21/03/2013 04:18:23 6910 6800 6850 6770 6820 6410 6590 6710 6780 6820 35056 34976 21/03/2013 04:18:24 6890 6820 6860 6770 6830 6400 6580 6720 6770 6830 34912 34880 21/03/2013 04:18:24 6860 6840 6840 6770 6830 6390 6660 6700 6740 6890 35008 34880

我的程序会把这些数据转换成如下形式的输出:

put sensor.rat.128 1364278801 7100 sensor=A

put sensor.rat.128 1364278801 6910 sensor=B

put sensor.rat.128 1364278801 6890 sensor=C

put sensor.rat.128 1364278801 6630 sensor=D

这个程序在处理单个 txt 文件时工作得很好,也就是说,它对每一秒只保留一行数值;但如果同一秒的数值出现在不同的 txt 文件中,它就无法识别出这些重复的数据。

所以问题是:我该如何修改代码,使得即使跨多个文件,每一秒也只保留唯一的一行数值?希望我已经把问题说清楚了。

import java.util.Scanner; 
import java.util.List; 
import java.util.ArrayList; 
import java.io.*; 
import java.text.SimpleDateFormat; 
import java.util.Date; 
import org.apache.commons.io.FileUtils; 


/**
 * Downsamples whitespace-separated sensor logs and prints the result as
 * {@code put} lines on standard output.
 *
 * <p>Every input record consists of 14 tokens: a date ({@code dd/MM/yyyy}),
 * a time ({@code HH:mm:ss}) and twelve sensor readings. Within one file, a
 * record is emitted only when its date/time text differs from that of the
 * record immediately before it, so consecutive records for the same second
 * collapse into the first one. Duplicates are NOT detected across files.
 */
public class Downsampler {

    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_PATH = "/home/alessandro/Data128prova";

    /** Name of the metric written in every output line. */
    private static final String METRIC = "sensor.rat.128.riprova";

    /** Tokens per input record: date, time and twelve readings. */
    private static final int RECORD_SIZE = 14;

    /** Sensor tags, in the column order of the readings. */
    private static final String[] SENSORS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /**
     * Scans every regular file in the input folder and prints one block of
     * twelve {@code put} lines per retained record.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     * @throws IOException if the folder cannot be listed or a file cannot be opened
     * @throws java.text.ParseException if a record does not start with a valid date and time
     */
    public static void main(String[] args) throws Exception {

        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // listFiles() returns null (not an empty array) when the path is not a
        // readable directory; fail with a clear message instead of an NPE.
        File[] files = new File(path).listFiles();
        if (files == null) {
            throw new IOException("Cannot list files in directory: " + path);
        }

        // One formatter for the whole run; it is only used from this thread.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");
        String[] record = new String[RECORD_SIZE];

        for (File file : files) {
            if (!file.isFile()) {
                continue; // a Scanner cannot be opened on a sub-directory
            }
            Scanner s = new Scanner(file);
            try {
                String app = ""; // date/time text of the previous record
                // Records are streamed one at a time, so the file is never
                // held in memory as a whole. A trailing partial record
                // (fewer than 14 tokens) is ignored.
                while (readRecord(s, record)) {
                    String str = record[0] + " " + record[1];
                    if (!str.equalsIgnoreCase(app)) {
                        long epoch = timestampFormat.parse(str).getTime() / 1000;
                        printRecord(epoch, record);
                    }
                    app = str;
                }
            } finally {
                s.close();
            }
        }
    }

    /** Fills {@code record} with the next tokens; returns false if the input ends first. */
    private static boolean readRecord(Scanner in, String[] record) {
        for (int i = 0; i < record.length; i++) {
            if (!in.hasNext()) {
                return false;
            }
            record[i] = in.next();
        }
        return true;
    }

    /** Prints one {@code put} line per sensor reading of {@code record}. */
    private static void printRecord(long epoch, String[] record) {
        for (int i = 0; i < SENSORS.length; i++) {
            // Readings start at index 2, after the date and time tokens.
            System.out.println("put " + METRIC + " " + epoch + " " + record[i + 2]
                    + " sensor=" + SENSORS[i]);
        }
    }
}
+0

问号在哪里? –

+0

@Heuster,我编辑了这个问题,谢谢 – alessandrob

+0

您的数据是否按时间顺序写入文件,也就是说,同一个文件中较早的值永远不会出现在较晚的值之后? – lreeder

回答

0

使用 Map,并以时间作为键来存储数据集。之后在解析任何文件时再次遇到同一个时间点,你就可以决定如何处理新的数据集(丢弃它、与该时间点已有的数据集取平均,等等)。下面是更新后的代码,它使用 Map 把数据集与特定的时间关联起来。这段代码只是打印一条语句,表明该时间点已经处理过;你应该插入自己的代码来处理重复的数据。

import java.util.HashMap; 
import java.util.Map; 
import java.util.Scanner; 
import java.util.List; 
import java.util.ArrayList; 
import java.io.*; 
import java.text.SimpleDateFormat; 
import java.util.Date; 

import org.apache.commons.io.FileUtils; 

/**
 * Downsamples whitespace-separated sensor logs to one record per second
 * across ALL files of a folder and prints the result as {@code put} lines
 * on standard output.
 *
 * <p>Every input record consists of 14 tokens: a date ({@code dd/MM/yyyy}),
 * a time ({@code HH:mm:ss}) and twelve sensor readings. The first record seen
 * for a given second is kept; any later record for the same second, in the
 * same file or in another one, is reported and dropped.
 *
 * <p>Note: one map entry is kept per distinct second, so memory use grows
 * with the number of distinct timestamps in the input.
 */
public class Downsampler
{

    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_PATH = "/tmp/data";

    /** Name of the metric written in every output line. */
    private static final String METRIC = "sensor.rat.128.riprova";

    /** Tokens per input record: date, time and twelve readings. */
    private static final int RECORD_SIZE = 14;

    /** Sensor tags, in the column order of the readings. */
    private static final String[] SENSORS =
            {"A", "B", "C", "D", "E", "F", "G", "H", "I", "L", "M", "N"};

    /**
     * Scans every regular file in the input folder. For each file it first
     * prints a "New time" / "Already processed time" line per record, then
     * twelve {@code put} lines for every second first seen in that file.
     *
     * @param args optional; {@code args[0]} overrides the default input folder
     * @throws IOException if the folder cannot be listed or a file cannot be opened
     * @throws java.text.ParseException if a record does not start with a valid date and time
     */
    public static void main(String[] args) throws Exception
    {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // listFiles() returns null (not an empty array) when the path is not a
        // readable directory; fail with a clear message instead of an NPE.
        File[] files = new File(path).listFiles();
        if (files == null)
        {
            throw new IOException("Cannot list files in directory: " + path);
        }
        // listFiles() guarantees no order; sort so that "first record wins"
        // is decided the same way on every run.
        java.util.Arrays.sort(files);

        // One formatter for the whole run; it is only used from this thread.
        SimpleDateFormat timestampFormat = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss");

        // Epoch second -> the twelve readings retained for that second.
        Map<Long, List<String>> timeValuesMap = new HashMap<Long, List<String>>();

        for (File file : files)
        {
            if (!file.isFile())
            {
                continue; // a Scanner cannot be opened on a sub-directory
            }
            List<String> list = readTokens(file);

            // Seconds first seen in this file, in input order.
            List<Long> newTimes = new ArrayList<Long>();

            // "i + RECORD_SIZE <= size" guarantees a complete record; a
            // trailing partial record is ignored.
            for (int i = 0; i + RECORD_SIZE <= list.size(); i += RECORD_SIZE)
            {
                String str = list.get(i) + " " + list.get(i + 1);
                Long epoch = Long.valueOf(timestampFormat.parse(str).getTime() / 1000);

                if (timeValuesMap.containsKey(epoch))
                {
                    System.out.println("Already processed time: " + str);
                    // Current policy: drop the duplicate. To combine it with
                    // the stored readings instead (average, min/max, ...),
                    // update timeValuesMap.get(epoch) here.
                }
                else
                {
                    System.out.println("New time: " + str);
                    // Copy this record's own readings (tokens after date and
                    // time) so each timestamp maps to an independent list.
                    timeValuesMap.put(epoch,
                            new ArrayList<String>(list.subList(i + 2, i + RECORD_SIZE)));
                    newTimes.add(epoch);
                }
            }

            for (Long epoch : newTimes)
            {
                printRecord(epoch.longValue(), timeValuesMap.get(epoch));
            }
        }
    }

    /** Reads all whitespace-separated tokens of {@code file}, closing it even on failure. */
    private static List<String> readTokens(File file) throws IOException
    {
        List<String> tokens = new ArrayList<String>();
        Scanner s = new Scanner(file);
        try
        {
            while (s.hasNext())
            {
                tokens.add(s.next());
            }
        }
        finally
        {
            s.close();
        }
        return tokens;
    }

    /** Prints one {@code put} line per sensor reading in {@code values}. */
    private static void printRecord(long epoch, List<String> values)
    {
        for (int i = 0; i < SENSORS.length; i++)
        {
            System.out.println("put " + METRIC + " " + epoch + " " + values.get(i)
                    + " sensor=" + SENSORS[i]);
        }
    }
}

注:对于非常大的数据集,如果 Map 中的条目数量变得非常大,这可能会导致 JVM 内存耗尽。

+0

不幸的是我有大约1GB的数据(它只是它的一部分)。恐怕这个解决方案正如你所说的,会让JVM耗尽内存 – alessandrob

+0

也许是这样。这取决于您可以给 JVM 分配多少内存,以及数据中有多少个唯一的时间戳。当然,每个唯一的时间戳都是 Map 中的一个条目,但如果数据中有很多重复的时间戳,你的 Map 可能不会太大。 – lreeder