2013-11-28 65 views
0

我是Hadoop的新手(主题:在Java中使用Hadoop MapReduce)。我有以下格式的文件:

123textfinderlater。这是一个固定宽度的文件,我想为它添加分隔符。假设第一个字段是123,即长度为3;第二个字段是textfinder,即长度为10;第三个字段是later,即长度为5。每个字段都有预定义的长度。现在我需要添加分隔符来分隔这些字段,我的输出应该是123|textfinder|later。我只有值(文件中的行),那么mapper和reducer程序的键应该是什么?

在此先感谢

+0

您如何确定输入中的字段?它是由空格还是制表符分隔?即它是否像'123 textfinder later'? –

+0

我们有文件的布局。每个字段都有固定的长度……字段之间没有任何分隔符。 – user3047154

回答

0

针对你的具体情况,甚至不需要reducer。mapper的输入键值对仍然像往常一样是“行号 - 行内容”,你只需要把添加了分隔符的行作为键写出即可。请查看以下代码:

import java.io.IOException; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 



public class Delimiter extends Configured implements Tool { 

    public static class DelimiterMapper 
     extends Mapper<LongWritable, Text, Text, NullWritable> { 

    private static Text addDelimiter(Text value, char delimiter) { 
    String str = value.toString(); 
    String ret = str.substring(0,2) + delimiter + str.substring(3,12) + delimiter + str.substring(13); 
    return new Text(ret); 
    } 

    public void map(LongWritable key, Text value, Context context) 
        throws IOException, InterruptedException { 
     context.write(addDelimiter(value, '|'), NullWritable.get()); 
    } 

    } 

    public int run(String[] args) 
    throws IOException, InterruptedException, ClassNotFoundException { 
    Job job = Job.getInstance(getConf()); 
    if (args.length != 2) { 
     System.err.println("Usage: Delimiter <in> <out>"); 
     return 2; 
    } 

    FileInputFormat.addInputPath(job, new Path(args[0])); 
    Path outputDir = new Path(args[1]); 
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) { 
     throw new IOException("Output directory " + outputDir + 
           "already exists"); 
    } 
    FileOutputFormat.setOutputPath(job, outputDir); 
    job.setJobName("Delimiter"); 
    job.setJarByClass(Delimiter.class); 
    job.setMapperClass(DelimiterMapper.class); 
    job.setNumReduceTasks(0); 
    job.setOutputKeyClass(Text.class); 
    job.setOutputValueClass(NullWritable.class); 
    return job.waitForCompletion(true) ? 0:1; 

    } 

    public static void main(String[] args) throws Exception { 
    int res = ToolRunner.run(new Configuration(), new Delimiter(), args); 
    System.exit(res); 
    } 
} 
+0

非常感谢 – user3047154