2014-12-05 30 views
2

我正在尝试编写一个以文本文件存储的表为基础的map缩减代码。该表有两个属性。一个是id,第二个是name,代码应该采用相同id的所有值并连接它们。例如:1 xyz 2 xyz 1 abc应该导致1 xyzabc 2 xyz。 以下是我的版本code.As的初学者我已经修改了MaxTemperature代码,以了解这样做Map Reduce String字符串串联出界限错误

import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.mapreduce.Mapper; 
import java.io.IOException; 
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Reducer; 

public class MaxTemperature { 

public static class MaxTemperatureMapper 
    extends Mapper<Text, Text, Text, Text> { 

    @Override 
    public void map(Text key, Text value, Context context) 
    throws IOException, InterruptedException { 

    String line = value.toString(); 
    String lastWord = line.substring(line.lastIndexOf(" ")+1); 
    Text valq = new Text(); 
    valq.set(line.substring(0,4)); 
    context.write(new Text(lastWord), valq); 
     } 
    } 

public static class MaxTemperatureReducer 
extends Reducer<Text, Text, Text, Text> { 

    @Override 
    public void reduce(Text key, Iterable<Text> values, 
    Context context) 
    throws IOException, InterruptedException { 
    String p=""; 
    for (Text value : values) { 
    p=p+value.toString(); 
    } 
Text aa= new Text(); 
aa.set(p); 
context.write(key, new Text(aa)); 
    } 
} 

public static void main(String[] args) throws Exception { 
    if (args.length != 2) { 
    System.err.println("Usage: MaxTemperature <input path> <output path>"); 
    System.exit(-1); 
    } 

    Job job = new Job(); 
    job.setJarByClass(MaxTemperature.class); 
    job.setJobName("Max temperature"); 

    FileInputFormat.addInputPath(job, new Path(args[0])); 
FileOutputFormat.setOutputPath(job, new Path(args[1])); 

job.setMapperClass(MaxTemperatureMapper.class); 
job.setReducerClass(MaxTemperatureReducer.class); 

job.setOutputKeyClass(Text.class); 
job.setOutputValueClass(Text.class); 
    job.setInputFormatClass(KeyValueTextInputFormat.class); 

    System.exit(job.waitForCompletion(true) ? 0 : 1); 
    } 
} 

我输入文件

123456 name 
123456 name 
123456 age 
123456 age 
123456 relation 
132323 age 
123565 name 
258963 test 
258963 age 
254789 age 
254259 age 
652145 name 
985745 name 
523698 name 
214569 ame 
123546 name 
123456 age 
321456 age 
123456 age 
124589 hyderabad 


期望输出

123456 name,name,age (all values with index 123456) 
124589 hyderabad (al values with index 124589) 

我得到以下错误

java.lang.StringIndexOutOfBoundsException: String index out of range: 4 
    at java.lang.String.substring(String.java:1907) 
    at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:39) 
    at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:26) 
    at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140) 
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:672) 
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330) 
    at org.apache.hadoop.mapred.Child$4.run(Child.java:268) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at javax.security.auth.Subject.doAs(Subject.java:415) 
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614) 
    at org.apache.hadoop.mapred.Child.main(Child.java:262) 
+0

对于'子()'DOC IndexOutOfBoundsException异常说' - 如果的beginIndex为负,或endIndex大较大此String对象的长度或beginIndex大于endIndex.'。确保最终索引4已足够。 – 2014-12-05 07:10:11

+0

是的,我拿着它像15456654即一个巨大的数字仍然显示相同 – user229534 2014-12-05 07:11:22

回答