2
我正在尝试编写一个以文本文件存储的表为基础的map缩减代码。该表有两个属性。一个是id,第二个是name,代码应该采用相同id的所有值并连接它们。例如:1 xyz 2 xyz 1 abc应该导致1 xyzabc 2 xyz。 以下是我的版本code.As的初学者我已经修改了MaxTemperature代码,以了解这样做Map Reduce String字符串串联出界限错误
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class MaxTemperature {
public static class MaxTemperatureMapper
extends Mapper<Text, Text, Text, Text> {
@Override
public void map(Text key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
String lastWord = line.substring(line.lastIndexOf(" ")+1);
Text valq = new Text();
valq.set(line.substring(0,4));
context.write(new Text(lastWord), valq);
}
}
public static class MaxTemperatureReducer
extends Reducer<Text, Text, Text, Text> {
@Override
public void reduce(Text key, Iterable<Text> values,
Context context)
throws IOException, InterruptedException {
String p="";
for (Text value : values) {
p=p+value.toString();
}
Text aa= new Text();
aa.set(p);
context.write(key, new Text(aa));
}
}
public static void main(String[] args) throws Exception {
if (args.length != 2) {
System.err.println("Usage: MaxTemperature <input path> <output path>");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(MaxTemperature.class);
job.setJobName("Max temperature");
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(MaxTemperatureMapper.class);
job.setReducerClass(MaxTemperatureReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(KeyValueTextInputFormat.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
我输入文件
123456 name
123456 name
123456 age
123456 age
123456 relation
132323 age
123565 name
258963 test
258963 age
254789 age
254259 age
652145 name
985745 name
523698 name
214569 ame
123546 name
123456 age
321456 age
123456 age
124589 hyderabad
〜
期望输出
123456 name,name,age (all values with index 123456)
124589 hyderabad (al values with index 124589)
我得到以下错误
java.lang.StringIndexOutOfBoundsException: String index out of range: 4
at java.lang.String.substring(String.java:1907)
at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:39)
at MaxTemperature$MaxTemperatureMapper.map(MaxTemperature.java:26)
at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:140)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:672)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
at org.apache.hadoop.mapred.Child$4.run(Child.java:268)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapred.Child.main(Child.java:262)
两
对于'子()'DOC IndexOutOfBoundsException异常说' - 如果的beginIndex为负,或endIndex大较大此String对象的长度或beginIndex大于endIndex.'。确保最终索引4已足够。 – 2014-12-05 07:10:11
是的,我拿着它像15456654即一个巨大的数字仍然显示相同 – user229534 2014-12-05 07:11:22