
I want to calculate the average of a set of numbers on a standalone Hadoop setup. I can't get the program to run, although it compiles without any errors and the jar file is created. I believe I'm using the correct command to run the program on my Hadoop setup. Could someone please look at my code and tell me if anything is wrong? Here is my MapReduce code for computing the average:

import java.io.IOException; 
import java.util.StringTokenizer; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
class sum_count {
    int sum;
    int count;
}

public class Average {

    public static class TokenizerMapper
            extends Mapper<Object, Text, Text, Object> {

        private final static IntWritable valueofkey = new IntWritable();
        private Text word = new Text();
        sum_count sc = new sum_count();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            int sum = 0;
            int count = 0;
            int v;
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                v = Integer.parseInt(word.toString());
                count = count + 1;
                sum = sum + v;
            }
            // valueofkey.set(sum);
            word.set("average");

            sc.sum = sum;
            sc.count = count;

            // context.write(word, valueofkey);
            // valueofkey.set(count);
            // word.set("count");
            context.write(word, sc);
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, Object, Text, IntWritable> {

        private IntWritable result = new IntWritable();
        private IntWritable test = new IntWritable();

        public void reduce(Text key, Iterable<sum_count> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            int count = 0;
            int wholesum = 0;
            int wholecount = 0;
            for (sum_count val : values) {
                // value = val.get();
                wholesum = wholesum + val.sum;
                wholecount = wholecount + val.count;
            }
            int res = wholesum / wholecount;
            result.set(res);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "");
        job.setJarByClass(Average.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Here is my output:

[email protected]:~/hadoop-2.7.2$ ./bin/hadoop jar av.jar Average bin/user/hduser/input bin/user/hduser/out12 
Picked up JAVA_TOOL_OPTIONS: -javaagent:/usr/share/java/jayatanaag.jar 
16/07/01 11:19:05 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id 
16/07/01 11:19:05 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId= 
16/07/01 11:19:05 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this. 
16/07/01 11:19:05 INFO input.FileInputFormat: Total input paths to process : 2 
16/07/01 11:19:05 INFO mapreduce.JobSubmitter: number of splits:2 
16/07/01 11:19:05 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local276107063_0001 
16/07/01 11:19:05 INFO mapreduce.Job: The url to track the job: http://localhost:8080/ 
16/07/01 11:19:05 INFO mapreduce.Job: Running job: job_local276107063_0001 
16/07/01 11:19:05 INFO mapred.LocalJobRunner: OutputCommitter set in config null 
16/07/01 11:19:05 INFO output.FileOutputCommitter: File Output Committer Algorithm version is 1 
16/07/01 11:19:05 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter 
16/07/01 11:19:05 INFO mapred.LocalJobRunner: Waiting for map tasks 
16/07/01 11:19:05 INFO mapred.LocalJobRunner: Starting task: attempt_local276107063_0001_m_000000_0 
16/07/01 11:19:06 INFO output.FileOutputCommitter: File Output Committer Algorithm version is 1 
16/07/01 11:19:06 INFO mapred.Task: Using ResourceCalculatorProcessTree : [ ] 
16/07/01 11:19:06 INFO mapred.LocalJobRunner: Starting task: attempt_local276107063_0001_m_000001_0 
16/07/01 11:19:06 INFO output.FileOutputCommitter: File Output Committer Algorithm version is 1 
16/07/01 11:19:06 INFO mapred.Task: Using ResourceCalculatorProcessTree : [ ] 
16/07/01 11:19:06 INFO mapred.LocalJobRunner: map task executor complete. 
16/07/01 11:19:06 WARN mapred.LocalJobRunner: job_local276107063_0001 
java.lang.Exception: java.lang.RuntimeException: java.lang.reflect.InvocationTargetException 
    at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462) 
    at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:522) 
Caused by: java.lang.RuntimeException: java.lang.reflect.InvocationTargetException 
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:134) 
    at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:745) 
    at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341) 
    at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:243) 
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) 
    at java.util.concurrent.FutureTask.run(FutureTask.java:262) 
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) 
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) 
    at java.lang.Thread.run(Thread.java:745) 
Caused by: java.lang.reflect.InvocationTargetException 
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) 
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) 
    at java.lang.reflect.Constructor.newInstance(Constructor.java:526) 
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:132) 
    ... 8 more 
Caused by: java.lang.NoClassDefFoundError: sum_count 
    at Average$TokenizerMapper.<init>(Average.java:24) 
    ... 13 more 
Caused by: java.lang.ClassNotFoundException: sum_count 
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366) 
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355) 
    at java.security.AccessController.doPrivileged(Native Method) 
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354) 
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425) 
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358) 
    ... 14 more 
16/07/01 11:19:06 INFO mapreduce.Job: Job job_local276107063_0001 running in uber mode : false 
16/07/01 11:19:06 INFO mapreduce.Job: map 0% reduce 0% 
16/07/01 11:19:06 INFO mapreduce.Job: Job job_local276107063_0001 failed with state FAILED due to: NA 
16/07/01 11:19:06 INFO mapreduce.Job: Counters: 0 

You should describe in detail what "can't run the program" means. Do you get an error message? Empty output? It might also be useful to post the commands you used to run the program, since even if you believe they are correct, they could be wrong. –


Here is my output: –

Answer


You are getting a ClassNotFoundException for sum_count. Declaring two classes at the top level of a file is not really a good way to structure your code. It looks like when TokenizerMapper tries to create that class, it can't find it on the classpath.
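
(A quick check, and an assumption on my part rather than something established above: a package-private top-level class like sum_count compiles into its own sum_count.class file, so listing the jar's contents shows whether it was packaged at all.)

jar tf av.jar    # sum_count.class should appear alongside Average*.class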

I would just put the class in its own file. It will need to change anyway: your job won't work as you have written it, because sum_count doesn't implement the Writable interface. It should look more like:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class SumCount implements Writable {

    public int sum;
    public int count;

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialize the fields in a fixed order.
        out.writeInt(sum);
        out.writeInt(count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Read them back in the same order write() used.
        sum = in.readInt();
        count = in.readInt();
    }
}
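
For background, Hadoop serializes every map output value when shuffling it to the reducers, which is why a custom value type needs Writable's write()/readFields() pair. As a minimal sketch (my illustration, assuming the SumCount class above), the nested mapper and reducer inside Average would then be declared against that type:

// Sketch only: these replace the nested classes inside public class Average.
public static class TokenizerMapper
        extends Mapper<Object, Text, Text, SumCount> {

    private final Text word = new Text();
    private final SumCount sc = new SumCount();

    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        int sum = 0;
        int count = 0;
        while (itr.hasMoreTokens()) {
            // Parse each token as an integer and fold it into the running totals.
            sum += Integer.parseInt(itr.nextToken());
            count++;
        }
        word.set("average");
        sc.sum = sum;
        sc.count = count;
        context.write(word, sc);  // value type now matches the Mapper declaration
    }
}

public static class IntSumReducer
        extends Reducer<Text, SumCount, Text, IntWritable> {

    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<SumCount> values, Context context)
            throws IOException, InterruptedException {
        int wholesum = 0;
        int wholecount = 0;
        for (SumCount val : values) {
            wholesum += val.sum;
            wholecount += val.count;
        }
        result.set(wholesum / wholecount);  // integer average, as in the question
        context.write(key, result);
    }
}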

In your main() you also need to tell it what key/value types the mapper will write out:

job.setMapOutputKeyClass(Text.class); 
job.setMapOutputValueClass(SumCount.class); 
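
Putting it together, the job setup would look roughly like this (again a sketch of mine, not verbatim from the question). Note that the setCombinerClass(IntSumReducer.class) call from the original main() has to be dropped, because a combiner's output types must match the reducer's input types, and this reducer consumes SumCount but emits IntWritable:

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "average");
    job.setJarByClass(Average.class);
    job.setMapperClass(TokenizerMapper.class);
    // No combiner: IntSumReducer's output (IntWritable) would not match
    // the SumCount values the reducer expects as input.
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(SumCount.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}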

Note the change in the class name. See the Java naming conventions documentation here.


A new problem has come up; could you please answer this question: http://stackoverflow.com/questions/38177609/output-file-contains-mapperoutput-instead-of-reducer-output –