2015-12-30 76 views
1

我试图在 Coursera 的 Hortonworks HDP 沙箱上运行一个简单的 MapReduce 程序。下面是该程序（取自 https://github.com/jz33/Coursera-Cloud-Computing-Applications-Solution-Manual/blob/master/hw2/TitleCount.java ）：在 Hortonworks HDP 上运行 jar 时抛出 ClassNotFoundException

import org.apache.commons.logging.Log; 
import org.apache.commons.logging.LogFactory; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.FSDataInputStream; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

import java.io.*; 
import java.util.*; 
/** 
* Classic "Word Count" 
*/ 
/**
 * Classic "Word Count" over titles: tokenizes each input line with a
 * configurable delimiter set, drops configurable stop words, and emits
 * (word, count) pairs.
 *
 * Delimiters and stop words are read from HDFS files whose paths arrive
 * via the Configuration keys "delimiters" and "stopwords" (set on the
 * command line with -D).
 */
public class TitleCount extends Configured implements Tool {
    public static void main(String[] args) throws Exception {
        // ToolRunner parses generic options (-D key=value) into the
        // Configuration before invoking run().
        int res = ToolRunner.run(new Configuration(), new TitleCount(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "Title Count");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setMapperClass(TitleCountMap.class);
        job.setReducerClass(TitleCountReduce.class);

        // args[0] = input path, args[1] = output path (must not exist yet).
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Ships the jar containing this class to the task nodes; without it
        // mapper/reducer classes cannot be resolved on the cluster.
        job.setJarByClass(TitleCount.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Reads the whole HDFS file at {@code path} into a single String,
     * normalizing line endings to '\n'.
     *
     * @param path HDFS path of the file to read
     * @param conf job configuration used to resolve the FileSystem
     * @return the file contents, one '\n' appended per line read
     * @throws IOException if the file cannot be opened or read
     */
    public static String readHDFSFile(String path, Configuration conf) throws IOException {
        Path pt = new Path(path);
        FileSystem fs = FileSystem.get(pt.toUri(), conf);
        // try-with-resources closes both the reader and the underlying
        // FSDataInputStream (the original leaked them).
        try (BufferedReader buffIn = new BufferedReader(new InputStreamReader(fs.open(pt)))) {
            StringBuilder everything = new StringBuilder();
            String line;
            while ((line = buffIn.readLine()) != null) {
                everything.append(line).append('\n');
            }
            return everything.toString();
        }
    }

    /** Tokenizes input lines and emits (lower-cased token, 1) for non-stop-words. */
    public static class TitleCountMap extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        Set<String> stopWords = new HashSet<String>();
        String delimiters;
        // Reused output key to avoid allocating a Text per token (Hadoop idiom).
        private final Text word = new Text();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();

            // Delimiter characters for StringTokenizer, loaded from HDFS.
            String delimitersPath = conf.get("delimiters");
            delimiters = readHDFSFile(delimitersPath, conf);

            // Stop-word list: one word per line.
            String stopWordsPath = conf.get("stopwords");
            stopWords.addAll(Arrays.asList(readHDFSFile(stopWordsPath, conf).split("\n")));
        }

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer stk = new StringTokenizer(value.toString(), delimiters);
            while (stk.hasMoreTokens()) {
                String e = stk.nextToken().trim().toLowerCase();
                if (!stopWords.contains(e)) {
                    word.set(e);
                    context.write(word, ONE);
                }
            }
        }
    }

    /** Sums the per-token counts emitted by the mapper. */
    public static class TitleCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output value (Hadoop idiom: avoid per-record allocation).
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable e : values) {
                sum += e.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
}

这是我如何运行它:

  • ​​
  • cd cloudapp-mp2
  • sudo -u hdfs bash start.sh
  • cp /media/sharedFolder/TitleCount.java .
  • mkdir build
  • export HADOOP_CLASSPATH=$JAVA_HOME/lib/tools.jar
  • hadoop com.sun.tools.javac.Main TitleCount.java -d build
  • jar -cvf TitleCount.jar -C build/ ./
  • sudo -u hdfs hadoop fs -rm -r /mp2/A-output

    此后，build 文件夹中有了 .class 文件（TitleCount.class、TitleCount$TitleCountMap.class、TitleCount$TitleCountReduce.class），当前文件夹 cloudapp-mp2 中有了 TitleCount.jar 文件。

  • sudo -u hdfs hadoop jar TitleCount.jar TitleCount -D stopwords=/mp2/misc/stopwords.txt -D delimiters=/mp2/misc/delimiters.txt /mp2/titles /mp2/A-output

这是我得到的错误:

Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class TitleCount$TitleCountReduce not found

完整堆栈跟踪：

000_0, Status : FAILED 
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class Title 
Count$TitleCountReduce not found 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195 
) 
     at org.apache.hadoop.mapreduce.task.JobContextImpl.getReducerClass(JobCo 
ntextImpl.java:210) 
     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:611 
) 
     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) 
     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) 
     at java.security.AccessController.doPrivileged(Native Method) 
     at javax.security.auth.Subject.doAs(Subject.java:415) 
     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma 
tion.java:1657) 
     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 
Caused by: java.lang.ClassNotFoundException: Class TitleCount$TitleCountReduce n 
ot found 
     at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.jav 
a:2101) 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193 
) 
     ... 8 more 

15/12/30 12:46:45 INFO mapreduce.Job: Task Id : attempt_1451479269826_0001_r_000 
000_1, Status : FAILED 
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class Title 
Count$TitleCountReduce not found 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195 
) 
     at org.apache.hadoop.mapreduce.task.JobContextImpl.getReducerClass(JobCo 
ntextImpl.java:210) 
     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:611 
) 
     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) 
     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) 
     at java.security.AccessController.doPrivileged(Native Method) 
     at javax.security.auth.Subject.doAs(Subject.java:415) 
     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma 
tion.java:1657) 
     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 
Caused by: java.lang.ClassNotFoundException: Class TitleCount$TitleCountReduce n 
ot found 
     at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.jav 
a:2101) 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193 
) 
     ... 8 more 

15/12/30 12:46:57 INFO mapreduce.Job: Task Id : attempt_1451479269826_0001_r_000 
000_2, Status : FAILED 
Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class Title 
Count$TitleCountReduce not found 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2195 
) 
     at org.apache.hadoop.mapreduce.task.JobContextImpl.getReducerClass(JobCo 
ntextImpl.java:210) 
     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:611 
) 
     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:389) 
     at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164) 
     at java.security.AccessController.doPrivileged(Native Method) 
     at javax.security.auth.Subject.doAs(Subject.java:415) 
     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInforma 
tion.java:1657) 
     at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158) 
Caused by: java.lang.ClassNotFoundException: Class TitleCount$TitleCountReduce n 
ot found 
     at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.jav 
a:2101) 
     at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2193 
) 
     ... 8 more 

15/12/30 12:47:05 INFO mapreduce.Job: map 0% reduce 100% 
15/12/30 12:47:06 INFO mapreduce.Job: Job job_1451479269826_0001 failed with sta 
te FAILED due to: Task failed task_1451479269826_0001_r_000000 
Job failed as tasks failed. failedMaps:0 failedReduces:1 

15/12/30 12:47:07 INFO mapreduce.Job: Counters: 7 
     Job Counters 
       Failed reduce tasks=4 
       Launched reduce tasks=4 
       Total time spent by all maps in occupied slots (ms)=0 
       Total time spent by all reduces in occupied slots (ms)=32247 
       Total time spent by all reduce tasks (ms)=32247 
       Total vcore-seconds taken by all reduce tasks=32247 
       Total megabyte-seconds taken by all reduce tasks=8061750 

回答

2

问题在于我使用了 sudo -u hdfs，而不是以 root 用户运行。

我不得不创建 /user/root 文件夹：

sudo -u hdfs hadoop fs -mkdir /user/root 

并为 /user/root 和 /mp2 设置所有权/权限：

sudo -u hdfs hadoop fs -chown root:root /user/root 
sudo -u hdfs hadoop fs -chown root:root /mp2 
sudo -u hdfs hadoop fs -chown root:root /mp2/* 
相关问题