2011-10-16 22 views
0

我想按照以下页面所示,对数据进行 k-means 聚类: https://cwiki.apache.org/MAHOUT/clustering-of-synthetic-control-data.html 然而,当 MapReduce 作业即将运行时,我遇到了下面的错误(在对输入数据运行 k-means 算法时出错):

11/10/16 21:05:57 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_0,  Status : FAILED 
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:03 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_1, Status : FAILED 
Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:09 INFO mapred.JobClient: Task Id : attempt_201110161920_0008_m_000000_2,  Status : FAILED 
    Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector 
at java.net.URLClassLoader$1.run(URLClassLoader.java:202) 
at java.security.AccessController.doPrivileged(Native Method) 
at java.net.URLClassLoader.findClass(URLClassLoader.java:190) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:306) 
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301) 
at java.lang.ClassLoader.loadClass(ClassLoader.java:247) 
at java.lang.Class.forName0(Native Method) 
at java.lang.Class.forName(Class.java:247) 
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762) 
at org.apache.hadoop.io.WritableName.getClass(WritableName.java:71) 
at org.apache.hadoop.io.SequenceFile$Reader.getValueClass(SequenceFile.java:1613) 
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1555) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1428) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1417) 
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1412) 
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) 
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:418) 
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:620) 
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305) 
at org.apache.hadoop.mapred.Child.main(Child.java:170) 

11/10/16 21:06:18 INFO mapred.JobClient: Job complete: job_201110161920_0008 
11/10/16 21:06:18 INFO mapred.JobClient: Counters: 3 
11/10/16 21:06:18 INFO mapred.JobClient: Job Counters 
11/10/16 21:06:18 INFO mapred.JobClient:  Launched map tasks=4 
11/10/16 21:06:18 INFO mapred.JobClient:  Data-local map tasks=4 
11/10/16 21:06:18 INFO mapred.JobClient:  Failed map tasks=1 
Exception in thread "main" java.lang.InterruptedException: K-Means Iteration failed processing output/clusters-0/part-randomSeed 
at org.apache.mahout.clustering.kmeans.KMeansDriver.runIteration(KMeansDriver.java:363) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClustersMR(KMeansDriver.java:310) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.buildClusters(KMeansDriver.java:237) 
at org.apache.mahout.clustering.kmeans.KMeansDriver.run(KMeansDriver.java:152) 
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.run(Job.java:149) 
at org.apache.mahout.clustering.syntheticcontrol.kmeans.Job.main(Job.java:60) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) 
at java.lang.reflect.Method.invoke(Method.java:597) 
at org.apache.hadoop.util.ProgramDriver$ProgramDescription.invoke(ProgramDriver.java:68) 
at org.apache.hadoop.util.ProgramDriver.driver(ProgramDriver.java:139) 
at org.apache.mahout.driver.MahoutDriver.main(MahoutDriver.java:187) 
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) 
at java.lang.reflect.Method.invoke(Method.java:597) 
at org.apache.hadoop.util.RunJar.main(RunJar.java:156) 

有人可以告诉我如何解决这个问题吗?这对我真的很重要。谢谢你的时间。

回答

1

这意味着您没有提供在 Hadoop 上运行该作业所需的所有类。您提交的 JAR 文件必须打包所有依赖项,包括 Mahout 的 core 和 math 模块中的所有类。幸运的是,Mahout 已经为你做到了这一点。请参阅它在构建时生成的“.job”文件,该文件位于 target/ 目录中。

+0

你指的是我的 examples/target 目录,还是新生成的 target 目录? – Pavan

+0

他们说 Mahout 的 mahout-examples-$MAHOUT_VERSION.job 会执行实际的聚类任务,因此需要先创建它。你能告诉我如何创建这个文件吗?我非常感谢你的帮助。我想我是在这一步出错了。 – Pavan

+0

只需运行:`mvn package` –

0

你缺少org.apache.mahout.math包:

Error: java.lang.ClassNotFoundException: org.apache.mahout.math.Vector