
Reading images from HDFS using MapReduce

Please help me with this code. I am trying to read images from HDFS, using WholeFileInputFormat together with WholeFileRecordReader. There are no compile-time errors, but the job fails at runtime with the output: Unable to create an instance of the given class WholeFileInputFormat. I wrote this code following the advice in How to read multiple image files as input from hdfs in map-reduce? Please take a look at it; it consists of three classes. How can I debug it, or is there another way to do this?

import java.awt.image.BufferedImage; 
import java.io.ByteArrayInputStream; 
import java.io.FileInputStream; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Iterator; 
import java.util.List; 
import javax.imageio.ImageIO; 
import net.semanticmetadata.lire.imageanalysis.AutoColorCorrelogram; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.conf.Configured; 
import org.apache.hadoop.fs.FSDataInputStream; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 
import org.apache.hadoop.mapred.lib.NullOutputFormat; 
import org.apache.hadoop.util.Tool; 
import org.apache.hadoop.util.ToolRunner; 

public class map2 extends Configured implements Tool { 


    public static class MapClass extends MapReduceBase 
      implements Mapper<NullWritable, BytesWritable, Text, Text> { 


     private Text input_image = new Text(); 
     private Text input_vector = new Text(); 


     @Override 
     public void map(NullWritable key,BytesWritable value, 
       OutputCollector<Text, Text> output, 
       Reporter reporter) throws IOException { 

      System.out.println("CorrelogramIndex Method:"); 
     String featureString; 
     int MAXIMUM_DISTANCE = 16; 
     AutoColorCorrelogram.Mode mode = AutoColorCorrelogram.Mode.FullNeighbourhood; 



     byte[] identifier=value.getBytes(); 

      BufferedImage bimg = ImageIO.read(new ByteArrayInputStream(identifier)); 

      AutoColorCorrelogram vd = new AutoColorCorrelogram(MAXIMUM_DISTANCE, mode); 

      vd.extract(bimg); 

      featureString = vd.getStringRepresentation(); 
      double[] bytearray = vd.getDoubleHistogram(); 

      System.out.println("image: " + identifier + " " + featureString); 




      System.out.println(" ------------- "); 


      input_image.set(identifier); 
      input_vector.set(featureString); 
      output.collect(input_image, input_vector); 




     } 
    } 



    public static class Reduce extends MapReduceBase 
      implements Reducer<Text, Text, Text, Text> { 

     @Override 
     public void reduce(Text key, Iterator<Text> values, 
       OutputCollector<Text, Text> output, 
       Reporter reporter) throws IOException { 
      String out_vector = ""; 

      while (values.hasNext()) { 
       out_vector += (values.next().toString()); 
      } 
      output.collect(key, new Text(out_vector)); 
     } 
    } 

    static int printUsage() { 
     System.out.println("map2 [-m <maps>] [-r <reduces>] <input> <output>"); 
     ToolRunner.printGenericCommandUsage(System.out); 
     return -1; 
    } 


    @Override 
    public int run(String[] args) throws Exception { 



     JobConf conf = new JobConf(getConf(), map2.class); 
     conf.setJobName("image_mapreduce"); 

      conf.setInputFormat(WholeFileInputFormat.class); 
     conf.setOutputFormat(NullOutputFormat.class); 


     conf.setOutputKeyClass(Text.class); 

     conf.setOutputValueClass(Text.class); 

     conf.setMapperClass(MapClass.class); 

     conf.setReducerClass(Reduce.class); 






     List<String> other_args = new ArrayList<>(); 
     for (int i = 0; i < args.length; ++i) { 
      try { 
       switch (args[i]) { 
        case "-m": 
         conf.setNumMapTasks(Integer.parseInt(args[++i])); 
         break; 
        case "-r": 
         conf.setNumReduceTasks(Integer.parseInt(args[++i])); 
         break; 
        default: 
         other_args.add(args[i]); 
         break; 
       } 
      } catch (NumberFormatException except) { 
       System.out.println("ERROR: Integer expected instead of " + args[i]); 
       return printUsage(); 
      } catch (ArrayIndexOutOfBoundsException except) { 
       System.out.println("ERROR: Required parameter missing from " 
         + args[i - 1]); 
       return printUsage(); 
      } 
     } 

     // Make sure there are exactly 2 parameters left. 
     if (other_args.size() != 2) { 
      System.out.println("ERROR: Wrong number of parameters: " 
        + other_args.size() + " instead of 2."); 
      return printUsage(); 
     } 




     FileInputFormat.setInputPaths(conf, other_args.get(0)); 
     FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); 
     JobClient.runJob(conf); 
     return 0; 
    } 

    public static void main(String[] args) throws Exception { 
      int res = ToolRunner.run(new Configuration(), new map2(), args); 
    System.exit(res); 
    } 
} 
----------------------------------------------------------------------------------- 
//WholeFileInputFormat 

import java.io.IOException; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.mapred.*; 

public class WholeFileInputFormat<NullWritable, BytesWritable> 
     extends FileInputFormat<NullWritable, BytesWritable> { 

    // @Override 
    protected boolean isSplitable(JobContext context, Path file) { 
     return false; 
    } 
    //@Override 

    public WholeFileRecordReader createRecordReader(
      InputSplit split, TaskAttemptContext context) throws IOException, 
      InterruptedException { 
     WholeFileRecordReader reader = new WholeFileRecordReader(); 
     reader.initialize(split, context); 
     return reader; 
    } 




@Override 
public RecordReader<NullWritable, BytesWritable> getRecordReader(InputSplit split, 
    JobConf job, Reporter reporter) 
    throws IOException; 
} 

    ------------------------------------------------------------------------------- 
    //WholeFileRecordReader

import java.io.IOException; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FSDataInputStream; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.BytesWritable; 
import org.apache.hadoop.io.IOUtils; 
import org.apache.hadoop.io.NullWritable; 
import org.apache.hadoop.mapred.FileSplit; 
import org.apache.hadoop.mapred.InputSplit; 
import org.apache.hadoop.mapred.RecordReader; 
import org.apache.hadoop.mapred.TaskAttemptContext; 

class WholeFileRecordReader implements RecordReader<NullWritable, BytesWritable> { //recordreader 

    private FileSplit fileSplit; 
    private Configuration conf; 
    private BytesWritable value = new BytesWritable(); 
    private boolean processed = false; 

    public void initialize(InputSplit split, TaskAttemptContext context) 
      throws IOException, InterruptedException { 
     this.fileSplit = (FileSplit) split; 
     this.conf = context.getJobConf(); 
    } 

    @Override 
    public boolean next(NullWritable k, BytesWritable v) throws IOException { 
     if (!processed) { 
      byte[] contents = new byte[(int) fileSplit.getLength()]; 
      Path file = fileSplit.getPath(); 
      org.apache.hadoop.fs.FileSystem fs = file.getFileSystem(conf); 
      FSDataInputStream in = null; 
      try { 
       in = fs.open(file); 
       IOUtils.readFully(in, contents, 0, contents.length); 
       value.set(contents, 0, contents.length); 
      } finally { 
       IOUtils.closeStream(in); 
      } 
      processed = true; 
      return true; 
     } 
     return false; 
    } 
@Override 
    public NullWritable createKey() { 
    return NullWritable.get(); 
    } 

    @Override 
    public BytesWritable createValue() { 
    return value; 
    } 

@Override 
public long getPos() throws IOException { 
    throw new UnsupportedOperationException("Not supported yet."); 
} 

@Override 
public void close() throws IOException { 
    throw new UnsupportedOperationException("Not supported yet."); 
} 

@Override 
public float getProgress() throws IOException { 
    throw new UnsupportedOperationException("Not supported yet."); 
} 
    } 
What command line are you using to launch your job? Are you specifying a jar, and does that jar contain the classes? –

Yes, I am specifying a jar. It has the three classes. – Amnesiac

@ChrisWhite: Is my logic correct? – Amnesiac

Answer


WholeFileInputFormat is declared abstract, so how do you expect to create an instance of it?

Either make it non-abstract, or subclass it with a concrete implementation.
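
For example, a minimal non-abstract version for the old mapred API could look like the sketch below (untested; it also assumes WholeFileRecordReader is given a constructor taking the FileSplit and JobConf instead of the new-API initialize(...) method):

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class WholeFileInputFormat extends FileInputFormat<NullWritable, BytesWritable> {

    // Each file becomes exactly one record, so a file must never be split.
    @Override
    protected boolean isSplitable(FileSystem fs, Path filename) {
        return false;
    }

    // Concrete implementation of the abstract method; without it the class
    // stays abstract and the framework cannot instantiate it.
    @Override
    public RecordReader<NullWritable, BytesWritable> getRecordReader(
            InputSplit split, JobConf job, Reporter reporter) throws IOException {
        // Assumes a constructor WholeFileRecordReader(FileSplit, JobConf).
        return new WholeFileRecordReader((FileSplit) split, job);
    }
}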

@Thomas ... thanks for pointing out the error. I have edited the code. I wrote it along the lines of TextInputFormat, SequenceFileInputFormat, etc. NetBeans still shows errors such as "implement all abstract methods" or "override", etc. Please tell me whether the code I wrote is correct. – Amnesiac

Why do you need to implement your own format? Throwing an exception is not the right way to solve this. –
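
As an aside, those last three methods can be given trivial implementations instead of throwing. A minimal sketch, assuming the processed flag and fileSplit field already present in the WholeFileRecordReader above:

    @Override
    public long getPos() throws IOException {
        // Either the whole file has been consumed or nothing has been read yet.
        return processed ? fileSplit.getLength() : 0;
    }

    @Override
    public float getProgress() throws IOException {
        return processed ? 1.0f : 0.0f;
    }

    @Override
    public void close() throws IOException {
        // Nothing to do: the input stream is already closed inside next().
    }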

@Thomas, then how will I be able to read images from HDFS? I want to read the images as bytes. How do I do that? Hadoop does not define any input format that reads images as bytes. – Amnesiac
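
One more detail worth noting for the mapper: BytesWritable.getBytes() returns the internal buffer, which may be longer than the actual data, so only the first getLength() bytes should be passed to ImageIO. A small sketch of how the start of the map() method above might handle this:

      // Copy only the valid bytes before decoding the image.
      byte[] buffer = value.getBytes();              // may contain trailing padding
      byte[] imageBytes = java.util.Arrays.copyOf(buffer, value.getLength());

      BufferedImage bimg = ImageIO.read(new ByteArrayInputStream(imageBytes));
      if (bimg == null) {
       // ImageIO.read() returns null if the bytes are not a readable image.
       reporter.incrCounter("map2", "unreadable_images", 1);
       return;
      }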