I am learning the Hadoop MapReduce tools, so I am sharing the program I am working on right now. I started from the traditional WordCount program, later wrote my own program, and exported a jar for it. This is my mapper, written against the hadoop-1.2.1 jar as a dependency. It converts numbers into words, and it has processed about 4 lakh (400,000) numbers without a single error. Here is the program:
package com.whodesire.count;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.whodesire.numstats.AmtInWords;

public class CountInWords {

    public static class NumberTokenizerMapper
            extends Mapper<Object, Text, LongWritable, Text> {

        private static final Text theOne = new Text("1");
        private LongWritable longWord = new LongWritable();

        public void map(Object key, Text value, Context context) {
            try {
                StringTokenizer itr = new StringTokenizer(value.toString());
                while (itr.hasMoreTokens()) {
                    longWord.set(Long.parseLong(itr.nextToken()));
                    context.write(longWord, theOne);
                }
            } catch (ClassCastException cce) {
                System.out.println("ClassCastException raiseddd...");
                System.exit(0);
            } catch (IOException | InterruptedException ioe) {
                ioe.printStackTrace();
                System.out.println("IOException | InterruptedException raiseddd...");
                System.exit(0);
            }
        }
    }

    public static class ModeReducerCumInWordsCounter
            extends Reducer<LongWritable, Text, LongWritable, Text> {

        private Text result = new Text();

        // The user-defined reduce function, invoked once for each unique key
        public void reduce(LongWritable key, Iterable<Text> values,
                Context context) throws IOException, InterruptedException {
            // The key, a LongWritable, is passed to the AmtInWords constructor as a String
            AmtInWords aiw = new AmtInWords(key.toString());
            result.set(aiw.getInWords());
            // Finally the number and its wording are emitted to the target output
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // All random numbers inside the input files were generated using
        // https://andrew.hedges.name/experiments/random/

        // Load the configuration files into the conf object
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        Job job = new Job(conf, "CountInWords");

        // Specify the jar which contains the required classes for the job to run
        job.setJarByClass(CountInWords.class);
        job.setMapperClass(NumberTokenizerMapper.class);
        job.setCombinerClass(ModeReducerCumInWordsCounter.class);
        job.setReducerClass(ModeReducerCumInWordsCounter.class);

        // Set the key and value classes for the map output
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Set the input location and, similarly, the output location
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Write the results to a single output file
        job.setNumReduceTasks(1);

        // Submit the job and wait for it to complete
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
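I export the project as a runnable jar and launch it roughly as shown below; the jar name and HDFS paths are only an illustration of the call, not the exact ones I use:

hadoop jar CountInWords.jar com.whodesire.count.CountInWords /user/me/numbers/input /user/me/numbers/output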
Looking at the error, it seems you have not added the mapreduce jar files to your project. If it helps, I suggest you review the hadoop jar files you have added to the project (the hadoop-jar-xxx.jar files). If this helps, please mark it as helpful or as the answer. Thanks.
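As a rough sketch (the versions below are only an example and should match the Hadoop version on your cluster): if the project is built with Maven, dependencies along these lines normally pull in the MapReduce client classes on Hadoop 2.x, while on Hadoop 1.x the single hadoop-core jar plays the same role.

<dependencies>
  <!-- example versions only; align them with the cluster's Hadoop version -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.7.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.7.1</version>
  </dependency>
</dependencies>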
I am using hadoop 2.7.1. I added three jar files, such as hadoop-mapreduce-client-jobclient-2.7.1 and hadoop-mapreduce-client-core-2.7.1.jar, and I still get that error. – programer