您好,登錄后才能下訂單哦!
本篇內容介紹了“hadoop怎么合并sequcefie并在map中讀取”的有關知識,在實際案例的操作過程中,不少人都會遇到這樣的困境,接下來就讓小編帶領大家學習一下如何處理這些情況吧!希望大家仔細閱讀,能夠學有所成!
package hgs.sequencefile; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; //合并小文件 public class SequenceMain { public static void main(String[] args) throws IOException, URISyntaxException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(new URI("hdfs://192.168.6.129:9000"),conf); //獲得該文件夾下的所有的文件 FileStatus[] fstats = fs.listStatus(new Path("/words")); //System.out.println(fstats.length); Text key = new Text(); Text value = new Text(); @SuppressWarnings("deprecation") //創建一個sequecewriter //merge.seq是文件名 SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, new Path("/sequence/merge.seq"), key.getClass(), value.getClass()); //循環遍歷每個文件 for(FileStatus fis : fstats) { //將每個文件以key value的形式寫入到sequencefile中 FSDataInputStream finput = fs.open(fis.getPath()); byte[] buffer = new byte[(int)fis.getLen()]; IOUtils.readFully(finput, buffer, 0, buffer.length); //文件名為key 文件內容為value key.set(fis.getPath().getName()); value.set(buffer); writer.append(key, value); finput.close(); } writer.close(); fs.close(); } }
package hgs.sequencefile; import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class SequnceMapper extends Mapper<Text, Text, Text, Text> { @Override protected void map(Text key, Text value, Mapper<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { context.write(key, value); } }
package hgs.sequencefile; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class SequenceDriver { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = Job.getInstance(conf, "read_sequence_file"); job.setJarByClass(hgs.sequencefile.SequenceDriver.class); // TODO: specify a mapper job.setMapperClass(SequnceMapper.class); // TODO: specify a reducer //job.setReducerClass(Reducer.class); // TODO: specify output types job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //在這個設置讀取sequencefile的inputformat,該類讀取的是String淚習慣的key value //SequenceFileAsBinaryInputFormat 該類獨處的ByteWritable的key value job.setInputFormatClass(SequenceFileAsTextInputFormat.class); // TODO: specify input and output DIRECTORIES (not files) FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.6.129:9000/sequence")); FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.6.129:9000/seqresult")); if (!job.waitForCompletion(true)) return; } }
“hadoop怎么合并sequcefie并在map中讀取”的內容就介紹到這里了,感謝大家的閱讀。如果想了解更多行業相關的知識可以關注億速云網站,小編將為大家輸出更多高質量的實用文章!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。