hive怎么指定多個字符作為列分隔符

發布時間：2021-07-26 14:21:42 來源：億速云閱讀：228 作者：chen 欄目：云計算

本篇內容主要講解“hive怎么指定多個字符作為列分隔符”，感興趣的朋友不妨來看看。本文介紹的方法操作簡單快捷，實用性強。下面就讓小編來帶大家學習“hive怎么指定多個字符作為列分隔符”吧!

hive創建表指定分隔符，不支持多個字符作為分隔符,如果想使用多個字符作為分割符的話就需要實現InputFormat.主要重寫next方法,代碼如下

package hiveStream;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
public class MyHiveInputFormat extends TextInputFormat implements
        JobConfigurable {

    public RecordReader<LongWritable, Text> getRecordReader(
            InputSplit genericSplit, JobConf job, Reporter reporter)
            throws IOException {
        reporter.setStatus(genericSplit.toString());
        return new MyRecordReader((FileSplit) genericSplit, job);
    }
}

package hiveStream;

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.util.LineReader;

public class MyRecordReader implements RecordReader<LongWritable, Text> {

    private CompressionCodecFactory compressionCodecs = null;
    private long start;
    private long pos;
    private long end;
    private LineReader lineReader;
    int maxLineLength;

    // 構造方法
    public MyRecordReader(FileSplit inputSplit, Configuration job)
            throws IOException {
        maxLineLength = job.getInt("mapred.mutilCharRecordReader.maxlength",
                Integer.MAX_VALUE);
        start = inputSplit.getStart();
        end = start + inputSplit.getLength();
        final Path file = inputSplit.getPath();
        // 創建壓縮器
        compressionCodecs = new CompressionCodecFactory(job);
        final CompressionCodec codec = compressionCodecs.getCodec(file);
        // 打開文件系統
        FileSystem fs = file.getFileSystem(job);
        FSDataInputStream fileIn = fs.open(file);
        boolean skipFirstLine = false;

        if (codec != null) {
            lineReader = new LineReader(codec.createInputStream(fileIn), job);
            end = Long.MAX_VALUE;
        } else {
            if (start != 0) {
                skipFirstLine = true;
                --start;
                fileIn.seek(start);
            }
            lineReader = new LineReader(fileIn, job);
        }

        if (skipFirstLine) {
            start += lineReader.readLine(new Text(), 0,
                    (int) Math.min((long) Integer.MAX_VALUE, end - start));
        }
        this.pos = start;
    }

    public MyRecordReader(InputStream in, long offset, long endOffset,
            int maxLineLength) {
        this.maxLineLength = maxLineLength;
        this.start = offset;
        this.lineReader = new LineReader(in);
        this.pos = offset;
        this.end = endOffset;
    }

    public MyRecordReader(InputStream in, long offset, long endOffset,
            Configuration job) throws IOException {
        this.maxLineLength = job.getInt(
                "mapred.mutilCharRecordReader.maxlength", Integer.MAX_VALUE);
        this.lineReader = new LineReader(in, job);
        this.start = offset;
        this.end = endOffset;
    }

    @Override
    public void close() throws IOException {
        if (lineReader != null)
            lineReader.close();
    }

    @Override
    public LongWritable createKey() {
        return new LongWritable();
    }

    @Override
    public Text createValue() {
        return new Text();
    }

    @Override
    public long getPos() throws IOException {
        return pos;
    }

    @Override
    public float getProgress() throws IOException {
        if (start == end) {
            return 0.0f;
        } else {
            return Math.min(1.0f, (pos - start) / (float) (end - start));
        }
    }

    @Override
    public boolean next(LongWritable key, Text value) throws IOException {
        while (pos < end) {
            key.set(pos);
            int newSize = lineReader.readLine(value, maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, end - pos),
                            maxLineLength));
            // 把字符串中的"##"轉變為"#"
            String strReplace = value.toString().replace("##", "#");
            Text txtReplace = new Text();
            txtReplace.set(strReplace);
            value.set(txtReplace.getBytes(), 0, txtReplace.getLength());
            if (newSize == 0)
                return false;
            pos += newSize;
            if (newSize < maxLineLength)
                return true;

        }
        return false;
    }
}

建表語句:自定義 outputformat/inputformat 后，在建表時需要指定 outputformat/inputformat

create external table testHiveInput( id int,name string,age int) row format delimited fields terminated by '|' stored as INPUTFORMAT 'hiveStream.MyHiveInputFormat'  
OUTPUTFORMAT'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' location '/user/hdfs/hiveInput';

測試數據:

1##Tom##22
2##Jerry##22
3##Jeny##22

測試代碼( 通過jdbc來查詢數據):

public static void testHive() throws Exception {
        String sql = "select id,name,age from testHiveInput";
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String url = "jdbc:hive2://xxx.xxx.xxx.xxx:10000";
        Connection conn = DriverManager.getConnection(url, "hive", "passwd");
        Statement stmt = conn.createStatement();
        stmt.execute("add jar /usr/lib/hive/lib/hiveInput.jar");
        ResultSet rs = stmt.executeQuery(sql);
        while (rs.next()) {
            System.out.println(rs.getString("id")+" "+ rs.getString("name") + " " + rs.getString("age"));
        }
    }

到此，相信大家對“hive怎么指定多個字符作為列分隔符”有了更深的了解，不妨來實際操作一番吧！這里是億速云網站，更多相關內容可以進入相關頻道進行查詢，關注我們，繼續學習！

向AI問一下細節

亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

hive怎么指定多個字符作為列分隔符

猜你喜歡

亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

hive怎么指定多個字符作為列分隔符

猜你喜歡

最新資訊

相關推薦

相關標簽