Hive实现返回MAP的UDF

如果只是返回String,那么直接继承UDF即可,如果想要返回MAP/LIST/STRUCT,则需要继承GenericUDF;

如下代码示例,将URL中的参数解析成一个MAP并返回:

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.Text;

/**
 * Hive generic UDF that parses the query string of a URL into a
 * {@code map<string,string>} of parameter names to parameter values.
 *
 * <p>The result map is a single instance that is cleared and refilled on every
 * call to {@link #evaluate(DeferredObject[])}, so one row's parameters never
 * leak into the next row's result.
 */
public class UrlParamsToMap extends GenericUDF {
    /** Result map reused across rows; always cleared at the start of evaluate. */
    private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>();
    /** Inspector used to read the single string argument; set by initialize. */
    private StringObjectInspector urlOI;

    /**
     * Validates the argument list and declares the return type.
     *
     * @param arguments inspectors for the call arguments; exactly one string
     *                  argument is accepted
     * @return a standard map inspector with writable-string keys and values
     * @throws UDFArgumentException if the argument count is not 1 or the
     *                              argument is not a string type
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentException("UrlParamsToMap param must be 1 argu.");
        }

        // Reject non-string arguments with a query-level error instead of a raw
        // ClassCastException from the cast below.
        if (!(arguments[0] instanceof StringObjectInspector)) {
            throw new UDFArgumentException("UrlParamsToMap argument must be a string.");
        }
        urlOI = (StringObjectInspector) arguments[0];

        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Parses the URL held in the first argument.
     *
     * @param deferredObjects the call arguments; only index 0 is read
     * @return the reused result map holding this row's parameters; empty when
     *         the input is NULL or contains no parsable parameters
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        // Drop whatever the previous row left behind before doing anything else.
        sortMap.clear();

        Object urlObj = deferredObjects[0].get();
        if (urlObj == null) {
            return sortMap;
        }

        Text url = urlOI.getPrimitiveWritableObject(urlObj);
        if (url == null) {
            return sortMap;
        }

        getParamsMap(url.toString(), sortMap);
        return sortMap;
    }

    /**
     * Extracts {@code key=value} pairs from the query string of {@code url}
     * and adds them to {@code sortMap}.
     *
     * <p>The URL must split into exactly two parts on {@code '?'}; the second
     * part is split on {@code '&'}, and each piece is split on {@code '='}.
     * Only pieces that yield exactly two non-blank parts are stored. Existing
     * entries of {@code sortMap} are not removed by this method.
     *
     * @param url     the URL to parse; may be null or blank
     * @param sortMap the map that receives the parsed parameters
     * @return {@code sortMap} when a query string was found and scanned;
     *         otherwise a new empty map, leaving {@code sortMap} untouched
     */
    public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) {
        if (StringUtils.isBlank(url)) {
            return new LinkedHashMap<Text, Text>();
        }

        // String.split never returns null, so only the length needs checking.
        String[] urlSplits = url.split("\\?");
        if (urlSplits.length != 2) {
            return new LinkedHashMap<Text, Text>();
        }

        String urlParamStr = urlSplits[1];
        if (StringUtils.isBlank(urlParamStr)) {
            return new LinkedHashMap<Text, Text>();
        }

        // A query string made only of '&' characters splits into an empty array.
        String[] paramSplits = urlParamStr.split("&");
        if (paramSplits.length == 0) {
            return new LinkedHashMap<Text, Text>();
        }

        for (String kvStr : paramSplits) {
            if (StringUtils.isBlank(kvStr)) {
                continue;
            }

            String[] kvs = kvStr.split("=");
            if (kvs.length == 2
                    && StringUtils.isNotBlank(kvs[0])
                    && StringUtils.isNotBlank(kvs[1])) {
                sortMap.put(new Text(kvs[0]), new Text(kvs[1]));
            }
        }
        return sortMap;
    }

    /**
     * Builds the text shown for this call in query plans.
     *
     * @param strings display strings of the call arguments
     * @return the first argument's display string wrapped in {@code map(...)}
     */
    @Override
    public String getDisplayString(String[] strings) {
        return "map(" + strings[0] + ")";
    }

}

 

转载请注明来自:疯狂的蚂蚁www.crazyant.net

相关推荐

Leave a Comment