如果只是返回String,那么直接继承UDF即可,如果想要返回MAP/LIST/STRUCT,则需要继承GenericUDF;
如下代码示例将URL中的参数解析成一个MAP并返回:
import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; public class UrlParamsToMap extends GenericUDF { private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>(); private StringObjectInspector urlOI; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentException("UrlParamsToMap param must be 1 argu."); } urlOI = (StringObjectInspector) arguments[0]; return ObjectInspectorFactory.getStandardMapObjectInspector( PrimitiveObjectInspectorFactory.writableStringObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector); } @Override public Object evaluate(DeferredObject[] deferredObjects) throws HiveException { Object urlObj = deferredObjects[0].get(); Text url = (Text) urlOI.getPrimitiveWritableObject(urlObj); getParamsMap(url.toString(), sortMap); return sortMap; } public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) { Map<Text, Text> defaultMap = new LinkedHashMap<Text, Text>(); if (StringUtils.isBlank(url)) { return defaultMap; } String[] urlSplits = url.split("\\?"); if (null == urlSplits || urlSplits.length != 2) { return defaultMap; } String urlParamStr = urlSplits[1]; if (StringUtils.isBlank(urlParamStr)) { return defaultMap; } String[] paramSplits = urlParamStr.split("&"); if (null == paramSplits || paramSplits.length == 0) { return 
defaultMap; } for (String kvStr : paramSplits) { if (StringUtils.isBlank(kvStr)) { continue; } String[] kvs = kvStr.split("="); if (null != kvs && kvs.length == 2) { if (StringUtils.isNotBlank(kvs[0]) && StringUtils.isNotBlank(kvs[1])) { sortMap.put(new Text(kvs[0]), new Text(kvs[1])); } } } return sortMap; } @Override public String getDisplayString(String[] strings) { return "map(" + strings[0] + ")"; } }
转载请注明来自:疯狂的蚂蚁www.crazyant.net