-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Handle Object-returning scalar transform functions #18628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,7 +66,8 @@ public ScalarTransformFunctionWrapper(FunctionInfo functionInfo) { | |
| ColumnDataType resultType = FunctionUtils.getColumnDataType(resultClass); | ||
| if (resultType != null) { | ||
| _resultType = resultType; | ||
| _resultMetadata = new TransformResultMetadata(resultType.toDataType(), !_resultType.isArray(), false); | ||
| DataType dataType = resultType == ColumnDataType.OBJECT ? DataType.STRING : resultType.toDataType(); | ||
| _resultMetadata = new TransformResultMetadata(dataType, !_resultType.isArray(), false); | ||
| } else { | ||
| // Handle unrecognized result class with STRING | ||
| _resultType = ColumnDataType.STRING; | ||
|
|
@@ -264,7 +265,7 @@ public String[] transformToStringValuesSV(ValueBlock valueBlock) { | |
| } | ||
| Object value = _functionInvoker.invoke(_scalarArguments); | ||
| _stringValuesSV[i] = | ||
| value != null ? (String) _resultType.toInternal(value) : NullValuePlaceHolder.STRING; | ||
| value != null ? String.valueOf(_resultType.toInternal(value)) : NullValuePlaceHolder.STRING; | ||
| } | ||
| return _stringValuesSV; | ||
| } | ||
|
|
@@ -424,6 +425,9 @@ private void getNonLiteralValues(ValueBlock valueBlock) { | |
| case BYTES: | ||
| _nonLiteralValues[i] = transformFunction.transformToBytesValuesSV(valueBlock); | ||
| break; | ||
| case OBJECT: | ||
| _nonLiteralValues[i] = transformToObjectValues(transformFunction, valueBlock); | ||
| break; | ||
| case PRIMITIVE_INT_ARRAY: | ||
| _nonLiteralValues[i] = transformFunction.transformToIntValuesMV(valueBlock); | ||
| break; | ||
|
|
@@ -482,4 +486,65 @@ private void getNonLiteralValues(ValueBlock valueBlock) { | |
| } | ||
| } | ||
| } | ||
|
|
||
| private static Object[] transformToObjectValues(TransformFunction transformFunction, ValueBlock valueBlock) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most of this helper (the numeric/decimal/timestamp/boolean/bytes SV branches and the whole MV switch) isn't exercised by the tests — only the STRING path is. Worth a couple more cases, especially one where the Object function returns null so the null-placeholder path gets hit. |
||
| TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); | ||
| DataType dataType = resultMetadata.getDataType(); | ||
| if (resultMetadata.isSingleValue()) { | ||
| switch (dataType) { | ||
| case BOOLEAN: { | ||
| int[] intValues = transformFunction.transformToIntValuesSV(valueBlock); | ||
| int numValues = intValues.length; | ||
| Boolean[] booleanValues = new Boolean[numValues]; | ||
| for (int i = 0; i < numValues; i++) { | ||
| booleanValues[i] = intValues[i] == 1; | ||
| } | ||
| return booleanValues; | ||
| } | ||
| case INT: | ||
| return ArrayUtils.toObject(transformFunction.transformToIntValuesSV(valueBlock)); | ||
| case LONG: | ||
| return ArrayUtils.toObject(transformFunction.transformToLongValuesSV(valueBlock)); | ||
| case FLOAT: | ||
| return ArrayUtils.toObject(transformFunction.transformToFloatValuesSV(valueBlock)); | ||
| case DOUBLE: | ||
| return ArrayUtils.toObject(transformFunction.transformToDoubleValuesSV(valueBlock)); | ||
| case BIG_DECIMAL: | ||
| return transformFunction.transformToBigDecimalValuesSV(valueBlock); | ||
| case TIMESTAMP: { | ||
| long[] longValues = transformFunction.transformToLongValuesSV(valueBlock); | ||
| int numValues = longValues.length; | ||
| Timestamp[] timestampValues = new Timestamp[numValues]; | ||
| for (int i = 0; i < numValues; i++) { | ||
| timestampValues[i] = new Timestamp(longValues[i]); | ||
| } | ||
| return timestampValues; | ||
| } | ||
| case STRING: | ||
| return transformFunction.transformToStringValuesSV(valueBlock); | ||
| case BYTES: | ||
| return transformFunction.transformToBytesValuesSV(valueBlock); | ||
| default: | ||
| throw new IllegalStateException("Unsupported data type: " + dataType); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. JSON isn't handled, so a JSON column or JSON-returning expr passed to an Object param (e.g. |
||
| } | ||
| } | ||
| switch (dataType) { | ||
| case INT: | ||
| return transformFunction.transformToIntValuesMV(valueBlock); | ||
| case LONG: | ||
| return transformFunction.transformToLongValuesMV(valueBlock); | ||
| case FLOAT: | ||
| return transformFunction.transformToFloatValuesMV(valueBlock); | ||
| case DOUBLE: | ||
| return transformFunction.transformToDoubleValuesMV(valueBlock); | ||
| case BIG_DECIMAL: | ||
| return transformFunction.transformToBigDecimalValuesMV(valueBlock); | ||
| case STRING: | ||
| return transformFunction.transformToStringValuesMV(valueBlock); | ||
| case BYTES: | ||
| return transformFunction.transformToBytesValuesMV(valueBlock); | ||
| default: | ||
| throw new IllegalStateException("Unsupported multi-value data type: " + dataType); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. MV switch is missing BOOLEAN and TIMESTAMP, which the SV switch above handles — an MV boolean/timestamp child feeding an Object param would throw here. Intended, or should it mirror the SV cases? |
||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This changes every OBJECT-returning scalar UDF to advertise STRING output and stringify the value. That silently changes query semantics for functions like
nullIf/caseWhen:NULLIF(int_col, 0)now becomes a VARCHAR result instead of preserving the input type, so downstream arithmetic/comparisons will see the wrong type. We need a type-preserving resolution here rather than coercing OBJECT to STRING.