Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ public ScalarTransformFunctionWrapper(FunctionInfo functionInfo) {
ColumnDataType resultType = FunctionUtils.getColumnDataType(resultClass);
if (resultType != null) {
_resultType = resultType;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes every OBJECT-returning scalar UDF to advertise STRING output and stringify the value. That silently changes query semantics for functions like nullIf/caseWhen: NULLIF(int_col, 0) now becomes a VARCHAR result instead of preserving the input type, so downstream arithmetic/comparisons will see the wrong type. We need a type-preserving resolution here rather than coercing OBJECT to STRING.

_resultMetadata = new TransformResultMetadata(resultType.toDataType(), !_resultType.isArray(), false);
DataType dataType = resultType == ColumnDataType.OBJECT ? DataType.STRING : resultType.toDataType();
_resultMetadata = new TransformResultMetadata(dataType, !_resultType.isArray(), false);
} else {
// Handle unrecognized result class with STRING
_resultType = ColumnDataType.STRING;
Expand Down Expand Up @@ -264,7 +265,7 @@ public String[] transformToStringValuesSV(ValueBlock valueBlock) {
}
Object value = _functionInvoker.invoke(_scalarArguments);
_stringValuesSV[i] =
value != null ? (String) _resultType.toInternal(value) : NullValuePlaceHolder.STRING;
value != null ? String.valueOf(_resultType.toInternal(value)) : NullValuePlaceHolder.STRING;
}
return _stringValuesSV;
}
Expand Down Expand Up @@ -424,6 +425,9 @@ private void getNonLiteralValues(ValueBlock valueBlock) {
case BYTES:
_nonLiteralValues[i] = transformFunction.transformToBytesValuesSV(valueBlock);
break;
case OBJECT:
_nonLiteralValues[i] = transformToObjectValues(transformFunction, valueBlock);
break;
case PRIMITIVE_INT_ARRAY:
_nonLiteralValues[i] = transformFunction.transformToIntValuesMV(valueBlock);
break;
Expand Down Expand Up @@ -482,4 +486,65 @@ private void getNonLiteralValues(ValueBlock valueBlock) {
}
}
}

private static Object[] transformToObjectValues(TransformFunction transformFunction, ValueBlock valueBlock) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of this helper (the numeric/decimal/timestamp/boolean/bytes SV branches and the whole MV switch) isn't exercised by the tests — only the STRING path is. Worth a couple more cases, especially one where the Object function returns null so the null-placeholder path gets hit.

TransformResultMetadata resultMetadata = transformFunction.getResultMetadata();
DataType dataType = resultMetadata.getDataType();
if (resultMetadata.isSingleValue()) {
switch (dataType) {
case BOOLEAN: {
int[] intValues = transformFunction.transformToIntValuesSV(valueBlock);
int numValues = intValues.length;
Boolean[] booleanValues = new Boolean[numValues];
for (int i = 0; i < numValues; i++) {
booleanValues[i] = intValues[i] == 1;
}
return booleanValues;
}
case INT:
return ArrayUtils.toObject(transformFunction.transformToIntValuesSV(valueBlock));
case LONG:
return ArrayUtils.toObject(transformFunction.transformToLongValuesSV(valueBlock));
case FLOAT:
return ArrayUtils.toObject(transformFunction.transformToFloatValuesSV(valueBlock));
case DOUBLE:
return ArrayUtils.toObject(transformFunction.transformToDoubleValuesSV(valueBlock));
case BIG_DECIMAL:
return transformFunction.transformToBigDecimalValuesSV(valueBlock);
case TIMESTAMP: {
long[] longValues = transformFunction.transformToLongValuesSV(valueBlock);
int numValues = longValues.length;
Timestamp[] timestampValues = new Timestamp[numValues];
for (int i = 0; i < numValues; i++) {
timestampValues[i] = new Timestamp(longValues[i]);
}
return timestampValues;
}
case STRING:
return transformFunction.transformToStringValuesSV(valueBlock);
case BYTES:
return transformFunction.transformToBytesValuesSV(valueBlock);
default:
throw new IllegalStateException("Unsupported data type: " + dataType);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

JSON isn't handled, so a JSON column or JSON-returning expr passed to an Object param (e.g. nullIf(jsonCol, '...')) hits this default and throws — even though JSON is stored as STRING. Can we add case JSON: next to STRING (here and in the MV switch)? There's a jsonSV test column handy to cover it.

}
}
switch (dataType) {
case INT:
return transformFunction.transformToIntValuesMV(valueBlock);
case LONG:
return transformFunction.transformToLongValuesMV(valueBlock);
case FLOAT:
return transformFunction.transformToFloatValuesMV(valueBlock);
case DOUBLE:
return transformFunction.transformToDoubleValuesMV(valueBlock);
case BIG_DECIMAL:
return transformFunction.transformToBigDecimalValuesMV(valueBlock);
case STRING:
return transformFunction.transformToStringValuesMV(valueBlock);
case BYTES:
return transformFunction.transformToBytesValuesMV(valueBlock);
default:
throw new IllegalStateException("Unsupported multi-value data type: " + dataType);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MV switch is missing BOOLEAN and TIMESTAMP, which the SV switch above handles — an MV boolean/timestamp child feeding an Object param would throw here. Intended, or should it mirror the SV cases?

}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,30 @@

public class ScalarTransformFunctionWrapperTest extends BaseTransformFunctionTest {

@Test
public void testObjectResultScalarTransformFunction() {
ExpressionContext expression = RequestContextUtils.getExpression("nullIf(1, 2)");
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
assertTrue(transformFunction.getResultMetadata().isSingleValue());
assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.STRING);

String[] expectedValues = new String[NUM_ROWS];
Arrays.fill(expectedValues, "1");
testTransformFunction(transformFunction, expectedValues);
}

@Test
public void testObjectParameterScalarTransformFunction() {
ExpressionContext expression =
RequestContextUtils.getExpression(String.format("nullIf(%s, '__pinot_nullif_miss__')", STRING_SV_COLUMN));
TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap);
assertTrue(transformFunction instanceof ScalarTransformFunctionWrapper);
assertTrue(transformFunction.getResultMetadata().isSingleValue());
assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.STRING);
testTransformFunction(transformFunction, _stringSVValues);
}

@Test
public void testStringLowerTransformFunction() {
ExpressionContext expression =
Expand Down
Loading