Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ public Double visitCall(RexCall call) {
if (!deep) {
return 1.0;
}

/*
* Ignore any predicates on partition columns because we have already
* accounted for these in the Table row count.
Expand Down Expand Up @@ -160,6 +159,33 @@ public Double visitCall(RexCall call) {
break;
}

case IS_NULL: {
  // IS NULL selectivity is derived from column statistics only when the input is a
  // table scan; every other input uses the generic function selectivity.
  if (childRel instanceof HiveTableScan) {
    HiveTableScan hiveTableScan = (HiveTableScan) childRel;
    if (hasMissingColumnStats(call, hiveTableScan)) {
      // Null counts for the referenced columns are absent or only estimated.
      selectivity = DEFAULT_COMPARISON_SELECTIVITY;
      break;
    }
    double noOfNulls = getMaxNulls(call, hiveTableScan);
    if (childCardinality >= noOfNulls) {
      // Math.max keeps the divisor at 1 or more, so an empty input cannot divide by zero.
      selectivity = noOfNulls / Math.max(childCardinality, 1);
    } else {
      // More nulls than rows: the statistics are inconsistent, so warn and fall back.
      HiveConfPlannerContext ctx = childRel.getCluster().getPlanner().getContext().unwrap(HiveConfPlannerContext.class);
      String msg = "Invalid statistics: Number of null values > number of tuples. " +
          "Consider recomputing statistics for table: " +
          ((RelOptHiveTable) childRel.getTable()).getHiveTableMD().getFullyQualifiedName();
      // unwrap() may return null when the planner context does not provide a
      // HiveConfPlannerContext; in that case only the log warning is emitted.
      if (ctx != null && ctx.isExplainPlan()) {
        SessionState.getConsole().printError("WARNING: " + msg);
      }
      LOG.warn(msg);
      selectivity = DEFAULT_COMPARISON_SELECTIVITY;
    }
  } else {
    selectivity = computeFunctionSelectivity(call);
  }
  break;
}

case LESS_THAN_OR_EQUAL:
case GREATER_THAN_OR_EQUAL:
case LESS_THAN:
Expand Down Expand Up @@ -191,7 +217,6 @@ public Double visitCall(RexCall call) {
}
selectivity = computeFunctionSelectivity(call);
}

return selectivity;
}

Expand Down Expand Up @@ -839,6 +864,27 @@ private long getMaxNulls(RexCall call, HiveTableScan t) {
return maxNoNulls;
}

/**
 * Checks whether usable column statistics are unavailable for the columns referenced by a call.
 *
 * <p>The result is {@code true} when the call references no input columns, when the scan
 * returns fewer statistics entries than referenced columns, or when any returned entry is
 * {@code null} or marked as estimated. Staleness of the statistics is not considered.
 *
 * @param call expression whose input references are inspected
 * @param t table scan that is asked for the column statistics
 * @return {@code true} if statistics are missing or estimated for any referenced column
 */
private boolean hasMissingColumnStats(RexCall call, HiveTableScan t) {
  final Set<Integer> referencedColumns = HiveCalciteUtil.getInputRefs(call);
  if (referencedColumns.isEmpty()) {
    return true;
  }

  final List<ColStatistics> stats = t.getColStat(new ArrayList<Integer>(referencedColumns));
  // Fewer entries than referenced columns means some column has no statistics at all.
  boolean missing = stats.size() < referencedColumns.size();
  for (int i = 0; !missing && i < stats.size(); i++) {
    final ColStatistics columnStats = stats.get(i);
    // Estimated statistics are treated the same as absent ones.
    missing = columnStats == null || columnStats.isEstimated();
  }
  return missing;
}

/**
 * Convenience overload that forwards the operands of the given call to the
 * operand-based {@code getMaxNDV} overload and returns its result unchanged.
 *
 * @param call call whose operands are passed on
 * @return whatever the operand-based overload returns for {@code call.getOperands()}
 */
private Double getMaxNDV(RexCall call) {
return getMaxNDV(call.getOperands());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,32 @@ public void testBetweenWithCastToDecimal7s1() {
checkBetweenSelectivity(0, universe, total, cast, 100f, 0f);
}

/**
 * With real column statistics reporting three nulls, IS NULL selectivity is expected to be
 * the null count divided by the number of rows in {@code VALUES}.
 */
@Test
public void testComputeIsNullSelectivityWithStats() {
  final int numNulls = 3;
  stats.setNumNulls(numNulls);
  doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0));

  RexNode isNullFilter = REX_BUILDER.makeCall(SqlStdOperatorTable.IS_NULL, inputRef0);
  // numNulls / row count
  float expected = (float) numNulls / VALUES.length;
  checkSelectivity(expected, isNullFilter);
}

/**
 * When the table returns no column statistics at all, IS NULL selectivity is expected to
 * fall back to one third (DEFAULT_COMPARISON_SELECTIVITY).
 */
@Test
public void testComputeIsNullSelectivityMissingStats() {
  // DEFAULT_COMPARISON_SELECTIVITY
  final float expectedFallback = 1f / 3f;
  doReturn(Collections.emptyList()).when(tableMock).getColStat(Collections.singletonList(0));

  RexNode isNullFilter = REX_BUILDER.makeCall(SqlStdOperatorTable.IS_NULL, inputRef0);
  checkSelectivity(expectedFallback, isNullFilter);
}

/**
 * Statistics flagged as estimated must be handled like missing statistics, so the expected
 * IS NULL selectivity is one third (DEFAULT_COMPARISON_SELECTIVITY) rather than a value
 * derived from the recorded null count.
 */
@Test
public void testComputeIsNullSelectivityEstimatedStatsFallback() {
  final float expectedFallback = 1f / 3f;

  ColStatistics estimatedStats = new ColStatistics();
  estimatedStats.setIsEstimated(true);
  estimatedStats.setNumNulls(0);
  doReturn(Collections.singletonList(estimatedStats)).when(tableMock).getColStat(Collections.singletonList(0));

  RexNode isNullFilter = REX_BUILDER.makeCall(SqlStdOperatorTable.IS_NULL, inputRef0);
  checkSelectivity(expectedFallback, isNullFilter);
}

private void checkSelectivity(float expectedSelectivity, RexNode filter) {
FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq);
Assert.assertEquals(filter.toString(), expectedSelectivity, estimator.estimateSelectivity(filter), DELTA);
Expand Down