Skip to content

Commit

Permalink
[SPARK-49200][SQL] Fix null type non-codegen ordering exception
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Spark mark `NullType` as orderable, and we return 0 when gen comparing code for `NullType`.
```
object OrderUtils {
  def isOrderable(dataType: DataType): Boolean = dataType match {
    case NullType => true
```
This pr makes `NullType` ordering work for non-codegen path to avoid exception.

### Why are the changes needed?

Fix exception:
```sql
set spark.sql.codegen.factoryMode=NO_CODEGEN;
set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.EliminateSorts;

select * from range(10) order by array(null);
```

```
org.apache.spark.SparkIllegalArgumentException: Type PhysicalNullType does not support ordered operations.
    at org.apache.spark.sql.errors.QueryExecutionErrors$.orderedOperationUnsupportedByDataTypeError(QueryExecutionErrors.scala:352)
    at org.apache.spark.sql.catalyst.types.PhysicalNullType.ordering(PhysicalDataType.scala:246)
    at org.apache.spark.sql.catalyst.types.PhysicalNullType.ordering(PhysicalDataType.scala:243)
    at org.apache.spark.sql.catalyst.types.PhysicalArrayType$$anon$1.<init>(PhysicalDataType.scala:283)
    at org.apache.spark.sql.catalyst.types.PhysicalArrayType.interpretedOrdering$lzycompute(PhysicalDataType.scala:281)
    at org.apache.spark.sql.catalyst.types.PhysicalArrayType.interpretedOrdering(PhysicalDataType.scala:281)
    at org.apache.spark.sql.catalyst.types.PhysicalArrayType.ordering(PhysicalDataType.scala:277)
    at org.apache.spark.sql.catalyst.expressions.InterpretedOrdering.compare(ordering.scala:67)
    at org.apache.spark.sql.catalyst.expressions.InterpretedOrdering.compare(ordering.scala:40)
    at org.apache.spark.sql.execution.UnsafeExternalRowSorter$RowComparator.compare(UnsafeExternalRowSorter.java:254)
    at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortComparator.compare(UnsafeInMemorySorter.java:70)
    at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter$SortComparator.compare(UnsafeInMemorySorter.java:44)
```

### Does this PR introduce _any_ user-facing change?

yes, bug fix

### How was this patch tested?

add test

### Was this patch authored or co-authored using generative AI tooling?

no

Closes apache#47707 from ulysses-you/null-ordering.

Authored-by: ulysses-you <[email protected]>
Signed-off-by: youxiduo <[email protected]>
  • Loading branch information
ulysses-you committed Aug 12, 2024
1 parent 2fb8dff commit e3ba74b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,7 @@ case class PhysicalMapType(keyType: DataType, valueType: DataType, valueContains

class PhysicalNullType() extends PhysicalDataType with PhysicalPrimitiveType {
override private[sql] def ordering =
throw QueryExecutionErrors.orderedOperationUnsupportedByDataTypeError(
"PhysicalNullType")
implicitly[Ordering[Unit]].asInstanceOf[Ordering[Any]]
override private[sql] type InternalType = Any
@transient private[sql] lazy val tag = typeTag[InternalType]
}
Expand Down
13 changes: 12 additions & 1 deletion sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import org.apache.commons.io.FileUtils
import org.apache.spark.{AccumulatorSuite, SPARK_DOC_ROOT, SparkArithmeticException, SparkDateTimeException, SparkException, SparkNumberFormatException, SparkRuntimeException}
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
import org.apache.spark.sql.catalyst.ExtendedAnalysisException
import org.apache.spark.sql.catalyst.expressions.{GenericRow, Hex}
import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRow, Hex}
import org.apache.spark.sql.catalyst.expressions.Cast._
import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial}
import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite}
Expand Down Expand Up @@ -1430,6 +1430,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
}
}

test("SPARK-49200: Fix null type non-codegen ordering exception") {
withSQLConf(
SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString,
SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
"org.apache.spark.sql.catalyst.optimizer.EliminateSorts") {
checkAnswer(
sql("SELECT * FROM range(3) ORDER BY array(null)"),
Seq(Row(0), Row(1), Row(2)))
}
}

test("SPARK-8837: use keyword in column name") {
withTempView("t") {
val df = Seq(1 -> "a").toDF("count", "sort")
Expand Down

0 comments on commit e3ba74b

Please sign in to comment.