Skip to content

Commit

Permalink
Feature/spark 3.5 (#435) issue #394
Browse files Browse the repository at this point in the history
* Added spark 3.5 to compile

* Update documentation and CI to add spark 3.5

* scalafmt

* Doc error due to a new alternative version of the method
  • Loading branch information
alfonsorr authored Mar 12, 2024
1 parent be8f352 commit fee55fc
Show file tree
Hide file tree
Showing 43 changed files with 140 additions and 65 deletions.
6 changes: 5 additions & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@ spark_3.3:

spark_3.4:
- changed-files:
- any-glob-to-any-file: [ 'core/src/main/spark*3.4*/**/*', 'core/src/main/scala/**/*' ]
- any-glob-to-any-file: [ 'core/src/main/spark*3.4*/**/*', 'core/src/main/scala/**/*' ]

spark_3.5:
- changed-files:
- any-glob-to-any-file: [ 'core/src/main/spark*3.5*/**/*', 'core/src/main/scala/**/*' ]
10 changes: 10 additions & 0 deletions .github/workflows/matrix_includes.json
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,15 @@
"spark": "3.4",
"scope": "test-uploadReport",
"isRelease": "release"
},
{
"spark": "3.5.0",
"scope": "test",
"isRelease": "-"
},
{
"spark": "3.5",
"scope": "test-uploadReport",
"isRelease": "release"
}
]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ following spark versions.
| 3.4.0 | 2.12 / 2.13 || - |
| 3.4.1 | 2.12 / 2.13 || - |
| 3.4.2 | 2.12 / 2.13 || [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-4_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-4_2.12/0.0.7) |
| 3.5.0 | 2.12 / 2.13 || - |
| 3.5.1 | 2.12 / 2.13 || [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-5_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-5_2.12/0.0.7) |



Expand Down
7 changes: 5 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ import scala.language.postfixOps

val stableVersion = "0.0.7"

val sparkDefaultShortVersion = "3.4"
val sparkDefaultShortVersion = "3.5"
val spark30Version = "3.0.3"
val spark31Version = "3.1.3"
val spark32Version = "3.2.3"
val spark33Version = "3.3.4"
val spark34Version = "3.4.2"
val spark35Version = "3.5.1"

val versionRegex = """^(.*)\.(.*)\.(.*)$""".r
val versionRegexShort = """^(.*)\.(.*)$""".r
Expand All @@ -23,7 +24,8 @@ val parserSparkVersion: String => String = {
case versionRegexShort("3", "2") => spark32Version
case versionRegexShort("3", "3") => spark33Version
case versionRegexShort("3", "4") => spark34Version
case versionRegex("3", b, c) => s"3.$b.$c"
case versionRegexShort("3", "5") => spark35Version
case versionRegex("3", b, c) => s"3.$b.$c"
}

val long2ShortVersion: String => String = { case versionRegex(a, b, _) =>
Expand All @@ -36,6 +38,7 @@ val scalaVersionSelect: String => List[String] = {
case versionRegex("3", "2", _) => List(scala212, scala213)
case versionRegex("3", "3", _) => List(scala212, scala213)
case versionRegex("3", "4", _) => List(scala212, scala213)
case versionRegex("3", "5", _) => List(scala212, scala213)
}

ThisBuild / organization := "org.hablapps"
Expand Down
9 changes: 5 additions & 4 deletions core/src/main/scala/doric/syntax/StringColumns.scala
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package doric
package syntax

import scala.jdk.CollectionConverters._

import cats.implicits._
import doric.DoricColumn.sparkFunction

import org.apache.spark.sql.{Column, functions => f}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.{Column, functions => f}

import scala.jdk.CollectionConverters._

protected trait StringColumns {

Expand Down Expand Up @@ -154,7 +155,7 @@ protected trait StringColumns {
* Computes the Levenshtein distance of the two given string columns.
*
* @group String Type
* @see [[org.apache.spark.sql.functions.levenshtein]]
* @see [[https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html#levenshtein(l:org.apache.spark.sql.Column,r:org.apache.spark.sql.Column):org.apache.spark.sql.Column]]
*/
def levenshtein(dc: StringColumn): IntegerColumn =
(s.elem, dc.elem).mapN(f.levenshtein).toDC
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ package syntax

import cats.implicits._
import doric.types.CollectionType
import org.apache.spark.sql.{Column, functions => f, Row}

import org.apache.spark.sql.{Column, Row, functions => f}
import org.apache.spark.sql.catalyst.expressions._

trait ArrayColumns3x {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package doric
package syntax

import scala.jdk.CollectionConverters._

import doric.types.SparkType
import org.apache.spark.sql.{Row, functions => f}

import scala.jdk.CollectionConverters._
import org.apache.spark.sql.{Row, functions => f}

private[syntax] trait DStructs3x {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ package syntax

import cats.implicits._
import doric.types.SparkType
import org.apache.spark.sql.catalyst.expressions.{MapFilter, MapZipWith, TransformKeys, TransformValues}

import org.apache.spark.sql.{Column, Row, functions => f}
import org.apache.spark.sql.catalyst.expressions.{MapFilter, MapZipWith, TransformKeys, TransformValues}

trait MapColumns3x {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
package doric
package syntax

import scala.jdk.CollectionConverters._

import cats.implicits._

import org.apache.spark.sql.{Column, functions => f}
import org.apache.spark.sql.catalyst.expressions.StringSplit
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Column, functions => f}

import scala.jdk.CollectionConverters._

trait StringColumn3x {
implicit class StringOperationsSyntax3x(s: DoricColumn[String]) {
Expand Down
31 changes: 31 additions & 0 deletions core/src/main/spark_3.5_mount/scala/doric/syntax/All.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package doric.syntax

/**
  * Aggregate trait that mixes in every doric syntax trait listed below, so
  * that extending `All` alone brings all of them in at once.
  *
  * Visibility is restricted to the `doric` package.
  *
  * NOTE(review): this copy lives under the `spark_3.5_mount` source
  * directory, so it is presumably the variant selected when building against
  * Spark 3.5 -- confirm against the build definition.
  *
  * NOTE(review): the order of the `with` clauses determines Scala's trait
  * linearization; append new traits rather than reordering unless it has been
  * verified that no members overlap.
  */
private[doric] trait All
extends ArrayColumns
with TypeMatcher
with CommonColumns
with DStructs
with LiteralConversions
with MapColumns
with NumericColumns
with DateColumns
with TimestampColumns
with BooleanColumns
with StringColumns
with ControlStructures
with AggregationColumns
with CNameOps
with BinaryColumns
with Interpolators
with AggregationColumns31
with BooleanColumns31
with NumericColumns31
with NumericColumns32
with StringColumns31
with BinaryColumns32
with ArrayColumns3x
with CommonColumns3x
with MapColumns3x
with StringColumn3x
with AggregationColumns32
with DStructs3x
5 changes: 4 additions & 1 deletion core/src/test/scala/doric/sem/ChildColumnNotFound.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ object ChildColumnNotFound {
): SparkErrorWrapper = {
SparkErrorWrapper(
new Throwable(
if (!sparkSession.version.startsWith("3.4"))
if (
!(sparkSession.version.startsWith("3.4") || sparkSession.version
.startsWith("3.5"))
)
s"No such struct field $expectedCol in ${foundCols.mkString(", ")}"
else
s"[FIELD_NOT_FOUND] No such struct field `$expectedCol` in ${foundCols
Expand Down
5 changes: 4 additions & 1 deletion core/src/test/scala/doric/sem/ColumnNotFound.scala
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ object ColumnNotFound {

SparkErrorWrapper(
new Throwable(
if (!sparkSession.version.startsWith("3.4"))
if (
!(sparkSession.version.startsWith("3.4") || sparkSession.version
.startsWith("3.5"))
)
s"""Cannot resolve column name "$expectedCol" among (${foundCols
.mkString(", ")})"""
else
Expand Down
13 changes: 9 additions & 4 deletions core/src/test/scala/doric/sem/TransformOpsSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package doric
package sem

import doric.implicitConversions._
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, IntegerType, StringType, TimestampType}
import org.scalatest.matchers.should.Matchers
import org.scalatest.EitherValues

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{DoubleType, IntegerType, StringType, TimestampType}

class TransformOpsSpec
extends DoricTestElements
with Matchers
Expand Down Expand Up @@ -122,13 +123,17 @@ class TransformOpsSpec
)
}
error.getMessage should startWith(
if (!spark.version.startsWith("3.4"))
if (
!(spark.version.startsWith("3.4") || spark.version.startsWith("3.5"))
)
"Found duplicate column(s) in given column names:"
else
"[COLUMN_ALREADY_EXISTS] The column `a` already exists. Consider to choose another name or rename the existing column."
)
error.getMessage should include("`a`")
if (!spark.version.startsWith("3.4")) {
if (
!(spark.version.startsWith("3.4") || spark.version.startsWith("3.5"))
) {
error.getMessage should include("`b`")
}
}
Expand Down
6 changes: 4 additions & 2 deletions core/src/test/scala/doric/syntax/StringColumnsSpec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ class StringColumnsSpec extends DoricTestElements {
}

it("should raise an error if group > regex group result") {
intercept[java.lang.IllegalArgumentException] {
intercept[java.lang.RuntimeException] {
df.withColumn(
"res",
colString("col1").regexpExtract("(\\d+)-(\\d+)".lit, 4.lit)
Expand Down Expand Up @@ -481,7 +481,9 @@ class StringColumnsSpec extends DoricTestElements {
df.testColumns2("col1", "")(
(str, pattern) => colString(str).split(pattern.lit),
(str, pattern) => f.split(f.col(str), pattern),
if (!spark.version.startsWith("3.4"))
if (
!(spark.version.startsWith("3.4") || spark.version.startsWith("3.5"))
)
List(
Array("h", "e", "l", "l", "o", " ", "w", "o", "r", "l", "d", ""),
Array("1", "2", "3", "4", "5", ""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package doric
package syntax

import doric.testUtilities.data.User
import org.apache.spark.sql.catalyst.expressions.ArraySort
import org.apache.spark.sql.{Column, Row, functions => f}
import org.scalatest.EitherValues
import org.scalatest.matchers.should.Matchers

import org.apache.spark.sql.{Column, Row, functions => f}
import org.apache.spark.sql.catalyst.expressions.ArraySort

class ArrayColumns3xSpec
extends DoricTestElements
with EitherValues
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
package doric.syntax

import doric.{DoricTestElements, colStruct}
import org.apache.spark.sql.{functions => f}
import scala.jdk.CollectionConverters._

import doric.{colStruct, DoricTestElements}
import java.sql.Timestamp
import scala.jdk.CollectionConverters._

import org.apache.spark.sql.{functions => f}

class DStructs3xSpec extends DoricTestElements {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package doric
package syntax

import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Row, functions => f}
import scala.jdk.CollectionConverters._

import java.sql.Timestamp
import org.scalatest.EitherValues
import org.scalatest.matchers.should.Matchers

import java.sql.Timestamp
import scala.jdk.CollectionConverters._
import org.apache.spark.sql.{Row, functions => f}
import org.apache.spark.sql.types.StructType

class StringColumns3xSpec
extends DoricTestElements
Expand Down
4 changes: 3 additions & 1 deletion docs/docs/docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Here you can see the whole doric API for every supported spark version:

| Spark | Scala | API | doric |
|:-----:|:-----:|:--------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
| 2.4.8 | 2.11 | Deprecated [doric_2-4_2.11](spark-2.4/scala-2.11/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_2-4_2.11)](https://mvnrepository.com/artifact/org.hablapps/doric_2-4_2.11/0.0.7) |
| 2.4.8 | 2.11 | Deprecated [doric_2-4_2.11](spark-2.4/scala-2.11/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_2-4_2.11)](https://mvnrepository.com/artifact/org.hablapps/doric_2-4_2.11/0.0.7) |
| 3.0.2 | 2.12 | [doric_3-0_2.12](spark-3.0/scala-2.12/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-0_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-0_2.12/@STABLE_VERSION@) |
| 3.1.3 | 2.12 | [doric_3-1_2.12](spark-3.1/scala-2.12/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-1_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-1_2.12/@STABLE_VERSION@) |
| 3.2.3 | 2.12 | [doric_3-2_2.12](spark-3.2/scala-2.12/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-2_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-2_2.12/@STABLE_VERSION@) |
Expand All @@ -19,3 +19,5 @@ Here you can see the whole doric API for every supported spark version:
| 3.3.4 | 2.13 | [doric_3-3_2.13](spark-3.3/scala-2.13/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-3_2.13)](https://mvnrepository.com/artifact/org.hablapps/doric_3-3_2.13/@STABLE_VERSION@) |
| 3.4.2 | 2.12 | [doric_3-4_2.12](spark-3.4/scala-2.12/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-4_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-4_2.12/@STABLE_VERSION@) |
| 3.4.2 | 2.13 | [doric_3-4_2.13](spark-3.4/scala-2.13/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-4_2.13)](https://mvnrepository.com/artifact/org.hablapps/doric_3-4_2.13/@STABLE_VERSION@) |
| 3.5.1 | 2.12 | [doric_3-5_2.12](spark-3.5/scala-2.12/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-5_2.12)](https://mvnrepository.com/artifact/org.hablapps/doric_3-5_2.12/@STABLE_VERSION@) |
| 3.5.1 | 2.13 | [doric_3-5_2.13](spark-3.5/scala-2.13/) | [![Maven Central](https://img.shields.io/maven-central/v/org.hablapps/doric_3-5_2.13)](https://mvnrepository.com/artifact/org.hablapps/doric_3-5_2.13/@STABLE_VERSION@) |
Loading

0 comments on commit fee55fc

Please sign in to comment.