Merge pull request #128 from SemyonSinchenko/127-ci
Update scalafmt plugin and sbt version
SemyonSinchenko authored Aug 4, 2024
2 parents 542bd0e + 40257c9 commit 97de5a0
Showing 14 changed files with 240 additions and 304 deletions.
8 changes: 6 additions & 2 deletions .scalafmt.conf
@@ -1,6 +1,10 @@
align = more
version = 3.8.2

align.preset = more
runner.dialect = scala212
maxColumn = 150
docstrings = JavaDoc
docstrings.style = Asterisk

//style = defaultWithAlign
//align = true
//danglingParentheses = false
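Most of the Scala hunks below are mechanical reformatting produced by the newer scalafmt rather than behaviour changes. A minimal, self-contained sketch of the dominant style change visible in those hunks (illustrative names, not taken from the codebase):

    val cells = Seq(("name", 0), ("age", 1))

    // old layout: the case clause of a one-case lambda sits on its own, deeper-indented line
    cells.map {
      case (cell, i) =>
        s"$i:$cell"
    }

    // new layout under scalafmt 3.8.2 with this config: the case folds into the opening brace
    cells.map { case (cell, i) =>
      s"$i:$cell"
    }

The other visible effects are method signatures rewrapped one parameter per line with the closing parenthesis on its own line, and scaladoc comments reflowed (see RddHelpers below).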
6 changes: 3 additions & 3 deletions build.sbt
@@ -5,7 +5,7 @@ scalafmtOnCompile in Compile := true
organization := "com.github.mrpowers"
name := "spark-fast-tests"

version := "1.3.0"
version := "1.10.1"

val versionRegex = """^(.*)\.(.*)\.(.*)$""".r

@@ -34,11 +34,11 @@ fork in Test := true
javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled", "-Duser.timezone=GMT")

licenses := Seq("MIT" -> url("http://opensource.org/licenses/MIT"))
homepage := Some(url("https://github.com/MrPowers/spark-fast-tests"))
homepage := Some(url("https://github.com/mrpowers-io/spark-fast-tests"))
developers ++= List(
Developer("MrPowers", "Matthew Powers", "@MrPowers", url("https://github.com/MrPowers"))
)
scmInfo := Some(ScmInfo(url("https://github.com/MrPowers/spark-fast-tests"), "[email protected]:MrPowers/spark-fast-tests.git"))
scmInfo := Some(ScmInfo(url("https://github.com/mrpowers-io/spark-fast-tests"), "[email protected]:MrPowers/spark-fast-tests.git"))

updateOptions := updateOptions.value.withLatestSnapshots(false)
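Two things change for consumers in this file: the artifact version and the project URLs (the repository moved to the mrpowers-io organization). Based on the organization, name, and version settings above, a test-scoped dependency on the new release would look roughly like this in a consumer's build.sbt (a sketch, assuming the artifact is published for your Scala version):

    libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "1.10.1" % Test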

2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -4,7 +4,7 @@ resolvers += Resolver.bintrayIvyRepo("s22s", "sbt-plugins")

resolvers += Resolver.typesafeRepo("releases")

addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.15")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")

addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.3")

81 changes: 37 additions & 44 deletions src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala
@@ -56,34 +56,31 @@ object ArrayUtil {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.map { row =>
val color = if (row(0) == row(1)) "blue" else "red"
row.zipWithIndex
.map {
case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (color == "blue") {
ufansi.Color.DarkGray(r)
} else {
ufansi.Color.Red(r)
}
.map { case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (color == "blue") {
ufansi.Color.DarkGray(r)
} else {
ufansi.Color.Red(r)
}
}
.addString(sb, "|", "|", "|\n")
}
@@ -123,36 +120,32 @@ object ArrayUtil {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.zipWithIndex.map {
case (row, j) =>
row.zipWithIndex
.map {
case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (rowEqual(j)) {
equalColor(r)
} else {
unequalColor(r)
}
rows.tail.zipWithIndex.map { case (row, j) =>
row.zipWithIndex
.map { case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
if (rowEqual(j)) {
equalColor(r)
} else {
unequalColor(r)
}
}
.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
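Both table builders above lean on the same padding primitive to align cells. A quick illustration of the two branches, assuming the StringUtils in scope is org.apache.commons.lang3.StringUtils:

    import org.apache.commons.lang3.StringUtils

    StringUtils.leftPad("cat", 6)   // "   cat"  -- right-aligned cell, the truncate > 0 branch
    StringUtils.rightPad("cat", 6)  // "cat   "  -- left-aligned cell, the default branch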
src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala
@@ -151,7 +151,8 @@ trait ColumnComparer {

if (sf1.dataType != sf2.dataType) {
throw ColumnMismatch(
s"The column dataTypes are different. The `${colName1}` column has a `${sf1.dataType}` dataType and the `${colName2}` column has a `${sf2.dataType}` dataType.")
s"The column dataTypes are different. The `${colName1}` column has a `${sf1.dataType}` dataType and the `${colName2}` column has a `${sf2.dataType}` dataType."
)
}

val r = df
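The reflowed message belongs to the dataType guard in the trait's column-equality check. A hypothetical usage sketch showing where a ColumnMismatch like the one above would surface (assertColumnEquality and the DataFrame are assumed; neither is shown in this hunk):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.ColumnComparer

    object AgeColumnsCheck extends ColumnComparer {
      // throws ColumnMismatch if the two columns differ in dataType (or in content)
      def check(df: DataFrame): Unit =
        assertColumnEquality(df, "age", "expected_age")
    }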
src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala
@@ -7,12 +7,14 @@ trait DataFrameComparer extends DatasetComparer {
/**
* Raises an error unless `actualDF` and `expectedDF` are equal
*/
def assertSmallDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500): Unit = {
def assertSmallDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500
): Unit = {
assertSmallDatasetEquality(
actualDF,
expectedDF,
@@ -26,11 +28,13 @@ trait DataFrameComparer extends DatasetComparer {
/**
* Raises an error unless `actualDF` and `expectedDF` are equal
*/
def assertLargeDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertLargeDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
assertLargeDatasetEquality(
actualDF,
expectedDF,
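Only the parameter layout changes here; names, defaults, and behaviour stay the same. A minimal usage sketch against the reflowed signature (actualDF and expectedDF are assumed test DataFrames):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.DataFrameComparer

    object ReportComparison extends DataFrameComparer {
      def run(actualDF: DataFrame, expectedDF: DataFrame): Unit =
        assertSmallDataFrameEquality(
          actualDF,
          expectedDF,
          ignoreNullable = true,     // treat nullable/non-nullable schema differences as equal
          orderedComparison = false  // compare contents without relying on row order
        )
    }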
src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala
@@ -93,45 +93,42 @@ object DataFramePrettyPrint {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell,
colWidths(i)
)
}
}.addString(
sb,
"|",
"|",
"|\n"
)

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex
.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell,
cell.toString,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell,
cell.toString,
colWidths(i)
)
}
}
.addString(
sb,
"|",
"|",
"|\n"
)

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex
.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell.toString,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell.toString,
colWidths(i)
)
}
}
.addString(
sb,
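As in ArrayUtil, zipWithIndex is what ties each cell to its column position so the per-column width can be applied. A tiny standalone illustration:

    // pairs each cell with its column index, so colWidths(i) can be looked up per column
    Seq("name", "age").zipWithIndex   // Seq(("name", 0), ("age", 1))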
src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala
@@ -66,22 +66,23 @@ Expected DataFrame Row Count: '${expectedCount}'
private def unequalRDDMessage[T](unequalRDD: RDD[(Long, (T, T))], length: Int): String = {
"\nRow Index | Actual Row | Expected Row\n" + unequalRDD
.take(length)
.map {
case (idx, (left, right)) =>
ufansi.Color.Red(s"$idx | $left | $right")
.map { case (idx, (left, right)) =>
ufansi.Color.Red(s"$idx | $left | $right")
}
.mkString("\n")
}

/**
* Raises an error unless `actualDS` and `expectedDS` are equal
*/
def assertSmallDatasetEquality[T](actualDS: Dataset[T],
expectedDS: Dataset[T],
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500): Unit = {
def assertSmallDatasetEquality[T](
actualDS: Dataset[T],
expectedDS: Dataset[T],
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500
): Unit = {
if (!SchemaComparer.equals(actualDS.schema, expectedDS.schema, ignoreNullable, ignoreColumnNames)) {
throw DatasetSchemaMismatch(
betterSchemaMismatchMessage(actualDS, expectedDS)
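A usage sketch against the reflowed signature, for Datasets of a small case class (the SparkSession is assumed):

    import org.apache.spark.sql.SparkSession
    import com.github.mrpowers.spark.fast.tests.DatasetComparer

    final case class Person(name: String, age: Int)

    object PersonSpec extends DatasetComparer {
      def run(spark: SparkSession): Unit = {
        import spark.implicits._
        val actualDS   = Seq(Person("bob", 42), Person("ann", 12)).toDS()
        val expectedDS = Seq(Person("ann", 12), Person("bob", 42)).toDS()
        // passes: orderedComparison = false compares contents without relying on row order
        assertSmallDatasetEquality(actualDS, expectedDS, orderedComparison = false)
      }
    }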
@@ -121,12 +122,14 @@ Expected DataFrame Row Count: '${expectedCount}'
/**
* Raises an error unless `actualDS` and `expectedDS` are equal
*/
def assertLargeDatasetEquality[T: ClassTag](actualDS: Dataset[T],
expectedDS: Dataset[T],
equals: (T, T) => Boolean = naiveEquality _,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertLargeDatasetEquality[T: ClassTag](
actualDS: Dataset[T],
expectedDS: Dataset[T],
equals: (T, T) => Boolean = naiveEquality _,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
// first check if the schemas are equal
if (!SchemaComparer.equals(actualDS.schema, expectedDS.schema, ignoreNullable, ignoreColumnNames)) {
throw DatasetSchemaMismatch(betterSchemaMismatchMessage(actualDS, expectedDS))
@@ -148,8 +151,8 @@ Expected DataFrame Row Count: '${expectedCount}'
val resultIndexValue: RDD[(Long, T)] = RddHelpers.zipWithIndex(ds2.rdd)
val unequalRDD = expectedIndexValue
.join(resultIndexValue)
.filter {
case (idx, (o1, o2)) => !equals(o1, o2)
.filter { case (idx, (o1, o2)) =>
!equals(o1, o2)
}
val maxUnequalRowsToShow = 10
if (!unequalRDD.isEmpty()) {
@@ -172,12 +175,14 @@ Expected DataFrame Row Count: '${expectedCount}'
}
}

def assertApproximateDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
precision: Double,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertApproximateDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
precision: Double,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
val e = (r1: Row, r2: Row) => {
r1.equals(r2) || RowComparer.areRowsEqual(r1, r2, precision)
}
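The approximate variant delegates row equality to RowComparer.areRowsEqual with the given precision, as the lambda above shows. A usage sketch for DataFrames with floating-point columns (actualDF and expectedDF are assumed):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.DatasetComparer

    object MetricsSpec extends DatasetComparer {
      def run(actualDF: DataFrame, expectedDF: DataFrame): Unit =
        // numeric cells are compared with a tolerance of 0.01 via RowComparer.areRowsEqual
        assertApproximateDataFrameEquality(actualDF, expectedDF, precision = 0.01)
    }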
src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala
@@ -5,14 +5,12 @@ import org.apache.spark.rdd.RDD
object RddHelpers {

/**
* Zip RDD's with precise indexes. This is used so we can join two DataFrame's
* Rows together regardless of if the source is different but still compare based on
* the order.
* Zip RDD's with precise indexes. This is used so we can join two DataFrame's Rows together regardless of if the source is different but still
* compare based on the order.
*/
def zipWithIndex[T](rdd: RDD[T]): RDD[(Long, T)] = {
rdd.zipWithIndex().map {
case (row, idx) =>
(idx, row)
rdd.zipWithIndex().map { case (row, idx) =>
(idx, row)
}
}

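A small sketch of what the helper returns, assuming a live SparkContext is passed in and that the package path matches the neighbouring files:

    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import com.github.mrpowers.spark.fast.tests.RddHelpers

    def indexed(sc: SparkContext): Array[(Long, String)] = {
      val rdd: RDD[String] = sc.parallelize(Seq("a", "b", "c"))
      // index first, so two RDDs can be joined by row position (as DatasetComparer does above)
      RddHelpers.zipWithIndex(rdd).collect()
    }
    // indexed(sc) == Array((0, "a"), (1, "b"), (2, "c"))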
