Merge pull request #128 from SemyonSinchenko/127-ci
Update scalafmt plugin and sbt version
SemyonSinchenko authored Aug 4, 2024
2 parents 542bd0e + 40257c9 commit 97de5a0
Showing 14 changed files with 240 additions and 304 deletions.
8 changes: 6 additions & 2 deletions .scalafmt.conf
@@ -1,6 +1,10 @@
align = more
version = 3.8.2

align.preset = more
runner.dialect = scala212
maxColumn = 150
docstrings = JavaDoc
docstrings.style = Asterisk

//style = defaultWithAlign
//align = true
//danglingParentheses = false
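Most of the Scala hunks below are mechanical reformatting produced by the newer scalafmt rather than behaviour changes. A minimal, self-contained sketch of the dominant style change visible in those hunks (illustrative names, not taken from the codebase):

    val cells = Seq(("name", 0), ("age", 1))

    // old layout: the case clause of a one-case lambda sits on its own, deeper-indented line
    cells.map {
      case (cell, i) =>
        s"$i:$cell"
    }

    // new layout under scalafmt 3.8.2 with this config: the case folds into the opening brace
    cells.map { case (cell, i) =>
      s"$i:$cell"
    }

The other visible effects are method signatures rewrapped one parameter per line with the closing parenthesis on its own line, and scaladoc comments reflowed (see RddHelpers below).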
6 changes: 3 additions & 3 deletions build.sbt
@@ -5,7 +5,7 @@ scalafmtOnCompile in Compile := true
organization := "com.github.mrpowers"
name := "spark-fast-tests"

version := "1.3.0"
version := "1.10.1"

val versionRegex = """^(.*)\.(.*)\.(.*)$""".r

@@ -34,11 +34,11 @@ fork in Test := true
javaOptions ++= Seq("-Xms512M", "-Xmx2048M", "-XX:+CMSClassUnloadingEnabled", "-Duser.timezone=GMT")

licenses := Seq("MIT" -> url("http://opensource.org/licenses/MIT"))
homepage := Some(url("https://github.com/MrPowers/spark-fast-tests"))
homepage := Some(url("https://github.com/mrpowers-io/spark-fast-tests"))
developers ++= List(
Developer("MrPowers", "Matthew Powers", "@MrPowers", url("https://github.com/MrPowers"))
)
scmInfo := Some(ScmInfo(url("https://github.com/MrPowers/spark-fast-tests"), "[email protected]:MrPowers/spark-fast-tests.git"))
scmInfo := Some(ScmInfo(url("https://github.com/mrpowers-io/spark-fast-tests"), "[email protected]:MrPowers/spark-fast-tests.git"))

updateOptions := updateOptions.value.withLatestSnapshots(false)
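Two things change for consumers in this file: the artifact version and the project URLs (the repository moved to the mrpowers-io organization). Based on the organization, name, and version settings above, a test-scoped dependency on the new release would look roughly like this in a consumer's build.sbt (a sketch, assuming the artifact is published for your Scala version):

    libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "1.10.1" % Test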

2 changes: 1 addition & 1 deletion project/plugins.sbt
@@ -4,7 +4,7 @@ resolvers += Resolver.bintrayIvyRepo("s22s", "sbt-plugins")

resolvers += Resolver.typesafeRepo("releases")

addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.15")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")

addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.3")

81 changes: 37 additions & 44 deletions src/main/scala/com/github/mrpowers/spark/fast/tests/ArrayUtil.scala
@@ -56,34 +56,31 @@ object ArrayUtil {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.map { row =>
val color = if (row(0) == row(1)) "blue" else "red"
row.zipWithIndex
.map {
case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (color == "blue") {
ufansi.Color.DarkGray(r)
} else {
ufansi.Color.Red(r)
}
.map { case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (color == "blue") {
ufansi.Color.DarkGray(r)
} else {
ufansi.Color.Red(r)
}
}
.addString(sb, "|", "|", "|\n")
}
@@ -123,36 +120,32 @@ object ArrayUtil {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(cell, colWidths(i))
} else {
StringUtils.rightPad(cell, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
}.addString(sb, "|", "|", "|\n")

sb.append(sep)

// data
rows.tail.zipWithIndex.map {
case (row, j) =>
row.zipWithIndex
.map {
case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
if (rowEqual(j)) {
equalColor(r)
} else {
unequalColor(r)
}
rows.tail.zipWithIndex.map { case (row, j) =>
row.zipWithIndex
.map { case (cell, i) =>
val r = if (truncate > 0) {
StringUtils.leftPad(cell.toString, colWidths(i))
} else {
StringUtils.rightPad(cell.toString, colWidths(i))
}
.addString(sb, "|", "|", "|\n")
if (rowEqual(j)) {
equalColor(r)
} else {
unequalColor(r)
}
}
.addString(sb, "|", "|", "|\n")
}

sb.append(sep)
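Both table builders above lean on the same padding primitive to align cells. A quick illustration of the two branches, assuming the StringUtils in scope is org.apache.commons.lang3.StringUtils:

    import org.apache.commons.lang3.StringUtils

    StringUtils.leftPad("cat", 6)   // "   cat"  -- right-aligned cell, the truncate > 0 branch
    StringUtils.rightPad("cat", 6)  // "cat   "  -- left-aligned cell, the default branch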
src/main/scala/com/github/mrpowers/spark/fast/tests/ColumnComparer.scala
@@ -151,7 +151,8 @@ trait ColumnComparer {

if (sf1.dataType != sf2.dataType) {
throw ColumnMismatch(
s"The column dataTypes are different. The `${colName1}` column has a `${sf1.dataType}` dataType and the `${colName2}` column has a `${sf2.dataType}` dataType.")
s"The column dataTypes are different. The `${colName1}` column has a `${sf1.dataType}` dataType and the `${colName2}` column has a `${sf2.dataType}` dataType."
)
}

val r = df
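The reflowed message belongs to the dataType guard in the trait's column-equality check. A hypothetical usage sketch showing where a ColumnMismatch like the one above would surface (assertColumnEquality and the DataFrame are assumed; neither is shown in this hunk):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.ColumnComparer

    object AgeColumnsCheck extends ColumnComparer {
      // throws ColumnMismatch if the two columns differ in dataType (or in content)
      def check(df: DataFrame): Unit =
        assertColumnEquality(df, "age", "expected_age")
    }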
src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala
@@ -7,12 +7,14 @@ trait DataFrameComparer extends DatasetComparer {
/**
* Raises an error unless `actualDF` and `expectedDF` are equal
*/
def assertSmallDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500): Unit = {
def assertSmallDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500
): Unit = {
assertSmallDatasetEquality(
actualDF,
expectedDF,
@@ -26,11 +28,13 @@ trait DataFrameComparer extends DatasetComparer {
/**
* Raises an error unless `actualDF` and `expectedDF` are equal
*/
def assertLargeDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertLargeDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
assertLargeDatasetEquality(
actualDF,
expectedDF,
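Only the parameter layout changes here; names, defaults, and behaviour stay the same. A minimal usage sketch against the reflowed signature (actualDF and expectedDF are assumed test DataFrames):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.DataFrameComparer

    object ReportComparison extends DataFrameComparer {
      def run(actualDF: DataFrame, expectedDF: DataFrame): Unit =
        assertSmallDataFrameEquality(
          actualDF,
          expectedDF,
          ignoreNullable = true,     // treat nullable/non-nullable schema differences as equal
          orderedComparison = false  // compare contents without relying on row order
        )
    }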
src/main/scala/com/github/mrpowers/spark/fast/tests/DataFramePrettyPrint.scala
@@ -93,45 +93,42 @@ object DataFramePrettyPrint {

// column names
val h: Seq[(String, Int)] = rows.head.zipWithIndex
h.map {
case (cell, i) =>
h.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell,
colWidths(i)
)
}
}.addString(
sb,
"|",
"|",
"|\n"
)

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex
.map { case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell,
cell.toString,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell,
cell.toString,
colWidths(i)
)
}
}
.addString(
sb,
"|",
"|",
"|\n"
)

sb.append(sep)

// data
rows.tail.map {
_.zipWithIndex
.map {
case (cell, i) =>
if (truncate > 0) {
StringUtils.leftPad(
cell.toString,
colWidths(i)
)
} else {
StringUtils.rightPad(
cell.toString,
colWidths(i)
)
}
}
.addString(
sb,
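As in ArrayUtil, zipWithIndex is what ties each cell to its column position so the per-column width can be applied. A tiny standalone illustration:

    // pairs each cell with its column index, so colWidths(i) can be looked up per column
    Seq("name", "age").zipWithIndex   // Seq(("name", 0), ("age", 1))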
src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala
@@ -66,22 +66,23 @@ Expected DataFrame Row Count: '${expectedCount}'
private def unequalRDDMessage[T](unequalRDD: RDD[(Long, (T, T))], length: Int): String = {
"\nRow Index | Actual Row | Expected Row\n" + unequalRDD
.take(length)
.map {
case (idx, (left, right)) =>
ufansi.Color.Red(s"$idx | $left | $right")
.map { case (idx, (left, right)) =>
ufansi.Color.Red(s"$idx | $left | $right")
}
.mkString("\n")
}

/**
* Raises an error unless `actualDS` and `expectedDS` are equal
*/
def assertSmallDatasetEquality[T](actualDS: Dataset[T],
expectedDS: Dataset[T],
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500): Unit = {
def assertSmallDatasetEquality[T](
actualDS: Dataset[T],
expectedDS: Dataset[T],
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true,
truncate: Int = 500
): Unit = {
if (!SchemaComparer.equals(actualDS.schema, expectedDS.schema, ignoreNullable, ignoreColumnNames)) {
throw DatasetSchemaMismatch(
betterSchemaMismatchMessage(actualDS, expectedDS)
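A usage sketch against the reflowed signature, for Datasets of a small case class (the SparkSession is assumed):

    import org.apache.spark.sql.SparkSession
    import com.github.mrpowers.spark.fast.tests.DatasetComparer

    final case class Person(name: String, age: Int)

    object PersonSpec extends DatasetComparer {
      def run(spark: SparkSession): Unit = {
        import spark.implicits._
        val actualDS   = Seq(Person("bob", 42), Person("ann", 12)).toDS()
        val expectedDS = Seq(Person("ann", 12), Person("bob", 42)).toDS()
        // passes: orderedComparison = false compares contents without relying on row order
        assertSmallDatasetEquality(actualDS, expectedDS, orderedComparison = false)
      }
    }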
@@ -121,12 +122,14 @@ Expected DataFrame Row Count: '${expectedCount}'
/**
* Raises an error unless `actualDS` and `expectedDS` are equal
*/
def assertLargeDatasetEquality[T: ClassTag](actualDS: Dataset[T],
expectedDS: Dataset[T],
equals: (T, T) => Boolean = naiveEquality _,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertLargeDatasetEquality[T: ClassTag](
actualDS: Dataset[T],
expectedDS: Dataset[T],
equals: (T, T) => Boolean = naiveEquality _,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
// first check if the schemas are equal
if (!SchemaComparer.equals(actualDS.schema, expectedDS.schema, ignoreNullable, ignoreColumnNames)) {
throw DatasetSchemaMismatch(betterSchemaMismatchMessage(actualDS, expectedDS))
@@ -148,8 +151,8 @@ Expected DataFrame Row Count: '${expectedCount}'
val resultIndexValue: RDD[(Long, T)] = RddHelpers.zipWithIndex(ds2.rdd)
val unequalRDD = expectedIndexValue
.join(resultIndexValue)
.filter {
case (idx, (o1, o2)) => !equals(o1, o2)
.filter { case (idx, (o1, o2)) =>
!equals(o1, o2)
}
val maxUnequalRowsToShow = 10
if (!unequalRDD.isEmpty()) {
@@ -172,12 +175,14 @@ Expected DataFrame Row Count: '${expectedCount}'
}
}

def assertApproximateDataFrameEquality(actualDF: DataFrame,
expectedDF: DataFrame,
precision: Double,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true): Unit = {
def assertApproximateDataFrameEquality(
actualDF: DataFrame,
expectedDF: DataFrame,
precision: Double,
ignoreNullable: Boolean = false,
ignoreColumnNames: Boolean = false,
orderedComparison: Boolean = true
): Unit = {
val e = (r1: Row, r2: Row) => {
r1.equals(r2) || RowComparer.areRowsEqual(r1, r2, precision)
}
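The approximate variant delegates row equality to RowComparer.areRowsEqual with the given precision, as the lambda above shows. A usage sketch for DataFrames with floating-point columns (actualDF and expectedDF are assumed):

    import org.apache.spark.sql.DataFrame
    import com.github.mrpowers.spark.fast.tests.DatasetComparer

    object MetricsSpec extends DatasetComparer {
      def run(actualDF: DataFrame, expectedDF: DataFrame): Unit =
        // numeric cells are compared with a tolerance of 0.01 via RowComparer.areRowsEqual
        assertApproximateDataFrameEquality(actualDF, expectedDF, precision = 0.01)
    }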
src/main/scala/com/github/mrpowers/spark/fast/tests/RddHelpers.scala
@@ -5,14 +5,12 @@ import org.apache.spark.rdd.RDD
object RddHelpers {

/**
* Zip RDD's with precise indexes. This is used so we can join two DataFrame's
* Rows together regardless of if the source is different but still compare based on
* the order.
* Zip RDD's with precise indexes. This is used so we can join two DataFrame's Rows together regardless of if the source is different but still
* compare based on the order.
*/
def zipWithIndex[T](rdd: RDD[T]): RDD[(Long, T)] = {
rdd.zipWithIndex().map {
case (row, idx) =>
(idx, row)
rdd.zipWithIndex().map { case (row, idx) =>
(idx, row)
}
}

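A small sketch of what the helper returns, assuming a live SparkContext is passed in and that the package path matches the neighbouring files:

    import org.apache.spark.SparkContext
    import org.apache.spark.rdd.RDD
    import com.github.mrpowers.spark.fast.tests.RddHelpers

    def indexed(sc: SparkContext): Array[(Long, String)] = {
      val rdd: RDD[String] = sc.parallelize(Seq("a", "b", "c"))
      // index first, so two RDDs can be joined by row position (as DatasetComparer does above)
      RddHelpers.zipWithIndex(rdd).collect()
    }
    // indexed(sc) == Array((0, "a"), (1, "b"), (2, "c"))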
