|
1 | | -import uk.co.gresearch.spark.diff._ |
| 1 | +import org.apache.spark.sql.DataFrame |
| 2 | + |
/**
 * Prints `df` and asserts that it holds exactly `size` rows.
 *
 * The frame is shown before the check, so its rows are printed even when the
 * assertion fails afterwards.
 *
 * @param df   the DataFrame to display and measure
 * @param size the expected number of rows
 * @throws java.lang.AssertionError if the collected row count differs from `size`
 */
def assertSize(df: DataFrame, size: Long): Unit = {
  df.show()
  // Collect once and keep the count so the failure message can report what was actually seen.
  val actual = df.collect().length
  assert(actual == size, s"expected $size rows but found $actual")
}
2 | 7 |
|
3 | | -val left = Seq((1, "one"), (2, "two"), (3, "three")).toDF("id", "value") |
4 | | -val right = Seq((1, "one"), (2, "Two"), (4, "four")).toDF("id", "value") |
5 | 8 |
|
6 | | -val diff = left.diff(right) |
7 | | -diff.show() |
| 9 | +import uk.co.gresearch.spark.diff._ |
| 10 | +import uk.co.gresearch.spark.parquet._ |
| 11 | + |
// Smoke test: every check prints its DataFrame and asserts the expected row count.
// NOTE(review): `spark` and `toDF` are not defined in this script; presumably it is fed to
// spark-shell, which provides them -- confirm against the step that invokes it.
try {
  val left = Seq((1, "one"), (2, "two"), (3, "three")).toDF("id", "value")
  val right = Seq((1, "one"), (2, "Two"), (4, "four")).toDF("id", "value")
  assertSize(left.diff(right), 5)
  // Parquet metadata of each test file is expected to yield five rows.
  assertSize(spark.read.parquetMetadata("src/test/files/test.parquet"), 5)
  assertSize(spark.read.parquetMetadata("src/test/files/nested.parquet"), 5)
  assertSize(spark.read.parquetMetadata("src/test/files/encrypted1.parquet"), 5)
  assertSize(spark.read.parquetMetadata("src/test/files/encrypted2.parquet"), 5)
} catch {
  // Throwable also covers the AssertionError raised by a failed assert, so any
  // failure above ends the run with exit status 1.
  case e: Throwable =>
    // Report the cause first; exiting silently would leave a failed run without any diagnostic.
    e.printStackTrace()
    sys.exit(1)
}

// Reached only when none of the checks above threw.
sys.exit(0)
12 | 26 |
|
0 commit comments