|
33 | 33 | import org.apache.avro.io.DatumWriter; |
34 | 34 | import org.apache.iceberg.AssertHelpers; |
35 | 35 | import org.apache.iceberg.DataFile; |
| 36 | +import org.apache.iceberg.Files; |
| 37 | +import org.apache.iceberg.MetricsConfig; |
| 38 | +import org.apache.iceberg.data.Record; |
| 39 | +import org.apache.iceberg.data.orc.GenericOrcWriter; |
| 40 | +import org.apache.iceberg.io.FileAppender; |
| 41 | +import org.apache.iceberg.io.OutputFile; |
36 | 42 | import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; |
| 43 | +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; |
37 | 44 | import org.apache.iceberg.relocated.com.google.common.collect.Lists; |
| 45 | +import org.apache.iceberg.types.Types; |
38 | 46 | import org.apache.spark.sql.Dataset; |
39 | 47 | import org.apache.spark.sql.Row; |
40 | 48 | import org.apache.spark.sql.RowFactory; |
|
51 | 59 | import org.junit.Test; |
52 | 60 | import org.junit.rules.TemporaryFolder; |
53 | 61 |
|
| 62 | +import static org.apache.iceberg.types.Types.NestedField.optional; |
| 63 | + |
54 | 64 | public class TestAddFilesProcedure extends SparkExtensionsTestBase { |
55 | 65 |
|
56 | 66 | private final String sourceTableName = "source_table"; |
@@ -507,6 +517,42 @@ public void invalidDataImportPartitioned() { |
507 | 517 | catalogName, tableName, fileTableDir.getAbsolutePath())); |
508 | 518 | } |
509 | 519 |
|
| 520 | + @Test |
| 521 | + public void addOrcFileWithDoubleAndFloatColumns() throws Exception { |
| 522 | +    // The Spark session catalog cannot load metadata tables; it fails |
| 523 | +    // with "The namespace in session catalog must have exactly one name part" |
| 524 | + Assume.assumeFalse(catalogName.equals("spark_catalog")); |
| 525 | + |
| 526 | + // Create an ORC file |
| 527 | + File outputFile = temp.newFile("test.orc"); |
| 528 | + final int numRows = 5; |
| 529 | + List<Record> expectedRecords = createOrcFile(outputFile, numRows); |
| 530 | + String createIceberg = |
| 531 | + "CREATE TABLE %s (x float, y double, z long) USING iceberg"; |
| 532 | + sql(createIceberg, tableName); |
| 533 | + |
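|  | +    // Import the standalone ORC file with the add_files procedure and check that a single file was added |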
| 534 | + Object result = scalarSql("CALL %s.system.add_files('%s', '`orc`.`%s`')", |
| 535 | + catalogName, tableName, outputFile.getPath()); |
| 536 | + Assert.assertEquals(1L, result); |
| 537 | + |
| 538 | + List<Object[]> expected = expectedRecords.stream() |
| 539 | + .map(record -> new Object[]{record.get(0), record.get(1), record.get(2)}) |
| 540 | + .collect(Collectors.toList()); |
| 541 | + |
| 542 | + // x goes 2.00, 1.99, 1.98, ... |
| 543 | + assertEquals("Iceberg table contains correct data", |
| 544 | + expected, |
| 545 | + sql("SELECT * FROM %s ORDER BY x DESC", tableName)); |
| 546 | + |
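|  | +    // The files metadata table should report the same record count as the number of rows written to the ORC file |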
| 547 | + List<Object[]> actualRecordCount = sql("select %s from %s.files", |
| 548 | + DataFile.RECORD_COUNT.name(), |
| 549 | + tableName); |
| 550 | + List<Object[]> expectedRecordCount = Lists.newArrayList(); |
| 551 | + expectedRecordCount.add(new Object[]{(long) numRows}); |
| 552 | +    assertEquals("Iceberg file metadata should have correct record count", |
| 553 | + expectedRecordCount, actualRecordCount); |
| 554 | + } |
| 555 | + |
510 | 556 | private static final StructField[] struct = { |
511 | 557 | new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), |
512 | 558 | new StructField("name", DataTypes.StringType, false, Metadata.empty()), |
@@ -597,4 +643,36 @@ private void createPartitionedHiveTable() { |
597 | 643 | partitionedDF.write().insertInto(sourceTableName); |
598 | 644 | partitionedDF.write().insertInto(sourceTableName); |
599 | 645 | } |
| 646 | + |
| 647 | +  // TODO: update this helper to write the file for import without relying on Iceberg's field ID numbers |
| 648 | + public List<Record> createOrcFile(File orcFile, int numRows) throws IOException { |
| 649 | + // Needs to be deleted but depend on the rule to delete the file for us again at the end. |
| 650 | + if (orcFile.exists()) { |
| 651 | + orcFile.delete(); |
| 652 | + } |
| 653 | + final org.apache.iceberg.Schema icebergSchema = new org.apache.iceberg.Schema( |
| 654 | + optional(1, "x", Types.FloatType.get()), |
| 655 | + optional(2, "y", Types.DoubleType.get()), |
| 656 | + optional(3, "z", Types.LongType.get()) |
| 657 | + ); |
| 658 | + |
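|  | +    // Generate rows with fractional float/double values and long values beyond the int range |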
| 659 | + List<Record> records = Lists.newArrayListWithExpectedSize(numRows); |
| 660 | + for (int i = 0; i < numRows; i += 1) { |
| 661 | + Record record = org.apache.iceberg.data.GenericRecord.create(icebergSchema); |
| 662 | + record.setField("x", ((float) (100 - i)) / 100F + 1.0F); // 2.0f, 1.99f, 1.98f, ... |
| 663 | + record.setField("y", ((double) i) / 100.0D + 2.0D); // 2.0d, 2.01d, 2.02d, ... |
| 664 | + record.setField("z", 5_000_000_000L + i); |
| 665 | + records.add(record); |
| 666 | + } |
| 667 | + |
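|  | +    // Write the records with Iceberg's generic ORC writer; the metrics config sets default metrics to "none" |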
| 668 | + OutputFile outFile = Files.localOutput(orcFile); |
| 669 | + try (FileAppender<Record> appender = org.apache.iceberg.orc.ORC.write(outFile) |
| 670 | + .schema(icebergSchema) |
| 671 | + .metricsConfig(MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "none"))) |
| 672 | + .createWriterFunc(GenericOrcWriter::buildWriter) |
| 673 | + .build()) { |
| 674 | + appender.addAll(records); |
| 675 | + } |
| 676 | + return records; |
| 677 | + } |
600 | 678 | } |