From 26a65b64b42e8cc262016633baf13942f984af99 Mon Sep 17 00:00:00 2001 From: Simbarashe Dzinamarira Date: Wed, 13 May 2026 13:43:16 -0700 Subject: [PATCH 1/3] Honor caller-provided rowType in HiveViewExpander.expandView HiveViewExpander.expandView() previously discarded the rowType argument and returned whatever the re-parsed view body produced. When the caller-recorded row type and the expanded RelNode's row type differed in field order, downstream consumers that rely on positional access could land on the wrong RelNode-output position. Prefix-named sibling columns (one name a prefix of another) are particularly prone to swapping under such mismatches. This change wraps the expanded RelRoot in a LogicalProject that re-aligns fields to the caller's rowType by name (case-insensitive). The wrapper is a no-op when the orderings already match. If a caller-expected field is missing from the expanded body, or if arities differ, the method falls back to returning the original RelRoot unchanged. Adds: - Unit tests for alignToRowType covering aligned, case-only-difference, reorder, prefix-pair, missing-field, and arity-mismatch cases. - An integration test that drives the full expandView path with a deliberately reversed caller-provided rowType. --- .../coral/hive/hive2rel/HiveViewExpander.java | 72 +++++- .../hive/hive2rel/HiveToRelConverterTest.java | 39 ++++ .../hive/hive2rel/HiveViewExpanderTest.java | 215 ++++++++++++++++++ .../coral/hive/hive2rel/TestUtils.java | 4 + 4 files changed, 327 insertions(+), 3 deletions(-) create mode 100644 coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index ac81842a9..e9fcb5241 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -1,31 +1,45 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ package com.linkedin.coral.hive.hive2rel; +import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.util.Util; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.linkedin.coral.common.FuzzyUnionSqlRewriter; /** * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} - * interface to support expansion of Hive Views to relational algebra. + * interface to support expansion of Hive Views to relational algebra. The + * returned {@link RelRoot} is realigned to the caller-provided + * {@link RelDataType} by name (case-insensitive) to preserve the + * {@link RelOptTable.ViewExpander} contract. */ public class HiveViewExpander implements RelOptTable.ViewExpander { + private static final Logger LOG = LoggerFactory.getLogger(HiveViewExpander.class); + private final HiveToRelConverter hiveToRelConverter; /** * Instantiates a new Hive view expander. @@ -46,6 +60,58 @@ public RelRoot expandView(RelDataType rowType, String queryString, List SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); - return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + RelRoot root = hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + return alignToRowType(root, rowType); + } + + /** + * Wrap {@code root} in a {@link LogicalProject} that re-orders its output + * fields to match {@code expected} by name (case-insensitive). No-op when the + * orderings already agree. If a name is missing from {@code root} or arities + * differ, returns {@code root} unchanged and logs a warning. + */ + @VisibleForTesting + static RelRoot alignToRowType(RelRoot root, RelDataType expected) { + final RelNode rel = root.rel; + final RelDataType actual = rel.getRowType(); + if (expected.getFieldCount() != actual.getFieldCount()) { + LOG.warn("Skipping row-type alignment: expected {} fields, expanded view produced {}. expected={}, actual={}", + expected.getFieldCount(), actual.getFieldCount(), expected, actual); + return root; + } + if (fieldNamesAlignedByOrder(expected, actual)) { + return root; + } + final List projects = new ArrayList<>(expected.getFieldCount()); + final List names = new ArrayList<>(expected.getFieldCount()); + final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); + for (RelDataTypeField expectedField : expected.getFieldList()) { + // case-insensitive name lookup, no struct-field traversal + final RelDataTypeField actualField = actual.getField(expectedField.getName(), false, false); + if (actualField == null) { + LOG.warn( + "Skipping row-type alignment: expected field '{}' is absent from expanded view. expected={}, actual={}", + expectedField.getName(), expected, actual); + return root; + } + projects.add(rexBuilder.makeInputRef(actualField.getType(), actualField.getIndex())); + names.add(expectedField.getName()); + } + final RelNode aligned = LogicalProject.create(rel, projects, names); + return RelRoot.of(aligned, root.kind); + } + + private static boolean fieldNamesAlignedByOrder(RelDataType a, RelDataType b) { + if (a.getFieldCount() != b.getFieldCount()) { + return false; + } + final List af = a.getFieldList(); + final List bf = b.getFieldList(); + for (int i = 0; i < af.size(); i++) { + if (!af.get(i).getName().equalsIgnoreCase(bf.get(i).getName())) { + return false; + } + } + return true; } } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java index ba77b3f61..67bd84563 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java @@ -13,8 +13,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -668,6 +673,40 @@ public void testIntCastToBigIntDuringComparison() { assertEquals(relToString(sql), expected); } + /** + * Regression test for the {@link org.apache.calcite.plan.RelOptTable.ViewExpander} contract: + * the {@link org.apache.calcite.rel.RelRoot} returned from {@link HiveViewExpander#expandView} + * must have a row type that matches the caller-provided {@code rowType}, even when the + * re-parsed view body would naturally produce its columns in a different order. Without the + * alignment, downstream consumers that rely on positional access (e.g. {@code SELECT *} over + * a view-on-view) would index into the wrong RelNode output column. + */ + @Test + public void testExpandViewAlignsToCallerRowType() { + // Build a caller-provided rowType whose order is the reverse of the view body's + // natural order. The view body produces [col_a INT, col_b STRING]; we ask for + // [col_b STRING, col_a INT]. + RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(); + RelDataType reversedRowType = typeFactory.builder().add("col_b", typeFactory.createSqlType(SqlTypeName.VARCHAR)) + .add("col_a", typeFactory.createSqlType(SqlTypeName.INTEGER)).build(); + + HiveViewExpander expander = new HiveViewExpander(converter); + RelRoot result = expander.expandView(reversedRowType, "ignored", ImmutableList.of("hive", "test"), + ImmutableList.of("realign_view")); + + List actualNames = result.rel.getRowType().getFieldNames(); + assertEquals(actualNames.size(), 2, "Expected 2 output columns"); + assertEquals(actualNames.get(0).toLowerCase(), "col_b", + "Position 0 must be col_b (matches caller-provided rowType), got: " + actualNames); + assertEquals(actualNames.get(1).toLowerCase(), "col_a", + "Position 1 must be col_a (matches caller-provided rowType), got: " + actualNames); + + // The types must also line up with the caller-provided rowType, not the body's natural order. + List fields = result.rel.getRowType().getFieldList(); + assertEquals(fields.get(0).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(fields.get(1).getType().getSqlTypeName(), SqlTypeName.INTEGER); + } + private String relToString(String sql) { return RelOptUtil.toString(converter.convertSql(sql)); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java new file mode 100644 index 000000000..71679b1cf --- /dev/null +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java @@ -0,0 +1,215 @@ +/** + * Copyright 2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import java.util.List; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.core.Values; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; + + +/** + * Unit tests for {@link HiveViewExpander#alignToRowType}. These exercise the + * alignment logic in isolation -- no Hive metastore or end-to-end view expansion + * required. + */ +public class HiveViewExpanderTest { + + private RelDataTypeFactory typeFactory; + private RelOptCluster cluster; + + @BeforeClass + public void setUp() { + typeFactory = new JavaTypeFactoryImpl(); + cluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(typeFactory)); + } + + private RelDataType rowType(Object... nameTypePairs) { + if (nameTypePairs.length % 2 != 0) { + throw new IllegalArgumentException("Expected (name, type) pairs"); + } + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < nameTypePairs.length; i += 2) { + builder.add((String) nameTypePairs[i], (SqlTypeName) nameTypePairs[i + 1]); + } + return builder.build(); + } + + private RelRoot rootWithRowType(RelDataType type) { + RelNode rel = LogicalValues.createEmpty(cluster, type); + return RelRoot.of(rel, SqlKind.SELECT); + } + + @Test + public void testAlignedByOrderReturnsSameRoot() { + RelDataType type = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + RelRoot root = rootWithRowType(type); + + RelRoot result = HiveViewExpander.alignToRowType(root, type); + + assertSame(result, root, "Already-aligned root should be returned unchanged"); + } + + @Test + public void testCaseDifferencesOnlyReturnsSameRoot() { + RelRoot root = rootWithRowType(rowType("colA", SqlTypeName.INTEGER, "colB", SqlTypeName.VARCHAR)); + RelDataType expected = rowType("cola", SqlTypeName.INTEGER, "colb", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Names differing only in case should be treated as aligned (case-insensitive)"); + } + + @Test + public void testReorderedFieldsWrapInProject() { + RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject, + "Reordered fields should result in a LogicalProject wrapper, got " + result.rel.getClass()); + LogicalProject project = (LogicalProject) result.rel; + assertTrue(project.getInput() instanceof Values, "Wrapped Project should sit directly on the input"); + + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.size(), 2); + assertEquals(outFields.get(0).getName(), "a"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "b"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + } + + /** + * Models the production failure mode this method exists to prevent: an + * upstream view body that lists {@code idHash} (the longer name) ahead of + * its sibling {@code id} (the shorter name, a prefix of the other) + * introduces column aliases whose ordering disagrees with the downstream + * consumer's expected ordering. Without realignment, the downstream's + * positional access lands {@code id} on the {@code idHash} position and + * vice versa, silently swapping every value read from those columns. + */ + @Test + public void testPrefixSiblingsReorderedCorrectly() { + RelRoot root = rootWithRowType(rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER)); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject); + LogicalProject project = (LogicalProject) result.rel; + + // Output field order matches expected: id (INTEGER) first, idHash (VARCHAR) second. + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.get(0).getName(), "id"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "idHash"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + + // And the underlying input refs cross-link to the correct source positions: + // expected[0]=id -> input position 1; expected[1]=idHash -> input position 0. + assertTrue(project.getProjects().get(0) instanceof RexInputRef); + assertTrue(project.getProjects().get(1) instanceof RexInputRef); + assertEquals(((RexInputRef) project.getProjects().get(0)).getIndex(), 1); + assertEquals(((RexInputRef) project.getProjects().get(1)).getIndex(), 0); + } + + /** + * Realignment over multiple prefix-name pairs at once. Each pair in the input + * row type lists the longer name first ({@code idHash} before {@code id}, + * {@code emailVerified} before {@code email}, {@code countryCode} before + * {@code country}); the caller's expected row type lists them the other way + * around. Mirrors the realistic failure shape where an upstream view body's + * case-sensitive alphabetical sort places longer prefix-extensions ahead of + * their shorter siblings, and a downstream consumer expects the + * lowercase-alphabetical (prefix-first) order. Each pair must be realigned + * independently and correctly. + */ + @Test + public void testMultiplePrefixSiblingsAllReorderedCorrectly() { + RelRoot root = rootWithRowType( + rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER, "emailVerified", SqlTypeName.BOOLEAN, "email", + SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR, "country", SqlTypeName.VARCHAR)); + RelDataType expected = + rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR, "email", SqlTypeName.VARCHAR, "emailVerified", + SqlTypeName.BOOLEAN, "country", SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertTrue(result.rel instanceof LogicalProject); + LogicalProject project = (LogicalProject) result.rel; + + // Output names and types follow the expected (prefix-first) ordering. + List outFields = result.rel.getRowType().getFieldList(); + assertEquals(outFields.size(), 6); + assertEquals(outFields.get(0).getName(), "id"); + assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + assertEquals(outFields.get(1).getName(), "idHash"); + assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(2).getName(), "email"); + assertEquals(outFields.get(2).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(3).getName(), "emailVerified"); + assertEquals(outFields.get(3).getType().getSqlTypeName(), SqlTypeName.BOOLEAN); + assertEquals(outFields.get(4).getName(), "country"); + assertEquals(outFields.get(4).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + assertEquals(outFields.get(5).getName(), "countryCode"); + assertEquals(outFields.get(5).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + + // Each expected position points at the correct source position in the input row type. + // Input order was: [idHash=0, id=1, emailVerified=2, email=3, countryCode=4, country=5]. + int[] expectedSourceIndices = { 1, 0, 3, 2, 5, 4 }; + for (int i = 0; i < expectedSourceIndices.length; i++) { + assertTrue(project.getProjects().get(i) instanceof RexInputRef, "Project " + i + " should be a RexInputRef"); + assertEquals(((RexInputRef) project.getProjects().get(i)).getIndex(), expectedSourceIndices[i], + "Expected position " + i + " (" + outFields.get(i).getName() + ") should source from input position " + + expectedSourceIndices[i]); + } + } + + @Test + public void testMissingFieldReturnsOriginalRoot() { + RelRoot root = rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR)); + // `c` is not in the input -- alignment should bail rather than fabricate. + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "c", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Missing field should trigger safe fallback to the original root"); + } + + @Test + public void testDifferentArityReturnsOriginalRoot() { + RelRoot root = + rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR, "c", SqlTypeName.DOUBLE)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + + RelRoot result = HiveViewExpander.alignToRowType(root, expected); + + assertSame(result, root, "Different arity should trigger safe fallback rather than silent column drop"); + assertFalse(result.rel instanceof LogicalProject, + "Different-arity case should not wrap in a Project (would be lossy)"); + } +} diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java index d9443f44b..43f61622b 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java @@ -85,6 +85,10 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException { driver.run( "CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)"); + // Fixture for HiveViewExpander.expandView() row-type alignment regression test. + driver.run("CREATE TABLE IF NOT EXISTS test.realign_base(col_a int, col_b string)"); + driver.run("CREATE VIEW IF NOT EXISTS test.realign_view AS SELECT * FROM test.realign_base"); + driver.run("CREATE DATABASE IF NOT EXISTS fuzzy_union"); driver.run("CREATE TABLE IF NOT EXISTS fuzzy_union.tableA(a int, b struct)"); From 38a8a9deb44d6165ebfc0ece3162d4b482cad403 Mon Sep 17 00:00:00 2001 From: Simbarashe Dzinamarira Date: Thu, 28 May 2026 11:02:28 -0700 Subject: [PATCH 2/3] Downgrade row-type alignment from active fix to warn-only diagnostic The upstream column-ordering condition that motivated this PR has been addressed at the view-producer layer (the upstream view's projection no longer triggers the case-sensitive resolver mismatch in HiveToRelConverter). With that upstream change in place, Coral no longer needs to silently rewrite the expanded view to re-align with the caller's rowType. This commit drops the LogicalProject re-alignment branch added in the previous commit and replaces it with a single warn-only diagnostic: - HiveViewExpander.expandView() now invokes warnIfRowTypeMisaligned and returns the original RelRoot unchanged. - warnIfRowTypeMisaligned logs a single warning when the expanded body's row type disagrees with the caller-provided rowType (either by field count or by case-insensitive field name order). No-op fast path when they agree. - A TODO marks the intent to tighten this from warn to an IllegalStateException once we are confident no live view trips it. Tests: - HiveViewExpanderTest rewritten to verify the warn-emission and no-rewrite contract directly via a Mockito-mocked slf4j Logger swapped in/out via reflection. - The integration test in HiveToRelConverterTest and its realign_view fixture in TestUtils are removed; they only made sense when the expander actively realigned the row type. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../coral/hive/hive2rel/HiveViewExpander.java | 61 ++---- .../hive/hive2rel/HiveToRelConverterTest.java | 39 ---- .../hive/hive2rel/HiveViewExpanderTest.java | 190 ++++++------------ .../coral/hive/hive2rel/TestUtils.java | 4 - 4 files changed, 83 insertions(+), 211 deletions(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index e9fcb5241..4892b154f 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -5,7 +5,6 @@ */ package com.linkedin.coral.hive.hive2rel; -import java.util.ArrayList; import java.util.List; import javax.annotation.Nonnull; @@ -14,13 +13,9 @@ import com.google.common.base.Preconditions; import org.apache.calcite.plan.RelOptTable; -import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexNode; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.util.Util; import org.slf4j.Logger; @@ -31,14 +26,14 @@ /** * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} - * interface to support expansion of Hive Views to relational algebra. The - * returned {@link RelRoot} is realigned to the caller-provided - * {@link RelDataType} by name (case-insensitive) to preserve the - * {@link RelOptTable.ViewExpander} contract. + * interface to support expansion of Hive Views to relational algebra. Logs a + * warning when the expanded body's row type disagrees with the caller-provided + * {@link RelDataType} so silent positional column swaps surface in logs. */ public class HiveViewExpander implements RelOptTable.ViewExpander { - private static final Logger LOG = LoggerFactory.getLogger(HiveViewExpander.class); + // Non-final so unit tests can swap in a mock via reflection. See HiveViewExpanderTest. + private static Logger LOG = LoggerFactory.getLogger(HiveViewExpander.class); private final HiveToRelConverter hiveToRelConverter; /** @@ -61,44 +56,28 @@ public RelRoot expandView(RelDataType rowType, String queryString, List SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); RelRoot root = hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); - return alignToRowType(root, rowType); + warnIfRowTypeMisaligned(root, rowType); + // TODO: tighten this from a warning to an IllegalStateException once we + // are confident no live Hive view emits an expanded body whose row type + // disagrees with the HMS-recorded row type. This is currently a + // transitional safeguard while view producers migrate to projections + // that do not trigger the case-sensitive resolver mismatch. + return root; } /** - * Wrap {@code root} in a {@link LogicalProject} that re-orders its output - * fields to match {@code expected} by name (case-insensitive). No-op when the - * orderings already agree. If a name is missing from {@code root} or arities - * differ, returns {@code root} unchanged and logs a warning. + * Logs a warning when {@code root}'s row type disagrees with {@code expected} + * by field count or by case-insensitive field name order. No-op when they + * already agree. */ @VisibleForTesting - static RelRoot alignToRowType(RelRoot root, RelDataType expected) { - final RelNode rel = root.rel; - final RelDataType actual = rel.getRowType(); - if (expected.getFieldCount() != actual.getFieldCount()) { - LOG.warn("Skipping row-type alignment: expected {} fields, expanded view produced {}. expected={}, actual={}", - expected.getFieldCount(), actual.getFieldCount(), expected, actual); - return root; - } + static void warnIfRowTypeMisaligned(RelRoot root, RelDataType expected) { + final RelDataType actual = root.rel.getRowType(); if (fieldNamesAlignedByOrder(expected, actual)) { - return root; - } - final List projects = new ArrayList<>(expected.getFieldCount()); - final List names = new ArrayList<>(expected.getFieldCount()); - final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); - for (RelDataTypeField expectedField : expected.getFieldList()) { - // case-insensitive name lookup, no struct-field traversal - final RelDataTypeField actualField = actual.getField(expectedField.getName(), false, false); - if (actualField == null) { - LOG.warn( - "Skipping row-type alignment: expected field '{}' is absent from expanded view. expected={}, actual={}", - expectedField.getName(), expected, actual); - return root; - } - projects.add(rexBuilder.makeInputRef(actualField.getType(), actualField.getIndex())); - names.add(expectedField.getName()); + return; } - final RelNode aligned = LogicalProject.create(rel, projects, names); - return RelRoot.of(aligned, root.kind); + LOG.warn("Expanded view row type does not match caller-provided row type. expected={}, actual={}", expected, + actual); } private static boolean fieldNamesAlignedByOrder(RelDataType a, RelDataType b) { diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java index 67bd84563..ba77b3f61 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java @@ -13,13 +13,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeFamily; @@ -673,40 +668,6 @@ public void testIntCastToBigIntDuringComparison() { assertEquals(relToString(sql), expected); } - /** - * Regression test for the {@link org.apache.calcite.plan.RelOptTable.ViewExpander} contract: - * the {@link org.apache.calcite.rel.RelRoot} returned from {@link HiveViewExpander#expandView} - * must have a row type that matches the caller-provided {@code rowType}, even when the - * re-parsed view body would naturally produce its columns in a different order. Without the - * alignment, downstream consumers that rely on positional access (e.g. {@code SELECT *} over - * a view-on-view) would index into the wrong RelNode output column. - */ - @Test - public void testExpandViewAlignsToCallerRowType() { - // Build a caller-provided rowType whose order is the reverse of the view body's - // natural order. The view body produces [col_a INT, col_b STRING]; we ask for - // [col_b STRING, col_a INT]. - RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(); - RelDataType reversedRowType = typeFactory.builder().add("col_b", typeFactory.createSqlType(SqlTypeName.VARCHAR)) - .add("col_a", typeFactory.createSqlType(SqlTypeName.INTEGER)).build(); - - HiveViewExpander expander = new HiveViewExpander(converter); - RelRoot result = expander.expandView(reversedRowType, "ignored", ImmutableList.of("hive", "test"), - ImmutableList.of("realign_view")); - - List actualNames = result.rel.getRowType().getFieldNames(); - assertEquals(actualNames.size(), 2, "Expected 2 output columns"); - assertEquals(actualNames.get(0).toLowerCase(), "col_b", - "Position 0 must be col_b (matches caller-provided rowType), got: " + actualNames); - assertEquals(actualNames.get(1).toLowerCase(), "col_a", - "Position 1 must be col_a (matches caller-provided rowType), got: " + actualNames); - - // The types must also line up with the caller-provided rowType, not the body's natural order. - List fields = result.rel.getRowType().getFieldList(); - assertEquals(fields.get(0).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - assertEquals(fields.get(1).getType().getSqlTypeName(), SqlTypeName.INTEGER); - } - private String relToString(String sql) { return RelOptUtil.toString(converter.convertSql(sql)); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java index 71679b1cf..29e05f0c4 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java @@ -5,41 +5,48 @@ */ package com.linkedin.coral.hive.hive2rel; -import java.util.List; +import java.lang.reflect.Field; import org.apache.calcite.jdbc.JavaTypeFactoryImpl; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.volcano.VolcanoPlanner; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; -import org.apache.calcite.rel.core.Values; -import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeName; +import org.slf4j.Logger; +import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertFalse; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.testng.Assert.assertSame; -import static org.testng.Assert.assertTrue; /** - * Unit tests for {@link HiveViewExpander#alignToRowType}. These exercise the - * alignment logic in isolation -- no Hive metastore or end-to-end view expansion - * required. + * Unit tests for {@link HiveViewExpander#warnIfRowTypeMisaligned}. The method + * is the diagnostic that surfaces a mismatch between the caller-provided row + * type and the expanded view body's row type -- a state that previously caused + * silent positional column swaps in downstream view-on-view consumers. Behavior + * is "log a warning and leave the {@code RelRoot} untouched"; tests pin both + * the warn emission and the no-rewrite contract. */ public class HiveViewExpanderTest { private RelDataTypeFactory typeFactory; private RelOptCluster cluster; + private Logger originalLogger; + private Logger mockLogger; @BeforeClass public void setUp() { @@ -47,6 +54,22 @@ public void setUp() { cluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(typeFactory)); } + @BeforeMethod + public void installMockLogger() throws Exception { + mockLogger = mock(Logger.class); + Field logField = HiveViewExpander.class.getDeclaredField("LOG"); + logField.setAccessible(true); + originalLogger = (Logger) logField.get(null); + logField.set(null, mockLogger); + } + + @AfterMethod + public void restoreLogger() throws Exception { + Field logField = HiveViewExpander.class.getDeclaredField("LOG"); + logField.setAccessible(true); + logField.set(null, originalLogger); + } + private RelDataType rowType(Object... nameTypePairs) { if (nameTypePairs.length % 2 != 0) { throw new IllegalArgumentException("Expected (name, type) pairs"); @@ -64,152 +87,65 @@ private RelRoot rootWithRowType(RelDataType type) { } @Test - public void testAlignedByOrderReturnsSameRoot() { + public void testAlignedByOrderEmitsNoWarning() { RelDataType type = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); RelRoot root = rootWithRowType(type); - RelRoot result = HiveViewExpander.alignToRowType(root, type); + HiveViewExpander.warnIfRowTypeMisaligned(root, type); - assertSame(result, root, "Already-aligned root should be returned unchanged"); + verify(mockLogger, never()).warn(anyString(), any(), any()); } @Test - public void testCaseDifferencesOnlyReturnsSameRoot() { + public void testCaseDifferencesOnlyEmitNoWarning() { RelRoot root = rootWithRowType(rowType("colA", SqlTypeName.INTEGER, "colB", SqlTypeName.VARCHAR)); RelDataType expected = rowType("cola", SqlTypeName.INTEGER, "colb", SqlTypeName.VARCHAR); - RelRoot result = HiveViewExpander.alignToRowType(root, expected); + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); - assertSame(result, root, "Names differing only in case should be treated as aligned (case-insensitive)"); - } - - @Test - public void testReorderedFieldsWrapInProject() { - RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER)); - RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); - - RelRoot result = HiveViewExpander.alignToRowType(root, expected); - - assertTrue(result.rel instanceof LogicalProject, - "Reordered fields should result in a LogicalProject wrapper, got " + result.rel.getClass()); - LogicalProject project = (LogicalProject) result.rel; - assertTrue(project.getInput() instanceof Values, "Wrapped Project should sit directly on the input"); - - List outFields = result.rel.getRowType().getFieldList(); - assertEquals(outFields.size(), 2); - assertEquals(outFields.get(0).getName(), "a"); - assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); - assertEquals(outFields.get(1).getName(), "b"); - assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); + verify(mockLogger, never()).warn(anyString(), any(), any()); } /** - * Models the production failure mode this method exists to prevent: an - * upstream view body that lists {@code idHash} (the longer name) ahead of - * its sibling {@code id} (the shorter name, a prefix of the other) - * introduces column aliases whose ordering disagrees with the downstream - * consumer's expected ordering. Without realignment, the downstream's - * positional access lands {@code id} on the {@code idHash} position and - * vice versa, silently swapping every value read from those columns. + * Models the production failure mode this method exists to flag: an upstream + * view body that lists {@code idHash} (the longer name) ahead of its sibling + * {@code id} (the shorter name, a prefix of the other) introduces column + * aliases whose ordering disagrees with the downstream consumer's expected + * ordering. We no longer auto-realign -- Kyoto (kyoto_table_management#305) + * fixed the upstream ordering -- but we still want to know if a view trips it. */ @Test - public void testPrefixSiblingsReorderedCorrectly() { + public void testReorderedFieldsEmitWarning() { RelRoot root = rootWithRowType(rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER)); RelDataType expected = rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR); - RelRoot result = HiveViewExpander.alignToRowType(root, expected); - - assertTrue(result.rel instanceof LogicalProject); - LogicalProject project = (LogicalProject) result.rel; + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); - // Output field order matches expected: id (INTEGER) first, idHash (VARCHAR) second. - List outFields = result.rel.getRowType().getFieldList(); - assertEquals(outFields.get(0).getName(), "id"); - assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); - assertEquals(outFields.get(1).getName(), "idHash"); - assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - - // And the underlying input refs cross-link to the correct source positions: - // expected[0]=id -> input position 1; expected[1]=idHash -> input position 0. - assertTrue(project.getProjects().get(0) instanceof RexInputRef); - assertTrue(project.getProjects().get(1) instanceof RexInputRef); - assertEquals(((RexInputRef) project.getProjects().get(0)).getIndex(), 1); - assertEquals(((RexInputRef) project.getProjects().get(1)).getIndex(), 0); - } - - /** - * Realignment over multiple prefix-name pairs at once. Each pair in the input - * row type lists the longer name first ({@code idHash} before {@code id}, - * {@code emailVerified} before {@code email}, {@code countryCode} before - * {@code country}); the caller's expected row type lists them the other way - * around. Mirrors the realistic failure shape where an upstream view body's - * case-sensitive alphabetical sort places longer prefix-extensions ahead of - * their shorter siblings, and a downstream consumer expects the - * lowercase-alphabetical (prefix-first) order. Each pair must be realigned - * independently and correctly. - */ - @Test - public void testMultiplePrefixSiblingsAllReorderedCorrectly() { - RelRoot root = rootWithRowType( - rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER, "emailVerified", SqlTypeName.BOOLEAN, "email", - SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR, "country", SqlTypeName.VARCHAR)); - RelDataType expected = - rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR, "email", SqlTypeName.VARCHAR, "emailVerified", - SqlTypeName.BOOLEAN, "country", SqlTypeName.VARCHAR, "countryCode", SqlTypeName.VARCHAR); - - RelRoot result = HiveViewExpander.alignToRowType(root, expected); - - assertTrue(result.rel instanceof LogicalProject); - LogicalProject project = (LogicalProject) result.rel; - - // Output names and types follow the expected (prefix-first) ordering. - List outFields = result.rel.getRowType().getFieldList(); - assertEquals(outFields.size(), 6); - assertEquals(outFields.get(0).getName(), "id"); - assertEquals(outFields.get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); - assertEquals(outFields.get(1).getName(), "idHash"); - assertEquals(outFields.get(1).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - assertEquals(outFields.get(2).getName(), "email"); - assertEquals(outFields.get(2).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - assertEquals(outFields.get(3).getName(), "emailVerified"); - assertEquals(outFields.get(3).getType().getSqlTypeName(), SqlTypeName.BOOLEAN); - assertEquals(outFields.get(4).getName(), "country"); - assertEquals(outFields.get(4).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - assertEquals(outFields.get(5).getName(), "countryCode"); - assertEquals(outFields.get(5).getType().getSqlTypeName(), SqlTypeName.VARCHAR); - - // Each expected position points at the correct source position in the input row type. - // Input order was: [idHash=0, id=1, emailVerified=2, email=3, countryCode=4, country=5]. - int[] expectedSourceIndices = { 1, 0, 3, 2, 5, 4 }; - for (int i = 0; i < expectedSourceIndices.length; i++) { - assertTrue(project.getProjects().get(i) instanceof RexInputRef, "Project " + i + " should be a RexInputRef"); - assertEquals(((RexInputRef) project.getProjects().get(i)).getIndex(), expectedSourceIndices[i], - "Expected position " + i + " (" + outFields.get(i).getName() + ") should source from input position " - + expectedSourceIndices[i]); - } + verify(mockLogger, times(1)).warn(anyString(), any(), any()); } @Test - public void testMissingFieldReturnsOriginalRoot() { - RelRoot root = rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR)); - // `c` is not in the input -- alignment should bail rather than fabricate. - RelDataType expected = rowType("a", SqlTypeName.INTEGER, "c", SqlTypeName.VARCHAR); + public void testDifferentArityEmitsWarning() { + RelRoot root = + rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR, "c", SqlTypeName.DOUBLE)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); - RelRoot result = HiveViewExpander.alignToRowType(root, expected); + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); - assertSame(result, root, "Missing field should trigger safe fallback to the original root"); + verify(mockLogger, times(1)).warn(anyString(), any(), any()); } @Test - public void testDifferentArityReturnsOriginalRoot() { - RelRoot root = - rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR, "c", SqlTypeName.DOUBLE)); + public void testWarningPathLeavesRootUnchanged() { + RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER)); RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + RelNode originalRel = root.rel; - RelRoot result = HiveViewExpander.alignToRowType(root, expected); + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); - assertSame(result, root, "Different arity should trigger safe fallback rather than silent column drop"); - assertFalse(result.rel instanceof LogicalProject, - "Different-arity case should not wrap in a Project (would be lossy)"); + // The helper returns void; we're pinning the contract that it does not + // mutate the RelRoot in place either (no LogicalProject wrap from earlier + // versions of this code). + assertSame(root.rel, originalRel); } } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java index 43f61622b..d9443f44b 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java @@ -85,10 +85,6 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException { driver.run( "CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)"); - // Fixture for HiveViewExpander.expandView() row-type alignment regression test. - driver.run("CREATE TABLE IF NOT EXISTS test.realign_base(col_a int, col_b string)"); - driver.run("CREATE VIEW IF NOT EXISTS test.realign_view AS SELECT * FROM test.realign_base"); - driver.run("CREATE DATABASE IF NOT EXISTS fuzzy_union"); driver.run("CREATE TABLE IF NOT EXISTS fuzzy_union.tableA(a int, b struct)"); From e700d48bfe2c4bba052d2fe4cf5b83394973f76b Mon Sep 17 00:00:00 2001 From: Simbarashe Dzinamarira Date: Thu, 28 May 2026 14:08:52 -0700 Subject: [PATCH 3/3] Extend mismatch detection to nested struct fields The top-level fieldNamesAlignedByOrder check missed the same class of silent positional-swap bug at deeper nesting levels: when a top-level field is itself a STRUCT (or an ARRAY/MULTISET of STRUCT) and its nested field order differs between caller and expanded body, a downstream consumer's positional access into the struct could land on the wrong source column. Adds a nestedRowTypesAlignedByOrder helper that recurses through: - STRUCT types: re-runs the case-insensitive field-name-by-order check on the nested row type. - ARRAY / MULTISET types: unwraps the component type and recurses, so ARRAY> reorderings are caught one level past the array. Adds three unit tests covering: - Aligned nested struct (no warn). - Top-level field names agree but inner struct fields are reordered. - ARRAY-of-STRUCT element fields reordered. - Three-level nesting (STRUCT>>) with the innermost struct fields reordered. Sanity-checked by stubbing nestedRowTypesAlignedByOrder to always return true: exactly the three nested-aware tests fail, the rest pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../coral/hive/hive2rel/HiveViewExpander.java | 28 ++++++- .../hive/hive2rel/HiveViewExpanderTest.java | 73 ++++++++++++++++++- 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index 4892b154f..af6e8d0f2 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -87,9 +87,35 @@ private static boolean fieldNamesAlignedByOrder(RelDataType a, RelDataType b) { final List af = a.getFieldList(); final List bf = b.getFieldList(); for (int i = 0; i < af.size(); i++) { - if (!af.get(i).getName().equalsIgnoreCase(bf.get(i).getName())) { + final RelDataTypeField afi = af.get(i); + final RelDataTypeField bfi = bf.get(i); + if (!afi.getName().equalsIgnoreCase(bfi.getName())) { return false; } + // Recurse into nested row-shaped types so reordered struct fields + // (including arrays/multisets of structs) are flagged too. Same + // class of silent positional-swap bug, one level deeper. + if (!nestedRowTypesAlignedByOrder(afi.getType(), bfi.getType())) { + return false; + } + } + return true; + } + + private static boolean nestedRowTypesAlignedByOrder(RelDataType a, RelDataType b) { + if (a.isStruct() != b.isStruct()) { + return false; + } + if (a.isStruct()) { + return fieldNamesAlignedByOrder(a, b); + } + final RelDataType ac = a.getComponentType(); + final RelDataType bc = b.getComponentType(); + if ((ac == null) != (bc == null)) { + return false; + } + if (ac != null) { + return nestedRowTypesAlignedByOrder(ac, bc); } return true; } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java index 29e05f0c4..49b8e4e4f 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java @@ -76,11 +76,23 @@ private RelDataType rowType(Object... nameTypePairs) { } RelDataTypeFactory.Builder builder = typeFactory.builder(); for (int i = 0; i < nameTypePairs.length; i += 2) { - builder.add((String) nameTypePairs[i], (SqlTypeName) nameTypePairs[i + 1]); + String name = (String) nameTypePairs[i]; + Object type = nameTypePairs[i + 1]; + if (type instanceof SqlTypeName) { + builder.add(name, (SqlTypeName) type); + } else if (type instanceof RelDataType) { + builder.add(name, (RelDataType) type); + } else { + throw new IllegalArgumentException("Expected SqlTypeName or RelDataType for field " + name); + } } return builder.build(); } + private RelDataType arrayOf(RelDataType element) { + return typeFactory.createArrayType(element, -1); + } + private RelRoot rootWithRowType(RelDataType type) { RelNode rel = LogicalValues.createEmpty(cluster, type); return RelRoot.of(rel, SqlKind.SELECT); @@ -135,6 +147,65 @@ public void testDifferentArityEmitsWarning() { verify(mockLogger, times(1)).warn(anyString(), any(), any()); } + @Test + public void testNestedStructAlignedEmitsNoWarning() { + RelDataType address = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType row = rowType("id", SqlTypeName.INTEGER, "address", address); + RelRoot root = rootWithRowType(row); + + HiveViewExpander.warnIfRowTypeMisaligned(root, row); + + verify(mockLogger, never()).warn(anyString(), any(), any()); + } + + @Test + public void testNestedStructFieldsReorderedEmitWarning() { + // Top-level field names agree ("address"), but the inner struct fields + // are in opposite order between caller and expanded body. The previous + // top-level-only check would miss this; the recursive check catches it. + RelDataType expectedAddress = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType actualAddress = rowType("city", SqlTypeName.VARCHAR, "street", SqlTypeName.VARCHAR); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "address", expectedAddress); + RelDataType actual = rowType("id", SqlTypeName.INTEGER, "address", actualAddress); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testArrayOfStructFieldsReorderedEmitWarning() { + // ARRAY> vs ARRAY> -- nested + // reorder is one level past the array. Same silent-swap risk; should warn. + RelDataType expectedElem = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType actualElem = rowType("city", SqlTypeName.VARCHAR, "street", SqlTypeName.VARCHAR); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "addresses", arrayOf(expectedElem)); + RelDataType actual = rowType("id", SqlTypeName.INTEGER, "addresses", arrayOf(actualElem)); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testDeeplyNestedStructReorderedEmitWarning() { + // STRUCT>> vs same outer shape but the + // innermost struct fields are in opposite order. Pins the recursion. + RelDataType expectedGeo = rowType("lat", SqlTypeName.DOUBLE, "lon", SqlTypeName.DOUBLE); + RelDataType actualGeo = rowType("lon", SqlTypeName.DOUBLE, "lat", SqlTypeName.DOUBLE); + RelDataType expectedAddr = rowType("street", SqlTypeName.VARCHAR, "geo", expectedGeo); + RelDataType actualAddr = rowType("street", SqlTypeName.VARCHAR, "geo", actualGeo); + RelDataType expected = rowType("addr", expectedAddr); + RelDataType actual = rowType("addr", actualAddr); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + @Test public void testWarningPathLeavesRootUnchanged() { RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER));