diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index ac81842a9..af6e8d0f2 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -9,23 +9,32 @@ import javax.annotation.Nonnull; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.util.Util; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.linkedin.coral.common.FuzzyUnionSqlRewriter; /** * Class that implements {@link org.apache.calcite.plan.RelOptTable.ViewExpander} - * interface to support expansion of Hive Views to relational algebra. + * interface to support expansion of Hive Views to relational algebra. Logs a + * warning when the expanded body's row type disagrees with the caller-provided + * {@link RelDataType} so silent positional column swaps surface in logs. */ public class HiveViewExpander implements RelOptTable.ViewExpander { + // Non-final so unit tests can swap in a mock via reflection. See HiveViewExpanderTest. + private static Logger LOG = LoggerFactory.getLogger(HiveViewExpander.class); + private final HiveToRelConverter hiveToRelConverter; /** * Instantiates a new Hive view expander. @@ -46,6 +55,68 @@ public RelRoot expandView(RelDataType rowType, String queryString, List SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); - return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + RelRoot root = hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + warnIfRowTypeMisaligned(root, rowType); + // TODO: tighten this from a warning to an IllegalStateException once we + // are confident no live Hive view emits an expanded body whose row type + // disagrees with the HMS-recorded row type. This is currently a + // transitional safeguard while view producers migrate to projections + // that do not trigger the case-sensitive resolver mismatch. + return root; + } + + /** + * Logs a warning when {@code root}'s row type disagrees with {@code expected} + * by field count or by case-insensitive field name order. No-op when they + * already agree. + */ + @VisibleForTesting + static void warnIfRowTypeMisaligned(RelRoot root, RelDataType expected) { + final RelDataType actual = root.rel.getRowType(); + if (fieldNamesAlignedByOrder(expected, actual)) { + return; + } + LOG.warn("Expanded view row type does not match caller-provided row type. expected={}, actual={}", expected, + actual); + } + + private static boolean fieldNamesAlignedByOrder(RelDataType a, RelDataType b) { + if (a.getFieldCount() != b.getFieldCount()) { + return false; + } + final List af = a.getFieldList(); + final List bf = b.getFieldList(); + for (int i = 0; i < af.size(); i++) { + final RelDataTypeField afi = af.get(i); + final RelDataTypeField bfi = bf.get(i); + if (!afi.getName().equalsIgnoreCase(bfi.getName())) { + return false; + } + // Recurse into nested row-shaped types so reordered struct fields + // (including arrays/multisets of structs) are flagged too. Same + // class of silent positional-swap bug, one level deeper. + if (!nestedRowTypesAlignedByOrder(afi.getType(), bfi.getType())) { + return false; + } + } + return true; + } + + private static boolean nestedRowTypesAlignedByOrder(RelDataType a, RelDataType b) { + if (a.isStruct() != b.isStruct()) { + return false; + } + if (a.isStruct()) { + return fieldNamesAlignedByOrder(a, b); + } + final RelDataType ac = a.getComponentType(); + final RelDataType bc = b.getComponentType(); + if ((ac == null) != (bc == null)) { + return false; + } + if (ac != null) { + return nestedRowTypesAlignedByOrder(ac, bc); + } + return true; } } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java new file mode 100644 index 000000000..49b8e4e4f --- /dev/null +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveViewExpanderTest.java @@ -0,0 +1,222 @@ +/** + * Copyright 2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel; + +import java.lang.reflect.Field; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.volcano.VolcanoPlanner; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelRoot; +import org.apache.calcite.rel.logical.LogicalValues; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlTypeName; +import org.slf4j.Logger; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.Assert.assertSame; + + +/** + * Unit tests for {@link HiveViewExpander#warnIfRowTypeMisaligned}. The method + * is the diagnostic that surfaces a mismatch between the caller-provided row + * type and the expanded view body's row type -- a state that previously caused + * silent positional column swaps in downstream view-on-view consumers. Behavior + * is "log a warning and leave the {@code RelRoot} untouched"; tests pin both + * the warn emission and the no-rewrite contract. + */ +public class HiveViewExpanderTest { + + private RelDataTypeFactory typeFactory; + private RelOptCluster cluster; + private Logger originalLogger; + private Logger mockLogger; + + @BeforeClass + public void setUp() { + typeFactory = new JavaTypeFactoryImpl(); + cluster = RelOptCluster.create(new VolcanoPlanner(), new RexBuilder(typeFactory)); + } + + @BeforeMethod + public void installMockLogger() throws Exception { + mockLogger = mock(Logger.class); + Field logField = HiveViewExpander.class.getDeclaredField("LOG"); + logField.setAccessible(true); + originalLogger = (Logger) logField.get(null); + logField.set(null, mockLogger); + } + + @AfterMethod + public void restoreLogger() throws Exception { + Field logField = HiveViewExpander.class.getDeclaredField("LOG"); + logField.setAccessible(true); + logField.set(null, originalLogger); + } + + private RelDataType rowType(Object... nameTypePairs) { + if (nameTypePairs.length % 2 != 0) { + throw new IllegalArgumentException("Expected (name, type) pairs"); + } + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < nameTypePairs.length; i += 2) { + String name = (String) nameTypePairs[i]; + Object type = nameTypePairs[i + 1]; + if (type instanceof SqlTypeName) { + builder.add(name, (SqlTypeName) type); + } else if (type instanceof RelDataType) { + builder.add(name, (RelDataType) type); + } else { + throw new IllegalArgumentException("Expected SqlTypeName or RelDataType for field " + name); + } + } + return builder.build(); + } + + private RelDataType arrayOf(RelDataType element) { + return typeFactory.createArrayType(element, -1); + } + + private RelRoot rootWithRowType(RelDataType type) { + RelNode rel = LogicalValues.createEmpty(cluster, type); + return RelRoot.of(rel, SqlKind.SELECT); + } + + @Test + public void testAlignedByOrderEmitsNoWarning() { + RelDataType type = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + RelRoot root = rootWithRowType(type); + + HiveViewExpander.warnIfRowTypeMisaligned(root, type); + + verify(mockLogger, never()).warn(anyString(), any(), any()); + } + + @Test + public void testCaseDifferencesOnlyEmitNoWarning() { + RelRoot root = rootWithRowType(rowType("colA", SqlTypeName.INTEGER, "colB", SqlTypeName.VARCHAR)); + RelDataType expected = rowType("cola", SqlTypeName.INTEGER, "colb", SqlTypeName.VARCHAR); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, never()).warn(anyString(), any(), any()); + } + + /** + * Models the production failure mode this method exists to flag: an upstream + * view body that lists {@code idHash} (the longer name) ahead of its sibling + * {@code id} (the shorter name, a prefix of the other) introduces column + * aliases whose ordering disagrees with the downstream consumer's expected + * ordering. We no longer auto-realign -- Kyoto (kyoto_table_management#305) + * fixed the upstream ordering -- but we still want to know if a view trips it. + */ + @Test + public void testReorderedFieldsEmitWarning() { + RelRoot root = rootWithRowType(rowType("idHash", SqlTypeName.VARCHAR, "id", SqlTypeName.INTEGER)); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "idHash", SqlTypeName.VARCHAR); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testDifferentArityEmitsWarning() { + RelRoot root = + rootWithRowType(rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR, "c", SqlTypeName.DOUBLE)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testNestedStructAlignedEmitsNoWarning() { + RelDataType address = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType row = rowType("id", SqlTypeName.INTEGER, "address", address); + RelRoot root = rootWithRowType(row); + + HiveViewExpander.warnIfRowTypeMisaligned(root, row); + + verify(mockLogger, never()).warn(anyString(), any(), any()); + } + + @Test + public void testNestedStructFieldsReorderedEmitWarning() { + // Top-level field names agree ("address"), but the inner struct fields + // are in opposite order between caller and expanded body. The previous + // top-level-only check would miss this; the recursive check catches it. + RelDataType expectedAddress = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType actualAddress = rowType("city", SqlTypeName.VARCHAR, "street", SqlTypeName.VARCHAR); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "address", expectedAddress); + RelDataType actual = rowType("id", SqlTypeName.INTEGER, "address", actualAddress); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testArrayOfStructFieldsReorderedEmitWarning() { + // ARRAY> vs ARRAY> -- nested + // reorder is one level past the array. Same silent-swap risk; should warn. + RelDataType expectedElem = rowType("street", SqlTypeName.VARCHAR, "city", SqlTypeName.VARCHAR); + RelDataType actualElem = rowType("city", SqlTypeName.VARCHAR, "street", SqlTypeName.VARCHAR); + RelDataType expected = rowType("id", SqlTypeName.INTEGER, "addresses", arrayOf(expectedElem)); + RelDataType actual = rowType("id", SqlTypeName.INTEGER, "addresses", arrayOf(actualElem)); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testDeeplyNestedStructReorderedEmitWarning() { + // STRUCT>> vs same outer shape but the + // innermost struct fields are in opposite order. Pins the recursion. + RelDataType expectedGeo = rowType("lat", SqlTypeName.DOUBLE, "lon", SqlTypeName.DOUBLE); + RelDataType actualGeo = rowType("lon", SqlTypeName.DOUBLE, "lat", SqlTypeName.DOUBLE); + RelDataType expectedAddr = rowType("street", SqlTypeName.VARCHAR, "geo", expectedGeo); + RelDataType actualAddr = rowType("street", SqlTypeName.VARCHAR, "geo", actualGeo); + RelDataType expected = rowType("addr", expectedAddr); + RelDataType actual = rowType("addr", actualAddr); + RelRoot root = rootWithRowType(actual); + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + verify(mockLogger, times(1)).warn(anyString(), any(), any()); + } + + @Test + public void testWarningPathLeavesRootUnchanged() { + RelRoot root = rootWithRowType(rowType("b", SqlTypeName.VARCHAR, "a", SqlTypeName.INTEGER)); + RelDataType expected = rowType("a", SqlTypeName.INTEGER, "b", SqlTypeName.VARCHAR); + RelNode originalRel = root.rel; + + HiveViewExpander.warnIfRowTypeMisaligned(root, expected); + + // The helper returns void; we're pinning the contract that it does not + // mutate the RelRoot in place either (no LogicalProject wrap from earlier + // versions of this code). + assertSame(root.rel, originalRel); + } +}