//
// Title: TMDL Slimmer - Strip metadata bloat for LLM context
//
// Author: Alexis Olson
// Version: 1.0
//
// Description:
//   Reads all *.tmdl files from a SemanticModel/definition folder,
//   removes UI/engine metadata while preserving model semantics,
//   and outputs a single .slimdl file for LLM consumption.
//
// Usage:
//   - Run in Tabular Editor 2 or 3 (Advanced Scripting)
//   - Select your SemanticModel folder when prompted
//     (selecting the 'definition' folder itself also works)
//   - Choose where to save the output .slimdl file

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

// ==================== CONFIGURATION ====================
// Each flag enables one group of removal patterns defined further below.
bool REMOVE_Annotations  = true;  // annotation, changedProperty, extendedProperties
bool REMOVE_Lineage      = true;  // lineageTag, sourceLineageTag
bool REMOVE_LanguageData = true;  // cultures folder, culture, linguisticMetadata
bool REMOVE_ColumnMeta   = true;  // summarizeBy, sourceColumn, dataCategory (+ select column booleans)
bool REMOVE_InferredMeta = true;  // isNameInferred, isDataTypeInferred, sourceProviderType
bool REMOVE_DisplayProps = true;  // isHidden, displayFolder, formatString, isDefaultLabel/Image

// ==================== MAIN EXECUTION ====================
// NOTE(review): Info(...) and Error(...) are not defined in this file; presumably they are
// the message helpers supplied by the Tabular Editor scripting host - confirm.
try
{
    // ---- 1. Pick the model folder ----
    string modelFolder;
    using (var dialog = new FolderBrowserDialog())
    {
        dialog.Description = "Select the SemanticModel folder (contains 'definition' subfolder)";
        dialog.ShowNewFolderButton = false;
        if (dialog.ShowDialog() != DialogResult.OK) return;
        modelFolder = dialog.SelectedPath;
    }

    // Locate the definition root. If there is no 'definition' subfolder, assume the
    // user selected the definition folder directly.
    string definitionPath = Path.Combine(modelFolder, "definition");
    if (!Directory.Exists(definitionPath))
    {
        definitionPath = modelFolder;
    }

    // ---- 2. Collect all TMDL files (single scan covers both folder layouts) ----
    string[] tmdlFiles = Directory.GetFiles(definitionPath, "*.tmdl", SearchOption.AllDirectories);
    if (tmdlFiles.Length == 0)
    {
        Info("No TMDL files found in the selected folder.");
        return;
    }
    // Culture-independent ordering so the combined output is stable across machines.
    Array.Sort(tmdlFiles, StringComparer.OrdinalIgnoreCase);

    // ---- 3. Build removal patterns from the configuration flags ----
    // A list (not a dictionary) keeps the evaluation order explicit: first match wins.
    var patterns = new List<KeyValuePair<string, Regex>>();

    // Common regex components for matching property assignments
    string ASSIGN = @"\s*(?:=|:)";                                  // optional whitespace, then = or :
    string BOOL = @"(?:\s*(?:=|:)\s*(?:true|false))?\s*;?\s*$";     // optional boolean value and semicolon, to end of line

    // Registers a pattern only when its removal flag is enabled
    Action<bool, string, string> Add = (flag, name, pattern) =>
    {
        if (flag) patterns.Add(new KeyValuePair<string, Regex>(name, new Regex(pattern)));
    };

    // Annotations group
    Add(REMOVE_Annotations, "annotation", @"^\s*annotation\b");
    Add(REMOVE_Annotations, "changedProperty", @"^\s*changedProperty\b");
    Add(REMOVE_Annotations, "extendedProperties", @"^\s*extendedProperties" + ASSIGN + @"\s*\{?");

    // Lineage tracking group
    Add(REMOVE_Lineage, "lineageTag", @"^\s*lineageTag" + ASSIGN);
    Add(REMOVE_Lineage, "sourceLineageTag", @"^\s*sourceLineageTag" + ASSIGN);

    // Language/culture group
    Add(REMOVE_LanguageData, "culture", @"^\s*culture" + ASSIGN);
    Add(REMOVE_LanguageData, "refCulture", @"^\s*ref\s+cultureInfo\b");
    Add(REMOVE_LanguageData, "linguisticMetadata", @"^\s*linguisticMetadata" + ASSIGN + @"\s*\{?");

    // Column metadata group
    Add(REMOVE_ColumnMeta, "dataCategory", @"^\s*dataCategory" + ASSIGN);
    Add(REMOVE_ColumnMeta, "summarizeBy", @"^\s*summarizeBy" + ASSIGN);
    Add(REMOVE_ColumnMeta, "sourceColumn", @"^\s*sourceColumn" + ASSIGN);
    Add(REMOVE_ColumnMeta, "isAvailableInMdx", @"^\s*isAvailableInMdx" + BOOL);
    Add(REMOVE_ColumnMeta, "isNullable", @"^\s*isNullable" + BOOL);

    // Inferred metadata group
    Add(REMOVE_InferredMeta, "isNameInferred", @"^\s*isNameInferred" + BOOL);
    Add(REMOVE_InferredMeta, "isDataTypeInferred", @"^\s*isDataTypeInferred" + BOOL);
    Add(REMOVE_InferredMeta, "sourceProviderType", @"^\s*sourceProviderType" + ASSIGN);

    // Display/UI properties group
    Add(REMOVE_DisplayProps, "isHidden", @"^\s*isHidden" + BOOL);
    Add(REMOVE_DisplayProps, "displayFolder", @"^\s*displayFolder" + ASSIGN);
    Add(REMOVE_DisplayProps, "formatString", @"^\s*formatString" + ASSIGN);
    Add(REMOVE_DisplayProps, "isDefaultLabel", @"^\s*isDefaultLabel" + BOOL);
    Add(REMOVE_DisplayProps, "isDefaultImage", @"^\s*isDefaultImage" + BOOL);

    // Patterns whose value may span several lines (removed together with their body)
    var blockStarters = new HashSet<string>();
    if (REMOVE_LanguageData) blockStarters.Add("linguisticMetadata");
    if (REMOVE_Annotations) blockStarters.Add("extendedProperties");

    // ---- 4. Small helpers ----
    // Removal statistics for the summary report
    var removalStats = new Dictionary<string, int>();
    Action<string> Bump = key =>
    {
        int current;
        removalStats.TryGetValue(key, out current);   // leaves 0 when the key is absent
        removalStats[key] = current + 1;
    };

    // Number of occurrences of a character in a line (used for brace tracking)
    Func<string, char, int> CountChar = (text, ch) =>
    {
        int hits = 0;
        foreach (char c in text)
        {
            if (c == ch) hits++;
        }
        return hits;
    };

    // Number of leading space/tab characters of a line
    Func<string, int> IndentOf = text =>
    {
        int i = 0;
        while (i < text.Length && (text[i] == ' ' || text[i] == '\t')) i++;
        return i;
    };

    // ---- 5. Initialize output with header ----
    // Fall back to a fixed name when the selected folder has no name (e.g. a drive root).
    string modelName = Path.GetFileName(modelFolder.TrimEnd('\\', '/'));
    if (string.IsNullOrEmpty(modelName)) modelName = "model";

    var output = new StringBuilder();
    output.AppendLine("// Combined TMDL (Slim)");
    output.AppendLine("// Source: " + modelName);
    output.AppendLine("// Generated: " + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));

    long originalTotalSize = 0;
    int filesWithContent = 0;

    // Base path for relative file names (normalized with a trailing separator)
    string definitionBasePath = definitionPath.TrimEnd('\\', '/') + Path.DirectorySeparatorChar;

    // ---- 6. Process each TMDL file ----
    foreach (string filePath in tmdlFiles)
    {
        // Relative path from the definition root, with forward slashes.
        // Ordinal comparison: these are file-system paths, not linguistic text.
        string relativePath = filePath.StartsWith(definitionBasePath, StringComparison.OrdinalIgnoreCase)
            ? filePath.Substring(definitionBasePath.Length)
            : Path.GetFileName(filePath);
        relativePath = relativePath.Replace('\\', '/');

        // Count every input file toward the original size, including files dropped
        // wholesale below, so the reported reduction reflects everything removed.
        originalTotalSize += new FileInfo(filePath).Length;

        // Skip the entire cultures/ subtree when language data removal is enabled
        if (REMOVE_LanguageData && relativePath.StartsWith("cultures/", StringComparison.OrdinalIgnoreCase))
        {
            Bump("cultures-folder");
            continue;
        }

        string content = File.ReadAllText(filePath, Encoding.UTF8);
        string[] contentLines = content.Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.None);

        // State for multi-line block removal
        bool inSkippedBlock = false;   // currently discarding the body of a removed block
        bool blockOpened = false;      // at least one '{' has been seen for the current block
        int blockBraceDepth = 0;       // open braces minus close braces seen so far
        int blockIndent = 0;           // indentation of the line that started the block
        bool fileHasOutput = false;

        foreach (string line in contentLines)
        {
            if (inSkippedBlock)
            {
                int openCount = CountChar(line, '{');
                int closeCount = CountChar(line, '}');

                if (!blockOpened && openCount == 0)
                {
                    // The block value has not opened a brace yet.
                    // Blank lines between the starter and its value are part of the block.
                    if (string.IsNullOrWhiteSpace(line)) continue;

                    // TMDL nests by indentation: a deeper-indented line is still part of
                    // the removed property's (unbraced) value.
                    if (IndentOf(line) > blockIndent) continue;

                    // Same-or-shallower indentation: the removed property had no further
                    // body. Stop skipping and let this line be processed normally below
                    // instead of silently dropping it.
                    inSkippedBlock = false;
                    blockBraceDepth = 0;
                }
                else
                {
                    if (openCount > 0) blockOpened = true;
                    blockBraceDepth += openCount - closeCount;
                    if (blockBraceDepth <= 0)
                    {
                        // This line closed the block; it is dropped along with the body.
                        inSkippedBlock = false;
                        blockOpened = false;
                        blockBraceDepth = 0;
                    }
                    continue;
                }
            }

            // Check the line against the removal patterns (first match wins)
            bool shouldRemoveLine = false;
            foreach (var entry in patterns)
            {
                if (!entry.Value.IsMatch(line)) continue;

                Bump(entry.Key);
                shouldRemoveLine = true;

                if (blockStarters.Contains(entry.Key))
                {
                    int starterOpens = CountChar(line, '{');
                    int starterCloses = CountChar(line, '}');
                    blockBraceDepth = starterOpens - starterCloses;
                    blockOpened = starterOpens > 0;
                    blockIndent = IndentOf(line);

                    // A starter whose braces already balance on the same line is a complete
                    // single-line block: nothing further to skip.
                    inSkippedBlock = !(blockOpened && blockBraceDepth <= 0);
                    if (!inSkippedBlock)
                    {
                        blockOpened = false;
                        blockBraceDepth = 0;
                    }
                }
                break;
            }
            if (shouldRemoveLine) continue;

            // Drop pure whitespace lines to reduce output bloat
            if (string.IsNullOrWhiteSpace(line)) continue;

            // Keep the line (trailing whitespace trimmed for consistency)
            output.AppendLine(line.TrimEnd());
            fileHasOutput = true;
        }

        if (fileHasOutput)
        {
            filesWithContent++;
            output.AppendLine();   // blank line separates consecutive files
        }
    }

    // ---- 7. Finalize text ----
    // Collapse runs of blank lines to at most one, and end the file with a single newline
    string finalOutput = Regex.Replace(output.ToString(), @"(\r?\n){3,}", Environment.NewLine + Environment.NewLine);
    finalOutput = finalOutput.TrimEnd() + Environment.NewLine;

    // ---- 8. Ask where to save ----
    var parentDir = Directory.GetParent(modelFolder);
    string suggestedPath = Path.Combine(parentDir != null ? parentDir.FullName : modelFolder,
                                        modelName + ".slimdl");

    string outputPath;
    using (var saveDialog = new SaveFileDialog())
    {
        saveDialog.Title = "Save slimmed TMDL";
        saveDialog.Filter = "Slimmed TMDL (*.slimdl)|*.slimdl|TMDL files (*.tmdl)|*.tmdl|All files (*.*)|*.*";
        saveDialog.DefaultExt = "slimdl";
        saveDialog.AddExtension = true;
        saveDialog.FileName = Path.GetFileName(suggestedPath);
        saveDialog.InitialDirectory = Path.GetDirectoryName(suggestedPath);
        saveDialog.OverwritePrompt = true;
        saveDialog.CheckPathExists = true;

        if (saveDialog.ShowDialog() != DialogResult.OK) return;
        outputPath = saveDialog.FileName;
    }

    // Write the combined, slimmed TMDL as UTF-8 without a byte-order mark
    File.WriteAllText(outputPath, finalOutput, new UTF8Encoding(false));

    // ---- 9. Report ----
    long outputSize = new FileInfo(outputPath).Length;
    double reductionPercent = (originalTotalSize > 0)
        ? (1.0 - (double)outputSize / (double)originalTotalSize) * 100.0
        : 0.0;

    var summary = new StringBuilder();
    summary.AppendLine("TMDL Slimmer Results");
    summary.AppendLine("====================");
    summary.AppendLine(string.Format("Files processed: {0} of {1}", filesWithContent, tmdlFiles.Length));
    summary.AppendLine(string.Format("Input size: {0:N1} KB", originalTotalSize / 1024.0));
    summary.AppendLine(string.Format("Output: {0} ({1:N1} KB)", Path.GetFileName(outputPath), outputSize / 1024.0));
    summary.AppendLine(string.Format("Size reduction: {0:F1}%", reductionPercent));

    if (removalStats.Count > 0)
    {
        int totalRemovals = 0;
        foreach (int removed in removalStats.Values) totalRemovals += removed;
        summary.AppendLine();
        summary.AppendLine(string.Format("Removed {0:N0} items:", totalRemovals));

        // Alphabetical listing keeps the report stable between runs
        var sortedKeys = new List<string>(removalStats.Keys);
        sortedKeys.Sort();
        foreach (string key in sortedKeys)
            summary.AppendLine(string.Format("  - {0}: {1:N0}", key, removalStats[key]));
    }

    Info(summary.ToString());
}
catch (Exception ex)
{
    // Any failure (I/O, access denied, ...) is surfaced to the user as a single message
    Error("Processing failed: " + ex.Message);
}