Skip to content

Commit a564431

Browse files
authored
Add TMDL slimmer
* Upload tmdl_slimmer.csx * Remove StringComparer.OrdinalIgnoreCase * Code and comment cleanup
1 parent 7fc0289 commit a564431

File tree

1 file changed

+290
-0
lines changed

1 file changed

+290
-0
lines changed

Intermediate/tmdl_slimmer.csx

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
//
2+
// Title: TMDL Slimmer - Strip metadata bloat for LLM context
3+
//
4+
// Author: Alexis Olson
5+
// Version: 1.0
6+
//
7+
// Description:
8+
// Reads all *.tmdl files from a SemanticModel/definition folder,
9+
// removes UI/engine metadata while preserving model semantics,
10+
// and outputs a single .slimdl file for LLM consumption.
11+
//
12+
// Usage:
13+
// - Run in Tabular Editor 2 or 3 (Advanced Scripting)
14+
// - Select your SemanticModel folder when prompted
15+
// - Choose where to save the output .slimdl file
16+
17+
using System;
18+
using System.Collections.Generic;
19+
using System.IO;
20+
using System.Text;
21+
using System.Text.RegularExpressions;
22+
using System.Windows.Forms;
23+
24+
// ==================== CONFIGURATION ====================
25+
// Each flag enables one group of removal patterns; set a flag to false to keep that metadata in the output.
bool REMOVE_Annotations = true; // lines starting with: annotation, changedProperty, extendedProperties (brace-tracked block)
26+
bool REMOVE_Lineage = true; // lines starting with: lineageTag, sourceLineageTag
27+
bool REMOVE_LanguageData = true; // files under cultures/, plus: culture, ref cultureInfo, linguisticMetadata (brace-tracked block)
28+
bool REMOVE_ColumnMeta = true; // dataCategory, summarizeBy, sourceColumn, isAvailableInMdx, isNullable
29+
bool REMOVE_InferredMeta = true; // isNameInferred, isDataTypeInferred, sourceProviderType
30+
bool REMOVE_DisplayProps = true; // isHidden, displayFolder, formatString, isDefaultLabel, isDefaultImage
31+
32+
// ==================== MAIN EXECUTION ====================
33+
try
34+
{
35+
// Select SemanticModel folder
36+
// Ask the user for the model folder; a cancelled dialog ends the script silently.
string modelFolder = null;
using (var folderPicker = new FolderBrowserDialog())
{
    folderPicker.Description = "Select the SemanticModel folder (contains 'definition' subfolder)";
    folderPicker.ShowNewFolderButton = false;

    DialogResult pickResult = folderPicker.ShowDialog();
    if (pickResult != DialogResult.OK) return;

    modelFolder = folderPicker.SelectedPath;
}

// Resolve the definition root. Prefer "<selected>/definition"; when that subfolder
// does not exist, treat the selected folder itself as the definition folder, but
// only if it actually contains TMDL files somewhere beneath it.
string definitionPath = Path.Combine(modelFolder, "definition");
bool hasDefinitionSubfolder = Directory.Exists(definitionPath);
if (!hasDefinitionSubfolder)
{
    definitionPath = modelFolder;

    string[] candidates = Directory.GetFiles(definitionPath, "*.tmdl", SearchOption.AllDirectories);
    if (candidates.Length == 0)
    {
        Info("No TMDL files found in the selected folder.");
        return;
    }
}
56+
57+
// Build removal patterns based on configuration flags
58+
// Removal patterns keyed by the name reported in the summary. Only patterns whose
// configuration flag is enabled are registered.
var patterns = new Dictionary<string, Regex>();

// Shared regex tails:
//   assignTail - optional whitespace followed by '=' or ':'
//   boolTail   - optional "= true/false" (or ": true/false"), optional ';', then end of line
string assignTail = @"\s*(?:=|:)";
string boolTail = @"(?:\s*(?:=|:)\s*(?:true|false))?\s*;?\s*$";

// Registers a raw pattern under the given key when its flag is on.
Action<bool, string, string> register = (enabled, key, regexText) =>
{
    if (!enabled) return;
    patterns[key] = new Regex(regexText);
};

// Registers "<keyword> = ..." / "<keyword>: ..." style properties.
Action<bool, string> registerAssigned = (enabled, keyword) =>
{
    register(enabled, keyword, @"^\s*" + keyword + assignTail);
};

// Registers boolean properties, which may appear bare or with an explicit true/false.
Action<bool, string> registerFlag = (enabled, keyword) =>
{
    register(enabled, keyword, @"^\s*" + keyword + boolTail);
};

// --- Annotations ---
// NOTE(review): real TMDL files may spell this object "extendedProperty" (singular) - confirm against model files.
register(REMOVE_Annotations, "annotation", @"^\s*annotation\b");
register(REMOVE_Annotations, "changedProperty", @"^\s*changedProperty\b");
register(REMOVE_Annotations, "extendedProperties", @"^\s*extendedProperties" + assignTail + @"\s*\{?");

// --- Lineage tracking ---
registerAssigned(REMOVE_Lineage, "lineageTag");
registerAssigned(REMOVE_Lineage, "sourceLineageTag");

// --- Language / culture ---
registerAssigned(REMOVE_LanguageData, "culture");
register(REMOVE_LanguageData, "refCulture", @"^\s*ref\s+cultureInfo\b");
register(REMOVE_LanguageData, "linguisticMetadata", @"^\s*linguisticMetadata" + assignTail + @"\s*\{?");

// --- Column metadata ---
registerAssigned(REMOVE_ColumnMeta, "dataCategory");
registerAssigned(REMOVE_ColumnMeta, "summarizeBy");
registerAssigned(REMOVE_ColumnMeta, "sourceColumn");
registerFlag(REMOVE_ColumnMeta, "isAvailableInMdx");
registerFlag(REMOVE_ColumnMeta, "isNullable");

// --- Inferred metadata ---
registerFlag(REMOVE_InferredMeta, "isNameInferred");
registerFlag(REMOVE_InferredMeta, "isDataTypeInferred");
registerAssigned(REMOVE_InferredMeta, "sourceProviderType");

// --- Display / UI properties ---
registerFlag(REMOVE_DisplayProps, "isHidden");
registerAssigned(REMOVE_DisplayProps, "displayFolder");
registerAssigned(REMOVE_DisplayProps, "formatString");
registerFlag(REMOVE_DisplayProps, "isDefaultLabel");
registerFlag(REMOVE_DisplayProps, "isDefaultImage");

// Pattern keys that open a multi-line block; lines after them are skipped using brace tracking.
var blockStarters = new HashSet<string>();
if (REMOVE_LanguageData)
{
    blockStarters.Add("linguisticMetadata");
}
if (REMOVE_Annotations)
{
    blockStarters.Add("extendedProperties");
}
107+
108+
// Track removal statistics for summary report
109+
// Per-pattern removal counts, reported in the final summary.
var removalStats = new Dictionary<string, int>();

// Adds one to the counter for the given key, creating the counter on first use.
Action<string> Bump = key =>
{
    int seen;
    removalStats.TryGetValue(key, out seen); // leaves 'seen' at 0 when the key is not present yet
    removalStats[key] = seen + 1;
};
117+
118+
// Collect all TMDL files recursively
119+
// Gather every TMDL file under the definition root; stop early when there is nothing to do.
string[] tmdlFiles = Directory.GetFiles(definitionPath, "*.tmdl", SearchOption.AllDirectories);
if (tmdlFiles.Length == 0)
{
    Info("No TMDL files found in the selected folder.");
    return;
}

// Sorted so the combined output has a stable file order.
Array.Sort(tmdlFiles);

// Start the combined document with a three-line comment header.
var output = new StringBuilder();
output.AppendLine("// Combined TMDL (Slim)")
      .AppendLine("// Source: " + Path.GetFileName(modelFolder))
      .AppendLine("// Generated: " + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));

// Running totals for the summary report.
int filesWithContent = 0;
long originalTotalSize = 0;

// Definition root with exactly one trailing separator, used to derive relative file paths.
string trimmedRoot = definitionPath.TrimEnd('\\', '/');
string definitionBasePath = trimmedRoot + Path.DirectorySeparatorChar;
139+
// Process each TMDL file
140+
// Counts occurrences of a character in a line (used for brace tracking).
Func<string, char, int> CountChar = (text, target) =>
{
    int hits = 0;
    foreach (char c in text)
    {
        if (c == target) hits++;
    }
    return hits;
};

// Number of leading whitespace characters on a line.
Func<string, int> IndentWidth = text => text.Length - text.TrimStart().Length;

// Process each TMDL file: drop lines matching a removal pattern, skip whole
// brace-delimited blocks opened by a block starter, and append what is left
// to the combined output.
foreach (string filePath in tmdlFiles)
{
    // Path relative to the definition root, with forward slashes.
    // Ordinal comparison: these are file-system paths, not linguistic text.
    string relativePath = filePath.StartsWith(definitionBasePath, StringComparison.Ordinal)
        ? filePath.Substring(definitionBasePath.Length)
        : Path.GetFileName(filePath);
    relativePath = relativePath.Replace('\\', '/');

    // Count every input file toward the original size, including files that are
    // dropped wholesale below; otherwise the reported reduction is understated.
    originalTotalSize += new FileInfo(filePath).Length;

    // Skip the entire cultures/ subtree when language data removal is enabled.
    if (REMOVE_LanguageData && relativePath.StartsWith("cultures/", StringComparison.Ordinal))
    {
        Bump("cultures-folder");
        continue;
    }

    string content = File.ReadAllText(filePath, Encoding.UTF8);
    string[] contentLines = content.Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.None);

    // State for multi-line block removal.
    bool inSkippedBlock = false;   // currently discarding lines of a removed block
    bool blockSawBrace = false;    // a '{' or '}' has been seen since the block started
    int blockBraceDepth = 0;       // opening braces minus closing braces so far
    int blockIndent = 0;           // indentation of the line that started the block
    bool fileHasOutput = false;

    foreach (string line in contentLines)
    {
        if (inSkippedBlock)
        {
            // If no brace has appeared yet and a non-blank line is indented no deeper
            // than the starter, the block had no brace-delimited body: stop skipping
            // and treat this line normally instead of swallowing it.
            bool dedented = !blockSawBrace
                && !string.IsNullOrWhiteSpace(line)
                && IndentWidth(line) <= blockIndent;

            if (!dedented)
            {
                int opens = CountChar(line, '{');
                int closes = CountChar(line, '}');
                if (opens > 0 || closes > 0) blockSawBrace = true;
                blockBraceDepth += opens - closes;

                // The block ends only once braces have been seen and are balanced again;
                // blank or brace-less lines before the opening brace stay inside the block.
                if (blockSawBrace && blockBraceDepth <= 0)
                {
                    inSkippedBlock = false;
                    blockBraceDepth = 0;
                }
                continue; // this line belongs to the removed block
            }

            inSkippedBlock = false;
            blockBraceDepth = 0;
            // fall through: the current line is regular content
        }

        // Check the line against every enabled removal pattern; first match wins.
        bool shouldRemoveLine = false;
        foreach (var patternEntry in patterns)
        {
            if (!patternEntry.Value.IsMatch(line)) continue;

            Bump(patternEntry.Key);
            shouldRemoveLine = true;

            if (blockStarters.Contains(patternEntry.Key))
            {
                int startOpens = CountChar(line, '{');
                int startCloses = CountChar(line, '}');
                blockSawBrace = startOpens > 0 || startCloses > 0;
                blockBraceDepth = startOpens - startCloses;
                blockIndent = IndentWidth(line);

                // A starter whose braces already balance on its own line is complete;
                // entering skip mode here would discard the following, unrelated line.
                inSkippedBlock = !(blockSawBrace && blockBraceDepth <= 0);
                if (!inSkippedBlock) blockBraceDepth = 0;
            }
            break;
        }

        if (shouldRemoveLine) continue;

        // Drop whitespace-only lines to keep the output compact.
        if (string.IsNullOrWhiteSpace(line)) continue;

        // Keep the line, without trailing whitespace.
        output.AppendLine(line.TrimEnd());
        fileHasOutput = true;
    }

    if (fileHasOutput)
    {
        filesWithContent++;
        output.AppendLine(); // blank line between files
    }
}
228+
229+
// Squeeze excessive blank lines to maximum of one blank line
230+
// Collapse runs of three or more line breaks into a single blank line and make
// sure the document ends with exactly one newline.
string singleBlankLine = Environment.NewLine + Environment.NewLine;
string finalOutput = Regex.Replace(output.ToString(), @"(\r?\n){3,}", singleBlankLine).TrimEnd()
    + Environment.NewLine;

// Suggest "<model folder name>.slimdl" next to the model folder (or inside it when
// the folder has no parent).
DirectoryInfo parentDir = Directory.GetParent(modelFolder);
string suggestedDir = (parentDir == null) ? modelFolder : parentDir.FullName;
string suggestedPath = Path.Combine(suggestedDir, Path.GetFileName(modelFolder) + ".slimdl");

// Let the user confirm or change the destination; cancelling ends the script.
string outputPath;
using (var saveDialog = new SaveFileDialog
{
    Title = "Save slimmed TMDL",
    Filter = "Slimmed TMDL (*.slimdl)|*.slimdl|TMDL files (*.tmdl)|*.tmdl|All files (*.*)|*.*",
    DefaultExt = "slimdl",
    AddExtension = true,
    FileName = Path.GetFileName(suggestedPath),
    InitialDirectory = Path.GetDirectoryName(suggestedPath),
    OverwritePrompt = true,
    CheckPathExists = true
})
{
    DialogResult saveResult = saveDialog.ShowDialog();
    if (saveResult != DialogResult.OK) return;

    outputPath = saveDialog.FileName;
}
253+
254+
// Write the combined, slimmed TMDL
255+
// Write the combined document as UTF-8 without a byte-order mark.
File.WriteAllText(outputPath, finalOutput, new UTF8Encoding(false));

// Size reduction relative to the accumulated input size (0% when no input size was recorded).
long outputSize = new FileInfo(outputPath).Length;
double reductionPercent = 0.0;
if (originalTotalSize > 0)
{
    reductionPercent = (1.0 - (double)outputSize / (double)originalTotalSize) * 100.0;
}

// Build the human-readable report shown at the end of the run.
var summary = new StringBuilder();
summary.AppendLine("TMDL Slimmer Results");
summary.AppendLine("====================");
summary.AppendFormat("Files processed: {0} of {1}", filesWithContent, tmdlFiles.Length).AppendLine();
summary.AppendFormat("Input size: {0:N1} KB", originalTotalSize / 1024.0).AppendLine();
summary.AppendFormat("Output: {0} ({1:N1} KB)", Path.GetFileName(outputPath), outputSize / 1024.0).AppendLine();
summary.AppendFormat("Size reduction: {0:F1}%", reductionPercent).AppendLine();

// List removal counters alphabetically, preceded by their grand total.
if (removalStats.Count > 0)
{
    var sortedKeys = new List<string>(removalStats.Keys);
    sortedKeys.Sort();

    int totalRemovals = 0;
    foreach (string statName in sortedKeys)
    {
        totalRemovals += removalStats[statName];
    }

    summary.AppendLine();
    summary.AppendFormat("Removed {0:N0} items:", totalRemovals).AppendLine();
    foreach (string statName in sortedKeys)
    {
        summary.AppendFormat(" - {0}: {1:N0}", statName, removalStats[statName]).AppendLine();
    }
}

Info(summary.ToString());
286+
}
287+
catch (Exception ex)
288+
{
289+
Error("Processing failed: " + ex.Message);
290+
}

0 commit comments

Comments
 (0)