Skip to content

Commit 5d1cca2

Browse files
committed
Updated culture and extendedProperty handling
1 parent a564431 commit 5d1cca2

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

Intermediate/tmdl_slimmer.csx

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
// Title: TMDL Slimmer - Strip metadata bloat for LLM context
33
//
44
// Author: Alexis Olson
5-
// Version: 1.0
5+
// Version: 1.1
66
//
77
// Description:
88
// Reads all *.tmdl files from a SemanticModel/definition folder,
@@ -22,9 +22,9 @@ using System.Text.RegularExpressions;
2222
using System.Windows.Forms;
2323

2424
// ==================== CONFIGURATION ====================
25-
bool REMOVE_Annotations = true; // annotation, changedProperty, extendedProperties
25+
bool REMOVE_Annotations = true; // annotation, changedProperty, extendedProperty/extendedProperties
2626
bool REMOVE_Lineage = true; // lineageTag, sourceLineageTag
27-
bool REMOVE_LanguageData = true; // cultures folder, culture, linguisticMetadata
27+
bool REMOVE_LanguageData = true; // cultures folder (includes linguisticMetadata)
2828
bool REMOVE_ColumnMeta = true; // summarizeBy, sourceColumn, dataCategory (+ select column booleans)
2929
bool REMOVE_InferredMeta = true; // isNameInferred, isDataTypeInferred, sourceProviderType
3030
bool REMOVE_DisplayProps = true; // isHidden, displayFolder, formatString, isDefaultLabel/Image
@@ -70,17 +70,12 @@ try
7070
// Annotations group
7171
Add(REMOVE_Annotations, "annotation", @"^\s*annotation\b");
7272
Add(REMOVE_Annotations, "changedProperty", @"^\s*changedProperty\b");
73-
Add(REMOVE_Annotations, "extendedProperties", @"^\s*extendedProperties" + ASSIGN + @"\s*\{?");
73+
Add(REMOVE_Annotations, "extendedProperty", @"^\s*extendedPropert(?:y|ies)\b");
7474

7575
// Lineage tracking group
7676
Add(REMOVE_Lineage, "lineageTag", @"^\s*lineageTag" + ASSIGN);
7777
Add(REMOVE_Lineage, "sourceLineageTag", @"^\s*sourceLineageTag" + ASSIGN);
7878

79-
// Language/culture group
80-
Add(REMOVE_LanguageData, "culture", @"^\s*culture" + ASSIGN);
81-
Add(REMOVE_LanguageData, "refCulture", @"^\s*ref\s+cultureInfo\b");
82-
Add(REMOVE_LanguageData, "linguisticMetadata", @"^\s*linguisticMetadata" + ASSIGN + @"\s*\{?");
83-
8479
// Column metadata group
8580
Add(REMOVE_ColumnMeta, "dataCategory", @"^\s*dataCategory" + ASSIGN);
8681
Add(REMOVE_ColumnMeta, "summarizeBy", @"^\s*summarizeBy" + ASSIGN);
@@ -102,8 +97,9 @@ try
10297

10398
// Identify patterns that start multi-line blocks (need brace tracking)
10499
var blockStarters = new HashSet<string>();
105-
if (REMOVE_LanguageData) blockStarters.Add("linguisticMetadata");
106-
if (REMOVE_Annotations) blockStarters.Add("extendedProperties");
100+
if (REMOVE_Annotations) {
101+
blockStarters.Add("extendedProperty");
102+
}
107103

108104
// Track removal statistics for summary report
109105
var removalStats = new Dictionary<string, int>();
@@ -131,6 +127,8 @@ try
131127
output.AppendLine("// Generated: " + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));
132128

133129
long originalTotalSize = 0;
130+
long culturesBytesRemoved = 0; // bytes saved by excluding cultures/ folder
131+
int culturesFilesSkipped = 0; // number of cultures/ tmdl files skipped
134132
int filesWithContent = 0;
135133

136134
// Calculate base path for relative file names (normalize with trailing separator)
@@ -145,16 +143,20 @@ try
145143
: Path.GetFileName(filePath);
146144
relativePath = relativePath.Replace('\\', '/');
147145

146+
// Include every file's size in input total, even if we skip its content later
147+
long fileSize = new FileInfo(filePath).Length;
148+
originalTotalSize += fileSize;
149+
148150
// Skip entire cultures/ subtree when language data removal is enabled
149151
if (REMOVE_LanguageData && relativePath.StartsWith("cultures/"))
150152
{
151-
Bump("cultures-folder");
153+
culturesBytesRemoved += fileSize; // track savings from cultures folder
154+
culturesFilesSkipped++;
152155
continue;
153156
}
154157

155-
// Read file content and track original size
158+
// Read file content
156159
string content = File.ReadAllText(filePath, Encoding.UTF8);
157-
originalTotalSize += new FileInfo(filePath).Length;
158160

159161
// Process content line by line
160162
string[] contentLines = content.Split(new[] { "\r\n", "\n", "\r" }, StringSplitOptions.None);
@@ -265,15 +267,20 @@ try
265267
summary.AppendLine("TMDL Slimmer Results");
266268
summary.AppendLine("====================");
267269
summary.AppendLine(string.Format("Files processed: {0} of {1}", filesWithContent, tmdlFiles.Length));
270+
if (culturesFilesSkipped > 0)
271+
summary.AppendLine(string.Format("Culture files not processed: {0}", culturesFilesSkipped));
268272
summary.AppendLine(string.Format("Input size: {0:N1} KB", originalTotalSize / 1024.0));
269-
summary.AppendLine(string.Format("Output: {0} ({1:N1} KB)", Path.GetFileName(outputPath), outputSize / 1024.0));
273+
summary.AppendLine(string.Format("Output size: {0:N1} KB", outputSize / 1024.0));
270274
summary.AppendLine(string.Format("Size reduction: {0:F1}%", reductionPercent));
271275

272276
if (removalStats.Count > 0)
273277
{
274278
int totalRemovals = 0;
275279
foreach (int count in removalStats.Values) totalRemovals += count;
276280
summary.AppendLine();
281+
if (culturesBytesRemoved > 0)
282+
summary.AppendLine(string.Format("Removed cultures folder: {0:N1} KB", culturesBytesRemoved / 1024.0));
283+
summary.AppendLine();
277284
summary.AppendLine(string.Format("Removed {0:N0} items:", totalRemovals));
278285

279286
var sortedKeys = new List<string>(removalStats.Keys);

0 commit comments

Comments
 (0)