22// Title: TMDL Slimmer - Strip metadata bloat for LLM context
33//
44// Author: Alexis Olson
5- // Version: 1.0
5+ // Version: 1.1
66//
77// Description:
88// Reads all *.tmdl files from a SemanticModel/definition folder,
@@ -22,9 +22,9 @@ using System.Text.RegularExpressions;
2222using System . Windows . Forms ;
2323
2424// ==================== CONFIGURATION ====================
25- bool REMOVE_Annotations = true ; // annotation, changedProperty, extendedProperties
25+ bool REMOVE_Annotations = true ; // annotation, changedProperty, extendedProperty/ extendedProperties
2626bool REMOVE_Lineage = true ; // lineageTag, sourceLineageTag
27- bool REMOVE_LanguageData = true ; // cultures folder, culture, linguisticMetadata
27+ bool REMOVE_LanguageData = true ; // cultures folder (includes linguisticMetadata)
2828bool REMOVE_ColumnMeta = true ; // summarizeBy, sourceColumn, dataCategory (+ select column booleans)
2929bool REMOVE_InferredMeta = true ; // isNameInferred, isDataTypeInferred, sourceProviderType
3030bool REMOVE_DisplayProps = true ; // isHidden, displayFolder, formatString, isDefaultLabel/Image
7070 // Annotations group
7171 Add ( REMOVE_Annotations , "annotation" , @"^\s*annotation\b" ) ;
7272 Add ( REMOVE_Annotations , "changedProperty" , @"^\s*changedProperty\b" ) ;
73- Add ( REMOVE_Annotations , "extendedProperties " , @"^\s*extendedProperties" + ASSIGN + @" \s*\{? ") ;
73+ Add ( REMOVE_Annotations , "extendedProperty " , @"^ \s*extendedPropert(?:y|ies)\b ") ;
7474
7575 // Lineage tracking group
7676 Add ( REMOVE_Lineage , "lineageTag" , @"^\s*lineageTag" + ASSIGN ) ;
7777 Add ( REMOVE_Lineage , "sourceLineageTag" , @"^\s*sourceLineageTag" + ASSIGN ) ;
7878
79- // Language/culture group
80- Add ( REMOVE_LanguageData , "culture" , @"^\s*culture" + ASSIGN ) ;
81- Add ( REMOVE_LanguageData , "refCulture" , @"^\s*ref\s+cultureInfo\b" ) ;
82- Add ( REMOVE_LanguageData , "linguisticMetadata" , @"^\s*linguisticMetadata" + ASSIGN + @"\s*\{?" ) ;
83-
8479 // Column metadata group
8580 Add ( REMOVE_ColumnMeta , "dataCategory" , @"^\s*dataCategory" + ASSIGN ) ;
8681 Add ( REMOVE_ColumnMeta , "summarizeBy" , @"^\s*summarizeBy" + ASSIGN ) ;
10297
10398 // Identify patterns that start multi-line blocks (need brace tracking)
10499 var blockStarters = new HashSet < string > ( ) ;
105- if ( REMOVE_LanguageData ) blockStarters . Add ( "linguisticMetadata" ) ;
106- if ( REMOVE_Annotations ) blockStarters . Add ( "extendedProperties" ) ;
100+ if ( REMOVE_Annotations ) {
101+ blockStarters . Add ( "extendedProperty" ) ;
102+ }
107103
108104 // Track removal statistics for summary report
109105 var removalStats = new Dictionary < string , int > ( ) ;
131127 output . AppendLine ( "// Generated: " + DateTime . Now . ToString ( "yyyy-MM-dd HH:mm:ss" ) ) ;
132128
133129 long originalTotalSize = 0 ;
130+ long culturesBytesRemoved = 0 ; // bytes saved by excluding cultures/ folder
131+ int culturesFilesSkipped = 0 ; // number of cultures/ tmdl files skipped
134132 int filesWithContent = 0 ;
135133
136134 // Calculate base path for relative file names (normalize with trailing separator)
@@ -145,16 +143,20 @@ try
145143 : Path . GetFileName ( filePath ) ;
146144 relativePath = relativePath . Replace ( '\\ ' , '/' ) ;
147145
146+ // Include every file's size in input total, even if we skip its content later
147+ long fileSize = new FileInfo ( filePath ) . Length ;
148+ originalTotalSize += fileSize ;
149+
148150 // Skip entire cultures/ subtree when language data removal is enabled
149151 if ( REMOVE_LanguageData && relativePath . StartsWith ( "cultures/" ) )
150152 {
151- Bump ( "cultures-folder" ) ;
153+ culturesBytesRemoved += fileSize ; // track savings from cultures folder
154+ culturesFilesSkipped ++ ;
152155 continue ;
153156 }
154157
155- // Read file content and track original size
158+ // Read file content
156159 string content = File . ReadAllText ( filePath , Encoding . UTF8 ) ;
157- originalTotalSize += new FileInfo ( filePath ) . Length ;
158160
159161 // Process content line by line
160162 string [ ] contentLines = content . Split ( new [ ] { "\r \n " , "\n " , "\r " } , StringSplitOptions . None ) ;
@@ -265,15 +267,20 @@ try
265267 summary . AppendLine ( "TMDL Slimmer Results" ) ;
266268 summary . AppendLine ( "====================" ) ;
267269 summary . AppendLine ( string . Format ( "Files processed: {0} of {1}" , filesWithContent , tmdlFiles . Length ) ) ;
270+ if ( culturesFilesSkipped > 0 )
271+ summary . AppendLine ( string . Format ( "Culture files not processed: {0}" , culturesFilesSkipped ) ) ;
268272 summary . AppendLine ( string . Format ( "Input size: {0:N1} KB" , originalTotalSize / 1024.0 ) ) ;
269- summary . AppendLine ( string . Format ( "Output: {0} ({1 :N1} KB)" , Path . GetFileName ( outputPath ) , outputSize / 1024.0 ) ) ;
273+ summary . AppendLine ( string . Format ( "Output size : {0:N1} KB" , outputSize / 1024.0 ) ) ;
270274 summary . AppendLine ( string . Format ( "Size reduction: {0:F1}%" , reductionPercent ) ) ;
271275
272276 if ( removalStats . Count > 0 )
273277 {
274278 int totalRemovals = 0 ;
275279 foreach ( int count in removalStats . Values ) totalRemovals += count ;
276280 summary . AppendLine ( ) ;
281+ if ( culturesBytesRemoved > 0 )
282+ summary . AppendLine ( string . Format ( "Removed cultures folder: {0:N1} KB" , culturesBytesRemoved / 1024.0 ) ) ;
283+ summary . AppendLine ( ) ;
277284 summary . AppendLine ( string . Format ( "Removed {0:N0} items:" , totalRemovals ) ) ;
278285
279286 var sortedKeys = new List < string > ( removalStats . Keys ) ;
0 commit comments