2727) ]
2828
2929use clap:: { Parser , ValueEnum } ;
30+ use displaydoc:: Display ;
3031use eyre:: WrapErr ;
3132use icu_provider:: export:: ExportableProvider ;
3233use icu_provider:: hello_world:: HelloWorldV1 ;
@@ -35,9 +36,54 @@ use icu_provider_export::prelude::*;
3536use icu_provider_export:: ExportMetadata ;
3637#[ cfg( feature = "provider" ) ]
3738use icu_provider_source:: SourceDataProvider ;
39+ use regex:: Regex ;
3840use simple_logger:: SimpleLogger ;
3941use std:: collections:: HashMap ;
4042use std:: path:: PathBuf ;
43+ use std:: str:: FromStr ;
44+
45+ #[ derive( Clone ) ]
46+ struct Filter {
47+ domain : String ,
48+ regex : Regex ,
49+ }
50+
51+ #[ derive( Debug , Display ) ]
52+ enum FilterError {
53+ #[ displaydoc( "no filter found. specify one after an =" ) ]
54+ NoFilter ,
55+ #[ displaydoc( "opening / delimiter for regex not found" ) ]
56+ NoOpeningSlash ,
57+ #[ displaydoc( "closing / delimiter for regex not found" ) ]
58+ NoClosingSlash ,
59+ #[ displaydoc( "{0}" ) ]
60+ Regex ( regex:: Error ) ,
61+ }
62+
63+ impl From < regex:: Error > for FilterError {
64+ fn from ( value : regex:: Error ) -> Self {
65+ FilterError :: Regex ( value)
66+ }
67+ }
68+
69+ impl std:: error:: Error for FilterError { }
70+
71+ impl FromStr for Filter {
72+ type Err = FilterError ;
73+ fn from_str ( s : & str ) -> Result < Self , Self :: Err > {
74+ let ( domain, regex) = s. split_once ( '=' ) . ok_or ( FilterError :: NoFilter ) ?;
75+
76+ let regex = regex. strip_prefix ( '/' ) . ok_or ( FilterError :: NoOpeningSlash ) ?;
77+ let regex = regex. strip_suffix ( '/' ) . ok_or ( FilterError :: NoClosingSlash ) ?;
78+
79+ let regex = Regex :: new ( regex) ?;
80+
81+ Ok ( Filter {
82+ domain : domain. to_owned ( ) ,
83+ regex,
84+ } )
85+ }
86+ }
4187
4288#[ derive( Parser ) ]
4389#[ command( name = "icu4x-datagen" ) ]
@@ -169,6 +215,10 @@ struct Cli {
169215 #[ arg( help = "Analyzes the binary and only includes markers that are used by the binary." ) ]
170216 markers_for_bin : Option < PathBuf > ,
171217
218+ #[ arg( long, value_name = "FILTER" ) ]
219+ #[ arg( help = "Filter attributes on markers for a domain. Accepts form `domain=/regex/`." ) ]
220+ attribute_filter : Vec < Filter > ,
221+
172222 #[ arg( long, short, num_args = 0 ..) ]
173223 #[ cfg_attr( feature = "provider" , arg( default_value = "recommended" ) ) ]
174224 #[ arg(
@@ -528,6 +578,21 @@ fn main() -> eyre::Result<()> {
528578 driver. with_segmenter_models ( cli. segmenter_models . clone ( ) )
529579 } ;
530580
581+ let attribute_filters = cli
582+ . attribute_filter
583+ . iter ( )
584+ . fold ( HashMap :: new ( ) , |mut map, filter| {
585+ map. entry ( & filter. domain )
586+ . and_modify ( |v : & mut Vec < _ > | v. push ( filter. regex . clone ( ) ) )
587+ . or_insert_with ( || vec ! [ filter. regex. clone( ) ] ) ;
588+ map
589+ } ) ;
590+ for ( domain, filters) in attribute_filters {
591+ driver = driver. with_marker_attributes_filter ( domain, move |attr| {
592+ filters. iter ( ) . all ( |regex| regex. is_match ( attr) )
593+ } )
594+ }
595+
531596 let metadata: Result < ExportMetadata , DataError > = match cli. format {
532597 #[ cfg( not( feature = "fs_exporter" ) ) ]
533598 Format :: Fs => {
0 commit comments