From 2c1412e05c19345ad1a627e23aa6631b27ec1f66 Mon Sep 17 00:00:00 2001 From: Vasilev Date: Wed, 19 Jul 2023 11:09:35 +0000 Subject: [PATCH 01/49] changed logging --- lightautoml/automl/blend.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lightautoml/automl/blend.py b/lightautoml/automl/blend.py index f5bea128..d0c78615 100644 --- a/lightautoml/automl/blend.py +++ b/lightautoml/automl/blend.py @@ -361,11 +361,14 @@ def _optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray: length = len(splitted_preds) candidate = np.ones(length, dtype=np.float32) / length + pre_candidate = candidate best_pred = self._get_weighted_pred(splitted_preds, candidate) best_score = self.score(best_pred) logger.info("Blending: optimization starts with equal weights and score \x1b[1m{0}\x1b[0m".format(best_score)) score = best_score + iter_best_score = None + iter_best_weights = None for _ in range(self.max_iters): flg_no_upd = True for i in range(len(splitted_preds)): @@ -381,17 +384,21 @@ def _optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray: ) w = opt_res.x score = -opt_res.fun + pre_candidate = self._get_candidate(candidate, i, w) + if i == 0 or iter_best_score < score: + iter_best_score = score + iter_best_weights = pre_candidate if score > best_score: flg_no_upd = False best_score = score # if w < self.max_nonzero_coef: # w = 0 - candidate = self._get_candidate(candidate, i, w) + candidate = pre_candidate logger.info( "Blending: iteration \x1b[1m{0}\x1b[0m: score = \x1b[1m{1}\x1b[0m, weights = \x1b[1m{2}\x1b[0m".format( - _, score, candidate + _, iter_best_score, iter_best_weights ) ) From bb955756574ee62fbab5f658fcda49a0dd85ceb1 Mon Sep 17 00:00:00 2001 From: Vasilev Date: Wed, 19 Jul 2023 11:44:52 +0000 Subject: [PATCH 02/49] lint fix --- lightautoml/addons/autots/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lightautoml/addons/autots/base.py b/lightautoml/addons/autots/base.py index 5cc8cea1..d84ac2ba 100644 --- a/lightautoml/addons/autots/base.py +++ b/lightautoml/addons/autots/base.py @@ -192,7 +192,8 @@ def fit_predict(self, train_data, roles, verbose=0): if hasattr(self.TM, "automl_trend"): self.datetime_step = ( - pd.to_datetime(train_data[self.datetime_key]).iloc[1] - pd.to_datetime(train_data[self.datetime_key]).iloc[0] + pd.to_datetime(train_data[self.datetime_key]).iloc[1] + - pd.to_datetime(train_data[self.datetime_key]).iloc[0] ) # fit main train_detrend = train_data.copy() From 3d76b96bdb80ae11a074fc1884145697b58388ad Mon Sep 17 00:00:00 2001 From: Vasilev Date: Wed, 19 Jul 2023 11:09:35 +0000 Subject: [PATCH 03/49] changed logging --- lightautoml/automl/blend.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lightautoml/automl/blend.py b/lightautoml/automl/blend.py index f5bea128..d0c78615 100644 --- a/lightautoml/automl/blend.py +++ b/lightautoml/automl/blend.py @@ -361,11 +361,14 @@ def _optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray: length = len(splitted_preds) candidate = np.ones(length, dtype=np.float32) / length + pre_candidate = candidate best_pred = self._get_weighted_pred(splitted_preds, candidate) best_score = self.score(best_pred) logger.info("Blending: optimization starts with equal weights and score \x1b[1m{0}\x1b[0m".format(best_score)) score = best_score + iter_best_score = None + iter_best_weights = None for _ in range(self.max_iters): flg_no_upd = True for i in range(len(splitted_preds)): @@ -381,17 +384,21 @@ def 
_optimize(self, splitted_preds: Sequence[NumpyDataset]) -> np.ndarray: ) w = opt_res.x score = -opt_res.fun + pre_candidate = self._get_candidate(candidate, i, w) + if i == 0 or iter_best_score < score: + iter_best_score = score + iter_best_weights = pre_candidate if score > best_score: flg_no_upd = False best_score = score # if w < self.max_nonzero_coef: # w = 0 - candidate = self._get_candidate(candidate, i, w) + candidate = pre_candidate logger.info( "Blending: iteration \x1b[1m{0}\x1b[0m: score = \x1b[1m{1}\x1b[0m, weights = \x1b[1m{2}\x1b[0m".format( - _, score, candidate + _, iter_best_score, iter_best_weights ) ) From 697ebfd013a681520e3f594008eb2d23a891738b Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 25 Jul 2023 10:25:13 +0000 Subject: [PATCH 04/49] Added timm cv-library, bug-fix in multilabel linear model, added softmax with clip to repair warning in log_loss from sklearn --- lightautoml/automl/presets/image_config.yml | 145 +++++++++++++++++- lightautoml/image/image.py | 78 ++++------ lightautoml/ml_algo/linear_sklearn.py | 5 +- .../ml_algo/torch_based/linear_model.py | 14 +- lightautoml/ml_algo/utils.py | 35 +++++ .../pipelines/features/image_pipeline.py | 4 +- lightautoml/tasks/common_metric.py | 6 +- lightautoml/transformers/image.py | 9 +- pyproject.toml | 10 +- 9 files changed, 232 insertions(+), 74 deletions(-) diff --git a/lightautoml/automl/presets/image_config.yml b/lightautoml/automl/presets/image_config.yml index 7d4d17dc..9f0ee138 100644 --- a/lightautoml/automl/presets/image_config.yml +++ b/lightautoml/automl/presets/image_config.yml @@ -167,7 +167,7 @@ cv_simple_features: autocv_features: # model name from effnet family - embed_model: 'efficientnet-b0' + embed_model: 'vit_base_patch16_224.augreg_in21k' weights_path: null # directory for save / load cache cache_dir: './cache_CV' @@ -175,6 +175,147 @@ autocv_features: device: 'cuda:0' n_jobs: 4 random_state: 42 - is_advprop: True batch_size: 128 verbose: True + + +nn_pipeline_params: + # use quantile transformer for numerical columns + use_qnt: false + # number of quantiles to be computed + n_quantiles: null + # maximum number of samples used to estimate the quantiles for computational efficiency + subsample: 1000000000 + # marginal distribution for the transformed data. The choices are 'uniform' or 'normal' + output_distribution: normal + # add noise with certain std to dataset before quantile transformation to make data more smooth + noise: 0.001 + # if number of quantiles is none then it equals dataset size / factor + qnt_factor: 30 + # use target encoding for categorical columns + use_te: false + # max number of categories to generate intersections + top_intersections: 5 + max_bin_count: 10 + # max depth of cat intersection + max_intersection_depth: 3 + # subsample to calc data statistics + te_subsample: null + # should we output sparse if ohe encoding was used during cat handling + sparse_ohe: auto + # switch to target encoding if high cardinality + auto_unique_co: 50 + # output encoded categories or embed idxs + output_categories: true + # cutoff if use target encoding in cat handling on multiclass task if number of classes is high + multiclass_te_co: 3 + + + +nn_params: + # Look for NN train params here. 
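+  # These values can be overridden from Python through the preset's nn_params
+  # argument, e.g. (an illustrative sketch, the keys mirror this section):
+  #   nn_params = {"model": "denselight", "n_epochs": 20, "bs": 256}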
+  # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model
+  model: denselight
+  # use model with custom embeddings
+  model_with_emb: false
+  # tune custom network
+  tuned: false
+  # function defining a custom Optuna search space for tuning (null - use the default one)
+  optimization_search_space: null
+  # str in torch.nn loss functions or nn.Module or func with (y_pred, y_true) args
+  loss: null
+  loss_params: {}
+  # calculate loss on logits or on predictions of model for classification tasks
+  loss_on_logits: true
+  # clip gradient before loss backprop
+  clip_grad: false
+  clip_grad_params: {}
+  drop_rate: 0.1
+  # add fc layer before model with certain dim
+  num_init_features: null
+  # activation function (str in torch.nn activation functions or custom nn.Module)
+  act_fun: ReLU
+  # add noise after dropout layer for more regularization
+  use_noise: false
+  # noise parameter
+  noise_std: 0.05
+  # use BatchNorm
+  use_bn: true
+  # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn']
+  hidden_size: [512, 512, 512]
+  # dim of intermediate fc is increased times this factor in ResnetModel layer
+  hid_factor: [2, 2]
+  # list of number of layers within each DenseModel block
+  block_config: [2, 2]
+  # portion of neurons to drop after DenseBlock
+  compression: 0.5
+  # output dim of every DenseLayer
+  growth_size: 256
+  # dim of intermediate fc is increased times this factor in DenseModel layer
+  bn_factor: 2
+  # early stopping and scheduler track the metric instead of the loss
+  stop_by_metric: false
+  random_state: 42
+  # path to save model state
+  # if None: stay in memory (CPU)
+  path_to_save: null
+  # optimizer
+  opt: Adam
+  # params of optimizer
+  opt_params: { 'lr': 0.0003, 'weight_decay': 0 }
+  # scheduler
+  sch: ReduceLROnPlateau
+  # params of ReduceLROnPlateau scheduler
+  scheduler_params: { 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 }
+  # use snapshot ensembles
+  # https://arxiv.org/abs/1704.00109
+  is_snap: false
+  # params of snapshots:
+  # k - number of best snapshots (in terms of loss)
+  # early_stopping - use early stopping
+  # patience - early_stopping patience
+  # swa - stochastic weight average - averaging of snapshots weights and replace base model
+  # https://pytorch.org/blog/stochastic-weight-averaging-in-pytorch/ for idea details (different implementation)
+  # use swa with disabled is_snap
+  snap_params: { 'k': 3, 'early_stopping': True, 'patience': 10, 'swa': True }
+  # init last linear layer:
+  # zeros for weights, mean value for bias in regression, inverse sigmoid mean for binary, argmax for multiclass
+  init_bias: true
+  # verbose and create snapshots inside one training epoch every k steps
+  verbose_inside: null
+  # verbose every k epochs
+  verbose: 1
+  # show progress bar for each epoch during batchwise training
+  verbose_bar: false
+  n_epochs: 50
+  input_bn: False
+  emb_dropout: 0.1
+  emb_ratio: 3
+  max_emb_size: 256
+  use_cont: true
+  use_cat: true
+  use_text: false
+  # set cudnn backend to deterministic mode
+  deterministic: true
+  # use DP for model training
+  # currently, must be set to FALSE value
+  multigpu: false
+  # device
+  device: cuda:0
+  # use default dataset config or custom torch dataset
+  dataset: UniversalDataset
+  pin_memory: false
+  # training and inference batch size
+  bs: 512
+  num_workers: 0
+
+  tuning_params:
+    # pretrain tuner on holdout set. True - fast / False - accurate
+    # Ex. if you have 5-fold cv, validate tuner only on 1 fold
+    fit_on_holdout: True
+    # max tuning iterations. Auto - depends on dataset
+    # smaller dataset gets more iters (int or 'auto')
+    max_tuning_iter: 25
+    # max tuning time.
Tuning time might be set lower during train by automl's timer, but cannot be higher + max_tuning_time: 3600 + freeze_defaults: False diff --git a/lightautoml/image/image.py b/lightautoml/image/image.py index 4950dc3c..44a0cfb2 100644 --- a/lightautoml/image/image.py +++ b/lightautoml/image/image.py @@ -22,20 +22,14 @@ try: - from albumentations import Compose - from albumentations import Normalize - from albumentations import Resize - from albumentations.pytorch import ToTensorV2 -except: - import warnings + import timm - warnings.warn("'albumentations' - package isn't installed") -try: - from efficientnet_pytorch import EfficientNet + from timm.data import resolve_data_config + from timm.data.transforms_factory import create_transform except: import warnings - warnings.warn("'efficientnet_pytorch' - package isn't installed") + warnings.warn("'timm' - package isn't installed") from joblib import Parallel from joblib import delayed @@ -171,39 +165,31 @@ def transform(self, samples: Sequence[str]) -> np.ndarray: return np.vstack(res) -class EffNetImageEmbedder(nn.Module): - """Class to compute EfficientNet embeddings.""" +class TimmModelEmbedder(nn.Module): + """Class to compute TimmModels embeddings.""" def __init__( self, - model_name: str = "efficientnet-b0", + model_name: str = "efficientnet_b0.ra_in1k", weights_path: Optional[str] = None, - is_advprop: bool = True, device=torch.device("cuda:0"), ): - """Pytorch module for image embeddings based on efficient-net model. + """Pytorch module for image embeddings based on timm models. Args: model_name: Name of effnet model. weights_path: Path to saved weights. - is_advprop: Use adversarial training. device: Device to use. """ - super(EffNetImageEmbedder, self).__init__() + super(TimmModelEmbedder, self).__init__() self.device = device self.model = ( - EfficientNet.from_pretrained( - model_name, - weights_path=weights_path, - advprop=is_advprop, - include_top=False, - ) + timm.create_model(model_name, pretrained=True, num_classes=0, checkpoint_path=weights_path) .eval() .to(self.device) ) self.feature_shape = self.get_shape() - self.is_advprop = is_advprop self.model_name = model_name @torch.no_grad() @@ -219,85 +205,77 @@ def get_shape(self) -> int: def forward(self, x) -> torch.Tensor: """Forward pass.""" out = self.model(x) - return out[:, :, 0, 0] + return out -class ImageDataset: - """Image Dataset Class.""" +class ImageTimmDataset: + """Image for Timm Dataset Class.""" def __init__( self, + model: TimmModelEmbedder, data: Sequence[str], - is_advprop: bool = True, loader: Callable = pil_loader, ): - """Pytorch Dataset for :class:`~lightautoml.image.EffNetImageEmbedder`. + """Pytorch Dataset for :class:`~lightautoml.image.TimmModelEmbedder`. Args: + model: model which we train. data: Sequence of paths. - is_advprop: Use adversarial training. loader: Callable for reading image from path. 
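+        Example:
+            A minimal sketch (the image paths are placeholders; assumes the
+            ``timm`` package, downloadable pretrained weights and a CUDA device):
+
+            >>> embedder = TimmModelEmbedder("efficientnet_b0.ra_in1k")
+            >>> dataset = ImageTimmDataset(embedder, ["img_0.jpg", "img_1.jpg"])
+            >>> x = dataset[0]  # image loaded and transformed with the model's config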
""" self.X = data - self.transforms = Compose( - [ - Resize(224, 224), - Normalize([0.5] * 3, [0.5] * 3) if is_advprop else Normalize(), - ToTensorV2(), - ] - ) + self.transforms = create_transform(**resolve_data_config(model.model.pretrained_cfg, model=model.model)) self.loader = loader def __getitem__(self, idx: int) -> np.ndarray: path = self.X[idx] - img = np.array(self.loader(path)) - img = self.transforms(image=img)["image"] + img = self.loader(path) + img = self.transforms(img) return img def __len__(self): return len(self.X) -class DeepImageEmbedder(TransformerMixin): - """Transformer for image embeddings.""" +class DeepTimmImageEmbedder(TransformerMixin): + """Timm Transformer for image embeddings.""" def __init__( self, device: torch.device = torch.device("cuda:0"), n_jobs=4, random_state=42, - is_advprop=True, - model_name="efficientnet-b0", + model_name="efficientnet_b0.ra_in1k", weights_path: Optional[str] = None, batch_size: int = 128, verbose: bool = True, ): - """Pytorch Dataset for :class:`~lightautoml.image.EffNetImageEmbedder`. + """Pytorch Dataset for :class:`~lightautoml.image.TimmModelEmbedder`. Args: device: Torch device. n_jobs: Number of threads for dataloader. random_state: Random seed. - is_advprop: Use adversarial training. model_name: Name of effnet model. weights_path: Path to saved weights. batch_size: Batch size. verbose: Verbose data processing. """ - super(DeepImageEmbedder, self).__init__() - assert model_name in {f"efficientnet-b{i}" for i in range(8)} + super(DeepTimmImageEmbedder, self).__init__() + # add assert to check model + # assert model_name in {f"efficientnet-b{i}" for i in range(8)} self.device, self.device_ids = parse_devices(device) self.random_state = random_state self.n_jobs = n_jobs - self.is_advprop = is_advprop self.batch_size = batch_size self.verbose = verbose seed_everything(random_state) - self.model = EffNetImageEmbedder(model_name, weights_path, self.is_advprop, self.device) + self.model = TimmModelEmbedder(model_name, weights_path, self.device) def fit(self, data: Any = None): """Train model.""" @@ -314,7 +292,7 @@ def transform(self, data: Sequence[str]) -> np.ndarray: Array of embeddings. 
""" - data = ImageDataset(data, self.is_advprop) + data = ImageTimmDataset(self.model, data) loader = DataLoader(data, batch_size=self.batch_size, shuffle=False, num_workers=self.n_jobs) result = [] diff --git a/lightautoml/ml_algo/linear_sklearn.py b/lightautoml/ml_algo/linear_sklearn.py index a4737b1e..d6388298 100644 --- a/lightautoml/ml_algo/linear_sklearn.py +++ b/lightautoml/ml_algo/linear_sklearn.py @@ -83,7 +83,10 @@ def _infer_params(self) -> TorchBasedLinearEstimator: params = copy(self.params) params["loss"] = self.task.losses["torch"].loss params["metric"] = self.task.losses["torch"].metric_func - if self.task.name in ["binary", "multiclass", "multilabel"]: + + if self.task.name in ["multilabel"]: + model = TorchBasedLogisticRegression(output_size=self.n_classes, multilabel=True, **params) + elif self.task.name in ["binary", "multiclass"]: model = TorchBasedLogisticRegression(output_size=self.n_classes, **params) elif self.task.name == "reg": model = TorchBasedLinearRegression(output_size=1, **params) diff --git a/lightautoml/ml_algo/torch_based/linear_model.py b/lightautoml/ml_algo/torch_based/linear_model.py index 02c6d5a5..6321caf5 100644 --- a/lightautoml/ml_algo/torch_based/linear_model.py +++ b/lightautoml/ml_algo/torch_based/linear_model.py @@ -16,6 +16,7 @@ from torch import optim from ...tasks.losses import TorchLossWrapper +from ..utils import MySoftmaxClip logger = logging.getLogger(__name__) @@ -137,7 +138,7 @@ class CatMulticlass(CatLinear): def __init__(self, numeric_size: int, embed_sizes: Sequence[int] = (), output_size: int = 1): super().__init__(numeric_size, embed_sizes=embed_sizes, output_size=output_size) - self.final_act = nn.Softmax(dim=1) + self.final_act = MySoftmaxClip(dim=1) class TorchBasedLinearEstimator: @@ -438,6 +439,7 @@ class TorchBasedLogisticRegression(TorchBasedLinearEstimator): embed_sizes: categorical embedding sizes. output_size: size of output layer. cs: regularization coefficients. + multilabel: multilabel or not. max_iter: maximum iterations of L-BFGS. tol: the tolerance for the stopping criteria. early_stopping: maximum rounds without improving. @@ -470,21 +472,25 @@ def __init__( 10.0, 20.0, ), + multilabel: bool = False, max_iter: int = 1000, tol: float = 1e-4, early_stopping: int = 2, loss=Optional[Callable], metric=Optional[Callable], ): - if output_size == 1: - _loss = nn.BCELoss + if multilabel: + _loss = nn.BCEWithLogitsLoss + _model = CatLogisticRegression + self._binary = False + elif output_size == 1: + _loss = nn.BCEWithLogitsLoss _model = CatLogisticRegression self._binary = True else: _loss = nn.CrossEntropyLoss _model = CatMulticlass self._binary = False - if loss is None: loss = TorchLossWrapper(_loss) diff --git a/lightautoml/ml_algo/utils.py b/lightautoml/ml_algo/utils.py index 75a24108..3cbbd5e1 100644 --- a/lightautoml/ml_algo/utils.py +++ b/lightautoml/ml_algo/utils.py @@ -6,6 +6,11 @@ from typing import Optional from typing import Tuple +import torch.nn as nn + +from torch import Tensor +from torch import finfo + from ..dataset.base import LAMLDataset from ..validation.base import TrainValidIterator from .base import MLAlgo @@ -76,3 +81,33 @@ def tune_and_fit_predict( return None, None return ml_algo, preds + + +class MySoftmaxClip(nn.Module): + """Softmax with clip-norm. + + Args: + dim : A dimension along which Softmax will be computed (so every slice + along dim will sum to 1). 
+ """ + + def __init__(self, dim: Optional[int] = None) -> None: + super(MySoftmaxClip, self).__init__() + self.dim = dim + self.smax = nn.Softmax(dim=dim) + + def forward(self, inputs: Tensor) -> Tensor: + """Inference phase. + + Args: + inputs: data to softmax and clip. + + Returns: + transformed values. + + """ + inputs = self.smax(inputs) + eps = 2 * finfo(inputs.dtype).eps + inputs = inputs.clip(eps, 1 - eps) + inputs /= inputs.sum(dim=self.dim)[:, None] + return inputs diff --git a/lightautoml/pipelines/features/image_pipeline.py b/lightautoml/pipelines/features/image_pipeline.py index c8d2ca29..f72a515e 100644 --- a/lightautoml/pipelines/features/image_pipeline.py +++ b/lightautoml/pipelines/features/image_pipeline.py @@ -33,12 +33,11 @@ def __init__(self, **kwargs: Any): self.n_jobs = 4 self.loader = pil_loader - self.embed_model = "efficientnet-b0" + self.embed_model = "efficientnet_b0.ra_in1k" self.weights_path = None self.subs = 10000 self.cache_dir = "../cache_CV" self.device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") - self.is_advprop = True self.batch_size = 128 self.verbose = True @@ -108,7 +107,6 @@ def create_pipeline(self, train: LAMLDataset) -> LAMLTransformer: self.device, self.n_jobs, self.random_state, - self.is_advprop, self.batch_size, self.verbose, ), diff --git a/lightautoml/tasks/common_metric.py b/lightautoml/tasks/common_metric.py index 8cab338b..65bc6dee 100644 --- a/lightautoml/tasks/common_metric.py +++ b/lightautoml/tasks/common_metric.py @@ -315,7 +315,7 @@ def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Option _valid_str_binary_metric_names = { "auc": roc_auc_score, - "logloss": partial(log_loss, eps=1e-7), + "logloss": partial(log_loss), "accuracy": BestClassBinaryWrapper(accuracy_score), } @@ -333,7 +333,7 @@ def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Option _valid_str_multiclass_metric_names = { "auc_mu": auc_mu, "auc": roc_auc_ovr, - "crossentropy": partial(log_loss, eps=1e-7), + "crossentropy": partial(log_loss), "accuracy": BestClassMulticlassWrapper(accuracy_score), "f1_macro": BestClassMulticlassWrapper(F1Factory("macro")), "f1_micro": BestClassMulticlassWrapper(F1Factory("micro")), @@ -341,7 +341,7 @@ def __call__(self, y_true: np.ndarray, y_pred: np.ndarray, sample_weight: Option } _valid_str_multireg_metric_names = {"mse": mean_squared_error, "mae": mean_absolute_error} -_valid_str_multilabel_metric_names = {"logloss": partial(log_loss, eps=1e-7)} +_valid_str_multilabel_metric_names = {"logloss": partial(log_loss)} _valid_str_metric_names = { "binary": _valid_str_binary_metric_names, diff --git a/lightautoml/transformers/image.py b/lightautoml/transformers/image.py index 51985453..9d434ff2 100644 --- a/lightautoml/transformers/image.py +++ b/lightautoml/transformers/image.py @@ -19,7 +19,7 @@ from ..dataset.np_pd_dataset import PandasDataset from ..dataset.roles import NumericRole from ..image.image import CreateImageFeatures -from ..image.image import DeepImageEmbedder +from ..image.image import DeepTimmImageEmbedder from ..image.utils import pil_loader from ..text.utils import get_textarr_hash from ..text.utils import single_text_hash @@ -154,7 +154,6 @@ class AutoCVWrap(LAMLTransformer): device: Torch device. n_jobs: Number of threads for dataloader. random_state: Random state to take subsample and set torch seed. - is_advprop: Use adversarial training. batch_size: Batch size for embedding model. verbose: Verbose data processing. 
@@ -177,14 +176,13 @@ def features(self) -> List[str]: def __init__( self, - model="efficientnet-b0", + model="efficientnet_b0.ra_in1k", weights_path: Optional[str] = None, cache_dir: str = "./cache_CV", subs: Optional[Any] = None, device: torch.device = torch.device("cuda:0"), n_jobs: int = 4, random_state: int = 42, - is_advprop: bool = True, batch_size: int = 128, verbose: bool = True, ): @@ -194,11 +192,10 @@ def __init__( self.dicts = {} self.cache_dir = cache_dir - self.transformer = DeepImageEmbedder( + self.transformer = DeepTimmImageEmbedder( device, n_jobs, random_state, - is_advprop, model, weights_path, batch_size, diff --git a/pyproject.toml b/pyproject.toml index 15337165..85306808 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ catboost = ">=0.26.1" optuna = "*" torch = [ {platform = "win32", python = "3.6.1", version = "1.7.0"}, - {version = ">=1.0.0"} + {version = "<=2.0.0"} ] dataclasses = {version = "0.6", python = "<3.7"} holidays = "*" @@ -72,16 +72,16 @@ transformers = {version = ">=4", optional = true} # CV albumentations = {version = "<=1.0.3", optional = true} -efficientnet-pytorch = {version = "*", optional = true} +timm = {version = "*", optional = true} opencv-python = {version = "<=4.5.2.52", optional = true} PyWavelets = {version = "*", optional = true} torchvision = [ {platform = "win32", python = "3.6.1", version = "0.8.0", optional = true}, - {platform = "*", version = "*", optional = true} + {platform = "*", version = "<=0.14.0", optional = true} ] # AFG -featuretools = {version = ">=1.11.1", python = ">=3.7", optional = true} +featuretools = {version = ">=1.11.1", python = ">=3.8", optional = true} # Report (pdf) weasyprint = {version = "52.5", optional = true} @@ -91,7 +91,7 @@ cffi = {version = "1.14.5", optional = true} [tool.poetry.extras] cv = [ "albumentations", - "efficientnet-pytorch", + "timm", "opencv-python", "PyWavelets", "scikit-image", From a9d14660b143e67d1d8e1fb6c5bd63c026830a9f Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 3 Aug 2023 12:09:35 +0000 Subject: [PATCH 05/49] Added NODE neural network, added NODE example with tunning params --- .../Tutorial_9_neural_networks.ipynb | 186 +++++- lightautoml/automl/presets/tabular_presets.py | 12 +- lightautoml/ml_algo/dl_model.py | 2 + lightautoml/ml_algo/torch_based/nn_models.py | 69 ++- .../ml_algo/torch_based/node_nn_model.py | 531 ++++++++++++++++++ 5 files changed, 791 insertions(+), 9 deletions(-) create mode 100644 lightautoml/ml_algo/torch_based/node_nn_model.py diff --git a/examples/tutorials/Tutorial_9_neural_networks.ipynb b/examples/tutorials/Tutorial_9_neural_networks.ipynb index 0ee679d5..4224e3d3 100644 --- a/examples/tutorials/Tutorial_9_neural_networks.ipynb +++ b/examples/tutorials/Tutorial_9_neural_networks.ipynb @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "2bea2ba9", "metadata": { "execution": { @@ -137,7 +137,40 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dvladimirvasilyev/anaconda3/envs/myenv/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'nlp' extra dependecy package 'gensim' isn't installed. 
Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'nltk' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'transformers' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'gensim' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'nltk' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'transformers' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dvladimirvasilyev/LightAutoML/lightautoml/ml_algo/dl_model.py:41: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n", + "/home/dvladimirvasilyev/LightAutoML/lightautoml/text/nn_model.py:22: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n", + "/home/dvladimirvasilyev/LightAutoML/lightautoml/text/dl_transformers.py:25: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n" + ] + } + ], "source": [ "# Standard python libraries\n", "import os\n", @@ -187,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "64dfd5d0", "metadata": { "execution": { @@ -230,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "b8c3218d", "metadata": {}, "outputs": [], @@ -318,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "fc3bd7a7", "metadata": { "execution": { @@ -710,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "id": "343d7bac", "metadata": {}, "outputs": [], @@ -1507,6 +1540,145 @@ "automl.fit_predict(tr_data, roles = roles, verbose = 3)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1000351d", + "metadata": {}, + "source": [ + "##### 4.2.3 One more example\n", + "##### Tuning NODE params" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fcbad7ce", + "metadata": {}, + "outputs": [], + "source": [ + "TIMEOUT = 3000" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "a3bba8dc", + "metadata": {}, + "outputs": [], + "source": [ + "default_lama_params = {\n", + " \"task\": task, \n", + " \"timeout\": TIMEOUT,\n", + " \"cpu_limit\": N_THREADS,\n", + " \"reader_params\": {'n_jobs': N_THREADS, 'cv': N_FOLDS, 'random_state': RANDOM_STATE}\n", + "}\n", + "\n", + "default_nn_params = {\n", + " \"bs\": 512, \"num_workers\": 0, \"path_to_save\": None, \"n_epochs\": 10, \"freeze_defaults\": True\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ec77132c", + "metadata": {}, + "outputs": [], + "source": [ + "def my_opt_space_NODE(trial: optuna.trial.Trial, estimated_n_trials, suggested_params):\n", + " ''' \n", + " This fucntion needs for paramer tuning\n", + " '''\n", + " # optionally\n", + " trial_values = copy(suggested_params)\n", + "\n", + " trial_values[\"layer_dim\"] = trial.suggest_categorical(\n", + " \"layer_dim\", [2 ** i for i in range(8, 10)]\n", + " )\n", + " trial_values[\"use_original_head\"] = trial.suggest_categorical(\n", + 
" \"use_original_head\", [True, False]\n", + " )\n", + " trial_values[\"num_layers\"] = trial.suggest_int(\n", + " \"num_layers\", 1, 3\n", + " )\n", + " trial_values[\"drop_rate\"] = trial.suggest_float(\n", + " \"drop_rate\", 0.0, 0.3\n", + " )\n", + " trial_values[\"tree_dim\"] = trial.suggest_int(\n", + " \"tree_dim\", 1, 3\n", + " )\n", + " return trial_values" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ba312d42", + "metadata": {}, + "outputs": [], + "source": [ + "automl = TabularAutoML(\n", + " task = task, \n", + " timeout = TIMEOUT,\n", + " cpu_limit = N_THREADS,\n", + " general_params = {\"use_algos\": [[\"node_tuned\"]]}, # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model\n", + " nn_params = {\"n_epochs\": 10, \"bs\": 512, \"num_workers\": 0, \"path_to_save\": None, \"freeze_defaults\": True, \"optimization_search_space\": my_opt_space_NODE,},\n", + " nn_pipeline_params = {\"use_qnt\": True, \"use_te\": False},\n", + " reader_params = {'n_jobs': N_THREADS, 'cv': N_FOLDS, 'random_state': RANDOM_STATE}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "3df2104f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[11:58:03] Stdout logging level is INFO2.\n", + "[11:58:03] Task: binary\n", + "\n", + "[11:58:03] Start automl preset with listed constraints:\n", + "[11:58:03] - time: 3000.00 seconds\n", + "[11:58:03] - CPU: 4 cores\n", + "[11:58:03] - memory: 16 GB\n", + "\n", + "[11:58:03] \u001b[1mTrain data shape: (8000, 122)\u001b[0m\n", + "\n", + "[11:58:03] Layer \u001b[1m1\u001b[0m train process start. Time left 2999.19 secs\n", + "[11:58:04] Start hyperparameters optimization for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m ... 
Time budget is 1574.27 secs\n", + "[12:01:57] Hyperparameters optimization for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m completed\n", + "[12:01:57] The set of hyperparameters \u001b[1m{'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': ['0'], 'use_cont': True, 'use_cat': True, 'use_text': False, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'node', 'model_with_emb': False, 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 10, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 10, 'swa': True}, 'bs': 512, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': 'Adam', 'opt_params': {'lr': 0.0003, 'weight_decay': 0}, 'sch': 'ReduceLROnPlateau', 'scheduler_params': {'patience': 5, 'factor': 0.5, 'min_lr': 1e-05}, 'loss': None, 'loss_params': {}, 'loss_on_logits': True, 'clip_grad': False, 'clip_grad_params': {}, 'init_bias': True, 'dataset': 'UniversalDataset', 'tuned': False, 'optimization_search_space': , 'verbose_bar': False, 'freeze_defaults': True, 'n_out': None, 'hid_factor': [2, 2], 'hidden_size': [512, 512, 512], 'block_config': [2, 2], 'compression': 0.5, 'growth_size': 256, 'bn_factor': 2, 'drop_rate': 0.12034524690886754, 'noise_std': 0.05, 'num_init_features': None, 'act_fun': 'ReLU', 'use_noise': False, 'use_bn': True, 'stop_by_metric': False, 'tuning_params': {'fit_on_holdout': True, 'max_tuning_iter': 25, 'max_tuning_time': 3600}, 'layer_dim': 512, 'use_original_head': False, 'num_layers': 3, 'tree_dim': 2}\u001b[0m\n", + " achieve 0.7432 auc\n", + "[12:01:57] Start fitting \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m ...\n", + "[12:01:57] ===== Start working with \u001b[1mfold 0\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m =====\n", + "[12:02:09] ===== Start working with \u001b[1mfold 1\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m =====\n", + "[12:02:22] ===== Start working with \u001b[1mfold 2\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m =====\n", + "[12:02:34] ===== Start working with \u001b[1mfold 3\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m =====\n", + "[12:02:47] ===== Start working with \u001b[1mfold 4\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m =====\n", + "[12:02:59] Fitting \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m finished. 
score = \u001b[1m0.7146780211829931\u001b[0m\n", + "[12:02:59] \u001b[1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0\u001b[0m fitting and predicting completed\n", + "[12:02:59] Time left 2703.40 secs\n", + "\n", + "[12:02:59] \u001b[1mLayer 1 training completed.\u001b[0m\n", + "\n", + "[12:02:59] \u001b[1mAutoml preset training completed in 296.61 seconds\u001b[0m\n", + "\n", + "[12:02:59] Model description:\n", + "Final prediction for new objects (level 0) = \n", + "\t 1.00000 * (5 averaged models Lvl_0_Pipe_0_Mod_0_Tuned_TorchNN_node_tuned_0) \n", + "\n" + ] + } + ], + "source": [ + "oof_pred = automl.fit_predict(tr_data, roles = roles, verbose = 2)" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -1689,7 +1861,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.17" }, "papermill": { "default_parameters": {}, diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index 8d208c39..7ab512e0 100644 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -594,7 +594,17 @@ def create_automl(self, **fit_args): selector = pre_selector lvl.append(self.get_gbms(gbm_models, n + 1, selector)) - available_nn_models = ["nn", "mlp", "dense", "denselight", "resnet", "snn", "linear_layer", "_linear_layer"] + available_nn_models = [ + "nn", + "mlp", + "dense", + "denselight", + "resnet", + "snn", + "linear_layer", + "_linear_layer", + "node", + ] available_nn_models = available_nn_models + [x + "_tuned" for x in available_nn_models] nn_models = [ x for x in names if x in available_nn_models or (isinstance(x, type) and issubclass(x, nn.Module)) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index ac1dae5d..8db9d7db 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -56,6 +56,7 @@ from ..text.utils import parse_devices from ..text.utils import seed_everything from .torch_based.nn_models import MLP +from .torch_based.nn_models import NODE from .torch_based.nn_models import SNN from .torch_based.nn_models import DenseLightModel from .torch_based.nn_models import DenseModel @@ -74,6 +75,7 @@ "linear_layer": LinearLayer, "_linear_layer": _LinearLayer, "snn": SNN, + "node": NODE, } diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index dbd42ec6..119e0779 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -9,6 +9,9 @@ import torch import torch.nn as nn +from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock +from lightautoml.ml_algo.torch_based.node_nn_model import Lambda + class GaussianNoise(nn.Module): """Adds gaussian noise. @@ -389,7 +392,6 @@ class DenseModel(nn.Module): bn_factor: Dim of intermediate fc is increased times `bn_factor` in DenseModel layer. act_fun: Activation function. use_bn: Use BatchNorm. - """ def __init__( @@ -729,3 +731,68 @@ def __init__(self): def forward(self, x: torch.Tensor, x_mask: torch.Tensor) -> torch.Tensor: """Forward-pass.""" return x + + +class NODE(nn.Module): + """The NODE model from https://github.com/Qwicen. + + Args: + n_in: Input dim. + n_out: Output dim. + layer_dim: num trees in one layer. + num_layers: number of forests. + tree_dim: number of response channels in the response of individual tree. + use_original_head use averaging as a head or put linear layer instead. 
+ depth: number of splits in every tree. + drop_rate: Dropout rate for each layer altogether. + act_fun: Activation function. + num_init_features: If not none add fc layer before model with certain dim. + use_bn: Use BatchNorm. + """ + + def __init__( + self, + n_in: int, + n_out: int = 1, + layer_dim: int = 2048, + num_layers: int = 1, + tree_dim: int = 1, + use_original_head: bool = False, + depth: int = 6, + drop_rate: float = 0.0, + act_fun: nn.Module = nn.ReLU, + num_init_features: Optional[int] = None, + use_bn: bool = True, + **kwargs, + ): + super(NODE, self).__init__() + num_features = n_in if num_init_features is None else num_init_features + self.dense0 = nn.Linear(n_in, num_features) if num_init_features is not None else nn.Identity() + self.features1 = nn.Sequential(OrderedDict([])) + block = DenseODSTBlock( + input_dim=num_features, + layer_dim=layer_dim, + num_layers=num_layers, + tree_dim=tree_dim if not use_original_head else n_out, + depth=depth, + input_dropout=drop_rate, + flatten_output=not use_original_head, + ) + self.features1.add_module("ODSTForestblock%d", block) + self.features2 = nn.Sequential(OrderedDict([])) + if use_original_head: + last_layer = Lambda(lambda x: x[..., :n_out].mean(dim=-2)) + self.features2.add_module("head", last_layer) + else: + if use_bn: + self.features2.add_module("norm", nn.BatchNorm1d(layer_dim * num_layers * tree_dim)) + self.features2.add_module("act", act_fun()) + fc = nn.Linear(layer_dim * num_layers * tree_dim, n_out) + self.features2.add_module("fc", fc) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """Forward-pass.""" + x = self.dense0(x) + x = self.features1(x) + x = self.features2(x) + return x.view(x.shape[0], -1) diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py new file mode 100644 index 00000000..cdfedbea --- /dev/null +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -0,0 +1,531 @@ +"""Node utils models.""" + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from torch.autograd import Function +from torch.jit import script + + +def check_numpy(x): + """Makes sure x is a numpy array. + + Args: + x : array to check. + + Returns: + x + """ + if isinstance(x, torch.Tensor): + x = x.detach().cpu().numpy() + x = np.asarray(x) + assert isinstance(x, np.ndarray) + return x + + +def to_one_hot(y, depth=None): + """Takes integer with n dims and converts it to 1-hot representation with n + 1 dims. + + The n+1'st dimension will have zeros everywhere but at y'th index, where it will be equal to 1. + + Args: + y : input integer (IntTensor, LongTensor or Variable) of any shape + depth : the size of the one hot dimension + + Returns: + one hot Tensor + """ + y_flat = y.to(torch.int64).view(-1, 1) + depth = depth if depth is not None else int(torch.max(y_flat)) + 1 + y_one_hot = torch.zeros(y_flat.size()[0], depth, device=y.device).scatter_(1, y_flat, 1) + y_one_hot = y_one_hot.view(*(tuple(y.shape) + (-1,))) + return y_one_hot + + +def _make_ix_like(input, dim=0): + d = input.size(dim) + rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype) + view = [1] * input.dim() + view[0] = -1 + return rho.view(view).transpose(0, dim) + + +class SparsemaxFunction(Function): + """An implementation of sparsemax (Martins & Astudillo, 2016). + + See :cite:`DBLP:journals/corr/MartinsA16` for detailed description. 
+ By Ben Peters and Vlad Niculae + """ + + @staticmethod + def forward(ctx, input, dim=-1): + """sparsemax: normalizing sparse transform (a la softmax). + + Args: + ctx: context, to increase the speed + input (Tensor): any shape + dim: dimension along which to apply sparsemax + + Returns: + Tensor same shape as input + """ + ctx.dim = dim + max_val, _ = input.max(dim=dim, keepdim=True) + input -= max_val # same numerical stability trick as for softmax + tau, supp_size = SparsemaxFunction._threshold_and_support(input, dim=dim) + output = torch.clamp(input - tau, min=0) + ctx.save_for_backward(supp_size, output) + return output + + @staticmethod + def backward(ctx, grad_output): + """backward-pass. + + Args: + ctx: context, to increase the speed + grad_output: grad from the next layers + + Returns: + grad output + """ + supp_size, output = ctx.saved_tensors + dim = ctx.dim + grad_input = grad_output.clone() + grad_input[output == 0] = 0 + + v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze() + v_hat = v_hat.unsqueeze(dim) + grad_input = torch.where(output != 0, grad_input - v_hat, grad_input) + return grad_input, None + + @staticmethod + def _threshold_and_support(input, dim=-1): + """Sparsemax building block compute the threshold. + + Args: + input: any dimension + dim: dimension along which to apply the sparsemax + + Returns: + the threshold value + """ + input_srt, _ = torch.sort(input, descending=True, dim=dim) + input_cumsum = input_srt.cumsum(dim) - 1 + rhos = _make_ix_like(input, dim) + support = rhos * input_srt > input_cumsum + + support_size = support.sum(dim=dim).unsqueeze(dim) + tau = input_cumsum.gather(dim, support_size - 1) + tau /= support_size.to(input.dtype) + return tau, support_size + + +sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim) # noqa: E731 +sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1) # noqa: E731 + + +class Entmax15Function(Function): + """An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). + + See :cite:`https://arxiv.org/abs/1905.05702 for detailed description. + Source: https://github.com/deep-spin/entmax + """ + + @staticmethod + def forward(ctx, input, dim=-1): + """Entmax: normalizing sparse transform (a la softmax). + + Args: + ctx: context, to increase the speed + input (Tensor): any shape + dim: dimension along which to apply Entmax + + Returns: + output (Tensor): same shape as input + """ + ctx.dim = dim + max_val, _ = input.max(dim=dim, keepdim=True) + input = input - max_val # same numerical stability trick as for softmax + input = input / 2 # divide by 2 to solve actual Entmax + + tau_star, _ = Entmax15Function._threshold_and_support(input, dim) + output = torch.clamp(input - tau_star, min=0) ** 2 + ctx.save_for_backward(output) + return output + + @staticmethod + def backward(ctx, grad_output): + """backward-pass. + + Args: + ctx: context, to increase the speed + grad_output: grad from the next layers + + Returns: + grad output + """ + (Y,) = ctx.saved_tensors + gppr = Y.sqrt() # = 1 / g'' (Y) + dX = grad_output * gppr + q = dX.sum(ctx.dim) / gppr.sum(ctx.dim) + q = q.unsqueeze(ctx.dim) + dX -= q * gppr + return dX, None + + @staticmethod + def _threshold_and_support(input, dim=-1): + """Sparsemax building block compute the threshold. 
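+        Unlike the plain sparsemax threshold above, the tau returned here
+        satisfies sum(clamp(input - tau, min=0) ** 2) == 1 along ``dim`` (the
+        entmax-1.5 normalization) and is found in closed form from cumulative
+        means and variances of the sorted input.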
+ + Args: + input: any dimension + dim: dimension along which to apply the sparsemax + + Returns: + the threshold value + """ + Xsrt, _ = torch.sort(input, descending=True, dim=dim) + + rho = _make_ix_like(input, dim) + mean = Xsrt.cumsum(dim) / rho + mean_sq = (Xsrt ** 2).cumsum(dim) / rho + ss = rho * (mean_sq - mean ** 2) + delta = (1 - ss) / rho + + # NOTE this is not exactly the same as in reference algo + # Fortunately it seems the clamped values never wrongly + # get selected by tau <= sorted_z. Prove this! + delta_nz = torch.clamp(delta, 0) + tau = mean - torch.sqrt(delta_nz) + + support_size = (tau <= Xsrt).sum(dim).unsqueeze(dim) + tau_star = tau.gather(dim, support_size - 1) + return tau_star, support_size + + +class Entmoid15(Function): + """A highly optimized equivalent of labda x: Entmax15([x, 0]).""" + + @staticmethod + def forward(ctx, input): + """Entmoid15 (a la softmax). + + Args: + ctx: context, to increase the speed + input (Tensor): any shape + + Returns: + output (Tensor): same shape as input + """ + output = Entmoid15._forward(input) + ctx.save_for_backward(output) + return output + + @staticmethod + @script + def _forward(input): + input, is_pos = abs(input), input >= 0 + tau = (input + torch.sqrt(F.relu(8 - input ** 2))) / 2 + tau.masked_fill_(tau <= input, 2.0) + y_neg = 0.25 * F.relu(tau - input, inplace=True) ** 2 + return torch.where(is_pos, 1 - y_neg, y_neg) + + @staticmethod + def backward(ctx, grad_output): + """backward-pass. + + Args: + ctx: context, to increase the speed + grad_output: grad from the next layers + + Returns: + grad output + """ + return Entmoid15._backward(ctx.saved_tensors[0], grad_output) + + @staticmethod + @script + def _backward(output, grad_output): + gppr0, gppr1 = output.sqrt(), (1 - output).sqrt() + grad_input = grad_output * gppr0 + q = grad_input / (gppr0 + gppr1) + grad_input -= q * gppr0 + return grad_input + + +entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim) # noqa: E731 +entmoid15 = Entmoid15.apply # noqa: E731 + + +class Lambda(nn.Module): + """Pytorch implementation of lambda. + + Args: + func : returned func + """ + + def __init__(self, func): + super().__init__() + self.func = func + + def forward(self, *args, **kwargs): + """Forward-pass. + + # noqa: DAR101 + + Returns: + f(*args, **kwargs) + """ + return self.func(*args, **kwargs) + + +class ModuleWithInit(nn.Module): + """Base class for pytorch module with data-aware initializer on first batch.""" + + def __init__(self): + super().__init__() + self._is_initialized_tensor = nn.Parameter(torch.tensor(0, dtype=torch.uint8), requires_grad=False) + self._is_initialized_bool = None + # Note: this module uses a separate flag self._is_initialized so as to achieve both + # * persistence: is_initialized is saved alongside model in state_dict + # * speed: model doesn't need to cache + # please DO NOT use these flags in child modules + + def initialize(self, *args, **kwargs): + """Initialize module tensors using first batch of data.""" + raise NotImplementedError("Please implement ") + + def __call__(self, *args, **kwargs): + """Initialize module after forward-pass. + + # noqa: DAR101 + + Returns: + Forward-pass. + """ + if self._is_initialized_bool is None: + self._is_initialized_bool = bool(self._is_initialized_tensor.item()) + if not self._is_initialized_bool: + self.initialize(*args, **kwargs) + self._is_initialized_tensor.data[...] 
= 1 + self._is_initialized_bool = True + return super().__call__(*args, **kwargs) + + +class ODST(ModuleWithInit): + r"""Oblivious Differentiable Sparsemax Trees. http://tinyurl.com/odst-readmore. + + One can drop (sic!) this module anywhere instead of nn.Linear + + Args: + in_features: number of features in the input tensor + num_trees: number of trees in this layer + tree_dim: number of response channels in the response of individual tree + depth: number of splits in every tree + flatten_output: if False, returns [..., num_trees, tree_dim], + by default returns [..., num_trees * tree_dim] + choice_function: f(tensor, dim) -> R_simplex computes feature weights s.t. f(tensor, dim).sum(dim) == 1 + bin_function: f(tensor) -> R[0, 1], computes tree leaf weights + initialize_response_: in-place initializer for tree output tensor + initialize_selection_logits_: in-place initializer for logits that select features for the tree + both thresholds and scales are initialized with data-aware init (or .load_state_dict) + threshold_init_beta: initializes threshold to a q-th quantile of data points + where q ~ Beta(:threshold_init_beta:, :threshold_init_beta:) + If this param is set to 1, initial thresholds will have the same distribution as data points + If greater than 1 (e.g. 10), thresholds will be closer to median data value + If less than 1 (e.g. 0.1), thresholds will approach min/max data values. + threshold_init_cutoff: threshold log-temperatures initializer, \in (0, inf) + By default(1.0), log-remperatures are initialized in such a way that all bin selectors + end up in the linear region of sparse-sigmoid. The temperatures are then scaled by this parameter. + Setting this value > 1.0 will result in some margin between data points and sparse-sigmoid cutoff value + Setting this value < 1.0 will cause (1 - value) part of data points to end up in flat sparse-sigmoid region + For instance, threshold_init_cutoff = 0.9 will set 10% points equal to 0.0 or 1.0 + Setting this value > 1.0 will result in a margin between data points and sparse-sigmoid cutoff value + All points will be between (0.5 - 0.5 / threshold_init_cutoff) and (0.5 + 0.5 / threshold_init_cutoff) + """ + + def __init__( + self, + in_features, + num_trees, + depth=6, + tree_dim=1, + flatten_output=True, + choice_function=entmax15, + bin_function=entmoid15, + initialize_response_=nn.init.normal_, + initialize_selection_logits_=nn.init.uniform_, + threshold_init_beta=1.0, + threshold_init_cutoff=1.0, + ): + super().__init__() + self.depth, self.num_trees, self.tree_dim, self.flatten_output = depth, num_trees, tree_dim, flatten_output + self.choice_function, self.bin_function = choice_function, bin_function + self.threshold_init_beta, self.threshold_init_cutoff = threshold_init_beta, threshold_init_cutoff + + self.response = nn.Parameter(torch.zeros([num_trees, tree_dim, 2 ** depth]), requires_grad=True) + initialize_response_(self.response) + + self.feature_selection_logits = nn.Parameter(torch.zeros([in_features, num_trees, depth]), requires_grad=True) + initialize_selection_logits_(self.feature_selection_logits) + + self.feature_thresholds = nn.Parameter( + torch.full([num_trees, depth], float("nan"), dtype=torch.float32), requires_grad=True + ) # nan values will be initialized on first batch (data-aware init) + + self.log_temperatures = nn.Parameter( + torch.full([num_trees, depth], float("nan"), dtype=torch.float32), requires_grad=True + ) + + # binary codes for mapping between 1-hot vectors and bin indices + with torch.no_grad(): + 
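+            # A worked illustration of the construction below for depth = 2:
+            #   indices = [0, 1, 2, 3], offsets = [1, 2], so that
+            #   bin_codes[d, i] = (i // 2**d) % 2 is bit d of leaf index i:
+            #   [[0, 1, 0, 1], [0, 0, 1, 1]].
+            # Stacking (bit, 1 - bit) into bin_codes_1hot lets the einsum in
+            # forward() select, for every split, the probability of the branch
+            # leading to each leaf.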
indices = torch.arange(2 ** self.depth) + offsets = 2 ** torch.arange(self.depth) + bin_codes = (indices.view(1, -1) // offsets.view(-1, 1) % 2).to(torch.float32) + bin_codes_1hot = torch.stack([bin_codes, 1.0 - bin_codes], dim=-1) + self.bin_codes_1hot = nn.Parameter(bin_codes_1hot, requires_grad=False) + # ^-- [depth, 2 ** depth, 2] + + def forward(self, input): + """Forward-pass. + + Args: + input: any shape + + Returns: + response + """ + assert len(input.shape) >= 2 + if len(input.shape) > 2: + return self.forward(input.view(-1, input.shape[-1])).view(*input.shape[:-1], -1) + # new input shape: [batch_size, in_features] + + feature_logits = self.feature_selection_logits + feature_selectors = self.choice_function(feature_logits, dim=0) + # ^--[in_features, num_trees, depth] + + feature_values = torch.einsum("bi,ind->bnd", input, feature_selectors) + # ^--[batch_size, num_trees, depth] + + threshold_logits = (feature_values - self.feature_thresholds) * torch.exp(-self.log_temperatures) + + threshold_logits = torch.stack([-threshold_logits, threshold_logits], dim=-1) + # ^--[batch_size, num_trees, depth, 2] + + bins = self.bin_function(threshold_logits) + # ^--[batch_size, num_trees, depth, 2], approximately binary + + bin_matches = torch.einsum("btds,dcs->btdc", bins, self.bin_codes_1hot) + # ^--[batch_size, num_trees, depth, 2 ** depth] + + response_weights = torch.prod(bin_matches, dim=-2) + # ^-- [batch_size, num_trees, 2 ** depth] + + response = torch.einsum("bnd,ncd->bnc", response_weights, self.response) + # ^-- [batch_size, num_trees, tree_dim] + + return response.flatten(1, 2) if self.flatten_output else response + + def initialize(self, input, eps=1e-6): + """Initialization. + + Args: + input: any dimension + eps: extra epsilon as a temperature + """ + # data-aware initializer + assert len(input.shape) == 2 + with torch.no_grad(): + feature_selectors = self.choice_function(self.feature_selection_logits, dim=0) + # ^--[in_features, num_trees, depth] + + feature_values = torch.einsum("bi,ind->bnd", input, feature_selectors) + # ^--[batch_size, num_trees, depth] + + # initialize thresholds: sample random percentiles of data + percentiles_q = 100 * np.random.beta( + self.threshold_init_beta, self.threshold_init_beta, size=[self.num_trees, self.depth] + ) + self.feature_thresholds.data[...] = torch.as_tensor( + list(map(np.percentile, check_numpy(feature_values.flatten(1, 2).t()), percentiles_q.flatten())), + dtype=feature_values.dtype, + device=feature_values.device, + ).view(self.num_trees, self.depth) + + # init temperatures: make sure enough data points are in the linear region of sparse-sigmoid + temperatures = np.percentile( + check_numpy(abs(feature_values - self.feature_thresholds)), + q=100 * min(1.0, self.threshold_init_cutoff), + axis=0, + ) + + # if threshold_init_cutoff > 1, scale everything down by it + temperatures /= max(1.0, self.threshold_init_cutoff) + self.log_temperatures.data[...] = torch.log(torch.as_tensor(temperatures) + eps) + + def __repr__(self): + return "{}(in_features={}, num_trees={}, depth={}, tree_dim={}, flatten_output={})".format( + self.__class__.__name__, + self.feature_selection_logits.shape[0], + self.num_trees, + self.depth, + self.tree_dim, + self.flatten_output, + ) + + +class DenseODSTBlock(nn.Sequential): + """The DenseBlock from https://github.com/Qwicen. + + Args: + sinput_dim: Input dim. + layer_dim: num trees in one layer. + num_layers: number of forests. + tree_dim: number of response channels in the response of individual tree. 
+ max_features: maximum number of features per input + depth: number of splits in every tree. + input_dropout: Dropout rate forest layer. + flatten_output: flatten output or not. + """ + + def __init__( + self, + input_dim, + layer_dim, + num_layers, + tree_dim=1, + max_features=None, + input_dropout=0.0, + flatten_output=True, + **kwargs + ): + layers = [] + for i in range(num_layers): + oddt = ODST(input_dim, layer_dim, tree_dim=tree_dim, flatten_output=True, **kwargs) + input_dim = min(input_dim + layer_dim * tree_dim, max_features or float("inf")) + layers.append(oddt) + + super().__init__(*layers) + self.num_layers, self.layer_dim, self.tree_dim = num_layers, layer_dim, tree_dim + self.max_features, self.flatten_output = max_features, flatten_output + self.input_dropout = input_dropout + + def forward(self, x): + """Forward-pass.""" + initial_features = x.shape[-1] + for layer in self: + layer_inp = x + if self.max_features is not None: + tail_features = min(self.max_features, layer_inp.shape[-1]) - initial_features + if tail_features != 0: + layer_inp = torch.cat([layer_inp[..., :initial_features], layer_inp[..., -tail_features:]], dim=-1) + if self.input_dropout: + layer_inp = F.dropout(layer_inp, self.input_dropout, self.training) + h = layer(layer_inp) + x = torch.cat([x, h], dim=-1) + + outputs = x[..., initial_features:] + if not self.flatten_output: + outputs = outputs.view(*outputs.shape[:-1], self.num_layers * self.layer_dim, self.tree_dim) + return outputs From 6415c28f62178c713c3425b8820c049123a25709 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 3 Aug 2023 14:29:59 +0000 Subject: [PATCH 06/49] added tutorial run --- examples/tutorials/Tutorial_8_CV_preset.ipynb | 1440 +++++++++-------- 1 file changed, 781 insertions(+), 659 deletions(-) diff --git a/examples/tutorials/Tutorial_8_CV_preset.ipynb b/examples/tutorials/Tutorial_8_CV_preset.ipynb index 06a4f833..8a946476 100644 --- a/examples/tutorials/Tutorial_8_CV_preset.ipynb +++ b/examples/tutorials/Tutorial_8_CV_preset.ipynb @@ -62,23 +62,21 @@ "outputs": [], "source": [ "##Kaggle functionality for loading data; Note that you have to use your kaggle API token (see the link above):\n", - "#!pip install opendatasets\n", - "#!pip install -q kaggle\n", - "#!pip install --upgrade --force-reinstall --no-deps kaggle\n", - "#!mkdir ~/.kaggle\n", - "#!ls ~/.kaggle\n", - "#!cp kaggle.json ~/.kaggle/\n", - "#!chmod 600 ~/.kaggle/kaggle.json\n", - "#!kaggle competitions download -c paddy-disease-classification\n", + "# !pip install opendatasets\n", + "# !pip install -q kaggle\n", + "# !pip install --upgrade --force-reinstall --no-deps kaggle\n", + "# !mkdir ~/.kaggle\n", + "# !ls ~/.kaggle\n", + "# !cp kaggle.json ~/.kaggle/\n", + "# !chmod 600 ~/.kaggle/kaggle.json\n", + "# !kaggle competitions download -c paddy-disease-classification\n", "\n", - "##Unpack data:\n", - "#!mkdir paddy-disease\n", - "#!unzip paddy-disease-classification.zip -d paddy-disease\n", + "# #Unpack data:\n", + "# !mkdir paddy-disease\n", + "# !unzip paddy-disease-classification.zip -d paddy-disease\n", "\n", - "##Install LightAutoML, Pandas and torch EfficientNet:\n", - "#!pip install -U lightautoml[cv] #[cv] is for installing CV tasks functionality\n", - "#!pip install efficientnet-pytorch==0.7.0\n", - "#!pip install -U pandas" + "# #Install LightAutoML, Pandas and torch EfficientNet:\n", + "# !pip install -U lightautoml[cv] #[cv] is for installing CV tasks functionality\n" ] }, { @@ -102,6 +100,17 @@ "- LightAutoML modules: `TabularCVAutoML` preset 
for AutoML model creation and Task class to setup what kind of ML problem we solve (binary/multiclass classification or regression)" ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"CUDA_DEVICE_ORDER\"]=\"PCI_BUS_ID\" # see issue #152\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" + ] + }, { "cell_type": "code", "execution_count": 2, @@ -122,7 +131,32 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'nlp' extra dependecy package 'gensim' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'nltk' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'transformers' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'gensim' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'nltk' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n", + "'nlp' extra dependecy package 'transformers' isn't installed. Look at README.md in repo 'LightAutoML' for installation instructions.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dvladimirvasilyev/LightAutoML/lightautoml/ml_algo/dl_model.py:41: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n", + "/home/dvladimirvasilyev/LightAutoML/lightautoml/text/nn_model.py:22: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n", + "/home/dvladimirvasilyev/LightAutoML/lightautoml/text/dl_transformers.py:25: UserWarning: 'transformers' - package isn't installed\n", + " warnings.warn(\"'transformers' - package isn't installed\")\n" + ] + } + ], "source": [ "# Standard python libraries\n", "import os\n", @@ -443,8 +477,8 @@ "77 42\n", "73 38\n", "66 36\n", - "82 5\n", "62 5\n", + "82 5\n", "Name: age, dtype: int64" ] }, @@ -576,8 +610,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 10.5 ms, sys: 1.28 ms, total: 11.8 ms\n", - "Wall time: 10.6 ms\n" + "CPU times: user 4.89 ms, sys: 485 µs, total: 5.37 ms\n", + "Wall time: 5.14 ms\n" ] }, { @@ -783,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -797,6 +831,20 @@ "scrolled": true }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c0e2e2174b1644ed91ed76b5f30a6d6e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", @@ -817,10 +865,10 @@ ], "source": [ "from PIL import Image\n", - "\n", + "from tqdm.notebook import tqdm\n", "new_imgs = []\n", "\n", - "for i, p in enumerate(train_data['path'].values):\n", + "for i, p in tqdm(enumerate(train_data['path'].values)):\n", " if i % 1000 == 0: \n", " print(i)\n", " \n", @@ -1013,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -1025,6 +1073,20 @@ "execution_count": 14, "metadata": {}, "outputs": [ + { + "data": { + 
"application/vnd.jupyter.widget-view+json": { + "model_id": "2d6a6c9a493b4a6298da4543f3ed3dba", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", @@ -1039,7 +1101,7 @@ "source": [ "new_imgs = []\n", "\n", - "for i, p in enumerate(submission['path'].values):\n", + "for i, p in tqdm(enumerate(submission['path'].values)):\n", " if i % 1000 == 0: \n", " print(i)\n", " \n", @@ -1365,117 +1427,143 @@ "name": "stdout", "output_type": "stream", "text": [ - "[10:13:29] Stdout logging level is INFO3.\n", - "[10:13:29] Task: multiclass\n", + "[14:04:32] Stdout logging level is INFO3.\n", + "[14:04:32] Task: multiclass\n", "\n", - "[10:13:29] Start automl preset with listed constraints:\n", - "[10:13:29] - time: 18000.00 seconds\n", - "[10:13:29] - CPU: 2 cores\n", - "[10:13:29] - memory: 16 GB\n", + "[14:04:32] Start automl preset with listed constraints:\n", + "[14:04:32] - time: 18000.00 seconds\n", + "[14:04:32] - CPU: 2 cores\n", + "[14:04:32] - memory: 16 GB\n", "\n", - "[10:13:29] Train data shape: (114477, 5)\n", + "[14:04:32] \u001b[1mTrain data shape: (114477, 5)\u001b[0m\n", "\n", - "[10:13:29] Layer 1 train process start. Time left 17999.80 secs\n", - "Loaded pretrained weights for efficientnet-b0\n", - "[10:13:33] Load saved dataset for path\n", - "[10:13:34] Feature path transformed\n", - "[10:13:43] Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...\n", - "[10:13:43] ===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n", - "[10:13:57] Linear model: C = 1e-05 score = -1.1418084988175383\n", - "[10:14:11] Linear model: C = 5e-05 score = -0.8305336454784469\n", - "[10:14:20] Linear model: C = 0.0001 score = -0.720568943030616\n", - "[10:14:33] Linear model: C = 0.0005 score = -0.5286940477535328\n", - "[10:14:46] Linear model: C = 0.001 score = -0.4742702118116027\n", - "[10:15:05] Linear model: C = 0.005 score = -0.4115479073137628\n", - "[10:15:22] Linear model: C = 0.01 score = -0.4116697343411257\n", - "[10:15:37] Linear model: C = 0.05 score = -0.4708596346733632\n", - "[10:15:37] ===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n", - "[10:15:53] Linear model: C = 1e-05 score = -1.125846835364122\n", - "[10:16:06] Linear model: C = 5e-05 score = -0.8160193296553417\n", - "[10:16:15] Linear model: C = 0.0001 score = -0.7059358171644057\n", - "[10:16:30] Linear model: C = 0.0005 score = -0.519242546498812\n", - "[10:16:41] Linear model: C = 0.001 score = -0.46901655981859697\n", - "[10:16:57] Linear model: C = 0.005 score = -0.41414562408622063\n", - "[10:17:11] Linear model: C = 0.01 score = -0.41384713476625173\n", - "[10:17:27] Linear model: C = 0.05 score = -0.4648754680980122\n", - "[10:17:43] Linear model: C = 0.1 score = -0.5075886657099099\n", - "[10:17:44] ===== Start working with fold 2 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n", - "[10:17:59] Linear model: C = 1e-05 score = -1.1051242355426971\n", - "[10:18:13] Linear model: C = 5e-05 score = -0.8001807308024304\n", - "[10:18:22] Linear model: C = 0.0001 score = -0.694614750733295\n", - "[10:18:37] Linear model: C = 0.0005 score = -0.5152255270514708\n", - "[10:18:49] Linear model: C = 0.001 score = -0.4661388869197108\n", - "[10:19:04] Linear model: C = 0.005 score = -0.41149006525348847\n", - "[10:19:19] Linear model: C = 0.01 score = -0.4104284110463969\n", - "[10:19:35] Linear model: C = 0.05 score = -0.45961952123715527\n", - "[10:19:51] 
Linear model: C = 0.1 score = -0.5011072775518325\n", - "[10:19:51] ===== Start working with fold 3 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n", - "[10:20:09] Linear model: C = 1e-05 score = -1.1144246553258361\n", - "[10:20:25] Linear model: C = 5e-05 score = -0.8084470717185533\n", - "[10:20:35] Linear model: C = 0.0001 score = -0.7023525467007014\n", - "[10:20:49] Linear model: C = 0.0005 score = -0.5214177088196867\n", - "[10:21:01] Linear model: C = 0.001 score = -0.4720856273082093\n", - "[10:21:15] Linear model: C = 0.005 score = -0.4191401085852046\n", - "[10:21:32] Linear model: C = 0.01 score = -0.4208859924287323\n", - "[10:21:47] Linear model: C = 0.05 score = -0.4851827484977867\n", - "[10:21:48] ===== Start working with fold 4 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====\n", - "[10:22:04] Linear model: C = 1e-05 score = -1.0955685661573173\n", - "[10:22:17] Linear model: C = 5e-05 score = -0.7741691221828721\n", - "[10:22:26] Linear model: C = 0.0001 score = -0.6653917443236547\n", - "[10:22:42] Linear model: C = 0.0005 score = -0.48628652950179174\n", - "[10:22:53] Linear model: C = 0.001 score = -0.4381206233809863\n", - "[10:23:09] Linear model: C = 0.005 score = -0.38322203539072797\n", - "[10:23:24] Linear model: C = 0.01 score = -0.38175313554732276\n", - "[10:23:40] Linear model: C = 0.05 score = -0.42856031627342633\n", - "[10:23:55] Linear model: C = 0.1 score = -0.4689137362889697\n", - "[10:23:55] Fitting Lvl_0_Pipe_0_Mod_0_LinearL2 finished. score = -0.4073443684095255\n", - "[10:23:55] Lvl_0_Pipe_0_Mod_0_LinearL2 fitting and predicting completed\n", - "[10:23:55] Time left 17373.85 secs\n", + "[14:04:32] Layer \u001b[1m1\u001b[0m train process start. Time left 17999.83 secs\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 895/895 [07:29<00:00, 1.99it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[14:12:09] Feature path transformed\n", + "[14:12:16] Start fitting \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m ...\n", + "[14:12:17] ===== Start working with \u001b[1mfold 0\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m =====\n", + "[14:12:26] Linear model: C = 1e-05 score = -0.9995305866945853\n", + "[14:12:32] Linear model: C = 5e-05 score = -0.6879959560713191\n", + "[14:12:38] Linear model: C = 0.0001 score = -0.5802952177399445\n", + "[14:12:45] Linear model: C = 0.0005 score = -0.3907926611544111\n", + "[14:12:51] Linear model: C = 0.001 score = -0.33425017155675657\n", + "[14:13:00] Linear model: C = 0.005 score = -0.2559518217619532\n", + "[14:13:07] Linear model: C = 0.01 score = -0.24141776919439237\n", + "[14:13:15] Linear model: C = 0.05 score = -0.2431661172897411\n", + "[14:13:23] Linear model: C = 0.1 score = -0.25925367786528475\n", + "[14:13:24] ===== Start working with \u001b[1mfold 1\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m =====\n", + "[14:13:32] Linear model: C = 1e-05 score = -0.9872444001968863\n", + "[14:13:39] Linear model: C = 5e-05 score = -0.6682540100549987\n", + "[14:13:45] Linear model: C = 0.0001 score = -0.5574685730009872\n", + "[14:13:51] Linear model: C = 0.0005 score = -0.3653461360638747\n", + "[14:13:58] Linear model: C = 0.001 score = -0.31059360297670363\n", + "[14:14:05] Linear model: C = 0.005 score = -0.2370436682635623\n", + "[14:14:14] Linear model: C = 0.01 score = -0.22495884629469698\n", + "[14:14:21] Linear model: C = 0.05 score = -0.23420873784566962\n", + "[14:14:29] Linear model: C = 0.1 score = 
-0.25263966927426823\n", + "[14:14:29] ===== Start working with \u001b[1mfold 2\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m =====\n", + "[14:14:37] Linear model: C = 1e-05 score = -0.9554531133528031\n", + "[14:14:43] Linear model: C = 5e-05 score = -0.640784196156178\n", + "[14:14:49] Linear model: C = 0.0001 score = -0.5345024606190905\n", + "[14:14:57] Linear model: C = 0.0005 score = -0.3546726337461952\n", + "[14:15:04] Linear model: C = 0.001 score = -0.30344210801693483\n", + "[14:15:12] Linear model: C = 0.005 score = -0.2331574262775805\n", + "[14:15:19] Linear model: C = 0.01 score = -0.22071779776854528\n", + "[14:15:28] Linear model: C = 0.05 score = -0.22603075278344578\n", + "[14:15:36] Linear model: C = 0.1 score = -0.24138537694410292\n", + "[14:15:36] ===== Start working with \u001b[1mfold 3\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m =====\n", + "[14:15:44] Linear model: C = 1e-05 score = -0.973115505822288\n", + "[14:15:51] Linear model: C = 5e-05 score = -0.6613476137718094\n", + "[14:15:56] Linear model: C = 0.0001 score = -0.5539538946164072\n", + "[14:16:04] Linear model: C = 0.0005 score = -0.3666276035478478\n", + "[14:16:10] Linear model: C = 0.001 score = -0.31130200709742806\n", + "[14:16:18] Linear model: C = 0.005 score = -0.2326339584928626\n", + "[14:16:25] Linear model: C = 0.01 score = -0.21658099282365262\n", + "[14:16:33] Linear model: C = 0.05 score = -0.21364841773406087\n", + "[14:16:42] Linear model: C = 0.1 score = -0.2256018292053085\n", + "[14:16:51] Linear model: C = 0.5 score = -0.2763179966937595\n", + "[14:16:51] ===== Start working with \u001b[1mfold 4\u001b[0m for \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m =====\n", + "[14:16:58] Linear model: C = 1e-05 score = -0.9531496536787142\n", + "[14:17:05] Linear model: C = 5e-05 score = -0.6270339670737181\n", + "[14:17:10] Linear model: C = 0.0001 score = -0.517302736118502\n", + "[14:17:17] Linear model: C = 0.0005 score = -0.331531311465719\n", + "[14:17:23] Linear model: C = 0.001 score = -0.27798570249468424\n", + "[14:17:32] Linear model: C = 0.005 score = -0.20448637290477473\n", + "[14:17:39] Linear model: C = 0.01 score = -0.19081673660070902\n", + "[14:17:47] Linear model: C = 0.05 score = -0.1923892363102242\n", + "[14:17:56] Linear model: C = 0.1 score = -0.20661581389305533\n", + "[14:17:56] Fitting \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m finished. 
score = \u001b[1m-0.21831477243925082\u001b[0m\n", + "[14:17:56] \u001b[1mLvl_0_Pipe_0_Mod_0_LinearL2\u001b[0m fitting and predicting completed\n", + "[14:17:56] Time left 17195.98 secs\n", "\n", - "[10:29:05] Start fitting Lvl_0_Pipe_1_Mod_0_CatBoost ...\n", - "[10:29:05] ===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_0_CatBoost =====\n", - "[10:29:06] 0:\tlearn: 2.2635128\ttest: 2.2654787\tbest: 2.2654787 (0)\ttotal: 8.13ms\tremaining: 32.5s\n", - "[10:29:28] bestTest = 0.2448323674\n", - "[10:29:28] bestIteration = 3999\n", - "[10:29:29] ===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_0_CatBoost =====\n", - "[10:29:30] 0:\tlearn: 2.2645175\ttest: 2.2638240\tbest: 2.2638240 (0)\ttotal: 7.8ms\tremaining: 31.2s\n", - "[10:29:52] bestTest = 0.2655982428\n", - "[10:29:52] bestIteration = 3998\n", - "[10:29:52] Shrink model to first 3999 iterations.\n", - "[10:29:53] ===== Start working with fold 2 for Lvl_0_Pipe_1_Mod_0_CatBoost =====\n", - "[10:29:53] 0:\tlearn: 2.2638053\ttest: 2.2658397\tbest: 2.2658397 (0)\ttotal: 7.9ms\tremaining: 31.6s\n", - "[10:30:15] bestTest = 0.2736756787\n", - "[10:30:15] bestIteration = 3999\n", - "[10:30:16] ===== Start working with fold 3 for Lvl_0_Pipe_1_Mod_0_CatBoost =====\n", - "[10:30:17] 0:\tlearn: 2.2646526\ttest: 2.2635595\tbest: 2.2635595 (0)\ttotal: 7.37ms\tremaining: 29.5s\n", - "[10:30:38] bestTest = 0.2742944825\n", - "[10:30:38] bestIteration = 3998\n", - "[10:30:38] Shrink model to first 3999 iterations.\n", - "[10:30:39] ===== Start working with fold 4 for Lvl_0_Pipe_1_Mod_0_CatBoost =====\n", - "[10:30:40] 0:\tlearn: 2.2639121\ttest: 2.2648622\tbest: 2.2648622 (0)\ttotal: 7.03ms\tremaining: 28.1s\n", - "[10:31:01] bestTest = 0.2581136896\n", - "[10:31:01] bestIteration = 3998\n", - "[10:31:01] Shrink model to first 3999 iterations.\n", - "[10:31:02] Fitting Lvl_0_Pipe_1_Mod_0_CatBoost finished. 
score = -0.26330128259018876\n", - "[10:31:02] Lvl_0_Pipe_1_Mod_0_CatBoost fitting and predicting completed\n", - "[10:31:02] Time left 16947.02 secs\n", + "[14:22:15] Start fitting \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m ...\n", + "[14:22:16] ===== Start working with \u001b[1mfold 0\u001b[0m for \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m =====\n", + "[14:22:16] 0:\tlearn: 2.2636799\ttest: 2.2649649\tbest: 2.2649649 (0)\ttotal: 6.85ms\tremaining: 27.4s\n", + "[14:22:35] bestTest = 0.2436411292\n", + "[14:22:35] bestIteration = 3999\n", + "[14:22:35] ===== Start working with \u001b[1mfold 1\u001b[0m for \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m =====\n", + "[14:22:36] 0:\tlearn: 2.2634692\ttest: 2.2632526\tbest: 2.2632526 (0)\ttotal: 6.16ms\tremaining: 24.6s\n", + "[14:22:55] bestTest = 0.2658199543\n", + "[14:22:55] bestIteration = 3999\n", + "[14:22:56] ===== Start working with \u001b[1mfold 2\u001b[0m for \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m =====\n", + "[14:22:56] 0:\tlearn: 2.2631654\ttest: 2.2656298\tbest: 2.2656298 (0)\ttotal: 6.08ms\tremaining: 24.3s\n", + "[14:23:16] bestTest = 0.2753673319\n", + "[14:23:16] bestIteration = 3999\n", + "[14:23:16] ===== Start working with \u001b[1mfold 3\u001b[0m for \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m =====\n", + "[14:23:17] 0:\tlearn: 2.2645696\ttest: 2.2657045\tbest: 2.2657045 (0)\ttotal: 6.76ms\tremaining: 27s\n", + "[14:23:37] bestTest = 0.2738943611\n", + "[14:23:37] bestIteration = 3996\n", + "[14:23:37] Shrink model to first 3997 iterations.\n", + "[14:23:37] ===== Start working with \u001b[1mfold 4\u001b[0m for \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m =====\n", + "[14:23:38] 0:\tlearn: 2.2642805\ttest: 2.2644245\tbest: 2.2644245 (0)\ttotal: 5.84ms\tremaining: 23.4s\n", + "[14:23:57] bestTest = 0.2538460334\n", + "[14:23:57] bestIteration = 3999\n", + "[14:23:58] Fitting \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m finished. score = \u001b[1m-0.2625123265864018\u001b[0m\n", + "[14:23:58] \u001b[1mLvl_0_Pipe_1_Mod_0_CatBoost\u001b[0m fitting and predicting completed\n", + "[14:23:58] Time left 16834.07 secs\n", "\n", - "[10:31:02] Layer 1 training completed.\n", + "[14:23:58] \u001b[1mLayer 1 training completed.\u001b[0m\n", "\n", - "[10:31:02] Blending: optimization starts with equal weights and score -0.2506653444869967\n", - "[10:31:03] Blending: iteration 0: score = -0.23574740438551683, weights = [0.21525846 0.7847415 ]\n", - "[10:31:03] Blending: iteration 1: score = -0.23574740438551683, weights = [0.21525846 0.7847415 ]\n", - "[10:31:03] Blending: no score update. Terminated\n", + "[14:23:58] Blending: optimization starts with equal weights and score \u001b[1m-0.1879588701291192\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/dvladimirvasilyev/anaconda3/envs/myenv/lib/python3.8/site-packages/sklearn/metrics/_classification.py:2916: UserWarning: The y_pred values do not sum to one. Starting from 1.5 thiswill result in an error.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[14:23:59] Blending: iteration \u001b[1m0\u001b[0m: score = \u001b[1m-0.18573794844833624\u001b[0m, weights = \u001b[1m[0.63928086 0.36071914]\u001b[0m\n", + "[14:23:59] Blending: iteration \u001b[1m1\u001b[0m: score = \u001b[1m-0.18573794844833624\u001b[0m, weights = \u001b[1m[0.63928086 0.36071914]\u001b[0m\n", + "[14:23:59] Blending: no score update. 
Terminated\n", "\n", - "[10:31:03] Automl preset training completed in 1054.20 seconds\n", + "[14:23:59] \u001b[1mAutoml preset training completed in 1167.35 seconds\u001b[0m\n", "\n", - "[10:31:03] Model description:\n", + "[14:23:59] Model description:\n", "Final prediction for new objects (level 0) = \n", - "\t 0.21526 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) +\n", - "\t 0.78474 * (5 averaged models Lvl_0_Pipe_1_Mod_0_CatBoost) \n", + "\t 0.63928 * (5 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2) +\n", + "\t 0.36072 * (5 averaged models Lvl_0_Pipe_1_Mod_0_CatBoost) \n", "\n", - "CPU times: user 25min 28s, sys: 1min 41s, total: 27min 9s\n", - "Wall time: 17min 34s\n" + "CPU times: user 18min 40s, sys: 3min 1s, total: 21min 42s\n", + "Wall time: 19min 27s\n" ] } ], @@ -1494,7 +1582,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -1600,7 +1688,7 @@ "[114477 rows x 2 columns]" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1615,6 +1703,24 @@ "execution_count": 22, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_12895/1432655611.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " preds['pred_' + str(i)] = oof_pred.data[:,i]\n", + "/tmp/ipykernel_12895/1432655611.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " preds['pred_' + str(i)] = oof_pred.data[:,i]\n" + ] + }, { "data": { "text/html": [ @@ -1655,76 +1761,76 @@ " 0\n", " 100330.jpg\n", " bacterial_leaf_blight\n", - " 0.030123\n", - " 0.307897\n", - " 0.456500\n", - " 0.001812\n", - " 0.031111\n", - " 0.022188\n", - " 0.002502\n", - " 0.147808\n", - " 0.000054\n", - " 4.927851e-06\n", + " 0.023245\n", + " 0.315283\n", + " 0.470886\n", + " 0.002528\n", + " 0.021895\n", + " 0.007454\n", + " 0.001554\n", + " 0.157142\n", + " 8.914904e-06\n", + " 4.559626e-06\n", " \n", " \n", " 1\n", " 100365.jpg\n", " bacterial_leaf_blight\n", - " 0.002971\n", - " 0.121589\n", - " 0.027907\n", - " 0.000097\n", - " 0.014529\n", - " 0.002228\n", - " 0.000053\n", - " 0.830617\n", - " 0.000003\n", - " 5.009400e-06\n", + " 0.003717\n", + " 0.011035\n", + " 0.028317\n", + " 0.000110\n", + " 0.003178\n", + " 0.000015\n", + " 0.000131\n", + " 0.953496\n", + " 1.555987e-07\n", + " 5.692390e-07\n", " \n", " \n", " 2\n", " 100382.jpg\n", " bacterial_leaf_blight\n", - " 0.084655\n", - " 0.150933\n", - " 0.496689\n", - " 0.003890\n", - " 0.020583\n", - " 0.003894\n", - " 0.000731\n", - " 0.238610\n", - " 0.000010\n", - " 4.939010e-06\n", + " 0.025734\n", + " 0.095088\n", + " 0.208473\n", + " 0.000879\n", + " 0.007030\n", + " 0.003382\n", + " 0.000142\n", + " 0.659271\n", + " 3.872871e-07\n", + " 2.898941e-07\n", " \n", " \n", " 3\n", " 100632.jpg\n", " bacterial_leaf_blight\n", - " 0.008494\n", - " 0.691138\n", - " 0.045640\n", - " 0.000334\n", - " 0.090399\n", - " 0.000388\n", - " 0.000152\n", - " 0.163438\n", - " 
0.000012\n", - " 5.071352e-06\n", + " 0.002876\n", + " 0.542942\n", + " 0.027466\n", + " 0.000317\n", + " 0.036005\n", + " 0.000398\n", + " 0.000082\n", + " 0.389901\n", + " 3.837710e-06\n", + " 9.339438e-06\n", " \n", " \n", " 4\n", " 101918.jpg\n", " bacterial_leaf_blight\n", - " 0.025846\n", - " 0.215757\n", - " 0.033691\n", - " 0.000158\n", - " 0.034468\n", - " 0.000259\n", - " 0.000074\n", - " 0.689742\n", - " 0.000001\n", - " 1.504601e-06\n", + " 0.009988\n", + " 0.033572\n", + " 0.017635\n", + " 0.000032\n", + " 0.008310\n", + " 0.000136\n", + " 0.000041\n", + " 0.930286\n", + " 1.554736e-07\n", + " 1.530466e-07\n", " \n", " \n", " ...\n", @@ -1745,76 +1851,76 @@ " 114472\n", " 110381.jpg\n", " tungro\n", - " 0.011443\n", - " 0.210518\n", - " 0.011583\n", - " 0.005677\n", - " 0.737980\n", - " 0.000237\n", - " 0.020141\n", - " 0.002203\n", - " 0.000218\n", - " 3.771045e-07\n", + " 0.001716\n", + " 0.109143\n", + " 0.020722\n", + " 0.001495\n", + " 0.845324\n", + " 0.000177\n", + " 0.021384\n", + " 0.000027\n", + " 6.304998e-06\n", + " 6.075803e-06\n", " \n", " \n", " 114473\n", " 110381.jpg\n", " tungro\n", - " 0.010602\n", - " 0.037910\n", - " 0.013473\n", - " 0.005240\n", - " 0.911409\n", - " 0.000039\n", - " 0.021000\n", - " 0.000297\n", - " 0.000029\n", - " 7.560920e-07\n", + " 0.022644\n", + " 0.137650\n", + " 0.026389\n", + " 0.004165\n", + " 0.788036\n", + " 0.001093\n", + " 0.019688\n", + " 0.000259\n", + " 3.142513e-05\n", + " 4.477663e-05\n", " \n", " \n", " 114474\n", " 110381.jpg\n", " tungro\n", - " 0.039948\n", - " 0.047678\n", - " 0.024860\n", - " 0.010115\n", - " 0.843555\n", - " 0.000212\n", - " 0.032901\n", - " 0.000302\n", - " 0.000428\n", - " 5.207394e-07\n", + " 0.016897\n", + " 0.072329\n", + " 0.010469\n", + " 0.005554\n", + " 0.789777\n", + " 0.001240\n", + " 0.103631\n", + " 0.000060\n", + " 1.301366e-05\n", + " 2.972130e-05\n", " \n", " \n", " 114475\n", " 110381.jpg\n", " tungro\n", - " 0.011752\n", - " 0.048414\n", - " 0.004618\n", - " 0.003911\n", - " 0.907945\n", - " 0.000063\n", - " 0.022485\n", - " 0.000527\n", - " 0.000281\n", - " 2.993103e-06\n", + " 0.008637\n", + " 0.114299\n", + " 0.082281\n", + " 0.003465\n", + " 0.560001\n", + " 0.000741\n", + " 0.230260\n", + " 0.000112\n", + " 1.909918e-04\n", + " 1.351225e-05\n", " \n", " \n", " 114476\n", " 110381.jpg\n", " tungro\n", - " 0.011532\n", - " 0.135282\n", - " 0.031151\n", - " 0.007868\n", - " 0.753411\n", - " 0.000106\n", - " 0.059282\n", - " 0.001330\n", - " 0.000034\n", - " 4.079704e-06\n", + " 0.004179\n", + " 0.099988\n", + " 0.008320\n", + " 0.004660\n", + " 0.822037\n", + " 0.000663\n", + " 0.059627\n", + " 0.000318\n", + " 1.922170e-04\n", + " 1.441010e-05\n", " \n", " \n", "\n", @@ -1823,43 +1929,43 @@ ], "text/plain": [ " image_id label pred_0 pred_1 pred_2 \\\n", - "0 100330.jpg bacterial_leaf_blight 0.030123 0.307897 0.456500 \n", - "1 100365.jpg bacterial_leaf_blight 0.002971 0.121589 0.027907 \n", - "2 100382.jpg bacterial_leaf_blight 0.084655 0.150933 0.496689 \n", - "3 100632.jpg bacterial_leaf_blight 0.008494 0.691138 0.045640 \n", - "4 101918.jpg bacterial_leaf_blight 0.025846 0.215757 0.033691 \n", + "0 100330.jpg bacterial_leaf_blight 0.023245 0.315283 0.470886 \n", + "1 100365.jpg bacterial_leaf_blight 0.003717 0.011035 0.028317 \n", + "2 100382.jpg bacterial_leaf_blight 0.025734 0.095088 0.208473 \n", + "3 100632.jpg bacterial_leaf_blight 0.002876 0.542942 0.027466 \n", + "4 101918.jpg bacterial_leaf_blight 0.009988 0.033572 0.017635 \n", "... ... ... ... ... ... 
\n", - "114472 110381.jpg tungro 0.011443 0.210518 0.011583 \n", - "114473 110381.jpg tungro 0.010602 0.037910 0.013473 \n", - "114474 110381.jpg tungro 0.039948 0.047678 0.024860 \n", - "114475 110381.jpg tungro 0.011752 0.048414 0.004618 \n", - "114476 110381.jpg tungro 0.011532 0.135282 0.031151 \n", + "114472 110381.jpg tungro 0.001716 0.109143 0.020722 \n", + "114473 110381.jpg tungro 0.022644 0.137650 0.026389 \n", + "114474 110381.jpg tungro 0.016897 0.072329 0.010469 \n", + "114475 110381.jpg tungro 0.008637 0.114299 0.082281 \n", + "114476 110381.jpg tungro 0.004179 0.099988 0.008320 \n", "\n", - " pred_3 pred_4 pred_5 pred_6 pred_7 pred_8 \\\n", - "0 0.001812 0.031111 0.022188 0.002502 0.147808 0.000054 \n", - "1 0.000097 0.014529 0.002228 0.000053 0.830617 0.000003 \n", - "2 0.003890 0.020583 0.003894 0.000731 0.238610 0.000010 \n", - "3 0.000334 0.090399 0.000388 0.000152 0.163438 0.000012 \n", - "4 0.000158 0.034468 0.000259 0.000074 0.689742 0.000001 \n", - "... ... ... ... ... ... ... \n", - "114472 0.005677 0.737980 0.000237 0.020141 0.002203 0.000218 \n", - "114473 0.005240 0.911409 0.000039 0.021000 0.000297 0.000029 \n", - "114474 0.010115 0.843555 0.000212 0.032901 0.000302 0.000428 \n", - "114475 0.003911 0.907945 0.000063 0.022485 0.000527 0.000281 \n", - "114476 0.007868 0.753411 0.000106 0.059282 0.001330 0.000034 \n", + " pred_3 pred_4 pred_5 pred_6 pred_7 pred_8 \\\n", + "0 0.002528 0.021895 0.007454 0.001554 0.157142 8.914904e-06 \n", + "1 0.000110 0.003178 0.000015 0.000131 0.953496 1.555987e-07 \n", + "2 0.000879 0.007030 0.003382 0.000142 0.659271 3.872871e-07 \n", + "3 0.000317 0.036005 0.000398 0.000082 0.389901 3.837710e-06 \n", + "4 0.000032 0.008310 0.000136 0.000041 0.930286 1.554736e-07 \n", + "... ... ... ... ... ... ... \n", + "114472 0.001495 0.845324 0.000177 0.021384 0.000027 6.304998e-06 \n", + "114473 0.004165 0.788036 0.001093 0.019688 0.000259 3.142513e-05 \n", + "114474 0.005554 0.789777 0.001240 0.103631 0.000060 1.301366e-05 \n", + "114475 0.003465 0.560001 0.000741 0.230260 0.000112 1.909918e-04 \n", + "114476 0.004660 0.822037 0.000663 0.059627 0.000318 1.922170e-04 \n", "\n", " pred_9 \n", - "0 4.927851e-06 \n", - "1 5.009400e-06 \n", - "2 4.939010e-06 \n", - "3 5.071352e-06 \n", - "4 1.504601e-06 \n", + "0 4.559626e-06 \n", + "1 5.692390e-07 \n", + "2 2.898941e-07 \n", + "3 9.339438e-06 \n", + "4 1.530466e-07 \n", "... ... 
\n", - "114472 3.771045e-07 \n", - "114473 7.560920e-07 \n", - "114474 5.207394e-07 \n", - "114475 2.993103e-06 \n", - "114476 4.079704e-06 \n", + "114472 6.075803e-06 \n", + "114473 4.477663e-05 \n", + "114474 2.972130e-05 \n", + "114475 1.351225e-05 \n", + "114476 1.441010e-05 \n", "\n", "[114477 rows x 12 columns]" ] @@ -1928,76 +2034,76 @@ " 0\n", " 100001.jpg\n", " brown_spot\n", - " 0.003971\n", - " 0.001296\n", - " 0.003271\n", - " 1.106504e-02\n", - " 0.007749\n", - " 0.966667\n", - " 0.005006\n", - " 0.000583\n", - " 0.000050\n", - " 3.405732e-04\n", + " 0.001334\n", + " 0.000791\n", + " 0.002372\n", + " 5.432664e-03\n", + " 0.005328\n", + " 0.978495\n", + " 0.002519\n", + " 0.003511\n", + " 7.897679e-05\n", + " 1.378119e-04\n", " \n", " \n", " 1\n", " 100002.jpg\n", " normal\n", - " 0.898224\n", - " 0.038288\n", - " 0.016787\n", - " 2.553555e-02\n", - " 0.011513\n", - " 0.003794\n", - " 0.001537\n", - " 0.003510\n", - " 0.000794\n", - " 1.736730e-05\n", + " 0.978428\n", + " 0.011744\n", + " 0.001621\n", + " 3.187062e-03\n", + " 0.002579\n", + " 0.000282\n", + " 0.000156\n", + " 0.001969\n", + " 3.391063e-05\n", + " 1.971700e-07\n", " \n", " \n", " 2\n", " 100003.jpg\n", " hispa\n", - " 0.024842\n", - " 0.001781\n", - " 0.971629\n", - " 9.731490e-08\n", - " 0.000082\n", - " 0.000143\n", - " 0.000401\n", - " 0.001095\n", - " 0.000008\n", - " 1.762646e-05\n", + " 0.004639\n", + " 0.002192\n", + " 0.992883\n", + " 1.573081e-07\n", + " 0.000026\n", + " 0.000037\n", + " 0.000005\n", + " 0.000218\n", + " 1.920397e-07\n", + " 1.528186e-07\n", " \n", " \n", " 3\n", " 100004.jpg\n", " blast\n", - " 0.000396\n", - " 0.976271\n", - " 0.003184\n", - " 1.529731e-02\n", - " 0.002230\n", - " 0.002466\n", - " 0.000006\n", - " 0.000060\n", - " 0.000089\n", - " 5.999530e-07\n", + " 0.000259\n", + " 0.982406\n", + " 0.004401\n", + " 7.787708e-03\n", + " 0.002372\n", + " 0.002163\n", + " 0.000173\n", + " 0.000115\n", + " 3.223106e-04\n", + " 4.848040e-07\n", " \n", " \n", " 4\n", " 100005.jpg\n", " hispa\n", - " 0.040845\n", - " 0.017258\n", - " 0.914247\n", - " 6.972055e-05\n", - " 0.011210\n", - " 0.012487\n", - " 0.001175\n", - " 0.000358\n", - " 0.002349\n", - " 2.259109e-06\n", + " 0.010951\n", + " 0.047475\n", + " 0.829855\n", + " 1.200308e-05\n", + " 0.091933\n", + " 0.000418\n", + " 0.018967\n", + " 0.000370\n", + " 1.118553e-05\n", + " 8.759866e-06\n", " \n", " \n", " ...\n", @@ -2018,76 +2124,76 @@ " 10402\n", " 110403.jpg\n", " tungro\n", - " 0.004226\n", - " 0.011191\n", - " 0.024608\n", - " 7.129314e-03\n", - " 0.939744\n", - " 0.002247\n", - " 0.006578\n", - " 0.004179\n", - " 0.000066\n", - " 3.340825e-05\n", + " 0.001664\n", + " 0.002167\n", + " 0.007366\n", + " 4.507852e-03\n", + " 0.981122\n", + " 0.000052\n", + " 0.001666\n", + " 0.001455\n", + " 1.527430e-07\n", + " 3.928369e-07\n", " \n", " \n", " 10403\n", " 110404.jpg\n", " normal\n", - " 0.894465\n", - " 0.001002\n", - " 0.083502\n", - " 1.798824e-05\n", - " 0.007053\n", - " 0.000746\n", - " 0.012884\n", - " 0.000325\n", - " 0.000003\n", - " 4.647352e-07\n", + " 0.932484\n", + " 0.002359\n", + " 0.049850\n", + " 1.244102e-05\n", + " 0.011696\n", + " 0.000593\n", + " 0.002646\n", + " 0.000304\n", + " 4.828784e-05\n", + " 7.773816e-06\n", " \n", " \n", " 10404\n", " 110405.jpg\n", " dead_heart\n", - " 0.000375\n", - " 0.000232\n", - " 0.000546\n", - " 9.984713e-01\n", - " 0.000006\n", - " 0.000077\n", - " 0.000213\n", - " 0.000016\n", - " 0.000009\n", - " 5.422648e-05\n", + " 0.000192\n", + " 0.000044\n", + " 0.000152\n", + " 
9.994839e-01\n", + " 0.000001\n", + " 0.000025\n", + " 0.000058\n", + " 0.000003\n", + " 1.957294e-06\n", + " 3.789358e-05\n", " \n", " \n", " 10405\n", " 110406.jpg\n", " blast\n", - " 0.000328\n", - " 0.957169\n", - " 0.000155\n", - " 2.832647e-02\n", - " 0.002730\n", - " 0.002962\n", - " 0.000273\n", - " 0.003520\n", - " 0.000160\n", - " 4.376236e-03\n", + " 0.000226\n", + " 0.977683\n", + " 0.000268\n", + " 9.254745e-03\n", + " 0.004962\n", + " 0.000595\n", + " 0.004523\n", + " 0.001717\n", + " 5.624577e-04\n", + " 2.080105e-04\n", " \n", " \n", " 10406\n", " 110407.jpg\n", " brown_spot\n", - " 0.000017\n", - " 0.000416\n", - " 0.001281\n", - " 6.053330e-04\n", - " 0.000525\n", - " 0.996624\n", - " 0.000190\n", - " 0.000081\n", - " 0.000190\n", - " 7.008029e-05\n", + " 0.000009\n", + " 0.000188\n", + " 0.000539\n", + " 4.357956e-04\n", + " 0.000232\n", + " 0.997215\n", + " 0.000039\n", + " 0.000010\n", + " 1.319862e-03\n", + " 1.372061e-05\n", " \n", " \n", "\n", @@ -2096,30 +2202,30 @@ ], "text/plain": [ " image_id label pred_0 pred_1 pred_2 pred_3 \\\n", - "0 100001.jpg brown_spot 0.003971 0.001296 0.003271 1.106504e-02 \n", - "1 100002.jpg normal 0.898224 0.038288 0.016787 2.553555e-02 \n", - "2 100003.jpg hispa 0.024842 0.001781 0.971629 9.731490e-08 \n", - "3 100004.jpg blast 0.000396 0.976271 0.003184 1.529731e-02 \n", - "4 100005.jpg hispa 0.040845 0.017258 0.914247 6.972055e-05 \n", + "0 100001.jpg brown_spot 0.001334 0.000791 0.002372 5.432664e-03 \n", + "1 100002.jpg normal 0.978428 0.011744 0.001621 3.187062e-03 \n", + "2 100003.jpg hispa 0.004639 0.002192 0.992883 1.573081e-07 \n", + "3 100004.jpg blast 0.000259 0.982406 0.004401 7.787708e-03 \n", + "4 100005.jpg hispa 0.010951 0.047475 0.829855 1.200308e-05 \n", "... ... ... ... ... ... ... \n", - "10402 110403.jpg tungro 0.004226 0.011191 0.024608 7.129314e-03 \n", - "10403 110404.jpg normal 0.894465 0.001002 0.083502 1.798824e-05 \n", - "10404 110405.jpg dead_heart 0.000375 0.000232 0.000546 9.984713e-01 \n", - "10405 110406.jpg blast 0.000328 0.957169 0.000155 2.832647e-02 \n", - "10406 110407.jpg brown_spot 0.000017 0.000416 0.001281 6.053330e-04 \n", + "10402 110403.jpg tungro 0.001664 0.002167 0.007366 4.507852e-03 \n", + "10403 110404.jpg normal 0.932484 0.002359 0.049850 1.244102e-05 \n", + "10404 110405.jpg dead_heart 0.000192 0.000044 0.000152 9.994839e-01 \n", + "10405 110406.jpg blast 0.000226 0.977683 0.000268 9.254745e-03 \n", + "10406 110407.jpg brown_spot 0.000009 0.000188 0.000539 4.357956e-04 \n", "\n", - " pred_4 pred_5 pred_6 pred_7 pred_8 pred_9 \n", - "0 0.007749 0.966667 0.005006 0.000583 0.000050 3.405732e-04 \n", - "1 0.011513 0.003794 0.001537 0.003510 0.000794 1.736730e-05 \n", - "2 0.000082 0.000143 0.000401 0.001095 0.000008 1.762646e-05 \n", - "3 0.002230 0.002466 0.000006 0.000060 0.000089 5.999530e-07 \n", - "4 0.011210 0.012487 0.001175 0.000358 0.002349 2.259109e-06 \n", - "... ... ... ... ... ... ... 
\n", - "10402 0.939744 0.002247 0.006578 0.004179 0.000066 3.340825e-05 \n", - "10403 0.007053 0.000746 0.012884 0.000325 0.000003 4.647352e-07 \n", - "10404 0.000006 0.000077 0.000213 0.000016 0.000009 5.422648e-05 \n", - "10405 0.002730 0.002962 0.000273 0.003520 0.000160 4.376236e-03 \n", - "10406 0.000525 0.996624 0.000190 0.000081 0.000190 7.008029e-05 \n", + " pred_4 pred_5 pred_6 pred_7 pred_8 pred_9 \n", + "0 0.005328 0.978495 0.002519 0.003511 7.897679e-05 1.378119e-04 \n", + "1 0.002579 0.000282 0.000156 0.001969 3.391063e-05 1.971700e-07 \n", + "2 0.000026 0.000037 0.000005 0.000218 1.920397e-07 1.528186e-07 \n", + "3 0.002372 0.002163 0.000173 0.000115 3.223106e-04 4.848040e-07 \n", + "4 0.091933 0.000418 0.018967 0.000370 1.118553e-05 8.759866e-06 \n", + "... ... ... ... ... ... ... \n", + "10402 0.981122 0.000052 0.001666 0.001455 1.527430e-07 3.928369e-07 \n", + "10403 0.011696 0.000593 0.002646 0.000304 4.828784e-05 7.773816e-06 \n", + "10404 0.000001 0.000025 0.000058 0.000003 1.957294e-06 3.789358e-05 \n", + "10405 0.004962 0.000595 0.004523 0.001717 5.624577e-04 2.080105e-04 \n", + "10406 0.000232 0.997215 0.000039 0.000010 1.319862e-03 1.372061e-05 \n", "\n", "[10407 rows x 12 columns]" ] @@ -2178,7 +2284,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Out-of-fold accuracy: 0.9436917459402325\n" + "Out-of-fold accuracy: 0.9686749303353512\n" ] } ], @@ -2201,14 +2307,12 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAq8AAALzCAYAAAA/Le4RAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAACq5ElEQVR4nOzdeZyNdf/H8ddnBllH1pEoxpIoKpQW+5JEdm13pZRWKlS2X2lf7u677vYkaZEQoU2KRJZsFUpFpSJb9t3Mme/vj+syjTErM3OdS+9nj/Nwzvfa3ufqnDPf87m+13XMOYeIiIiISBjEBB1ARERERCS71HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VE8pSZDTOzt4LOkRfMrLOZ/WFmu8zszKNYz3dm1iz3kuU/M2tsZj/m8TZ2mVlCJtNXm1mrbK6rp5l9mc15j/g1fCy//kWCos6riABgZheY2Vwz225mW8xsjpk1DDrX0TKzE8zsVTNbZ2Y7zewHM7vfzIrlwuqfBG5zzhV3zn19pCtxztVxzs3MhTyHMLOZZubMrF6a9vf89mbZXI8zs+qZzeOcm+2cO+XI02bN38+/+JlGmdlDebk9EYlO6ryKCGYWB3wAPAuUBk4E7gf2B5krLTOLzeH8pYF5QBHgXOdcCaA1cDxQLRcinQx8lwvryUs/AVcffGBmZYBzgU25tQEzK5Bb6xIRyYo6ryICUBPAOTfGORdxzu11zk1zzi09OIOZXWdmK8xsq5l9YmYnp5r2P//w+Q4zW2xmjdOsv7CZjfUrn0tSVwLN7FS/QrjNP3x+Sappo8zsRTP7yMx2A839Q8MDzGypXyUea2aFM3he/YCdwL+cc6v95/iHc+72g8/NzM4zs4X+uhaa2Xmptj/TzB70q9A7zWyamZU1s+PMbBcQC3xrZj/78x9SoUxdHfSX+8B/nlvMbLaZxfjTUg53++t+2sz+9G9Pm9lx/rRmZrbGzPqb2Ua/mnxtFv9vRwOXpur4Xw68BxxIlfNsM5vnZ1tnZs+ZWSF/2ix/tm/9w/aXpspxj5mtB1472OYvU81/jmf5jyua2ab0Kr1mdq2ZvZ/q8UozG5/q8R9mdkbq/WtmvYErgbv9TO+nWuUZ2XxtpM1xNK/himY2wX+Ov5pZ3wy2UdjM3jKzzf6+Xmhm8dnJJyJ/U+dVRMCrzkXM7HUzu8jMSqWeaGYdgcFAF6AcMBsYk2qWhcAZeFXbt4HxaToNHYHxqaZPMrOCZlYQeB+YBpQH+gCjzSz14ecrgIeBEsDBMYo9gLZAVaAu0DOD59UKmOicS05vonmV2Q+BZ4AywH+BD82rTqbe/rV+vkLAAOfcfudccX96Pedcdqq4/YE1ePsvHm9/pvf73EOARnj7sx5wNjA01fQKQEm86ngv4Pm0/7/S+BP4HmjjP74aeCPNPBHgTqAsXlW2JXALgHOuiT9PPf+w/dhUOUrjVZ97p16Zc+5n4B7gLTMrCrwGvJ7B0IgvgMZmFmNmFfH28bkA5o1vLQ4sTb2Ac244Xqf8CT9Th1STs/vaSOtIX8MxeK/hb/H+n7QE7jCzC9PZxjV4/+8q473ebgL2ZjOfiPjUeRURnHM7gAvwOlOvAJvMbEqqqtBNwKPOuRXOuSTgEbwK18n+8m855zY755Kcc/8BjgNSd0AXO+fedc4l4nUQC+N10BrhdU4ec84dcM7NwBu+cHmqZSc75+Y455Kdc/v8tmecc38657bgdRzOyOCplQHWZfLULwZWOufe9LOPAX4AUneGXnPO/eSc2wuMy2RbWUkETgBOds4l+mNE0+u8Xgk84Jzb6JzbhDd846o063nAX8dHwC4O3dfpeQO42sxqAcc75+alnuicW+ycm+/vg9XAy0DTLNaZDNznd+QP64A5514BVgFf+c97SHor8cew7sTbr02AT4A//axNgdkZffnIQHZfG2lzHOlruCFQzjn3gP8a/gXvPXRZOptJxHtNVvePcCz233sikgPqvIoIAH7HtKdzrhJwGlAReNqffDLwP/9Q5zZgC2B4lSbMO4y/wj9Uuw2vulQ21e
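The DenseBlock.forward added in PATCH 05 above threads each ODST layer's output back into a growing feature stack, while max_features caps how much of that stack later layers may consume: every layer always sees the raw inputs plus only the most recent tree outputs. Below is a minimal self-contained sketch of that feature flow, with random tensors standing in for the ODST layers; all sizes are illustrative assumptions, not values from the patch.

    import torch

    initial_features, layer_out, max_features = 8, 4, 12

    x = torch.randn(2, initial_features)
    for step in range(3):
        layer_inp = x
        # Keep the raw inputs plus only the newest stacked outputs,
        # mirroring the tail_features slicing in forward().
        tail = min(max_features, layer_inp.shape[-1]) - initial_features
        if tail != 0:
            layer_inp = torch.cat(
                [layer_inp[..., :initial_features], layer_inp[..., -tail:]], dim=-1
            )
        h = torch.randn(2, layer_out)  # stand-in for layer(layer_inp)
        x = torch.cat([x, h], dim=-1)  # dense connectivity: append to the stack

    outputs = x[..., initial_features:]  # drop raw inputs, keep tree outputs
    print(outputs.shape)  # torch.Size([2, 12]): 3 layers x 4 features each

Bounding the layer input this way keeps each layer's input width constant in depth instead of growing with every preceding layer, which is the point of the tail_features logic in the patched code.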
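The tutorial run recorded in PATCH 06 above comes from fitting the TabularCVAutoML preset on a dataframe of image paths and labels. A minimal sketch of that call, assuming the public LightAutoML import paths and the tutorial's column names ("path", "label"); the CSV filename is hypothetical:

    import numpy as np
    import pandas as pd
    from sklearn.metrics import accuracy_score

    from lightautoml.automl.presets.image_presets import TabularCVAutoML
    from lightautoml.tasks import Task

    train_data = pd.read_csv("train_with_paths.csv")  # hypothetical file

    automl = TabularCVAutoML(
        task=Task("multiclass"),
        timeout=18000,   # the 18000-second budget seen in the log
        cpu_limit=2,
        memory_limit=16,
    )
    oof_pred = automl.fit_predict(train_data, roles={"target": "label", "path": ["path"]})

    # Out-of-fold accuracy as reported at the end of the run: argmax over the
    # class-probability columns against the reader's label-to-index mapping
    # (class_mapping may be None when labels are already 0..K-1).
    mapping = automl.reader.class_mapping
    y_true = train_data["label"].map(mapping) if mapping else train_data["label"]
    print("Out-of-fold accuracy:", accuracy_score(y_true, np.argmax(oof_pred.data, axis=1)))

The blend described at the end of the log is what this single call produces: a weighted average of the LinearL2 and CatBoost pipelines, with the weights shown in the final "Model description".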
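The SettingWithCopyWarning captured in the notebook output comes from assigning new columns into a slice of train_data; taking an explicit copy first is the usual fix. Continuing the sketch above, with the tutorial's column names:

    # .copy() makes preds an independent frame, so the per-class assignments
    # below no longer trigger SettingWithCopyWarning.
    preds = train_data[["image_id", "label"]].copy()
    for i in range(oof_pred.data.shape[1]):
        preds["pred_" + str(i)] = oof_pred.data[:, i]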
r/SLWdZLwKZEX/9keazslvB9ebdtlU1qe6vwevA5yezXgdp4xU9LeXWtrtZ3dbWfk3Xmdumpn9YmYDs5npN7/toM3+F4icZJoItABuA95MO9HMavpDGtab2Q68Lydl086XxqZUXyYy8grea+lZ51xm46e/AJrhdV6/AGbidVyb+o9z4oj+fx3Fa/hkoOLB94a/7GC86npab+J1zt/xh4Q84R99EJEcUOdVRA7jnPsBGIXX8QDvD/eNzrnjU92KOOfm+mMD78Y7XFvKOXc8sB2vc3tQ5YN3/MOslfAOZ/8JVD449tN3ErA2dZyjeCqfAZ3TrD+1P/E6H6ml3X5O7AGKpnpc4eAd59xO51x/51wCcAnQz8xaZiPTSX7bEXPO7QE+Bm4mnc4r8CJexbmGcy4Or/Nl6cx3yGozm2hmxfG+/LwKDPOHaGTkYOe1sX//C7LuvB7N6yJt1qN5Df8B/JrmvVHCOdfusMBetfx+51xt4DygPalOphOR7FHnVUQws1r+SUCV/MeV8Q7dz/dneQkYZGZ1/Oklzay7P60EkIR39noBM7sXiEuzifpm1sW8s9LvwLuKwXy8Q8p78E68Keif0NMBeCeXntp//SyvHxziYGYnmtl/zawu8BFQ08yuMLMCZnYpUBtv6MKR+Aa4wsxizawtqQ69m1l7/2Qjw+sYRfAOvac1BhhqZuXMrCxwL5Ab1wkdDDQ9eOJaGiWAHcAu/3D9zWmmbwAyvL5qBv4HLHLOXY83rvilTOb9AmgOFHHOrcEbU90W7xB7RpcgO5JMGTma1/ACYKd5J68V8f/fn2bpXGbOzJqb2enmnTy3A28YQU6GRIgI6ryKiGcncA7wlXln9c8HluOdZIRz7j3gcbzDnTv8aRf5y34CTMU76es3YB+HH+qfDFwKbMUbv9nFr0IdwOusXgT8BbwAXO1Xfo+aP+7xPLxOwldmthOYjtd5XOWc24xX/eqPN8TgbqC9c+6vI9zk7XjPZxve2NVJqabVwKsE78K7fNcLzrnP01nHQ8AivJOUlgFL/Laj4o8Dzeii/APwTkzbiXeof2ya6cPwvgBsM7MeWW3LP8GvLX93gvsBZ5nZlRlk+wlvv8z2H+8AfgHmOOciGWzmVaC2n2lSVpmycDSv4Qjea+gM4Fe81/EIvGEHaVUA3sXruK7A67SnVwkXkUxY+ucLiIiIiIhEH1VeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQkOdVxEREREJDXVeRURERCQ01HkVERERkdBQ51VEREREQqNA0AHk2FHkzNtc0BlyYvOCZ4OOkGPJyUEnyJkCsRZ0hBxzoXoVh5OF72Uhkq7CBcjXV3N+/p3d+/VzUftOVeVVREREREJDnVcRERERCQ0NGxAREREJA1PNEVR5FREREZEQUeVVREREJAx0tiOgyquIiIiIhIgqryIiIiJhoDGvgCqvIiIiIhIiqryKiIiIhIHGvAKqvIqIiIhIiKjyKiIiIhIGGvMKqPIqIiIiIiGiyquIiIhIGGjMK6DKq4iIiIiEiCqvIiIiImGgMa+AKq8iIiIiEiKqvIqIiIiEgca8Aqq8ioiIiEiIqPMqIiIiIqGhYQMiIiIiYaATtgBVXkVEREQkh8xspJltNLPladr7mNkPZvadmT2Rqn2Qma0ysx/N7MJU7W39tlVmNjA721bnVbLFzFabWdncWt9L913Jb9MfZdH4wYe033xZU76ZOJTF7w7h4ds7AnDZRQ2Y/87AlNvuxc9Qt+aJAPRoW5+F4wazYOwgJj93C2WOL5ZbEbNt2NDBtGhyHt06dTikfczoN+nc4SK6dmzP0//5d77nysj+/fu5+oruXNatI907t+el5585ZPoTjz3EBeecFVC67NmxYwf97+hLx/Zt6dThIr795uugIx3ivqGDaN7kXLp2ap/SNu2Tj+nS8WLOPL0W3y1fFmC69KWXefv2bdx4/bV0aNeGG6+/lh3btweYMGuRSIQeXTtx2y03Bh0lW+bMnsUlF19I+7atefWV4UHHyZYwZr6odQu6dupAjy4dubxHl6DjHB2z/LtlbRTQ9tB41hzoCNRzztUBnvTbawOXAXX8ZV4ws1gziwWeBy4CagOX+/NmSp3XfwAzi7rhIW++P5+Otz5/SFuTBjVo3+x0zr70Mep3e5in35gOwDsfL6LRZY/R6LLH6DX0DVav3czSn9YSGxvDv+/qRtve/+PsSx9l+cq13HRp03x/Lh06deb5l145pG3hgvnM/HwGYydMZsLkD7i653X5nisjhQoV4qURo3jn3cm8Pe495s75kmXffgPA998tY+eOHcEGzIYnHn2Y8y9ozOQPpjJ+wmSqJlQLOtIhLunUhRdeGnFIW/XqNfnv089yVv2GAaXKXHqZR44YzjmNzuX9j6ZxTqNzGflqdHdWRr/5BglR9lrISCQS4ZGHH+CFl0bw3pQPmfrRB/y8alXQsTIVxswHjXjtdcZNnMyYcRODjnLMcM7NArakab4ZeMw5t9+fZ6Pf3hF4xzm33zn3K7AKONu/rXLO/eKcOwC848+bKXVeQ8LMqpjZCjN7xS/FTzOzImZ2hpnNN7OlZvaemZXy559pZk+b2SLgdv/xU2a2yF9PQzObaGYrzeyhVNuZZGaL/W30zqvnM2fJz2zZvueQtt7dG/Pka59yIDEJgE1bdx22XI+29Rn/yRI/q3crVqQQACWKF2HdpvyvDNVv0JCSJUse0jZ+7Dtc2+sGChXyspUuUybfc2XEzCha1KtQJyUlkZSUBGZEIhGe/u+/6XvngIATZm7nzp0sXryQzl27AVCwUCHi4uICTnWo+g0aEpfmNZFQrRpVqiYElChr6WWe+fl0OnTsBECHjp34fMZnASTLng3r1zN71syU10W0W75sKZUrn0ylypUpWKgQbdtdzMzPpwcdK1NhzHzMsZj8ux2ZmkBjM/vKzL4ws4Pf1k8E/kg13xq/LaP2TKnzGi41gOf9Uvw2oCvwBnCPc64usAy4L9X8hZxzDZxz//EfH3DONQBeAiYDtwKnAT3N7GDv6jrnXH2gAdA3VXueq35yec4/sxqz3hjAtBG3U7/2SYfN063NWYybugiApKRkbn9kLAvHDeaXaQ9zakIFRk2am19xM/Xb6tV8vXgRV13eg149/8V3y6LrMHEkEuHy7p1o3ex8Gp17HqfXrcfYMaNp2qwF5cqVDzpeptauWUOpUqW5d8ggenTtxLB7h7Bnz56sF5Qc27x5c8rroWzZc
mzevDngRBl74rFHuLP/XcTEhOPP2sYNG6hwQoWUx+Xj49mwYUOAibIWxswAGNx0Qy8u696Fd8eNDTpNaJhZb7/gdfCWnYJWAaA00Ai4CxhnlvsXpw3Hu1wO+tU5941/fzFQDTjeOfeF3/Y60CTV/GnfpVP8f5cB3znn1vml/V+Ayv60vmb2LTDfb6uRu08hYwViYyhdshhNrn6SwU9N4q0nDj3U3vC0k9mzL5Hvf17nzV8ghhu6NabR5Y+T0GYIy39ay13XtcmvuJmKRCJs37GdN94ey5397+buAXfgnAs6VorY2FjGjJ/Ex5/OZPnypSxZtJDPPp3KpZf/K+hoWYpEkvhhxfd0v+xyxk2YRJEiRRg5IroPZx8LzIw8+BuUK76Y+TmlS5emdp3Tgo4iUWjUm2MY++57PP/SK4wdM5rFixYGHenI5eOYV+fccL8AdvCWnQ/aNcBE51kAJANlgbX83c8AqOS3ZdSeKXVew2V/qvsR4Pgs5t+dwfLJadaVDBQws2ZAK+Bc51w94GugcGYbSP3NLOmv77KIk7m1G7Yxafo3ACz67jeSkx1lSxVPmd79wvopVVeAejUrAfDrmr8AePfTJTSqFx2HZePj42nZqjVmxmmn1yXGYti6dWvQsQ5TIi6OBg3PYdHCr1jz++90at+G9m1bsG/fXjpeHB1fBNKKj69AfHwF6tatB0DrNm35YcX3Aac6NpUpU4ZNm7wha5s2baR06dIBJ0rfN18vYebMGVzUugX3DOjHwq/mM+ie6B7+Uj4+nvXr1qc83rhhA/Hx8QEmyloYMwMpGcuUKUOLVq1ZvmxpwImOaZOA5gBmVhMoBPyFVzy7zMyOM7OqeIWxBcBCoIaZVTWzQngndU1Jb8WpqfMabtuBrWbW2H98FfBFJvNnpSSw1Tm3x8xq4ZX9M5X6m1mBsnWOYtPw/sylNG1YE4DqJ5WnUMEC/OWPezUzurY5i/GfLE6Z/89N26mVUCGlg9uyUS1+/HX94SsOQLMWrVi4YAEAv63+lcTEREqVKhVwKs/WLVtSTsrat28fX82bS63adZj2+Zd8MHUGH0ydQeHCRZj84bSAk6avbLlyxFeowOpffwHgq/nzSKgWjpN0wqZpsxa8P3kSAO9PnkSz5i2DDZSB2+/sz6czZvHxpzN4/Mn/0vCcRjz6+JNBx8pUndNO5/ffV7NmzR8kHjjA1I8+pGnzFkHHylQYM+/Zs4fdu3el3J83dw7Vq+fbAcXcF0VjXs1sDDAPOMXM1phZL2AkkOBfPusd4Bq/CvsdMA74HpgK3OqcizjnkoDbgE+AFcA4f95MRd1Z6JJj1wAvmVlRvMP/1x7FuqYCN5nZCuBHvKEDeeL1R3vSuH4Nyh5fnFVTH+TBlz7i9UnzeHnYlSwaP5gDiRGuv/fNlPkvOKs6a9ZvZfXav8fcrdu0nUeGf8ynI+4gMSnC7+u20Pu+t/IqcoYG3tWPxQsXsm3bVi5s2ZSbbulDpy5dGDZ0CN06daBgwYI88MhjUXPI9a+/NnHf0IFEIhFcsqPVhW1p0rR50LFyZODg/2PQPQNITEykUqXKPPDQo0FHOsTAu/qxaOECtm3bSpuWTbj5lj6ULHk8jz36IFu3bKHPLTdySq1TeXH4q0FHTZFe5uuu783d/e/gvYnvUrFiRZ74z9NBxzxmFChQgEFD7uXm3teTnByhU+euUd+pCmPmLZs3c2ffWwFIikRod3F7zm/cJIulJDucc5dnMCnd8WfOuYeBh9Np/wj4KCfbtmgahyfhVuTM20L1Ytq84NmgI+RYcnLQCXKmQGx0dNhzQh+JeS9KvseJHLXCBcjXV3ORpg/k2yfU3i/ujdp3qoYNiIiIiEhoaNiAiIiISBjERG0xNF+p8ioiIiIioaHKq4iIiEgYHPkvXx1TtBdEREREJDTUeRURERGR0NCwAREREZEw0HXmAFVeRURERCREVHkVERERCQOdsAWo8ioiIiIiIaLKq4iIiEgYaMwroMqriIiIiISIKq8iIiIiYaAxr4AqryIiIiISIqq8ioiIiISBxrwCqryKiIiISIio8ioiIiISBhrzCqjyKiIiIiIhosqriIiISBhozCugyquIiIiIhIgqryIiIiJhoDGvgCqvIiIiIhIi6ryKiIiISGho2IDkms0Lng06Qo6UObtP0BFyLGz7WPKHzuEQ+YfQmx1Q5VVEREREQkSVVxEREZEw0AlbgCqvIiIiIhIiqryKiIiIhIEqr4AqryIiIiISIqq8ioiIiISBrjYAqPIqIiIiIiGiyquIiIhIGGjMK6DKq4iIiIiEiCqvIiIiImGgMa+AKq8iIiIiEiKqvIqIiIiEgca8Aqq8ioiIiEiIqPIqIiIiEgYa8wqo8ioiIiIiIaLOq4iIiIiEhoYNiIiIiISAadgAoMqriIiIiISIKq8iIiIiIaDKq0eVVxEREREJDVVeRURERMJAhVdAlVcRERERCRFVXo8xZlYF+MA5d1qa9pnAAOfcohyurxPwk3Pu+9zKmJlhQwcza9ZMSpcuw7uT3k9pHzP6Tca98zYxMbE0btKUO/rflR9xUrx035Vc1OQ0Nm3ZSYPuj6S033xZU27s0ZhIsmPq7OUM+d9kLruoAXdc0yplntNrVOTcyx9n6U9r+eSV26lQNo69+xMB6HDzc2zauitfn0t6+/jHH1bw8APD2L9/P7GxsQz+v/s47fS6+ZorJ3bs2MH99w5l1aqfMDPuf/AR6p1xZtCxDnHf0EEp+3nCpA8AeP7Zp5k5YzoWE0Pp0mV44OFHKV8+PuCk6buodQuKFitGbEwMsQViGTNuYtCRMrV+3TqGDLqbLZs3gxnduvfgyquuCTpWpubMnsXjjz1MciSZzl270+uG3kFHytT+/fu59uorSTxwgKRIhNZtLuSW2/oGHStTYdvHWdGYV486r5KVTsAHQL50Xjt06sylV1zJ/w0emNK2cMF8Zn4+g7ETJlOoUCHvj1M+e/P9+bw09gtGPHh1SluTBjVo3+x0zr70MQ4kJlGuVHEA3vl4Ee987H1HqFO9IuP+ewNLf1qbsty1Q15nyfe/5+8TSCW9ffz0f/5N75tv5YLGTZg96wue/s+/GTHqzcAyZuWJRx/m/Asa85+nnyHxwAH27tsXdKTDXNKpC5dd8S+GDr4npe2aa6/n1j53APD2W28w/MXnGXrfAwElzNqI116nVKnSQcfIltgCsQy4eyCn1q7D7t27uKx7Vxqdez7VqlcPOlq6IpEIjzz8AC+/8hrx8fFccWk3mjVvEbV5AQoVKsSIka9TtFgxEhMT6XnVFVzQuAl1650RdLR0hXEfS/Zo2MCxqYCZjTazFWb2rpkVTT3RzF40s0Vm9p2Z3Z+q/TEz+97MlprZk2Z2HnAJ8G8z+8bMquV18PoNGlKyZMlD2saPfYdre91AoUKFAChdpkxexzjMnCU/s2X7nkPaendvzJOvfcqBxCSAdCuoPdrWZ/wnS/IlY3alt4/NjN27vPy7du2kXPnyQUTLlp07d7J48UI6d+0GQMFChYiLiws41eHqN2hIXJr9XLx4
8ZT7e/fuVRUlF5UrV55Ta9cBoFix4iQkJLBx44aAU2Vs+bKlVK58MpUqV6ZgoUK0bXcxMz+fHnSsTJkZRYsVAyApKYmkpKSo/rnSMO7jrJhZvt2imSqvx6ZTgF7OuTlmNhK4Jc30Ic65LWYWC0w3s7rAWqAzUMs558zseOfcNjObgjcM4d38fQp/+231ar5evIjnn3maQscVol//e6hz+ulBxUlR/eTynH9mNe6/tQP7DiQy6L/vsThNRbVbm7PofufwQ9peHvYvIsnJTJr+DY+9MjU/I2dowD2DufXG63nqySdIdsmMemtM0JEytHbNGkqVKs29Qwbx448/ULtOHe4eOISiRYtmvXAUePZ/T/HBlEkUL1GCV0a+EXScjBncdEMvzIxu3S+lW49Lg06UbWvXruGHFSs4vW69oKNkaOOGDVQ4oULK4/Lx8SxbujTARNkTiUS4vHsXfv/9dy69/Arqah9LAFR5PTb94Zyb499/C7ggzfQeZrYE+BqoA9QGtgP7gFfNrAuwhygRiUTYvmM7b7w9ljv7383dA+7AORd0LArExlC6ZDGaXP0kg5+axFtPXHfI9IanncyefYl8//O6lLZrB4+iYY9HaHXdU5x/ZjWuaH92fsdO1/ixY+h/z0CmTp/JgLsHcf+9Q4OOlKFIJIkfVnxP98suZ9yESRQpUoSRI4ZnvWCU6HP7nXwy/QvaXdyBd95+K+g4GRr15hjGvvsez7/0CmPHjGbxooVBR8qWPbt30/+Ovtw1cPAhlW7JHbGxsYybOJlpM75g+bKlrFz5U9CR/lFUefWo83psStuzS3lsZlWBAUBL51xd4EOgsHMuCTgbeBdoD2SrJGhmvf0hCIvyqgMRHx9Py1atMTNOO70uMRbD1q1b82RbObF2wzYmTf8GgEXf/UZysqNsqb//WHa/sD7jph56ftyfm7YDsGvPfsZ+vIiGdU7Ot7yZ+WDKJFq2agNA6wvb8t2y6K1OxMdXID6+QkrFp3WbtvywIl+GZOeqdu07MP2zaUHHyFB8vHciWZkyZWjRqjXLo/g1cVBiYiL97uhLu4s70Kp1m6DjZKp8fDzr161Pebxxw4aUfR4GcXFxNDz7HOZ+OTvoKBkK+z6WjKnzemw6yczO9e9fAXyZalocsBvYbmbxwEUAZlYcKOmc+wi4Ezh4LGgnUCKjDTnnhjvnGjjnGlx3fd6cxdmsRSsWLlgAwG+rfyUxMZFSpUrlybZy4v2ZS2nasCYA1U8qT6GCBfjLH/dqZnRtcxbjP1mcMn9sbAxljvfGixUoEEO7JqfxXaqqbJDKlSvP4oXePl7w1XxOOjk6OtXpKVuuHPEVKrD6118A+Gr+PBKq5flw7Fzx22+rU+7PnDGdqlUTgguTiT179rB7966U+/PmzqF69RoBp8qcc45h9w4hISGBq3teG3ScLNU57XR+/301a9b8QeKBA0z96EOaNm8RdKxMbdmyhR07dgCwb98+5s+bS5UofQ1DOPdxVlR59WjM67HpR+BWf7zr98CLQAcA59y3ZvY18APwB3BweEEJYLKZFca7DHI/v/0d4BUz6wt0c879nJfBB97Vj8ULF7Jt21YubNmUm27pQ6cuXRg2dAjdOnWgYMGCPPDIY/n+xnr90Z40rl+DsscXZ9XUB3nwpY94fdI8Xh52JYvGD+ZAYoTr7/377PwLzqrOmvVbWb327ysjHFewAFOev5WCBWKJjY3h869+YOTEOeltLk+lt4//7/4H+fdjD5OUFOG4446L6jPgAQYO/j8G3TOAxMREKlWqzAMPPRp0pMMMvKsfixYuYNu2rbRp2YSbb+nDl7NnsXr1r8SYcULFExly7/1ZrygAWzZv5s6+twKQFInQ7uL2nN+4ScCpMvf1ksV8MGUyNWrWpEeXjgD0uaMfjZs0DThZ+goUKMCgIfdyc+/rSU6O0Klz16j/gvDXpo0MHTyQ5OQIycmONhe2pWmz5kHHylAY97Fkj0XD2EE5NuxJDNeLqczZfYKOkGObFzwbdIQciYnyb+/pCder2BPC3SxyTChcIH9/86rkFW/m2yfU9revitpPFg0bEBEREZEcMbORZrbRzJanM62/mTkzK+s/NjN7xsxW+ZfjPCvVvNeY2Ur/lq1fFlHnVURERERyahTQNm2jmVUG2gCprx15EVDDv/XGG86ImZUG7gPOwTtp/D4zy/KkFnVeRUREREIgmk7Ycs7NArakM+kp4G4OvfJRR+AN55kPHG9mJwAXAp8657Y457YCn5JOhzgtdV5FRERE5KiZWUdgrXPu2zSTTsQ7SfygNX5bRu2Z0tUGREREREIgP6+0Y2a98Q7xHzTcOZfhBd3N+yn6wXhDBvKUOq8iIiIicgi/o5qTXx+qBlQFvvU72ZWAJWZ2Nt5P0FdONW8lv20t0CxN+8ysNqRhAyIiIiIhEE1jXtNyzi1zzpV3zlVxzlXBGwJwlnNuPTAFuNq/6kAjYLtzbh3wCdDGzEr5J2q18dsypc6riIiIiOSImY0B5gGnmNkaM+uVyewfAb8Aq4BXgFsAnHNbgAeBhf7tAb8tUxo2ICIiIhIC0fSzrc65y7OYXiXVfQfcmsF8I4GROdm2Kq8iIiIiEhqqvIqIiIiEQfQUXgOlyquIiIiIhIYqryIiIiIhEE1jXoOkyquIiIiIhIYqryIiIiIhoMqrR5VXEREREQkNVV5FREREQkCVV48qryIiIiISGuq8ioiIiEhoaNiAiIiISBho1ACgyquIiIiIhIgqryIiIiIhoBO2POq8Sq5JTg46Qc5sXvBs0BFyrEzHZ4KOkCNbp9wedAQRETnGqPMqIiIiEgKqvHo05lVEREREQkOVVxEREZEQUOXVo8qriIiIiISGKq8iIiIiIaDKq0eVVxEREREJDVVeRURERMJAhVdAlVcRERERCRFVXkVERERCQGNePaq8ioiIiEhoqPIqIiIiEgKqvHpUeRURERGR0FDnVURERERCQ8MGREREREJAwwY8qryKiIiISGio8ioiIiISBiq8Aqq8ioiIiEiIqPIqIiIiEgIa8+pR5VVEREREQkOVVxEREZEQUOXVo8qriIiIiISGKq8iIiIiIaDKq0ed12OEmVUBPnDOnZam/QFglnPus0CC5dD+/fu54dp/ceDAASKRCC1bteGmW/vinOOFZ5/ms0+nEhMTS7cel3H5lVcHHReAYUMHM2vWTEqXLsO7k94H4J7+d7J69a8A7Ny5gxIl4hg7YVK+5nrpjlZcdHZVNm3bQ4NbRgMw5MpzuO7C09i0fS8A970+l08WraZ0icK8Pbgd9WvG89ZnK7jzxZkAFC9SkM+e6J6yzhPLFuedz3/gruGz8vW5pLZ//36uvfpKEg8cICkSoXWbC7nltr6B5cmO0W++zsQJ43HO0aVbd/51Vc+gI2UqjPt4/bp1DBl0N1s2bwYzunXvwZVXXRN0rExd1LoFRYsVIzYmhtgCsYwZNzHoSJkK4z6eM3sWjz/2MMmRZDp37U6vG3oHHUlygTqvxzjn3L1BZ8iJQoUK8dKIURQtWozExER6XXMl51/QhF9
//ZkN69czYfLHxMTEeB+eUaJDp85cesWV/N/ggSltj//nqZT7//n3YxQvXiLfc7352fe89P63jOjf5pD2Zyd9zdMTlxzStu9AEg+8OZ/aVcpQ5+QyKe279ibSqM/bKY/n/O8yJs1dlbfBs1CoUCFGjHydosW810jPq67ggsZNqFvvjEBzZWTVyp+YOGE8b40ZT8GCBbn1putp0rQ5J510ctDRMhS2fQwQWyCWAXcP5NTaddi9exeXde9Ko3PPp1r16kFHy9SI116nVKnSQcfIlrDt40gkwiMPP8DLr7xGfHw8V1zajWbNW0Rt3uxQ5dWjMa/Hllgze8XMvjOzaWZWxMxGmVk3ADN7zMy+N7OlZvak3zbKzF4ys0Vm9pOZtffbq5jZbDNb4t/Oy48nYGYULVoMgKSkJJKSksCMd8e9ww033UJMjPeSLV2mTGaryVf1GzSkZMmS6U5zzvHp1Km0bXdxPqeCOcv/ZMvOfdmad8/+JOZ+/yf7DiRlOE/1E4+n/PFFmbP8z9yKeETMjKLFDn+NRKtffvmZ00+vS5EiRShQoAD1GzRk+mfTgo6VqbDtY4By5cpzau06ABQrVpyEhAQ2btwQcKpjS9j28fJlS6lc+WQqVa5MwUKFaNvuYmZ+Pj3oWJIL1Hk9ttQAnnfO1QG2AV0PTjCzMkBnoI5zri7wUKrlqgBnAxcDL5lZYWAj0No5dxZwKfBMfjwB8L4tX969E62bnU+jc8/j9Lr1WPPH70yb+jH/uqwrfW6+gd9/W51fcY7KksWLKF2mDCefXCXoKClu6lCPBc9fyUt3tOL44sdle7nuTWry7qyf8jBZ9kUiEXp06UjzxufR6NzzqFu3XtCRMlS9ek2WLFnMtm1b2bt3L1/OnsWG9euDjpWlMO3jtNauXcMPK1ZwerRnNrjphl5c1r0L744bG3SaHAnDPt64YQMVTqiQ8rh8fDwbNkRvZztbLB9vUUyd12PLr865b/z7i/E6pQdtB/YBr5pZF2BPqmnjnHPJzrmVwC9ALaAg8IqZLQPGA7XzOHuK2NhYxoyfxMefzmT58qWsWvkTBw4kUui4Qrz1zgQ6d+3O/fcOya84R2XqRx8GUnXNyCsfLqN2r1Gcc9to1m/ZzWPXN872st2b1mTcF9HReY2NjWXcxMlMm/EFy5ctZeXK6MiVnoRq1bj2uuu5uXcvbr3pek45pVbKEYRoFqZ9nNqe3bvpf0df7ho4mOLFiwcdJ1Oj3hzD2Hff4/mXXmHsmNEsXrQw6EjZEqZ9LMem6P8ElZzYn+p+hFRjmp1zSXjV1XeB9sDUVPO6NOtxwJ3ABqAe0AAolN4Gzay3P+Rg0cgRw4/6CaRWIi6OBg3PYe6c2ZSPj6dFS2/sZvOWrVm58sdc3VZeSEpKYsZnn3Jh23ZBR0mxcdsekpMdzsHIqctpUDM+W8udXrUsBWJj+HrVxjxOmDNxcXE0PPsc5n45O+gomerctTtjxk1k5OujKRFXkpOrVAk6UraFZR8DJCYm0u+OvrS7uAOtWrfJeoGAxcd7778yZcrQolVrli9bGnCirIVpH5ePj2f9ur+PcmzcsCFln4eVmeXbLZqp8/oPYWbFgZLOuY/wOqapj/V0N7MYM6sGJAA/AiWBdc65ZOAqIDa99TrnhjvnGjjnGlx3/dGfxbl1yxZ27tgBwL59+/hq3lyqVE2gWYtWLFr4FQCLFy2IqsPwGflq/jyqJFQlvkKFrGfOJxVKFU253/G86nz/W/ZOfOvR9BTGzYyOytuWLVvYkeo1Mt9/jUSzgycYrlv3JzOmT+Oidh0CTpS5MO5j5xzD7h1CQkICV/e8Nug4WdqzZw+7d+9KuT9v7hyqV68RcKrMhW0f1zntdH7/fTVr1vxB4oEDTP3oQ5o2bxF0LMkFutrAP0cJYLI/ntWAfqmm/Q4sAOKAm5xz+8zsBWCCmV2NV6XdnR8h//prE/cNHUgkEsElO1pd2JYmTZtz5pn1GTLoLka/OYqiRYvyf8Meynpl+WTgXf1YvHAh27Zt5cKWTbnplj507tqNTz7+kLYXtQ8s1+t3t6Vx3UqUjSvMqjeu48G3vqJJ3ROpm1AO5+C3DTvo8+zfJy/88Nq1lChaiEIFYuhwbgLth0zihz+2ANC1cQ063Tc5qKdyiL82bWTo4IEkJ0dITna0ubAtTZs1DzpWpvrf2Yft27ZRoEABBg25j7i4uKAjZSqM+/jrJYv5YMpkatSsSY8uHQHoc0c/GjdpGnCy9G3ZvJk7+94KQFIkQruL23N+4yYBp8pc2Pax9367l5t7X09ycoROnbtG/ReErER7RTS/mHNpjxjLP4mZjcK7Puy7R7uuXfvD9WIKwbDDw5TpmG/nzeWKrVNuDzpCjoXrVezR3zORYBQukL+nNlXr/3G+fUL9/J+LovaTJYR/vkVERETkn0rDBv7hnHM9g84gIiIiWdNRFo8qryIiIiISGqq8ioiIiISATtjyqPIqIiIiIqGhyquIiIhICKjw6lHlVURERERCQ5VXERERkRDQmFePKq8iIiIiEhqqvIqIiIiEgAqvHlVeRURERCQ01HkVERERCYGYGMu3W1bMbKSZbTSz5ana/m1mP5jZUjN7z8yOTzVtkJmtMrMfzezCVO1t/bZVZjYwW/shZ7tNRERERIRRQNs0bZ8Cpznn6gI/AYMAzKw2cBlQx1/mBTOLNbNY4HngIqA2cLk/b6Y05lVEREQkBKJpzKtzbpaZVUnTNi3Vw/lAN/9+R+Ad59x+4FczWwWc7U9b5Zz7BcDM3vHn/T6zbavyKiIiIiK57TrgY//+icAfqaat8dsyas+UKq8iIiIiIZCf13k1s95A71RNw51zw7O57BAgCRidF9nUeRURERGRQ/gd1Wx1VlMzs55Ae6Clc875zWuByqlmq+S3kUl7hjRsQERERESOmpm1Be4GLnHO7Uk1aQpwmZkdZ2ZVgRrAAmAhUMPMqppZIbyTuqZktR1VXkVERERCIJpO2DKzMUAzoKyZrQHuw7u6wHHAp/4Qh/nOuZucc9+Z2Ti8E7GSgFudcxF/PbcBnwCxwEjn3HdZbVudVxERERHJEefc5ek0v5rJ/A8DD6fT/hHwUU62rc6riIiISAjk5wlb0UxjXkVEREQkNFR5FREREQkBVV496rxKrikQG643VXLKFTzCY+uU24OOkCMn9MyTS/zlqXWjrgw6Qo6F7aWsv78icjTUeRUREREJAX3x82jMq4iIiIiEhiqvIiIiIiGgMa8eVV5FREREJDRUeRUREREJARVePaq8ioiIiEhoqPIqIiIiEgIa8+pR5VVEREREQkOVVxEREZEQUOHVo8qriIiIiISGOq8iIiIiEhoaNiAiIiISAjphy6PKq4iIiIiEhiqvIiIiIiGgwqtHlVcRERERCQ1VXkVERERCQGNePaq8ioiIiEhoqPIqIiIiEgIqvHpUeRURERGR0FDlVURERCQENObVo8qriIiIiISGKq8Stfbv38+1V19J4oEDJEUitG5zIbfc1jfoWIcZNnQws2bNpHTpMrw76f1Dpr0xaiRPPfkEM2
bPo1SpUgElzFokEuHyHl0pHx/Pcy+8HFiOZ29oxIVnnMhfO/Zx3qAPARjcrS7tzqpEsnNs2rGfW1+ex/pteylZtBDP9W5E1fLF2ZcYoc8r81mxZjvVTyjByNsuSFnnyeVL8Oi73/LSJz8G9bTYsWMH9987lFWrfsLMuP/BR6h3xpmB5cnK6l9/4e4Bd6Y8XrvmD26+rS//uqpncKGyEJbPi4PWr1vHkEF3s2XzZjCjW/ceXHnVNUHHylQYM0P0fL7lBhVePeq85jEzGwbscs49eZTrWQ00cM79lc60KsAHzrnTjmYbGWz3DKCic+6j3F53VgoVKsSIka9TtFgxEhMT6XnVFVzQuAl1652R31Ey1aFTZy694kr+b/DAQ9rXr1vH/LlzqHBCxYCSZd/oN98gIaEau3bvCjTHmFm/8MqnP/LSjeeltD374fc88u5SAHq3OYW7O59Ov9cW0L9jHZb9tpWrnp5FjRPi+HfPhnR6dDqr1u2kyZCPAYgx4/tnO/PhojWBPJ+Dnnj0Yc6/oDH/efoZEg8cYO++fYHmyUqVqgmMmzAZ8P7wt2nRhBYtWwecKnNh+bw4KLZALAPuHsipteuwe/cuLuvelUbnnk+16tWDjpahMGaG6Pl8k9yjYQOSITMrAJwBtAto+xQtVgyApKQkkpKSovJrZ/0GDSlZsuRh7U8+8Si397srGiMfYsP69cyeNZPOXbsFHYW5P25k664Dh7Tt3JuUcr/YcQVwzgFwyoklmf39egBWrtvBSWWLUS6u8CHLNq0Tz+qNu/hj8+48Tp6xnTt3snjxwpT9W7BQIeLi4gLLk1NfzZ9HpcqVqVjxxKCjZCosnxcHlStXnlNr1wGgWLHiJCQksHHjhoBTZS6MmaPp8y03mFm+3aKZOq95wMyGmNlPZvYlcIrfVs3MpprZYjObbWa1/PYOZvaVmX1tZp+ZWbzfXsbMppnZd2Y2AsjqlRRrZq/4808zsyJHuN1hZvammc0B3gQeAC41s2/M7NK82F+ZiUQi9OjSkeaNz6PRuedRt269/I5wRD6fMZ3y5eM5pVatoKNk6YnHHuHO/ncRExO9HwdDu9dj+f860f28KjwywavCLv99K+0bVAbgrIQyVC5bjIqlix6yXJdzqzBh3ur8jnuItWvWUKpUae4dMogeXTsx7N4h7NmzJ9BMOfHJxx9yUbv2QcfIlrB+Xqxdu4YfVqzg9JDkhfBkDsPnm+Sc/m/mMjOrD1zG3xXLhv6k4UAf51x9YADwgt/+JdDIOXcm8A5wt99+H/Clc64O8B5wUhabrgE878+/Deh6hNsFqA20cs5dDtwLjHXOneGcG5vd/ZBbYmNjGTdxMtNmfMHyZUtZufKn/I6QY3v37mXkKy9zcxSPtzvoi5mfU7p0aWrXyfURJ7nqofHfctrtkxg/dzU3tK4JwNPvf0fJYoWY9fBF9G5zCkt/20ok2aUsUzA2hovOOpFJX/0eVGwAIpEkfljxPd0vu5xxEyZRpEgRRo4YHmim7EpMPMAXM2fQuk3boKNkSxg/L/bs3k3/O/py18DBFC9ePOg42RKWzGH5fMsJs/y7RTONec19jYH3nHN7AMxsClAYOA8Yn6oUf5z/byVgrJmdABQCfvXbmwBdAJxzH5rZ1iy2+6tz7hv//mKgipkVP4LtAkxxzu3NzpM1s95Ab4DnXniZXjf0zs5iORYXF0fDs89h7pezqVGjZp5sI7es+eN31q5dw6VdOwKwccMGrujehTffGUfZsuUCTneob75ewsyZM/hy9iz279/P7t27GHTPAB59/KiGaOeZ8XN/ZdyA5jw2cRk79yZx2/D5KdO+faojv23amfK4Vb2KfLt6K5t2BDu+ND6+AvHxFVKqgK3btA1N5/XL2bOodWodypQtG3SUHAnL50ViYiL97uhLu4s70Kp1m6DjZEuYMoft802yT53X/BEDbHPOnZHOtGeB/zrnpphZM2DYEW5jf6r7EaDIUWw32wMEnXPD8aq77EvCZTF7jmzZsoUCBQoQFxfHvn37mD9vLtf2uiE3N5EnatQ8hRmz5qY8btemBaPHTojKqw3cfmd/br+zPwALF3zF66NGRt0He0J8CX7Z4HVKLzqrEj+t2wFAXNGC7N0fITGSzNXNqjH3h42HjI/tdu7JgQ8ZAChbrhzxFSqw+tdfqFI1ga/mzyOhWrWgY2XL1I8+pG27i4OOkS1h+7xwzjHs3iEkJCRwdc9rg46TLWHLHIbPNzky6rzmvlnAKDN7FG//dgBeBn41s+7OufHmlUHrOue+BUoCa/1lr0mzniuAh8zsIiDHPR/n3A4zy+l209oJlMjptnPDX5s2MnTwQJKTIyQnO9pc2JamzZoHESVTA+/qx+KFC9m2bSsXtmzKTbf0OWZODshvI249n/NPjadM8eNY/kxnHpuwlNb1KlLjhDiSneOPv3bT77UFAJxSsSQv3HguDvhhzTb6vPJVynqKHhdLs9NO4M6RCwJ6JocaOPj/GHTPABITE6lUqTIPPPRo0JGytHfPHubPm8vQ+x4IOkq2hOXz4qCvlyzmgymTqVGzJj26eEdp+tzRj8ZNmgacLGNhzHysifYTqfKLHTxzV3KPmQ3B6xBuBH4HlgATgBeBE4CCwDvOuQfMrCPwFLAVmAE0dM41M7MywBjgRGAu0Aaon51LZZnZAKC4c26YmVXN4XaHkerSXmZWGvjEX/bRzMa95nblNa8lh/C1HxOyD64Teo4OOkKOrRt1ZdARcixsL+WQvYxFMlS4QJYnU+eqxv/5Mt/e7bP7XxC171R1XiXXqPOa99R5zXvqvOa9kL2MRTKU353XJv+dk2/v9ln9zo/ad6quNiAiIiIioaExryHiDyWYns6kls65zfmdR0RERPKPjlp41HkNEb+DekbQOURERESCos6riIiISAjoagMejXkVERERkdBQ5VVEREQkBFR49ajyKiIiIiKhocqriIiISAhozKtHlVcRERERCQ1VXkVERERCQIVXjyqvIiIiIhIaqryKiIiIhECMSq+AKq8iIiIiEiLqvIqIiIhIaGjYgIiIiEgIaNSAR5VXEREREQkNVV5FREREQkA/UuBR5VVEREREQkOVVxEREZEQiFHhFVDlVURERERCRJVXERERkRDQmFePKq8iIiIikiNmNtLMNprZ8lRtpc3sUzNb6f9bym83M3vGzFaZ2VIzOyvVMtf48680s2uys21VXiXXOBd0gpwJW16AZMIVet2oK4OOkGNdRiwIOkKOTeh1dtARRCQfRFnhdRTwHPBGqraBwHTn3GNmNtB/fA9wEVDDv50DvAicY2algfuABoADFpvZFOfc1sw2rMqriIiIiOSIc24WsCVNc0fgdf/+60CnVO1vOM984HgzOwG4EPjUObfF77B+CrTNatuqvIqIiIiEgBFdpdd0xDvn1vn31wPx/v0TgT9SzbfGb8uoPVOqvIqIiIjIIcyst5ktSnXrnZPlnXMO8masmyqvIiIiIiGQn9d5dc4NB4bncLENZnaCc26dPyxgo9++Fqicar5KfttaoFma9plZbUSVV
[base64-encoded image data omitted: Matplotlib-generated PNG figure output embedded in a notebook "image/png" field]
v2C3laWSkTp44rhJBJU3rHBwcVKJESR05fNCKkVnGyOFDVaZMWbP+2zJ7ON8GgzSjfUlNXn1Cp64+eGV7L3cXNSiZVXvP3tKzKKMkqVgOH+06dVNP/5EsbzpyXbn8POWZ3CXRYkfCi4qK0prfVis8PEwFCxa2djiJyh4+37Gh3/bV7xfs7e83kjYSXhvUu3dvbdu2TT///LPWr1+vrVu3miWCnTt31u7du/Xjjz/qyJEjatiwoapWraqzZ5+Xkz158kRFihTR6tWrdezYMbVt21bNmjXT3r174/wccTF27FgVLVpUBw8eVMeOHdWhQwedPn1a0vOR2CpVqihlypTasWOHdu7cqRQpUqhq1aqKjIyUJD169EgtWrTQ77//rj/++EM5c+ZU9erV9ejRI7PnGTx4sOrVq6ejR49qyJAhWrbs+S+rp0+f1vXr1zVx4sT4v8gWcu/+PUVFRSl16tRm61OnTq3bt29bKSrLWPPbap08eUJde3xm7VAsxh7Od/eaefUs2qhv1p3+z3aDPyykv7/9UBe+aaiMqZOryfj/XeOZ1iuZbj18Ytb+1oNwSVI6L7eEDxoJ7uyZ0ypRtLCKFc6vEUMHafykqcqe4+0rSY8Pe/h8x4Z+21e/Jfv8+x0fBkPSXWwVtyWyMY8fP9bs2bO1YMECVaxYUZI0b948Zcz4fOKYy5cva86cObp8+bL8/PwkSb169dLatWs1Z84cjRw5UhkyZFCvXr1Mx+zSpYvWrVunxYsX6913333lc8RV9erV1bFjR0lSnz59NH78eG3ZskUBAQH66aefFB0drW+//dZ0TcGcOXPk5eWlrVu36v3331eFChXMjjdz5kx5eXlp27Ztqlmzpml9kyZN1KpVK9PjCxcuSJLSpk0rLy+vl8YXERGhiIgIs3VGR1e5urrGq5+Iv5Dr1zX6qxH6ZtZ3vN42pKC/t9pVCVC5/mte2XbS6pOav+28Mvm4q0+9/JrRvqQ+HLM18YOERfj7Z9XiZSv1+PEjbVi/TgO+7KPZcxfYfNIL2Dr+fiMpIuG1MefPn1dkZKSKFy9uWuft7a2AgOcTBhw9elRRUVHKlSuX2X4RERGmXyGjoqI0cuRILV68WFevXlVkZKQiIiKUPHnyOD1HXBUoUMD0b4PBIF9fX928eVOSdPjwYZ07d04pU6Y02+fJkyc6f/68JOnGjRvq37+/tm7dqps3byoqKkphYWG6fPmy2T5FixaNV1wvBAcHa8iQIWbr+g0YpP4DB7/W8V5HKq9UcnR0jDHBxZ07d+Tj42OxOCztxInjunvnjho3/MC0LioqSgf279OPPyzUvoNH5ejoaMUIE4etn++ggDRK4+GmoxPrmtY5OTpoeNN31KFqoAr2+Nm0/u7jCN19HKHzIY905toDHZ/0gYrl8NG+c7d183640niYj+Sm8Xw+i/ON++Yjv0ianF1clDlLFklSnrz5dPzYUS1c8L0GDh5q5cgSj61/vl+GfttXv+317zeSNhJeO/P48WM5OjrqwIEDMb5wUqRIIUn6+uuvNXHiRE2YMEH58+eXu7u7unfvbiolTijOzs5mjw0Gg6Kjo01xFilSRAsXLoyxX5o0aSRJLVq00J07dzRx4kRlyZJFrq6uCgoKihGnu7v7a8XXt29f9ezZ02yd0dGyv1Y6u7god5682vPHblWoWEmSFB0drT17dqvxRx9bNBZLKl6ihJau/NVs3aB+feWfLZtatW5js38sbf18/7TzgrYdDzFbt/TzClq884IWbj//0v0c/r/Kw8X5+VU4+87dVv+GBeXkaDBd11s+v6/OXHugB2EJ+z0Fy4iOjtbTBP4bk9TY+uf7Zei3ffXbXv9+x4ctz4acVJHw2pjs2bPL2dlZe/bsUebMmSVJ9+7d05kzZ1S2bFkVLlxYUVFRunnzpkqXLh3rMXbu3Kk6dero44+ffyFHR0frzJkzypMnT5yeIyG88847+umnn5Q2bVp5eHi8NM5p06apevXqkp5PchWX62JcXJ5PahMVFfWf7VxdY5YvP3kWl+gTVrMWrTTgyz7Kmzef8uUvoAXz5yk8PFx1633w6p3fUu7uKZQzp3kVQrLkyeXl6RVjva1528+3u6uTsqb7X2VGljQplC9zKt0PjdDfd8J077F5UvMsKlo37ofr3PXn194XyZ5a72RLrd2nb+lBaKT806VQvwYF9deNR9p39vnne+mui/q8Xn5N/rSEJq46odwZvdTu/UD1W3jAch1NIGGhoWZVKVf//lunTp6Up6en0v//ZSe2ZuL4sSpVuox806dXWGioflu9Svv37dX0mbOtHVqie9s/36+LfttPv+357zeSLhJeG5MiRQq1bt1avXv3VurUqZU2bVr169dPDg7PR0Zy5cqlpk2bqnnz5ho7dqwKFy6sW7duadOmTSpQoIBq1KihnDlzaunSpdq1a5dSpUqlcePG6caNG6aE91XPkRCaNm2qr7/+WnXq1NHQoUOVMWNGXbp0ScuXL9fnn3+ujBkzKmfOnJo/f76KFi2qhw8fqnfv3kqWLNkrj50lSxYZDAatWrVK1atXV7JkyUyj20lR1WrVde/uXU2bMkm3b99SQGBuTfvmW6W24ZIoe/a2n+9C2by1qt//brEz8uMikqRF28+r08w/Xrl/eESUahbNpC8+KKDkrk66cT9cm45c05ifjyny2fMKkIfhT1V/1GZ93bKYtgyrpjuPI/T1yqOat+Vc4nQqER0/fkyftmpuejxmdLAkqXadeho28itrhZWo7t69o/59++jWrZtKkTKlcuUK0PSZsxVU8j1rh5bo3vbP9+ui3/bVbyCpMRiNRqO1g0DCevz4sTp06KDly5crZcqU+uyzz7R69WoVKlRIEyZM0NOnTzV8+HB9//33unr1qnx8fFSiRAkNGTJE+fPn1927d/XJJ59o06ZNSp48udq2bavLly/rwYMHWrlyZZye41X8/f3VvXt3de/e3bSuUKFCqlu3rgYPHixJCgkJUZ8+ffTbb7/p0aNHypAhgypWrKgxY8bIw8NDBw8eVNu2bXXs2DFlypRJI0eOVK9evcyOazAYtGLFCtWtW9fs+YcNG6Zp06bpxo0bat68uebOnRun19YaI7yApaVvGfNSAntwfW5Ta4cAAEgAbkl4SK/MuJ3WDuGltve0zR8eSXiBeCDhhT0g4QUAvM1IeF+PrSa83IcXAAAAAGCTkvDvH3hb7dixQ9WqVXvp9sePH1swGgAAACBpYJJmyyPhRYIrWrSoDh06ZO0wAAAAANg5El4kuGTJkilHjhzWDgMAAACAnSPhBQAAAAALMFDTbHFMWgUAAAAAsEkkvAAAAAAAm0RJMwAAAABYABXNlscILwAAAADAJpHwAgAAAABsEiXNAAAAAGABzNJseYzwAgAAAABsEgkvAAAAAMAmUdIMAAAAABZARbPlMcILAAAAALBJJLwAAAAAAJtESTMAAAAAWIADNc0WxwgvAAAAAMAmkfACAAAAAGwSJc0AAAAAYAFUNFseI7wAAAAAAJtEwgsAAAAAsEmUNAMAAACABRioabY4Rn
gBAAAAADaJhBcAAAAAYJMoaQYAAAAAC3CgotniGOEFAAAAANgkEl4AAAAAgE2ipBkAAAAALIBZmi2PEV4AAAAAgE0i4QUAAAAA2CRKmgEAAADAAqhotjwSXgCvZDRaOwJY0vW5Ta0dglXUmbnH2iFYxc9ti1s7BKuw1+81/rMNwN5Q0gwAAAAAsEmM8AIAAACABRhEmYWlMcILAAAAALBJJLwAAAAAAJtESTMAAAAAWIADFc0WxwgvAAAAAMAmkfACAAAAAGwSJc0AAAAAYAEGboZtcYzwAgAAAABsEgkvAAAAACDOtm/frlq1asnPz08Gg0ErV6402240GjVw4EClT59eyZIlU6VKlXT27FmzNnfv3lXTpk3l4eEhLy8vtW7dWo8fPzZrc+TIEZUuXVpubm7KlCmTRo8eHe9YSXgBAAAAwAIMhqS7xEdoaKgKFiyoqVOnxrp99OjRmjRpkmbMmKE9e/bI3d1dVapU0ZMnT0xtmjZtquPHj2vDhg1atWqVtm/frrZt25q2P3z4UO+//76yZMmiAwcO6Ouvv9bgwYM1c+bMeMXKNbwAAAAAgDirVq2aqlWrFus2o9GoCRMmqH///qpTp44k6fvvv1e6dOm0cuVKNW7cWCdPntTatWu1b98+FS1aVJI0efJkVa9eXWPGjJGfn58WLlyoyMhIfffdd3JxcVHevHl16NAhjRs3ziwxfhVGeAEAAADAzkVEROjhw4dmS0RERLyPc+HCBYWEhKhSpUqmdZ6enipevLh2794tSdq9e7e8vLxMya4kVapUSQ4ODtqzZ4+pTZkyZeTi4mJqU6VKFZ0+fVr37t2LczwkvAAAAABgAQ4GQ5JdgoOD5enpabYEBwfHu48hISGSpHTp0pmtT5cunWlbSEiI0qZNa7bdyclJ3t7eZm1iO8Y/nyMuKGkGAAAAADvXt29f9ezZ02ydq6urlaJJOCS8AAAAAGDnXF1dEyTB9fX1lSTduHFD6dOnN62/ceOGChUqZGpz8+ZNs/2ePXumu3fvmvb39fXVjRs3zNq8ePyiTVxQ0gwAAAAAFmDtmZgTapbm/5I1a1b5+vpq06ZNpnUPHz7Unj17FBQUJEkKCgrS/fv3deDAAVObzZs3Kzo6WsWLFze12b59u54+fWpqs2HDBgUEBChVqlRxjoeEFwAAAAAQZ48fP9ahQ4d06NAhSc8nqjp06JAuX74sg8Gg7t27a/jw4frll1909OhRNW/eXH5+fqpbt64kKXfu3KpataratGmjvXv3aufOnercubMaN24sPz8/SVKTJk3k4uKi1q1b6/jx4/rpp580ceLEGGXXr0JJMwAAAAAgzvbv36/y5cubHr9IQlu0aKG5c+fq888/V2hoqNq2bav79++rVKlSWrt2rdzc3Ez7LFy4UJ07d1bFihXl4OCg+vXra9KkSabtnp6eWr9+vTp16qQiRYrIx8dHAwcOjNctiSTJYDQajW/YX8BuPHlm7Qisg28J+5KQZU1vkzoz91g7BKv4uW1xa4dgFfb6vWavn2/YF7ckPKTXYM6f1g7hpZa2esfaISQKSpoBAAAAADaJhBcAAAAAYJOS8IA/AAAAANgOLiuwPEZ4AQAAAAA2iYQXAAAAAGCTKGkGAAAAAAtwoKbZ4hjhBQAAAADYJBJeAAAAAIBNoqQZAAAAACyAgmbLY4QXAAAAAGCTSHgBAAAAADaJkmYAAAAAsAADszRbHCO8SBTlypVT9+7drR3GW2/61MkqmDfAbKlTs6q1w0pwB/bvU9dO7VW5fCkVyhegzZs2mm3ftGG92rf5RGXfK65C+QJ06tRJK0WasF7Vb6PRqGlTJqpSuVIqXqSA2n3aUpcuXbROsBbw46KFqla5gooVzq+mjRvq6JEj1g4pzvKlT6kh1XNpUYvCWtexuIKyporRpnmxDFrUorB+aVtMX9UKlJ+na6zHcnYwaFqjfFrXsbiypU5utq1IJk9N+CCvVnxaVD+1ekcDquRUupQuidKnxLL4x0VqUK+WSr77jkq++46aNflQv+/YZu2wEhyfb3Nv8+f7dR3Yv09dOrZXpXKlVDBvzPeALbPH842ki4QXNi8yMtLaIbyR7DlyatPW303L3PmLrB1SggsPD1OugAD17TfopdsLv/OOuvXoZeHIEter+j33u1latHC++g0crPmLFitZsmTq2K61IiIiLBxp4lu75jeNGR2sdh076cclKxQQEKgO7Vrrzp071g4tTtycHfTX7TBN2X4x1u2NCqdXnQK+mrztorotO6Ynz6I1smagnB1j/tLfumRm3Ql9GmN9upSuGlwtlw5ffaCOi4+q36+n5OHmpAFVcyV0dxJV2nS+6tajl35YslyLFi/Tu8VLqFvnTjp37qy1Q0tQfL7/523/fL+u8PAwBQQEqG//2N8DtspezzeSrngnvFeuXNHff/9terx37151795dM2fOTNDA8PZq2bKltm3bpokTJ8pgMMhgMGju3Lny8vIya7dy5Uqzso7BgwerUKFCmj9/vvz9/eXp6anGjRvr0aNHpjaPHj1S06ZN5e7urvTp02v8+PExRpP9/f01bNgwNW/eXB4eHmrbtq0kadmyZcqbN69cXV3l7++vsWPHJurrkFCcHB3lkyaNaUmVytvaISW4UqXLqnPXHqpQqXKs22vWrqt2HTqreFCQhSNLXP/Vb6PRqIXzv1ebth1UvkIl5QoI1LCRo3Xr5k1tscFRgvnz5uiDBo1Ut159Zc+RQ/0HDZGbm5tWLl9m7dDiZP/lB5q392/tunAv1u11C/jqhwNXtfviPV24E67Rm84rtbuLSv5rJLhoZk8VyeSpWbsuxzhGzjTucjBIc/f8resPI3TudpiWHrqu7D7J5ejw9pTIlStfQaXLlFWWLP7y98+qLt16KHny5Dpy+JC1Q0tQfL7/523/fL+uUqXLqnO3Hqr4kr9ttspez3dcORiS7mKr4p3wNmnSRFu2bJEkhYSEqHLlytq7d6/69eunoUOHJniAePtMnDhRQUFBatOmja5fv67r168rKioqTvueP39eK1eu1KpVq7Rq1Spt27ZNX331lWl7z549tXPnTv3yyy/asGGDduzYoT///DPGccaMGaOCBQvq4MGDGjBggA4cOKBGjRqpcePGOnr0qAYPHqwBAwZo7ty5CdXtRHPp8iVVKldK1atUVN/PP9P1a9esHRIs4Orff+v27VsqHlTStC5lypTKX6CgDh8+aMXIEt7TyEidPHFcJf7RVwcHB5UoUVJHbKCvvh6uSu3uoj+vPDStC4uM0qkbj5XbN6VpnVcyJ3Uvl02jN55XxLOY35lnb4UqWtL7udPIwSAld3FUpQAfHfz7oaKijZboSoKLiorSmt9WKzw8TAULFrZ2OBbD59t2Pt8wx/lGUhTvSauOHTumd999V5K0ePFi5cuXTzt37tT69evVvn17DRw4MMGDxNvF09NTLi4uSp48uXx9fSVJjo6Ocdo3Ojpac+fOVcqUz/8T2KxZM23atEkjRozQo0ePNG/ePC1atEgVK1aUJM2ZM0d+fn4xjlOhQgV99tlnp
sdNmzZVxYoVNWDAAElSrly5dOLECX399ddq2bLlm3Q3UeUvUEDDRgTL3z+rbt26pW+mT1Wr5k217Odf5e6ewtrhIRHdvn1LkpQ6dWqz9d6pU+vO7dvWCCnR3Lt/T1FRUTH6mjp1al248JeVoko43smdJUn3w83LlO+HPzVtk6ReFbJr9fEbOnsrNNbrcm88itCXv55Sv/dzqlvZrHJ0MOhEyCP1X3U6cTuQCM6eOa1mTRorMjJCyZMn1/hJU5U9Rw5rh2UxfL5t5/MNc5xvJEXxTnifPn0qV9fnE21s3LhRtWvXliQFBgbq+vXrCRsd7I6/v78p2ZWk9OnT6+bNm5Kkv/76S0+fPjX94CI9T64DAgJiHKdo0aJmj0+ePKk6deqYrXvvvfc0YcIERUVFxZqQR0RExLiWyujoanr/W0Kp0mVN/84VEKj8BQqqWuXyWrd2jT6o39BicQBIXHXyp1MyF0f99OfLKzhSJXNW93JZteH0LW09e0fJnB3V/N2MGlAlp7749ZQFo31z/v5ZtXjZSj1+/Egb1q/TgC/7aPbcBXaV9AKwT8zSbHnxLmnOmzevZsyYoR07dmjDhg2qWvX5jLHXrl2L8WsO8IKDg4OMRvOSu6dPY07K4uzsbPbYYDAoOjo63s/n7u4e733+LTg4WJ6enmbL16OC3/i4b8LDw0NZsvjryuWY1/fBtvj4pJGkGJN83L1zR6l9fKwRUqJJ5ZVKjo6OMfp6584d+dhAX++GPf+u80pm/v3mlczZtK1QBg/lTpdCq9q9q9/av6s5TQtJkqY0zKdeFbJJkmrlT6fQyCjN3n1F52+H6dj1Rxq98bwKZ/JUYLq3q+LD2cVFmbNkUZ68+dStx2fKFRCohQu+t3ZYFsPn23Y+3zDH+UZSFO+Ed9SoUfrmm29Urlw5ffTRRypYsKAk6ZdffjEbeYN9c3FxMbtuN02aNHr06JFCQ0NN6w4dOhSvY2bLlk3Ozs7at2+fad2DBw905syZV+6bO3du7dy502zdzp07lStXrpeWW/ft21cPHjwwW3r36RuvmBNaWGiorly5Ip80aawaBxJfhowZ5eOTRnv/2G1a9/jxYx09ctjmrnV0dnFR7jx5tecffY2OjtaePbtVwAb6GvIwQndCI1U4o4dpXXJnRwWmS6GTIc8n5Zv2+yV1WHzUtPRf/bxMeeT6s5q75/lEkW5ODvrX74aK/v8Vb/tkI9HR0Xr6ls+oHx98vm3n8w1znG8kRfEuaS5Xrpxu376thw8fKlWq/80u2bZtWyVPnvw/9oQ98ff31549e3Tx4kWlSJFCxYsXV/LkyfXll1+qa9eu2rNnT7wnjEqZMqVatGih3r17y9vbW2nTptWgQYPk4ODwyvKQzz77TMWKFdOwYcP04Ycfavfu3ZoyZYqmTZv20n1cXWOWLz95Fq+Q39jYr0epbLnySu/np1s3b2r61MlydHRQteo1LRtIIgsLC9Xlf4xaX736t06dOilPT0+lT++nBw/u6/r167r1/+Xtly5ckCT5+PiYRkreRq/qd9NmzTVr5nRlzpJFGTJk1NQpE5UmbVqVr1jJilEnjmYtWmnAl32UN28+5ctfQAvmz1N4eLjq1vvA2qHFiZuTg/w83UyPfVO6Klvq5HoU8Uy3Hkdq5ZEQfVQkg64+eKKQhxFq8W5G3QmNNM3qfOuxebL35OnzHwyvPYjQ7dDn2/Zcuq96BX3VtGgGbTl7W8mdHdWqRCaFPIzQuVuheltMHD9WpUqXkW/69AoLDdVvq1dp/769mj5ztrVDS1B8vv/nbf98v66w0H+9B/7+W6dO/v97IJb5R2yFvZ7vuKKi2fLinfBKz6fTP3DggM6fP68mTZooZcqUpkmKAEnq1auXWrRooTx58ig8PFwXLlzQggUL1Lt3b82aNUsVK1bU4MGDTbcMiqtx48apffv2qlmzpjw8PPT555/rypUrcnNz+8/93nnnHS1evFgDBw7UsGHDlD59eg0dOjRJT1glSTduhOiL3j11//59pfL2VuF3imj+osXy9ratWxMdP3ZMbT5pbno8dvTz0vFadepp2IivtHXLZg3q/7/R9T69e0iS2nXorA6dulg22AT0qn63/KSNwsPDNWzwQD169FCF3ymiaTO+teh15JZStVp13bt7V9OmTNLt27cUEJhb07759q0p78yV1l1f181jety+VBZJ0vpTtzR2819afPC63Jwc1K1cVqVwcdLx64/Ub9VpPY2K++zKh68+1FcbzqlhYT81LJxeEU+jdfLGY/VfdUqR8TiOtd29e0f9+/bRrVs3lSJlSuXKFaDpM2crqOR71g4tQfH5/p+3/fP9uo4fP6ZPW/3vPTDm/98DtevU07CRX71st7eevZ5vJF0G478vrHyFS5cuqWrVqrp8+bIiIiJ05swZZcuWTd26dVNERIRmzJiRWLECMYSGhipDhgwaO3asWrdunejPZ+kR3qQift8SeNvZ66/PdWbusXYIVvFz2+LWDsEq7PV7zV4/37Avbq81pGcZzRYetnYILzW/aUFrh5Ao4n0Nb7du3VS0aFHdu3dPyZIlM62vV6+eNm3alKDBAf928OBB/fDDDzp//rz+/PNPNW3aVJJizMAMAAAAJDUGgyHJLrYq3r9/7NixQ7t27ZKLi/k9Av39/XX16tUECwx4mTFjxuj06dNycXFRkSJFtGPHDmb+AwAAABBDvBPe6Ohos9l3X/j777/N7p8KJIbChQvrwIED1g4DAAAAwFsg3iXN77//viZMmGB6bDAY9PjxYw0aNEjVq1dPyNgAAAAAwGY4GJLuYqviPcI7duxYValSRXny5NGTJ0/UpEkTnT17Vj4+Pvrhhx8SI0YAAAAAAOIt3glvxowZdfjwYf344486cuSIHj9+rNatW6tp06Zmk1gBAAAAAGBNrzVpt5OTkz7++OOEjgUAAAAAbJYtz4acVMU74f3+++//c3vz5s3/czsAAAAAAJYQ74S3W7duZo+fPn2qsLAwubi4KHny5CS8AAAAAIAkId4J771792KsO3v2rDp06KDevXsnSFAAAAAAYGsoaLa8eN+WKDY5c+bUV199FWP0FwAAAAAAa0mQhFd6PpHVtWvXEupwAAAAAAC8kXiXNP/yyy9mj41Go65fv64pU6bovffeS7DAAAAAAMCWODBLs8XFO+GtW7eu2WODwaA0adKoQoUKGjt2bELFBQAAAADAG4l3whsdHZ0YcQAAAAAAkKDinfACAAAAAOKPimbLi1PC27NnzzgfcNy4ca8dDAAAAAAACSVOCe/BgwfjdDADP1kAAAAAAJKIOCW8W7ZsSew4AAAAAMCmMUBoeQl2H14AAAAAAJKS15q0av/+/Vq8eLEuX76syMhIs23Lly9PkMAAAAAAAHgT8R7h/fHHH1WyZEmdPHlSK1as0NOnT3X8+HFt3rxZnp6eiREjAAAAALz1DIaku9iqeCe8I0eO1Pjx4/Xrr7/KxcVFEydO1KlTp9SoUSNlzpw5MWIEAAAAACDe4p3wnj9/XjVq1JAkubi4KDQ0VAaDQT169NDMmTMT
PEAAAAAAAF5HvK/hTZUqlR49eiRJypAhg44dO6b8+fPr/v37CgsLS/AAAQAAAMAWONhy7XASFe+Et0yZMtqwYYPy58+vhg0bqlu3btq8ebM2bNigihUrJkaMAAAAAADEW5wT3mPHjilfvnyaMmWKnjx5Iknq16+fnJ2dtWvXLtWvX1/9+/dPtEABAAAAAIiPOCe8BQoUULFixfTpp5+qcePGkiQHBwd98cUXiRYcAAAAANgKKpotL86TVm3btk158+bVZ599pvTp06tFixbasWNHYsYGAAAAAMBri3PCW7p0aX333Xe6fv26Jk+erIsXL6ps2bLKlSuXRo0apZCQkMSMEwAAAACAeIn3bYnc3d3VqlUrbdu2TWfOnFHDhg01depUZc6cWbVr106MGAEAAADgrWcwGJLsYqvinfD+U44cOfTll1+qf//+SpkypVavXp1QcQEAAAAA8EbifVuiF7Zv367vvvtOy5Ytk4ODgxo1aqTWrVsnZGwAAAAAALy2eCW8165d09y5czV37lydO3dOJUuW1KRJk9SoUSO5u7snVowArMwoo7VDsApuDm9ffm5b3NohWEX7JUesHYJVzGhYwNohALBDb1Rei9cS54S3WrVq2rhxo3x8fNS8eXN98sknCggISMzYAAAAAAB4bXFOeJ2dnbV06VLVrFlTjo6OiRkTAAAAAABvLM4J7y+//JKYcQAAAACATbPl2ZCTKsrIAQAAAAA2iYQXAAAAAGCTXvu2RAAAAACAuHOgotniGOEFAAAAANikOI3wxmfCqtq1a792MAAAAAAAJJQ4Jbx169aN08EMBoOioqLeJB4AAAAAsEmUNFtenBLe6OjoxI4DAAAAAIAExTW8AAAAAACb9FqzNIeGhmrbtm26fPmyIiMjzbZ17do1QQIDAAAAAFtiMFDTbGnxTngPHjyo6tWrKywsTKGhofL29tbt27eVPHlypU2bloQXAAAAAJAkxLukuUePHqpVq5bu3bunZMmS6Y8//tClS5dUpEgRjRkzJjFiBAAAAAAg3uKd8B46dEifffaZHBwc5OjoqIiICGXKlEmjR4/Wl19+mRgxAgAAAMBbz8GQdBdbFe+E19nZWQ4Oz3dLmzatLl++LEny9PTUlStXEjY6AAAAAABeU7yv4S1cuLD27dunnDlzqmzZsho4cKBu376t+fPnK1++fIkRIwAAAAAA8RbvEd6RI0cqffr0kqQRI0YoVapU6tChg27duqWZM2cmeIAAAAAAYAsMhqS72Kp4j/AWLVrU9O+0adNq7dq1CRoQAAAAAAAJId4jvAAAAAAAvA3iPcKbNWvW/7xh8l9//fVGAQEAAACALXKw5drhJCreCW/37t3NHj99+lQHDx7U2rVr1bt374SKCwAAAACANxLvhLdbt26xrp86dar279//xgEBAAAAAJKmqKgoDR48WAsWLFBISIj8/PzUsmVL9e/f31QJbDQaNWjQIM2aNUv379/Xe++9p+nTpytnzpym49y9e1ddunTRr7/+KgcHB9WvX18TJ05UihQpEjTeBLuGt1q1alq2bFlCHQ4AAAAAbIpDEl7iatSoUZo+fbqmTJmikydPatSoURo9erQmT55sajN69GhNmjRJM2bM0J49e+Tu7q4qVaroyZMnpjZNmzbV8ePHtWHDBq1atUrbt29X27Zt4xFJ3MR7hPdlli5dKm9v74Q6HAAAAAAgidm1a5fq1KmjGjVqSJL8/f31ww8/aO/evZKej+5OmDBB/fv3V506dSRJ33//vdKlS6eVK1eqcePGOnnypNauXat9+/aZ7gI0efJkVa9eXWPGjJGfn1+CxRvvhLdw4cJmk1YZjUaFhITo1q1bmjZtWoIFBgAAAACwjIiICEVERJitc3V1laurq9m6kiVLaubMmTpz5oxy5cqlw4cP6/fff9e4ceMkSRcuXFBISIgqVapk2sfT01PFixfX7t271bhxY+3evVteXl5mt7ytVKmSHBwctGfPHtWrVy/B+hXvhLdOnTpmCa+Dg4PSpEmjcuXKKTAwMMECAwAAAABbkpQnaQ4ODtaQIUPM1g0aNEiDBw82W/fFF1/o4cOHCgwMlKOjo6KiojRixAg1bdpUkhQSEiJJSpcundl+6dKlM20LCQlR2rRpzbY7OTnJ29vb1CahxDvh/XeHAQAAAABvt759+6pnz55m6/49uitJixcv1sKFC7Vo0SLlzZtXhw4dUvfu3eXn56cWLVpYKtw4i3fC6+joqOvXr8fIyO/cuaO0adMqKioqwYIDAAAAACS+2MqXY9O7d2998cUXaty4sSQpf/78unTpkoKDg9WiRQv5+vpKkm7cuKH06dOb9rtx44YKFSokSfL19dXNmzfNjvvs2TPdvXvXtH9CifcszUajMdb1ERERcnFxeeOAAAAAAMAWORgMSXaJq7CwMDk4mKeRjo6Oio6OliRlzZpVvr6+2rRpk2n7w4cPtWfPHgUFBUmSgoKCdP/+fR04cMDUZvPmzYqOjlbx4sXf5CWOIc4jvJMmTZIkGQwGffvtt2b3R4qKitL27du5hhcAAAAAbFitWrU0YsQIZc6cWXnz5tXBgwc1btw4ffLJJ5Ke54vdu3fX8OHDlTNnTmXNmlUDBgyQn5+f6tatK0nKnTu3qlatqjZt2mjGjBl6+vSpOnfurMaNGyfoDM1SPBLe8ePHS3o+wjtjxgw5Ojqatrm4uMjf318zZsxI0OAAAAAAAEnH5MmTNWDAAHXs2FE3b96Un5+f2rVrp4EDB5rafP755woNDVXbtm11//59lSpVSmvXrpWbm5upzcKFC9W5c2dVrFhRDg4Oql+/vmmQNSEZjC+rUX6J8uXLa/ny5UqVKlWCB4M3V65cORUqVEgTJkywdig26ckzyz7f4h8XafFPP+ja1auSpOw5cqpdh44qVbqsReOIjt/XRLwd2L9P38+ZrRMnjuv2rVsaN3GKylf831T2d27f1sTxY7R71049fvRI7xQpqs+/7K8sWfwTNa74lPcklBs3bmjCuK+1c8cOPXkSrkyZs2jo8JHKmy+/xWOxpAP792nud7N18sQx3bp1S+MnTVWFf7wH7MHsWTM1acJYNf24uT7v289iz9t+yZEEPZ6bk4M+KJBO72T0lIerky7dC9eiP6/pwt1wORqkDwr4qoBfSqVN4aqwyCiduPFYSw5f1/3w/33Bdivtr8yp3OTh5qTQyCidCHmsxf9q86ZmNCyQYMeKi9mzvtGmDet14cJfcnVzU6FChdW9Zy/5Z81m0TgszV77PX3qZM2YNsVsnX/WrPp51VorRWRZPy5aqHlzZuv27VvKFRCoL74coPwFLPeZc4v3LEWWM3DdWWuH8FJDq+S0dgiJIt7X8G7ZsoVkF2+1rVu3ymAw6P79+9YO5ZXSpvNVtx699MOS5Vq0eJneLV5C3Tp30rlzSffL8nWEh4crV0Cg+vYbGGOb0WhUj26d9Pfff2vCpGn6YclypffzU/tPP1F4WJgVok08Dx88UMuPP5KTk7Omzpil5b+s1me9+8jDw9PaoSW68PAwBQQEqG//QdY
OxSqOHT2ipUt+VK5cAdYO5Y21ejej8vqm1MzdV9R/zRkdD3ms3uWzySuZk1ycHJTFO5l+OXZTg9ae1ZTfL8k3pau6lfY3O8bJm481bedlfbHqtKb8fklpUrio03tZrNOhBLJ/3159+FFTzf9hsb6ZNUfPnj1T+zatFWZj32P/Zq/9lp7/SL1p6++mZe78RdYOySLWrvlNY0YHq13HTvpxyQoFBASqQ7vWunPnjrVDg52K9+8f9evX17vvvqs+ffqYrR89erT27dunJUuWJFhwSHyRkZFMNpaElStfwexxl249tPjHH3Tk8CHlyGE7v8KVKl1GpUqXiXXb5UsXdfTwYS1d+auy/3+fvxwwWJXKldKa31brgwYNLRlqovpu9iyl8/XVsBHBpnUZM2ayYkSWU6p0WYtXLiQVYaGh6tuntwYNGa5Z30y3djhvxNnRoKKZPDVpx0WduRUqSVp57IYKZUipCjlSa/nRGxqz5YLZPgsOXNWgKjnlndxZd8OeSpLWn75t2n4n7KlWn7ylrqWzyNEgRSVuwUmimT5zttnjoSO+UvnSQTp54riKFC1mpagSn732W5KcHB3lkyaNtcOwuPnz5uiDBo1Ut159SVL/QUO0fftWrVy+TK3btLVydLBH8R7h3b59u6pXrx5jfbVq1bR9+/YECQpv5tmzZ+rcubM8PT3l4+OjAQMGmGbX9vf317Bhw9S8eXN5eHiobdvnXzzLli1T3rx55erqKn9/f40dO9Z0vClTpihfvnymxytXrpTBYDC7ZrtSpUrq37+/pOf3ai5UqJDmz58vf39/eXp6qnHjxnr06FGc4l+6dKny58+vZMmSKXXq1KpUqZJCQ5//x6lly5aqW7euhgwZojRp0sjDw0Pt27dXZGSkaf+IiAh17dpVadOmlZubm0qVKqV9+/ZJki5evKjy5ctLklKlSiWDwaCWLVvG9yW2iqioKK35bbXCw8NUsGBha4djMS/OrYvL/6bJd3BwkIuziw4dPPCy3d5K27ZsVt68+dSrR1eVKx2kRvXratmSxdYOC4ls5PChKlOmrEoElbR2KG/M0WCQo4NBkf/KSiOjjMqVxj3WfZI5OyraaFRYZOy3NXR3cVRQFi+dux321ia7sXn8/38TPTxtv4Ljn+yp35cuX1KlcqVUvUpF9f38M12/ds3aISW6p5GROnniuNn3mYODg0qUKKkj/8fefcc1da5xAP8FBGUEEBRRZIgMFUFRVHCiWHHPOql7K+5VrbhXVRxQt1aoC22dtWqrKLhQEbegdeMAwYEIssn9g2tqCipakgMnv28/udecc3LyvOeEJE/e533P1csCRlZ0aEiK7k2svjjhTU5OzrdHUEtLC0lJSYUSFP03QUFBKFGiBC5cuICVK1di2bJl2Lhxo3z90qVLUaNGDVy+fBm+vr6IjIxEt27d0KNHD1y/fh2zZs2Cr68vAgMDAQBNmjRBVFQUEhISAABhYWEoU6YMQkNDAQCZmZkIDw+Hh4eH/Dnu3buHffv24eDBgzh48CDCwsKwaNGiz8YeGxuLnj17YsCAAYiOjkZoaCg6d+6scDmskJAQ+bodO3Zgz549mD17tnz95MmTsXv3bgQFBeHSpUuwtbWFl5cXXr16BQsLC+zevRsAcPv2bcTGxmLlypVfe6hV4s7ft+Hm6oI6Lk6YP2cmlvuvQmVbW6HDUhnrSjYwK18BASuXIenNG2RmZmDzpg14/jwOL/7/mhSLJ08eY9fOHbC0ssaa9ZvQrXtP/LhwHg7s2yt0aKQkhw/9gejoKIweN0HoUApFWlYO7iSkoIOjKYx0SkAiAdytjWBrogtDHa0822tpSNCtphnOP0pEWlaOwrquNcywrmt1rOriCBM9Law8+VBFrVC+nJwcLP5xAWq61IKdnb3Q4aiMOrXbydkZc+cvxOp1G/GD7yw8ffoU/ft4IyUlWejQlOp14mtkZ2fDxMREYbmJiQlevHjxkUcRKdcXlzQ7OTlh586dCrNwAUBwcDCqVatWaIHR17OwsMDy5cshkUjg4OCA69evY/ny5Rg8eDAAoFmzZpgw4Z8vV97e3vD09ISvry8AwN7eHlFRUViyZAn69euH6tWrw9jYGGFhYfj2228RGhqKCRMmyBPFCxcuIDMzE/Xr//NrXk5ODgIDAyGVSgEAvXv3RkhICObPn//J2GNjY5GVlYXOnTvDyip3vJaTk+JkPdra2vj555+hq6sLR0dHzJkzB5MmTcLcuXORmpqKNWvWIDAwEK1atQIAbNiwAUePHsWmTZswadIkGBsbAwBMTU1hZGT00VjS09ORnp6usEymWbALchcma+tK2LV7H5KT3+LoX3/Cd9oUbArcqjZJr5aWFvxW+GP2jOlo0qAeNDU1Uc/NHQ0aNf7odcGLq5wcGRyrV8foseMBAFWrVsPdu3fw665gtO/YSeDoqLDFxcZi8aL5WLfhZ5W/ryjT+nOPMbBeRazoWA3ZOTI8ep2KczGJsC6to7CdpgQY0cAKgARBEU/z7OdwdAJO3n+FMnra6FC9HIa4WWC5SJLeBfNm496dO2ozpvM9dWr3h0M07B2qwMm5Blp90xR/HjmMzl3EMxSHqDj44oTX19cXnTt3xr1799CsWe74wpCQEOzYsYPjd4sINzc3SD6YXdbd3R1+fn7Izs4tF3N1dVXYPjo6Gh06dFBY1qBBA6xYsQLZ2dnQ1NRE48aNERoaiubNmyMqKgojRozA4sWLcevWLYSFhaFOnTrQ1dWVP97a2lqe7AJA+fLlER8f/9nYa9SoAU9PTzg5OcHLywstWrTAt99+qzBRWo0aNRSey93dHcnJyXj8+DHevHmDzMxMNGjQQL5eS0sLdevWRXR09Gef/0MLFy5U6DkGgB98Z2L6jFlftJ//SktbG5b/T/6rOVbHzRvXsW3rL5gxa45K4xBSNcfq2Ll7H96+fYvMzEwYGxujd89uqOZY/fMPLkbKli0Lm8qVFZbZ2Njg2NE/BYqIlCkq6iZevXyJHl07y5dlZ2cj8mIEgndsQ8Tl6wqXACwuEpIzsCjkPrQ1JdDR0sSbtCwMr2+JhOR/hp68T3ZN9LTw4/H7eXp3ASA5IxvJGdl4/jYDz96kY3nHqqhsoot7L4v3ZEcL5s3BybBQ/By0FeXMzIQOR2XUtd3vGRgYwMrKGo9jYoQORalKG5WGpqZmngmqXr58iTJlyggUVdEixBUg1N0XlzS3a9cO+/btw927dzFixAhMmDABT548wbFjx+QXEqaiTU8v/3FUn+Lh4YHQ0FCcOnUKLi4uMDAwkCfBYWFhaNJEcbIZLS3F0jWJRIKcnLxfaP5NU1MTR48exeHDh1GtWjUEBATAwcEBDx48+OxjC9vUqVPx5s0bhdukKVNVHse/5eTkIPODMcvqRCqVwtjYGI8ePUTUzRt5JvUq7mq61MLDf73WHz18iAoVzAWKiJSpnpsbftv3O3bu3ie/OTpWR+u27bBz975imex+KCNbhjdpWdDV0oRTeSkuPc0d9vQ+2S0nLYklJ+4j5SNjdz/0/vuhlmbx/aIok8mwYN4cHA
85ig0/B6nNhHTq2u5/e5eSgsePH4t+EistbW1UreaI8+fC5ctycnJw/nw4nNVo/hEqWr7qKlVt2rRBmzZt8iy/ceOGwuRGJIzz588r3D937hzs7Ow++uWpatWqOHPmjMKyM2fOwN7eXv6YJk2aYOzYsfj111/lY3U9PDxw7NgxnDlzRqFE+r+SSCRo0KABGjRogBkzZsDKygp79+7F+PG5ZZ5Xr15FamoqdHR05O3T19eHhYUFypQpA21tbZw5c0ZeEp2ZmYmIiAiMHTsWAORj0N/3eH9MyZJ5y5dVfR3elcv90LBRY5iVL493KSk49MdBXIy4kGfWy+Lu3bsUhV+9nz59gtu3omFgaIjy5Svg6J9HULp0aZiVr4A7d/7GkkXz4dHME+4NGgoYdeH7rk9f9P2uJzauX4sWXq1yL1Xz2y616M1/l5KCmA9fA0+e4FZ0NAwNDVG+QgUBI1MePT39POMYdXR1YWRoVKzHN1Y304dEAsQmpaOctCS61yyP2KQ0nL7/CpoSYGRDK1iV1sGKkw+hIZHA8P8XzEzOyEZ2jgw2JjqoZKyLOwkpSMnIhqlUG52dzPD8bTruvii+vbsL5s7G4UMHsSJgNfR09eRzEOhLpShVqpTA0SmPurbbb8mPaOLRFOUrVEBCfDzWrAqApqYGWrVuK3RoSte7b3/4TpsCR8fqqO7kjK1bgpCamoqOnTp//sFESvCfL8v89u1b7NixAxs3bkRkZORnkwhSvpiYGIwfPx5Dhw7FpUuXEBAQoDDr8r9NmDABderUwdy5c9G9e3eEh4fjp59+wurVq+XbODs7o3Tp0ti+fTsOHjwIIDfhnThxojxBLQznz59HSEgIWrRoAVNTU5w/fx4JCQmoWrWqfJuMjAwMHDgQ06dPx8OHDzFz5kz4+PhAQ0MDenp6GD58uHysrqWlJRYvXox3795h4MCBAAArKytIJBIcPHgQrVu3ho6ODvT19Qsl/sL26tVLTJ86BQkJ8dCXSmFv74A16zfBvX7hHO+iIurGDQwe0Fd+329x7gRn7Tp0xJz5i5CQEA+/xYtyS6LKlkXb9h0wZNhwocJVmupOzli28if4r1iGdWtWwbxiRUyeMg1t2rYXOjSlu3nzBgb17yO/v3Rx7qWZ2nfohLkLPj/hHRUdOlqa6FrDDKV1tZCSkY2Lj99g97U4ZMuAMnpaqFUxd3beua0Uk/pFIfdwKz4FGVky1LYwRCencihZQgOJqVm4HvsWB24+R1ZO8R23v2vnDgDAwH69FZbPmbcQHUScCKhru58/j8P3k8YjMTERpY2N4VKrNrZs3yWfR0TMWrZqjdevXmH1T/548SIBDlWqYvW6jTBhSTMJRCL7yllfTp48iY0bN2LPnj2oUKECOnfujC5duqBOHXFfU62o8/DwgKOjI3JycrB9+3Zoampi+PDhmDdvHiQSCaytrTF27Fh5b+d7u3fvxowZM3Dnzh2UL18eo0aNwsSJExW26dixI/744w+8fv0a+vr6yMnJQZkyZeDg4IDw8H9KV2bNmoV9+/bhypUr8mUrVqzAihUr8PDhw0/GHx0djXHjxuHSpUtISkqClZUVRo0aBR8fHwC5lyVKTExEjRo1sGrVKqSnp6Nnz54ICAiQ98ampaVh8uTJ2LFjB96+fQtXV1csX75c4bU5d+5crF69Gs+fP0efPn3kM1J/jqp7eIuKHJFNDlVQHGdD6mDYr9eEDkEQa7s6Cx0CESlJqf/cpac8c4/dFTqEj/JtLs4JUb8o4Y2Li0NgYCA2bdqEpKQkdOvWDWvXrsXVq1c5QzOpxPuEd9++fYI8PxNe9cKEl9QBE14iEhsmvF9HrAlvgSetateuHRwcHHDt2jWsWLECz549Q0BAgDJjIyIiIiIiIvpqBf794/Dhwxg9ejSGDx8OOzs7ZcZEIhYTE/PJaoCoqChYWlqqMCIiIiIiItXQYPGYyhU44T19+jQ2bdqE2rVro2rVqujduzd69OihzNhIhCpUqKAwtje/9Z9S0LG2REREREREBU543dzc4ObmhhUrVmDnzp34+eefMX78eOTk5ODo0aOwsLCAVCpVZqwkAiVKlICtrTjHBxARERERUdFS4DG87+np6WHAgAE4ffo0rl+/jgkTJmDRokUwNTVF+/biv3QGERERERHR15AU4f/E6osT3g85ODhg8eLFePLkCXbs2FFYMRERERERERH9Z/8p4X1PU1MTHTt2xIEDBwpjd0RERERERET/WRG+ShUREREREZF4cJZm1SuUHl4iIiIiIiKiooYJLxEREREREYkSS5qJiIiIiIhUgCXNqsceXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgGJhDXNqsYeXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgHO0qx67OElIiIiIiIiUWLCS0RERERERKLEkmYiIiIiIiIV4CTNqsceXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgEN1jSrHHt4iYiIiIiISJSY8BIREREREZEosaSZiIiIiIhIBTRY0axy7OElIiIiIiIiUWLCS0RERERERKLEkmYiIiIiIiIV4CTNqsceXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgENsKZZ1ZjwEtHnyYQOQCD8TCI1sOZbZ6FDEMTBm7FChyCIto7lhQ6BiEilWNJMREREREREosQeXiIiIiIiIhXgLM2qxx5eIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSAQ2WNKsce3iJiIiIiIhIlJjwEhERERERkSixpJmIiIiIiEgFNDhNs8qxh5eIiIiIiIhEiQkvERERERERiRJLmomIiIiIiFSAFc2qxx5eIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSAc7SrHrs4SUiIiIiIiJRYsJLREREREREosSSZiIiIiIiIhVgRbPqsYeXiIiIiIiIRIkJLxEREREREYkSS5qJiIiIiIhUgL2NqsdjTkRERERERKLEhJeIiIiIiIhEiSXNREREREREKiDhNM0qxx5eIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSARY0qx57eImIiIiIiEiUmPASERERERGRKLGkmYiIiIiISAU0OEuzyrGHl4iIiIiIiESJCS8RERERERGJEkuaiYiIiIiIVIAFzarHhLcI8fDwQM2aNbFixQqhQ1GJWbNmYd++fbhy5QoAoF+/fkhMTMS+ffs++hh1O0a7grdj184dePb0KQCgsq0dhg4fgYaNmggcWeHatHEdjh87iocP7qNkqVKoUcMFY8ZNgHUlG/k282bPwPlz4UhIiIeOru7/t5mISjY2n9hz8bJpwzqEHP0LD/5/HGrWdMHY8RMVjoNYRV6MQODPmxAddQMJCQlY7r8KzTybCx2WSm3asB7+K/zg/V0fTJ76g9DhKFVKSjJWBazEiZBjePXqJRyqVMPk76ehupOz0KF9lZBdm3H8tyCFZWUqWGDcii14HR+LpT49831cj3Gz4OTuAQD4oZtHnvXdx/jCuYFnYYerU
uryOfYxwdu3IWjzJrx4kQB7hyr4fpovnJyL5+v8S6hru6loYsJLgpk4cSJGjRoldBhFmmk5M4wZNxGWVlaQyWT4ff8+jPEZiZ2798LW1k7o8ArNpYsR6N6jFxyrOyErOxs/rVyO4UMHYc++g9DR1QUAVK3miFZt2qF8+fJ48+YN1q75CSOGDsTBI8egqakpcAsKx8WIC+je0xuOTk7IzspGwMplGDZ4IPYc+AO6/z8OYpWa+g4ODg7o2LkLxo/xEToclbtx/Rp++zUY9vYOQoeiErNnTMfdu3cwb+FilDU1xR+/H8Cwwf2xe/8hlCtXTujwvoqphTUG+PrJ72to5L4vGZYxxffrdytsG3HsIE4dCIa9S12F5V1GTIFdzX+WldLVV2LEqqEun2P5OXL4EJYuXojpM2fDyakGtm0JwvChA7H/4BGYmJgIHZ7SqGu7qehiwkuC0dfXh75+8f8wVyaPps0U7o8aMw67gnfg2tUrovqisGrtRoX7s+cthGeT+oiKuonarnUAAF26dpevr2BeESN9xqL7tx3w7NlTWFhYqjReZVmzfpPC/TnzF6FpI3dEf3AcxKphoyZq0+Pzb+9SUjB1yiTMnD0PG9atETocpUtLS0PIsb+w3H+1/HU9fOQonAw7gV93bofP6HECR/h1NDQ0ITXK+2U+v+VRF07Byb0pSpZS/CGrlK5+vvsoztTlcyw/W4I2o/O33dCxUxcAwPSZs3HyZCj27dmNgYOHCByd8qhruwuKkzSrHietEkhKSgr69OkDfX19lC9fHn5+fgrrX79+jT59+qB06dLQ1dVFq1atcOfOHQCATCZD2bJl8dtvv8m3r1mzJsqXLy+/f/r0aZQsWRLv3r0DAEgkEmzcuBGdOnWCrq4u7OzscODAAfn+bG1tsXTpUoUYrly5AolEgrt37362PRKJBOvWrUPbtm2hq6uLqlWrIjw8HHfv3oWHhwf09PRQv3593Lt3T/6YWbNmoWbNml99jAAgPT0dEydOhLm5OfT09FCvXj2EhoZ+9XEqyrKzs3H40B9ITX2HGjVchA5HqZKT3wIADA0N812f+u4dDuzbA3PzijAzM1NlaCqV/Db3OBh85DiQOCyYNweNGzeBm3t9oUNRiezsLGRnZ6NkyZIKy0uWLInLly4JFNV/9zLuKRYN7YKlPj2xy38eEl88z3e7p/dvI/bhXdRu1jrPugObVmL+wPZYPXUYLh4/BJlMpuywVUqdPscyMzIQHXVT4e9aQ0MDbm71ce3qZQEjUy51bbc6evr0Kb777juYmJhAR0cHTk5OuHjxony9TCbDjBkzUL58eejo6KB58+byXOa9V69ewdvbGwYGBjAyMsLAgQORnJxc6LEy4RXIpEmTEBYWhv379+Ovv/5CaGgoLn3wQd+vXz9cvHgRBw4cQHh4OGQyGVq3bo3MzExIJBI0btxYnti9fv0a0dHRSE1Nxa1btwAAYWFhqFOnjkIZ5OzZs9GtWzdcu3YNrVu3hre3N169egWJRIIBAwZg8+bNCjFu3rwZjRs3hq2tbYHaNHfuXPTp0wdXrlxBlSpV0KtXLwwdOhRTp07FxYsXIZPJ4ONT8FLFzx0jAPDx8UF4eDiCg4Nx7do1dO3aFS1btsSdO3e++jgVNXf+vg03VxfUcXHC/Dkzsdx/FSoX8JwURzk5OVj64wLUdKkFWzt7hXW7grejft1aqF+vFs6cPok1G36Glpa2QJEqV05ODhb//zjY/es4kHgcPvQHoqOjMHrcBKFDURk9PX0413DB+rWrER//HNnZ2fjj9/24dvUKXryIFzq8r1LRrhq6jPge/aYtRodB4/A6PhYbZoxGemreH1MvHj+EsuZWsHKorrDcs9sA9Bw3E/2n+6F6vcb4fdNyhB/eo6omKJW6fY4BwOvE18jOzs5TwmtiYoIXL14IFJXyqWu71c3r16/RoEEDaGlp4fDhw4iKioKfnx9Kly4t32bx4sXw9/fH2rVrcf78eejp6cHLywtpaWnybby9vXHz5k0cPXoUBw8exMmTJzFkSOFXAbCkWQDJycnYtGkTtm7dCk/P3MkogoKCULFiRQDAnTt3cODAAZw5cwb16+f+QrZt2zZYWFhg37596Nq1Kzw8PLBu3ToAwMmTJ+Hi4gIzMzOEhoaiSpUqCA0NRZMmiuWB/fr1Q8+euRNnLFiwAP7+/rhw4QJatmyJfv36YcaMGbhw4QLq1q2LzMxMbN++PU+v76f0798f3bp1AwBMmTIF7u7u8PX1hZeXFwBgzJgx6N+/f6EcIwCIiYnB5s2bERMTgwoVKgDIHRd85MgRbN68GQsWLPiq4/Reeno60tPTFZbJNEvm6ZVQNmvrSti1ex+Sk9/i6F9/wnfaFGwK3CraLwsL58/B3bt3sDloe551rdq0Qz33+niRkIBfgn7GlAljsXnLDpWfE1VYMG827t25g8AteY8DiUNcbCwWL5qPdRt+FuVr+FPmL1yMWTOmoUWzxtDU1ESVqtXQslUbREfdFDq0r+LgUk/+bzOryqhoVxVLRvTA9fATcG3WRr4uMyMd104fQ9MuffLso9m3/yyrUMkOGelpOP17MOq37qLc4FVA3T7HiD5FUoRrmvP77luyZN7vvj/++CMsLCwUOssqVaok/7dMJsOKFSswffp0dOjQAQDwyy+/oFy5cti3bx969OiB6OhoHDlyBBEREXB1dQUABAQEoHXr1li6dKn8u31hYA+vAO7du4eMjAzUq/fPB6SxsTEcHHInK4mOjkaJEiUU1puYmMDBwQHR0dEAgCZNmiAqKgoJCQkICwuDh4cHPDw8EBoaiszMTJw9exYeHh4Kz+v8wex4enp6MDAwQHx87q/pFSpUQJs2bfDzzz8DAH7//Xekp6eja9euBW7Xh/t/P+mIk5OTwrK0tDQkJSV9dl+fO0YAcP36dWRnZ8Pe3l4+HlhfXx9hYWHy0umvOU7vLVy4EIaGhgq3JT8uLPDxKCxa2tqwtLJCNcfqGDNuAuwdqmDb1l9UHocqLJo/B6fCQrFh0y8ol0+pslQqhZWVNWq71sHSZSvx4OEDHA85KkCkyrVg3hycDAvFhs1B+R4HEoeoqJt49fIlenTtjFrO1VDLuRouRlzA9m1bUMu5GrKzs4UOUWksLC2xKXArwi9cxpFjodgW/BuysrJgXtFC6NAKhY6eFGUqVMTLuKcKy2+cC0Nmejpcmnh9dh8V7arizcsEZGVmKCtMlVGnz7H3ShuVhqamJl6+fKmw/OXLlyhTpoxAUSmfurZbLPL77rtwYd7vvgcOHICrqyu6du0KU1NTuLi4YMOGDfL1Dx48QFxcHJo3/+dqC4aGhqhXrx7Cw8MBAOHh4TAyMpInuwDQvHlzaGho4Pz584XaLia8xZSTkxOMjY0RFhamkMiFhYUhIiICmZmZ8t7h97S0tBTuSyQS5OTkyO8PGjQIwcHBSE1NxebNm9G9e/cvKvX9cP/vf73Kb9mHz/lfJCcnQ1NTE5GRkbhy5Yr8Fh0djZUrVwL4uuP03tSpU/HmzRuF26QpUwsl9v8iJycHmRnF
/wvQh2QyGRbNn4Pjx49h3aZAmH/Qk//xx+T+j5iOhUwmw4J5c3A85Cg2/ByEiiL58k/5q+fmht/2/Y6du/fJb46O1dG6bTvs3L1PNLOPf4qOri7KljVF0ps3OHv2NDyaFe9L8LyXnvYOr+Ke5ZmAKvL4H6jiWh96Bkaf3Ufsw7vQ0ZOihAiHbYjxc+zftLS1UbWaI86fC5cvy8nJwfnz4XAW8fhldW23WOT33Xfq1Lzffe/fv481a9bAzs4Of/75J4YPH47Ro0cjKCj38mxxcXEAkGfW/XLlysnXxcXFwdTUVGF9iRIlYGxsLN+msLCkWQCVK1eGlpYWzp8/D0vL3NllX79+jb///htNmjRB1apVkZWVhfPnz8uTsZcvX+L27duoVq0agNzksVGjRti/fz9u3ryJhg0bQldXF+np6Vi3bh1cXV2hp6f3RXG1bt0aenp6WLNmDY4cOYKTJ08WbsO/wOeOEQC4uLggOzsb8fHxaNSoUb77+S/HKb8SjrSsQmxkAaxc7oeGjRrDrHx5vEtJwaE/DuJixIU8s/kWdwvnz8HhQwexfOUq6Onp4cWLBACAvr4UpUqVwpPHj/Hnn4fg7t4ApY2N8fx5HDZv2oCSJUuKambfBXNn4/Chg1gRsBp6unp4kfD/4yDNPQ5i9i4lBTExMfL7T588wa3oaBgaGqJ8IZY1FSV6evp5xmfr6OrCyNBI9OO2z545BZlMBmvrSoiJicFyv8WoVMkGHTp2Fjq0r3L4l9Wo4lofRmXKIen1S4Ts2gyJhgZqNPwngX8Z9wQPo6+hz9RFeR4fffEskt+8gqVdNZTQ1sbda5EI27sNDdt1z7NtcaMun2P56d23P3ynTYGjY3VUd3LG1i1BSE1NRcdOxfN1XlDq2u6CKsq9jfl9981PTk4OXF1dsWDBAgC538lv3LiBtWvXom/fvsoO84sx4RWAvr4+Bg4ciEmTJsHExASmpqb44YcfoKGR+ydgZ2eHDh06YPDgwVi3bh2kUim+//57mJuby+vgAcDDwwMTJkyAq6ur/PI+jRs3xrZt2zBp0qQvjktTUxP9+vXD1KlTYWdnB3d398Jp8Ff43DECAHt7e3h7e6NPnz7w8/ODi4sLEhISEBISAmdnZ7RpkztuqrCPkyq9evUS06dOQUJCPPSlUtjbO2DN+k1wr99A6NAK1a87dwAABg9QHNc2e+4CtO/YGdoltXE5MhLbt/yCpKQkmJiYoFZtVwRu2QFjEV3Tb9f/j8PAfr0Vls+ZtxAdRP5F4ebNGxjU/5/zv3RxbglV+w6dMHdB3gSBire3b98iYMUyPH8eB0NDI3h+0wI+o8flqUQqLt68SsDOlXPx7m0S9AwMYVXFCcPmr1boyY08fhgGxmVh65z3EmOaJTRx/s99OBS0CpDJYGxmjtZ9RsDVs60KW6Ec6vI5lp+WrVrj9atXWP2TP168SIBDlapYvW4jTERe2quu7VYn5cuXl3fCvVe1alXs3p17zfH3V9B4/vy5wtVRnj9/Lr9Ci5mZmXxo5XtZWVl49epVoV+BgwmvQJYsWYLk5GS0a9cOUqkUEyZMwJs3b+TrN2/ejDFjxqBt27bIyMhA48aNcejQIYUvA02aNEF2drbCGFQPDw/s37//o+NSP2fgwIFYsGBBgSeXUqbPHSMg9zjNmzcPEyZMwNOnT1GmTBm4ubmhbdt/viQo4zipyuy5C4QOQSUuX7/1yfWmpuXw05r1KopGOFdv3hY6BMHUqVtPrdv/3qbALUKHoBJeLVvDq2Xey/IUVz3GzvzsNi16DUaLXoPzXWdfsx7sa9bLd11xpy6fYx/T0/s79PT+TugwVE5d260uGjRogNu3FT+z//77b1hZWQHIncDKzMwMISEh8gQ3KSkJ58+fx/DhwwEA7u7uSExMRGRkJGrXrg0AOH78OHJychTm8CkMEpnYLvJG/8mpU6fg6emJx48f56m7J9WXNBcVOTnq+TahoVF0Z1IkKizq+i3gj6hYoUMQRFvH8p/fiKiYK1WEu/R2XXkmdAgf1a1mwYYQRUREoH79+vJLnl64cAGDBw/G+vXr4e3tDSB3JudFixYhKCgIlSpVgq+vL65du4aoqCj5MK1WrVrh+fPnWLt2LTIzM9G/f3+4urpi+/bCvUJFEX45kCqlp6cjISEBs2bNQteuXZnsEhERERFRHnXq1MHevXsxdepUzJkzB5UqVcKKFSvkyS4ATJ48GSkpKRgyZAgSExPRsGFDHDlyRGFOkm3btsHHxweenp7Q0NBAly5d4O/vX+jxsoeXAACBgYEYOHAgatasiQMHDsDc3Fy+btu2bRg6dGi+j7OyssLNm8Xzuolfgz286oU9vKQO1PVbAHt4icSLPbxfp6A9vMUNE176rLdv3+L58+f5rtPS0pLX66sDJrzqhQkvqQN1/RbAhJdIvIpywvtrEU54u4o04S3CLwcqKqRSKaRSqdBhEBERERERfZGifCkoIiIiIiIioq/GHl4iIiIiIiIVkEg4XErV2MNLREREREREosSEl4iIiIiIiESJJc1EREREREQqwN5G1eMxJyIiIiIiIlFiwktERERERESixJJmIiIiIiIiFeAszarHHl4iIiIiIiISJSa8REREREREJEosaSYiIiIiIlIBFjSrHnt4iYiIiIiISJSY8BIREREREZEosaSZiIiIiIhIBThJs+qxh5eIiIiIiIhEiQkvERERERERiRJLmomIiIiIiFRAg/M0qxx7eImIiIiIiEiUmPASERERERGRKLGkmYiIiIiISAU4S7PqsYeXiIiIiIiIRIkJLxEREREREYkSS5qJiIiIiIhUQMJZmlWOPbxEREREREQkSkx4iYiIiIiISJRY0kxERERERKQCnKVZ9djDS0RERERERKLEhJeIiIiIiIhEiSXNRPRZEtbfEImWuv55t3UsL3QIgoi4/1roEARRx6a00CEQAQA0OEuzyrGHl4iIiIiIiESJCS8RERERERGJEkuaiYiIiIiIVEBdh5EIiT28REREREREJEpMeImIiIiIiEiUWNJMRERERESkAixpVj328BIREREREZEoMeElIiIiIiIiUWJJMxERERERkQpIwJpmVWMPLxEREREREYkSE14iIiIiIiISJZY0ExERERERqYAGK5pVjj28REREREREJEpMeImIiIiIiEiUWNJMRERERESkApylWfXYw0tERERERESixISXiIiIiIiIRIklzURERERERCogYUWzyrGHl4iIiIiIiESJCS8RERERERGJEkuaiYiIiIiIVICzNKsee3iJiIiIiIhIlJjwEhERERERkSixpJmIiIiIiEgFNFjRrHLs4SUiIiIiIiJRYsJLREREREREosSSZiIiIiIiIhXgLM2qxx5eIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSAQkrmlWOPbxEREREREQkSkx4iYiIiIiISJREnfB6eHhg7NixQodRYNbW1lixYkWBtw8MDISRkVGBtp01axZq1qz5VXF9yvr162FhYQENDY3Pxl6QGPr164eOHTvK73/NOZR
IJNi3b98XPaYoi7wYgVEjhqG5R0PUcHTA8ZBjQoekdNnZ2VgVsAKtvZqhXm1ntG3ZHOvXroJMJhM6NKVasyoANRwdFG4d2rYUOiyVCd6+Da2+aYY6Lk7w7tEV169dEzokpdq0YR16desC9zou8GjkjrGjRuDhg/tCh6V0u4K349tO7VC/bi3Ur1sLvXt1x+lTYUKHpXTq8Pd96NdfMKidG4I3LJcvWzx1OAa1c1O4bVn1o8Lj/r1+UDs3XDh5VNXhK4W6va+9p67tLghJEb6JFcfw/gf9+vVDYmJioSVXERER0NPTK5R9qUJSUhJ8fHywbNkydOnSBYaGhoX+HHv27IGWllah7jM0NBRNmzbF69evC/yDgZBSU9/BwcEBHTt3wfgxPkKHoxKbN23Arzt3YM78H1HZ1hZRN29g5vSp0NeXotd3fYQOT6kq29ph/cbN8vuaJTQFjEZ1jhw+hKWLF2L6zNlwcqqBbVuCMHzoQOw/eAQmJiZCh6cUFyMuoHtPbzg6OSE7KxsBK5dh2OCB2HPgD+jq6godntKYljPDmHETYWllBZlMht/378MYn5HYuXsvbG3thA5PqcT89/3g7yicPLIXFa1t86xr7NUBHbyHyO9rlyyVZ5v+Y6ajem13+X1dPX3lBKpC6vi+Bqhvu6noYsJbBGRkZEBbWxtly5YVOpQvEhMTg8zMTLRp0wbly5dXynMYGxsrZb/FScNGTdCwUROhw1Cpq1cuw6OpJxo38QAAmJtXxJFDf+DGdfH/QlxCUxNlitl7QWHYErQZnb/tho6dugAAps+cjZMnQ7Fvz24MHDzkM48untas36Rwf878RWjayB3RUTdR27WOQFEpn0fTZgr3R40Zh13BO3Dt6hXRJ7xi/ftOS32HjX4z0WfUVBzcuTnPeu2SpWBY+tOJjq6e9LPbFDfq+L4GqG+7qegSdUkzAGRlZcHHxweGhoYoU6YMfH195WWRW7ZsgaurK6RSKczMzNCrVy/Ex8crPP7mzZto27YtDAwMIJVK0ahRI9y7dw+zZs1CUFAQ9u/fD4lEAolEgtDQUADA48eP0a1bNxgZGcHY2BgdOnTAw4cP5ft8X7Y7f/58VKhQAQ4ODgDyljQvW7YMTk5O0NPTg4WFBUaMGIHk5ORCOzYbN25E1apVUapUKVSpUgWrV69WWD9lyhTY29tDV1cXNjY28PX1RWZmJoDccmonJycAgI2NDSQSiUIbP2XdunWwsLCArq4uunXrhjdv3nx023+XNMfGxqJNmzbQ0dFBpUqVsH379nxLwV+8eIFOnTpBV1cXdnZ2OHDgAADg4cOHaNq0KQCgdOnSkEgk6NevX4HiJtWpUdMF58+fw6OHDwAAt2/dwuVLkWjQqLHAkSnfo5hHaO7REK29PDF18gTEPnsmdEhKl5mRgeiom3Bzry9fpqGhATe3+rh29bKAkalW8tu3AAADJVTLFFXZ2dk4fOgPpKa+Q40aLkKHo3Ri/fvetnYpnFwboFrNuvmuPxf6J8b28sKMkb2wO2g10tPS8t3H2F5emDd+AE4f/b3YD2FR1/c1dW33l9CQSIrsTaxE38MbFBSEgQMH4sKFC7h48SKGDBkCS0tLDB48GJmZmZg7dy4cHBwQHx+P8ePHo1+/fjh06BAA4OnTp2jcuDE8PDxw/PhxGBgY4MyZM8jKysLEiRMRHR2NpKQkbN6c+2umsbExMjMz4eXlBXd3d5w6dQolSpTAvHnz0LJlS1y7dg3a2toAgJCQEBgYGODo0Y+PUdHQ0IC/vz8qVaqE+/fvY8SIEZg8eXKexPRrbNu2DTNmzMBPP/0EFxcXXL58GYMHD4aenh769u0LAJBKpQgMDESFChVw/fp1DB48GFKpFJMnT0b37t1hYWGB5s2b48KFC7CwsChQD/Xdu3exa9cu/P7770hKSsLAgQMxYsQIbNu2rUBx9+nTBy9evEBoaCi0tLQwfvz4PD9SAMDs2bOxePFiLFmyBAEBAfD29sajR49gYWGB3bt3o0uXLrh9+zYMDAygo6PzZQePlG7AoCFISUlGx3atoKmpiezsbPiMHoc2bdsLHZpSOTk7Y+78hbC2roSEhASsW7MK/ft4Y/f+36EngvK+j3md+BrZ2dl5St1MTEzwQA3GtAJATk4OFv+4ADVdasHOzl7ocJTuzt+30btXD2RkpENXVxfL/Vehsm3eUlgxEevf94WTRxFz7zamL/s53/X1mnjBxNQMRsZl8OThXewOXIW4p48wcto/43g7eA9BFefaKFmyFG5ePo+ta5YgLfUdmrfvrqpmFDp1fV9T13ZT0Sb6hNfCwgLLly+HRCKBg4MDrl+/juXLl2Pw4MEYMGCAfDsbGxv4+/ujTp06SE5Ohr6+PlatWgVDQ0MEBwfLx5Ha2//zRURHRwfp6ekwMzOTL9u6dStycnKwceNGSP7/S8nmzZthZGSE0NBQtGjRAgCgp6eHjRs3yhPg/HzYs2ltbY158+Zh2LBhhZLwzpw5E35+fujcuTMAoFKlSoiKisK6devkCe/06dMVnn/ixIkIDg7G5MmToaOjI38zK1u2rMIx+JS0tDT88ssvMDc3BwAEBASgTZs28PPz++w+bt26hWPHjiEiIgKurq4Acnup7ezylsD169cPPXv2BAAsWLAA/v7+uHDhAlq2bCkvkzY1Nf3kGN709HSkp6crLJNplkTJkiUL1Fb6en8dOYxDB3/Hwh/9UNnWFrdvRWPJjwtR1tQU7Tt0Ejo8pfmwdN3eoQqcnGug1TdN8eeRw+jcpauAkZGyLZg3G/fu3EHglu1Ch6IS1taVsGv3PiQnv8XRv/6E77Qp2BS4VdRJrxj/vl8lPMeODcswfo4/tLTz/2xs0rKj/N8VrW1hWLoM/Kb7ID72CUzLVwQAtOvxz/cxy8oOSE9Lw597txXrhJeIig7RlzS7ubnJE08AcHd3x507d5CdnY3IyEi0a9cOlpaWkEqlaNIk98MoJiYGAHDlyhU0atToiyZNunr1Ku7evQupVAp9fX3o6+vD2NgYaWlpuHfvnnw7JyenTya7AHDs2DF4enrC3NwcUqkUvXv3xsuXL/Hu3bsvOQR5pKSk4N69exg4cKA8Rn19fcybN08hxp07d6JBgwYwMzODvr4+pk+fLj82X8vS0lKe7AK55yMnJwe3b9/+7GNv376NEiVKoFatWvJltra2KF26dJ5tnZ2d5f/W09ODgYFBvj3Bn7Jw4UIYGhoq3Jb8uPCL9kFfZ7nfYvQfNAQtW7eBnb0D2rbviO/69MXPG9cJHZpKGRgYwMrKGo//499dUVfaqDQ0NTXx8uVLheUvX75EmTJlBIpKdRbMm4OTYaHYsDkI5Qr442Fxp6WtDUsrK1RzrI4x4ybA3qEKtm39ReiwVEoMf9+P7t7C28TXmDu2H4Z0aIAhHRrg7xuXEfL7Lgzp0AA52dl5HmPj4AgAiI998tH92jg44vWLeGRmZigtdmVT1/c1dW33lxB6JmbO0qxG0tLS4OXlBS8vL2zbtg1ly5ZFTEwMvLy8kJGR+wb7NaWuycnJqF27dr4luh
+W/H5uNuaHDx+ibdu2GD58OObPnw9jY2OcPn0aAwcOREZGxn+awfP9OOANGzagXr16Cus0NXNnjAwPD4e3tzdmz54NLy8veU+3n5/fVz+vKv37RwqJRIKcnJwv2sfUqVMxfvx4hWUyTfbuqkJaWlqesSQaGprIySneY7q+1LuUFDx+/Bht2otvkpsPaWlro2o1R5w/F45mns0B5Jb4nj8fjh49vxM4OuWRyWRYOH8ujoccxabALahY0ULokASTk5ODzIzim9x8DTH8fVet4YrZPyl+39m8Yh7MKlqh1be9oaGZdxbqmPt/A8AnJ6iKuf83dPUNoKX16Y6Bokxd39fUtd1UtIk+4T1//rzC/XPnzsHOzg63bt3Cy5cvsWjRIlhY5H7JuHjxosK2zs7OCAoKQmZmZr69vNra2sj+16+XtWrVws6dO2FqagoDA4OvjjsyMhI5OTnw8/ODhkZuR/yuXbu+en8fKleuHCpUqID79+/D29s7323Onj0LKysr/PDDD/Jljx49+s/PHRMTg2fPnqFChQoAcs+HhoaGfOKuT3FwcEBWVhYuX76M2rVrA8gdE/z69esviuF9z/q/z92/lSyZt3w5LeuLnqpQvEtJUehZf/rkCW5FR8PQ0BDl/38cxaaxR1Ns3LAWZuUr5JY0R0dj6y+b0eH/Mz6Kld+SH9HEoynKV6iAhPh4rFkVAE1NDbRq3Vbo0JSud9/+8J02BY6O1VHdyRlbtwQhNTUVHTt1Fjo0pVkwdzYOHzqIFQGroaerhxcJCQAAfakUpUrlvWyLWKxc7oeGjRrDrHx5vEtJwaE/DuJixIU8s1aLjRj/vkvp6sHcqrLCMu1SpaBvYAhzq8qIj32C82F/wcm1PvSlBnjy8C52blwJe0cXWFTKHY505cIpJL1+hcpVqqOEljairlzAoV+D4NUp/+8nxYk6vq8B6ttuKrpEn/DGxMRg/PjxGDp0KC5duoSAgAD4+fnB0tIS2traCAgIwLBhw3Djxg3MnTtX4bE+Pj4ICAhAjx49MHXqVBgaGuLcuXOoW7cuHBwcYG1tjT///BO3b9+GiYkJDA0N4e3tjSVLlqBDhw6YM2cOKlasiEePHmHPnj2YPHkyKlasWKC4bW1tkZmZiYCAALRr1w5nzpzB2rVrC+24zJ49G6NHj4ahoSFatmyJ9PR0XLx4Ea9fv8b48eNhZ2eHmJgYBAcHo06dOvjjjz+wd+/e//y8pUqVQt++fbF06VIkJSVh9OjR6NatW4HGAFepUgXNmzfHkCFDsGbNGmhpaWHChAnQ0dFRKFv/HCsrK0gkEhw8eBCtW7eGjo4O9PWL7oQhN2/ewKD+/1x7duni3LLq9h06Ye6CRUKFpVTfT5uOVQErsXDebLx69RJly5qiS9fuGDp8pNChKdXz53H4ftJ4JCYmorSxMVxq1caW7bvU4vJcLVu1xutXr7D6J3+8eJEAhypVsXrdRpiIuARu184dAICB/XorLJ8zbyE6iPiL4atXLzF96hQkJMRDXyqFvb0D1qzfBPf6DYQOTanU8e+7RAktRF+JwLEDwUhPS4NxGVPUqu+Btt3/GbNbQrMEThzajZ2bVgIyGUzLV0T3gWPQyKuDgJEXDnV8XwPUt90FJuba4SJK9Alvnz59kJqairp160JTUxNjxozBkCFDIJFIEBgYiGnTpsHf3x+1atXC0qVL0b79P7PAmpiY4Pjx45g0aRKaNGkCTU1N1KxZEw0a5H4oDx48GKGhoXB1dUVycjJOnDgBDw8PnDx5ElOmTEHnzp3x9u1bmJubw9PT84t6fGvUqIFly5bhxx9/xNSpU9G4cWMsXLgQffr0+fyDC2DQoEHQ1dXFkiVLMGnSJOjp6cHJyUk+UVb79u0xbtw4+Pj4ID09HW3atIGvry9mzZr1n57X1tYWnTt3RuvWrfHq1Su0bdv2iybh+uWXXzBw4EA0btwYZmZmWLhwIW7evPlFvSHm5uaYPXs2vv/+e/Tv3x99+vRBYGDgV7RGNerUrYerNz8/xllM9PT0Mfn7HzD5+x8+v7GILF66XOgQBNXT+zv09Fafkjd1+7t+b/bcBUKHIAh1+fuevHCN/N/GZcth8qI1n9gaqF7bHdVruys7LMGo2/vae+rabiqaJLLifqEzUmtPnjyBhYWFfIIvZROipLkoUNd3CRFfko6I1FTE/S8bBiQWdWzyTnBJ4lWqCHfpnbuXKHQIH+VW2UjoEJSiCL8ciPI6fvw4kpOT4eTkhNjYWEyePBnW1tZo3Lix0KEREREREX2ShDXNKif6yxKpC0dHR4VLDH14y2/G6OIaQ2ZmJqZNmwZHR0d06tQJZcuWRWho6BddOoqIiIiIiNQDS5pF4tGjR8jMzMx3Xbly5SCVStUiBmVjSbN6YUkzEYkNS5pJHRTlkubz994IHcJH1atsKHQISsEeXpGwsrKCra1tvjdVJZpFIQYiIiIioqJKIim6t6+1aNEiSCQS+eS3AJCWloaRI0fCxMQE+vr66NKlC54/f67wuJiYGLRp0wa6urowNTXFpEmTkJVV+L1LTHiJiIiIiIjoi0VERGDdunVwdnZWWD5u3Dj8/vvv+PXXXxEWFoZnz56hc+d/LrmXnZ2NNm3aICMjA2fPnkVQUBACAwMxY8aMQo+RCS8RERERERF9keTkZHh7e2PDhg0oXfqfYQNv3rzBpk2bsGzZMjRr1gy1a9fG5s2bcfbsWZw7dw4A8NdffyEqKgpbt25FzZo10apVK8ydOxerVq1CRkZGocbJhJeIiIiIiEgFJEX4lp6ejqSkJIVbenr6R9sycuRItGnTBs2bN1dYHhkZiczMTIXlVapUgaWlJcLDwwEA4eHhcHJyQrly5eTbeHl5ISkpCTdv3izo4SwQJrxERERERERqbuHChTA0NFS4LVy4MN9tg4ODcenSpXzXx8XFQVtbG0ZGRgrLy5Urh7i4OPk2Hya779e/X1eYivAcZkRERERERKQKU6dOxfjx4xWWlSxZMs92jx8/xpgxY3D06FGUKlVKVeF9NfbwEhERERERqYLQdcufuJUsWRIGBgYKt/wS3sjISMTHx6NWrVooUaIESpQogbCwMPj7+6NEiRIoV64cMjIykJiYqPC458+fw8zMDABgZmaWZ9bm9/ffb1NYmPASERERERFRgXh6euL69eu4cuWK/Obq6gpvb2/5v7W0tBASEiJ/zO3btxETEwN3d3cAgLu7O65fv474+Hj5NkePHoWBgQGqVatWqPGypJmIiIiIiIgKRCqVonr16grL9PT0YGJiIl8+cOBAjB8/HsbGxjAwMMCoUaPg7u4ONzc3AECLFi1QrVo19O7dG4sXL0ZcXBymT5+OkSNH5tur/F8w4SUiIiIiIlIBCSRCh6ASy5cvh4aGBrp06YL09HR4eXlh9erV8vWampo4ePAghg8fDnd3d+jp6aFv376YM2dOoccikclkskLfK5FIpWUJHYEw1PVdQqIen0lEpEYi7r8WOgRB1LEp/fmNSDRKFeEuvYsPkoQO4aNcKxkIHYJScAwvERERERERiVIR/v2Di
IiIiIhIPFg9pnrs4SUiIiIiIiJRYsJLREREREREosSSZiIiIiIiIhVgRbPqsYeXiIiIiIiIRIkJLxEREREREYkSS5qJiIiIiIhUgTXNKsceXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgEJa5pVjj28REREREREJEpMeImIiIiIiEiUWNJMRERERESkAhJWNKsce3iJiIiIiIhIlJjwEhERERERkSixpJmIiIiIiEgFWNGseuzhJSIiIiIiIlFiDy8RfRYnWCAiEoc6NqWFDkEQUU+ShA5BENUqGggdApHgmPASERERERGpAjsRVI4lzURERERERCRKTHiJiIiIiIhIlFjSTEREREREpAIS1jSrHHt4iYiIiIiISJSY8BIREREREZEosaSZiIiIiIhIBXipR9VjDy8RERERERGJEhNeIiIiIiIiEiWWNBMREREREakAK5pVjz28REREREREJEpMeImIiIiIiEiUWNJMRERERESkCqxpVjn28BIREREREZEoMeElIiIiIiIiUWJJMxERERERkQpIWNOscuzhJSIiIiIiIlFiwktERERERESixJJmIiIiIiIiFZCwolnl2MNLREREREREosSEl4iIiIiIiESJJc1EREREREQqwIpm1WMPLxEREREREYkSE14iIiIiIiISJZY0ExERERERqQJrmlWOPbxEREREREQkSkx4iYiIiIiISJRY0kxERERERKQCEtY0qxx7eImIiIiIiEiUmPASERERERGRKLGkmYiIiIiISAUkrGhWOfbwEhERERERkSiJKuH18PDA2LFjhQ6jwKytrbFixYoCbx8YGAgjI6MCbTtr1izUrFnzq+L6lPXr18PCwgIaGhpfFLvQQkNDIZFIkJiYKHQoXyV4+za0+qYZ6rg4wbtHV1y/dk3okJRq04Z16NWtC9zruMCjkTvGjhqBhw/uCx2WSkRejMCoEcPQ3KMhajg64HjIMaFDUhl1e52/p67tfm/ThvWo4eiAxQvnCx2KUqnz+xogrtf50d9/w+RhPTGgkwcGdPLAjLEDcCXiDAAgIe4ZenrVyfd27uQ/7+eBq5di2sje6N22Pr4f3kuYhiiRmM43FX+iSniVrV+/fujYsWOh7S8iIgJDhgwptP0pW1JSEnx8fDBlyhQ8ffr0P8X+Jcm7ujty+BCWLl6IoSNGIvjXvXBwqILhQwfi5cuXQoemNBcjLqB7T29s2bEL6zZsRlZWFoYNHoh3794JHZrSpaa+g4ODA6ZOnyl0KCqljq9zQH3b/d6N69fw26/BsLd3EDoUpVPn9zWxvc6Ny5qi5wAfzP/pF8wPCIJjDVcsnTURjx/eg0nZcliz47DC7dveQ1BKRxc169RX2I+HVzu4N/5GoFYoj9jOd2GTFOGbWDHhFUBGRgYAoGzZstDV1RU4moKLiYlBZmYm2rRpg/Lly6sk9vfHSp1tCdqMzt92Q8dOXVDZ1hbTZ85GqVKlsG/PbqFDU5o16zehQ6fOsLW1g0OVKpgzfxFiY58hOuqm0KEpXcNGTeAzZhw8m4vvS9CnqOPrHFDfdgPAu5QUTJ0yCTNnz4OBoaHQ4SidOr+vie11XtutMVzqNkB5c0uUr2iF7v1HoFQpXdy9dQMampowMi6jcIs4Gwq3xs1RSuef7039RkxEi/bdYFreXMCWKIfYzjcVf6JLeLOysuDj4wNDQ0OUKVMGvr6+kMlkAIAtW7bA1dUVUqkUZmZm6NWrF+Lj4xUef/PmTbRt2xYGBgaQSqVo1KgR7t27h1mzZiEoKAj79++HRCKBRCJBaGgoAODx48fo1q0bjIyMYGxsjA4dOuDhw4fyfb7vGZ4/fz4qVKgAB4fcX7L/XdK8bNkyODk5QU9PDxYWFhgxYgSSk5ML7dhs3LgRVatWRalSpVClShWsXr1aYf2UKVNgb28PXV1d2NjYwNfXF5mZmQBye2SdnJwAADY2NpBIJAptzM/Vq1fRtGlTSKVSGBgYoHbt2rh48SJCQ0PRv39/vHnzRn4sZ82aJT8mc+fORZ8+fWBgYCDvRT59+jQaNWoEHR0dWFhYYPTo0UhJSZE/V0HO7YfevXuHVq1aoUGDBkW6zDkzIwPRUTfh5v7Pr8IaGhpwc6uPa1cvCxiZaiW/fQsAavGlWB2p6+tcXdv93oJ5c9C4cROF9qsTdXlfE/vrPCc7G2dD/0J6eirsqjrlWX//TjQe3fsbTb3aCxCd6on9fFPxJLqENygoCCVKlMCFCxewcuVKLFu2DBs3bgQAZGZmYu7cubh69Sr27duHhw8fol+/fvLHPn36FI0bN0bJkiVx/PhxREZGYsCAAcjKysLEiRPRrVs3tGzZErGxsYiNjUX9+vWRmZkJLy8vSKVSnDp1CmfOnIG+vj5atmyp0DsZEhKC27dv4+jRozh48GC+sWtoaMDf3x83b95EUFAQjh8/jsmTJxfKcdm2bRtmzJiB+fPnIzo6GgsWLICvry+CgoLk20ilUgQGBiIqKgorV67Ehg0bsHz5cgBA9+7dcexY7tiTCxcuIDY2FhYWFp98Tm9vb1SsWBERERGIjIzE999/Dy0tLdSvXx8rVqyAgYGB/FhOnDhR/rilS5eiRo0auHz5Mnx9fXHv3j20bNkSXbp0wbVr17Bz506cPn0aPj4+8sd87tx+KDExEd988w1ycnJw9OjRIl1a/TrxNbKzs2FiYqKw3MTEBC9evBAoKtXKycnB4h8XoKZLLdjZ2QsdDimBur7O1bXdAHD40B+Ijo7C6HEThA5FEOr0vibW13nMg7vo16ExerdtgE3+CzF+xhJUtLLJs92JI/thblkJ9o41BIhS9cR6vguV0HXLaljTLLrLEllYWGD58uWQSCRwcHDA9evXsXz5cgwePBgDBgyQb2djYwN/f3/UqVMHycnJ0NfXx6pVq2BoaIjg4GBoaWkBAOzt//kg0tHRQXp6OszMzOTLtm7dipycHGzcuBGS/88zvnnzZhgZGSE0NBQtWrQAAOjp6WHjxo3Q1tb+aOwfTrhlbW2NefPmYdiwYXl6Yr/GzJkz4efnh86dOwMAKlWqhKioKKxbtw59+/YFAEyfPl3h+SdOnIjg4GBMnjwZOjo68jevsmXLKhyDj4mJicGkSZNQpUoVAICdnZ18naGhISQSSb77adasGSZM+OdL0KBBg+Dt7S0/PnZ2dvD390eTJk2wZs0alCpV6rPn9r24uDh0794ddnZ22L59+yfPR3p6OtLT0xWWyTRLomTJkp9tOxWeBfNm496dOwjcsl3oUIioEMTFxmLxovlYt+FntX0/5fta8VehohUWrd6Gd++Scf5UCNYsnYUZS9YpJL0Z6Wk4e+JPdOo1UMBIiUh0Pbxubm7yxBMA3N3dcefOHWRnZyMyMhLt2rWDpaUlpFIpmjRpAiA3MQOAK1euoFGjRvJktyCuXr2Ku3fvQiqVQl9fH/r6+jA2NkZaWhru3bsn387JyemTyRUAHDt2DJ6enjA3N4dUKkXv3r3x8uXL/zyhRUpKCu7du4eBAwfKY9TX18e8efMUYty5cycaNGgAMzMz
6OvrY/r06fJj8zXGjx+PQYMGoXnz5li0aJHCc32Kq6urwv2rV68iMDBQIXYvLy/k5OTgwYMHAPDZc/veN998A1tbW+zcufOz52PhwoUwNDRUuC35cWFBm18oShuVhqamZp6JHl6+fIkyZcqoNBYhLJg3ByfDQrFhcxDKFeBHFiqe1PV1rq7tjoq6iVcvX6JH186o5VwNtZyr4WLEBWzftgW1nKshOztb6BCVSt3e18T6Oi+hpQUzcwvY2FVFzwE+sKpkhyP7ghW2OX/qONLT09C4eRuBolQ9sZ5vKt5El/B+TFpaGry8vGBgYIBt27YhIiICe/fuBfDPxEg6OjpfvN/k5GTUrl0bV65cUbj9/fff6NXrn2nm9fT0Prmfhw8fom3btnB2dsbu3bsRGRmJVatWKcT3td6PA96wYYNCjDdu3MC5c+cAAOHh4fD29kbr1q1x8OBBXL58GT/88MN/eu5Zs2bh5s2baNOmDY4fP45q1arJj/mn/PtYJScnY+jQoQqxX716FXfu3EHlypWRkpLy2XP7Xps2bXDy5ElERUV9No6pU6fizZs3CrdJU6Z+wRH477S0tVG1miPOnwuXL8vJycH58+FwruGi0lhUSSaTYcG8OTgechQbfg5CxYqfLp+n4k1dX+fq2u56bm74bd/v2Ll7n/zm6Fgdrdu2w87d+6CpqSl0iEqhru9r6vI6z5HJkJmp+J3jxJ/7UdutMQyMSgsUleqpy/n+LyRF+D+xEl1J8/nz5xXunzt3DnZ2drh16xZevnyJRYsWyceeXrx4UWFbZ2dnBAUFITMzM99eXm1t7Ty/PNeqVQs7d+6EqakpDAwMvjruyMhI5OTkwM/PDxoaub9D7Nq166v396Fy5cqhQoUKuH//Pry9vfPd5uzZs7CyssIPP/wgX/bo0aP//Nz29vawt7fHuHHj0LNnT2zevBmdOnXK91h+TK1atRAVFQVbW9t811+/fv2z5/a9RYsWQV9fH56enggNDUW1atU++rwlS+YtX07LKlDIhap33/7wnTYFjo7VUd3JGVu3BCE1NRUdO3VWfTAqsmDubBw+dBArAlZDT1cPLxISAAD6UilKlSolcHTK9S4lRaEy4emTJ7gVHQ1DQ0OUr1BBwMiUSx1f54B6tltPTz/PuFUdXV0YGRqJejyrOr+vie11vuPnn1CzTn2UKWuG1NR3OHPiCKKvReL7+QHybeKePsat65cxee6KfPcR9/Qx0tLeIfHVS2RkpOPhvdsAgIqWNijxBZWGRZHYzjcVf6JLeGNiYjB+/HgMHToUly5dQkBAAPz8/GBpaQltbW0EBARg2LBhuHHjBubOnavwWB8fHwQEBKBHjx6YOnUqDA0Nce7cOdStWxcODg6wtrbGn3/+idu3b8PExASGhobw9vbGkiVL0KFDB8yZMwcVK1bEo0ePsGfPHkyePBkVK1YsUNy2trbIzMxEQEAA2rVrhzNnzmDt2rWFdlxmz56N0aNHw9DQEC1btkR6ejouXryI169fY/z48bCzs0NMTAyCg4NRp04d/PHHHwXqjf2Y1NRUTJo0Cd9++y0qVaqEJ0+eICIiAl26dAGQO0Y4OTkZISEhqFGjBnR1dT96maMpU6bAzc0NPj4+GDRoEPT09BAVFYWjR4/ip59+KtC5/dDSpUuRnZ2NZs2aITQ0VD7GuKhq2ao1Xr96hdU/+ePFiwQ4VKmK1es2wkTEpUG7du4AAAzs11th+Zx5C9FB5B+YN2/ewKD+feT3ly7OLaNv36ET5i5YJFRYSqeOr3NAfdutjtT5fU1sr/OkxNdYvWQWEl+9gK6uPiwr2eL7+QFwrl1Pvk3onwdgXMYUzrXd8t3H+hXzEH3tkvz+1BHfAQD8g/ajrFnx/nFTbOebij+J7P01e0TAw8MDjo6OyMnJwfbt26GpqYnhw4dj3rx5kEgk2LFjB6ZNm4bY2FjUqlULU6dORfv27XH58mXUrFkTAHDt2jVMmjQJp0+fhqamJmrWrInAwEDY2NggISEB3t7eCA8PR3JyMk6cOAEPDw/ExcVhypQpOHToEN6+fQtzc3N4enpi6dKlMDAwQL9+/ZCYmIh9+/YpxGttbY2xY8fKJ2Navnw5lixZgsTERDRu3Bje3t7o06cPXr9+DSMjIwQGBmLs2LEFuozOrFmzsG/fPly5ckW+bPv27ViyZAmioqKgp6cHJycnjB07Fp06dQIATJ48GT///DPS09PRpk0buLm5YdasWfLnu3LlClxcXPDgwQNYW1t/8vkzMjLQt29fnDlzBs+fP0eZMmXQuXNnLFmyRP5L9vDhw/Hrr7/i5cuXmDlzJmbNmpXnmLwXERGBH374AeHh4ZDJZKhcuTK6d++OadOmAcBnz21oaCiaNm0qP5YAMHr0aPz2228IDQ1VmJzsU4To4SUiIqL/JupJktAhCKJaxa+vPizOShXhLr278alCh/BRtqZfPryzOBBVwkukbEx4iYiIih8mvOqFCe/XEWvCqzaTVhEREREREZF6YcJbTDk6OipcpufD27Zt29QmBiIiIiKi4kJShG9iVYQ7/OlTDh06hMzMzHzXlStXTm1iICIiIiIi+hgmvMWUlZWV0CEUiRiIiIiIiIg+hgkvERERERGRKoi5driI4hheIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSAQlrmlWOPbxEREREREQkSkx4iYiIiIiISJRY0kxERERERKQCElY0qxx7eImIiIiIiEiUmPASERERERGRKLGkmYiIiIiISAVY0ax67OElIiIiIiIiUWLCS0RERERERKLEhJeIiIiIiEgVJEX4VkALFy5EnTp1IJVKYWpqio4dO+L27dsK26SlpWHkyJEwMTGBvr4+unTpgufPnytsExMTgzZt2kBXVxempqaYNGkSsrKyCh5IATHhJSIiIiIiogIJCwvDyJEjce7cORw9ehSZmZlo0aIFUlJS5NuMGzcOv//+O3799VeEhYXh2bNn6Ny5s3x9dnY22rRpg4yMDJw9exZBQUEIDAzEjBkzCj1eiUwmkxX6XolEKq3wf3QiIiIiJYt6kiR0CIKoVtFA6BAEUaoIT8v78GWa0CF8lLVJqa96XEJCAkxNTREWFobGjRvjzZs3KFu2LLZv345vv/0WAHDr1i1UrVoV4eHhcHNzw+HDh9G2bVs8e/YM5cqVAwCsXbsWU6ZMQUJCArS1tQutXezhJSIiIiIiUgFJEf4vPT0dSUlJCrf09PTPtunNmzcAAGNjYwBAZGQkMjMz0bx5c/k2VapUgaWlJcLDwwEA4eHhcHJykie7AODl5YWkpCTcvHmzMA85E14iIiIiIiJ1t3DhQhgaGircFi5c+MnH5OTkYOzYsWjQoAGqV68OAIiLi4O2tjaMjIwUti1Xrhzi4uLk23yY7L5f/35dYSrCHf5ERERERESkClOnTsX48eMVlpUsWfKTjxk5ciRu3LiB06dPKzO0/4QJLxERERERkQpIvmA2ZFUrWbLkZxPcD/n4+ODgwYM4efIkKlasKF9uZmaGjIwMJCYmKvTyPn/+HGZmZvJ
tLly4oLC/97M4v9+msLCkmYiIiIiIiApEJpPBx8cHe/fuxfHjx1GpUiWF9bVr14aWlhZCQkLky27fvo2YmBi4u7sDANzd3XH9+nXEx8fLtzl69CgMDAxQrVq1Qo2XPbxERERERERUICNHjsT27duxf/9+SKVS+ZhbQ0ND6OjowNDQEAMHDsT48eNhbGwMAwMDjBo1Cu7u7nBzcwMAtGjRAtWqVUPv3r2xePFixMXFYfr06Rg5cuQX9TIXBC9LRPQFeFkiIiKi4oeXJVIvRfmyRI9ffX7WY6FYGBcs0ZR8pC578+bN6NevHwAgLS0NEyZMwI4dO5Ceng4vLy+sXr1aoVz50aNHGD58OEJDQ6Gnp4e+ffti0aJFKFGicE8gE16iL8CEl4iIqPhhwqtemPB+nYImvMUNx/ASERERERGRKBXh3z+IiIiIiIjEoyjP0ixW7OElIiIiIiIiUWLCS0RERERERKLEkmYiIiIiIiKVYE2zqjHhJSIiIiJRU9fZiv+OTRY6BEE4W+gLHQIVISxpJiIiIiIiIlFiDy8REREREZEKcJZm1WMPLxEREREREYkSE14iIiIiIiISJZY0ExERERERqQArmlWPPbxEREREREQkSkx4iYiIiIiISJRY0kxERERERKQCnKVZ9djDS0RERERERKLEhJeIiIiIiIhEiSXNREREREREKiDhPM0qxx5eIiIiIiIiEiUmvERERERERCRKLGkmIiIiIiJSBVY0qxx7eImIiIiIiEiUmPASERERERGRKLGkmYiIiIiISAVY0ax67OElIiIiIiIiUWLCS0RERERERKLEkmYiIiIiIiIVkLCmWeXYw0tERERERESixISXiIiIiIiIRIklzURERERERCog4TzNKsceXiIiIiIiIhIlJrxEREREREQkSixpJiIiIiIiUgVWNKsce3iJiIiIiIhIlJjwEhERERERkSixpJmIiIiIiEgFWNGseuzhJSIiIiIiIlFiwktERERERESixJJmIiIiIiIiFZCwplnlBO3h9fDwwNixY4UM4YtYW1tjxYoVBd4+MDAQRkZGSovna4SGhkIikSAxMbFA2xfmOSrI8Zg1axZq1qwpv9+vXz907Njxi57nS89TUbYreDu+7dQO9evWQv26tdC7V3ecPhUmdFgqEXkxAqNGDENzj4ao4eiA4yHHhA5JZYK3b0Orb5qhjosTvHt0xfVr14QOSanU+XUOqN/5fo/tZrvVgZja/eeBXzFhcHf0ad8Yfdo3xrRR/XD5whn5+nXL58Ond3v0al0fA7p44kff8Xga80BhHwnPY7Fg2mh4t6mPgd82xy/rViA7O0vVTSE1I+qS5q9Jlj4lIiICQ4YMKbT9CaF+/fqIjY2FoaGh0KEUyMqVKxEYGFio+3z48CEkEgmuXLlSqPtVBtNyZhgzbiJ2/LoH23ftRt16bhjjMxJ3794ROjSlS019BwcHB0ydPlPoUFTqyOFDWLp4IYaOGIngX/fCwaEKhg8diJcvXwodmtKo8+tcHc83wHaz3Wx3cWRSthy8B43Cj6u3YtHqLajuUgc/zhiPxw/vAQBs7KpixKRZWPHzb5i+6CfIIMPcKSORnZ0NAMjOzsbCH8YgKysL81Zuhs/k2Qj963fsDFwrZLNIDYg64S0sGRkZAICyZctCV1dX4Gj+G21tbZiZmUFSTOopDA0Ni1wvuSp5NG2GRo2bwMrKGtbWlTBqzDjo6uri2tUrQoemdA0bNYHPmHHwbP6N0KGo1Jagzej8bTd07NQFlW1tMX3mbJQqVQr79uwWOjSlUefXuTqeb4DtZrvZ7uLI1b0xatVriPIVLVGhohV6DRiJUjq6+Dv6OgDgm7adUc25FkzNKsDGrip69h+BlwnPkfD8GQDgWuQ5PIl5gNFT56KSrQNc6jZAj37DcWT/LmRmZgrZNJWSFOH/xErwhDcrKws+Pj4wNDREmTJl4OvrC5lMBgDYsmULXF1dIZVKYWZmhl69eiE+Pl7h8Tdv3kTbtm1hYGAAqVSKRo0a4d69e5g1axaCgoKwf/9+SCQSSCQShIaGAgAeP36Mbt26wcjICMbGxujQoQMePnwo3+f7nuH58+ejQoUKcHBwAJC3VHbZsmVwcnKCnp4eLCwsMGLECCQnJ3/VcXhfyrtu3TpYWFhAV1cX3bp1w5s3b+TbRERE4JtvvkGZMmVgaGiIJk2a4NKlSwr7kUgk2LhxIzp16gRdXV3Y2dnhwIED8vX5lTSfOXMGHh4e0NXVRenSpeHl5YXXr1/nG2d6ejomTpwIc3Nz6OnpoV69evLjWlD79u2DnZ0dSpUqBS8vLzx+/Pij2/67l/7t27fw9vaGnp4eypcvj+XLl+dbdv3u3TsMGDAAUqkUlpaWWL9+vXxdpUqVAAAuLi6QSCTw8PD4oviFkp2djcOH/kBq6jvUqOEidDikBJkZGYiOugk39/ryZRoaGnBzq49rVy8LGJnqqNPrXF3PN9vNdrPdxb/d2dnZOHPiT6SnpcK+mnOe9WmpqThx5ABMzcxhUtYMAHA76hosK9nCqLSJfLsaru5IfZeCJ//vJSZSBsET3qCgIJQoUQIXLlzAypUrsWzZMmzcuBEAkJmZiblz5+Lq1avYt28fHj58iH79+skf+/TpUzRu3BglS5bE8ePHERkZiQEDBiArKwsTJ05Et27d0LJlS8TGxiI2Nhb169dHZmYmvLy8IJVKcerUKZw5cwb6+vpo2bKlvCcXAEJCQnD79m0cPXoUBw8ezDd2DQ0N+Pv74+bNmwgKCsLx48cxefLkrz4Wd+/exa5du/D777/jyJEjuHz5MkaMGCFf//btW/Tt2xenT5/GuXPnYGdnh9atW+Pt27cK+5k9eza6deuGa9euoXXr1vD29sarV6/yfc4rV67A09MT1apVQ3h4OE6fPo127drJy0/+zcfHB+Hh4QgODsa1a9fQtWtXtGzZEnfuFKz08N27d5g/fz5++eUXnDlzBomJiejRo0cBjxAwfvx4nDlzBgcOHMDRo0dx6tSpPEk/APj5+cHV1VV+DIcPH47bt28DAC5cuAAAOHbsGGJjY7Fnz54CP78Q7vx9G26uLqjj4oT5c2Ziuf8qVLa1FTosUoLXia+RnZ0NExMTheUmJiZ48eKFQFGphjq+ztX1fLPdbDfAdhdXj+7fwXdtG6JXK3esX7EAk2YthYWVjXz9n/t34bu2DdG7XUNcjjgD38WroKWlBQBIfPUSRkbGCvszKp17P/F18SzzpuJB8FmaLSwssHz5ckgkEjg4OOD69etYvnw5Bg8ejAEDBsi3s7Gxgb+/P+rUqYPk5GTo6+tj1apVMDQ0RHBwsPyPyd7eXv4YHR0dpKenw8zMTL5s69atyMnJwcaNG+VlvZs3b4aRkRFCQ0PRokULAICenh42btwIbW3tj8b+Ya+itbU15s2bh2HDhmH16tVfdSzS0tLwyy+/wNzcHAAQEBCANm3awM/PD2ZmZmjWrJnC9uvXr4eRkRHCwsLQtm1b+fJ+/fqhZ8+eAIAFCxbA398fFy5cQMuWLfM85+LFi+Hq6q
oQs6OjY77xxcTEYPPmzYiJiUGFChUAABMnTsSRI0ewefNmLFiw4LNtzMzMxE8//YR69eoByP3Bo2rVqrhw4QLq1q37yce+ffsWQUFB2L59Ozw9PQHknrv3sXyodevW8h8LpkyZguXLl+PEiRNwcHBA2bJlAeR+6Hz42vi39PR0pKenKyyTaZZEyZIlP9vOwmRtXQm7du9DcvJbHP3rT/hOm4JNgVtFnwyQeuHrnIio6KtgYY0l63bgXUoyzp08hp8Wz8TsZRvkSW9Dz1Zwru2G169e4MCvW7Bs7veYt/JnaGur9rtTUVZMRhWKiuA9vG5ubgrjSd3d3XHnzh1kZ2cjMjIS7dq1g6WlJaRSKZo0aQIgN/ECcnsnGzVqJE92C+Lq1au4e/cupFIp9PX1oa+vD2NjY6SlpeHevX/KKZycnD6Z7AK5PYSenp4wNzeHVCpF79698fLlS7x79+5LDoGcpaWlPNkFco9FTk6OvGfy+fPnGDx4MOzs7GBoaAgDAwMkJyfLj8d7zs7/lJbo6enBwMAgTyn4e+97eAvi+vXryM7Ohr29vfzY6evrIywsTOHYfUqJEiVQp04d+f0qVarAyMgI0dHRn33s/fv3kZmZqZAYGxoaykvOP/ThMZBIJDAzM/voMfiYhQsXwtDQUOG25MeFX7SPwqClrQ1LKytUc6yOMeMmwN6hCrZt/UXlcZDylTYqDU1NzTwTmrx8+RJlypQRKCrVUMfXubqeb7ab7QbY7uJKS0sL5c0tUNm+KrwHjYK1jT0O7dkhX6+nL0X5ipao5lwLE2YsxrPHD3Hh9AkAgJGxCRITFSsOE1/n3v+wzJmosAme8H5MWloavLy8YGBggG3btiEiIgJ79+4F8M8kUjo6Ol+83+TkZNSuXRtXrlxRuP3999/o1auXfDs9Pb1P7ufhw4do27YtnJ2dsXv3bkRGRmLVqlUK8RW2vn374sqVK1i5ciXOnj2LK1euwMTEJM/z/fsHAIlEgpycnHz3+SXHMDk5GZqamoiMjFQ4dtHR0Vi5cuWXN0iJvuQYfMzUqVPx5s0bhdukKVMLM8yvkpOTg0wlvcZIWFra2qhazRHnz4XLl+Xk5OD8+XA4i3w867+pw+tcXc832812s93iaXeOLAeZmR95r5bJIJPJ5Osdqjkj5sFdvHn9T9J7LfI8dHT1UPGDsmiiwiZ4SfP58+cV7r8fm3rr1i28fPkSixYtgoWFBQDg4sWLCts6OzsjKCgImZmZ+fbyamtr5xmLWqtWLezcuROmpqYwMDD46rgjIyORk5MDPz8/aGjk/m6wa9eur94fkNtz/ezZM3mJ7rlz56ChoSHvwTxz5gxWr16N1q1bA8idfOu/jgNxdnZGSEgIZs+e/dltXVxckJ2djfj4eDRq1Oirni8rKwsXL16U99Levn0biYmJqFq16mcfa2NjAy0tLURERMDS0hIA8ObNG/z9999o3LhxgWN433P/sXHK75Usmbd8OU3Fl4pbudwPDRs1hln58niXkoJDfxzExYgLWLN+k2oDEcC7lBSF6oWnT57gVnQ0DA0NUT6fMnax6N23P3ynTYGjY3VUd3LG1i1BSE1NRcdOnYUOTWnU+XWujucbYLvZbra7ONq2MQAudRugjKkZUt+l4PTxI4i6GokfFv2E58+e4GzoX3B2dYeBoRFevYjH3uBAaGuXQq26DQEAzrXdUNGyEgIW+eK7IWOQ+OoFggNXo2WHbtD6TFUl0X8heMIbExOD8ePHY+jQobh06RICAgLg5+cHS0tLaGtrIyAgAMOGDcONGzcwd+5chcf6+PggICAAPXr0wNSpU2FoaIhz586hbt26cHBwgLW1Nf7880/cvn0bJiYmMDQ0hLe3N5YsWYIOHTpgzpw5qFixIh49eoQ9e/Zg8uTJqFixYoHitrW1RWZmJgICAtCuXTucOXMGa9f+t+uIlSpVCn379sXSpUuRlJSE0aNHo1u3bvJxpnZ2dvKZq5OSkjBp0qSv6uX+0NSpU+Hk5IQRI0Zg2LBh0NbWxokTJ9C1a9c8JTf29vbw9vZGnz594OfnBxcXFyQkJCAkJATOzs5o06bNZ59PS0sLo0aNgr+/P0qUKAEfHx+4ubl9dvwuAEilUvTt2xeTJk2CsbExTE1NMXPmTGhoaHzRZZZMTU2ho6ODI0eOoGLFiihVqlSRvS7xq1cvMX3qFCQkxENfKoW9vQPWrN8E9/oNhA5N6W7evIFB/fvI7y9dnFtO3r5DJ8xdsEiosJSuZavWeP3qFVb/5I8XLxLgUKUqVq/bCJNiXAL3Oer8OlfH8w2w3Ww3210cvUl8jZ9+nIHXr15AV08fVpXs8MOin1CjthtevUhA9I0r+GPPDiQnJ8GotAmqOrlgnv/PMPz/xFSampqYOn8lNqxciB9G90PJUjrwaNEW3fsNE7hlJHaCJ7x9+vRBamoq6tatC01NTYwZMwZDhgyBRCJBYGAgpk2bBn9/f9SqVQtLly5F+/bt5Y81MTHB8ePHMWnSJDRp0gSampqoWbMmGjTI/ZI0ePBghIaGwtXVFcnJyThx4gQ8PDxw8uRJTJkyBZ07d8bbt29hbm4OT0/PL+rxrVGjBpYtW4Yff/wRU6dORePGjbFw4UL06dPn8w/+CFtbW3Tu3BmtW7fGq1ev0LZtW4XJpDZt2oQhQ4agVq1asLCwwIIFCzBx4sSvfj4gN4n966+/MG3aNNStWxc6OjqoV6+efNKrf9u8eTPmzZuHCRMm4OnTpyhTpgzc3NwUJs36FF1dXUyZMgW9evXC06dP0ahRI2zaVPBenGXLlmHYsGHyS1FNnjwZjx8/RqlSpQq8jxIlSsDf3x9z5szBjBkz0KhRoy++tJKqzJ77+YnAxKpO3Xq4evO20GEIoqf3d+jp/Z3QYaiMOr/OAfU73++x3eqF7S7+Rkyc8dF1xmXKYtoC/8/uo2y58gXajqgwSWTvL3pLgpo1axb27duHK1euCB1KsZKSkgJzc3P4+flh4MCBSn8+VZc0ExEREX2tv2OThQ5BEM4W+kKH8FGJqZ8eUickIx1NoUNQCsF7eIm+xOXLl3Hr1i3UrVsXb968wZw5cwAAHTp0EDgyIiIiIiIqaorsLM1i4+joqHApnw9v27ZtEzq8QtGqVauPtrEg1+gtqKVLl6JGjRpo3rw5UlJScOrUqWI9xT8RERERESkHS5pV5NGjR8jMzMx3Xbly5SCVSlUcUeF7+vQpUlNT811nbGwMY2NjFUdU+FjSTERERMUFS5qLnjepX3aZTFUy1BFnXyhLmlXEyspK6BCUztzcXOgQiIiIiIiI5MSZxhMREREREZHaYw8vERERERGRCkgkQkegftjDS0RERERERKLEhJeIiIiIiIhEiSXNREREREREKsCKZtVjDy8RERERERGJEhNeIiIiIiIiEiWWNBMREREREakCa5pVjj28REREREREJEpMeImIiIiIiEiUWNJMRERERESkAhLWNKsce3iJiIiIiIhIlJjwEhERERERkSixpJmIiIiIiEgFJKxoVjn28BIREREREZEoMeElIiIiI
[... base64-encoded PNG image output omitted ...]",
       "text/plain": [
-       "<Figure ...>"
+       "<Figure ...>
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], @@ -2260,29 +2364,35 @@ } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 163/163 [01:28<00:00, 1.84it/s]\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "[10:33:05] Load saved dataset for path\n", - "[10:33:05] Feature path transformed\n", + "[14:28:22] Feature path transformed\n", "Prediction for te_data:\n", - "array([[3.73373814e-02, 7.55886547e-03, 3.31766725e-01, ...,\n", - " 2.03554723e-02, 5.54920807e-05, 2.34885629e-05],\n", - " [9.47430134e-01, 1.18461961e-03, 3.56146842e-02, ...,\n", - " 2.40112818e-03, 1.31623001e-05, 1.04800677e-06],\n", - " [2.86391020e-01, 5.00728607e-01, 7.27913678e-02, ...,\n", - " 6.01911684e-04, 8.56203333e-05, 4.92153486e-05],\n", + "array([[1.57098308e-01, 2.81519257e-03, 5.96348643e-01, ...,\n", + " 1.08084995e-02, 1.95845146e-07, 1.42198633e-05],\n", + " [9.83384371e-01, 6.52049668e-04, 1.45791359e-02, ...,\n", + " 1.12365209e-03, 9.75986836e-07, 1.95965598e-07],\n", + " [1.68020770e-01, 3.79674375e-01, 1.86414778e-01, ...,\n", + " 1.67078048e-03, 1.21877249e-03, 3.75247910e-03],\n", " ...,\n", - " [1.48009066e-03, 1.05104391e-05, 1.81454215e-02, ...,\n", - " 1.07178465e-04, 4.83141348e-12, 1.84408755e-08],\n", - " [1.09512859e-03, 9.73159331e-06, 2.51230318e-02, ...,\n", - " 1.13380796e-04, 3.83674342e-11, 8.84786147e-08],\n", - " [6.47179200e-04, 1.02468675e-05, 1.26451282e-02, ...,\n", - " 4.21979857e-05, 4.80127786e-12, 1.27486942e-07]], dtype=float32)\n", + " [1.05072348e-03, 1.24680300e-05, 5.70231769e-03, ...,\n", + " 4.37476301e-05, 1.52421890e-07, 1.81421214e-07],\n", + " [6.52685121e-04, 4.47798493e-06, 5.04824053e-03, ...,\n", + " 2.13344283e-05, 1.52417726e-07, 1.62638599e-07],\n", + " [1.57185504e-03, 1.01540554e-05, 2.53849756e-02, ...,\n", + " 1.17763964e-04, 1.52426963e-07, 1.77946404e-07]], dtype=float32)\n", "Shape = (20814, 10)\n", - "CPU times: user 5.91 s, sys: 572 ms, total: 6.48 s\n", - "Wall time: 58.2 s\n" + "CPU times: user 55.8 s, sys: 21.6 s, total: 1min 17s\n", + "Wall time: 2min 19s\n" ] } ], @@ -2295,9 +2405,21 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_12895/1185757098.py:3: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sub['pred_' + str(i)] = te_pred.data[:,i]\n" + ] + }, { "data": { "text/html": [ @@ -2336,72 +2458,72 @@ " \n", " 0\n", " 200001.jpg\n", - " 0.037337\n", - " 0.007559\n", - " 0.331767\n", - " 0.036282\n", - " 0.215647\n", - " 0.347311\n", - " 0.003662\n", - " 0.020355\n", - " 5.549208e-05\n", - " 2.348856e-05\n", + " 0.157098\n", + " 0.002815\n", + " 0.596349\n", + " 0.020590\n", + " 1.148577e-01\n", + " 0.095614\n", + " 0.001854\n", + " 0.010808\n", + " 1.958451e-07\n", + " 1.421986e-05\n", " \n", " \n", " 1\n", " 200002.jpg\n", - " 0.947430\n", - " 0.001185\n", - " 0.035615\n", - " 0.000533\n", - " 0.012389\n", - " 0.000408\n", - " 0.000025\n", - " 0.002401\n", - " 1.316230e-05\n", - " 1.048007e-06\n", + " 0.983384\n", + " 0.000652\n", + " 0.014579\n", + " 0.000139\n", + " 6.825896e-05\n", + " 0.000044\n", 
+ " 0.000008\n", + " 0.001124\n", + " 9.759868e-07\n", + " 1.959656e-07\n", " \n", " \n", " 2\n", " 200003.jpg\n", - " 0.286391\n", - " 0.500729\n", - " 0.072791\n", - " 0.000385\n", - " 0.000191\n", - " 0.040129\n", - " 0.098647\n", - " 0.000602\n", - " 8.562033e-05\n", - " 4.921535e-05\n", + " 0.168021\n", + " 0.379674\n", + " 0.186415\n", + " 0.000225\n", + " 1.850213e-03\n", + " 0.036919\n", + " 0.220253\n", + " 0.001671\n", + " 1.218772e-03\n", + " 3.752479e-03\n", " \n", " \n", " 3\n", " 200004.jpg\n", - " 0.000020\n", - " 0.978054\n", - " 0.017930\n", - " 0.000190\n", - " 0.000370\n", - " 0.000252\n", - " 0.000518\n", - " 0.001085\n", - " 1.576396e-03\n", - " 4.549823e-06\n", + " 0.000013\n", + " 0.990730\n", + " 0.008530\n", + " 0.000097\n", + " 1.116415e-04\n", + " 0.000215\n", + " 0.000111\n", + " 0.000037\n", + " 1.548404e-04\n", + " 1.946677e-07\n", " \n", " \n", " 4\n", " 200005.jpg\n", - " 0.000421\n", - " 0.996886\n", - " 0.002095\n", - " 0.000037\n", - " 0.000001\n", - " 0.000042\n", - " 0.000015\n", - " 0.000387\n", - " 4.106595e-07\n", - " 1.145299e-04\n", + " 0.000340\n", + " 0.999536\n", + " 0.000031\n", + " 0.000002\n", + " 6.857538e-07\n", + " 0.000003\n", + " 0.000007\n", + " 0.000029\n", + " 5.404088e-07\n", + " 4.985940e-05\n", " \n", " \n", " ...\n", @@ -2420,72 +2542,72 @@ " \n", " 20809\n", " 203469.jpg\n", - " 0.003314\n", - " 0.000015\n", - " 0.080585\n", - " 0.898532\n", - " 0.000060\n", - " 0.017272\n", - " 0.000069\n", - " 0.000152\n", - " 1.868006e-10\n", - " 5.368335e-07\n", + " 0.003061\n", + " 0.000017\n", + " 0.041731\n", + " 0.943745\n", + " 1.648944e-04\n", + " 0.010877\n", + " 0.000146\n", + " 0.000258\n", + " 1.524480e-07\n", + " 2.509265e-07\n", " \n", " \n", " 20810\n", " 203469.jpg\n", - " 0.001965\n", - " 0.000014\n", - " 0.012343\n", - " 0.981738\n", - " 0.000065\n", - " 0.003774\n", - " 0.000024\n", - " 0.000076\n", - " 1.609716e-11\n", - " 4.625642e-08\n", + " 0.000430\n", + " 0.000003\n", + " 0.002508\n", + " 0.993409\n", + " 2.613632e-05\n", + " 0.003580\n", + " 0.000007\n", + " 0.000036\n", + " 1.524176e-07\n", + " 1.595918e-07\n", " \n", " \n", " 20811\n", " 203469.jpg\n", - " 0.001480\n", - " 0.000011\n", - " 0.018145\n", - " 0.966826\n", - " 0.000135\n", - " 0.013271\n", - " 0.000024\n", - " 0.000107\n", - " 4.831413e-12\n", - " 1.844088e-08\n", + " 0.001051\n", + " 0.000012\n", + " 0.005702\n", + " 0.989972\n", + " 5.734707e-05\n", + " 0.003144\n", + " 0.000018\n", + " 0.000044\n", + " 1.524219e-07\n", + " 1.814212e-07\n", " \n", " \n", " 20812\n", " 203469.jpg\n", - " 0.001095\n", - " 0.000010\n", - " 0.025123\n", - " 0.962685\n", - " 0.000090\n", - " 0.010848\n", - " 0.000036\n", - " 0.000113\n", - " 3.836743e-11\n", - " 8.847861e-08\n", + " 0.000653\n", + " 0.000004\n", + " 0.005048\n", + " 0.990724\n", + " 3.223727e-05\n", + " 0.003505\n", + " 0.000012\n", + " 0.000021\n", + " 1.524177e-07\n", + " 1.626386e-07\n", " \n", " \n", " 20813\n", " 203469.jpg\n", - " 0.000647\n", + " 0.001572\n", " 0.000010\n", - " 0.012645\n", - " 0.981308\n", - " 0.000062\n", - " 0.005263\n", - " 0.000022\n", - " 0.000042\n", - " 4.801278e-12\n", - " 1.274869e-07\n", + " 0.025385\n", + " 0.965282\n", + " 1.030424e-04\n", + " 0.007472\n", + " 0.000058\n", + " 0.000118\n", + " 1.524270e-07\n", + " 1.779464e-07\n", " \n", " \n", "\n", @@ -2493,36 +2615,36 @@ "" ], "text/plain": [ - " image_id pred_0 pred_1 pred_2 pred_3 pred_4 pred_5 \\\n", - "0 200001.jpg 0.037337 0.007559 0.331767 0.036282 0.215647 0.347311 \n", - "1 200002.jpg 0.947430 0.001185 
0.035615 0.000533 0.012389 0.000408 \n", - "2 200003.jpg 0.286391 0.500729 0.072791 0.000385 0.000191 0.040129 \n", - "3 200004.jpg 0.000020 0.978054 0.017930 0.000190 0.000370 0.000252 \n", - "4 200005.jpg 0.000421 0.996886 0.002095 0.000037 0.000001 0.000042 \n", - "... ... ... ... ... ... ... ... \n", - "20809 203469.jpg 0.003314 0.000015 0.080585 0.898532 0.000060 0.017272 \n", - "20810 203469.jpg 0.001965 0.000014 0.012343 0.981738 0.000065 0.003774 \n", - "20811 203469.jpg 0.001480 0.000011 0.018145 0.966826 0.000135 0.013271 \n", - "20812 203469.jpg 0.001095 0.000010 0.025123 0.962685 0.000090 0.010848 \n", - "20813 203469.jpg 0.000647 0.000010 0.012645 0.981308 0.000062 0.005263 \n", + " image_id pred_0 pred_1 pred_2 pred_3 pred_4 \\\n", + "0 200001.jpg 0.157098 0.002815 0.596349 0.020590 1.148577e-01 \n", + "1 200002.jpg 0.983384 0.000652 0.014579 0.000139 6.825896e-05 \n", + "2 200003.jpg 0.168021 0.379674 0.186415 0.000225 1.850213e-03 \n", + "3 200004.jpg 0.000013 0.990730 0.008530 0.000097 1.116415e-04 \n", + "4 200005.jpg 0.000340 0.999536 0.000031 0.000002 6.857538e-07 \n", + "... ... ... ... ... ... ... \n", + "20809 203469.jpg 0.003061 0.000017 0.041731 0.943745 1.648944e-04 \n", + "20810 203469.jpg 0.000430 0.000003 0.002508 0.993409 2.613632e-05 \n", + "20811 203469.jpg 0.001051 0.000012 0.005702 0.989972 5.734707e-05 \n", + "20812 203469.jpg 0.000653 0.000004 0.005048 0.990724 3.223727e-05 \n", + "20813 203469.jpg 0.001572 0.000010 0.025385 0.965282 1.030424e-04 \n", "\n", - " pred_6 pred_7 pred_8 pred_9 \n", - "0 0.003662 0.020355 5.549208e-05 2.348856e-05 \n", - "1 0.000025 0.002401 1.316230e-05 1.048007e-06 \n", - "2 0.098647 0.000602 8.562033e-05 4.921535e-05 \n", - "3 0.000518 0.001085 1.576396e-03 4.549823e-06 \n", - "4 0.000015 0.000387 4.106595e-07 1.145299e-04 \n", - "... ... ... ... ... \n", - "20809 0.000069 0.000152 1.868006e-10 5.368335e-07 \n", - "20810 0.000024 0.000076 1.609716e-11 4.625642e-08 \n", - "20811 0.000024 0.000107 4.831413e-12 1.844088e-08 \n", - "20812 0.000036 0.000113 3.836743e-11 8.847861e-08 \n", - "20813 0.000022 0.000042 4.801278e-12 1.274869e-07 \n", + " pred_5 pred_6 pred_7 pred_8 pred_9 \n", + "0 0.095614 0.001854 0.010808 1.958451e-07 1.421986e-05 \n", + "1 0.000044 0.000008 0.001124 9.759868e-07 1.959656e-07 \n", + "2 0.036919 0.220253 0.001671 1.218772e-03 3.752479e-03 \n", + "3 0.000215 0.000111 0.000037 1.548404e-04 1.946677e-07 \n", + "4 0.000003 0.000007 0.000029 5.404088e-07 4.985940e-05 \n", + "... ... ... ... ... ... 
\n", + "20809 0.010877 0.000146 0.000258 1.524480e-07 2.509265e-07 \n", + "20810 0.003580 0.000007 0.000036 1.524176e-07 1.595918e-07 \n", + "20811 0.003144 0.000018 0.000044 1.524219e-07 1.814212e-07 \n", + "20812 0.003505 0.000012 0.000021 1.524177e-07 1.626386e-07 \n", + "20813 0.007472 0.000058 0.000118 1.524270e-07 1.779464e-07 \n", "\n", "[20814 rows x 11 columns]" ] }, - "execution_count": 29, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -2537,7 +2659,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -2578,72 +2700,72 @@ " \n", " 0\n", " 200001.jpg\n", - " 0.053227\n", - " 0.003212\n", - " 0.419196\n", - " 3.920345e-02\n", - " 0.310807\n", - " 0.149547\n", - " 0.004056\n", - " 0.020704\n", - " 1.978658e-05\n", - " 2.894970e-05\n", + " 0.127650\n", + " 0.001409\n", + " 0.599914\n", + " 0.017568\n", + " 0.136898\n", + " 0.106915\n", + " 0.001796\n", + " 0.007829\n", + " 8.801418e-06\n", + " 1.216593e-05\n", " \n", " \n", " 1\n", " 200002.jpg\n", - " 0.952464\n", - " 0.001455\n", - " 0.031027\n", - " 4.967228e-04\n", - " 0.008271\n", - " 0.000645\n", - " 0.000039\n", - " 0.005577\n", - " 2.432690e-05\n", - " 9.184693e-07\n", + " 0.937035\n", + " 0.000638\n", + " 0.060420\n", + " 0.000098\n", + " 0.000105\n", + " 0.000096\n", + " 0.000016\n", + " 0.001586\n", + " 6.087082e-06\n", + " 2.249314e-07\n", " \n", " \n", " 2\n", " 200003.jpg\n", - " 0.246870\n", - " 0.569154\n", - " 0.080173\n", - " 7.125317e-04\n", - " 0.001712\n", - " 0.058631\n", - " 0.041982\n", - " 0.000260\n", - " 2.588407e-04\n", - " 2.464455e-04\n", + " 0.120163\n", + " 0.523312\n", + " 0.106169\n", + " 0.000473\n", + " 0.000748\n", + " 0.042688\n", + " 0.201373\n", + " 0.002807\n", + " 1.389023e-03\n", + " 8.788786e-04\n", " \n", " \n", " 3\n", " 200004.jpg\n", - " 0.000058\n", - " 0.959784\n", - " 0.011034\n", - " 2.430165e-03\n", - " 0.003499\n", - " 0.001969\n", - " 0.013944\n", - " 0.003807\n", - " 3.462628e-03\n", - " 1.142663e-05\n", + " 0.000020\n", + " 0.888623\n", + " 0.006415\n", + " 0.001150\n", + " 0.000430\n", + " 0.004390\n", + " 0.000616\n", + " 0.001799\n", + " 9.654120e-02\n", + " 1.466518e-05\n", " \n", " \n", " 4\n", " 200005.jpg\n", - " 0.001487\n", - " 0.977418\n", - " 0.008626\n", - " 2.508827e-05\n", - " 0.000008\n", - " 0.000428\n", - " 0.000075\n", - " 0.011624\n", - " 2.858276e-06\n", - " 3.070692e-04\n", + " 0.000680\n", + " 0.998898\n", + " 0.000085\n", + " 0.000009\n", + " 0.000001\n", + " 0.000002\n", + " 0.000021\n", + " 0.000172\n", + " 1.743805e-06\n", + " 1.304403e-04\n", " \n", " \n", " ...\n", @@ -2662,72 +2784,72 @@ " \n", " 3464\n", " 203465.jpg\n", - " 0.000320\n", - " 0.005188\n", - " 0.004152\n", - " 9.781365e-01\n", - " 0.005305\n", - " 0.001066\n", - " 0.002319\n", - " 0.000410\n", - " 2.963319e-03\n", - " 1.392240e-04\n", + " 0.000224\n", + " 0.002143\n", + " 0.001514\n", + " 0.990281\n", + " 0.002657\n", + " 0.000401\n", + " 0.001074\n", + " 0.000134\n", + " 1.530934e-03\n", + " 4.091801e-05\n", " \n", " \n", " 3465\n", " 203466.jpg\n", - " 0.265845\n", - " 0.012218\n", - " 0.721779\n", - " 9.995165e-07\n", - " 0.000029\n", - " 0.000084\n", - " 0.000012\n", - " 0.000009\n", - " 2.496185e-05\n", - " 1.246285e-07\n", + " 0.250769\n", + " 0.007148\n", + " 0.741840\n", + " 0.000002\n", + " 0.000022\n", + " 0.000013\n", + " 0.000076\n", + " 0.000129\n", + " 2.629060e-07\n", + " 2.120279e-07\n", " \n", " \n", " 3466\n", " 203467.jpg\n", - " 0.926839\n", - " 0.007073\n", - " 
0.001850\n", - " 2.035212e-03\n", - " 0.029714\n", - " 0.015886\n", - " 0.000108\n", - " 0.016488\n", - " 2.400547e-07\n", - " 5.314636e-06\n", + " 0.960745\n", + " 0.004105\n", + " 0.001135\n", + " 0.000646\n", + " 0.016724\n", + " 0.008584\n", + " 0.000062\n", + " 0.007749\n", + " 2.438365e-04\n", + " 6.326832e-06\n", " \n", " \n", " 3467\n", " 203468.jpg\n", - " 0.008029\n", - " 0.002945\n", - " 0.025131\n", - " 3.647061e-05\n", - " 0.000677\n", - " 0.000099\n", - " 0.004348\n", - " 0.000237\n", - " 9.584755e-01\n", - " 2.236708e-05\n", + " 0.003675\n", + " 0.001097\n", + " 0.038018\n", + " 0.000038\n", + " 0.000483\n", + " 0.000310\n", + " 0.000223\n", + " 0.000208\n", + " 9.551883e-01\n", + " 7.596347e-04\n", " \n", " \n", " 3468\n", " 203469.jpg\n", - " 0.001868\n", - " 0.000015\n", - " 0.028723\n", - " 9.598277e-01\n", - " 0.000096\n", - " 0.009313\n", - " 0.000037\n", - " 0.000120\n", - " 5.089584e-11\n", - " 1.515332e-07\n", + " 0.001372\n", + " 0.000012\n", + " 0.015432\n", + " 0.977533\n", + " 0.000086\n", + " 0.005415\n", + " 0.000046\n", + " 0.000104\n", + " 1.524300e-07\n", + " 1.962799e-07\n", " \n", " \n", "\n", @@ -2735,36 +2857,36 @@ "" ], "text/plain": [ - " image_id pred_0 pred_1 pred_2 pred_3 pred_4 \\\n", - "0 200001.jpg 0.053227 0.003212 0.419196 3.920345e-02 0.310807 \n", - "1 200002.jpg 0.952464 0.001455 0.031027 4.967228e-04 0.008271 \n", - "2 200003.jpg 0.246870 0.569154 0.080173 7.125317e-04 0.001712 \n", - "3 200004.jpg 0.000058 0.959784 0.011034 2.430165e-03 0.003499 \n", - "4 200005.jpg 0.001487 0.977418 0.008626 2.508827e-05 0.000008 \n", - "... ... ... ... ... ... ... \n", - "3464 203465.jpg 0.000320 0.005188 0.004152 9.781365e-01 0.005305 \n", - "3465 203466.jpg 0.265845 0.012218 0.721779 9.995165e-07 0.000029 \n", - "3466 203467.jpg 0.926839 0.007073 0.001850 2.035212e-03 0.029714 \n", - "3467 203468.jpg 0.008029 0.002945 0.025131 3.647061e-05 0.000677 \n", - "3468 203469.jpg 0.001868 0.000015 0.028723 9.598277e-01 0.000096 \n", + " image_id pred_0 pred_1 pred_2 pred_3 pred_4 pred_5 \\\n", + "0 200001.jpg 0.127650 0.001409 0.599914 0.017568 0.136898 0.106915 \n", + "1 200002.jpg 0.937035 0.000638 0.060420 0.000098 0.000105 0.000096 \n", + "2 200003.jpg 0.120163 0.523312 0.106169 0.000473 0.000748 0.042688 \n", + "3 200004.jpg 0.000020 0.888623 0.006415 0.001150 0.000430 0.004390 \n", + "4 200005.jpg 0.000680 0.998898 0.000085 0.000009 0.000001 0.000002 \n", + "... ... ... ... ... ... ... ... \n", + "3464 203465.jpg 0.000224 0.002143 0.001514 0.990281 0.002657 0.000401 \n", + "3465 203466.jpg 0.250769 0.007148 0.741840 0.000002 0.000022 0.000013 \n", + "3466 203467.jpg 0.960745 0.004105 0.001135 0.000646 0.016724 0.008584 \n", + "3467 203468.jpg 0.003675 0.001097 0.038018 0.000038 0.000483 0.000310 \n", + "3468 203469.jpg 0.001372 0.000012 0.015432 0.977533 0.000086 0.005415 \n", "\n", - " pred_5 pred_6 pred_7 pred_8 pred_9 \n", - "0 0.149547 0.004056 0.020704 1.978658e-05 2.894970e-05 \n", - "1 0.000645 0.000039 0.005577 2.432690e-05 9.184693e-07 \n", - "2 0.058631 0.041982 0.000260 2.588407e-04 2.464455e-04 \n", - "3 0.001969 0.013944 0.003807 3.462628e-03 1.142663e-05 \n", - "4 0.000428 0.000075 0.011624 2.858276e-06 3.070692e-04 \n", - "... ... ... ... ... ... 
\n", - "3464 0.001066 0.002319 0.000410 2.963319e-03 1.392240e-04 \n", - "3465 0.000084 0.000012 0.000009 2.496185e-05 1.246285e-07 \n", - "3466 0.015886 0.000108 0.016488 2.400547e-07 5.314636e-06 \n", - "3467 0.000099 0.004348 0.000237 9.584755e-01 2.236708e-05 \n", - "3468 0.009313 0.000037 0.000120 5.089584e-11 1.515332e-07 \n", + " pred_6 pred_7 pred_8 pred_9 \n", + "0 0.001796 0.007829 8.801418e-06 1.216593e-05 \n", + "1 0.000016 0.001586 6.087082e-06 2.249314e-07 \n", + "2 0.201373 0.002807 1.389023e-03 8.788786e-04 \n", + "3 0.000616 0.001799 9.654120e-02 1.466518e-05 \n", + "4 0.000021 0.000172 1.743805e-06 1.304403e-04 \n", + "... ... ... ... ... \n", + "3464 0.001074 0.000134 1.530934e-03 4.091801e-05 \n", + "3465 0.000076 0.000129 2.629060e-07 2.120279e-07 \n", + "3466 0.000062 0.007749 2.438365e-04 6.326832e-06 \n", + "3467 0.000223 0.000208 9.551883e-01 7.596347e-04 \n", + "3468 0.000046 0.000104 1.524300e-07 1.962799e-07 \n", "\n", "[3469 rows x 11 columns]" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2776,7 +2898,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -2796,7 +2918,7 @@ "Length: 3469, dtype: object" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2808,7 +2930,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": { "execution": { "iopub.execute_input": "2022-06-09T09:09:41.251065Z", @@ -2922,7 +3044,7 @@ "[3469 rows x 2 columns]" ] }, - "execution_count": 32, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -2996,7 +3118,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.17" }, "vscode": { "interpreter": { From a2680de221516d0795d4b90a3504f1093da67a81 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 3 Aug 2023 15:04:45 +0000 Subject: [PATCH 07/49] new example --- examples/tutorials/Tutorial_8_CV_preset.ipynb | 233 ++++++++++++++++++ 1 file changed, 233 insertions(+) diff --git a/examples/tutorials/Tutorial_8_CV_preset.ipynb b/examples/tutorials/Tutorial_8_CV_preset.ipynb index 8a946476..18f7c68a 100644 --- a/examples/tutorials/Tutorial_8_CV_preset.ipynb +++ b/examples/tutorials/Tutorial_8_CV_preset.ipynb @@ -3055,6 +3055,239 @@ "sub[['image_id', 'label']]" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "No we can choose another model from timm. 
So we will use resnet50.a1_in1k, by default it uses vit_base_patch16_224.augreg_in21k" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "automl = TabularCVAutoML(task = task,\n", + " timeout=5 * 3600,\n", + " autocv_features={\"embed_model\": 'timm/tf_efficientnetv2_b0.in1k'},\n", + " cpu_limit = 2,\n", + " reader_params = {'cv': 5, 'random_state': 42})" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[14:37:43] Stdout logging level is INFO3.\n", + "[14:37:43] Task: multiclass\n", + "\n", + "[14:37:43] Start automl preset with listed constraints:\n", + "[14:37:43] - time: 18000.00 seconds\n", + "[14:37:43] - CPU: 2 cores\n", + "[14:37:43] - memory: 16 GB\n", + "\n", + "[14:37:43] \u001b[1mTrain data shape: (114477, 5)\u001b[0m\n", + "\n", + "[14:37:43] Layer \u001b[1m1\u001b[0m train process start. Time left 17999.80 secs\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2795cb31118c42a8a3c0753468f54c4a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading model.safetensors: 0%| | 0.00/28.8M [00:00 Date: Mon, 7 Aug 2023 08:47:09 +0000 Subject: [PATCH 08/49] chnged autonlp params --- lightautoml/automl/presets/text_config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightautoml/automl/presets/text_config.yml b/lightautoml/automl/presets/text_config.yml index 0d3b9d37..45886d1c 100755 --- a/lightautoml/automl/presets/text_config.yml +++ b/lightautoml/automl/presets/text_config.yml @@ -282,7 +282,7 @@ autonlp_params: # 'pooled_bert' - embeddings from pooled bert output # 'wat' - weighted average transformers # borep and random_lstm: https://arxiv.org/abs/1901.10444 - model_name: 'random_lstm_bert' + model_name: 'pooled_bert' # dict with params of random_lstm, bert_embedder, borep or wat # check corresponding classes for details From 81c444f6120196dc631dcbb57565461bac2bb784 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 10 Aug 2023 10:02:44 +0000 Subject: [PATCH 09/49] add autoint --- lightautoml/automl/presets/image_config.yml | 2 + lightautoml/automl/presets/tabular_config.yml | 8 +- lightautoml/automl/presets/tabular_presets.py | 2 + lightautoml/automl/presets/text_config.yml | 2 + .../automl/presets/time_series_config.yml | 2 + lightautoml/ml_algo/dl_model.py | 51 ++- .../torch_based/autoint/autoint_utils.py | 307 ++++++++++++++++ .../ml_algo/torch_based/autoint/ghost_norm.py | 79 ++++ lightautoml/ml_algo/torch_based/nn_models.py | 208 ++++++++++- lightautoml/text/nn_model.py | 337 +++++++++++++++++- 10 files changed, 964 insertions(+), 34 deletions(-) create mode 100644 lightautoml/ml_algo/torch_based/autoint/autoint_utils.py create mode 100644 lightautoml/ml_algo/torch_based/autoint/ghost_norm.py diff --git a/lightautoml/automl/presets/image_config.yml b/lightautoml/automl/presets/image_config.yml index b937ae83..01c04b30 100755 --- a/lightautoml/automl/presets/image_config.yml +++ b/lightautoml/automl/presets/image_config.yml @@ -242,6 +242,8 @@ nn_params: # Look for NN train params here. 
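   # note: this patch also registers 'autoint' and 'autoint_emb_v2' as available
   # nn models in tabular_presets.py (see that hunk below), so an override such
   # as `model: autoint` should be accepted here as well (illustrative value,
   # not a default changed by this patch)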
# str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 # use model with custom embeddings model_with_emb: false # tune custom network diff --git a/lightautoml/automl/presets/tabular_config.yml b/lightautoml/automl/presets/tabular_config.yml index 0560d046..691609f4 100755 --- a/lightautoml/automl/presets/tabular_config.yml +++ b/lightautoml/automl/presets/tabular_config.yml @@ -128,6 +128,8 @@ nn_params: # Look for NN train params here. # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 # use model with custom embeddings model_with_emb: false # tune custom network @@ -146,7 +148,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -154,7 +156,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256, 128, 64] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block @@ -178,7 +180,7 @@ nn_params: # scheduler sch: ReduceLROnPlateau # params of ReduceLROnPlateau scheduler - scheduler_params: { 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 } + scheduler_params: {} #{ 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 } # using snapshot ensembles # https://arxiv.org/abs/1704.00109 is_snap: false diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index f4d1101d..cf4ba8fe 100755 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -607,6 +607,8 @@ def create_automl(self, **fit_args): "linear_layer", "_linear_layer", "node", + "autoint", + "autoint_emb_v2", ] available_nn_models = available_nn_models + [x + "_tuned" for x in available_nn_models] nn_models = [ diff --git a/lightautoml/automl/presets/text_config.yml b/lightautoml/automl/presets/text_config.yml index 45886d1c..14d9c3f1 100755 --- a/lightautoml/automl/presets/text_config.yml +++ b/lightautoml/automl/presets/text_config.yml @@ -120,6 +120,8 @@ linear_l2_params: # params for NN model nn_params: + # embedding_size if needed + embedding_size: 10 # early stopping and scheduler use metric stop_by_metric: False random_state: 42 diff --git a/lightautoml/automl/presets/time_series_config.yml b/lightautoml/automl/presets/time_series_config.yml index 0b4e84b1..2e4cbdb7 100644 --- a/lightautoml/automl/presets/time_series_config.yml +++ b/lightautoml/automl/presets/time_series_config.yml @@ -132,6 +132,8 @@ nn_params: # Look for NN train params here. 
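   # note: dl_model.py (below) forwards embedding_size to both the continuous
   # and categorical embedders via cont_params / cat_params, so this value
   # mainly matters for embedding-based nets such as the new autoint variants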
# str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 # use model with custom embeddings model_with_emb: false # tune custom network diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 8db9d7db..8b968f35 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -1,5 +1,6 @@ """Neural net for tabular datasets.""" + from lightautoml.utils.installation import __validate_extra_deps @@ -43,7 +44,7 @@ from ..ml_algo.base import TabularDataset from ..ml_algo.base import TabularMLAlgo from ..pipelines.utils import get_columns_by_role -from ..text.nn_model import CatEmbedder +from ..text.nn_model import CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding from ..text.nn_model import ContEmbedder from ..text.nn_model import TextBert from ..text.nn_model import TorchUniversalModel @@ -63,6 +64,7 @@ from .torch_based.nn_models import LinearLayer from .torch_based.nn_models import ResNetModel from .torch_based.nn_models import _LinearLayer +from .torch_based.nn_models import AutoInt logger = logging.getLogger(__name__) @@ -76,6 +78,32 @@ "_linear_layer": _LinearLayer, "snn": SNN, "node": NODE, + "autoint": AutoInt, + "autoint_emb_v2": AutoInt, +} +cat_embedder_by_name = { + "denselight": CatEmbedder, + "dense": CatEmbedder, + "resnet": CatEmbedder, + "mlp": CatEmbedder, + "linear_layer": CatEmbedder, + "_linear_layer": CatEmbedder, + "snn": CatEmbedder, + "node": CatEmbedder, + "autoint": BasicEmbedding, + "autoint_emb_v2": DefaultEmbedding, +} +cont_embedder_params_by_name = { + "denselight": ContEmbedder, + "dense": ContEmbedder, + "resnet": ContEmbedder, + "mlp": ContEmbedder, + "linear_layer": ContEmbedder, + "_linear_layer": ContEmbedder, + "snn": ContEmbedder, + "node": ContEmbedder, + "autoint": LinearEmbedding, + "autoint_emb_v2": DenseEmbedding, } @@ -245,23 +273,29 @@ def _infer_params(self): if isinstance(params[p_name], str): params[p_name] = getattr(module, params[p_name]) + # params = self._select_params(params) model = Trainer( net=TorchUniversalModel if not params["model_with_emb"] else params["model"], net_params={ "task": self.task, - "cont_embedder": ContEmbedder if is_cont else None, + "cont_embedder": cont_embedder_params_by_name[params["model"]] if is_cont else None, "cont_params": { - "num_dims": params["cont_dim"], + "num_dims": params["num_dims"], "input_bn": params["input_bn"], + "device": params["device"], + "embedding_size": params["embedding_size"], } if is_cont else None, - "cat_embedder": CatEmbedder if is_cat else None, + "cat_embedder": cat_embedder_by_name[params["model"]] if is_cat else None, "cat_params": { + "cat_vc": params["cat_vc"], "cat_dims": params["cat_dims"], "emb_dropout": params["emb_dropout"], "emb_ratio": params["emb_ratio"], "max_emb_size": params["max_emb_size"], + "embedding_size": params["embedding_size"], + "device": params["device"], } if is_cat else None, @@ -350,6 +384,7 @@ def _init_params_on_input(self, train_valid_iterator) -> dict: # Cat_features are needed to be preprocessed with LE, where 0 = not known category valid = train_valid_iterator.get_validation_data() + cat_value_counts = [] for cat_feature in new_params["cat_features"]: num_unique_categories = ( max( @@ -358,18 +393,20 @@ def _init_params_on_input(self, train_valid_iterator) -> dict: ) + 1 ) + values, counts = np.unique(train_valid_iterator.train[:, cat_feature].data, return_counts=True) + 
cat_value_counts.append(dict(zip(values, counts))) cat_dims.append(num_unique_categories) new_params["cat_dims"] = cat_dims - + new_params["cat_vc"] = cat_value_counts new_params["cont_features"] = get_columns_by_role(train_valid_iterator.train, "Numeric") - new_params["cont_dim"] = len(new_params["cont_features"]) + new_params["num_dims"] = len(new_params["cont_features"]) new_params["text_features"] = get_columns_by_role(train_valid_iterator.train, "Text") new_params["bias"] = self.get_mean_target(target, task_name) if params["init_bias"] else None logger.debug(f'number of text features: {len(new_params["text_features"])} ') logger.debug(f'number of categorical features: {len(new_params["cat_features"])} ') - logger.debug(f'number of continuous features: {new_params["cont_dim"]} ') + logger.debug(f'number of continuous features: {new_params["num_dims"]} ') return new_params diff --git a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py new file mode 100644 index 00000000..c14944f5 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py @@ -0,0 +1,307 @@ +"""PyTorch modules for the AutoInt model.""" +# Paper: https://arxiv.org/pdf/1810.11921v2.pdf +# Official implementation: https://github.com/DeepGraphLearning/RecommenderSystems + +from collections import namedtuple +from typing import Optional, Type, Union +import torch +from torch import nn, Tensor +from torch.nn import functional as F + + +EmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "output_size"]) +UniformEmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "embedding_size", "output_size"]) + +MODULE_INIT_DOC = """ +Parameters +---------- +output_size : int + number of final output values; i.e., number of targets for + regression or number of classes for classification +embedding_num : EmbeddingBase or None + initialized and fit embedding for numeric fields +embedding_cat : EmbeddingBase or None + initialized and fit embedding for categorical fields +embedding_l1_reg : float, optional + value for l1 regularization of embedding vectors; default is 0.0 +embedding_l2_reg : float, optional + value for l2 regularization of embedding vectors; default is 0.0 +{} +mlp_hidden_sizes : int or iterable of int, optional + sizes for the linear transformations between the MLP input and + the output size needed based on the target; default is (512, 256, 128, 64) +mlp_activation : subclass of torch.nn.Module (uninitialized), optional + default is nn.LeakyReLU +mlp_use_bn : boolean, optional + whether to use batch normalization between MLP linear layers; + default is True +mlp_bn_momentum : float, optional + only used if `mlp_use_bn` is True; default is 0.01 +mlp_ghost_batch : int or None, optional + only used if `mlp_use_bn` is True; size of batch in "ghost batch norm"; + if None, normal batch norm is used; defualt is None +mlp_dropout : float, optional + whether and how much dropout to use between MLP linear layers; + `0.0 <= mlp_dropout < 1.0`; default is 0.0 +mlp_use_skip : boolean, optional + use a side path in the MLP containing just the optional leaky gate + plus single linear layer; default is True +mlp_l1_reg : float, optional + value for l1 regularization of MLP weights; default is 0.0 +mlp_l2_reg : float, optional + value for l2 regularization of MLP weights; default is 0.0 +use_leaky_gate : boolean, optional + whether to include "leaky gate" layers; default is True +loss_fn : "auto" or PyTorch loss function, optional + default is 
"auto" +device : string or torch.device, optional + default is "cpu" + +""" + + +class LeakyGate(nn.Module): + """LeakyGate from https://github.com/jrfiedler/xynn. + + This performs an element-wise linear transformation followed by a chosen + activation; the default activation is nn.LeakyReLU. Fields may be + represented by individual values or vectors of values (i.e., embedded). + + Input needs to be shaped like (num_rows, num_fields) or + (num_rows, num_fields, embedding_size) + + Args: + input_size: input_size. + bias: if to use bias. + activation: activation function. + device: device. + """ + + def __init__( + self, + input_size: int, + bias: bool = True, + activation: Type[nn.Module] = nn.LeakyReLU, + device: Union[str, torch.device] = "cpu", + ): + super().__init__() + self.weight = nn.Parameter(torch.normal(mean=0, std=1.0, size=(1, input_size))) + self.bias = nn.Parameter(torch.zeros(size=(1, input_size)), requires_grad=bias) + self.activation = activation() + self.to(device) + + def forward(self, X: Tensor) -> Tensor: + """Transform the input tensor. + + Args: + X : torch.Tensor + + Returns: + torch.Tensor + """ + out = X + if len(X.shape) > 2: + out = out.reshape((X.shape[0], -1)) + out = out * self.weight + self.bias + if len(X.shape) > 2: + out = out.reshape(X.shape) + out = self.activation(out) + return out + + +def _initialized_tensor(*sizes): + weight = nn.Parameter(torch.Tensor(*sizes)) + nn.init.kaiming_uniform_(weight) + return weight + + +class AttnInteractionLayer(nn.Module): + """The attention interaction layer for the AutoInt model. + + Paper for the original AutoInt model: https://arxiv.org/pdf/1810.11921v2.pdf + + Args: + field_input_size : int + original embedding size for each field + field_output_size : int, optional + embedding size after transformation; default is 8 + num_heads : int, optional + number of attention heads; default is 2 + activation : subclass of torch.nn.Module or None, optional + applied to the W tensors; default is None + use_residual : bool, optional + default is True + dropout : float, optional + default is 0.1 + normalize : bool, optional + default is True + ghost_batch_size : int or None, optional + only used if `use_bn` is True; size of batch in "ghost batch norm"; + if None, normal batch norm is used; defualt is None + device : string or torch.device, optional + default is "cpu" + + """ + + def __init__( + self, + field_input_size: int, + field_output_size: int = 8, + num_heads: int = 2, + activation: Optional[Type[nn.Module]] = None, + use_residual: bool = True, + dropout: float = 0.1, + normalize: bool = True, + ghost_batch_size: Optional[int] = None, + device: Union[str, torch.device] = "cpu", + ): + super().__init__() + + self.use_residual = use_residual + + self.W_q = _initialized_tensor(field_input_size, field_output_size, num_heads) + self.W_k = _initialized_tensor(field_input_size, field_output_size, num_heads) + self.W_v = _initialized_tensor(field_input_size, field_output_size, num_heads) + + if use_residual: + self.W_r = _initialized_tensor(field_input_size, field_output_size * num_heads) + else: + self.W_r = None + + if activation: + self.w_act = activation() + else: + self.w_act = nn.Identity() + + if dropout > 0.0: + self.dropout = nn.Dropout(dropout) + else: + self.dropout = nn.Identity() + + if normalize: + self.layer_norm = nn.LayerNorm(field_output_size * num_heads) + else: + self.layer_norm = nn.Identity() + + self.to(device) + + def forward(self, x: Tensor) -> Tensor: + """Transform the input tensor with attention 
interaction. + + Args: + x : torch.Tensor + 3-d tensor; for example, embedded numeric and/or categorical values, + or the output of a previous attention interaction layer + + Returns: + torch.Tensor + + """ + # R : # rows + # F, D : # fields + # I : field embedding size in + # O : field embedding size out + # H : # heads + num_rows, num_fields, _ = x.shape # R, F, I + + # (R, F, I) * (I, O, H) -> (R, F, O, H) + qrys = torch.tensordot(x, self.w_act(self.W_q), dims=([-1], [0])) + keys = torch.tensordot(x, self.w_act(self.W_k), dims=([-1], [0])) + vals = torch.tensordot(x, self.w_act(self.W_v), dims=([-1], [0])) + if self.use_residual: + rsdl = torch.tensordot(x, self.w_act(self.W_r), dims=([-1], [0])) + + product = torch.einsum("rdoh,rfoh->rdfh", qrys, keys) # (R, F, F, H) + + alpha = F.softmax(product, dim=2) # (R, F, F, H) + alpha = self.dropout(alpha) + + # (R, F, F, H) * (R, F, O, H) -> (R, F, O, H) + out = torch.einsum("rfdh,rfoh->rfoh", alpha, vals) + out = out.reshape((num_rows, num_fields, -1)) # (R, F, O * H) + if self.use_residual: + out = out + rsdl # (R, F, O * H) + out = F.leaky_relu(out) + out = self.layer_norm(out) + + return out + + +class AttnInteractionBlock(nn.Module): + """A collection of AttnInteractionLayers, followed by an optional "leaky gate" and then a linear layer. + + This block is originally for the AutoInt model. + + Code from: https://github.com/jrfiedler/xynn + + Args: + field_input_size : int + original embedding size for each field + field_output_size : int, optional + embedding size after transformation; default is 8 + num_layers : int, optional + number of attention layers; default is 3 + num_heads : int, optional + number of attention heads per layer; default is 2 + activation : subclass of torch.nn.Module or None, optional + applied to the W tensors; default is None + use_residual : bool, optional + default is True + dropout : float, optional + default is 0.0 + normalize : bool, optional + default is True + ghost_batch_size : int or None, optional + only used if `use_bn` is True; size of batch in "ghost batch norm"; + if None, normal batch norm is used; defualt is None + device : string or torch.device, optional + default is "cpu" + """ + + def __init__( + self, + field_input_size: int, + field_output_size: int = 8, + num_layers: int = 3, + num_heads: int = 2, + activation: Optional[Type[nn.Module]] = None, + use_residual: bool = True, + dropout: float = 0.1, + normalize: bool = True, + ghost_batch_size: Optional[int] = None, + device: Union[str, torch.device] = "cpu", + ): + super().__init__() + + layers = [] + for _ in range(num_layers): + layers.append( + AttnInteractionLayer( + field_input_size, + field_output_size, + num_heads, + activation, + use_residual, + dropout, + normalize, + ghost_batch_size, + device, + ) + ) + field_input_size = field_output_size * num_heads + + self.layers = nn.Sequential(*layers) + self.to(device) + + def forward(self, x: Tensor) -> Tensor: + """Transform the input tensor. + + Args: + x : torch.Tensor + 3-d tensor, usually embedded numeric and/or categorical values + + Returns: + torch.Tensor + """ + out = self.layers(x) + return out diff --git a/lightautoml/ml_algo/torch_based/autoint/ghost_norm.py b/lightautoml/ml_algo/torch_based/autoint/ghost_norm.py new file mode 100644 index 00000000..da0cbc2a --- /dev/null +++ b/lightautoml/ml_algo/torch_based/autoint/ghost_norm.py @@ -0,0 +1,79 @@ +"""Module for Ghost Batch Norm and variations. 
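
A note on the attention layer just defined: the einsum contractions preserve the (rows, fields) layout and only change the trailing embedding axis. A minimal shape-check sketch (all sizes here are arbitrary, chosen only for illustration; `field_output_size=8` and `num_heads=2` are the defaults from the signature above):

    import torch

    # Hypothetical sizes: 32 rows, 5 fields, input embedding of 10.
    layer = AttnInteractionLayer(field_input_size=10)
    x = torch.randn(32, 5, 10)          # (R, F, I)
    out = layer(x)
    assert out.shape == (32, 5, 8 * 2)  # (R, F, O * H)
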
+ +Ghost Batch Norm: https://arxiv.org/pdf/1705.08741.pdf + +""" + +from math import ceil +from typing import Union + +import torch +from torch import Tensor +from torch import nn + + +class GhostNorm(nn.Module): + """Ghost Normalization. + + https://arxiv.org/pdf/1705.08741.pdf + + Args: + inner_norm : torch.nn.Module (initialiezd) + examples: `nn.BatchNorm1d`, `nn.LayerNorm` + virtual_batch_size : int + device : string or torch.device, optional + default is "cpu" + """ + + def __init__( + self, + inner_norm: nn.Module, + virtual_batch_size: int, + device: Union[str, torch.device] = "cpu", + ): + super().__init__() + self.virtual_batch_size = virtual_batch_size + self.inner_norm = inner_norm + self.to(device) + + def forward(self, x: Tensor) -> Tensor: + """Transform the input tensor. + + Args: + x : torch.Tensor + + Returns: + torch.Tensor + + """ + chunk_size = int(ceil(x.shape[0] / self.virtual_batch_size)) + chunk_norm = [self.inner_norm(chunk) for chunk in x.chunk(chunk_size, dim=0)] + return torch.cat(chunk_norm, dim=0) + + +class GhostBatchNorm(GhostNorm): + """Ghost Normalization, using BatchNorm1d as inner normalization. + + https://arxiv.org/pdf/1705.08741.pdf + + Args: + num_features : int + virtual_batch_size : int, optional + default is 64 + momentum : float, optional + default is 0.1 + device : string or torch.device, optional + default is "cpu" + """ + + def __init__( + self, + num_features: int, + virtual_batch_size: int = 64, + momentum: float = 0.1, + device: Union[str, torch.device] = "cpu", + ): + super().__init__( + inner_norm=nn.BatchNorm1d(num_features, momentum=momentum), + virtual_batch_size=virtual_batch_size, + ) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 119e0779..291a2587 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -1,13 +1,15 @@ """Torch models.""" from collections import OrderedDict -from typing import List +from typing import List, Tuple, Type from typing import Optional from typing import Union import numpy as np import torch import torch.nn as nn +from lightautoml.ml_algo.torch_based.autoint.autoint_utils import AttnInteractionBlock, LeakyGate +from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock from lightautoml.ml_algo.torch_based.node_nn_model import Lambda @@ -80,20 +82,28 @@ def __init__( use_bn: bool = True, use_noise: bool = False, device: torch.device = torch.device("cuda:0"), + bn_momentum: float = 0.1, + ghost_batch: Optional[int] = None, **kwargs, ): super(DenseLightBlock, self).__init__() self.features = nn.Sequential(OrderedDict([])) - + self.features.add_module("dense", nn.Linear(n_in, n_out, bias=(not use_bn))) if use_bn: - self.features.add_module("norm", nn.BatchNorm1d(n_in)) + if ghost_batch is None: + self.features.add_module("norm", nn.BatchNorm1d(n_out, momentum=bn_momentum)) + else: + self.features.add_module("norm", GhostBatchNorm(n_out, ghost_batch, momentum=bn_momentum)) + + self.features.add_module("act", act_fun()) + if drop_rate: self.features.add_module("dropout", nn.Dropout(p=drop_rate)) if use_noise: self.features.add_module("noise", GaussianNoise(noise_std, device)) - self.features.add_module("dense", nn.Linear(n_in, n_out)) - self.features.add_module("act", act_fun()) + # self.features.add_module("dense", nn.Linear(n_in, n_out)) + # self.features.add_module("act", act_fun()) def forward(self, 
x: torch.Tensor) -> torch.Tensor: """Forward-pass.""" @@ -115,9 +125,14 @@ class DenseLightModel(nn.Module): num_init_features: If not none add fc layer before model with certain dim. use_bn: Use BatchNorm. use_noise: Use noise. - concat_input: Concatenate input to all hidden layers. + concat_input: Concatenate input to all hidden layers. # MLP False + dropout_first: Use dropout in the first layer or not. + bn_momentum: BatchNorm momentum + ghost_batch: If not none use GhoastNorm with ghost_batch. + leaky_gate: Use LeakyGate or not. + use_skip: Use another Linear model to blend them after. + weighted_sum: Use weighted blender or half-half. device: Device to compute on. - """ def __init__( @@ -129,21 +144,32 @@ def __init__( 750, ], drop_rate: Union[float, List[float]] = 0.1, - act_fun: nn.Module = nn.ReLU, + act_fun: nn.Module = nn.LeakyReLU, noise_std: float = 0.05, num_init_features: Optional[int] = None, use_bn: bool = True, use_noise: bool = False, concat_input: bool = True, + dropout_first: bool = True, + bn_momentum: float = 0.1, + ghost_batch: Optional[int] = 64, + leaky_gate: bool = True, + use_skip: bool = True, + weighted_sum: bool = True, device: torch.device = torch.device("cuda:0"), **kwargs, ): super(DenseLightModel, self).__init__() + if isinstance(hidden_size, int): + hidden_size = [hidden_size] + if isinstance(drop_rate, float): - drop_rate = [drop_rate] * len(hidden_size) + drop_rate = [drop_rate] * (len(hidden_size) + (1 if dropout_first else 0)) - assert len(hidden_size) == len(drop_rate), "Wrong number hidden_sizes/drop_rates. Must be equal." + assert ( + len(hidden_size) == len(drop_rate) if not dropout_first else 1 + len(hidden_size) == len(drop_rate) + ), "Wrong number hidden_sizes/drop_rates. Must be equal." self.concat_input = concat_input num_features = n_in if num_init_features is None else num_init_features @@ -152,6 +178,13 @@ def __init__( if num_init_features is not None: self.features.add_module("dense0", nn.Linear(n_in, num_features)) + if leaky_gate: + self.features.add_module("leakygate0", LeakyGate(n_in)) + + if dropout_first and drop_rate[0] > 0: + self.features.add_module("dropout0", nn.Dropout(drop_rate[0])) + drop_rate = drop_rate[1:] + for i, hid_size in enumerate(hidden_size): block = DenseLightBlock( n_in=num_features, @@ -162,6 +195,8 @@ def __init__( use_bn=use_bn, use_noise=use_noise, device=device, + bn_momentum=bn_momentum, + ghost_batch=ghost_batch, ) self.features.add_module("denseblock%d" % (i + 1), block) @@ -172,16 +207,35 @@ def __init__( num_features = hidden_size[-1] self.fc = nn.Linear(num_features, n_out) + self.use_skip = use_skip + if use_skip: + skip_linear = nn.Linear(n_in, n_out) + if leaky_gate: + self.skip_layers = nn.Sequential(LeakyGate(n_in), skip_linear) + else: + self.skip_layers = skip_linear + if weighted_sum: + self.mix = nn.Parameter(torch.tensor([0.0])) + else: + self.mix = torch.tensor([0.0], device=device) + else: + self.skip_layers = None + self.mix = None - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, X: torch.Tensor) -> torch.Tensor: """Forward-pass.""" + x = X input = x.detach().clone() for name, layer in self.features.named_children(): if name != "denseblock1" and name != "dense0" and self.concat_input: x = torch.cat([x, input], 1) x = layer(x) - x = self.fc(x) - return x + out = self.fc(x) + if self.use_skip: + mix = torch.sigmoid(self.mix) + skip_out = self.skip_layers(X) + out = mix * skip_out + (1 - mix) * out + return out class MLP(DenseLightModel): @@ -197,8 +251,13 @@ class 
MLP(DenseLightModel):
     num_init_features: If not none add fc layer before model with certain dim.
     use_bn: Use BatchNorm.
     use_noise: Use noise.
+        dropout_first: Use dropout in the first layer or not.
+        bn_momentum: BatchNorm momentum.
+        ghost_batch: If not None, use GhostNorm with virtual batch size ghost_batch.
+        leaky_gate: Use LeakyGate or not.
+        use_skip: Add a parallel linear skip path and blend it with the main output.
+        weighted_sum: Use a learnable weighted blend or a fixed half-half mix.
         device: Device to compute on.
-
     """
 
     def __init__(self, *args, **kwargs):
@@ -796,3 +855,124 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.features1(x)
         x = self.features2(x)
         return x.view(x.shape[0], -1)
+
+
+class AutoInt(nn.Module):
+    """The AutoInt model from https://arxiv.org/pdf/1810.11921v2.pdf.
+
+    Args:
+        n_in: Input dim (number of fields).
+        embedding_size: Embedding size of each input field.
+        n_out: Output dim.
+        attn_embedding_size: Field embedding size inside the attention layers.
+        attn_num_layers: Number of attention interaction layers.
+        attn_num_heads: Number of attention heads per layer.
+        attn_activation: Activation applied to the attention W tensors.
+        attn_use_residual: Use residual connections in the attention layers.
+        attn_dropout: Dropout rate inside the attention layers.
+        attn_normalize: Use LayerNorm inside the attention layers.
+        attn_use_mlp: Put an MLP (rather than a single linear layer) after attention.
+        mlp_hidden_sizes: Hidden sizes for the MLP branches.
+        mlp_activation: Activation function for the MLP branches.
+        mlp_use_bn: Use BatchNorm in the MLP branches.
+        mlp_bn_momentum: BatchNorm momentum.
+        mlp_ghost_batch: If not None, use GhostNorm with this virtual batch size.
+        mlp_dropout: Dropout rate in the MLP branches.
+        mlp_use_skip: Use the linear skip path in the MLP branches.
+        use_leaky_gate: Use LeakyGate layers.
+        weighted_sum: Use a learnable weighted blend of the attention and MLP branches.
+        device: Device to compute on.
+    """
+
+    def __init__(
+        self,
+        n_in: int,
+        embedding_size: int,
+        n_out: int = 1,
+        attn_embedding_size: int = 8,
+        attn_num_layers: int = 3,
+        attn_num_heads: int = 2,
+        attn_activation: Optional[Type[nn.Module]] = None,
+        attn_use_residual: bool = True,
+        attn_dropout: float = 0.1,
+        attn_normalize: bool = True,
+        attn_use_mlp: bool = True,
+        mlp_hidden_sizes: Union[int, Tuple[int, ...], List[int]] = (512, 256, 128, 64),
+        mlp_activation: Type[nn.Module] = nn.LeakyReLU,
+        mlp_use_bn: bool = True,
+        mlp_bn_momentum: float = 0.1,
+        mlp_ghost_batch: Optional[int] = 16,
+        mlp_dropout: float = 0.0,
+        mlp_use_skip: bool = True,
+        use_leaky_gate: bool = True,
+        weighted_sum: bool = True,
+        device: Union[str, torch.device] = "cpu",
+        **kwargs,
+    ):
+        super(AutoInt, self).__init__()
+        device = torch.device(device)
+
+        if use_leaky_gate:
+            self.attn_gate = LeakyGate(n_in * embedding_size, device=device)
+        else:
+            self.attn_gate = nn.Identity()
+
+        self.attn_interact = AttnInteractionBlock(
+            field_input_size=embedding_size,
+            field_output_size=attn_embedding_size,
+            num_layers=attn_num_layers,
+            num_heads=attn_num_heads,
+            activation=attn_activation,
+            use_residual=attn_use_residual,
+            dropout=attn_dropout,
+            normalize=attn_normalize,
+            ghost_batch_size=mlp_ghost_batch,
+            device=device,
+        )
+
+        self.attn_final = MLP(
+            n_in=n_in * attn_embedding_size * attn_num_heads,
+            hidden_size=(mlp_hidden_sizes if mlp_hidden_sizes and attn_use_mlp else []),
+            n_out=n_out,
+            act_fun=mlp_activation,
+            drop_rate=mlp_dropout,
+            use_bn=mlp_use_bn,
+            bn_momentum=mlp_bn_momentum,
+            ghost_batch=mlp_ghost_batch,
+            leaky_gate=use_leaky_gate,
+            use_skip=mlp_use_skip,
+            device=device,
+        )
+
+        if mlp_hidden_sizes:
+            self.mlp = MLP(
+                n_in=n_in * embedding_size,
+                hidden_size=mlp_hidden_sizes,
+                n_out=n_out,
+                act_fun=mlp_activation,
+                drop_rate=mlp_dropout,
+                use_bn=mlp_use_bn,
+                bn_momentum=mlp_bn_momentum,
+                ghost_batch=mlp_ghost_batch,
+                leaky_gate=use_leaky_gate,
+                use_skip=mlp_use_skip,
+                device=device,
+            )
+            if weighted_sum:
+                self.mix = nn.Parameter(torch.tensor([0.0], device=device))
+            else:
+                self.mix = torch.tensor([0.0], device=device)
+        else:
+            self.mlp = None
+            self.mix = None
+
+    def forward(self, embedded: torch.Tensor) -> torch.Tensor:
+        """Transform the input tensor.
+ + Args: + embedded : torch.Tensor + embedded fields + + Returns: + torch.Tensor + + """ + out = self.attn_gate(embedded) + out = self.attn_interact(out) + out = self.attn_final(out.reshape((out.shape[0], -1))) + if self.mlp is not None: + embedded_2d = embedded.reshape((embedded.shape[0], -1)) + mix = torch.sigmoid(self.mix) + out = mix * out + (1 - mix) * self.mlp(embedded_2d) + return out diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 276a60cd..8366af82 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -2,17 +2,18 @@ import logging -from typing import Any +from typing import Any, List, Tuple, Type from typing import Callable from typing import Dict from typing import Optional from typing import Sequence from typing import Union - +from functools import reduce import numpy as np import torch import torch.nn as nn - +from torch import Tensor +import operator try: from transformers import AutoModel @@ -175,11 +176,7 @@ class CatEmbedder(nn.Module): """ def __init__( - self, - cat_dims: Sequence[int], - emb_dropout: bool = 0.1, - emb_ratio: int = 3, - max_emb_size: int = 50, + self, cat_dims: Sequence[int], emb_dropout: bool = 0.1, emb_ratio: int = 3, max_emb_size: int = 50, **kwargs ): super(CatEmbedder, self).__init__() emb_dims = [(int(x), int(min(max_emb_size, max(1, (x + 1) // emb_ratio)))) for x in cat_dims] @@ -219,7 +216,7 @@ class ContEmbedder(nn.Module): """ - def __init__(self, num_dims: int, input_bn: bool = True): + def __init__(self, num_dims: int, input_bn: bool = True, **kwargs): super(ContEmbedder, self).__init__() self.n_out = num_dims self.bn = nn.Identity() @@ -243,6 +240,321 @@ def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: return output +class BasicEmbedding(nn.Module): + """A basic embedding that creates an embedded vector for each field value from https://github.com/jrfiedler/xynn. + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + device : string or torch.device + + """ + + def __init__( + self, cat_vc: Sequence[Dict], embedding_size: int = 10, device: Union[str, torch.device] = "cuda:0", **kwargs + ): + super().__init__() + self._device = device + self._isfit = False + self.num_fields = 0 + self.output_size = 0 + self.lookup: Dict[Tuple[int, Any], int] = {} + self.lookup_nan: Dict[int, int] = {} + self.num_values = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(cat_vc) + self.cat_len = len(cat_vc) + + def _from_summary(self, uniques: List[Union[List, Tensor, np.ndarray]]): + lookup = {} + lookup_nan = {} + num_values = 0 + for fieldnum, field in enumerate(uniques): + for value in field: + if (fieldnum, value) in lookup: + # extra defense against repeated values + continue + lookup[(fieldnum, value)] = num_values + num_values += 1 + + self.num_fields = len(uniques) + self.output_size = self.num_fields * self.embedding_size + self.lookup = lookup + self.lookup_nan = lookup_nan + self.num_values = num_values + self.embedding = nn.Embedding(num_values, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + return self.cat_len + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. 
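
The forward pass above ends with the same gating trick used in DenseLightModel: one learnable scalar, squashed through a sigmoid, blends the two branches. A standalone sketch of that convex combination (the tensors are illustrative placeholders):

    import torch

    mix_param = torch.nn.Parameter(torch.tensor([0.0]))  # init 0.0 -> sigmoid = 0.5
    branch_a = torch.randn(4, 1)  # stand-in for one branch (e.g. the skip path)
    branch_b = torch.randn(4, 1)  # stand-in for the other (e.g. the deep path)
    mix = torch.sigmoid(mix_param)  # always in (0, 1), so both branches keep a gradient
    blended = mix * branch_a + (1 - mix) * branch_b  # an even 50/50 blend at init
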
+ + Args: + X : Dict + + Returns: + torch.Tensor + + """ + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + X = X["cat"] + idxs: List[List[int]] = [] + for row in X: + idxs.append([]) + for col, val in enumerate(row): + val = val.item() + idx = self.lookup[(col, val)] + idxs[-1].append(idx) + + return self.embedding(torch.tensor(idxs, dtype=torch.int64, device=self._device)) + + +class DefaultEmbedding(nn.Module): + """DefaultEmbedding from https://github.com/jrfiedler/xynn. + + An embedding with a default value for each field. The default is returned for + any field value not seen when the embedding was initialized (using `fit` or + `from_summary`). For any value seen at initialization, a weighted average of + that value's embedding and the default embedding is returned. The weights for + the average are determined by the parameter `alpha`: + + weight = count / (count + alpha) + final = embedding * weight + default * (1 - weight) + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + alpha : int, optional + controls the weighting of each embedding vector with the default; + when `alpha`-many values are seen at initialization; the final + vector is evenly weighted; the influence of the default is decreased + with either higher counts or lower `alpha`; default is 20 + device : string or torch.device + + """ + + def __init__( + self, + cat_vc: Sequence[Dict], + embedding_size: int = 10, + alpha: int = 20, + device: Union[str, torch.device] = "cuda:0", + **kwargs, + ): + super().__init__() + self._isfit = False + self._device = device + self.num_fields = 0 + self.output_size = 0 + self.alpha = alpha + self.lookup: Dict[Tuple[int, Any], Tuple[int, int]] = {} + self.lookup_default: Dict[int, Tuple[int, int]] = {} + self.num_values = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(cat_vc) + self.cat_len = len(cat_vc) + + def _from_summary(self, unique_counts: List[Dict[Any, int]]): + lookup = {} + lookup_default = {} + num_values = 0 + for fieldnum, counts in enumerate(unique_counts): + lookup_default[fieldnum] = (num_values, 0) + num_values += 1 + for value, count in counts.items(): + lookup[(fieldnum, value)] = (num_values, count) + num_values += 1 + + self.num_fields = len(unique_counts) + self.output_size = self.num_fields * self.embedding_size + self.lookup = lookup + self.lookup_default = lookup_default + self.num_values = num_values + self.embedding = nn.Embedding(num_values, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + return self.cat_len + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. 
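
The count-based shrinkage described in the DefaultEmbedding docstring above is easy to verify by hand. With the default alpha = 20 and a few illustrative counts:

    alpha = 20
    for count in (0, 20, 180):
        weight = count / (count + alpha)
        print(count, round(weight, 2))
    # count=0   -> 0.0  (unseen value: pure default vector)
    # count=20  -> 0.5  (count == alpha: evenly weighted)
    # count=180 -> 0.9  (frequent value: mostly its own vector)
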
+ + Args: + X : Dict + + Returns: + torch.Tensor + """ + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + X = X["cat"] + list_weights: List[List[List[float]]] = [] + idxs_primary: List[List[int]] = [] + idxs_default: List[List[int]] = [] + for row in X: + list_weights.append([]) + idxs_primary.append([]) + idxs_default.append([]) + for col, val in enumerate(row): + val = val.item() + default = self.lookup_default[col] + idx, count = self.lookup.get((col, val), default) + list_weights[-1].append([count / (count + self.alpha)]) + idxs_primary[-1].append(idx) + idxs_default[-1].append(default[0]) + tsr_weights = torch.tensor(list_weights, dtype=torch.float32, device=self._device) + emb_primary = self.embedding(torch.tensor(idxs_primary, dtype=torch.int64, device=self._device)) + emb_default = self.embedding(torch.tensor(idxs_default, dtype=torch.int64, device=self._device)) + x = tsr_weights * emb_primary + (1 - tsr_weights) * emb_default + return x + + +class LinearEmbedding(nn.Module): + """An embedding for numeric fields from https://github.com/jrfiedler/xynn. + + There is one embedded vector for each field. + The embedded vector for a value is that value times its field's vector. + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + device : string or torch.device + + """ + + def __init__(self, num_dims: int, embedding_size: int = 10, **kwargs): + super().__init__() + self._isfit = False + self.num_fields = num_dims + self.output_size = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(self.num_fields) + + def _from_summary(self, num_fields: int): + self.num_fields = num_fields + self.output_size = num_fields * self.embedding_size + self.embedding = nn.Embedding(num_fields, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + return self.num_fields + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + return self.embedding.weight * X.unsqueeze(dim=-1) + + +class DenseEmbedding(nn.Module): + """An embedding for numeric fields, consisting of just a linear transformation with an activation from https://github.com/jrfiedler/xynn. 
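
The LinearEmbedding forward above relies on broadcasting: the per-field weight matrix multiplies a rank-3 view of the input. A minimal shape sketch with made-up sizes:

    import torch

    weight = torch.randn(5, 10)      # (num_fields, embedding_size)
    X = torch.randn(32, 5)           # (n_rows, num_fields)
    out = weight * X.unsqueeze(-1)   # (32, 5, 1) broadcasts against (5, 10)
    assert out.shape == (32, 5, 10)  # each value scales its field's vector
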
+ + Maps an input with shape n_rows * n_fields to an output with shape + n_rows * 1 * embedding_size if one value passed for embedding_size or + n_rows * embeddin_size[0] * embedding_size[1] if two values are passed + + Args: + embedding_size : int, tuple of ints, or list of ints; optional + size of each value's embedding vector; default is 10 + activation : subclass of torch.nn.Module, optional + default is nn.LeakyReLU + device : string or torch.device + """ + + def __init__( + self, + num_dims: int, + embedding_size: Union[int, Tuple[int, ...], List[int]] = 10, + activation: Type[nn.Module] = nn.LeakyReLU, + **kwargs, + ): + super().__init__() + + if isinstance(embedding_size, int): + embedding_size = (1, embedding_size) + elif len(embedding_size) == 1: + embedding_size = (1, embedding_size[0]) + self._isfit = False + self.num_fields = num_dims + self.output_size = 0 + self.embedding_w = None + self.embedding_b = None + self.dense_out_size = embedding_size + self.embedding_size = embedding_size[-1] + self.activation = activation() + self._from_summary(self.num_fields) + + def _from_summary(self, num_fields: int): + self.output_size = reduce(operator.mul, self.dense_out_size, 1) + self.embedding_w = nn.Parameter(torch.zeros((num_fields, *self.dense_out_size))) + self.embedding_b = nn.Parameter(torch.zeros(self.dense_out_size)) + nn.init.xavier_uniform_(self.embedding_w) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + return self.dense_out_size[0] + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + embedded = self.embedding_w.T.matmul(X.T.to(dtype=torch.float)).T + self.embedding_b + embedded = self.activation(embedded.reshape((X.shape[0], -1))) + return embedded.reshape((X.shape[0], *self.dense_out_size)) + + class TorchUniversalModel(nn.Module): """Mixed data model. 
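
The transposed matmul in DenseEmbedding.forward above is compact but easy to misread; a shape sketch with illustrative sizes (note that `Tensor.T` reverses all dimensions of a 3-d tensor here):

    import torch

    num_fields, out_shape = 5, (1, 10)         # an int embedding_size becomes (1, size)
    w = torch.zeros((num_fields, *out_shape))  # embedding_w, Xavier-initialized in the class
    b = torch.zeros(out_shape)                 # embedding_b
    X = torch.randn(32, num_fields)
    emb = w.T.matmul(X.T.to(dtype=torch.float)).T + b
    assert emb.shape == (32, *out_shape)       # n_rows * 1 * embedding_size
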
@@ -305,7 +617,12 @@ def __init__( torch_model( **{ **kwargs, - **{"n_in": n_in, "n_out": n_out, "loss": loss, "task": task}, + **{ + "n_in": n_in, + "n_out": n_out, + "loss": loss, + "task": task, + }, } ) if torch_model is not None From eaa90017780b58aff80989de5fdd861309abfbc1 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 10 Aug 2023 14:16:47 +0000 Subject: [PATCH 10/49] added flatten versions of embeddings --- lightautoml/ml_algo/dl_model.py | 6 +- lightautoml/text/embed.py | 546 ++++++++++++++++++++++++++++++++ lightautoml/text/nn_model.py | 470 +-------------------------- 3 files changed, 550 insertions(+), 472 deletions(-) create mode 100644 lightautoml/text/embed.py diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 8b968f35..49be49f0 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -44,9 +44,9 @@ from ..ml_algo.base import TabularDataset from ..ml_algo.base import TabularMLAlgo from ..pipelines.utils import get_columns_by_role -from ..text.nn_model import CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding -from ..text.nn_model import ContEmbedder -from ..text.nn_model import TextBert +from ..text.embed import CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding +from ..text.embed import ContEmbedder +from ..text.embed import TextBert from ..text.nn_model import TorchUniversalModel from ..text.nn_model import UniversalDataset from ..text.trainer import Trainer diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py new file mode 100644 index 00000000..eaa15558 --- /dev/null +++ b/lightautoml/text/embed.py @@ -0,0 +1,546 @@ +"""Neural Net modules for differen data types.""" + +import logging + +from typing import Any, List, Tuple, Type +from typing import Dict +from typing import Optional +from typing import Sequence +from typing import Union +from functools import reduce +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor +import operator + +try: + from transformers import AutoModel +except: + import warnings + + warnings.warn("'transformers' - package isn't installed") + +from .dl_transformers import pooling_by_name + + +logger = logging.getLogger(__name__) + + +class TextBert(nn.Module): + """Text data model. + + Class for working with text data based on HuggingFace transformers. + + Args: + model_name: Transformers model name. + pooling: Pooling type. + + Note: + There are different pooling types: + + - cls: Use CLS token for sentence embedding + from last hidden state. + - max: Maximum on seq_len dimension for non masked + inputs from last hidden state. + - mean: Mean on seq_len dimension for non masked + inputs from last hidden state. + - sum: Sum on seq_len dimension for non masked + inputs from last hidden state. + - none: Without pooling for seq2seq models. + + """ + + _poolers = {"cls", "max", "mean", "sum", "none"} + + def __init__(self, model_name: str = "bert-base-uncased", pooling: str = "cls"): + super(TextBert, self).__init__() + if pooling not in self._poolers: + raise ValueError("pooling - {} - not in the list of available types {}".format(pooling, self._poolers)) + + self.transformer = AutoModel.from_pretrained(model_name) + self.n_out = self.transformer.config.hidden_size + self.dropout = torch.nn.Dropout(0.2) + self.activation = torch.nn.ReLU(inplace=True) + self.pooling = pooling_by_name[pooling]() + + def get_out_shape(self) -> int: + """Output shape. 
+ + Returns: + int with module output shape. + + """ + return self.n_out + + def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: + """Forward-pass.""" + # last hidden layer + encoded_layers, _ = self.transformer( + input_ids=inp["input_ids"], + attention_mask=inp["attention_mask"], + token_type_ids=inp.get("token_type_ids"), + return_dict=False, + ) + + # pool the outputs into a vector + encoded_layers = self.pooling(encoded_layers, inp["attention_mask"].unsqueeze(-1).bool()) + mean_last_hidden_state = self.activation(encoded_layers) + mean_last_hidden_state = self.dropout(mean_last_hidden_state) + return mean_last_hidden_state + + +class CatEmbedder(nn.Module): + """Category data model. + + Args: + cat_dims: Sequence with number of unique categories + for category features. + emb_dropout: Dropout probability. + emb_ratio: Ratio for embedding size = (x + 1) // emb_ratio. + max_emb_size: Max embedding size. + + """ + + def __init__( + self, cat_dims: Sequence[int], emb_dropout: bool = 0.1, emb_ratio: int = 3, max_emb_size: int = 50, **kwargs + ): + super(CatEmbedder, self).__init__() + emb_dims = [(int(x), int(min(max_emb_size, max(1, (x + 1) // emb_ratio)))) for x in cat_dims] + self.no_of_embs = sum([y for x, y in emb_dims]) + assert self.no_of_embs != 0, "The input is empty." + # Embedding layers + self.emb_layers = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims]) + self.emb_dropout_layer = nn.Dropout(emb_dropout) if emb_dropout else nn.Identity() + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + Int with module output shape. + + """ + return self.no_of_embs + + def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: + """Forward-pass.""" + output = torch.cat( + [emb_layer(inp["cat"][:, i]) for i, emb_layer in enumerate(self.emb_layers)], + dim=1, + ) + output = self.emb_dropout_layer(output) + return output + + +class ContEmbedder(nn.Module): + """Numeric data model. + + Class for working with numeric data. + + Args: + num_dims: Sequence with number of numeric features. + input_bn: Use 1d batch norm for input data. + + """ + + def __init__(self, num_dims: int, input_bn: bool = True, **kwargs): + super(ContEmbedder, self).__init__() + self.n_out = num_dims + self.bn = nn.Identity() + if input_bn: + self.bn = nn.BatchNorm1d(num_dims) + assert num_dims != 0, "The input is empty." + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + return self.n_out + + def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: + """Forward-pass.""" + output = inp["cont"] + output = self.bn(output) + return output + + +class BasicEmbedding(nn.Module): + """A basic embedding that creates an embedded vector for each field value from https://github.com/jrfiedler/xynn. + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + device : string or torch.device + flatten_output: if flatten output or not. 
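
The embedding-size rule in CatEmbedder above keeps each dimension between 1 and `max_emb_size`. With the defaults (`emb_ratio=3`, `max_emb_size=50`) and three hypothetical column cardinalities:

    max_emb_size, emb_ratio = 50, 3
    for x in (2, 10, 1000):  # unique category counts of three made-up columns
        emb_size = int(min(max_emb_size, max(1, (x + 1) // emb_ratio)))
        print(x, emb_size)
    # x=2    -> 1   (tiny cardinality gets a 1-dim embedding)
    # x=10   -> 3
    # x=1000 -> 50  (capped at max_emb_size)
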
+ + """ + + def __init__( + self, + cat_vc: Sequence[Dict], + embedding_size: int = 10, + device: Union[str, torch.device] = "cuda:0", + flatten_output: bool = False, + **kwargs, + ): + super().__init__() + self.flatten_output = flatten_output + self._device = device + self._isfit = False + self.num_fields = 0 + self.output_size = 0 + self.lookup: Dict[Tuple[int, Any], int] = {} + self.lookup_nan: Dict[int, int] = {} + self.num_values = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(cat_vc) + self.cat_len = len(cat_vc) + + def _from_summary(self, uniques: List[Union[List, Tensor, np.ndarray]]): + lookup = {} + lookup_nan = {} + num_values = 0 + for fieldnum, field in enumerate(uniques): + for value in field: + if (fieldnum, value) in lookup: + # extra defense against repeated values + continue + lookup[(fieldnum, value)] = num_values + num_values += 1 + + self.num_fields = len(uniques) + self.output_size = self.num_fields * self.embedding_size + self.lookup = lookup + self.lookup_nan = lookup_nan + self.num_values = num_values + self.embedding = nn.Embedding(num_values, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.cat_len * self.embedding_size + else: + return self.cat_len + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + X = X["cat"] + idxs: List[List[int]] = [] + for row in X: + idxs.append([]) + for col, val in enumerate(row): + val = val.item() + idx = self.lookup[(col, val)] + idxs[-1].append(idx) + x = self.embedding(torch.tensor(idxs, dtype=torch.int64, device=self._device)) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class DefaultEmbedding(nn.Module): + """DefaultEmbedding from https://github.com/jrfiedler/xynn. + + An embedding with a default value for each field. The default is returned for + any field value not seen when the embedding was initialized (using `fit` or + `from_summary`). For any value seen at initialization, a weighted average of + that value's embedding and the default embedding is returned. The weights for + the average are determined by the parameter `alpha`: + + weight = count / (count + alpha) + final = embedding * weight + default * (1 - weight) + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + alpha : int, optional + controls the weighting of each embedding vector with the default; + when `alpha`-many values are seen at initialization; the final + vector is evenly weighted; the influence of the default is decreased + with either higher counts or lower `alpha`; default is 20 + device : string or torch.device + flatten_output: if flatten output or not. 
+ + """ + + def __init__( + self, + cat_vc: Sequence[Dict], + embedding_size: int = 10, + alpha: int = 20, + device: Union[str, torch.device] = "cuda:0", + flatten_output: bool = False, + **kwargs, + ): + super().__init__() + self.flatten_output = flatten_output + self._isfit = False + self._device = device + self.num_fields = 0 + self.output_size = 0 + self.alpha = alpha + self.lookup: Dict[Tuple[int, Any], Tuple[int, int]] = {} + self.lookup_default: Dict[int, Tuple[int, int]] = {} + self.num_values = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(cat_vc) + self.cat_len = len(cat_vc) + + def _from_summary(self, unique_counts: List[Dict[Any, int]]): + lookup = {} + lookup_default = {} + num_values = 0 + for fieldnum, counts in enumerate(unique_counts): + lookup_default[fieldnum] = (num_values, 0) + num_values += 1 + for value, count in counts.items(): + lookup[(fieldnum, value)] = (num_values, count) + num_values += 1 + + self.num_fields = len(unique_counts) + self.output_size = self.num_fields * self.embedding_size + self.lookup = lookup + self.lookup_default = lookup_default + self.num_values = num_values + self.embedding = nn.Embedding(num_values, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.cat_len * self.embedding_size + else: + return self.cat_len + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + """ + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + X = X["cat"] + list_weights: List[List[List[float]]] = [] + idxs_primary: List[List[int]] = [] + idxs_default: List[List[int]] = [] + for row in X: + list_weights.append([]) + idxs_primary.append([]) + idxs_default.append([]) + for col, val in enumerate(row): + val = val.item() + default = self.lookup_default[col] + idx, count = self.lookup.get((col, val), default) + list_weights[-1].append([count / (count + self.alpha)]) + idxs_primary[-1].append(idx) + idxs_default[-1].append(default[0]) + tsr_weights = torch.tensor(list_weights, dtype=torch.float32, device=self._device) + emb_primary = self.embedding(torch.tensor(idxs_primary, dtype=torch.int64, device=self._device)) + emb_default = self.embedding(torch.tensor(idxs_default, dtype=torch.int64, device=self._device)) + x = tsr_weights * emb_primary + (1 - tsr_weights) * emb_default + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class LinearEmbedding(nn.Module): + """An embedding for numeric fields from https://github.com/jrfiedler/xynn. + + There is one embedded vector for each field. + The embedded vector for a value is that value times its field's vector. + + Args: + embedding_size : int, optional + size of each value's embedding vector; default is 10 + device : string or torch.device + flatten_output: if flatten output or not. 
+ + """ + + def __init__(self, num_dims: int, embedding_size: int = 10, flatten_output: bool = False, **kwargs): + super().__init__() + self.flatten_output = flatten_output + self._isfit = False + self.num_fields = num_dims + self.output_size = 0 + self.embedding: Optional[nn.Embedding] = None + self.embedding_size = embedding_size + self._from_summary(self.num_fields) + + def _from_summary(self, num_fields: int): + self.num_fields = num_fields + self.output_size = num_fields * self.embedding_size + self.embedding = nn.Embedding(num_fields, self.embedding_size) + nn.init.xavier_uniform_(self.embedding.weight) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.num_fields * self.embedding_size + else: + return self.num_fields + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + x = self.embedding.weight * X.unsqueeze(dim=-1) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class DenseEmbedding(nn.Module): + """An embedding for numeric fields, consisting of just a linear transformation with an activation from https://github.com/jrfiedler/xynn. + + Maps an input with shape n_rows * n_fields to an output with shape + n_rows * 1 * embedding_size if one value passed for embedding_size or + n_rows * embeddin_size[0] * embedding_size[1] if two values are passed + + Args: + embedding_size : int, tuple of ints, or list of ints; optional + size of each value's embedding vector; default is 10 + activation : subclass of torch.nn.Module, optional + default is nn.LeakyReLU + device : string or torch.device + flatten_output: if flatten output or not. + """ + + def __init__( + self, + num_dims: int, + embedding_size: Union[int, Tuple[int, ...], List[int]] = 10, + activation: Type[nn.Module] = nn.LeakyReLU, + flatten_output: bool = False, + **kwargs, + ): + super().__init__() + self.flatten_output = flatten_output + if isinstance(embedding_size, int): + embedding_size = (1, embedding_size) + elif len(embedding_size) == 1: + embedding_size = (1, embedding_size[0]) + self._isfit = False + self.num_fields = num_dims + self.output_size = 0 + self.embedding_w = None + self.embedding_b = None + self.dense_out_size = embedding_size + self.embedding_size = embedding_size[-1] + self.activation = activation() + self._from_summary(self.num_fields) + + def _from_summary(self, num_fields: int): + self.output_size = reduce(operator.mul, self.dense_out_size, 1) + self.embedding_w = nn.Parameter(torch.zeros((num_fields, *self.dense_out_size))) + self.embedding_b = nn.Parameter(torch.zeros(self.dense_out_size)) + nn.init.xavier_uniform_(self.embedding_w) + self._isfit = True + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.output_size + else: + return self.dense_out_size[0] + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. 
+ + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + if not self._isfit: + raise RuntimeError("need to call `fit` or `from_summary` first") + embedded = self.embedding_w.T.matmul(X.T.to(dtype=torch.float)).T + self.embedding_b + embedded = self.activation(embedded.reshape((X.shape[0], -1))) + x = embedded.reshape((X.shape[0], *self.dense_out_size)) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class DenseEmbeddingFlat(DenseEmbedding): + """Flatten version of DenseEmbedding.""" + + def __init__(self, *args, **kwargs): + super(DenseEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class LinearEmbeddingFlat(LinearEmbedding): + """Flatten version of LinearEmbedding.""" + + def __init__(self, *args, **kwargs): + super(LinearEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class DefaultEmbeddingFlat(DefaultEmbedding): + """Flatten version of DefaultEmbedding.""" + + def __init__(self, *args, **kwargs): + super(DefaultEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class BasicEmbeddingFlat(BasicEmbedding): + """Flatten version of BasicEmbedding.""" + + def __init__(self, *args, **kwargs): + super(BasicEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 8366af82..916cfec6 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -2,28 +2,15 @@ import logging -from typing import Any, List, Tuple, Type +from typing import Any from typing import Callable from typing import Dict from typing import Optional -from typing import Sequence from typing import Union -from functools import reduce import numpy as np import torch import torch.nn as nn -from torch import Tensor -import operator - -try: - from transformers import AutoModel -except: - import warnings - - warnings.warn("'transformers' - package isn't installed") - from ..tasks.base import Task -from .dl_transformers import pooling_by_name logger = logging.getLogger(__name__) @@ -100,461 +87,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x -class TextBert(nn.Module): - """Text data model. - - Class for working with text data based on HuggingFace transformers. - - Args: - model_name: Transformers model name. - pooling: Pooling type. - - Note: - There are different pooling types: - - - cls: Use CLS token for sentence embedding - from last hidden state. - - max: Maximum on seq_len dimension for non masked - inputs from last hidden state. - - mean: Mean on seq_len dimension for non masked - inputs from last hidden state. - - sum: Sum on seq_len dimension for non masked - inputs from last hidden state. - - none: Without pooling for seq2seq models. - - """ - - _poolers = {"cls", "max", "mean", "sum", "none"} - - def __init__(self, model_name: str = "bert-base-uncased", pooling: str = "cls"): - super(TextBert, self).__init__() - if pooling not in self._poolers: - raise ValueError("pooling - {} - not in the list of available types {}".format(pooling, self._poolers)) - - self.transformer = AutoModel.from_pretrained(model_name) - self.n_out = self.transformer.config.hidden_size - self.dropout = torch.nn.Dropout(0.2) - self.activation = torch.nn.ReLU(inplace=True) - self.pooling = pooling_by_name[pooling]() - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. 
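
The `*Flat` wrappers above only force `flatten_output=True` through kwargs, so downstream models that expect 2-d input can consume the embeddings directly. A minimal usage sketch, assuming the classes from lightautoml/text/embed.py defined above:

    import torch

    emb = LinearEmbeddingFlat(num_dims=5, embedding_size=10)
    out = emb({"cont": torch.randn(32, 5)})
    assert out.shape == (32, 50)       # (n_rows, num_fields * embedding_size)
    assert emb.get_out_shape() == 50   # the non-flat LinearEmbedding would report 5
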
- - """ - return self.n_out - - def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: - """Forward-pass.""" - # last hidden layer - encoded_layers, _ = self.transformer( - input_ids=inp["input_ids"], - attention_mask=inp["attention_mask"], - token_type_ids=inp.get("token_type_ids"), - return_dict=False, - ) - - # pool the outputs into a vector - encoded_layers = self.pooling(encoded_layers, inp["attention_mask"].unsqueeze(-1).bool()) - mean_last_hidden_state = self.activation(encoded_layers) - mean_last_hidden_state = self.dropout(mean_last_hidden_state) - return mean_last_hidden_state - - -class CatEmbedder(nn.Module): - """Category data model. - - Args: - cat_dims: Sequence with number of unique categories - for category features. - emb_dropout: Dropout probability. - emb_ratio: Ratio for embedding size = (x + 1) // emb_ratio. - max_emb_size: Max embedding size. - - """ - - def __init__( - self, cat_dims: Sequence[int], emb_dropout: bool = 0.1, emb_ratio: int = 3, max_emb_size: int = 50, **kwargs - ): - super(CatEmbedder, self).__init__() - emb_dims = [(int(x), int(min(max_emb_size, max(1, (x + 1) // emb_ratio)))) for x in cat_dims] - self.no_of_embs = sum([y for x, y in emb_dims]) - assert self.no_of_embs != 0, "The input is empty." - # Embedding layers - self.emb_layers = nn.ModuleList([nn.Embedding(x, y) for x, y in emb_dims]) - self.emb_dropout_layer = nn.Dropout(emb_dropout) if emb_dropout else nn.Identity() - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - Int with module output shape. - - """ - return self.no_of_embs - - def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: - """Forward-pass.""" - output = torch.cat( - [emb_layer(inp["cat"][:, i]) for i, emb_layer in enumerate(self.emb_layers)], - dim=1, - ) - output = self.emb_dropout_layer(output) - return output - - -class ContEmbedder(nn.Module): - """Numeric data model. - - Class for working with numeric data. - - Args: - num_dims: Sequence with number of numeric features. - input_bn: Use 1d batch norm for input data. - - """ - - def __init__(self, num_dims: int, input_bn: bool = True, **kwargs): - super(ContEmbedder, self).__init__() - self.n_out = num_dims - self.bn = nn.Identity() - if input_bn: - self.bn = nn.BatchNorm1d(num_dims) - assert num_dims != 0, "The input is empty." - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. - - """ - return self.n_out - - def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: - """Forward-pass.""" - output = inp["cont"] - output = self.bn(output) - return output - - -class BasicEmbedding(nn.Module): - """A basic embedding that creates an embedded vector for each field value from https://github.com/jrfiedler/xynn. 
- - Args: - embedding_size : int, optional - size of each value's embedding vector; default is 10 - device : string or torch.device - - """ - - def __init__( - self, cat_vc: Sequence[Dict], embedding_size: int = 10, device: Union[str, torch.device] = "cuda:0", **kwargs - ): - super().__init__() - self._device = device - self._isfit = False - self.num_fields = 0 - self.output_size = 0 - self.lookup: Dict[Tuple[int, Any], int] = {} - self.lookup_nan: Dict[int, int] = {} - self.num_values = 0 - self.embedding: Optional[nn.Embedding] = None - self.embedding_size = embedding_size - self._from_summary(cat_vc) - self.cat_len = len(cat_vc) - - def _from_summary(self, uniques: List[Union[List, Tensor, np.ndarray]]): - lookup = {} - lookup_nan = {} - num_values = 0 - for fieldnum, field in enumerate(uniques): - for value in field: - if (fieldnum, value) in lookup: - # extra defense against repeated values - continue - lookup[(fieldnum, value)] = num_values - num_values += 1 - - self.num_fields = len(uniques) - self.output_size = self.num_fields * self.embedding_size - self.lookup = lookup - self.lookup_nan = lookup_nan - self.num_values = num_values - self.embedding = nn.Embedding(num_values, self.embedding_size) - nn.init.xavier_uniform_(self.embedding.weight) - self._isfit = True - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. - - """ - return self.cat_len - - def forward(self, X: Dict) -> Tensor: - """Produce embedding for each value in input. - - Args: - X : Dict - - Returns: - torch.Tensor - - """ - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") - X = X["cat"] - idxs: List[List[int]] = [] - for row in X: - idxs.append([]) - for col, val in enumerate(row): - val = val.item() - idx = self.lookup[(col, val)] - idxs[-1].append(idx) - - return self.embedding(torch.tensor(idxs, dtype=torch.int64, device=self._device)) - - -class DefaultEmbedding(nn.Module): - """DefaultEmbedding from https://github.com/jrfiedler/xynn. - - An embedding with a default value for each field. The default is returned for - any field value not seen when the embedding was initialized (using `fit` or - `from_summary`). For any value seen at initialization, a weighted average of - that value's embedding and the default embedding is returned. 
The weights for - the average are determined by the parameter `alpha`: - - weight = count / (count + alpha) - final = embedding * weight + default * (1 - weight) - - Args: - embedding_size : int, optional - size of each value's embedding vector; default is 10 - alpha : int, optional - controls the weighting of each embedding vector with the default; - when `alpha`-many values are seen at initialization; the final - vector is evenly weighted; the influence of the default is decreased - with either higher counts or lower `alpha`; default is 20 - device : string or torch.device - - """ - - def __init__( - self, - cat_vc: Sequence[Dict], - embedding_size: int = 10, - alpha: int = 20, - device: Union[str, torch.device] = "cuda:0", - **kwargs, - ): - super().__init__() - self._isfit = False - self._device = device - self.num_fields = 0 - self.output_size = 0 - self.alpha = alpha - self.lookup: Dict[Tuple[int, Any], Tuple[int, int]] = {} - self.lookup_default: Dict[int, Tuple[int, int]] = {} - self.num_values = 0 - self.embedding: Optional[nn.Embedding] = None - self.embedding_size = embedding_size - self._from_summary(cat_vc) - self.cat_len = len(cat_vc) - - def _from_summary(self, unique_counts: List[Dict[Any, int]]): - lookup = {} - lookup_default = {} - num_values = 0 - for fieldnum, counts in enumerate(unique_counts): - lookup_default[fieldnum] = (num_values, 0) - num_values += 1 - for value, count in counts.items(): - lookup[(fieldnum, value)] = (num_values, count) - num_values += 1 - - self.num_fields = len(unique_counts) - self.output_size = self.num_fields * self.embedding_size - self.lookup = lookup - self.lookup_default = lookup_default - self.num_values = num_values - self.embedding = nn.Embedding(num_values, self.embedding_size) - nn.init.xavier_uniform_(self.embedding.weight) - - self._isfit = True - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. - - """ - return self.cat_len - - def forward(self, X: Dict) -> Tensor: - """Produce embedding for each value in input. - - Args: - X : Dict - - Returns: - torch.Tensor - """ - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") - X = X["cat"] - list_weights: List[List[List[float]]] = [] - idxs_primary: List[List[int]] = [] - idxs_default: List[List[int]] = [] - for row in X: - list_weights.append([]) - idxs_primary.append([]) - idxs_default.append([]) - for col, val in enumerate(row): - val = val.item() - default = self.lookup_default[col] - idx, count = self.lookup.get((col, val), default) - list_weights[-1].append([count / (count + self.alpha)]) - idxs_primary[-1].append(idx) - idxs_default[-1].append(default[0]) - tsr_weights = torch.tensor(list_weights, dtype=torch.float32, device=self._device) - emb_primary = self.embedding(torch.tensor(idxs_primary, dtype=torch.int64, device=self._device)) - emb_default = self.embedding(torch.tensor(idxs_default, dtype=torch.int64, device=self._device)) - x = tsr_weights * emb_primary + (1 - tsr_weights) * emb_default - return x - - -class LinearEmbedding(nn.Module): - """An embedding for numeric fields from https://github.com/jrfiedler/xynn. - - There is one embedded vector for each field. - The embedded vector for a value is that value times its field's vector. 
- - Args: - embedding_size : int, optional - size of each value's embedding vector; default is 10 - device : string or torch.device - - """ - - def __init__(self, num_dims: int, embedding_size: int = 10, **kwargs): - super().__init__() - self._isfit = False - self.num_fields = num_dims - self.output_size = 0 - self.embedding: Optional[nn.Embedding] = None - self.embedding_size = embedding_size - self._from_summary(self.num_fields) - - def _from_summary(self, num_fields: int): - self.num_fields = num_fields - self.output_size = num_fields * self.embedding_size - self.embedding = nn.Embedding(num_fields, self.embedding_size) - nn.init.xavier_uniform_(self.embedding.weight) - self._isfit = True - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. - - """ - return self.num_fields - - def forward(self, X: Dict) -> Tensor: - """Produce embedding for each value in input. - - Args: - X : Dict - - Returns: - torch.Tensor - - """ - X = X["cont"] - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") - return self.embedding.weight * X.unsqueeze(dim=-1) - - -class DenseEmbedding(nn.Module): - """An embedding for numeric fields, consisting of just a linear transformation with an activation from https://github.com/jrfiedler/xynn. - - Maps an input with shape n_rows * n_fields to an output with shape - n_rows * 1 * embedding_size if one value passed for embedding_size or - n_rows * embeddin_size[0] * embedding_size[1] if two values are passed - - Args: - embedding_size : int, tuple of ints, or list of ints; optional - size of each value's embedding vector; default is 10 - activation : subclass of torch.nn.Module, optional - default is nn.LeakyReLU - device : string or torch.device - """ - - def __init__( - self, - num_dims: int, - embedding_size: Union[int, Tuple[int, ...], List[int]] = 10, - activation: Type[nn.Module] = nn.LeakyReLU, - **kwargs, - ): - super().__init__() - - if isinstance(embedding_size, int): - embedding_size = (1, embedding_size) - elif len(embedding_size) == 1: - embedding_size = (1, embedding_size[0]) - self._isfit = False - self.num_fields = num_dims - self.output_size = 0 - self.embedding_w = None - self.embedding_b = None - self.dense_out_size = embedding_size - self.embedding_size = embedding_size[-1] - self.activation = activation() - self._from_summary(self.num_fields) - - def _from_summary(self, num_fields: int): - self.output_size = reduce(operator.mul, self.dense_out_size, 1) - self.embedding_w = nn.Parameter(torch.zeros((num_fields, *self.dense_out_size))) - self.embedding_b = nn.Parameter(torch.zeros(self.dense_out_size)) - nn.init.xavier_uniform_(self.embedding_w) - self._isfit = True - - def get_out_shape(self) -> int: - """Output shape. - - Returns: - int with module output shape. - - """ - return self.dense_out_size[0] - - def forward(self, X: Dict) -> Tensor: - """Produce embedding for each value in input. - - Args: - X : Dict - - Returns: - torch.Tensor - - """ - X = X["cont"] - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") - embedded = self.embedding_w.T.matmul(X.T.to(dtype=torch.float)).T + self.embedding_b - embedded = self.activation(embedded.reshape((X.shape[0], -1))) - return embedded.reshape((X.shape[0], *self.dense_out_size)) - - class TorchUniversalModel(nn.Module): """Mixed data model. 
From 294383fef2ea6b8e79ced69c951deb00486d42d2 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 08:41:04 +0000 Subject: [PATCH 11/49] not done still --- lightautoml/automl/presets/tabular_presets.py | 1 + lightautoml/ml_algo/dl_model.py | 7 +- lightautoml/ml_algo/tabnet/utils.py | 388 ++++++++ .../torch_based/autoint/autoint_utils.py | 46 - lightautoml/ml_algo/torch_based/nn_models.py | 118 ++- .../ml_algo/torch_based/node_nn_model.py | 33 +- .../pytorch_tabnet/abstract_model.py | 826 ++++++++++++++++ .../pytorch_tabnet/augmentations.py | 85 ++ .../torch_based/pytorch_tabnet/callbacks.py | 287 ++++++ .../torch_based/pytorch_tabnet/metrics.py | 523 ++++++++++ .../pytorch_tabnet/multiclass_utils.py | 425 ++++++++ .../torch_based/pytorch_tabnet/multitask.py | 178 ++++ .../torch_based/pytorch_tabnet/pretraining.py | 428 ++++++++ .../pytorch_tabnet/pretraining_utils.py | 128 +++ .../torch_based/pytorch_tabnet/sparsemax.py | 278 ++++++ .../torch_based/pytorch_tabnet/tab_model.py | 154 +++ .../torch_based/pytorch_tabnet/tab_network.py | 934 ++++++++++++++++++ .../torch_based/pytorch_tabnet/utils.py | 552 +++++++++++ 18 files changed, 5332 insertions(+), 59 deletions(-) create mode 100644 lightautoml/ml_algo/tabnet/utils.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py create mode 100755 lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py create mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index cf4ba8fe..539b2df4 100755 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -609,6 +609,7 @@ def create_automl(self, **fit_args): "node", "autoint", "autoint_emb_v2", + "tabnet" ] available_nn_models = available_nn_models + [x + "_tuned" for x in available_nn_models] nn_models = [ diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 49be49f0..fc4b38ef 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -44,7 +44,7 @@ from ..ml_algo.base import TabularDataset from ..ml_algo.base import TabularMLAlgo from ..pipelines.utils import get_columns_by_role -from ..text.embed import CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding +from ..text.embed import BasicEmbeddingFlat, CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding, LinearEmbeddingFlat from ..text.embed import ContEmbedder from ..text.embed import TextBert from ..text.nn_model import TorchUniversalModel @@ -56,7 +56,7 @@ from ..text.utils import is_shuffle from ..text.utils import parse_devices from ..text.utils import seed_everything -from .torch_based.nn_models 
import MLP
+from .torch_based.nn_models import MLP, TabNet
 from .torch_based.nn_models import NODE
 from .torch_based.nn_models import SNN
 from .torch_based.nn_models import DenseLightModel
@@ -80,6 +80,7 @@
     "node": NODE,
     "autoint": AutoInt,
     "autoint_emb_v2": AutoInt,
+    "tabnet": TabNet,
 }
 cat_embedder_by_name = {
     "denselight": CatEmbedder,
@@ -92,6 +93,7 @@
     "node": CatEmbedder,
     "autoint": BasicEmbedding,
     "autoint_emb_v2": DefaultEmbedding,
+    "tabnet": BasicEmbeddingFlat,
 }
 cont_embedder_params_by_name = {
     "denselight": ContEmbedder,
@@ -104,6 +106,7 @@
     "node": ContEmbedder,
     "autoint": LinearEmbedding,
     "autoint_emb_v2": DenseEmbedding,
+    "tabnet": LinearEmbeddingFlat,
 }
diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/tabnet/utils.py
new file mode 100644
index 00000000..9dad6259
--- /dev/null
+++ b/lightautoml/ml_algo/tabnet/utils.py
@@ -0,0 +1,388 @@
+"""Utils for pytorch-tabnet model."""
+import torch
+import numpy as np
+import torch.nn as nn
+from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax, sparsemax, entmax15
+from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm
+
+
+def initialize_non_glu(module, input_dim, output_dim):
+    """Xavier-initialize a plain (non-GLU) linear layer with a fan-based gain."""
+    gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(4 * input_dim))
+    torch.nn.init.xavier_normal_(module.weight, gain=gain_value)
+    # torch.nn.init.zeros_(module.bias)
+    return
+
+
+def initialize_glu(module, input_dim, output_dim):
+    """Xavier-initialize a GLU linear layer (its output is split in two, hence the larger gain)."""
+    gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(input_dim))
+    torch.nn.init.xavier_normal_(module.weight, gain=gain_value)
+    # torch.nn.init.zeros_(module.bias)
+    return
+
+
+class TabNetEncoder(torch.nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        n_d=8,
+        n_a=8,
+        n_steps=3,
+        gamma=1.3,
+        n_independent=2,
+        n_shared=2,
+        epsilon=1e-15,
+        virtual_batch_size=128,
+        momentum=0.02,
+        mask_type="sparsemax",
+        group_attention_matrix=None,
+    ):
+        """
+        Defines the main part of the TabNet network, without the embedding layers.
+
+        Parameters
+        ----------
+        input_dim : int
+            Number of features
+        output_dim : int or list of int for multi task classification
+            Dimension of network output
+            (e.g. 1 for regression, 2 for binary classification, etc.)
+ n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_a : int + Dimension of the attention layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + n_independent : int + Number of independent GLU layer in each GLU block (default 2) + n_shared : int + Number of independent GLU layer in each GLU block (default 2) + epsilon : float + Avoid log(0), this should be kept very low + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + group_attention_matrix : torch matrix + Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j + """ + super(TabNetEncoder, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.is_multi_task = isinstance(output_dim, list) + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.virtual_batch_size = virtual_batch_size + self.mask_type = mask_type + self.initial_bn = nn.BatchNorm1d(self.input_dim, momentum=0.01) + self.group_attention_matrix = group_attention_matrix + + if self.group_attention_matrix is None: + # no groups + self.group_attention_matrix = torch.eye(self.input_dim) + self.attention_dim = self.input_dim + else: + self.attention_dim = self.group_attention_matrix.shape[0] + + if self.n_shared > 0: + shared_feat_transform = torch.nn.ModuleList() + for i in range(self.n_shared): + if i == 0: + shared_feat_transform.append( + nn.Linear(self.input_dim, 2 * (n_d + n_a), bias=False) + ) + else: + shared_feat_transform.append( + nn.Linear(n_d + n_a, 2 * (n_d + n_a), bias=False) + ) + + else: + shared_feat_transform = None + + self.initial_splitter = FeatTransformer( + self.input_dim, + n_d + n_a, + shared_feat_transform, + n_glu_independent=self.n_independent, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + ) + + self.feat_transformers = torch.nn.ModuleList() + self.att_transformers = torch.nn.ModuleList() + + for step in range(n_steps): + transformer = FeatTransformer( + self.input_dim, + n_d + n_a, + shared_feat_transform, + n_glu_independent=self.n_independent, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + ) + attention = AttentiveTransformer( + n_a, + self.attention_dim, + group_matrix=group_attention_matrix, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + mask_type=self.mask_type, + ) + self.feat_transformers.append(transformer) + self.att_transformers.append(attention) + + def forward(self, x, prior=None): + x = self.initial_bn(x) + + bs = x.shape[0] # batch size + if prior is None: + prior = torch.ones((bs, self.attention_dim)).to(x.device) + + M_loss = 0 + att = self.initial_splitter(x)[:, self.n_d :] + steps_output = [] + for step in range(self.n_steps): + M = self.att_transformers[step](prior, att) + M_loss += torch.mean( + torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1) + ) + # update prior + prior = torch.mul(self.gamma - M, prior) + # output + M_feature_level = torch.matmul(M, self.group_attention_matrix.to(x.device)) + masked_x = torch.mul(M_feature_level, x) + out = self.feat_transformers[step](masked_x) + d = 
nn.ReLU()(out[:, : self.n_d]) + steps_output.append(d) + # update attention + att = out[:, self.n_d :] + + M_loss /= self.n_steps + return steps_output, M_loss + + def forward_masks(self, x): + x = self.initial_bn(x) + bs = x.shape[0] # batch size + prior = torch.ones((bs, self.attention_dim)).to(x.device) + M_explain = torch.zeros(x.shape).to(x.device) + att = self.initial_splitter(x)[:, self.n_d :] + masks = {} + + for step in range(self.n_steps): + M = self.att_transformers[step](prior, att) + M_feature_level = torch.matmul(M, self.group_attention_matrix.to(x.device)) + masks[step] = M_feature_level + # update prior + prior = torch.mul(self.gamma - M, prior) + # output + masked_x = torch.mul(M_feature_level, x) + out = self.feat_transformers[step](masked_x) + d = nn.ReLU()(out[:, : self.n_d]) + # explain + step_importance = torch.sum(d, dim=1) + M_explain += torch.mul(M_feature_level, step_importance.unsqueeze(dim=1)) + # update attention + att = out[:, self.n_d :] + + return M_explain, masks + + + +class FeatTransformer(torch.nn.Module): + def __init__( + self, + input_dim, + output_dim, + shared_layers, + n_glu_independent, + virtual_batch_size=128, + momentum=0.02, + ): + super(FeatTransformer, self).__init__() + """ + Initialize a feature transformer. + + Parameters + ---------- + input_dim : int + Input size + output_dim : int + Output_size + shared_layers : torch.nn.ModuleList + The shared block that should be common to every step + n_glu_independent : int + Number of independent GLU layers + virtual_batch_size : int + Batch size for Ghost Batch Normalization within GLU block(s) + momentum : float + Float value between 0 and 1 which will be used for momentum in batch norm + """ + + params = { + "n_glu": n_glu_independent, + "virtual_batch_size": virtual_batch_size, + "momentum": momentum, + } + + if shared_layers is None: + # no shared layers + self.shared = torch.nn.Identity() + is_first = True + else: + self.shared = GLU_Block( + input_dim, + output_dim, + first=True, + shared_layers=shared_layers, + n_glu=len(shared_layers), + virtual_batch_size=virtual_batch_size, + momentum=momentum, + ) + is_first = False + + if n_glu_independent == 0: + # no independent layers + self.specifics = torch.nn.Identity() + else: + spec_input_dim = input_dim if is_first else output_dim + self.specifics = GLU_Block( + spec_input_dim, output_dim, first=is_first, **params + ) + + def forward(self, x): + x = self.shared(x) + x = self.specifics(x) + return x + + +class GLU_Block(torch.nn.Module): + """ + Independent GLU block, specific to each step + """ + + def __init__( + self, + input_dim, + output_dim, + n_glu=2, + first=False, + shared_layers=None, + virtual_batch_size=128, + momentum=0.02, + ): + super(GLU_Block, self).__init__() + self.first = first + self.shared_layers = shared_layers + self.n_glu = n_glu + self.glu_layers = torch.nn.ModuleList() + + params = {"virtual_batch_size": virtual_batch_size, "momentum": momentum} + + fc = shared_layers[0] if shared_layers else None + self.glu_layers.append(GLU_Layer(input_dim, output_dim, fc=fc, **params)) + for glu_id in range(1, self.n_glu): + fc = shared_layers[glu_id] if shared_layers else None + self.glu_layers.append(GLU_Layer(output_dim, output_dim, fc=fc, **params)) + + def forward(self, x): + scale = torch.sqrt(torch.FloatTensor([0.5]).to(x.device)) + if self.first: # the first layer of the block has no scale multiplication + x = self.glu_layers[0](x) + layers_left = range(1, self.n_glu) + else: + layers_left = range(self.n_glu) + + for 
glu_id in layers_left: + x = torch.add(x, self.glu_layers[glu_id](x)) + x = x * scale + return x + + + +class GLU_Layer(torch.nn.Module): + def __init__( + self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02 + ): + super(GLU_Layer, self).__init__() + + self.output_dim = output_dim + if fc: + self.fc = fc + else: + self.fc = nn.Linear(input_dim, 2 * output_dim, bias=False) + initialize_glu(self.fc, input_dim, 2 * output_dim) + + self.bn = GhostBatchNorm( + 2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum + ) + + def forward(self, x): + x = self.fc(x) + x = self.bn(x) + out = torch.mul(x[:, : self.output_dim], torch.sigmoid(x[:, self.output_dim :])) + return out + + + +class AttentiveTransformer(torch.nn.Module): + def __init__( + self, + input_dim, + group_dim, + group_matrix, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + ): + """ + Initialize an attention transformer. + + Parameters + ---------- + input_dim : int + Input size + group_dim : int + Number of groups for features + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + """ + super(AttentiveTransformer, self).__init__() + self.fc = nn.Linear(input_dim, group_dim, bias=False) + initialize_non_glu(self.fc, input_dim, group_dim) + self.bn = GhostBatchNorm( + group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum + ) + + if mask_type == "sparsemax": + # Sparsemax + self.selector = Sparsemax() + elif mask_type == "entmax": + # Entmax + self.selector = Entmax15() + else: + raise NotImplementedError( + "Please choose either sparsemax" + "or entmax as masktype" + ) + + def forward(self, priors, processed_feat): + x = self.fc(processed_feat) + x = self.bn(x) + x = torch.mul(x, priors) + x = self.selector(x) + return x \ No newline at end of file diff --git a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py index c14944f5..c96b3241 100644 --- a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py +++ b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py @@ -12,52 +12,6 @@ EmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "output_size"]) UniformEmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "embedding_size", "output_size"]) -MODULE_INIT_DOC = """ -Parameters ----------- -output_size : int - number of final output values; i.e., number of targets for - regression or number of classes for classification -embedding_num : EmbeddingBase or None - initialized and fit embedding for numeric fields -embedding_cat : EmbeddingBase or None - initialized and fit embedding for categorical fields -embedding_l1_reg : float, optional - value for l1 regularization of embedding vectors; default is 0.0 -embedding_l2_reg : float, optional - value for l2 regularization of embedding vectors; default is 0.0 -{} -mlp_hidden_sizes : int or iterable of int, optional - sizes for the linear transformations between the MLP input and - the output size needed based on the target; default is (512, 256, 128, 64) -mlp_activation : subclass of torch.nn.Module (uninitialized), optional - default is nn.LeakyReLU -mlp_use_bn : boolean, optional - whether to use batch normalization between MLP linear layers; - default is True -mlp_bn_momentum : float, optional - only used if `mlp_use_bn` is True; default 
is 0.01
-mlp_ghost_batch : int or None, optional
-    only used if `mlp_use_bn` is True; size of batch in "ghost batch norm";
-    if None, normal batch norm is used; defualt is None
-mlp_dropout : float, optional
-    whether and how much dropout to use between MLP linear layers;
-    `0.0 <= mlp_dropout < 1.0`; default is 0.0
-mlp_use_skip : boolean, optional
-    use a side path in the MLP containing just the optional leaky gate
-    plus single linear layer; default is True
-mlp_l1_reg : float, optional
-    value for l1 regularization of MLP weights; default is 0.0
-mlp_l2_reg : float, optional
-    value for l2 regularization of MLP weights; default is 0.0
-use_leaky_gate : boolean, optional
-    whether to include "leaky gate" layers; default is True
-loss_fn : "auto" or PyTorch loss function, optional
-    default is "auto"
-device : string or torch.device, optional
-    default is "cpu"
-
-"""
 
 
 class LeakyGate(nn.Module):
diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py
index 291a2587..22512329 100644
--- a/lightautoml/ml_algo/torch_based/nn_models.py
+++ b/lightautoml/ml_algo/torch_based/nn_models.py
@@ -8,11 +8,11 @@
 import numpy as np
 import torch
 import torch.nn as nn
+from lightautoml.ml_algo.tabnet.utils import TabNetEncoder, initialize_non_glu
 from lightautoml.ml_algo.torch_based.autoint.autoint_utils import AttnInteractionBlock, LeakyGate
 from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm
-from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock
-from lightautoml.ml_algo.torch_based.node_nn_model import Lambda
+from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock, MeanPooling
 
 
 class GaussianNoise(nn.Module):
@@ -840,7 +840,7 @@ def __init__(
         self.features1.add_module("ODSTForestblock%d", block)
         self.features2 = nn.Sequential(OrderedDict([]))
         if use_original_head:
-            last_layer = Lambda(lambda x: x[..., :n_out].mean(dim=-2))
+            last_layer = MeanPooling(n_out, dim=-2)
             self.features2.add_module("head", last_layer)
         else:
             if use_bn:
@@ -976,3 +976,115 @@ def forward(self, embedded: torch.Tensor) -> torch.Tensor:
         mix = torch.sigmoid(self.mix)
         out = mix * out + (1 - mix) * self.mlp(embedded_2d)
         return out
+
+
+class TabNet(torch.nn.Module):
+    def __init__(
+        self,
+        n_in,
+        n_out,
+        n_d=8,
+        n_a=8,
+        n_steps=3,
+        gamma=1.3,
+        n_independent=2,
+        n_shared=2,
+        epsilon=1e-15,
+        virtual_batch_size=128,
+        momentum=0.02,
+        mask_type="sparsemax",
+        group_attention_matrix=None,
+        **kwargs,
+    ):
+        """
+        Defines the main part of the TabNet network, without the embedding layers.
+
+        Parameters
+        ----------
+        n_in : int
+            Number of input features
+        n_out : int or list of int for multi task classification
+            Dimension of network output
+            (e.g. 1 for regression, 2 for binary classification, etc.)
+ n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_a : int + Dimension of the attention layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + n_independent : int + Number of independent GLU layer in each GLU block (default 2) + n_shared : int + Number of independent GLU layer in each GLU block (default 2) + epsilon : float + Avoid log(0), this should be kept very low + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + group_attention_matrix : torch matrix + Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j + """ + super(TabNet, self).__init__() + self.input_dim = n_in + self.output_dim = n_out + self.is_multi_task = isinstance(n_out, list) + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.virtual_batch_size = virtual_batch_size + self.mask_type = mask_type + self.initial_bn = nn.BatchNorm1d(self.input_dim, momentum=0.01) + + self.encoder = TabNetEncoder( + input_dim=n_in, + output_dim=n_out, + n_d=n_d, + n_a=n_a, + n_steps=n_steps, + gamma=gamma, + n_independent=n_independent, + n_shared=n_shared, + epsilon=epsilon, + virtual_batch_size=virtual_batch_size, + momentum=momentum, + mask_type=mask_type, + group_attention_matrix=group_attention_matrix + ) + + if self.is_multi_task: + self.multi_task_mappings = torch.nn.ModuleList() + for task_dim in n_out: + task_mapping = nn.Linear(n_d, task_dim, bias=False) + initialize_non_glu(task_mapping, n_d, task_dim) + self.multi_task_mappings.append(task_mapping) + else: + self.final_mapping = nn.Linear(n_d, n_out, bias=False) + initialize_non_glu(self.final_mapping, n_d, n_out) + + def forward(self, x): + res = 0 + steps_output, M_loss = self.encoder(x) + res = torch.sum(torch.stack(steps_output, dim=0), dim=0) + + if self.is_multi_task: + # Result will be in list format + out = [] + for task_mapping in self.multi_task_mappings: + out.append(task_mapping(res)) + else: + out = self.final_mapping(res) + return out + + def forward_masks(self, x): + return self.encoder.forward_masks(x) \ No newline at end of file diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index cdfedbea..e414c6db 100644 --- a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -124,7 +124,14 @@ def _threshold_and_support(input, dim=-1): sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim) # noqa: E731 sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1) # noqa: E731 +class Sparsemax(nn.Module): + def __init__(self, dim=-1): + self.dim = dim + super(Sparsemax, self).__init__() + + def forward(self, input): + return SparsemaxFunction.apply(input, self.dim) class Entmax15Function(Function): """An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). 
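The `Sparsemax` module added in the hunk above is a thin nn.Module wrapper around the existing `SparsemaxFunction`; TabNet's attentive transformer uses it to turn attention scores into feature-selection masks that can be exactly zero. A rough standalone sketch of the projection (forward pass only, following Martins & Astudillo, 2016; the helper name is hypothetical, and the in-tree autograd function also defines the backward pass):

    import torch

    def sparsemax_sketch(z: torch.Tensor) -> torch.Tensor:
        """Euclidean projection of a 1-D score vector onto the probability simplex."""
        z_sorted, _ = torch.sort(z, descending=True)
        k = torch.arange(1, z.numel() + 1, dtype=z.dtype)
        z_cumsum = z_sorted.cumsum(0)
        # support size: largest k with 1 + k * z_(k) > cumsum_k
        support = (1 + k * z_sorted) > z_cumsum
        k_z = support.sum()
        tau = (z_cumsum[k_z - 1] - 1) / k_z
        return torch.clamp(z - tau, min=0)

    logits = torch.tensor([1.0, 0.5, -1.0])
    print(torch.softmax(logits, dim=0))  # every entry strictly positive
    print(sparsemax_sketch(logits))      # tensor([0.7500, 0.2500, 0.0000]), exact zeros

The `Entmax15` wrapper added in the next hunk is the alternative `mask_type`: entmax-1.5 sits between softmax and sparsemax, still producing exact zeros but less aggressively.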
@@ -256,28 +263,38 @@ def _backward(output, grad_output): entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim) # noqa: E731 entmoid15 = Entmoid15.apply # noqa: E731 +class Entmax15(nn.Module): + def __init__(self, dim=-1): + self.dim = dim + super(Entmax15, self).__init__() + + def forward(self, input): + return Entmax15Function.apply(input, self.dim) -class Lambda(nn.Module): - """Pytorch implementation of lambda. +class MeanPooling(nn.Module): + """Pytorch implementation of MeanPooling head. Args: - func : returned func + n_out: int, output dim. + dim: int: the dimension to be averaged. + """ - def __init__(self, func): + def __init__(self, n_out, dim=-1): super().__init__() - self.func = func + self.n_out = n_out + self.dim = dim - def forward(self, *args, **kwargs): + def forward(self, x: torch.Tensor): """Forward-pass. # noqa: DAR101 Returns: - f(*args, **kwargs) + x[..., :self.n_out].mean(dim=self.dim) """ - return self.func(*args, **kwargs) + return x[..., :self.n_out].mean(dim=self.dim) class ModuleWithInit(nn.Module): diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py new file mode 100644 index 00000000..a1734439 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py @@ -0,0 +1,826 @@ +from dataclasses import dataclass, field +from typing import List, Any, Dict +import torch +from torch.nn.utils import clip_grad_norm_ +import numpy as np +from scipy.sparse import csc_matrix +from abc import abstractmethod +from pytorch_tabnet import tab_network +from pytorch_tabnet.utils import ( + SparsePredictDataset, + PredictDataset, + create_explain_matrix, + validate_eval_set, + create_dataloaders, + define_device, + ComplexEncoder, + check_input, + check_warm_start, + create_group_matrix, + check_embedding_parameters +) +from pytorch_tabnet.callbacks import ( + CallbackContainer, + History, + EarlyStopping, + LRSchedulerCallback, +) +from pytorch_tabnet.metrics import MetricContainer, check_metrics +from sklearn.base import BaseEstimator + +from torch.utils.data import DataLoader +import io +import json +from pathlib import Path +import shutil +import zipfile +import warnings +import copy +import scipy + + +@dataclass +class TabModel(BaseEstimator): + """ Class for TabNet model.""" + + n_d: int = 8 + n_a: int = 8 + n_steps: int = 3 + gamma: float = 1.3 + cat_idxs: List[int] = field(default_factory=list) + cat_dims: List[int] = field(default_factory=list) + cat_emb_dim: int = 1 + n_independent: int = 2 + n_shared: int = 2 + epsilon: float = 1e-15 + momentum: float = 0.02 + lambda_sparse: float = 1e-3 + seed: int = 0 + clip_value: int = 1 + verbose: int = 1 + optimizer_fn: Any = torch.optim.Adam + optimizer_params: Dict = field(default_factory=lambda: dict(lr=2e-2)) + scheduler_fn: Any = None + scheduler_params: Dict = field(default_factory=dict) + mask_type: str = "sparsemax" + input_dim: int = None + output_dim: int = None + device_name: str = "auto" + n_shared_decoder: int = 1 + n_indep_decoder: int = 1 + grouped_features: List[List[int]] = field(default_factory=list) + + def __post_init__(self): + # These are default values needed for saving model + self.batch_size = 1024 + self.virtual_batch_size = 128 + + torch.manual_seed(self.seed) + # Defining device + self.device = torch.device(define_device(self.device_name)) + if self.verbose != 0: + warnings.warn(f"Device used : {self.device}") + + # create deep copies of mutable parameters + self.optimizer_fn 
= copy.deepcopy(self.optimizer_fn) + self.scheduler_fn = copy.deepcopy(self.scheduler_fn) + + updated_params = check_embedding_parameters(self.cat_dims, + self.cat_idxs, + self.cat_emb_dim) + self.cat_dims, self.cat_idxs, self.cat_emb_dim = updated_params + + def __update__(self, **kwargs): + """ + Updates parameters. + If does not already exists, creates it. + Otherwise overwrite with warnings. + """ + update_list = [ + "cat_dims", + "cat_emb_dim", + "cat_idxs", + "input_dim", + "mask_type", + "n_a", + "n_d", + "n_independent", + "n_shared", + "n_steps", + "grouped_features", + ] + for var_name, value in kwargs.items(): + if var_name in update_list: + try: + exec(f"global previous_val; previous_val = self.{var_name}") + if previous_val != value: # noqa + wrn_msg = f"Pretraining: {var_name} changed from {previous_val} to {value}" # noqa + warnings.warn(wrn_msg) + exec(f"self.{var_name} = value") + except AttributeError: + exec(f"self.{var_name} = value") + + def fit( + self, + X_train, + y_train, + eval_set=None, + eval_name=None, + eval_metric=None, + loss_fn=None, + weights=0, + max_epochs=100, + patience=10, + batch_size=1024, + virtual_batch_size=128, + num_workers=0, + drop_last=True, + callbacks=None, + pin_memory=True, + from_unsupervised=None, + warm_start=False, + augmentations=None, + compute_importance=True + ): + """Train a neural network stored in self.network + Using train_dataloader for training data and + valid_dataloader for validation. + + Parameters + ---------- + X_train : np.ndarray + Train set + y_train : np.array + Train targets + eval_set : list of tuple + List of eval tuple set (X, y). + The last one is used for early stopping + eval_name : list of str + List of eval set names. + eval_metric : list of str + List of evaluation metrics. + The last metric is used for early stopping. 
+ loss_fn : callable or None + a PyTorch loss function + weights : bool or dictionnary + 0 for no balancing + 1 for automated balancing + dict for custom weights per class + max_epochs : int + Maximum number of epochs during training + patience : int + Number of consecutive non improving epoch before early stopping + batch_size : int + Training batch size + virtual_batch_size : int + Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size) + num_workers : int + Number of workers used in torch.utils.data.DataLoader + drop_last : bool + Whether to drop last batch during training + callbacks : list of callback function + List of custom callbacks + pin_memory: bool + Whether to set pin_memory to True or False during training + from_unsupervised: unsupervised trained model + Use a previously self supervised model as starting weights + warm_start: bool + If True, current model parameters are used to start training + compute_importance : bool + Whether to compute feature importance + """ + # update model name + + self.max_epochs = max_epochs + self.patience = patience + self.batch_size = batch_size + self.virtual_batch_size = virtual_batch_size + self.num_workers = num_workers + self.drop_last = drop_last + self.input_dim = X_train.shape[1] + self._stop_training = False + self.pin_memory = pin_memory and (self.device.type != "cpu") + self.augmentations = augmentations + self.compute_importance = compute_importance + + if self.augmentations is not None: + # This ensure reproducibility + self.augmentations._set_seed() + + eval_set = eval_set if eval_set else [] + + if loss_fn is None: + self.loss_fn = self._default_loss + else: + self.loss_fn = loss_fn + + check_input(X_train) + check_warm_start(warm_start, from_unsupervised) + + self.update_fit_params( + X_train, + y_train, + eval_set, + weights, + ) + + # Validate and reformat eval set depending on training data + eval_names, eval_set = validate_eval_set(eval_set, eval_name, X_train, y_train) + + train_dataloader, valid_dataloaders = self._construct_loaders( + X_train, y_train, eval_set + ) + + if from_unsupervised is not None: + # Update parameters to match self pretraining + self.__update__(**from_unsupervised.get_params()) + + if not hasattr(self, "network") or not warm_start: + # model has never been fitted before of warm_start is False + self._set_network() + self._update_network_params() + self._set_metrics(eval_metric, eval_names) + self._set_optimizer() + self._set_callbacks(callbacks) + + if from_unsupervised is not None: + self.load_weights_from_unsupervised(from_unsupervised) + warnings.warn("Loading weights from unsupervised pretraining") + # Call method on_train_begin for all callbacks + self._callback_container.on_train_begin() + + # Training loop over epochs + for epoch_idx in range(self.max_epochs): + + # Call method on_epoch_begin for all callbacks + self._callback_container.on_epoch_begin(epoch_idx) + + self._train_epoch(train_dataloader) + + # Apply predict epoch to all eval sets + for eval_name, valid_dataloader in zip(eval_names, valid_dataloaders): + self._predict_epoch(eval_name, valid_dataloader) + + # Call method on_epoch_end for all callbacks + self._callback_container.on_epoch_end( + epoch_idx, logs=self.history.epoch_metrics + ) + + if self._stop_training: + break + + # Call method on_train_end for all callbacks + self._callback_container.on_train_end() + self.network.eval() + + if self.compute_importance: + # compute feature importance once the best model is defined + self.feature_importances_ = 
self._compute_feature_importances(X_train) + + def predict(self, X): + """ + Make predictions on a batch (valid) + + Parameters + ---------- + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + + Returns + ------- + predictions : np.array + Predictions of the regression problem + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + results = [] + for batch_nb, data in enumerate(dataloader): + data = data.to(self.device).float() + output, M_loss = self.network(data) + predictions = output.cpu().detach().numpy() + results.append(predictions) + res = np.vstack(results) + return self.predict_func(res) + + def explain(self, X, normalize=False): + """ + Return local explanation + + Parameters + ---------- + X : tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + normalize : bool (default False) + Wheter to normalize so that sum of features are equal to 1 + + Returns + ------- + M_explain : matrix + Importance per sample, per columns. + masks : matrix + Sparse matrix showing attention masks used by network. + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + res_explain = [] + + for batch_nb, data in enumerate(dataloader): + data = data.to(self.device).float() + + M_explain, masks = self.network.forward_masks(data) + for key, value in masks.items(): + masks[key] = csc_matrix.dot( + value.cpu().detach().numpy(), self.reducing_matrix + ) + original_feat_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(), + self.reducing_matrix) + res_explain.append(original_feat_explain) + + if batch_nb == 0: + res_masks = masks + else: + for key, value in masks.items(): + res_masks[key] = np.vstack([res_masks[key], value]) + + res_explain = np.vstack(res_explain) + + if normalize: + res_explain /= np.sum(res_explain, axis=1)[:, None] + + return res_explain, res_masks + + def load_weights_from_unsupervised(self, unsupervised_model): + update_state_dict = copy.deepcopy(self.network.state_dict()) + for param, weights in unsupervised_model.network.state_dict().items(): + if param.startswith("encoder"): + # Convert encoder's layers name to match + new_param = "tabnet." + param + else: + new_param = param + if self.network.state_dict().get(new_param) is not None: + # update only common layers + update_state_dict[new_param] = weights + + self.network.load_state_dict(update_state_dict) + + def load_class_attrs(self, class_attrs): + for attr_name, attr_value in class_attrs.items(): + setattr(self, attr_name, attr_value) + + def save_model(self, path): + """Saving TabNet model in two distinct files. + + Parameters + ---------- + path : str + Path of the model. 
+ + Returns + ------- + str + input filepath with ".zip" appended + + """ + saved_params = {} + init_params = {} + for key, val in self.get_params().items(): + if isinstance(val, type): + # Don't save torch specific params + continue + else: + init_params[key] = val + saved_params["init_params"] = init_params + + class_attrs = { + "preds_mapper": self.preds_mapper + } + saved_params["class_attrs"] = class_attrs + + # Create folder + Path(path).mkdir(parents=True, exist_ok=True) + + # Save models params + with open(Path(path).joinpath("model_params.json"), "w", encoding="utf8") as f: + json.dump(saved_params, f, cls=ComplexEncoder) + + # Save state_dict + torch.save(self.network.state_dict(), Path(path).joinpath("network.pt")) + shutil.make_archive(path, "zip", path) + shutil.rmtree(path) + print(f"Successfully saved model at {path}.zip") + return f"{path}.zip" + + def load_model(self, filepath): + """Load TabNet model. + + Parameters + ---------- + filepath : str + Path of the model. + """ + try: + with zipfile.ZipFile(filepath) as z: + with z.open("model_params.json") as f: + loaded_params = json.load(f) + loaded_params["init_params"]["device_name"] = self.device_name + with z.open("network.pt") as f: + try: + saved_state_dict = torch.load(f, map_location=self.device) + except io.UnsupportedOperation: + # In Python <3.7, the returned file object is not seekable (which at least + # some versions of PyTorch require) - so we'll try buffering it in to a + # BytesIO instead: + saved_state_dict = torch.load( + io.BytesIO(f.read()), + map_location=self.device, + ) + except KeyError: + raise KeyError("Your zip file is missing at least one component") + + self.__init__(**loaded_params["init_params"]) + + self._set_network() + self.network.load_state_dict(saved_state_dict) + self.network.eval() + self.load_class_attrs(loaded_params["class_attrs"]) + + return + + def _train_epoch(self, train_loader): + """ + Trains one epoch of the network in self.network + + Parameters + ---------- + train_loader : a :class: `torch.utils.data.Dataloader` + DataLoader with train set + """ + self.network.train() + + for batch_idx, (X, y) in enumerate(train_loader): + self._callback_container.on_batch_begin(batch_idx) + + batch_logs = self._train_batch(X, y) + + self._callback_container.on_batch_end(batch_idx, batch_logs) + + epoch_logs = {"lr": self._optimizer.param_groups[-1]["lr"]} + self.history.epoch_metrics.update(epoch_logs) + + return + + def _train_batch(self, X, y): + """ + Trains one batch of data + + Parameters + ---------- + X : torch.Tensor + Train matrix + y : torch.Tensor + Target matrix + + Returns + ------- + batch_outs : dict + Dictionnary with "y": target and "score": prediction scores. + batch_logs : dict + Dictionnary with "batch_size" and "loss". + """ + batch_logs = {"batch_size": X.shape[0]} + + X = X.to(self.device).float() + y = y.to(self.device).float() + + if self.augmentations is not None: + X, y = self.augmentations(X, y) + + for param in self.network.parameters(): + param.grad = None + + output, M_loss = self.network(X) + + loss = self.compute_loss(output, y) + # Add the overall sparsity loss + loss = loss - self.lambda_sparse * M_loss + + # Perform backward pass and optimization + loss.backward() + if self.clip_value: + clip_grad_norm_(self.network.parameters(), self.clip_value) + self._optimizer.step() + + batch_logs["loss"] = loss.cpu().detach().numpy().item() + + return batch_logs + + def _predict_epoch(self, name, loader): + """ + Predict an epoch and update metrics. 
+ + Parameters + ---------- + name : str + Name of the validation set + loader : torch.utils.data.Dataloader + DataLoader with validation set + """ + # Setting network on evaluation mode + self.network.eval() + + list_y_true = [] + list_y_score = [] + + # Main loop + for batch_idx, (X, y) in enumerate(loader): + scores = self._predict_batch(X) + list_y_true.append(y) + list_y_score.append(scores) + + y_true, scores = self.stack_batches(list_y_true, list_y_score) + + metrics_logs = self._metric_container_dict[name](y_true, scores) + self.network.train() + self.history.epoch_metrics.update(metrics_logs) + return + + def _predict_batch(self, X): + """ + Predict one batch of data. + + Parameters + ---------- + X : torch.Tensor + Owned products + + Returns + ------- + np.array + model scores + """ + X = X.to(self.device).float() + + # compute model output + scores, _ = self.network(X) + + if isinstance(scores, list): + scores = [x.cpu().detach().numpy() for x in scores] + else: + scores = scores.cpu().detach().numpy() + + return scores + + def _set_network(self): + """Setup the network and explain matrix.""" + torch.manual_seed(self.seed) + + self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) + + self.network = tab_network.TabNet( + self.input_dim, + self.output_dim, + n_d=self.n_d, + n_a=self.n_a, + n_steps=self.n_steps, + gamma=self.gamma, + cat_idxs=self.cat_idxs, + cat_dims=self.cat_dims, + cat_emb_dim=self.cat_emb_dim, + n_independent=self.n_independent, + n_shared=self.n_shared, + epsilon=self.epsilon, + virtual_batch_size=self.virtual_batch_size, + momentum=self.momentum, + mask_type=self.mask_type, + group_attention_matrix=self.group_matrix.to(self.device), + ).to(self.device) + + self.reducing_matrix = create_explain_matrix( + self.network.input_dim, + self.network.cat_emb_dim, + self.network.cat_idxs, + self.network.post_embed_dim, + ) + + def _set_metrics(self, metrics, eval_names): + """Set attributes relative to the metrics. + + Parameters + ---------- + metrics : list of str + List of eval metric names. + eval_names : list of str + List of eval set names. + + """ + metrics = metrics or [self._default_metric] + + metrics = check_metrics(metrics) + # Set metric container for each sets + self._metric_container_dict = {} + for name in eval_names: + self._metric_container_dict.update( + {name: MetricContainer(metrics, prefix=f"{name}_")} + ) + + self._metrics = [] + self._metrics_names = [] + for _, metric_container in self._metric_container_dict.items(): + self._metrics.extend(metric_container.metrics) + self._metrics_names.extend(metric_container.names) + + # Early stopping metric is the last eval metric + self.early_stopping_metric = ( + self._metrics_names[-1] if len(self._metrics_names) > 0 else None + ) + + def _set_callbacks(self, custom_callbacks): + """Setup the callbacks functions. + + Parameters + ---------- + custom_callbacks : list of func + List of callback functions. + + """ + # Setup default callbacks history, early stopping and scheduler + callbacks = [] + self.history = History(self, verbose=self.verbose) + callbacks.append(self.history) + if (self.early_stopping_metric is not None) and (self.patience > 0): + early_stopping = EarlyStopping( + early_stopping_metric=self.early_stopping_metric, + is_maximize=( + self._metrics[-1]._maximize if len(self._metrics) > 0 else None + ), + patience=self.patience, + ) + callbacks.append(early_stopping) + else: + wrn_msg = "No early stopping will be performed, last training weights will be used." 
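+            # Reached when there is no eval metric to monitor or patience <= 0:
+            # training then runs for the full max_epochs and keeps the final
+            # (not necessarily best) weights, hence the warning below.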
+ warnings.warn(wrn_msg) + + if self.scheduler_fn is not None: + # Add LR Scheduler call_back + is_batch_level = self.scheduler_params.pop("is_batch_level", False) + scheduler = LRSchedulerCallback( + scheduler_fn=self.scheduler_fn, + scheduler_params=self.scheduler_params, + optimizer=self._optimizer, + early_stopping_metric=self.early_stopping_metric, + is_batch_level=is_batch_level, + ) + callbacks.append(scheduler) + + if custom_callbacks: + callbacks.extend(custom_callbacks) + self._callback_container = CallbackContainer(callbacks) + self._callback_container.set_trainer(self) + + def _set_optimizer(self): + """Setup optimizer.""" + self._optimizer = self.optimizer_fn( + self.network.parameters(), **self.optimizer_params + ) + + def _construct_loaders(self, X_train, y_train, eval_set): + """Generate dataloaders for train and eval set. + + Parameters + ---------- + X_train : np.array + Train set. + y_train : np.array + Train targets. + eval_set : list of tuple + List of eval tuple set (X, y). + + Returns + ------- + train_dataloader : `torch.utils.data.Dataloader` + Training dataloader. + valid_dataloaders : list of `torch.utils.data.Dataloader` + List of validation dataloaders. + + """ + # all weights are not allowed for this type of model + y_train_mapped = self.prepare_target(y_train) + for i, (X, y) in enumerate(eval_set): + y_mapped = self.prepare_target(y) + eval_set[i] = (X, y_mapped) + + train_dataloader, valid_dataloaders = create_dataloaders( + X_train, + y_train_mapped, + eval_set, + self.updated_weights, + self.batch_size, + self.num_workers, + self.drop_last, + self.pin_memory, + ) + return train_dataloader, valid_dataloaders + + def _compute_feature_importances(self, X): + """Compute global feature importance. + + Parameters + ---------- + loader : `torch.utils.data.Dataloader` + Pytorch dataloader. + + """ + M_explain, _ = self.explain(X, normalize=False) + sum_explain = M_explain.sum(axis=0) + feature_importances_ = sum_explain / np.sum(sum_explain) + return feature_importances_ + + def _update_network_params(self): + self.network.virtual_batch_size = self.virtual_batch_size + + @abstractmethod + def update_fit_params(self, X_train, y_train, eval_set, weights): + """ + Set attributes relative to fit function. + + Parameters + ---------- + X_train : np.ndarray + Train set + y_train : np.array + Train targets + eval_set : list of tuple + List of eval tuple set (X, y). + weights : bool or dictionnary + 0 for no balancing + 1 for automated balancing + """ + raise NotImplementedError( + "users must define update_fit_params to use this base class" + ) + + @abstractmethod + def compute_loss(self, y_score, y_true): + """ + Compute the loss. + + Parameters + ---------- + y_score : a :tensor: `torch.Tensor` + Score matrix + y_true : a :tensor: `torch.Tensor` + Target matrix + + Returns + ------- + float + Loss value + """ + raise NotImplementedError( + "users must define compute_loss to use this base class" + ) + + @abstractmethod + def prepare_target(self, y): + """ + Prepare target before training. + + Parameters + ---------- + y : a :tensor: `torch.Tensor` + Target matrix. + + Returns + ------- + `torch.Tensor` + Converted target matrix. 
+ """ + raise NotImplementedError( + "users must define prepare_target to use this base class" + ) diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py new file mode 100644 index 00000000..287fa365 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py @@ -0,0 +1,85 @@ +import torch +from pytorch_tabnet.utils import define_device +import numpy as np + + +class RegressionSMOTE(): + """ + Apply SMOTE + + This will average a percentage p of the elements in the batch with other elements. + The target will be averaged as well (this might work with binary classification + and certain loss), following a beta distribution. + """ + def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): + "" + self.seed = seed + self._set_seed() + self.device = define_device(device_name) + self.alpha = alpha + self.beta = beta + self.p = p + if (p < 0.) or (p > 1.0): + raise ValueError("Value of p should be between 0. and 1.") + + def _set_seed(self): + torch.manual_seed(self.seed) + np.random.seed(self.seed) + return + + def __call__(self, X, y): + batch_size = X.shape[0] + random_values = torch.rand(batch_size, device=self.device) + idx_to_change = random_values < self.p + + # ensure that first element to switch has probability > 0.5 + np_betas = np.random.beta(self.alpha, self.beta, batch_size) / 2 + 0.5 + random_betas = torch.from_numpy(np_betas).to(self.device).float() + index_permute = torch.randperm(batch_size, device=self.device) + + X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] + X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view(X[idx_to_change].size()) # noqa + + y[idx_to_change] = random_betas[idx_to_change, None] * y[idx_to_change] + y[idx_to_change] += (1 - random_betas[idx_to_change, None]) * y[index_permute][idx_to_change].view(y[idx_to_change].size()) # noqa + + return X, y + + +class ClassificationSMOTE(): + """ + Apply SMOTE for classification tasks. + + This will average a percentage p of the elements in the batch with other elements. + The target will stay unchanged and keep the value of the most important row in the mix. + """ + def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): + "" + self.seed = seed + self._set_seed() + self.device = define_device(device_name) + self.alpha = alpha + self.beta = beta + self.p = p + if (p < 0.) or (p > 1.0): + raise ValueError("Value of p should be between 0. 
and 1.") + + def _set_seed(self): + torch.manual_seed(self.seed) + np.random.seed(self.seed) + return + + def __call__(self, X, y): + batch_size = X.shape[0] + random_values = torch.rand(batch_size, device=self.device) + idx_to_change = random_values < self.p + + # ensure that first element to switch has probability > 0.5 + np_betas = np.random.beta(self.alpha, self.beta, batch_size) / 2 + 0.5 + random_betas = torch.from_numpy(np_betas).to(self.device).float() + index_permute = torch.randperm(batch_size, device=self.device) + + X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] + X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view(X[idx_to_change].size()) # noqa + + return X, y diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py new file mode 100644 index 00000000..cb031d54 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py @@ -0,0 +1,287 @@ +import time +import datetime +import copy +import numpy as np +from dataclasses import dataclass, field +from typing import List, Any +import warnings + + +class Callback: + """ + Abstract base class used to build new callbacks. + """ + + def __init__(self): + pass + + def set_params(self, params): + self.params = params + + def set_trainer(self, model): + self.trainer = model + + def on_epoch_begin(self, epoch, logs=None): + pass + + def on_epoch_end(self, epoch, logs=None): + pass + + def on_batch_begin(self, batch, logs=None): + pass + + def on_batch_end(self, batch, logs=None): + pass + + def on_train_begin(self, logs=None): + pass + + def on_train_end(self, logs=None): + pass + + +@dataclass +class CallbackContainer: + """ + Container holding a list of callbacks. + """ + + callbacks: List[Callback] = field(default_factory=list) + + def append(self, callback): + self.callbacks.append(callback) + + def set_params(self, params): + for callback in self.callbacks: + callback.set_params(params) + + def set_trainer(self, trainer): + self.trainer = trainer + for callback in self.callbacks: + callback.set_trainer(trainer) + + def on_epoch_begin(self, epoch, logs=None): + logs = logs or {} + for callback in self.callbacks: + callback.on_epoch_begin(epoch, logs) + + def on_epoch_end(self, epoch, logs=None): + logs = logs or {} + for callback in self.callbacks: + callback.on_epoch_end(epoch, logs) + + def on_batch_begin(self, batch, logs=None): + logs = logs or {} + for callback in self.callbacks: + callback.on_batch_begin(batch, logs) + + def on_batch_end(self, batch, logs=None): + logs = logs or {} + for callback in self.callbacks: + callback.on_batch_end(batch, logs) + + def on_train_begin(self, logs=None): + logs = logs or {} + logs["start_time"] = time.time() + for callback in self.callbacks: + callback.on_train_begin(logs) + + def on_train_end(self, logs=None): + logs = logs or {} + for callback in self.callbacks: + callback.on_train_end(logs) + + +@dataclass +class EarlyStopping(Callback): + """EarlyStopping callback to exit the training loop if early_stopping_metric + does not improve by a certain amount for a certain + number of epochs. + + Parameters + --------- + early_stopping_metric : str + Early stopping metric name + is_maximize : bool + Whether to maximize or not early_stopping_metric + tol : float + minimum change in monitored value to qualify as improvement. + This number should be positive. 
+    patience : integer
+        number of epochs to wait for improvement before terminating;
+        the counter is reset after each improvement
+
+    """
+
+    early_stopping_metric: str
+    is_maximize: bool
+    tol: float = 0.0
+    patience: int = 5
+
+    def __post_init__(self):
+        self.best_epoch = 0
+        self.stopped_epoch = 0
+        self.wait = 0
+        self.best_weights = None
+        self.best_loss = np.inf
+        if self.is_maximize:
+            self.best_loss = -self.best_loss
+        super().__init__()
+
+    def on_epoch_end(self, epoch, logs=None):
+        current_loss = logs.get(self.early_stopping_metric)
+        if current_loss is None:
+            return
+
+        loss_change = current_loss - self.best_loss
+        max_improved = self.is_maximize and loss_change > self.tol
+        min_improved = (not self.is_maximize) and (-loss_change > self.tol)
+        if max_improved or min_improved:
+            # improvement found: snapshot weights and reset the patience counter
+            self.best_loss = current_loss
+            self.best_epoch = epoch
+            self.wait = 1
+            self.best_weights = copy.deepcopy(self.trainer.network.state_dict())
+        else:
+            if self.wait >= self.patience:
+                self.stopped_epoch = epoch
+                self.trainer._stop_training = True
+            self.wait += 1
+
+    def on_train_end(self, logs=None):
+        self.trainer.best_epoch = self.best_epoch
+        self.trainer.best_cost = self.best_loss
+
+        if self.best_weights is not None:
+            self.trainer.network.load_state_dict(self.best_weights)
+
+        if self.stopped_epoch > 0:
+            msg = f"\nEarly stopping occurred at epoch {self.stopped_epoch}"
+            msg += (
+                f" with best_epoch = {self.best_epoch} and "
+                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
+            )
+            print(msg)
+        else:
+            msg = (
+                f"Stop training because you reached max_epochs = {self.trainer.max_epochs}"
+                + f" with best_epoch = {self.best_epoch} and "
+                + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}"
+            )
+            print(msg)
+        wrn_msg = "Best weights from best epoch are automatically used!"
+        warnings.warn(wrn_msg)
+
+
+@dataclass
+class History(Callback):
+    """Callback that records events into a `History` object.
+    This callback is automatically applied to
+    every SuperModule.
+ + Parameters + --------- + trainer : DeepRecoModel + Model class to train + verbose : int + Print results every verbose iteration + + """ + + trainer: Any + verbose: int = 1 + + def __post_init__(self): + super().__init__() + self.samples_seen = 0.0 + self.total_time = 0.0 + + def on_train_begin(self, logs=None): + self.history = {"loss": []} + self.history.update({"lr": []}) + self.history.update({name: [] for name in self.trainer._metrics_names}) + self.start_time = logs["start_time"] + self.epoch_loss = 0.0 + + def on_epoch_begin(self, epoch, logs=None): + self.epoch_metrics = {"loss": 0.0} + self.samples_seen = 0.0 + + def on_epoch_end(self, epoch, logs=None): + self.epoch_metrics["loss"] = self.epoch_loss + for metric_name, metric_value in self.epoch_metrics.items(): + self.history[metric_name].append(metric_value) + if self.verbose == 0: + return + if epoch % self.verbose != 0: + return + msg = f"epoch {epoch:<3}" + for metric_name, metric_value in self.epoch_metrics.items(): + if metric_name != "lr": + msg += f"| {metric_name:<3}: {np.round(metric_value, 5):<8}" + self.total_time = int(time.time() - self.start_time) + msg += f"| {str(datetime.timedelta(seconds=self.total_time)) + 's':<6}" + print(msg) + + def on_batch_end(self, batch, logs=None): + batch_size = logs["batch_size"] + self.epoch_loss = ( + self.samples_seen * self.epoch_loss + batch_size * logs["loss"] + ) / (self.samples_seen + batch_size) + self.samples_seen += batch_size + + def __getitem__(self, name): + return self.history[name] + + def __repr__(self): + return str(self.history) + + def __str__(self): + return str(self.history) + + +@dataclass +class LRSchedulerCallback(Callback): + """Wrapper for most torch scheduler functions. + + Parameters + --------- + scheduler_fn : torch.optim.lr_scheduler + Torch scheduling class + scheduler_params : dict + Dictionnary containing all parameters for the scheduler_fn + is_batch_level : bool (default = False) + If set to False : lr updates will happen at every epoch + If set to True : lr updates happen at every batch + Set this to True for OneCycleLR for example + """ + + scheduler_fn: Any + optimizer: Any + scheduler_params: dict + early_stopping_metric: str + is_batch_level: bool = False + + def __post_init__( + self, + ): + self.is_metric_related = hasattr(self.scheduler_fn, "is_better") + self.scheduler = self.scheduler_fn(self.optimizer, **self.scheduler_params) + super().__init__() + + def on_batch_end(self, batch, logs=None): + if self.is_batch_level: + self.scheduler.step() + else: + pass + + def on_epoch_end(self, epoch, logs=None): + current_loss = logs.get(self.early_stopping_metric) + if current_loss is None: + return + if self.is_batch_level: + pass + else: + if self.is_metric_related: + self.scheduler.step(current_loss) + else: + self.scheduler.step() diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py new file mode 100644 index 00000000..e8ad8181 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py @@ -0,0 +1,523 @@ +from dataclasses import dataclass +from typing import List +import numpy as np +from sklearn.metrics import ( + roc_auc_score, + mean_squared_error, + mean_absolute_error, + accuracy_score, + log_loss, + balanced_accuracy_score, + mean_squared_log_error, +) +import torch + + +def UnsupervisedLoss(y_pred, embedded_x, obf_vars, eps=1e-9): + """ + Implements unsupervised loss function. 
+ This differs from orginal paper as it's scaled to be batch size independent + and number of features reconstructed independent (by taking the mean) + + Parameters + ---------- + y_pred : torch.Tensor or np.array + Reconstructed prediction (with embeddings) + embedded_x : torch.Tensor + Original input embedded by network + obf_vars : torch.Tensor + Binary mask for obfuscated variables. + 1 means the variable was obfuscated so reconstruction is based on this. + eps : float + A small floating point to avoid ZeroDivisionError + This can happen in degenerated case when a feature has only one value + + Returns + ------- + loss : torch float + Unsupervised loss, average value over batch samples. + """ + errors = y_pred - embedded_x + reconstruction_errors = torch.mul(errors, obf_vars) ** 2 + batch_means = torch.mean(embedded_x, dim=0) + batch_means[batch_means == 0] = 1 + + batch_stds = torch.std(embedded_x, dim=0) ** 2 + batch_stds[batch_stds == 0] = batch_means[batch_stds == 0] + features_loss = torch.matmul(reconstruction_errors, 1 / batch_stds) + # compute the number of obfuscated variables to reconstruct + nb_reconstructed_variables = torch.sum(obf_vars, dim=1) + # take the mean of the reconstructed variable errors + features_loss = features_loss / (nb_reconstructed_variables + eps) + # here we take the mean per batch, contrary to the paper + loss = torch.mean(features_loss) + return loss + + +def UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars, eps=1e-9): + errors = y_pred - embedded_x + reconstruction_errors = np.multiply(errors, obf_vars) ** 2 + batch_means = np.mean(embedded_x, axis=0) + batch_means = np.where(batch_means == 0, 1, batch_means) + + batch_stds = np.std(embedded_x, axis=0, ddof=1) ** 2 + batch_stds = np.where(batch_stds == 0, batch_means, batch_stds) + features_loss = np.matmul(reconstruction_errors, 1 / batch_stds) + # compute the number of obfuscated variables to reconstruct + nb_reconstructed_variables = np.sum(obf_vars, axis=1) + # take the mean of the reconstructed variable errors + features_loss = features_loss / (nb_reconstructed_variables + eps) + # here we take the mean per batch, contrary to the paper + loss = np.mean(features_loss) + return loss + + +@dataclass +class UnsupMetricContainer: + """Container holding a list of metrics. + + Parameters + ---------- + y_pred : torch.Tensor or np.array + Reconstructed prediction (with embeddings) + embedded_x : torch.Tensor + Original input embedded by network + obf_vars : torch.Tensor + Binary mask for obfuscated variables. + 1 means the variables was obfuscated so reconstruction is based on this. + + """ + + metric_names: List[str] + prefix: str = "" + + def __post_init__(self): + self.metrics = Metric.get_metrics_by_names(self.metric_names) + self.names = [self.prefix + name for name in self.metric_names] + + def __call__(self, y_pred, embedded_x, obf_vars): + """Compute all metrics and store into a dict. + + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_pred : np.ndarray + Score matrix or vector + + Returns + ------- + dict + Dict of metrics ({metric_name: metric_value}). + + """ + logs = {} + for metric in self.metrics: + res = metric(y_pred, embedded_x, obf_vars) + logs[self.prefix + metric._name] = res + return logs + + +@dataclass +class MetricContainer: + """Container holding a list of metrics. + + Parameters + ---------- + metric_names : list of str + List of metric names. + prefix : str + Prefix of metric names. 
+ + """ + + metric_names: List[str] + prefix: str = "" + + def __post_init__(self): + self.metrics = Metric.get_metrics_by_names(self.metric_names) + self.names = [self.prefix + name for name in self.metric_names] + + def __call__(self, y_true, y_pred): + """Compute all metrics and store into a dict. + + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_pred : np.ndarray + Score matrix or vector + + Returns + ------- + dict + Dict of metrics ({metric_name: metric_value}). + + """ + logs = {} + for metric in self.metrics: + if isinstance(y_pred, list): + res = np.mean( + [metric(y_true[:, i], y_pred[i]) for i in range(len(y_pred))] + ) + else: + res = metric(y_true, y_pred) + logs[self.prefix + metric._name] = res + return logs + + +class Metric: + def __call__(self, y_true, y_pred): + raise NotImplementedError("Custom Metrics must implement this function") + + @classmethod + def get_metrics_by_names(cls, names): + """Get list of metric classes. + + Parameters + ---------- + cls : Metric + Metric class. + names : list + List of metric names. + + Returns + ------- + metrics : list + List of metric classes. + + """ + available_metrics = cls.__subclasses__() + available_names = [metric()._name for metric in available_metrics] + metrics = [] + for name in names: + assert ( + name in available_names + ), f"{name} is not available, choose in {available_names}" + idx = available_names.index(name) + metric = available_metrics[idx]() + metrics.append(metric) + return metrics + + +class AUC(Metric): + """ + AUC. + """ + + def __init__(self): + self._name = "auc" + self._maximize = True + + def __call__(self, y_true, y_score): + """ + Compute AUC of predictions. + + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_score : np.ndarray + Score matrix or vector + + Returns + ------- + float + AUC of predictions vs targets. + """ + return roc_auc_score(y_true, y_score[:, 1]) + + +class Accuracy(Metric): + """ + Accuracy. + """ + + def __init__(self): + self._name = "accuracy" + self._maximize = True + + def __call__(self, y_true, y_score): + """ + Compute Accuracy of predictions. + + Parameters + ---------- + y_true: np.ndarray + Target matrix or vector + y_score: np.ndarray + Score matrix or vector + + Returns + ------- + float + Accuracy of predictions vs targets. + """ + y_pred = np.argmax(y_score, axis=1) + return accuracy_score(y_true, y_pred) + + +class BalancedAccuracy(Metric): + """ + Balanced Accuracy. + """ + + def __init__(self): + self._name = "balanced_accuracy" + self._maximize = True + + def __call__(self, y_true, y_score): + """ + Compute Accuracy of predictions. + + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_score : np.ndarray + Score matrix or vector + + Returns + ------- + float + Accuracy of predictions vs targets. + """ + y_pred = np.argmax(y_score, axis=1) + return balanced_accuracy_score(y_true, y_pred) + + +class LogLoss(Metric): + """ + LogLoss. + """ + + def __init__(self): + self._name = "logloss" + self._maximize = False + + def __call__(self, y_true, y_score): + """ + Compute LogLoss of predictions. + + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_score : np.ndarray + Score matrix or vector + + Returns + ------- + float + LogLoss of predictions vs targets. + """ + return log_loss(y_true, y_score) + + +class MAE(Metric): + """ + Mean Absolute Error. 
+    """
+
+    def __init__(self):
+        self._name = "mae"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute MAE (Mean Absolute Error) of predictions.
+
+        Parameters
+        ----------
+        y_true : np.ndarray
+            Target matrix or vector
+        y_score : np.ndarray
+            Score matrix or vector
+
+        Returns
+        -------
+        float
+            MAE of predictions vs targets.
+        """
+        return mean_absolute_error(y_true, y_score)
+
+
+class MSE(Metric):
+    """
+    Mean Squared Error.
+    """
+
+    def __init__(self):
+        self._name = "mse"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute MSE (Mean Squared Error) of predictions.
+
+        Parameters
+        ----------
+        y_true : np.ndarray
+            Target matrix or vector
+        y_score : np.ndarray
+            Score matrix or vector
+
+        Returns
+        -------
+        float
+            MSE of predictions vs targets.
+        """
+        return mean_squared_error(y_true, y_score)
+
+
+class RMSLE(Metric):
+    """
+    Root mean squared logarithmic error regression loss.
+    Scikit-learn implementation:
+    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html
+    Note: in order to avoid errors, negative predictions are clipped to 0.
+    This means that you should clip negative predictions manually after calling predict.
+    """
+
+    def __init__(self):
+        self._name = "rmsle"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute RMSLE of predictions.
+
+        Parameters
+        ----------
+        y_true : np.ndarray
+            Target matrix or vector
+        y_score : np.ndarray
+            Score matrix or vector
+
+        Returns
+        -------
+        float
+            RMSLE of predictions vs targets.
+        """
+        y_score = np.clip(y_score, a_min=0, a_max=None)
+        return np.sqrt(mean_squared_log_error(y_true, y_score))
+
+
+class UnsupervisedMetric(Metric):
+    """
+    Unsupervised reconstruction loss (torch version).
+    """
+
+    def __init__(self):
+        self._name = "unsup_loss"
+        self._maximize = False
+
+    def __call__(self, y_pred, embedded_x, obf_vars):
+        """
+        Compute the unsupervised reconstruction loss of predictions.
+
+        Parameters
+        ----------
+        y_pred : torch.Tensor or np.array
+            Reconstructed prediction (with embeddings)
+        embedded_x : torch.Tensor
+            Original input embedded by network
+        obf_vars : torch.Tensor
+            Binary mask for obfuscated variables.
+            1 means the variable was obfuscated so reconstruction is based on this.
+
+        Returns
+        -------
+        float
+            Unsupervised loss of predictions vs embedded targets.
+        """
+        loss = UnsupervisedLoss(y_pred, embedded_x, obf_vars)
+        return loss.item()
+
+
+class UnsupervisedNumpyMetric(Metric):
+    """
+    Unsupervised reconstruction loss (numpy version).
+    """
+
+    def __init__(self):
+        self._name = "unsup_loss_numpy"
+        self._maximize = False
+
+    def __call__(self, y_pred, embedded_x, obf_vars):
+        """
+        Compute the unsupervised reconstruction loss of predictions.
+
+        Parameters
+        ----------
+        y_pred : torch.Tensor or np.array
+            Reconstructed prediction (with embeddings)
+        embedded_x : torch.Tensor
+            Original input embedded by network
+        obf_vars : torch.Tensor
+            Binary mask for obfuscated variables.
+            1 means the variable was obfuscated so reconstruction is based on this.
+
+        Returns
+        -------
+        float
+            Unsupervised loss of predictions vs embedded targets.
+        """
+        return UnsupervisedLossNumpy(
+            y_pred,
+            embedded_x,
+            obf_vars
+        )
+
+
+class RMSE(Metric):
+    """
+    Root Mean Squared Error.
+    """
+
+    def __init__(self):
+        self._name = "rmse"
+        self._maximize = False
+
+    def __call__(self, y_true, y_score):
+        """
+        Compute RMSE (Root Mean Squared Error) of predictions.
+ + Parameters + ---------- + y_true : np.ndarray + Target matrix or vector + y_score : np.ndarray + Score matrix or vector + + Returns + ------- + float + RMSE of predictions vs targets. + """ + return np.sqrt(mean_squared_error(y_true, y_score)) + + +def check_metrics(metrics): + """Check if custom metrics are provided. + + Parameters + ---------- + metrics : list of str or classes + List with built-in metrics (str) or custom metrics (classes). + + Returns + ------- + val_metrics : list of str + List of metric names. + + """ + val_metrics = [] + for metric in metrics: + if isinstance(metric, str): + val_metrics.append(metric) + elif issubclass(metric, Metric): + val_metrics.append(metric()._name) + else: + raise TypeError("You need to provide a valid metric format") + return val_metrics diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py new file mode 100644 index 00000000..8dbf08c5 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py @@ -0,0 +1,425 @@ +# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi +# +# License: BSD 3 clause +""" +Multi-class / multi-label utility function +========================================== + +""" +from collections.abc import Sequence +from itertools import chain + +from scipy.sparse import issparse +from scipy.sparse.base import spmatrix +from scipy.sparse import dok_matrix +from scipy.sparse import lil_matrix +import scipy.sparse as sp + +import numpy as np +import pandas as pd + + +def _assert_all_finite(X, allow_nan=False): + """Like assert_all_finite, but only for ndarray.""" + + X = np.asanyarray(X) + # First try an O(n) time, O(1) space solution for the common case that + # everything is finite; fall back to O(n) space np.isfinite to prevent + # false positives from overflow in sum method. The sum is also calculated + # safely to reduce dtype induced overflows. + is_float = X.dtype.kind in "fc" + if is_float and (np.isfinite(np.sum(X))): + pass + elif is_float: + msg_err = "Input contains {} or a value too large for {!r}." + if ( + allow_nan + and np.isinf(X).any() + or not allow_nan + and not np.isfinite(X).all() + ): + type_err = "infinity" if allow_nan else "NaN, infinity" + raise ValueError(msg_err.format(type_err, X.dtype)) + # for object dtype data, we only check for NaNs (GH-13254) + elif X.dtype == np.dtype("object") and not allow_nan: + if np.isnan(X).any(): + raise ValueError("Input contains NaN") + + +def assert_all_finite(X, allow_nan=False): + """Throw a ValueError if X contains NaN or infinity. 
+ + Parameters + ---------- + X : array or sparse matrix + allow_nan : bool + """ + _assert_all_finite(X.data if sp.issparse(X) else X, allow_nan) + + +def _unique_multiclass(y): + if hasattr(y, "__array__"): + return np.unique(np.asarray(y)) + else: + return set(y) + + +def _unique_indicator(y): + """ + Not implemented + """ + raise IndexError( + f"""Given labels are of size {y.shape} while they should be (n_samples,) \n""" + + """If attempting multilabel classification, try using TabNetMultiTaskClassification """ + + """or TabNetRegressor""" + ) + + +_FN_UNIQUE_LABELS = { + "binary": _unique_multiclass, + "multiclass": _unique_multiclass, + "multilabel-indicator": _unique_indicator, +} + + +def unique_labels(*ys): + """Extract an ordered array of unique labels + + We don't allow: + - mix of multilabel and multiclass (single label) targets + - mix of label indicator matrix and anything else, + because there are no explicit labels) + - mix of label indicator matrices of different sizes + - mix of string and integer labels + + At the moment, we also don't allow "multiclass-multioutput" input type. + + Parameters + ---------- + *ys : array-likes + + Returns + ------- + out : numpy array of shape [n_unique_labels] + An ordered array of unique labels. + + Examples + -------- + >>> from sklearn.utils.multiclass import unique_labels + >>> unique_labels([3, 5, 5, 5, 7, 7]) + array([3, 5, 7]) + >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4]) + array([1, 2, 3, 4]) + >>> unique_labels([1, 2, 10], [5, 11]) + array([ 1, 2, 5, 10, 11]) + """ + if not ys: + raise ValueError("No argument has been passed.") + # Check that we don't mix label format + + ys_types = set(type_of_target(x) for x in ys) + if ys_types == {"binary", "multiclass"}: + ys_types = {"multiclass"} + + if len(ys_types) > 1: + raise ValueError("Mix type of y not allowed, got types %s" % ys_types) + + label_type = ys_types.pop() + + # Get the unique set of labels + _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None) + if not _unique_labels: + raise ValueError("Unknown label type: %s" % repr(ys)) + + ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) + + # Check that we don't mix string type with number type + if len(set(isinstance(label, str) for label in ys_labels)) > 1: + raise ValueError("Mix of label input types (string and number)") + + return np.array(sorted(ys_labels)) + + +def _is_integral_float(y): + return y.dtype.kind == "f" and np.all(y.astype(int) == y) + + +def is_multilabel(y): + """Check if ``y`` is in a multilabel format. + + Parameters + ---------- + y : numpy array of shape [n_samples] + Target values. + + Returns + ------- + out : bool + Return ``True``, if ``y`` is in a multilabel format, else ```False``. 
+ + Examples + -------- + >>> import numpy as np + >>> from sklearn.utils.multiclass import is_multilabel + >>> is_multilabel([0, 1, 0, 1]) + False + >>> is_multilabel([[1], [0, 2], []]) + False + >>> is_multilabel(np.array([[1, 0], [0, 0]])) + True + >>> is_multilabel(np.array([[1], [0], [0]])) + False + >>> is_multilabel(np.array([[1, 0, 0]])) + True + """ + if hasattr(y, "__array__"): + y = np.asarray(y) + if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1): + return False + + if issparse(y): + if isinstance(y, (dok_matrix, lil_matrix)): + y = y.tocsr() + return ( + len(y.data) == 0 + or np.unique(y.data).size == 1 + and ( + y.dtype.kind in "biu" + or _is_integral_float(np.unique(y.data)) # bool, int, uint + ) + ) + else: + labels = np.unique(y) + + return len(labels) < 3 and ( + y.dtype.kind in "biu" or _is_integral_float(labels) # bool, int, uint + ) + + +def check_classification_targets(y): + """Ensure that target y is of a non-regression type. + + Only the following target types (as defined in type_of_target) are allowed: + 'binary', 'multiclass', 'multiclass-multioutput', + 'multilabel-indicator', 'multilabel-sequences' + + Parameters + ---------- + y : array-like + """ + y_type = type_of_target(y) + if y_type not in [ + "binary", + "multiclass", + "multiclass-multioutput", + "multilabel-indicator", + "multilabel-sequences", + ]: + raise ValueError("Unknown label type: %r" % y_type) + + +def type_of_target(y): + """Determine the type of data indicated by the target. + + Note that this type is the most specific type that can be inferred. + For example: + + * ``binary`` is more specific but compatible with ``multiclass``. + * ``multiclass`` of integers is more specific but compatible with + ``continuous``. + * ``multilabel-indicator`` is more specific but compatible with + ``multiclass-multioutput``. + + Parameters + ---------- + y : array-like + + Returns + ------- + target_type : string + One of: + + * 'continuous': `y` is an array-like of floats that are not all + integers, and is 1d or a column vector. + * 'continuous-multioutput': `y` is a 2d array of floats that are + not all integers, and both dimensions are of size > 1. + * 'binary': `y` contains <= 2 discrete values and is 1d or a column + vector. + * 'multiclass': `y` contains more than two discrete values, is not a + sequence of sequences, and is 1d or a column vector. + * 'multiclass-multioutput': `y` is a 2d array that contains more + than two discrete values, is not a sequence of sequences, and both + dimensions are of size > 1. + * 'multilabel-indicator': `y` is a label indicator matrix, an array + of two dimensions with at least two columns, and at most 2 unique + values. + * 'unknown': `y` is array-like but none of the above, such as a 3d + array, sequence of sequences, or an array of non-sequence objects. 
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> type_of_target([0.1, 0.6])
+    'continuous'
+    >>> type_of_target([1, -1, -1, 1])
+    'binary'
+    >>> type_of_target(['a', 'b', 'a'])
+    'binary'
+    >>> type_of_target([1.0, 2.0])
+    'binary'
+    >>> type_of_target([1, 0, 2])
+    'multiclass'
+    >>> type_of_target([1.0, 0.0, 3.0])
+    'multiclass'
+    >>> type_of_target(['a', 'b', 'c'])
+    'multiclass'
+    >>> type_of_target(np.array([[1, 2], [3, 1]]))
+    'multiclass-multioutput'
+    >>> type_of_target([[1, 2]])
+    'multiclass-multioutput'
+    >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]]))
+    'continuous-multioutput'
+    >>> type_of_target(np.array([[0, 1], [1, 1]]))
+    'multilabel-indicator'
+    """
+    valid = (
+        isinstance(y, (Sequence, spmatrix)) or hasattr(y, "__array__")
+    ) and not isinstance(y, str)
+
+    if not valid:
+        raise ValueError(
+            "Expected array-like (array or non-string sequence), " "got %r" % y
+        )
+
+    sparseseries = y.__class__.__name__ == "SparseSeries"
+    if sparseseries:
+        raise ValueError("y cannot be class 'SparseSeries'.")
+
+    if is_multilabel(y):
+        return "multilabel-indicator"
+
+    try:
+        y = np.asarray(y)
+    except ValueError:
+        # Known to fail in numpy 1.3 for array of arrays
+        return "unknown"
+
+    # The old sequence of sequences format
+    try:
+        if (
+            not hasattr(y[0], "__array__")
+            and isinstance(y[0], Sequence)
+            and not isinstance(y[0], str)
+        ):
+            raise ValueError(
+                "You appear to be using a legacy multi-label data"
+                " representation. Sequence of sequences are no"
+                " longer supported; use a binary array or sparse"
+                " matrix instead - the MultiLabelBinarizer"
+                " transformer can convert to this format."
+            )
+    except IndexError:
+        pass
+
+    # Invalid inputs
+    if y.ndim > 2 or (y.dtype == object and len(y) and not isinstance(y.flat[0], str)):
+        return "unknown"  # [[[1, 2]]] or [obj_1] and not ["label_1"]
+
+    if y.ndim == 2 and y.shape[1] == 0:
+        return "unknown"  # [[]]
+
+    if y.ndim == 2 and y.shape[1] > 1:
+        suffix = "-multioutput"  # [[1, 2], [1, 2]]
+    else:
+        suffix = ""  # [1, 2, 3] or [[1], [2], [3]]
+
+    # check float and contains non-integer float values
+    if y.dtype.kind == "f" and np.any(y != y.astype(int)):
+        # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.]
+        _assert_all_finite(y)
+        return "continuous" + suffix
+
+    if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1):
+        return "multiclass" + suffix  # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]]
+    else:
+        return "binary"  # [1, 2] or [["a"], ["b"]]
+
+
+def check_unique_type(y):
+    target_types = pd.Series(y).map(type).unique()
+    if len(target_types) != 1:
+        raise TypeError(
+            f"Values in the target must all have the same type. Target has types {target_types}"
+        )
+
+
+def infer_output_dim(y_train):
+    """
+    Infer output_dim from targets.
+
+    Parameters
+    ----------
+    y_train : np.array
+        Training targets
+
+    Returns
+    -------
+    output_dim : int
+        Number of classes for output
+    train_labels : list
+        Sorted list of initial classes
+    """
+    check_unique_type(y_train)
+    train_labels = unique_labels(y_train)
+    output_dim = len(train_labels)
+
+    return output_dim, train_labels
+
+
+def check_output_dim(labels, y):
+    if y is not None:
+        check_unique_type(y)
+        valid_labels = unique_labels(y)
+        if not set(valid_labels).issubset(set(labels)):
+            raise ValueError(
+                f"""Valid set -- {set(valid_labels)} --
+                contains targets unknown to the training set --
+                {set(labels)}"""
+            )
+    return
+
+
+def infer_multitask_output(y_train):
+    """
+    Infer output_dim from targets.
+    This is for multiple tasks.
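+    Each column of ``y_train`` is treated as an independent classification
+    task, so the output dimension is inferred separately per column.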
+ + Parameters + ---------- + y_train : np.ndarray + Training targets + + Returns + ------- + tasks_dims : list + Number of classes for output + tasks_labels : list + List of sorted list of initial classes + """ + + if len(y_train.shape) < 2: + raise ValueError( + "y_train should be of shape (n_examples, n_tasks)" + + f"but got {y_train.shape}" + ) + nb_tasks = y_train.shape[1] + tasks_dims = [] + tasks_labels = [] + for task_idx in range(nb_tasks): + try: + output_dim, train_labels = infer_output_dim(y_train[:, task_idx]) + tasks_dims.append(output_dim) + tasks_labels.append(train_labels) + except ValueError as err: + raise ValueError(f"""Error for task {task_idx} : {err}""") + return tasks_dims, tasks_labels diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py new file mode 100644 index 00000000..da836203 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py @@ -0,0 +1,178 @@ +import torch +import numpy as np +from scipy.special import softmax +from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights +from pytorch_tabnet.abstract_model import TabModel +from pytorch_tabnet.multiclass_utils import infer_multitask_output, check_output_dim +from torch.utils.data import DataLoader +import scipy + + +class TabNetMultiTaskClassifier(TabModel): + def __post_init__(self): + super(TabNetMultiTaskClassifier, self).__post_init__() + self._task = 'classification' + self._default_loss = torch.nn.functional.cross_entropy + self._default_metric = 'logloss' + + def prepare_target(self, y): + y_mapped = y.copy() + for task_idx in range(y.shape[1]): + task_mapper = self.target_mapper[task_idx] + y_mapped[:, task_idx] = np.vectorize(task_mapper.get)(y[:, task_idx]) + return y_mapped + + def compute_loss(self, y_pred, y_true): + """ + Computes the loss according to network output and targets + + Parameters + ---------- + y_pred : list of tensors + Output of network + y_true : LongTensor + Targets label encoded + + Returns + ------- + loss : torch.Tensor + output of loss function(s) + + """ + loss = 0 + y_true = y_true.long() + if isinstance(self.loss_fn, list): + # if you specify a different loss for each task + for task_loss, task_output, task_id in zip( + self.loss_fn, y_pred, range(len(self.loss_fn)) + ): + loss += task_loss(task_output, y_true[:, task_id]) + else: + # same loss function is applied to all tasks + for task_id, task_output in enumerate(y_pred): + loss += self.loss_fn(task_output, y_true[:, task_id]) + + loss /= len(y_pred) + return loss + + def stack_batches(self, list_y_true, list_y_score): + y_true = np.vstack(list_y_true) + y_score = [] + for i in range(len(self.output_dim)): + score = np.vstack([x[i] for x in list_y_score]) + score = softmax(score, axis=1) + y_score.append(score) + return y_true, y_score + + def update_fit_params(self, X_train, y_train, eval_set, weights): + output_dim, train_labels = infer_multitask_output(y_train) + for _, y in eval_set: + for task_idx in range(y.shape[1]): + check_output_dim(train_labels[task_idx], y[:, task_idx]) + self.output_dim = output_dim + self.classes_ = train_labels + self.target_mapper = [ + {class_label: index for index, class_label in enumerate(classes)} + for classes in self.classes_ + ] + self.preds_mapper = [ + {str(index): str(class_label) for index, class_label in enumerate(classes)} + for classes in self.classes_ + ] + self.updated_weights = weights + filter_weights(self.updated_weights) + + def 
predict(self, X): + """ + Make predictions on a batch (valid) + + Parameters + ---------- + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + + Returns + ------- + results : np.array + Predictions of the most probable class + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + results = {} + for data in dataloader: + data = data.to(self.device).float() + output, _ = self.network(data) + predictions = [ + torch.argmax(torch.nn.Softmax(dim=1)(task_output), dim=1) + .cpu() + .detach() + .numpy() + .reshape(-1) + for task_output in output + ] + + for task_idx in range(len(self.output_dim)): + results[task_idx] = results.get(task_idx, []) + [predictions[task_idx]] + # stack all task individually + results = [np.hstack(task_res) for task_res in results.values()] + # map all task individually + results = [ + np.vectorize(self.preds_mapper[task_idx].get)(task_res.astype(str)) + for task_idx, task_res in enumerate(results) + ] + return results + + def predict_proba(self, X): + """ + Make predictions for classification on a batch (valid) + + Parameters + ---------- + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + + Returns + ------- + res : list of np.ndarray + + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + results = {} + for data in dataloader: + data = data.to(self.device).float() + output, _ = self.network(data) + predictions = [ + torch.nn.Softmax(dim=1)(task_output).cpu().detach().numpy() + for task_output in output + ] + for task_idx in range(len(self.output_dim)): + results[task_idx] = results.get(task_idx, []) + [predictions[task_idx]] + res = [np.vstack(task_res) for task_res in results.values()] + return res diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py new file mode 100644 index 00000000..87de306d --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py @@ -0,0 +1,428 @@ +import torch +import numpy as np +from torch.utils.data import DataLoader +from pytorch_tabnet import tab_network +from pytorch_tabnet.utils import ( + create_explain_matrix, + filter_weights, + SparsePredictDataset, + PredictDataset, + check_input, + create_group_matrix, +) +from torch.nn.utils import clip_grad_norm_ +from pytorch_tabnet.pretraining_utils import ( + create_dataloaders, + validate_eval_set, +) +from pytorch_tabnet.metrics import ( + UnsupMetricContainer, + check_metrics, + UnsupervisedLoss, +) +from pytorch_tabnet.abstract_model import TabModel +import scipy + + +class TabNetPretrainer(TabModel): + def __post_init__(self): + super(TabNetPretrainer, self).__post_init__() + self._task = 'unsupervised' + self._default_loss = UnsupervisedLoss + self._default_metric = 'unsup_loss_numpy' + + def prepare_target(self, y): + return y + + def compute_loss(self, output, embedded_x, obf_vars): + return self.loss_fn(output, embedded_x, obf_vars) + + def update_fit_params( + self, + weights, + ): + self.updated_weights = weights + filter_weights(self.updated_weights) + self.preds_mapper = 
None
+
+    def fit(
+        self,
+        X_train,
+        eval_set=None,
+        eval_name=None,
+        loss_fn=None,
+        pretraining_ratio=0.5,
+        weights=0,
+        max_epochs=100,
+        patience=10,
+        batch_size=1024,
+        virtual_batch_size=128,
+        num_workers=0,
+        drop_last=True,
+        callbacks=None,
+        pin_memory=True,
+        warm_start=False
+    ):
+        """Train a neural network stored in self.network,
+        using train_dataloader for training data and
+        valid_dataloader for validation.
+
+        Parameters
+        ----------
+        X_train : np.ndarray
+            Train set to reconstruct in self supervision
+        eval_set : list of np.array
+            List of evaluation sets.
+            The last one is used for early stopping.
+        eval_name : list of str
+            List of eval set names.
+        loss_fn : callable or None
+            A PyTorch loss function; should be left as None
+            for self-supervised training and non-experts.
+        pretraining_ratio : float
+            Between 0 and 1, fraction of features to mask for reconstruction
+        weights : np.array
+            Sampling weights for each example.
+        max_epochs : int
+            Maximum number of epochs during training
+        patience : int
+            Number of consecutive non-improving epochs before early stopping
+        batch_size : int
+            Training batch size
+        virtual_batch_size : int
+            Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size)
+        num_workers : int
+            Number of workers used in torch.utils.data.DataLoader
+        drop_last : bool
+            Whether to drop the last batch during training
+        callbacks : list of callback function
+            List of custom callbacks
+        pin_memory : bool
+            Whether to set pin_memory to True or False during training
+        warm_start : bool
+            If True and the model has already been fitted, reuse the
+            existing network instead of reinitializing it.
+        """
+        self.max_epochs = max_epochs
+        self.patience = patience
+        self.batch_size = batch_size
+        self.virtual_batch_size = virtual_batch_size
+        self.num_workers = num_workers
+        self.drop_last = drop_last
+        self.input_dim = X_train.shape[1]
+        self._stop_training = False
+        self.pin_memory = pin_memory and (self.device.type != "cpu")
+        self.pretraining_ratio = pretraining_ratio
+        eval_set = eval_set if eval_set else []
+
+        if loss_fn is None:
+            self.loss_fn = self._default_loss
+        else:
+            self.loss_fn = loss_fn
+
+        check_input(X_train)
+
+        self.update_fit_params(
+            weights,
+        )
+
+        # Validate and reformat eval set depending on training data
+        eval_names = validate_eval_set(eval_set, eval_name, X_train)
+        train_dataloader, valid_dataloaders = self._construct_loaders(
+            X_train, eval_set
+        )
+
+        if not hasattr(self, "network") or not warm_start:
+            # model has never been fitted before or warm_start is False
+            self._set_network()
+
+        self._update_network_params()
+        self._set_metrics(eval_names)
+        self._set_optimizer()
+        self._set_callbacks(callbacks)
+
+        # Call method on_train_begin for all callbacks
+        self._callback_container.on_train_begin()
+
+        # Training loop over epochs
+        for epoch_idx in range(self.max_epochs):
+
+            # Call method on_epoch_begin for all callbacks
+            self._callback_container.on_epoch_begin(epoch_idx)
+
+            self._train_epoch(train_dataloader)
+
+            # Apply predict epoch to all eval sets
+            for eval_name, valid_dataloader in zip(eval_names, valid_dataloaders):
+                self._predict_epoch(eval_name, valid_dataloader)
+
+            # Call method on_epoch_end for all callbacks
+            self._callback_container.on_epoch_end(
+                epoch_idx, logs=self.history.epoch_metrics
+            )
+
+            if self._stop_training:
+                break
+
+        # Call method on_train_end for all callbacks
+        self._callback_container.on_train_end()
+        self.network.eval()
+
+    def _set_network(self):
+        """Setup the network and explain
matrix.""" + if not hasattr(self, 'pretraining_ratio'): + self.pretraining_ratio = 0.5 + torch.manual_seed(self.seed) + + self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) + + self.network = tab_network.TabNetPretraining( + self.input_dim, + pretraining_ratio=self.pretraining_ratio, + n_d=self.n_d, + n_a=self.n_a, + n_steps=self.n_steps, + gamma=self.gamma, + cat_idxs=self.cat_idxs, + cat_dims=self.cat_dims, + cat_emb_dim=self.cat_emb_dim, + n_independent=self.n_independent, + n_shared=self.n_shared, + n_shared_decoder=self.n_shared_decoder, + n_indep_decoder=self.n_indep_decoder, + epsilon=self.epsilon, + virtual_batch_size=self.virtual_batch_size, + momentum=self.momentum, + mask_type=self.mask_type, + group_attention_matrix=self.group_matrix.to(self.device), + ).to(self.device) + + self.reducing_matrix = create_explain_matrix( + self.network.input_dim, + self.network.cat_emb_dim, + self.network.cat_idxs, + self.network.post_embed_dim, + ) + + def _update_network_params(self): + self.network.virtual_batch_size = self.virtual_batch_size + self.network.pretraining_ratio = self.pretraining_ratio + + def _set_metrics(self, eval_names): + """Set attributes relative to the metrics. + + Parameters + ---------- + metrics : list of str + List of eval metric names. + eval_names : list of str + List of eval set names. + + """ + metrics = [self._default_metric] + + metrics = check_metrics(metrics) + # Set metric container for each sets + self._metric_container_dict = {} + for name in eval_names: + self._metric_container_dict.update( + {name: UnsupMetricContainer(metrics, prefix=f"{name}_")} + ) + + self._metrics = [] + self._metrics_names = [] + for _, metric_container in self._metric_container_dict.items(): + self._metrics.extend(metric_container.metrics) + self._metrics_names.extend(metric_container.names) + + # Early stopping metric is the last eval metric + self.early_stopping_metric = ( + self._metrics_names[-1] if len(self._metrics_names) > 0 else None + ) + + def _construct_loaders(self, X_train, eval_set): + """Generate dataloaders for unsupervised train and eval set. + + Parameters + ---------- + X_train : np.array + Train set. + eval_set : list of tuple + List of eval tuple set (X, y). + + Returns + ------- + train_dataloader : `torch.utils.data.Dataloader` + Training dataloader. + valid_dataloaders : list of `torch.utils.data.Dataloader` + List of validation dataloaders. + + """ + train_dataloader, valid_dataloaders = create_dataloaders( + X_train, + eval_set, + self.updated_weights, + self.batch_size, + self.num_workers, + self.drop_last, + self.pin_memory, + ) + return train_dataloader, valid_dataloaders + + def _train_epoch(self, train_loader): + """ + Trains one epoch of the network in self.network + + Parameters + ---------- + train_loader : a :class: `torch.utils.data.Dataloader` + DataLoader with train set + """ + self.network.train() + + for batch_idx, X in enumerate(train_loader): + self._callback_container.on_batch_begin(batch_idx) + + batch_logs = self._train_batch(X) + + self._callback_container.on_batch_end(batch_idx, batch_logs) + + epoch_logs = {"lr": self._optimizer.param_groups[-1]["lr"]} + self.history.epoch_metrics.update(epoch_logs) + + return + + def _train_batch(self, X): + """ + Trains one batch of data + + Parameters + ---------- + X : torch.Tensor + Train matrix + + Returns + ------- + batch_outs : dict + Dictionnary with "y": target and "score": prediction scores. + batch_logs : dict + Dictionnary with "batch_size" and "loss". 
+ """ + batch_logs = {"batch_size": X.shape[0]} + + X = X.to(self.device).float() + + for param in self.network.parameters(): + param.grad = None + + output, embedded_x, obf_vars = self.network(X) + loss = self.compute_loss(output, embedded_x, obf_vars) + + # Perform backward pass and optimization + loss.backward() + if self.clip_value: + clip_grad_norm_(self.network.parameters(), self.clip_value) + self._optimizer.step() + + batch_logs["loss"] = loss.cpu().detach().numpy().item() + + return batch_logs + + def _predict_epoch(self, name, loader): + """ + Predict an epoch and update metrics. + + Parameters + ---------- + name : str + Name of the validation set + loader : torch.utils.data.Dataloader + DataLoader with validation set + """ + # Setting network on evaluation mode + self.network.eval() + + list_output = [] + list_embedded_x = [] + list_obfuscation = [] + # Main loop + for batch_idx, X in enumerate(loader): + output, embedded_x, obf_vars = self._predict_batch(X) + list_output.append(output.cpu().detach().numpy()) + list_embedded_x.append(embedded_x.cpu().detach().numpy()) + list_obfuscation.append(obf_vars.cpu().detach().numpy()) + + output, embedded_x, obf_vars = self.stack_batches(list_output, + list_embedded_x, + list_obfuscation) + + metrics_logs = self._metric_container_dict[name](output, embedded_x, obf_vars) + self.network.train() + self.history.epoch_metrics.update(metrics_logs) + return + + def _predict_batch(self, X): + """ + Predict one batch of data. + + Parameters + ---------- + X : torch.Tensor + Owned products + + Returns + ------- + np.array + model scores + """ + X = X.to(self.device).float() + return self.network(X) + + def stack_batches(self, list_output, list_embedded_x, list_obfuscation): + output = np.vstack(list_output) + embedded_x = np.vstack(list_embedded_x) + obf_vars = np.vstack(list_obfuscation) + return output, embedded_x, obf_vars + + def predict(self, X): + """ + Make predictions on a batch (valid) + + Parameters + ---------- + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + + Returns + ------- + predictions : np.array + Predictions of the regression problem + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + results = [] + embedded_res = [] + for batch_nb, data in enumerate(dataloader): + data = data.to(self.device).float() + output, embeded_x, _ = self.network(data) + predictions = output.cpu().detach().numpy() + results.append(predictions) + embedded_res.append(embeded_x.cpu().detach().numpy()) + res_output = np.vstack(results) + embedded_inputs = np.vstack(embedded_res) + return res_output, embedded_inputs diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py new file mode 100644 index 00000000..0874be95 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py @@ -0,0 +1,128 @@ +from torch.utils.data import DataLoader +from pytorch_tabnet.utils import ( + create_sampler, + SparsePredictDataset, + PredictDataset, + check_input +) +import scipy + + +def create_dataloaders( + X_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory +): + """ + Create dataloaders with or without subsampling depending on weights and balanced. 
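+
+    Note that the sampler and shuffle settings derived from ``weights`` are
+    reused for the validation dataloaders as well.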
+
+    Parameters
+    ----------
+    X_train : np.ndarray or scipy.sparse.csr_matrix
+        Training data
+    eval_set : list of np.array (for Xs and ys) or scipy.sparse.csr_matrix (for Xs)
+        List of eval sets
+    weights : either 0, 1, dict or iterable
+        if 0 (default) : no weights will be applied
+        if 1 : classification only, classes will be balanced with their
+               inverse frequency
+        if dict : keys are class values, values are sample weights
+        if iterable : list or np array of length equal to the number of
+                      elements in the training set
+    batch_size : int
+        how many samples per batch to load
+    num_workers : int
+        how many subprocesses to use for data loading. 0 means that the data
+        will be loaded in the main process
+    drop_last : bool
+        set to True to drop the last incomplete batch if the dataset size is
+        not divisible by the batch size. If False and the size of the dataset
+        is not divisible by the batch size, then the last batch will be smaller
+    pin_memory : bool
+        Whether to pin GPU memory during training
+
+    Returns
+    -------
+    train_dataloader : torch.utils.data.DataLoader
+        Training dataloader.
+    valid_dataloaders : list of torch.utils.data.DataLoader
+        Validation dataloaders.
+    """
+    need_shuffle, sampler = create_sampler(weights, X_train)
+
+    if scipy.sparse.issparse(X_train):
+        train_dataloader = DataLoader(
+            SparsePredictDataset(X_train),
+            batch_size=batch_size,
+            sampler=sampler,
+            shuffle=need_shuffle,
+            num_workers=num_workers,
+            drop_last=drop_last,
+            pin_memory=pin_memory,
+        )
+    else:
+        train_dataloader = DataLoader(
+            PredictDataset(X_train),
+            batch_size=batch_size,
+            sampler=sampler,
+            shuffle=need_shuffle,
+            num_workers=num_workers,
+            drop_last=drop_last,
+            pin_memory=pin_memory,
+        )
+
+    valid_dataloaders = []
+    for X in eval_set:
+        if scipy.sparse.issparse(X):
+            valid_dataloaders.append(
+                DataLoader(
+                    SparsePredictDataset(X),
+                    batch_size=batch_size,
+                    sampler=sampler,
+                    shuffle=need_shuffle,
+                    num_workers=num_workers,
+                    drop_last=drop_last,
+                    pin_memory=pin_memory,
+                )
+            )
+        else:
+            valid_dataloaders.append(
+                DataLoader(
+                    PredictDataset(X),
+                    batch_size=batch_size,
+                    sampler=sampler,
+                    shuffle=need_shuffle,
+                    num_workers=num_workers,
+                    drop_last=drop_last,
+                    pin_memory=pin_memory,
+                )
+            )
+
+    return train_dataloader, valid_dataloaders
+
+
+def validate_eval_set(eval_set, eval_name, X_train):
+    """Check if the shapes of eval_set are compatible with X_train.
+
+    Parameters
+    ----------
+    eval_set : list of numpy arrays
+        The list of evaluation sets.
+        The last one is used for early stopping.
+    eval_name : list of str or None
+        Optional list of eval set names; defaults to ``val_0``, ``val_1``, ...
+    X_train : np.ndarray
+        Training data.
+
+    Returns
+    -------
+    eval_names : list of str
+        Validated list of eval_names.
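+
+    Examples
+    --------
+    Illustrative shapes only:
+
+    >>> import numpy as np
+    >>> X_train = np.zeros((10, 3))
+    >>> validate_eval_set([np.zeros((4, 3))], None, X_train)
+    ['val_0']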
+ + """ + eval_names = eval_name or [f"val_{i}" for i in range(len(eval_set))] + assert len(eval_set) == len( + eval_names + ), "eval_set and eval_name have not the same length" + + for set_nb, X in enumerate(eval_set): + check_input(X) + msg = ( + f"Number of columns is different between eval set {set_nb}" + + f"({X.shape[1]}) and X_train ({X_train.shape[1]})" + ) + assert X.shape[1] == X_train.shape[1], msg + return eval_names diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py new file mode 100644 index 00000000..9862efa4 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py @@ -0,0 +1,278 @@ +from torch import nn +from torch.autograd import Function +import torch.nn.functional as F + +import torch + +""" +Other possible implementations: +https://github.com/KrisKorrel/sparsemax-pytorch/blob/master/sparsemax.py +https://github.com/msobroza/SparsemaxPytorch/blob/master/mnist/sparsemax.py +https://github.com/vene/sparse-structured-attention/blob/master/pytorch/torchsparseattn/sparsemax.py +""" + + +# credits to Yandex https://github.com/Qwicen/node/blob/master/lib/nn_utils.py +def _make_ix_like(input, dim=0): + d = input.size(dim) + rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype) + view = [1] * input.dim() + view[0] = -1 + return rho.view(view).transpose(0, dim) + + +class SparsemaxFunction(Function): + """ + An implementation of sparsemax (Martins & Astudillo, 2016). See + :cite:`DBLP:journals/corr/MartinsA16` for detailed description. + By Ben Peters and Vlad Niculae + """ + + @staticmethod + def forward(ctx, input, dim=-1): + """sparsemax: normalizing sparse transform (a la softmax) + + Parameters + ---------- + ctx : torch.autograd.function._ContextMethodMixin + input : torch.Tensor + any shape + dim : int + dimension along which to apply sparsemax + + Returns + ------- + output : torch.Tensor + same shape as input + + """ + ctx.dim = dim + max_val, _ = input.max(dim=dim, keepdim=True) + input -= max_val # same numerical stability trick as for softmax + tau, supp_size = SparsemaxFunction._threshold_and_support(input, dim=dim) + output = torch.clamp(input - tau, min=0) + ctx.save_for_backward(supp_size, output) + return output + + @staticmethod + def backward(ctx, grad_output): + supp_size, output = ctx.saved_tensors + dim = ctx.dim + grad_input = grad_output.clone() + grad_input[output == 0] = 0 + + v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze() + v_hat = v_hat.unsqueeze(dim) + grad_input = torch.where(output != 0, grad_input - v_hat, grad_input) + return grad_input, None + + @staticmethod + def _threshold_and_support(input, dim=-1): + """Sparsemax building block: compute the threshold + + Parameters + ---------- + input: torch.Tensor + any dimension + dim : int + dimension along which to apply the sparsemax + + Returns + ------- + tau : torch.Tensor + the threshold value + support_size : torch.Tensor + + """ + + input_srt, _ = torch.sort(input, descending=True, dim=dim) + input_cumsum = input_srt.cumsum(dim) - 1 + rhos = _make_ix_like(input, dim) + support = rhos * input_srt > input_cumsum + + support_size = support.sum(dim=dim).unsqueeze(dim) + tau = input_cumsum.gather(dim, support_size - 1) + tau /= support_size.to(input.dtype) + return tau, support_size + + +sparsemax = SparsemaxFunction.apply + + +class Sparsemax(nn.Module): + + def __init__(self, dim=-1): + self.dim = dim + super(Sparsemax, self).__init__() + + def 
forward(self, input): + return sparsemax(input, self.dim) + + +class Entmax15Function(Function): + """ + An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). See + :cite:`https://arxiv.org/abs/1905.05702 for detailed description. + Source: https://github.com/deep-spin/entmax + """ + + @staticmethod + def forward(ctx, input, dim=-1): + ctx.dim = dim + + max_val, _ = input.max(dim=dim, keepdim=True) + input = input - max_val # same numerical stability trick as for softmax + input = input / 2 # divide by 2 to solve actual Entmax + + tau_star, _ = Entmax15Function._threshold_and_support(input, dim) + output = torch.clamp(input - tau_star, min=0) ** 2 + ctx.save_for_backward(output) + return output + + @staticmethod + def backward(ctx, grad_output): + Y, = ctx.saved_tensors + gppr = Y.sqrt() # = 1 / g'' (Y) + dX = grad_output * gppr + q = dX.sum(ctx.dim) / gppr.sum(ctx.dim) + q = q.unsqueeze(ctx.dim) + dX -= q * gppr + return dX, None + + @staticmethod + def _threshold_and_support(input, dim=-1): + Xsrt, _ = torch.sort(input, descending=True, dim=dim) + + rho = _make_ix_like(input, dim) + mean = Xsrt.cumsum(dim) / rho + mean_sq = (Xsrt ** 2).cumsum(dim) / rho + ss = rho * (mean_sq - mean ** 2) + delta = (1 - ss) / rho + + # NOTE this is not exactly the same as in reference algo + # Fortunately it seems the clamped values never wrongly + # get selected by tau <= sorted_z. Prove this! + delta_nz = torch.clamp(delta, 0) + tau = mean - torch.sqrt(delta_nz) + + support_size = (tau <= Xsrt).sum(dim).unsqueeze(dim) + tau_star = tau.gather(dim, support_size - 1) + return tau_star, support_size + + +class Entmoid15(Function): + """ A highly optimized equivalent of lambda x: Entmax15([x, 0]) """ + + @staticmethod + def forward(ctx, input): + output = Entmoid15._forward(input) + ctx.save_for_backward(output) + return output + + @staticmethod + def _forward(input): + input, is_pos = abs(input), input >= 0 + tau = (input + torch.sqrt(F.relu(8 - input ** 2))) / 2 + tau.masked_fill_(tau <= input, 2.0) + y_neg = 0.25 * F.relu(tau - input, inplace=True) ** 2 + return torch.where(is_pos, 1 - y_neg, y_neg) + + @staticmethod + def backward(ctx, grad_output): + return Entmoid15._backward(ctx.saved_tensors[0], grad_output) + + @staticmethod + def _backward(output, grad_output): + gppr0, gppr1 = output.sqrt(), (1 - output).sqrt() + grad_input = grad_output * gppr0 + q = grad_input / (gppr0 + gppr1) + grad_input -= q * gppr0 + return grad_input + + +entmax15 = Entmax15Function.apply +entmoid15 = Entmoid15.apply + + +class Entmax15(nn.Module): + + def __init__(self, dim=-1): + self.dim = dim + super(Entmax15, self).__init__() + + def forward(self, input): + return entmax15(input, self.dim) + + +# Credits were lost... 
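+
+
+# Usage sketch (illustrative only): ``Sparsemax`` and ``Entmax15`` are
+# drop-in replacements for ``torch.nn.Softmax`` that can assign exactly
+# zero probability to low-scoring entries, e.g.:
+#
+#     logits = torch.tensor([[2.0, 1.0, -3.0]])
+#     Sparsemax(dim=-1)(logits)   # -> tensor([[1., 0., 0.]])
+#     Entmax15(dim=-1)(logits)    # smoother, but still exactly sparse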
+# def _make_ix_like(input, dim=0): +# d = input.size(dim) +# rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype) +# view = [1] * input.dim() +# view[0] = -1 +# return rho.view(view).transpose(0, dim) +# +# +# def _threshold_and_support(input, dim=0): +# """Sparsemax building block: compute the threshold +# Args: +# input: any dimension +# dim: dimension along which to apply the sparsemax +# Returns: +# the threshold value +# """ +# +# input_srt, _ = torch.sort(input, descending=True, dim=dim) +# input_cumsum = input_srt.cumsum(dim) - 1 +# rhos = _make_ix_like(input, dim) +# support = rhos * input_srt > input_cumsum +# +# support_size = support.sum(dim=dim).unsqueeze(dim) +# tau = input_cumsum.gather(dim, support_size - 1) +# tau /= support_size.to(input.dtype) +# return tau, support_size +# +# +# class SparsemaxFunction(Function): +# +# @staticmethod +# def forward(ctx, input, dim=0): +# """sparsemax: normalizing sparse transform (a la softmax) +# Parameters: +# input (Tensor): any shape +# dim: dimension along which to apply sparsemax +# Returns: +# output (Tensor): same shape as input +# """ +# ctx.dim = dim +# max_val, _ = input.max(dim=dim, keepdim=True) +# input -= max_val # same numerical stability trick as for softmax +# tau, supp_size = _threshold_and_support(input, dim=dim) +# output = torch.clamp(input - tau, min=0) +# ctx.save_for_backward(supp_size, output) +# return output +# +# @staticmethod +# def backward(ctx, grad_output): +# supp_size, output = ctx.saved_tensors +# dim = ctx.dim +# grad_input = grad_output.clone() +# grad_input[output == 0] = 0 +# +# v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze() +# v_hat = v_hat.unsqueeze(dim) +# grad_input = torch.where(output != 0, grad_input - v_hat, grad_input) +# return grad_input, None +# +# +# sparsemax = SparsemaxFunction.apply +# +# +# class Sparsemax(nn.Module): +# +# def __init__(self, dim=0): +# self.dim = dim +# super(Sparsemax, self).__init__() +# +# def forward(self, input): +# return sparsemax(input, self.dim) diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py new file mode 100755 index 00000000..ff01991c --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py @@ -0,0 +1,154 @@ +import torch +import numpy as np +from scipy.special import softmax +from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights +from pytorch_tabnet.abstract_model import TabModel +from pytorch_tabnet.multiclass_utils import infer_output_dim, check_output_dim +from torch.utils.data import DataLoader +import scipy + + +class TabNetClassifier(TabModel): + def __post_init__(self): + super(TabNetClassifier, self).__post_init__() + self._task = 'classification' + self._default_loss = torch.nn.functional.cross_entropy + self._default_metric = 'accuracy' + + def weight_updater(self, weights): + """ + Updates weights dictionary according to target_mapper. + + Parameters + ---------- + weights : bool or dict + Given weights for balancing training. + + Returns + ------- + bool or dict + Same bool if weights are bool, updated dict otherwise. 
+ + """ + if isinstance(weights, int): + return weights + elif isinstance(weights, dict): + return {self.target_mapper[key]: value for key, value in weights.items()} + else: + return weights + + def prepare_target(self, y): + return np.vectorize(self.target_mapper.get)(y) + + def compute_loss(self, y_pred, y_true): + return self.loss_fn(y_pred, y_true.long()) + + def update_fit_params( + self, + X_train, + y_train, + eval_set, + weights, + ): + output_dim, train_labels = infer_output_dim(y_train) + for X, y in eval_set: + check_output_dim(train_labels, y) + self.output_dim = output_dim + self._default_metric = ('auc' if self.output_dim == 2 else 'accuracy') + self.classes_ = train_labels + self.target_mapper = { + class_label: index for index, class_label in enumerate(self.classes_) + } + self.preds_mapper = { + str(index): class_label for index, class_label in enumerate(self.classes_) + } + self.updated_weights = self.weight_updater(weights) + + def stack_batches(self, list_y_true, list_y_score): + y_true = np.hstack(list_y_true) + y_score = np.vstack(list_y_score) + y_score = softmax(y_score, axis=1) + return y_true, y_score + + def predict_func(self, outputs): + outputs = np.argmax(outputs, axis=1) + return np.vectorize(self.preds_mapper.get)(outputs.astype(str)) + + def predict_proba(self, X): + """ + Make predictions for classification on a batch (valid) + + Parameters + ---------- + X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` + Input data + + Returns + ------- + res : np.ndarray + + """ + self.network.eval() + + if scipy.sparse.issparse(X): + dataloader = DataLoader( + SparsePredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + else: + dataloader = DataLoader( + PredictDataset(X), + batch_size=self.batch_size, + shuffle=False, + ) + + results = [] + for batch_nb, data in enumerate(dataloader): + data = data.to(self.device).float() + + output, M_loss = self.network(data) + predictions = torch.nn.Softmax(dim=1)(output).cpu().detach().numpy() + results.append(predictions) + res = np.vstack(results) + return res + + +class TabNetRegressor(TabModel): + def __post_init__(self): + super(TabNetRegressor, self).__post_init__() + self._task = 'regression' + self._default_loss = torch.nn.functional.mse_loss + self._default_metric = 'mse' + + def prepare_target(self, y): + return y + + def compute_loss(self, y_pred, y_true): + return self.loss_fn(y_pred, y_true) + + def update_fit_params( + self, + X_train, + y_train, + eval_set, + weights + ): + if len(y_train.shape) != 2: + msg = "Targets should be 2D : (n_samples, n_regression) " + \ + f"but y_train.shape={y_train.shape} given.\n" + \ + "Use reshape(-1, 1) for single regression." 
+ raise ValueError(msg) + self.output_dim = y_train.shape[1] + self.preds_mapper = None + + self.updated_weights = weights + filter_weights(self.updated_weights) + + def predict_func(self, outputs): + return outputs + + def stack_batches(self, list_y_true, list_y_score): + y_true = np.vstack(list_y_true) + y_score = np.vstack(list_y_score) + return y_true, y_score diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py new file mode 100644 index 00000000..95c2bae2 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py @@ -0,0 +1,934 @@ +import torch +from torch.nn import Linear, BatchNorm1d, ReLU +import numpy as np +from pytorch_tabnet import sparsemax + + +def initialize_non_glu(module, input_dim, output_dim): + gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(4 * input_dim)) + torch.nn.init.xavier_normal_(module.weight, gain=gain_value) + # torch.nn.init.zeros_(module.bias) + return + + +def initialize_glu(module, input_dim, output_dim): + gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(input_dim)) + torch.nn.init.xavier_normal_(module.weight, gain=gain_value) + # torch.nn.init.zeros_(module.bias) + return + + +class GBN(torch.nn.Module): + """ + Ghost Batch Normalization + https://arxiv.org/abs/1705.08741 + """ + + def __init__(self, input_dim, virtual_batch_size=128, momentum=0.01): + super(GBN, self).__init__() + + self.input_dim = input_dim + self.virtual_batch_size = virtual_batch_size + self.bn = BatchNorm1d(self.input_dim, momentum=momentum) + + def forward(self, x): + chunks = x.chunk(int(np.ceil(x.shape[0] / self.virtual_batch_size)), 0) + res = [self.bn(x_) for x_ in chunks] + + return torch.cat(res, dim=0) + + +class TabNetEncoder(torch.nn.Module): + def __init__( + self, + input_dim, + output_dim, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + group_attention_matrix=None, + ): + """ + Defines main part of the TabNet network without the embedding layers. + + Parameters + ---------- + input_dim : int + Number of features + output_dim : int or list of int for multi task classification + Dimension of network output + examples : one for regression, 2 for binary classification etc... 
+ n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_a : int + Dimension of the attention layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + n_independent : int + Number of independent GLU layer in each GLU block (default 2) + n_shared : int + Number of independent GLU layer in each GLU block (default 2) + epsilon : float + Avoid log(0), this should be kept very low + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + group_attention_matrix : torch matrix + Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j + """ + super(TabNetEncoder, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.is_multi_task = isinstance(output_dim, list) + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.virtual_batch_size = virtual_batch_size + self.mask_type = mask_type + self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01) + self.group_attention_matrix = group_attention_matrix + + if self.group_attention_matrix is None: + # no groups + self.group_attention_matrix = torch.eye(self.input_dim) + self.attention_dim = self.input_dim + else: + self.attention_dim = self.group_attention_matrix.shape[0] + + if self.n_shared > 0: + shared_feat_transform = torch.nn.ModuleList() + for i in range(self.n_shared): + if i == 0: + shared_feat_transform.append( + Linear(self.input_dim, 2 * (n_d + n_a), bias=False) + ) + else: + shared_feat_transform.append( + Linear(n_d + n_a, 2 * (n_d + n_a), bias=False) + ) + + else: + shared_feat_transform = None + + self.initial_splitter = FeatTransformer( + self.input_dim, + n_d + n_a, + shared_feat_transform, + n_glu_independent=self.n_independent, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + ) + + self.feat_transformers = torch.nn.ModuleList() + self.att_transformers = torch.nn.ModuleList() + + for step in range(n_steps): + transformer = FeatTransformer( + self.input_dim, + n_d + n_a, + shared_feat_transform, + n_glu_independent=self.n_independent, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + ) + attention = AttentiveTransformer( + n_a, + self.attention_dim, + group_matrix=group_attention_matrix, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + mask_type=self.mask_type, + ) + self.feat_transformers.append(transformer) + self.att_transformers.append(attention) + + def forward(self, x, prior=None): + x = self.initial_bn(x) + + bs = x.shape[0] # batch size + if prior is None: + prior = torch.ones((bs, self.attention_dim)).to(x.device) + + M_loss = 0 + att = self.initial_splitter(x)[:, self.n_d :] + steps_output = [] + for step in range(self.n_steps): + M = self.att_transformers[step](prior, att) + M_loss += torch.mean( + torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1) + ) + # update prior + prior = torch.mul(self.gamma - M, prior) + # output + M_feature_level = torch.matmul(M, self.group_attention_matrix) + masked_x = torch.mul(M_feature_level, x) + out = self.feat_transformers[step](masked_x) + d = ReLU()(out[:, : 
self.n_d]) + steps_output.append(d) + # update attention + att = out[:, self.n_d :] + + M_loss /= self.n_steps + return steps_output, M_loss + + def forward_masks(self, x): + x = self.initial_bn(x) + bs = x.shape[0] # batch size + prior = torch.ones((bs, self.attention_dim)).to(x.device) + M_explain = torch.zeros(x.shape).to(x.device) + att = self.initial_splitter(x)[:, self.n_d :] + masks = {} + + for step in range(self.n_steps): + M = self.att_transformers[step](prior, att) + M_feature_level = torch.matmul(M, self.group_attention_matrix) + masks[step] = M_feature_level + # update prior + prior = torch.mul(self.gamma - M, prior) + # output + masked_x = torch.mul(M_feature_level, x) + out = self.feat_transformers[step](masked_x) + d = ReLU()(out[:, : self.n_d]) + # explain + step_importance = torch.sum(d, dim=1) + M_explain += torch.mul(M_feature_level, step_importance.unsqueeze(dim=1)) + # update attention + att = out[:, self.n_d :] + + return M_explain, masks + + +class TabNetDecoder(torch.nn.Module): + def __init__( + self, + input_dim, + n_d=8, + n_steps=3, + n_independent=1, + n_shared=1, + virtual_batch_size=128, + momentum=0.02, + ): + """ + Defines main part of the TabNet network without the embedding layers. + + Parameters + ---------- + input_dim : int + Number of features + output_dim : int or list of int for multi task classification + Dimension of network output + examples : one for regression, 2 for binary classification etc... + n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + n_independent : int + Number of independent GLU layer in each GLU block (default 1) + n_shared : int + Number of independent GLU layer in each GLU block (default 1) + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + """ + super(TabNetDecoder, self).__init__() + self.input_dim = input_dim + self.n_d = n_d + self.n_steps = n_steps + self.n_independent = n_independent + self.n_shared = n_shared + self.virtual_batch_size = virtual_batch_size + + self.feat_transformers = torch.nn.ModuleList() + + if self.n_shared > 0: + shared_feat_transform = torch.nn.ModuleList() + for i in range(self.n_shared): + shared_feat_transform.append(Linear(n_d, 2 * n_d, bias=False)) + else: + shared_feat_transform = None + + for step in range(n_steps): + transformer = FeatTransformer( + n_d, + n_d, + shared_feat_transform, + n_glu_independent=self.n_independent, + virtual_batch_size=self.virtual_batch_size, + momentum=momentum, + ) + self.feat_transformers.append(transformer) + + self.reconstruction_layer = Linear(n_d, self.input_dim, bias=False) + initialize_non_glu(self.reconstruction_layer, n_d, self.input_dim) + + def forward(self, steps_output): + res = 0 + for step_nb, step_output in enumerate(steps_output): + x = self.feat_transformers[step_nb](step_output) + res = torch.add(res, x) + res = self.reconstruction_layer(res) + return res + + +class TabNetPretraining(torch.nn.Module): + def __init__( + self, + input_dim, + pretraining_ratio=0.2, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + cat_idxs=[], + cat_dims=[], + cat_emb_dim=1, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + n_shared_decoder=1, + n_indep_decoder=1, + 
group_attention_matrix=None, + ): + super(TabNetPretraining, self).__init__() + + self.cat_idxs = cat_idxs or [] + self.cat_dims = cat_dims or [] + self.cat_emb_dim = cat_emb_dim + + self.input_dim = input_dim + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.mask_type = mask_type + self.pretraining_ratio = pretraining_ratio + self.n_shared_decoder = n_shared_decoder + self.n_indep_decoder = n_indep_decoder + + if self.n_steps <= 0: + raise ValueError("n_steps should be a positive integer.") + if self.n_independent == 0 and self.n_shared == 0: + raise ValueError("n_shared and n_independent can't be both zero.") + + self.virtual_batch_size = virtual_batch_size + self.embedder = EmbeddingGenerator(input_dim, + cat_dims, + cat_idxs, + cat_emb_dim, + group_attention_matrix) + self.post_embed_dim = self.embedder.post_embed_dim + + self.masker = RandomObfuscator(self.pretraining_ratio, + group_matrix=self.embedder.embedding_group_matrix) + self.encoder = TabNetEncoder( + input_dim=self.post_embed_dim, + output_dim=self.post_embed_dim, + n_d=n_d, + n_a=n_a, + n_steps=n_steps, + gamma=gamma, + n_independent=n_independent, + n_shared=n_shared, + epsilon=epsilon, + virtual_batch_size=virtual_batch_size, + momentum=momentum, + mask_type=mask_type, + group_attention_matrix=self.embedder.embedding_group_matrix, + ) + self.decoder = TabNetDecoder( + self.post_embed_dim, + n_d=n_d, + n_steps=n_steps, + n_independent=self.n_indep_decoder, + n_shared=self.n_shared_decoder, + virtual_batch_size=virtual_batch_size, + momentum=momentum, + ) + + def forward(self, x): + """ + Returns: res, embedded_x, obf_vars + res : output of reconstruction + embedded_x : embedded input + obf_vars : which variable where obfuscated + """ + embedded_x = self.embedder(x) + if self.training: + masked_x, obfuscated_groups, obfuscated_vars = self.masker(embedded_x) + # set prior of encoder with obfuscated groups + prior = 1 - obfuscated_groups + steps_out, _ = self.encoder(masked_x, prior=prior) + res = self.decoder(steps_out) + return res, embedded_x, obfuscated_vars + else: + steps_out, _ = self.encoder(embedded_x) + res = self.decoder(steps_out) + return res, embedded_x, torch.ones(embedded_x.shape).to(x.device) + + def forward_masks(self, x): + embedded_x = self.embedder(x) + return self.encoder.forward_masks(embedded_x) + + +class TabNetNoEmbeddings(torch.nn.Module): + def __init__( + self, + input_dim, + output_dim, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + group_attention_matrix=None, + ): + """ + Defines main part of the TabNet network without the embedding layers. + + Parameters + ---------- + input_dim : int + Number of features + output_dim : int or list of int for multi task classification + Dimension of network output + examples : one for regression, 2 for binary classification etc... 
+ n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_a : int + Dimension of the attention layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + n_independent : int + Number of independent GLU layer in each GLU block (default 2) + n_shared : int + Number of independent GLU layer in each GLU block (default 2) + epsilon : float + Avoid log(0), this should be kept very low + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + group_attention_matrix : torch matrix + Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j + """ + super(TabNetNoEmbeddings, self).__init__() + self.input_dim = input_dim + self.output_dim = output_dim + self.is_multi_task = isinstance(output_dim, list) + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.virtual_batch_size = virtual_batch_size + self.mask_type = mask_type + self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01) + + self.encoder = TabNetEncoder( + input_dim=input_dim, + output_dim=output_dim, + n_d=n_d, + n_a=n_a, + n_steps=n_steps, + gamma=gamma, + n_independent=n_independent, + n_shared=n_shared, + epsilon=epsilon, + virtual_batch_size=virtual_batch_size, + momentum=momentum, + mask_type=mask_type, + group_attention_matrix=group_attention_matrix + ) + + if self.is_multi_task: + self.multi_task_mappings = torch.nn.ModuleList() + for task_dim in output_dim: + task_mapping = Linear(n_d, task_dim, bias=False) + initialize_non_glu(task_mapping, n_d, task_dim) + self.multi_task_mappings.append(task_mapping) + else: + self.final_mapping = Linear(n_d, output_dim, bias=False) + initialize_non_glu(self.final_mapping, n_d, output_dim) + + def forward(self, x): + res = 0 + steps_output, M_loss = self.encoder(x) + res = torch.sum(torch.stack(steps_output, dim=0), dim=0) + + if self.is_multi_task: + # Result will be in list format + out = [] + for task_mapping in self.multi_task_mappings: + out.append(task_mapping(res)) + else: + out = self.final_mapping(res) + return out, M_loss + + def forward_masks(self, x): + return self.encoder.forward_masks(x) + + +class TabNet(torch.nn.Module): + def __init__( + self, + input_dim, + output_dim, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + cat_idxs=[], + cat_dims=[], + cat_emb_dim=1, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + group_attention_matrix=[], + ): + """ + Defines TabNet network + + Parameters + ---------- + input_dim : int + Initial number of features + output_dim : int + Dimension of network output + examples : one for regression, 2 for binary classification etc... 
+ n_d : int + Dimension of the prediction layer (usually between 4 and 64) + n_a : int + Dimension of the attention layer (usually between 4 and 64) + n_steps : int + Number of successive steps in the network (usually between 3 and 10) + gamma : float + Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) + cat_idxs : list of int + Index of each categorical column in the dataset + cat_dims : list of int + Number of categories in each categorical column + cat_emb_dim : int or list of int + Size of the embedding of categorical features + if int, all categorical features will have same embedding size + if list of int, every corresponding feature will have specific size + n_independent : int + Number of independent GLU layer in each GLU block (default 2) + n_shared : int + Number of independent GLU layer in each GLU block (default 2) + epsilon : float + Avoid log(0), this should be kept very low + virtual_batch_size : int + Batch size for Ghost Batch Normalization + momentum : float + Float value between 0 and 1 which will be used for momentum in all batch norm + mask_type : str + Either "sparsemax" or "entmax" : this is the masking function to use + group_attention_matrix : torch matrix + Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j + """ + super(TabNet, self).__init__() + self.cat_idxs = cat_idxs or [] + self.cat_dims = cat_dims or [] + self.cat_emb_dim = cat_emb_dim + + self.input_dim = input_dim + self.output_dim = output_dim + self.n_d = n_d + self.n_a = n_a + self.n_steps = n_steps + self.gamma = gamma + self.epsilon = epsilon + self.n_independent = n_independent + self.n_shared = n_shared + self.mask_type = mask_type + + if self.n_steps <= 0: + raise ValueError("n_steps should be a positive integer.") + if self.n_independent == 0 and self.n_shared == 0: + raise ValueError("n_shared and n_independent can't be both zero.") + + self.virtual_batch_size = virtual_batch_size + self.embedder = EmbeddingGenerator(input_dim, + cat_dims, + cat_idxs, + cat_emb_dim, + group_attention_matrix) + self.post_embed_dim = self.embedder.post_embed_dim + + self.tabnet = TabNetNoEmbeddings( + self.post_embed_dim, + output_dim, + n_d, + n_a, + n_steps, + gamma, + n_independent, + n_shared, + epsilon, + virtual_batch_size, + momentum, + mask_type, + self.embedder.embedding_group_matrix + ) + + def forward(self, x): + x = self.embedder(x) + return self.tabnet(x) + + def forward_masks(self, x): + x = self.embedder(x) + return self.tabnet.forward_masks(x) + + +class AttentiveTransformer(torch.nn.Module): + def __init__( + self, + input_dim, + group_dim, + group_matrix, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + ): + """ + Initialize an attention transformer. 
+
+        Parameters
+        ----------
+        input_dim : int
+            Input size
+        group_dim : int
+            Number of groups for features
+        group_matrix : torch matrix
+            Matrix of size (n_groups, input_dim) describing the feature groups
+            (accepted for API consistency; not used in the computation here)
+        virtual_batch_size : int
+            Batch size for Ghost Batch Normalization
+        momentum : float
+            Float value between 0 and 1 which will be used for momentum in batch norm
+        mask_type : str
+            Either "sparsemax" or "entmax" : this is the masking function to use
+        """
+        super(AttentiveTransformer, self).__init__()
+        self.fc = Linear(input_dim, group_dim, bias=False)
+        initialize_non_glu(self.fc, input_dim, group_dim)
+        self.bn = GBN(
+            group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum
+        )
+
+        if mask_type == "sparsemax":
+            # Sparsemax
+            self.selector = sparsemax.Sparsemax(dim=-1)
+        elif mask_type == "entmax":
+            # Entmax
+            self.selector = sparsemax.Entmax15(dim=-1)
+        else:
+            raise NotImplementedError(
+                "Please choose either sparsemax or entmax as masktype"
+            )
+
+    def forward(self, priors, processed_feat):
+        x = self.fc(processed_feat)
+        x = self.bn(x)
+        x = torch.mul(x, priors)
+        x = self.selector(x)
+        return x
+
+
+class FeatTransformer(torch.nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        shared_layers,
+        n_glu_independent,
+        virtual_batch_size=128,
+        momentum=0.02,
+    ):
+        """
+        Initialize a feature transformer.
+
+        Parameters
+        ----------
+        input_dim : int
+            Input size
+        output_dim : int
+            Output size
+        shared_layers : torch.nn.ModuleList
+            The shared block that should be common to every step
+        n_glu_independent : int
+            Number of independent GLU layers
+        virtual_batch_size : int
+            Batch size for Ghost Batch Normalization within GLU block(s)
+        momentum : float
+            Float value between 0 and 1 which will be used for momentum in batch norm
+        """
+        super(FeatTransformer, self).__init__()
+
+        params = {
+            "n_glu": n_glu_independent,
+            "virtual_batch_size": virtual_batch_size,
+            "momentum": momentum,
+        }
+
+        if shared_layers is None:
+            # no shared layers
+            self.shared = torch.nn.Identity()
+            is_first = True
+        else:
+            self.shared = GLU_Block(
+                input_dim,
+                output_dim,
+                first=True,
+                shared_layers=shared_layers,
+                n_glu=len(shared_layers),
+                virtual_batch_size=virtual_batch_size,
+                momentum=momentum,
+            )
+            is_first = False
+
+        if n_glu_independent == 0:
+            # no independent layers
+            self.specifics = torch.nn.Identity()
+        else:
+            spec_input_dim = input_dim if is_first else output_dim
+            self.specifics = GLU_Block(
+                spec_input_dim, output_dim, first=is_first, **params
+            )
+
+    def forward(self, x):
+        x = self.shared(x)
+        x = self.specifics(x)
+        return x
+
+
+class GLU_Block(torch.nn.Module):
+    """
+    Independent GLU block, specific to each step
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        n_glu=2,
+        first=False,
+        shared_layers=None,
+        virtual_batch_size=128,
+        momentum=0.02,
+    ):
+        super(GLU_Block, self).__init__()
+        self.first = first
+        self.shared_layers = shared_layers
+        self.n_glu = n_glu
+        self.glu_layers = torch.nn.ModuleList()
+
+        params = {"virtual_batch_size": virtual_batch_size, "momentum": momentum}
+
+        fc = shared_layers[0] if shared_layers else None
+        self.glu_layers.append(GLU_Layer(input_dim, output_dim, fc=fc, **params))
+        for glu_id in range(1, self.n_glu):
+            fc = shared_layers[glu_id] if shared_layers else None
+            self.glu_layers.append(GLU_Layer(output_dim, output_dim, fc=fc, **params))
+
+    def forward(self, x):
+        scale = torch.sqrt(torch.FloatTensor([0.5]).to(x.device))
+        if self.first:  # the first layer of the block has no scale multiplication
+            x = self.glu_layers[0](x)
+            layers_left = range(1, self.n_glu)
+        else:
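+            # in a non-first block every layer is residual, so each output
+            # below is rescaled by sqrt(0.5) to keep the variance stable
+            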
layers_left = range(self.n_glu) + + for glu_id in layers_left: + x = torch.add(x, self.glu_layers[glu_id](x)) + x = x * scale + return x + + +class GLU_Layer(torch.nn.Module): + def __init__( + self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02 + ): + super(GLU_Layer, self).__init__() + + self.output_dim = output_dim + if fc: + self.fc = fc + else: + self.fc = Linear(input_dim, 2 * output_dim, bias=False) + initialize_glu(self.fc, input_dim, 2 * output_dim) + + self.bn = GBN( + 2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum + ) + + def forward(self, x): + x = self.fc(x) + x = self.bn(x) + out = torch.mul(x[:, : self.output_dim], torch.sigmoid(x[:, self.output_dim :])) + return out + + +class EmbeddingGenerator(torch.nn.Module): + """ + Classical embeddings generator + """ + + def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dims, group_matrix): + """This is an embedding module for an entire set of features + + Parameters + ---------- + input_dim : int + Number of features coming as input (number of columns) + cat_dims : list of int + Number of modalities for each categorial features + If the list is empty, no embeddings will be done + cat_idxs : list of int + Positional index for each categorical features in inputs + cat_emb_dim : list of int + Embedding dimension for each categorical features + If int, the same embedding dimension will be used for all categorical features + group_matrix : torch matrix + Original group matrix before embeddings + """ + super(EmbeddingGenerator, self).__init__() + + if cat_dims == [] and cat_idxs == []: + self.skip_embedding = True + self.post_embed_dim = input_dim + self.embedding_group_matrix = group_matrix.to(group_matrix.device) + return + else: + self.skip_embedding = False + + self.post_embed_dim = int(input_dim + np.sum(cat_emb_dims) - len(cat_emb_dims)) + + self.embeddings = torch.nn.ModuleList() + + for cat_dim, emb_dim in zip(cat_dims, cat_emb_dims): + self.embeddings.append(torch.nn.Embedding(cat_dim, emb_dim)) + + # record continuous indices + self.continuous_idx = torch.ones(input_dim, dtype=torch.bool) + self.continuous_idx[cat_idxs] = 0 + + # update group matrix + n_groups = group_matrix.shape[0] + self.embedding_group_matrix = torch.empty((n_groups, self.post_embed_dim), + device=group_matrix.device) + for group_idx in range(n_groups): + post_emb_idx = 0 + cat_feat_counter = 0 + for init_feat_idx in range(input_dim): + if self.continuous_idx[init_feat_idx] == 1: + # this means that no embedding is applied to this column + self.embedding_group_matrix[group_idx, post_emb_idx] = group_matrix[group_idx, init_feat_idx] # noqa + post_emb_idx += 1 + else: + # this is a categorical feature which creates multiple embeddings + n_embeddings = cat_emb_dims[cat_feat_counter] + self.embedding_group_matrix[group_idx, post_emb_idx:post_emb_idx+n_embeddings] = group_matrix[group_idx, init_feat_idx] / n_embeddings # noqa + post_emb_idx += n_embeddings + cat_feat_counter += 1 + + def forward(self, x): + """ + Apply embeddings to inputs + Inputs should be (batch_size, input_dim) + Outputs will be of size (batch_size, self.post_embed_dim) + """ + if self.skip_embedding: + # no embeddings required + return x + + cols = [] + cat_feat_counter = 0 + for feat_init_idx, is_continuous in enumerate(self.continuous_idx): + # Enumerate through continuous idx boolean mask to apply embeddings + if is_continuous: + cols.append(x[:, feat_init_idx].float().view(-1, 1)) + else: + cols.append( + 
self.embeddings[cat_feat_counter](x[:, feat_init_idx].long())
+                )
+                cat_feat_counter += 1
+        # concat
+        post_embeddings = torch.cat(cols, dim=1)
+        return post_embeddings
+
+
+class RandomObfuscator(torch.nn.Module):
+    """
+    Creates and applies obfuscation masks.
+    The obfuscation is done at group level to match attention.
+    """
+
+    def __init__(self, pretraining_ratio, group_matrix):
+        """
+        Creates random obfuscation masks for self-supervised pretraining.
+
+        Parameters
+        ----------
+        pretraining_ratio : float
+            Ratio of features to randomly discard for reconstruction
+
+        """
+        super(RandomObfuscator, self).__init__()
+        self.pretraining_ratio = pretraining_ratio
+        # group matrix is set to boolean here to pass all possible information
+        self.group_matrix = (group_matrix > 0) + 0.
+        self.num_groups = group_matrix.shape[0]
+
+    def forward(self, x):
+        """
+        Generate random obfuscation mask.
+
+        Returns
+        -------
+        masked input, obfuscated groups and obfuscated variables.
+        """
+        bs = x.shape[0]
+
+        obfuscated_groups = torch.bernoulli(
+            self.pretraining_ratio * torch.ones((bs, self.num_groups), device=x.device)
+        )
+        obfuscated_vars = torch.matmul(obfuscated_groups, self.group_matrix)
+        masked_input = torch.mul(1 - obfuscated_vars, x)
+        return masked_input, obfuscated_groups, obfuscated_vars
diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py
new file mode 100644
index 00000000..fda3bfd4
--- /dev/null
+++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py
@@ -0,0 +1,552 @@
+from torch.utils.data import Dataset
+from torch.utils.data import DataLoader, WeightedRandomSampler
+import torch
+import numpy as np
+import scipy
+import json
+from sklearn.utils import check_array
+import pandas as pd
+import warnings
+
+
+class TorchDataset(Dataset):
+    """
+    Format for numpy arrays
+
+    Parameters
+    ----------
+    x : 2D array
+        The input matrix
+    y : 2D array
+        The one-hot encoded target
+    """
+
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def __len__(self):
+        return len(self.x)
+
+    def __getitem__(self, index):
+        x, y = self.x[index], self.y[index]
+        return x, y
+
+
+class SparseTorchDataset(Dataset):
+    """
+    Format for csr_matrix
+
+    Parameters
+    ----------
+    x : CSR matrix
+        The input matrix
+    y : 2D array
+        The one-hot encoded target
+    """
+
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def __len__(self):
+        return self.x.shape[0]
+
+    def __getitem__(self, index):
+        x = torch.from_numpy(self.x[index].toarray()[0]).float()
+        y = self.y[index]
+        return x, y
+
+
+class PredictDataset(Dataset):
+    """
+    Format for numpy arrays
+
+    Parameters
+    ----------
+    x : 2D array
+        The input matrix
+    """
+
+    def __init__(self, x):
+        self.x = x
+
+    def __len__(self):
+        return len(self.x)
+
+    def __getitem__(self, index):
+        x = self.x[index]
+        return x
+
+
+class SparsePredictDataset(Dataset):
+    """
+    Format for csr_matrix
+
+    Parameters
+    ----------
+    x : CSR matrix
+        The input matrix
+    """
+
+    def __init__(self, x):
+        self.x = x
+
+    def __len__(self):
+        return self.x.shape[0]
+
+    def __getitem__(self, index):
+        x = torch.from_numpy(self.x[index].toarray()[0]).float()
+        return x
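+
+
+# Illustrative sketch of how the dataset wrappers above are consumed
+# (``X`` and ``model`` are assumed names, not part of this module):
+#
+#     loader = DataLoader(PredictDataset(X), batch_size=1024, shuffle=False)
+#     for batch in loader:
+#         preds = model(batch.float())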
+
+
+def create_sampler(weights, y_train):
+    """
+    This creates a sampler from the given weights
+
+    Parameters
+    ----------
+    weights : either 0, 1, dict or iterable
+        if 0 (default) : no weights will be applied
+        if 1 : classification only, classes will be balanced with inverse frequency
+        if dict : keys are class values, values are sample weights
+        if iterable : list or np array must be of length equal to the number of
+                      elements in the training set
+    y_train : np.array
+        Training targets
+    """
+    if isinstance(weights, int):
+        if weights == 0:
+            need_shuffle = True
+            sampler = None
+        elif weights == 1:
+            need_shuffle = False
+            class_sample_count = np.array(
+                [len(np.where(y_train == t)[0]) for t in np.unique(y_train)]
+            )
+
+            weights = 1.0 / class_sample_count
+
+            samples_weight = np.array([weights[t] for t in y_train])
+
+            samples_weight = torch.from_numpy(samples_weight)
+            samples_weight = samples_weight.double()
+            sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
+        else:
+            raise ValueError("Weights should be either 0, 1, a dictionary or a list.")
+    elif isinstance(weights, dict):
+        # custom weights per class
+        need_shuffle = False
+        samples_weight = np.array([weights[t] for t in y_train])
+        sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
+    else:
+        # custom weights
+        if len(weights) != len(y_train):
+            raise ValueError("Custom weights should match number of train samples.")
+        need_shuffle = False
+        samples_weight = np.array(weights)
+        sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
+    return need_shuffle, sampler
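+
+
+# Example (illustrative) of the accepted ``weights`` formats:
+#
+#     create_sampler(0, y_train)                 # plain shuffling, no sampler
+#     create_sampler(1, y_train)                 # inverse class-frequency balancing
+#     create_sampler({0: 0.3, 1: 0.7}, y_train)  # explicit per-class weights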
+
+
+def create_dataloaders(
+    X_train, y_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory
+):
+    """
+    Create dataloaders with or without subsampling depending on weights and balancing.
+
+    Parameters
+    ----------
+    X_train : np.ndarray
+        Training data
+    y_train : np.array
+        Mapped training targets
+    eval_set : list of tuple
+        List of eval tuple set (X, y)
+    weights : either 0, 1, dict or iterable
+        if 0 (default) : no weights will be applied
+        if 1 : classification only, classes will be balanced with inverse frequency
+        if dict : keys are class values, values are sample weights
+        if iterable : list or np array must be of length equal to the number of
+                      elements in the training set
+    batch_size : int
+        how many samples per batch to load
+    num_workers : int
+        how many subprocesses to use for data loading. 0 means that the data
+        will be loaded in the main process
+    drop_last : bool
+        set to True to drop the last incomplete batch, if the dataset size is not
+        divisible by the batch size. If False and the size of the dataset is not
+        divisible by the batch size, then the last batch will be smaller
+    pin_memory : bool
+        Whether to pin GPU memory during training
+
+    Returns
+    -------
+    train_dataloader, valid_dataloaders : torch.DataLoader, list of torch.DataLoader
+        Training and validation dataloaders
+    """
+    need_shuffle, sampler = create_sampler(weights, y_train)
+
+    if scipy.sparse.issparse(X_train):
+        train_dataloader = DataLoader(
+            SparseTorchDataset(X_train.astype(np.float32), y_train),
+            batch_size=batch_size,
+            sampler=sampler,
+            shuffle=need_shuffle,
+            num_workers=num_workers,
+            drop_last=drop_last,
+            pin_memory=pin_memory,
+        )
+    else:
+        train_dataloader = DataLoader(
+            TorchDataset(X_train.astype(np.float32), y_train),
+            batch_size=batch_size,
+            sampler=sampler,
+            shuffle=need_shuffle,
+            num_workers=num_workers,
+            drop_last=drop_last,
+            pin_memory=pin_memory,
+        )
+
+    valid_dataloaders = []
+    for X, y in eval_set:
+        if scipy.sparse.issparse(X):
+            valid_dataloaders.append(
+                DataLoader(
+                    SparseTorchDataset(X.astype(np.float32), y),
+                    batch_size=batch_size,
+                    shuffle=False,
+                    num_workers=num_workers,
+                    pin_memory=pin_memory,
+                )
+            )
+        else:
+            valid_dataloaders.append(
+                DataLoader(
+                    TorchDataset(X.astype(np.float32), y),
+                    batch_size=batch_size,
+                    shuffle=False,
+                    num_workers=num_workers,
+                    pin_memory=pin_memory,
+                )
+            )
+
+    return train_dataloader, valid_dataloaders
+
+
+def create_explain_matrix(input_dim, cat_emb_dim, cat_idxs, post_embed_dim):
+    """
+    This is a computational trick: it builds a matrix that rapidly sums
+    the importances of the embeddings belonging to the same initial feature
+    back to the initial index.
+
+    Parameters
+    ----------
+    input_dim : int
+        Initial input dim
+    cat_emb_dim : int or list of int
+        if int : size of embedding for all categorical features
+        if list of int : size of embedding for each categorical feature
+    cat_idxs : list of int
+        Initial position of categorical features
+    post_embed_dim : int
+        Post embedding inputs dimension
+
+    Returns
+    -------
+    reducing_matrix : np.array
+        Matrix of dim (post_embed_dim, input_dim) to perform the reduction
+    """
+
+    if isinstance(cat_emb_dim, int):
+        all_emb_impact = [cat_emb_dim - 1] * len(cat_idxs)
+    else:
+        all_emb_impact = [emb_dim - 1 for emb_dim in cat_emb_dim]
+
+    acc_emb = 0
+    nb_emb = 0
+    indices_trick = []
+    for i in range(input_dim):
+        if i not in cat_idxs:
+            indices_trick.append([i + acc_emb])
+        else:
+            indices_trick.append(
+                range(i + acc_emb, i + acc_emb + all_emb_impact[nb_emb] + 1)
+            )
+            acc_emb += all_emb_impact[nb_emb]
+            nb_emb += 1
+
+    reducing_matrix = np.zeros((post_embed_dim, input_dim))
+    for i, cols in enumerate(indices_trick):
+        reducing_matrix[cols, i] = 1
+
+    return scipy.sparse.csc_matrix(reducing_matrix)
+
+
+def create_group_matrix(list_groups, input_dim):
+    """
+    Create the group matrix corresponding to the given list_groups
+
+    Parameters
+    ----------
+    - list_groups : list of list of int
+        Each element is a list representing features in the same group.
+        One feature should appear in at most one group.
+        Features that are not assigned a group are placed in their own singleton group.
+    - input_dim : number of features in the initial dataset
+
+    Returns
+    -------
+    - group_matrix : torch matrix
+        A matrix of size (n_groups, input_dim)
+        where m_ij represents the importance of feature j in group i
+        The rows must sum to 1 as each group is equally important a priori.
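+
+    Example (illustrative): features 0 and 1 share one group; feature 2
+    falls into its own singleton group.
+
+    >>> create_group_matrix([[0, 1]], input_dim=3)
+    tensor([[0.5000, 0.5000, 0.0000],
+            [0.0000, 0.0000, 1.0000]])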
+
+    """
+    check_list_groups(list_groups, input_dim)
+
+    if len(list_groups) == 0:
+        group_matrix = torch.eye(input_dim)
+        return group_matrix
+    else:
+        n_groups = input_dim - int(np.sum([len(gp) - 1 for gp in list_groups]))
+        group_matrix = torch.zeros((n_groups, input_dim))
+
+        remaining_features = [feat_idx for feat_idx in range(input_dim)]
+
+        current_group_idx = 0
+        for group in list_groups:
+            group_size = len(group)
+            for elem_idx in group:
+                # add importance of element in group matrix and corresponding group
+                group_matrix[current_group_idx, elem_idx] = 1 / group_size
+                # remove features from list of features
+                remaining_features.remove(elem_idx)
+            # move to next group
+            current_group_idx += 1
+        # features not mentioned in list_groups are assigned their own singleton group
+        for remaining_feat_idx in remaining_features:
+            group_matrix[current_group_idx, remaining_feat_idx] = 1
+            current_group_idx += 1
+        return group_matrix
+
+
+def check_list_groups(list_groups, input_dim):
+    """
+    Check that list groups:
+        - is a list of lists
+        - does not contain the same feature twice in different groups
+        - does not contain unknown features (>= input_dim)
+        - does not contain empty groups
+    Parameters
+    ----------
+    - list_groups : list of list of int
+        Each element is a list representing features in the same group.
+        One feature should appear in at most one group.
+        Features that are not assigned a group are placed in their own singleton group.
+    - input_dim : number of features in the initial dataset
+    """
+    assert isinstance(list_groups, list), "list_groups must be a list of lists."
+
+    if len(list_groups) == 0:
+        return
+    else:
+        for group_pos, group in enumerate(list_groups):
+            msg = f"Groups must be given as a list of lists, but found {group} in position {group_pos}."  # noqa
+            assert isinstance(group, list), msg
+            assert len(group) > 0, "Empty groups are forbidden, please remove empty groups []"
+
+    n_elements_in_groups = np.sum([len(group) for group in list_groups])
+    flat_list = []
+    for group in list_groups:
+        flat_list.extend(group)
+    unique_elements = np.unique(flat_list)
+    n_unique_elements_in_groups = len(unique_elements)
+    msg = "One feature can only appear in one group, please check your grouped_features."
+    assert n_unique_elements_in_groups == n_elements_in_groups, msg
+
+    highest_feat = np.max(unique_elements)
+    assert highest_feat < input_dim, f"Number of features is {input_dim} but one group contains {highest_feat}."  # noqa
+    return
+
+
+def filter_weights(weights):
+    """
+    This function makes sure that weights are in the correct format for
+    regression and multitask TabNet
+
+    Parameters
+    ----------
+    weights : int, dict or list
+        Initial weights parameters given by user
+
+    Returns
+    -------
+    None : This function will only throw an error if the format is wrong
+    """
+    err_msg = """Please provide a list or np.array of weights for """
+    err_msg += """regression, multitask or pretraining: """
+    if isinstance(weights, int):
+        if weights == 1:
+            raise ValueError(err_msg + "1 given.")
+    if isinstance(weights, dict):
+        raise ValueError(err_msg + "Dict given.")
+    return
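+
+
+# Illustrative sketch (assumed wiring): the matrix built by ``create_group_matrix``
+# is what the TabNet constructor takes as ``group_attention_matrix``, e.g.
+#
+#     group_matrix = create_group_matrix([[0, 1]], input_dim=4)
+#     network = TabNet(input_dim=4, output_dim=2,
+#                      group_attention_matrix=group_matrix)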
+
+
+def validate_eval_set(eval_set, eval_name, X_train, y_train):
+    """Check if the shapes of eval_set are compatible with (X_train, y_train).
+
+    Parameters
+    ----------
+    eval_set : list of tuple
+        List of eval tuple set (X, y).
+        The last one is used for early stopping
+    eval_name : list of str
+        List of eval set names.
+    X_train : np.ndarray
+        Training data
+    y_train : np.array
+        Training targets
+
+    Returns
+    -------
+    eval_names : list of str
+        Validated list of eval_names.
+    eval_set : list of tuple
+        Validated list of eval_set.
+
+    """
+    eval_name = eval_name or [f"val_{i}" for i in range(len(eval_set))]
+
+    assert len(eval_set) == len(
+        eval_name
+    ), "eval_set and eval_name do not have the same length"
+    if len(eval_set) > 0:
+        assert all(
+            len(elem) == 2 for elem in eval_set
+        ), "Each tuple of eval_set needs to have two elements"
+        for name, (X, y) in zip(eval_name, eval_set):
+            check_input(X)
+            msg = (
+                f"Dimension mismatch between X_{name} "
+                + f"{X.shape} and X_train {X_train.shape}"
+            )
+            assert len(X.shape) == len(X_train.shape), msg
+
+            msg = (
+                f"Dimension mismatch between y_{name} "
+                + f"{y.shape} and y_train {y_train.shape}"
+            )
+            assert len(y.shape) == len(y_train.shape), msg
+
+            msg = (
+                f"Number of columns is different between X_{name} "
+                + f"({X.shape[1]}) and X_train ({X_train.shape[1]})"
+            )
+            assert X.shape[1] == X_train.shape[1], msg
+
+            if len(y_train.shape) == 2:
+                msg = (
+                    f"Number of columns is different between y_{name} "
+                    + f"({y.shape[1]}) and y_train ({y_train.shape[1]})"
+                )
+                assert y.shape[1] == y_train.shape[1], msg
+            msg = (
+                f"You need the same number of rows between X_{name} "
+                + f"({X.shape[0]}) and y_{name} ({y.shape[0]})"
+            )
+            assert X.shape[0] == y.shape[0], msg
+
+    return eval_name, eval_set
+
+
+def define_device(device_name):
+    """
+    Define the device to use during training and inference.
+    If "auto", it will detect automatically whether to use cuda or cpu.
+
+    Parameters
+    ----------
+    device_name : str
+        Either "auto", "cpu" or "cuda"
+
+    Returns
+    -------
+    str
+        Either "cpu" or "cuda"
+    """
+    if device_name == "auto":
+        if torch.cuda.is_available():
+            return "cuda"
+        else:
+            return "cpu"
+    elif device_name == "cuda" and not torch.cuda.is_available():
+        return "cpu"
+    else:
+        return device_name
+
+
+class ComplexEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, (np.generic, np.ndarray)):
+            return obj.tolist()
+        # Let the base class default method raise the TypeError
+        return json.JSONEncoder.default(self, obj)
+
+
+def check_input(X):
+    """
+    Raise a clear error if X is a pandas DataFrame
+    and check the array according to scikit-learn rules
+    """
+    if isinstance(X, (pd.DataFrame, pd.Series)):
+        err_message = "Pandas DataFrames are not supported: apply X.values when calling fit"
+        raise TypeError(err_message)
+    check_array(X, accept_sparse=True)
+
+
+def check_warm_start(warm_start, from_unsupervised):
+    """
+    Gives a warning about ambiguous usage of the two parameters.
+    """
+    if warm_start and from_unsupervised is not None:
+        warn_msg = "warm_start=True and from_unsupervised != None: "
+        warn_msg += "warm_start will be ignored, training will start from unsupervised weights"
+        warnings.warn(warn_msg)
+    return
+
+
+def check_embedding_parameters(cat_dims, cat_idxs, cat_emb_dim):
+    """
+    Check parameters related to embeddings and rearrange them in a unique manner.
+    """
+    if (cat_dims == []) ^ (cat_idxs == []):
+        if cat_dims == []:
+            msg = "If cat_idxs is non-empty, cat_dims must be defined as a list of same length."
+        else:
+            msg = "If cat_dims is non-empty, cat_idxs must be defined as a list of same length."
+        raise ValueError(msg)
+    elif len(cat_dims) != len(cat_idxs):
+        msg = "The lists cat_dims and cat_idxs must have the same length."
+ raise ValueError(msg) + + if isinstance(cat_emb_dim, int): + cat_emb_dims = [cat_emb_dim] * len(cat_idxs) + else: + cat_emb_dims = cat_emb_dim + + # check that all embeddings are provided + if len(cat_emb_dims) != len(cat_dims): + msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(cat_emb_dims)} + and {len(cat_dims)}""" + raise ValueError(msg) + + # Rearrange to get reproducible seeds with different ordering + if len(cat_idxs) > 0: + sorted_idxs = np.argsort(cat_idxs) + cat_dims = [cat_dims[i] for i in sorted_idxs] + cat_emb_dims = [cat_emb_dims[i] for i in sorted_idxs] + + return cat_dims, cat_idxs, cat_emb_dims From 984f4b08f7d7748c6764823dab29c2551da950f5 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 08:42:42 +0000 Subject: [PATCH 12/49] not done still --- .../torch_based/autoint/autoint_utils.py | 1 - .../pytorch_tabnet/augmentations.py | 22 +++++++--- .../torch_based/pytorch_tabnet/callbacks.py | 6 +-- .../torch_based/pytorch_tabnet/metrics.py | 14 ++---- .../pytorch_tabnet/multiclass_utils.py | 39 ++++------------- .../torch_based/pytorch_tabnet/multitask.py | 25 +++-------- .../torch_based/pytorch_tabnet/pretraining.py | 28 ++++-------- .../pytorch_tabnet/pretraining_utils.py | 15 ++----- .../torch_based/pytorch_tabnet/sparsemax.py | 4 +- .../torch_based/pytorch_tabnet/tab_model.py | 34 ++++++--------- .../torch_based/pytorch_tabnet/utils.py | 43 +++++-------------- 11 files changed, 72 insertions(+), 159 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py index c96b3241..ba047d52 100644 --- a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py +++ b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py @@ -13,7 +13,6 @@ UniformEmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "embedding_size", "output_size"]) - class LeakyGate(nn.Module): """LeakyGate from https://github.com/jrfiedler/xynn. diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py index 287fa365..b520c0b0 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py @@ -3,7 +3,7 @@ import numpy as np -class RegressionSMOTE(): +class RegressionSMOTE: """ Apply SMOTE @@ -11,6 +11,7 @@ class RegressionSMOTE(): The target will be averaged as well (this might work with binary classification and certain loss), following a beta distribution. """ + def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): "" self.seed = seed @@ -19,7 +20,7 @@ def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): self.alpha = alpha self.beta = beta self.p = p - if (p < 0.) or (p > 1.0): + if (p < 0.0) or (p > 1.0): raise ValueError("Value of p should be between 0. 
and 1.") def _set_seed(self): @@ -38,21 +39,26 @@ def __call__(self, X, y): index_permute = torch.randperm(batch_size, device=self.device) X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] - X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view(X[idx_to_change].size()) # noqa + X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view( + X[idx_to_change].size() + ) # noqa y[idx_to_change] = random_betas[idx_to_change, None] * y[idx_to_change] - y[idx_to_change] += (1 - random_betas[idx_to_change, None]) * y[index_permute][idx_to_change].view(y[idx_to_change].size()) # noqa + y[idx_to_change] += (1 - random_betas[idx_to_change, None]) * y[index_permute][idx_to_change].view( + y[idx_to_change].size() + ) # noqa return X, y -class ClassificationSMOTE(): +class ClassificationSMOTE: """ Apply SMOTE for classification tasks. This will average a percentage p of the elements in the batch with other elements. The target will stay unchanged and keep the value of the most important row in the mix. """ + def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): "" self.seed = seed @@ -61,7 +67,7 @@ def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): self.alpha = alpha self.beta = beta self.p = p - if (p < 0.) or (p > 1.0): + if (p < 0.0) or (p > 1.0): raise ValueError("Value of p should be between 0. and 1.") def _set_seed(self): @@ -80,6 +86,8 @@ def __call__(self, X, y): index_permute = torch.randperm(batch_size, device=self.device) X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] - X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view(X[idx_to_change].size()) # noqa + X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view( + X[idx_to_change].size() + ) # noqa return X, y diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py index cb031d54..5c266502 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py @@ -224,9 +224,9 @@ def on_epoch_end(self, epoch, logs=None): def on_batch_end(self, batch, logs=None): batch_size = logs["batch_size"] - self.epoch_loss = ( - self.samples_seen * self.epoch_loss + batch_size * logs["loss"] - ) / (self.samples_seen + batch_size) + self.epoch_loss = (self.samples_seen * self.epoch_loss + batch_size * logs["loss"]) / ( + self.samples_seen + batch_size + ) self.samples_seen += batch_size def __getitem__(self, name): diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py index e8ad8181..ae716f33 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py @@ -157,9 +157,7 @@ def __call__(self, y_true, y_pred): logs = {} for metric in self.metrics: if isinstance(y_pred, list): - res = np.mean( - [metric(y_true[:, i], y_pred[i]) for i in range(len(y_pred))] - ) + res = np.mean([metric(y_true[:, i], y_pred[i]) for i in range(len(y_pred))]) else: res = metric(y_true, y_pred) logs[self.prefix + metric._name] = res @@ -191,9 +189,7 @@ def get_metrics_by_names(cls, names): available_names = [metric()._name for metric in available_metrics] metrics = [] for name in names: - assert ( - name in available_names - ), 
f"{name} is not available, choose in {available_names}" + assert name in available_names, f"{name} is not available, choose in {available_names}" idx = available_names.index(name) metric = available_metrics[idx]() metrics.append(metric) @@ -463,11 +459,7 @@ def __call__(self, y_pred, embedded_x, obf_vars): float MSE of predictions vs targets. """ - return UnsupervisedLossNumpy( - y_pred, - embedded_x, - obf_vars - ) + return UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars) class RMSE(Metric): diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py index 8dbf08c5..b6fa2ef3 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py @@ -32,12 +32,7 @@ def _assert_all_finite(X, allow_nan=False): pass elif is_float: msg_err = "Input contains {} or a value too large for {!r}." - if ( - allow_nan - and np.isinf(X).any() - or not allow_nan - and not np.isfinite(X).all() - ): + if allow_nan and np.isinf(X).any() or not allow_nan and not np.isfinite(X).all(): type_err = "infinity" if allow_nan else "NaN, infinity" raise ValueError(msg_err.format(type_err, X.dtype)) # for object dtype data, we only check for NaNs (GH-13254) @@ -183,17 +178,12 @@ def is_multilabel(y): return ( len(y.data) == 0 or np.unique(y.data).size == 1 - and ( - y.dtype.kind in "biu" - or _is_integral_float(np.unique(y.data)) # bool, int, uint - ) + and (y.dtype.kind in "biu" or _is_integral_float(np.unique(y.data))) # bool, int, uint ) else: labels = np.unique(y) - return len(labels) < 3 and ( - y.dtype.kind in "biu" or _is_integral_float(labels) # bool, int, uint - ) + return len(labels) < 3 and (y.dtype.kind in "biu" or _is_integral_float(labels)) # bool, int, uint def check_classification_targets(y): @@ -282,14 +272,10 @@ def type_of_target(y): >>> type_of_target(np.array([[0, 1], [1, 1]])) 'multilabel-indicator' """ - valid = ( - isinstance(y, (Sequence, spmatrix)) or hasattr(y, "__array__") - ) and not isinstance(y, str) + valid = (isinstance(y, (Sequence, spmatrix)) or hasattr(y, "__array__")) and not isinstance(y, str) if not valid: - raise ValueError( - "Expected array-like (array or non-string sequence), " "got %r" % y - ) + raise ValueError("Expected array-like (array or non-string sequence), " "got %r" % y) sparseseries = y.__class__.__name__ == "SparseSeries" if sparseseries: @@ -306,11 +292,7 @@ def type_of_target(y): # The old sequence of sequences format try: - if ( - not hasattr(y[0], "__array__") - and isinstance(y[0], Sequence) - and not isinstance(y[0], str) - ): + if not hasattr(y[0], "__array__") and isinstance(y[0], Sequence) and not isinstance(y[0], str): raise ValueError( "You appear to be using a legacy multi-label data" " representation. Sequence of sequences are no" @@ -348,9 +330,7 @@ def type_of_target(y): def check_unique_type(y): target_types = pd.Series(y).map(type).unique() if len(target_types) != 1: - raise TypeError( - f"Values on the target must have the same type. Target has types {target_types}" - ) + raise TypeError(f"Values on the target must have the same type. 
Target has types {target_types}") def infer_output_dim(y_train): @@ -408,10 +388,7 @@ def infer_multitask_output(y_train): """ if len(y_train.shape) < 2: - raise ValueError( - "y_train should be of shape (n_examples, n_tasks)" - + f"but got {y_train.shape}" - ) + raise ValueError("y_train should be of shape (n_examples, n_tasks)" + f"but got {y_train.shape}") nb_tasks = y_train.shape[1] tasks_dims = [] tasks_labels = [] diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py index da836203..309c0e39 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py @@ -11,9 +11,9 @@ class TabNetMultiTaskClassifier(TabModel): def __post_init__(self): super(TabNetMultiTaskClassifier, self).__post_init__() - self._task = 'classification' + self._task = "classification" self._default_loss = torch.nn.functional.cross_entropy - self._default_metric = 'logloss' + self._default_metric = "logloss" def prepare_target(self, y): y_mapped = y.copy() @@ -43,9 +43,7 @@ def compute_loss(self, y_pred, y_true): y_true = y_true.long() if isinstance(self.loss_fn, list): # if you specify a different loss for each task - for task_loss, task_output, task_id in zip( - self.loss_fn, y_pred, range(len(self.loss_fn)) - ): + for task_loss, task_output, task_id in zip(self.loss_fn, y_pred, range(len(self.loss_fn))): loss += task_loss(task_output, y_true[:, task_id]) else: # same loss function is applied to all tasks @@ -72,12 +70,10 @@ def update_fit_params(self, X_train, y_train, eval_set, weights): self.output_dim = output_dim self.classes_ = train_labels self.target_mapper = [ - {class_label: index for index, class_label in enumerate(classes)} - for classes in self.classes_ + {class_label: index for index, class_label in enumerate(classes)} for classes in self.classes_ ] self.preds_mapper = [ - {str(index): str(class_label) for index, class_label in enumerate(classes)} - for classes in self.classes_ + {str(index): str(class_label) for index, class_label in enumerate(classes)} for classes in self.classes_ ] self.updated_weights = weights filter_weights(self.updated_weights) @@ -116,11 +112,7 @@ def predict(self, X): data = data.to(self.device).float() output, _ = self.network(data) predictions = [ - torch.argmax(torch.nn.Softmax(dim=1)(task_output), dim=1) - .cpu() - .detach() - .numpy() - .reshape(-1) + torch.argmax(torch.nn.Softmax(dim=1)(task_output), dim=1).cpu().detach().numpy().reshape(-1) for task_output in output ] @@ -168,10 +160,7 @@ def predict_proba(self, X): for data in dataloader: data = data.to(self.device).float() output, _ = self.network(data) - predictions = [ - torch.nn.Softmax(dim=1)(task_output).cpu().detach().numpy() - for task_output in output - ] + predictions = [torch.nn.Softmax(dim=1)(task_output).cpu().detach().numpy() for task_output in output] for task_idx in range(len(self.output_dim)): results[task_idx] = results.get(task_idx, []) + [predictions[task_idx]] res = [np.vstack(task_res) for task_res in results.values()] diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py index 87de306d..9044d497 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py @@ -27,9 +27,9 @@ class TabNetPretrainer(TabModel): def __post_init__(self): super(TabNetPretrainer, self).__post_init__() 
- self._task = 'unsupervised' + self._task = "unsupervised" self._default_loss = UnsupervisedLoss - self._default_metric = 'unsup_loss_numpy' + self._default_metric = "unsup_loss_numpy" def prepare_target(self, y): return y @@ -61,7 +61,7 @@ def fit( drop_last=True, callbacks=None, pin_memory=True, - warm_start=False + warm_start=False, ): """Train a neural network stored in self.network Using train_dataloader for training data and @@ -130,9 +130,7 @@ def fit( # Validate and reformat eval set depending on training data eval_names = validate_eval_set(eval_set, eval_name, X_train) - train_dataloader, valid_dataloaders = self._construct_loaders( - X_train, eval_set - ) + train_dataloader, valid_dataloaders = self._construct_loaders(X_train, eval_set) if not hasattr(self, "network") or not warm_start: # model has never been fitted before of warm_start is False @@ -159,9 +157,7 @@ def fit( self._predict_epoch(eval_name, valid_dataloader) # Call method on_epoch_end for all callbacks - self._callback_container.on_epoch_end( - epoch_idx, logs=self.history.epoch_metrics - ) + self._callback_container.on_epoch_end(epoch_idx, logs=self.history.epoch_metrics) if self._stop_training: break @@ -172,7 +168,7 @@ def fit( def _set_network(self): """Setup the network and explain matrix.""" - if not hasattr(self, 'pretraining_ratio'): + if not hasattr(self, "pretraining_ratio"): self.pretraining_ratio = 0.5 torch.manual_seed(self.seed) @@ -227,9 +223,7 @@ def _set_metrics(self, eval_names): # Set metric container for each sets self._metric_container_dict = {} for name in eval_names: - self._metric_container_dict.update( - {name: UnsupMetricContainer(metrics, prefix=f"{name}_")} - ) + self._metric_container_dict.update({name: UnsupMetricContainer(metrics, prefix=f"{name}_")}) self._metrics = [] self._metrics_names = [] @@ -238,9 +232,7 @@ def _set_metrics(self, eval_names): self._metrics_names.extend(metric_container.names) # Early stopping metric is the last eval metric - self.early_stopping_metric = ( - self._metrics_names[-1] if len(self._metrics_names) > 0 else None - ) + self.early_stopping_metric = self._metrics_names[-1] if len(self._metrics_names) > 0 else None def _construct_loaders(self, X_train, eval_set): """Generate dataloaders for unsupervised train and eval set. 
@@ -354,9 +346,7 @@ def _predict_epoch(self, name, loader): list_embedded_x.append(embedded_x.cpu().detach().numpy()) list_obfuscation.append(obf_vars.cpu().detach().numpy()) - output, embedded_x, obf_vars = self.stack_batches(list_output, - list_embedded_x, - list_obfuscation) + output, embedded_x, obf_vars = self.stack_batches(list_output, list_embedded_x, list_obfuscation) metrics_logs = self._metric_container_dict[name](output, embedded_x, obf_vars) self.network.train() diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py index 0874be95..d35e34f2 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py @@ -1,16 +1,9 @@ from torch.utils.data import DataLoader -from pytorch_tabnet.utils import ( - create_sampler, - SparsePredictDataset, - PredictDataset, - check_input -) +from pytorch_tabnet.utils import create_sampler, SparsePredictDataset, PredictDataset, check_input import scipy -def create_dataloaders( - X_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory -): +def create_dataloaders(X_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory): """ Create dataloaders with or without subsampling depending on weights and balanced. @@ -114,9 +107,7 @@ def validate_eval_set(eval_set, eval_name, X_train): """ eval_names = eval_name or [f"val_{i}" for i in range(len(eval_set))] - assert len(eval_set) == len( - eval_names - ), "eval_set and eval_name have not the same length" + assert len(eval_set) == len(eval_names), "eval_set and eval_name have not the same length" for set_nb, X in enumerate(eval_set): check_input(X) diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py index 9862efa4..53a71792 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py @@ -100,7 +100,6 @@ def _threshold_and_support(input, dim=-1): class Sparsemax(nn.Module): - def __init__(self, dim=-1): self.dim = dim super(Sparsemax, self).__init__() @@ -131,7 +130,7 @@ def forward(ctx, input, dim=-1): @staticmethod def backward(ctx, grad_output): - Y, = ctx.saved_tensors + (Y,) = ctx.saved_tensors gppr = Y.sqrt() # = 1 / g'' (Y) dX = grad_output * gppr q = dX.sum(ctx.dim) / gppr.sum(ctx.dim) @@ -195,7 +194,6 @@ def _backward(output, grad_output): class Entmax15(nn.Module): - def __init__(self, dim=-1): self.dim = dim super(Entmax15, self).__init__() diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py index ff01991c..32115c8c 100755 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py @@ -11,9 +11,9 @@ class TabNetClassifier(TabModel): def __post_init__(self): super(TabNetClassifier, self).__post_init__() - self._task = 'classification' + self._task = "classification" self._default_loss = torch.nn.functional.cross_entropy - self._default_metric = 'accuracy' + self._default_metric = "accuracy" def weight_updater(self, weights): """ @@ -54,14 +54,10 @@ def update_fit_params( for X, y in eval_set: check_output_dim(train_labels, y) self.output_dim = output_dim - self._default_metric = ('auc' if self.output_dim == 2 else 'accuracy') + self._default_metric = "auc" if 
self.output_dim == 2 else "accuracy" self.classes_ = train_labels - self.target_mapper = { - class_label: index for index, class_label in enumerate(self.classes_) - } - self.preds_mapper = { - str(index): class_label for index, class_label in enumerate(self.classes_) - } + self.target_mapper = {class_label: index for index, class_label in enumerate(self.classes_)} + self.preds_mapper = {str(index): class_label for index, class_label in enumerate(self.classes_)} self.updated_weights = self.weight_updater(weights) def stack_batches(self, list_y_true, list_y_score): @@ -117,9 +113,9 @@ def predict_proba(self, X): class TabNetRegressor(TabModel): def __post_init__(self): super(TabNetRegressor, self).__post_init__() - self._task = 'regression' + self._task = "regression" self._default_loss = torch.nn.functional.mse_loss - self._default_metric = 'mse' + self._default_metric = "mse" def prepare_target(self, y): return y @@ -127,17 +123,13 @@ def prepare_target(self, y): def compute_loss(self, y_pred, y_true): return self.loss_fn(y_pred, y_true) - def update_fit_params( - self, - X_train, - y_train, - eval_set, - weights - ): + def update_fit_params(self, X_train, y_train, eval_set, weights): if len(y_train.shape) != 2: - msg = "Targets should be 2D : (n_samples, n_regression) " + \ - f"but y_train.shape={y_train.shape} given.\n" + \ - "Use reshape(-1, 1) for single regression." + msg = ( + "Targets should be 2D : (n_samples, n_regression) " + + f"but y_train.shape={y_train.shape} given.\n" + + "Use reshape(-1, 1) for single regression." + ) raise ValueError(msg) self.output_dim = y_train.shape[1] self.preds_mapper = None diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py index fda3bfd4..52d15a72 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py @@ -121,9 +121,7 @@ def create_sampler(weights, y_train): sampler = None elif weights == 1: need_shuffle = False - class_sample_count = np.array( - [len(np.where(y_train == t)[0]) for t in np.unique(y_train)] - ) + class_sample_count = np.array([len(np.where(y_train == t)[0]) for t in np.unique(y_train)]) weights = 1.0 / class_sample_count @@ -149,9 +147,7 @@ def create_sampler(weights, y_train): return need_shuffle, sampler -def create_dataloaders( - X_train, y_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory -): +def create_dataloaders(X_train, y_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory): """ Create dataloaders with or without subsampling depending on weights and balanced. 
@@ -271,9 +267,7 @@ def create_explain_matrix(input_dim, cat_emb_dim, cat_idxs, post_embed_dim): if i not in cat_idxs: indices_trick.append([i + acc_emb]) else: - indices_trick.append( - range(i + acc_emb, i + acc_emb + all_emb_impact[nb_emb] + 1) - ) + indices_trick.append(range(i + acc_emb, i + acc_emb + all_emb_impact[nb_emb] + 1)) acc_emb += all_emb_impact[nb_emb] nb_emb += 1 @@ -420,43 +414,26 @@ def validate_eval_set(eval_set, eval_name, X_train, y_train): """ eval_name = eval_name or [f"val_{i}" for i in range(len(eval_set))] - assert len(eval_set) == len( - eval_name - ), "eval_set and eval_name have not the same length" + assert len(eval_set) == len(eval_name), "eval_set and eval_name have not the same length" if len(eval_set) > 0: - assert all( - len(elem) == 2 for elem in eval_set - ), "Each tuple of eval_set need to have two elements" + assert all(len(elem) == 2 for elem in eval_set), "Each tuple of eval_set need to have two elements" for name, (X, y) in zip(eval_name, eval_set): check_input(X) - msg = ( - f"Dimension mismatch between X_{name} " - + f"{X.shape} and X_train {X_train.shape}" - ) + msg = f"Dimension mismatch between X_{name} " + f"{X.shape} and X_train {X_train.shape}" assert len(X.shape) == len(X_train.shape), msg - msg = ( - f"Dimension mismatch between y_{name} " - + f"{y.shape} and y_train {y_train.shape}" - ) + msg = f"Dimension mismatch between y_{name} " + f"{y.shape} and y_train {y_train.shape}" assert len(y.shape) == len(y_train.shape), msg - msg = ( - f"Number of columns is different between X_{name} " - + f"({X.shape[1]}) and X_train ({X_train.shape[1]})" - ) + msg = f"Number of columns is different between X_{name} " + f"({X.shape[1]}) and X_train ({X_train.shape[1]})" assert X.shape[1] == X_train.shape[1], msg if len(y_train.shape) == 2: msg = ( - f"Number of columns is different between y_{name} " - + f"({y.shape[1]}) and y_train ({y_train.shape[1]})" + f"Number of columns is different between y_{name} " + f"({y.shape[1]}) and y_train ({y_train.shape[1]})" ) assert y.shape[1] == y_train.shape[1], msg - msg = ( - f"You need the same number of rows between X_{name} " - + f"({X.shape[0]}) and y_{name} ({y.shape[0]})" - ) + msg = f"You need the same number of rows between X_{name} " + f"({X.shape[0]}) and y_{name} ({y.shape[0]})" assert X.shape[0] == y.shape[0], msg return eval_name, eval_set From fbc5076f5aefe8b3d7a6ec0bed45b8afc570290d Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 08:43:24 +0000 Subject: [PATCH 13/49] not done still --- lightautoml/ml_algo/tabnet/utils.py | 44 ++++++++--------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/tabnet/utils.py index 9dad6259..332f0d89 100644 --- a/lightautoml/ml_algo/tabnet/utils.py +++ b/lightautoml/ml_algo/tabnet/utils.py @@ -2,7 +2,7 @@ import torch import numpy as np import torch.nn as nn -from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax, sparsemax,entmax15 +from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax, sparsemax, entmax15 from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm @@ -20,9 +20,6 @@ def initialize_glu(module, input_dim, output_dim): return - - - class TabNetEncoder(torch.nn.Module): def __init__( self, @@ -100,13 +97,9 @@ def __init__( shared_feat_transform = torch.nn.ModuleList() for i in range(self.n_shared): if i == 0: - shared_feat_transform.append( - nn.Linear(self.input_dim, 2 * (n_d + n_a), 
bias=False) - ) + shared_feat_transform.append(nn.Linear(self.input_dim, 2 * (n_d + n_a), bias=False)) else: - shared_feat_transform.append( - nn.Linear(n_d + n_a, 2 * (n_d + n_a), bias=False) - ) + shared_feat_transform.append(nn.Linear(n_d + n_a, 2 * (n_d + n_a), bias=False)) else: shared_feat_transform = None @@ -155,9 +148,7 @@ def forward(self, x, prior=None): steps_output = [] for step in range(self.n_steps): M = self.att_transformers[step](prior, att) - M_loss += torch.mean( - torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1) - ) + M_loss += torch.mean(torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1)) # update prior prior = torch.mul(self.gamma - M, prior) # output @@ -197,7 +188,6 @@ def forward_masks(self, x): att = out[:, self.n_d :] return M_explain, masks - class FeatTransformer(torch.nn.Module): @@ -257,15 +247,13 @@ def __init__( self.specifics = torch.nn.Identity() else: spec_input_dim = input_dim if is_first else output_dim - self.specifics = GLU_Block( - spec_input_dim, output_dim, first=is_first, **params - ) + self.specifics = GLU_Block(spec_input_dim, output_dim, first=is_first, **params) def forward(self, x): x = self.shared(x) x = self.specifics(x) return x - + class GLU_Block(torch.nn.Module): """ @@ -308,13 +296,10 @@ def forward(self, x): x = torch.add(x, self.glu_layers[glu_id](x)) x = x * scale return x - class GLU_Layer(torch.nn.Module): - def __init__( - self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02 - ): + def __init__(self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02): super(GLU_Layer, self).__init__() self.output_dim = output_dim @@ -324,16 +309,13 @@ def __init__( self.fc = nn.Linear(input_dim, 2 * output_dim, bias=False) initialize_glu(self.fc, input_dim, 2 * output_dim) - self.bn = GhostBatchNorm( - 2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum - ) + self.bn = GhostBatchNorm(2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) def forward(self, x): x = self.fc(x) x = self.bn(x) out = torch.mul(x[:, : self.output_dim], torch.sigmoid(x[:, self.output_dim :])) return out - class AttentiveTransformer(torch.nn.Module): @@ -365,9 +347,7 @@ def __init__( super(AttentiveTransformer, self).__init__() self.fc = nn.Linear(input_dim, group_dim, bias=False) initialize_non_glu(self.fc, input_dim, group_dim) - self.bn = GhostBatchNorm( - group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum - ) + self.bn = GhostBatchNorm(group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) if mask_type == "sparsemax": # Sparsemax @@ -376,13 +356,11 @@ def __init__( # Entmax self.selector = Entmax15() else: - raise NotImplementedError( - "Please choose either sparsemax" + "or entmax as masktype" - ) + raise NotImplementedError("Please choose either sparsemax" + "or entmax as masktype") def forward(self, priors, processed_feat): x = self.fc(processed_feat) x = self.bn(x) x = torch.mul(x, priors) x = self.selector(x) - return x \ No newline at end of file + return x From 3a62edbf1f8aa5b09ff8c127f00f3ab482f97353 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 08:46:31 +0000 Subject: [PATCH 14/49] -Lambda + MP --- .../ml_algo/torch_based/node_nn_model.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index cdfedbea..d14ba601 100644 --- 
a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -258,26 +258,29 @@ def _backward(output, grad_output): entmoid15 = Entmoid15.apply # noqa: E731 -class Lambda(nn.Module): - """Pytorch implementation of lambda. +class MeanPooling(nn.Module): + """Pytorch implementation of MeanPooling head. Args: - func : returned func + n_out: int, output dim. + dim: int: the dimension to be averaged. + """ - def __init__(self, func): + def __init__(self, n_out, dim=-1): super().__init__() - self.func = func + self.n_out = n_out + self.dim = dim - def forward(self, *args, **kwargs): + def forward(self, x: torch.Tensor): """Forward-pass. # noqa: DAR101 Returns: - f(*args, **kwargs) + x[..., :self.n_out].mean(dim=self.dim) """ - return self.func(*args, **kwargs) + return x[..., :self.n_out].mean(dim=self.dim) class ModuleWithInit(nn.Module): From dc003fa67e66438cd70d40041915498cfb42ae8f Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 10:19:03 +0000 Subject: [PATCH 15/49] changed on comments --- lightautoml/ml_algo/torch_based/nn_models.py | 5 +- .../ml_algo/torch_based/node_nn_model.py | 95 ++++++++++++++--- 2 files changed, 85 insertions(+), 15 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 119e0779..19ec5313 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -9,8 +9,7 @@ import torch import torch.nn as nn -from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock -from lightautoml.ml_algo.torch_based.node_nn_model import Lambda +from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock, MeanPooling class GaussianNoise(nn.Module): @@ -781,7 +780,7 @@ def __init__( self.features1.add_module("ODSTForestblock%d", block) self.features2 = nn.Sequential(OrderedDict([])) if use_original_head: - last_layer = Lambda(lambda x: x[..., :n_out].mean(dim=-2)) + last_layer = MeanPooling(n_out, dim=-2) self.features2.add_module("head", last_layer) else: if use_bn: @@ -122,8 +122,41 @@ def _threshold_and_support(input, dim=-1): return tau, support_size -sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim) # noqa: E731 -sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1) # noqa: E731 +class Sparsemax(nn.Module): + """Py-Torch class for Sparsemax.""" + + def __init__(self): + super(Sparsemax, self).__init__() + + def forward(self, input, dim): + """Forward-pass. + + Args: + input (Tensor): input Tensor. + dim (int): dimension which will be aggregated. + + Returns: + Sparsemax(input, dim=dim) + """ + return SparsemaxFunction.apply(input, dim) + + +class Sparsemoid(nn.Module): + """Py-Torch class for Sparsemoid.""" + + def __init__(self): + super(Sparsemoid, self).__init__() + + def forward(self, input): + """Forward-pass. 
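# --- Editor's note: illustrative sketch, not part of the patch. MeanPooling is a drop-in
# replacement for the removed Lambda head in NODE: it keeps the first n_out channels and
# averages over `dim` (the tree axis); import path assumes the patched tree:
import torch
from lightautoml.ml_algo.torch_based.node_nn_model import MeanPooling

x = torch.randn(32, 64, 3)             # (batch, n_trees, tree_dim)
out = MeanPooling(n_out=1, dim=-2)(x)  # equals x[..., :1].mean(dim=-2) -> shape (32, 1)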
+ + Args: + input (Tensor): input Tensor + + Returns: + Sparsemoid(input) + """ + return (0.5 * input + 0.5).clamp_(0, 1) class Entmax15Function(Function): @@ -204,8 +237,8 @@ def _threshold_and_support(input, dim=-1): return tau_star, support_size -class Entmoid15(Function): - """A highly optimized equivalent of labda x: Entmax15([x, 0]).""" +class Entmoid15Optimied(Function): + """A highly optimized equivalent of lambda x: Entmax15([x, 0]).""" @staticmethod def forward(ctx, input): @@ -218,7 +251,7 @@ def forward(ctx, input): Returns: output (Tensor): same shape as input """ - output = Entmoid15._forward(input) + output = Entmoid15Optimied._forward(input) ctx.save_for_backward(output) return output @@ -242,7 +275,7 @@ def backward(ctx, grad_output): Returns: grad output """ - return Entmoid15._backward(ctx.saved_tensors[0], grad_output) + return Entmoid15Optimied._backward(ctx.saved_tensors[0], grad_output) @staticmethod @script @@ -254,8 +287,41 @@ def _backward(output, grad_output): return grad_input -entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim) # noqa: E731 -entmoid15 = Entmoid15.apply # noqa: E731 +class Entmax15(nn.Module): + """Py-Torch class for Entmax15.""" + + def __init__(self): + super(Entmax15, self).__init__() + + def forward(self, input, dim): + """Forward-pass. + + Args: + input (Tensor): input Tensor. + dim (int): dimension which will be aggregated. + + Returns: + Entmax15(input, dim=dim) + """ + return Entmax15Function.apply(input, dim) + + +class Entmoid15(nn.Module): + """Py-Torch class for Entmoid15.""" + + def __init__(self): + super(Entmoid15, self).__init__() + + def forward(self, input): + """Forward-pass. + + Args: + input (Tensor): input Tensor + + Returns: + Entmoid15(input) + """ + return Entmoid15Optimied.apply(input) class MeanPooling(nn.Module): @@ -264,7 +330,7 @@ class MeanPooling(nn.Module): Args: n_out: int, output dim. dim: int: the dimension to be averaged. 
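# --- Editor's note: illustrative sketch, not part of the patch. The new nn.Module
# wrappers replace the removed `sparsemax`/`entmax15` lambdas and are called directly
# (import paths assume the patched tree):
import torch
from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Entmoid15, Sparsemax, Sparsemoid

logits = torch.tensor([[2.0, 1.0, -2.0]])
print(Sparsemax()(logits, dim=-1))  # sums to 1; small logits map to exactly 0
print(Entmax15()(logits, dim=-1))   # smoother than sparsemax, but still sparse
print(Sparsemoid()(logits))         # elementwise (0.5 * x + 0.5).clamp(0, 1)
print(Entmoid15()(logits))          # elementwise, optimized entmax15([x, 0]) gate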
- + """ def __init__(self, n_out, dim=-1): @@ -280,7 +346,7 @@ def forward(self, x: torch.Tensor): Returns: x[..., :self.n_out].mean(dim=self.dim) """ - return x[..., :self.n_out].mean(dim=self.dim) + return x[..., : self.n_out].mean(dim=self.dim) class ModuleWithInit(nn.Module): @@ -355,8 +421,8 @@ def __init__( depth=6, tree_dim=1, flatten_output=True, - choice_function=entmax15, - bin_function=entmoid15, + choice_function=Entmax15(), + bin_function=Entmoid15(), initialize_response_=nn.init.normal_, initialize_selection_logits_=nn.init.uniform_, threshold_init_beta=1.0, @@ -523,6 +589,11 @@ def forward(self, x): tail_features = min(self.max_features, layer_inp.shape[-1]) - initial_features if tail_features != 0: layer_inp = torch.cat([layer_inp[..., :initial_features], layer_inp[..., -tail_features:]], dim=-1) + """ + Originally it was: + if self.training and self.input_dropout: + layer_inp = F.dropout(layer_inp, self.input_dropout) + """ if self.input_dropout: layer_inp = F.dropout(layer_inp, self.input_dropout, self.training) h = layer(layer_inp) From 32bae0ae1b5abb786725fc910ec2377e51f17a9e Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 10:37:55 +0000 Subject: [PATCH 16/49] changes on comments --- lightautoml/ml_algo/torch_based/linear_model.py | 4 ++-- lightautoml/ml_algo/utils.py | 4 ++-- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/linear_model.py b/lightautoml/ml_algo/torch_based/linear_model.py index 6321caf5..3fcfc96e 100644 --- a/lightautoml/ml_algo/torch_based/linear_model.py +++ b/lightautoml/ml_algo/torch_based/linear_model.py @@ -16,7 +16,7 @@ from torch import optim from ...tasks.losses import TorchLossWrapper -from ..utils import MySoftmaxClip +from ..utils import SoftmaxClip logger = logging.getLogger(__name__) @@ -138,7 +138,7 @@ class CatMulticlass(CatLinear): def __init__(self, numeric_size: int, embed_sizes: Sequence[int] = (), output_size: int = 1): super().__init__(numeric_size, embed_sizes=embed_sizes, output_size=output_size) - self.final_act = MySoftmaxClip(dim=1) + self.final_act = SoftmaxClip(dim=1) class TorchBasedLinearEstimator: diff --git a/lightautoml/ml_algo/utils.py b/lightautoml/ml_algo/utils.py index 3cbbd5e1..f142f3f8 100644 --- a/lightautoml/ml_algo/utils.py +++ b/lightautoml/ml_algo/utils.py @@ -83,7 +83,7 @@ def tune_and_fit_predict( return ml_algo, preds -class MySoftmaxClip(nn.Module): +class SoftmaxClip(nn.Module): """Softmax with clip-norm. 
Args: @@ -92,7 +92,7 @@ class MySoftmaxClip(nn.Module): """ def __init__(self, dim: Optional[int] = None) -> None: - super(MySoftmaxClip, self).__init__() + super(SoftmaxClip, self).__init__() self.dim = dim self.smax = nn.Softmax(dim=dim) diff --git a/pyproject.toml b/pyproject.toml index b78a6714..352a0cad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ catboost = ">=0.26.1" optuna = "*" torch = [ {platform = "win32", python = "3.6.1", version = "1.7.0"}, - {version = "<=2.0.0"} + {version = ">=1.9.0, <=2.0.0"} ] dataclasses = {version = "0.6", python = "<3.7"} holidays = "*" From a58df59b5bc18b20c6696f0f54903078ca495c84 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 12:14:47 +0000 Subject: [PATCH 17/49] added changes on comments --- lightautoml/automl/presets/image_config.yml | 4 + lightautoml/automl/presets/tabular_config.yml | 6 +- lightautoml/automl/presets/text_config.yml | 4 + .../automl/presets/time_series_config.yml | 4 + lightautoml/ml_algo/dl_model.py | 86 +++++++++++++------ .../torch_based/autoint/autoint_utils.py | 47 ---------- lightautoml/text/embed.py | 79 +++++------------ lightautoml/text/nn_model.py | 12 +-- 8 files changed, 106 insertions(+), 136 deletions(-) diff --git a/lightautoml/automl/presets/image_config.yml b/lightautoml/automl/presets/image_config.yml index 01c04b30..2ba5b4c7 100755 --- a/lightautoml/automl/presets/image_config.yml +++ b/lightautoml/automl/presets/image_config.yml @@ -244,6 +244,10 @@ nn_params: model: denselight # embedding_size if needed embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network diff --git a/lightautoml/automl/presets/tabular_config.yml b/lightautoml/automl/presets/tabular_config.yml index 691609f4..bcf6abbd 100755 --- a/lightautoml/automl/presets/tabular_config.yml +++ b/lightautoml/automl/presets/tabular_config.yml @@ -130,6 +130,10 @@ nn_params: model: denselight # embedding_size if needed embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -156,7 +160,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 256, 128, 64] + hidden_size: [512, 256, 128] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/text_config.yml b/lightautoml/automl/presets/text_config.yml index 14d9c3f1..db82a874 100755 --- a/lightautoml/automl/presets/text_config.yml +++ b/lightautoml/automl/presets/text_config.yml @@ -122,6 +122,10 @@ linear_l2_params: nn_params: # embedding_size if needed embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # early stopping and scheduler use metric stop_by_metric: False random_state: 42 diff --git a/lightautoml/automl/presets/time_series_config.yml b/lightautoml/automl/presets/time_series_config.yml index 2e4cbdb7..8bb055ae 100644 --- a/lightautoml/automl/presets/time_series_config.yml +++ b/lightautoml/automl/presets/time_series_config.yml @@ -134,6 +134,10 @@ nn_params: model: denselight # 
embedding_size if needed embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 49be49f0..d7b1ed0c 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -44,7 +44,17 @@ from ..ml_algo.base import TabularDataset from ..ml_algo.base import TabularMLAlgo from ..pipelines.utils import get_columns_by_role -from ..text.embed import CatEmbedder, DefaultEmbedding, DenseEmbedding, LinearEmbedding, BasicEmbedding +from ..text.embed import ( + BasicCatEmbeddingFlat, + CatEmbedder, + DenseEmbedding, + DenseEmbeddingFlat, + LinearEmbedding, + LinearEmbeddingFlat, + WeightedCatEmbedding, + BasicCatEmbedding, + WeightedCatEmbeddingFlat, +) from ..text.embed import ContEmbedder from ..text.embed import TextBert from ..text.nn_model import TorchUniversalModel @@ -79,32 +89,56 @@ "snn": SNN, "node": NODE, "autoint": AutoInt, - "autoint_emb_v2": AutoInt, } -cat_embedder_by_name = { - "denselight": CatEmbedder, - "dense": CatEmbedder, - "resnet": CatEmbedder, - "mlp": CatEmbedder, - "linear_layer": CatEmbedder, - "_linear_layer": CatEmbedder, - "snn": CatEmbedder, - "node": CatEmbedder, - "autoint": BasicEmbedding, - "autoint_emb_v2": DefaultEmbedding, +input_type_by_name = { + "denselight": "flat", + "dense": "flat", + "resnet": "flat", + "mlp": "flat", + "linear_layer": "flat", + "_linear_layer": "flat", + "snn": "flat", + "node": "flat", + "autoint": "seq", } -cont_embedder_params_by_name = { - "denselight": ContEmbedder, - "dense": ContEmbedder, - "resnet": ContEmbedder, - "mlp": ContEmbedder, - "linear_layer": ContEmbedder, - "_linear_layer": ContEmbedder, - "snn": ContEmbedder, - "node": ContEmbedder, - "autoint": LinearEmbedding, - "autoint_emb_v2": DenseEmbedding, +cat_embedder_by_name_flat = { + "cat": CatEmbedder, + "cat_no_dropout": BasicCatEmbeddingFlat, + "weighted": WeightedCatEmbeddingFlat, } +cat_embedder_by_name = {"cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding} +cont_embedder_by_name_flat = {"cont": ContEmbedder, "linear": LinearEmbeddingFlat, "dense": DenseEmbeddingFlat} +cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding} + + +def _get_embedder_cat(params): + if input_type_by_name[params["model"]] == "seq": + try: + out = cat_embedder_by_name[params["cat_embedder"]] + except KeyError: + out = BasicCatEmbedding + return out + else: + try: + out = cat_embedder_by_name_flat[params["cat_embedder"]] + except KeyError: + out = CatEmbedder + return out + + +def _get_embedder_cont(params): + if input_type_by_name[params["model"]] == "seq": + try: + out = cont_embedder_by_name[params["cont_embedder"]] + except KeyError: + out = LinearEmbedding + return out + else: + try: + out = cont_embedder_by_name_flat[params["cont_embedder"]] + except KeyError: + out = ContEmbedder + return out class TorchModel(TabularMLAlgo): @@ -278,7 +312,7 @@ def _infer_params(self): net=TorchUniversalModel if not params["model_with_emb"] else params["model"], net_params={ "task": self.task, - "cont_embedder": cont_embedder_params_by_name[params["model"]] if is_cont else None, + "cont_embedder_": _get_embedder_cont(params) if is_cont else None, "cont_params": { "num_dims": params["num_dims"], "input_bn": params["input_bn"], @@ -287,7 +321,7 @@ def _infer_params(self): } if is_cont else 
None, - "cat_embedder": cat_embedder_by_name[params["model"]] if is_cat else None, + "cat_embedder_": _get_embedder_cat(params) if is_cat else None, "cat_params": { "cat_vc": params["cat_vc"], "cat_dims": params["cat_dims"], diff --git a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py index c14944f5..ba047d52 100644 --- a/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py +++ b/lightautoml/ml_algo/torch_based/autoint/autoint_utils.py @@ -12,53 +12,6 @@ EmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "output_size"]) UniformEmbeddingInfo = namedtuple("EmbeddingInfo", ["num_fields", "embedding_size", "output_size"]) -MODULE_INIT_DOC = """ -Parameters ----------- -output_size : int - number of final output values; i.e., number of targets for - regression or number of classes for classification -embedding_num : EmbeddingBase or None - initialized and fit embedding for numeric fields -embedding_cat : EmbeddingBase or None - initialized and fit embedding for categorical fields -embedding_l1_reg : float, optional - value for l1 regularization of embedding vectors; default is 0.0 -embedding_l2_reg : float, optional - value for l2 regularization of embedding vectors; default is 0.0 -{} -mlp_hidden_sizes : int or iterable of int, optional - sizes for the linear transformations between the MLP input and - the output size needed based on the target; default is (512, 256, 128, 64) -mlp_activation : subclass of torch.nn.Module (uninitialized), optional - default is nn.LeakyReLU -mlp_use_bn : boolean, optional - whether to use batch normalization between MLP linear layers; - default is True -mlp_bn_momentum : float, optional - only used if `mlp_use_bn` is True; default is 0.01 -mlp_ghost_batch : int or None, optional - only used if `mlp_use_bn` is True; size of batch in "ghost batch norm"; - if None, normal batch norm is used; defualt is None -mlp_dropout : float, optional - whether and how much dropout to use between MLP linear layers; - `0.0 <= mlp_dropout < 1.0`; default is 0.0 -mlp_use_skip : boolean, optional - use a side path in the MLP containing just the optional leaky gate - plus single linear layer; default is True -mlp_l1_reg : float, optional - value for l1 regularization of MLP weights; default is 0.0 -mlp_l2_reg : float, optional - value for l2 regularization of MLP weights; default is 0.0 -use_leaky_gate : boolean, optional - whether to include "leaky gate" layers; default is True -loss_fn : "auto" or PyTorch loss function, optional - default is "auto" -device : string or torch.device, optional - default is "cpu" - -""" - class LeakyGate(nn.Module): """LeakyGate from https://github.com/jrfiedler/xynn. diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index eaa15558..650a86e6 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -8,7 +8,6 @@ from typing import Sequence from typing import Union from functools import reduce -import numpy as np import torch import torch.nn as nn from torch import Tensor @@ -167,9 +166,11 @@ def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: return output -class BasicEmbedding(nn.Module): +class BasicCatEmbedding(nn.Module): """A basic embedding that creates an embedded vector for each field value from https://github.com/jrfiedler/xynn. + The same as CatEmbedder, but without dropout, and it can be presented as a sequence. 
+ Args: embedding_size : int, optional size of each value's embedding vector; default is 10 @@ -180,7 +181,7 @@ class BasicEmbedding(nn.Module): def __init__( self, - cat_vc: Sequence[Dict], + cat_dims: Sequence[int], embedding_size: int = 10, device: Union[str, torch.device] = "cuda:0", flatten_output: bool = False, @@ -189,37 +190,22 @@ def __init__( super().__init__() self.flatten_output = flatten_output self._device = device - self._isfit = False self.num_fields = 0 self.output_size = 0 - self.lookup: Dict[Tuple[int, Any], int] = {} - self.lookup_nan: Dict[int, int] = {} - self.num_values = 0 self.embedding: Optional[nn.Embedding] = None self.embedding_size = embedding_size - self._from_summary(cat_vc) - self.cat_len = len(cat_vc) + self._from_summary(cat_dims) + self.cat_len = len(cat_dims) - def _from_summary(self, uniques: List[Union[List, Tensor, np.ndarray]]): - lookup = {} - lookup_nan = {} + def _from_summary(self, cat_dims: Sequence[int]): num_values = 0 - for fieldnum, field in enumerate(uniques): - for value in field: - if (fieldnum, value) in lookup: - # extra defense against repeated values - continue - lookup[(fieldnum, value)] = num_values - num_values += 1 - self.num_fields = len(uniques) + self.emb_layers = nn.ModuleList([nn.Embedding(int(x), self.embedding_size) for x in cat_dims]) + self.num_fields = len(cat_dims) self.output_size = self.num_fields * self.embedding_size - self.lookup = lookup - self.lookup_nan = lookup_nan self.num_values = num_values - self.embedding = nn.Embedding(num_values, self.embedding_size) - nn.init.xavier_uniform_(self.embedding.weight) - self._isfit = True + for emb in self.emb_layers: + nn.init.xavier_uniform_(emb.weight) def get_out_shape(self) -> int: """Output shape. @@ -243,23 +229,17 @@ def forward(self, X: Dict) -> Tensor: torch.Tensor """ - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") X = X["cat"] - idxs: List[List[int]] = [] - for row in X: - idxs.append([]) - for col, val in enumerate(row): - val = val.item() - idx = self.lookup[(col, val)] - idxs[-1].append(idx) - x = self.embedding(torch.tensor(idxs, dtype=torch.int64, device=self._device)) + x = torch.stack( + [emb_layer(X[:, i]) for i, emb_layer in enumerate(self.emb_layers)], + dim=1, + ) if self.flatten_output: return x.view(x.shape[0], -1) return x -class DefaultEmbedding(nn.Module): +class WeightedCatEmbedding(nn.Module): """DefaultEmbedding from https://github.com/jrfiedler/xynn. An embedding with a default value for each field. The default is returned for @@ -295,7 +275,6 @@ def __init__( ): super().__init__() self.flatten_output = flatten_output - self._isfit = False self._device = device self.num_fields = 0 self.output_size = 0 @@ -327,8 +306,6 @@ def _from_summary(self, unique_counts: List[Dict[Any, int]]): self.embedding = nn.Embedding(num_values, self.embedding_size) nn.init.xavier_uniform_(self.embedding.weight) - self._isfit = True - def get_out_shape(self) -> int: """Output shape. 
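# --- Editor's note: illustrative sketch, not part of the patch. After this change
# BasicCatEmbedding builds one nn.Embedding per column and expects label-encoded
# indices, replacing the old per-value lookup dict:
import torch
from lightautoml.text.embed import BasicCatEmbedding

emb = BasicCatEmbedding(cat_dims=[3, 5], embedding_size=10, device="cpu")
out = emb({"cat": torch.tensor([[0, 4], [2, 1]])})
print(out.shape)  # torch.Size([2, 2, 10]); BasicCatEmbeddingFlat would give (2, 20)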
@@ -350,8 +327,6 @@ def forward(self, X: Dict) -> Tensor: Returns: torch.Tensor """ - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") X = X["cat"] list_weights: List[List[List[float]]] = [] idxs_primary: List[List[int]] = [] @@ -393,7 +368,6 @@ class LinearEmbedding(nn.Module): def __init__(self, num_dims: int, embedding_size: int = 10, flatten_output: bool = False, **kwargs): super().__init__() self.flatten_output = flatten_output - self._isfit = False self.num_fields = num_dims self.output_size = 0 self.embedding: Optional[nn.Embedding] = None @@ -405,7 +379,6 @@ def _from_summary(self, num_fields: int): self.output_size = num_fields * self.embedding_size self.embedding = nn.Embedding(num_fields, self.embedding_size) nn.init.xavier_uniform_(self.embedding.weight) - self._isfit = True def get_out_shape(self) -> int: """Output shape. @@ -430,8 +403,6 @@ def forward(self, X: Dict) -> Tensor: """ X = X["cont"] - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") x = self.embedding.weight * X.unsqueeze(dim=-1) if self.flatten_output: return x.view(x.shape[0], -1) @@ -468,7 +439,6 @@ def __init__( embedding_size = (1, embedding_size) elif len(embedding_size) == 1: embedding_size = (1, embedding_size[0]) - self._isfit = False self.num_fields = num_dims self.output_size = 0 self.embedding_w = None @@ -483,7 +453,6 @@ def _from_summary(self, num_fields: int): self.embedding_w = nn.Parameter(torch.zeros((num_fields, *self.dense_out_size))) self.embedding_b = nn.Parameter(torch.zeros(self.dense_out_size)) nn.init.xavier_uniform_(self.embedding_w) - self._isfit = True def get_out_shape(self) -> int: """Output shape. @@ -508,9 +477,7 @@ def forward(self, X: Dict) -> Tensor: """ X = X["cont"] - if not self._isfit: - raise RuntimeError("need to call `fit` or `from_summary` first") - embedded = self.embedding_w.T.matmul(X.T.to(dtype=torch.float)).T + self.embedding_b + embedded = self.embedding_w.T.matmul(X.T.float()).T + self.embedding_b embedded = self.activation(embedded.reshape((X.shape[0], -1))) x = embedded.reshape((X.shape[0], *self.dense_out_size)) if self.flatten_output: @@ -532,15 +499,15 @@ def __init__(self, *args, **kwargs): super(LinearEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) -class DefaultEmbeddingFlat(DefaultEmbedding): - """Flatten version of DefaultEmbedding.""" +class WeightedCatEmbeddingFlat(WeightedCatEmbedding): + """Flatten version of WeightedCatEmbedding.""" def __init__(self, *args, **kwargs): - super(DefaultEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + super(WeightedCatEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) -class BasicEmbeddingFlat(BasicEmbedding): - """Flatten version of BasicEmbedding.""" +class BasicCatEmbeddingFlat(BasicCatEmbedding): + """Flatten version of BasicCatEmbedding.""" def __init__(self, *args, **kwargs): - super(BasicEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + super(BasicCatEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 916cfec6..dc4db2ae 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -114,9 +114,9 @@ def __init__( task: Task, torch_model: nn.Module, n_out: int = 1, - cont_embedder: Optional[Any] = None, + cont_embedder_: Optional[Any] = None, cont_params: Optional[Dict] = None, - cat_embedder: 
Optional[Any] = None, + cat_embedder_: Optional[Any] = None, cat_params: Optional[Dict] = None, text_embedder: Optional[Any] = None, text_params: Optional[Dict] = None, @@ -135,11 +135,11 @@ def __init__( self.text_embedder = None n_in = 0 - if cont_embedder is not None: - self.cont_embedder = cont_embedder(**cont_params) + if cont_embedder_ is not None: + self.cont_embedder = cont_embedder_(**cont_params) n_in += self.cont_embedder.get_out_shape() - if cat_embedder is not None: - self.cat_embedder = cat_embedder(**cat_params) + if cat_embedder_ is not None: + self.cat_embedder = cat_embedder_(**cat_params) n_in += self.cat_embedder.get_out_shape() if text_embedder is not None: self.text_embedder = text_embedder(**text_params) From 788d381e616f220c1ac7b44611b8a867ecf9b7b2 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 12:24:38 +0000 Subject: [PATCH 18/49] resolve merge conflicts --- lightautoml/ml_algo/torch_based/nn_models.py | 2 +- lightautoml/ml_algo/torch_based/node_nn_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 92ed10ee..0988e0b1 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -1091,4 +1091,4 @@ def forward(self, x): return out def forward_masks(self, x): - return self.encoder.forward_masks(x) \ No newline at end of file + return self.encoder.forward_masks(x) diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index f0f195af..5259f24f 100644 --- a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -357,7 +357,7 @@ class MeanPooling(nn.Module): n_out: int, output dim. dim: int: the dimension to be averaged. 
<<<<<<< HEAD - + ======= >>>>>>> autoint++ From f6fdb58b49510c01fb0ac8e2308c56529d74ad13 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 15 Aug 2023 12:27:16 +0000 Subject: [PATCH 19/49] resolve merge conflicts --- lightautoml/ml_algo/torch_based/nn_models.py | 4 --- .../ml_algo/torch_based/node_nn_model.py | 29 ------------------- 2 files changed, 33 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 0988e0b1..f8bfbea1 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -840,11 +840,7 @@ def __init__( self.features1.add_module("ODSTForestblock%d", block) self.features2 = nn.Sequential(OrderedDict([])) if use_original_head: -<<<<<<< HEAD - last_layer = MeanPooling(n_out,dim=-2) -======= last_layer = MeanPooling(n_out, dim=-2) ->>>>>>> autoint++ self.features2.add_module("head", last_layer) else: if use_bn: diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index 5259f24f..9bf02d44 100644 --- a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -122,11 +122,6 @@ def _threshold_and_support(input, dim=-1): return tau, support_size -<<<<<<< HEAD -sparsemax = lambda input, dim=-1: SparsemaxFunction.apply(input, dim) # noqa: E731 -sparsemoid = lambda input: (0.5 * input + 0.5).clamp_(0, 1) # noqa: E731 -class Sparsemax(nn.Module): -======= class Sparsemax(nn.Module): """Py-Torch class for Sparsemax.""" @@ -162,7 +157,6 @@ def forward(self, input): Sparsemoid(input) """ return (0.5 * input + 0.5).clamp_(0, 1) ->>>>>>> autoint++ def __init__(self, dim=-1): self.dim = dim @@ -299,19 +293,6 @@ def _backward(output, grad_output): return grad_input -<<<<<<< HEAD -entmax15 = lambda input, dim=-1: Entmax15Function.apply(input, dim) # noqa: E731 -entmoid15 = Entmoid15.apply # noqa: E731 -class Entmax15(nn.Module): - - def __init__(self, dim=-1): - self.dim = dim - super(Entmax15, self).__init__() - - def forward(self, input): - return Entmax15Function.apply(input, self.dim) - -======= class Entmax15(nn.Module): """Py-Torch class for Entmax15.""" @@ -349,18 +330,12 @@ def forward(self, input): return Entmoid15Optimied.apply(input) ->>>>>>> autoint++ class MeanPooling(nn.Module): """Pytorch implementation of MeanPooling head. Args: n_out: int, output dim. dim: int: the dimension to be averaged. 
-<<<<<<< HEAD - -======= - ->>>>>>> autoint++ """ def __init__(self, n_out, dim=-1): @@ -376,11 +351,7 @@ def forward(self, x: torch.Tensor): Returns: x[..., :self.n_out].mean(dim=self.dim) """ -<<<<<<< HEAD - return x[..., :self.n_out].mean(dim=self.dim) -======= return x[..., : self.n_out].mean(dim=self.dim) ->>>>>>> autoint++ class ModuleWithInit(nn.Module): From a7fe9be2694092dc66547fa7feacbef2d530d97e Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 16 Aug 2023 11:57:35 +0000 Subject: [PATCH 20/49] PLR + SOFTEmb --- lightautoml/automl/presets/tabular_presets.py | 2 +- lightautoml/ml_algo/dl_model.py | 17 +- lightautoml/ml_algo/tabnet/utils.py | 2 +- lightautoml/ml_algo/torch_based/nn_models.py | 4 +- .../ml_algo/torch_based/node_nn_model.py | 18 +- .../pytorch_tabnet/abstract_model.py | 57 ++---- .../torch_based/pytorch_tabnet/tab_network.py | 72 +++---- lightautoml/text/embed.py | 191 ++++++++++++++++++ 8 files changed, 254 insertions(+), 109 deletions(-) diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index 539b2df4..cac85a0e 100755 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -609,7 +609,7 @@ def create_automl(self, **fit_args): "node", "autoint", "autoint_emb_v2", - "tabnet" + "tabnet", ] available_nn_models = available_nn_models + [x + "_tuned" for x in available_nn_models] nn_models = [ diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index b3124c0e..6f49e633 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -51,6 +51,10 @@ DenseEmbeddingFlat, LinearEmbedding, LinearEmbeddingFlat, + PLREmbedding, + PLREmbeddingFlat, + SoftEmbedding, + SoftEmbeddingFlat, WeightedCatEmbedding, BasicCatEmbedding, WeightedCatEmbeddingFlat, @@ -89,6 +93,7 @@ "snn": SNN, "node": NODE, "autoint": AutoInt, + "tabnet": TabNet, } input_type_by_name = { "denselight": "flat", @@ -100,6 +105,7 @@ "snn": "flat", "node": "flat", "autoint": "seq", + "tabnet": "flat", } cat_embedder_by_name_flat = { "cat": CatEmbedder, @@ -107,8 +113,15 @@ "weighted": WeightedCatEmbeddingFlat, } cat_embedder_by_name = {"cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding} -cont_embedder_by_name_flat = {"cont": ContEmbedder, "linear": LinearEmbeddingFlat, "dense": DenseEmbeddingFlat} -cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding} + +cont_embedder_by_name_flat = { + "cont": ContEmbedder, + "linear": LinearEmbeddingFlat, + "dense": DenseEmbeddingFlat, + "plr": PLREmbeddingFlat, + "soft": SoftEmbeddingFlat, +} +cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding} def _get_embedder_cat(params): diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/tabnet/utils.py index 332f0d89..40845a8b 100644 --- a/lightautoml/ml_algo/tabnet/utils.py +++ b/lightautoml/ml_algo/tabnet/utils.py @@ -2,7 +2,7 @@ import torch import numpy as np import torch.nn as nn -from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax, sparsemax, entmax15 +from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index f8bfbea1..b00fe220 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ 
b/lightautoml/ml_algo/torch_based/nn_models.py @@ -978,8 +978,6 @@ def forward(self, embedded: torch.Tensor) -> torch.Tensor: return out - - class TabNet(torch.nn.Module): def __init__( self, @@ -1059,7 +1057,7 @@ def __init__( virtual_batch_size=virtual_batch_size, momentum=momentum, mask_type=mask_type, - group_attention_matrix=group_attention_matrix + group_attention_matrix=group_attention_matrix, ) if self.is_multi_task: diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index 9bf02d44..e57f5125 100644 --- a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -128,7 +128,7 @@ class Sparsemax(nn.Module): def __init__(self): super(Sparsemax, self).__init__() - def forward(self, input, dim): + def forward(self, input, dim=-1): """Forward-pass. Args: @@ -158,12 +158,6 @@ def forward(self, input): """ return (0.5 * input + 0.5).clamp_(0, 1) - def __init__(self, dim=-1): - self.dim = dim - super(Sparsemax, self).__init__() - - def forward(self, input): - return SparsemaxFunction.apply(input, self.dim) class Entmax15Function(Function): """An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). @@ -243,7 +237,7 @@ def _threshold_and_support(input, dim=-1): return tau_star, support_size -class Entmoid15Optimied(Function): +class Entmoid15Optimized(Function): """A highly optimized equivalent of lambda x: Entmax15([x, 0]).""" @staticmethod @@ -257,7 +251,7 @@ def forward(ctx, input): Returns: output (Tensor): same shape as input """ - output = Entmoid15Optimied._forward(input) + output = Entmoid15Optimized._forward(input) ctx.save_for_backward(output) return output @@ -281,7 +275,7 @@ def backward(ctx, grad_output): Returns: grad output """ - return Entmoid15Optimied._backward(ctx.saved_tensors[0], grad_output) + return Entmoid15Optimized._backward(ctx.saved_tensors[0], grad_output) @staticmethod @script @@ -299,7 +293,7 @@ class Entmax15(nn.Module): def __init__(self): super(Entmax15, self).__init__() - def forward(self, input, dim): + def forward(self, input, dim=-1): """Forward-pass. 
Args: @@ -327,7 +321,7 @@ def forward(self, input): Returns: Entmoid15(input) """ - return Entmoid15Optimied.apply(input) + return Entmoid15Optimized.apply(input) class MeanPooling(nn.Module): diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py index a1734439..76c4de53 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py @@ -17,7 +17,7 @@ check_input, check_warm_start, create_group_matrix, - check_embedding_parameters + check_embedding_parameters, ) from pytorch_tabnet.callbacks import ( CallbackContainer, @@ -85,9 +85,7 @@ def __post_init__(self): self.optimizer_fn = copy.deepcopy(self.optimizer_fn) self.scheduler_fn = copy.deepcopy(self.scheduler_fn) - updated_params = check_embedding_parameters(self.cat_dims, - self.cat_idxs, - self.cat_emb_dim) + updated_params = check_embedding_parameters(self.cat_dims, self.cat_idxs, self.cat_emb_dim) self.cat_dims, self.cat_idxs, self.cat_emb_dim = updated_params def __update__(self, **kwargs): @@ -140,7 +138,7 @@ def fit( from_unsupervised=None, warm_start=False, augmentations=None, - compute_importance=True + compute_importance=True, ): """Train a neural network stored in self.network Using train_dataloader for training data and @@ -227,9 +225,7 @@ def fit( # Validate and reformat eval set depending on training data eval_names, eval_set = validate_eval_set(eval_set, eval_name, X_train, y_train) - train_dataloader, valid_dataloaders = self._construct_loaders( - X_train, y_train, eval_set - ) + train_dataloader, valid_dataloaders = self._construct_loaders(X_train, y_train, eval_set) if from_unsupervised is not None: # Update parameters to match self pretraining @@ -262,9 +258,7 @@ def fit( self._predict_epoch(eval_name, valid_dataloader) # Call method on_epoch_end for all callbacks - self._callback_container.on_epoch_end( - epoch_idx, logs=self.history.epoch_metrics - ) + self._callback_container.on_epoch_end(epoch_idx, logs=self.history.epoch_metrics) if self._stop_training: break @@ -355,11 +349,8 @@ def explain(self, X, normalize=False): M_explain, masks = self.network.forward_masks(data) for key, value in masks.items(): - masks[key] = csc_matrix.dot( - value.cpu().detach().numpy(), self.reducing_matrix - ) - original_feat_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(), - self.reducing_matrix) + masks[key] = csc_matrix.dot(value.cpu().detach().numpy(), self.reducing_matrix) + original_feat_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(), self.reducing_matrix) res_explain.append(original_feat_explain) if batch_nb == 0: @@ -417,9 +408,7 @@ def save_model(self, path): init_params[key] = val saved_params["init_params"] = init_params - class_attrs = { - "preds_mapper": self.preds_mapper - } + class_attrs = {"preds_mapper": self.preds_mapper} saved_params["class_attrs"] = class_attrs # Create folder @@ -645,9 +634,7 @@ def _set_metrics(self, metrics, eval_names): # Set metric container for each sets self._metric_container_dict = {} for name in eval_names: - self._metric_container_dict.update( - {name: MetricContainer(metrics, prefix=f"{name}_")} - ) + self._metric_container_dict.update({name: MetricContainer(metrics, prefix=f"{name}_")}) self._metrics = [] self._metrics_names = [] @@ -656,9 +643,7 @@ def _set_metrics(self, metrics, eval_names): self._metrics_names.extend(metric_container.names) # Early stopping metric is the last eval metric - 
self.early_stopping_metric = ( - self._metrics_names[-1] if len(self._metrics_names) > 0 else None - ) + self.early_stopping_metric = self._metrics_names[-1] if len(self._metrics_names) > 0 else None def _set_callbacks(self, custom_callbacks): """Setup the callbacks functions. @@ -668,7 +653,7 @@ def _set_callbacks(self, custom_callbacks): custom_callbacks : list of func List of callback functions. - """ + """ # Setup default callbacks history, early stopping and scheduler callbacks = [] self.history = History(self, verbose=self.verbose) @@ -676,9 +661,7 @@ def _set_callbacks(self, custom_callbacks): if (self.early_stopping_metric is not None) and (self.patience > 0): early_stopping = EarlyStopping( early_stopping_metric=self.early_stopping_metric, - is_maximize=( - self._metrics[-1]._maximize if len(self._metrics) > 0 else None - ), + is_maximize=(self._metrics[-1]._maximize if len(self._metrics) > 0 else None), patience=self.patience, ) callbacks.append(early_stopping) @@ -705,9 +688,7 @@ def _set_callbacks(self, custom_callbacks): def _set_optimizer(self): """Setup optimizer.""" - self._optimizer = self.optimizer_fn( - self.network.parameters(), **self.optimizer_params - ) + self._optimizer = self.optimizer_fn(self.network.parameters(), **self.optimizer_params) def _construct_loaders(self, X_train, y_train, eval_set): """Generate dataloaders for train and eval set. @@ -781,9 +762,7 @@ def update_fit_params(self, X_train, y_train, eval_set, weights): 0 for no balancing 1 for automated balancing """ - raise NotImplementedError( - "users must define update_fit_params to use this base class" - ) + raise NotImplementedError("users must define update_fit_params to use this base class") @abstractmethod def compute_loss(self, y_score, y_true): @@ -802,9 +781,7 @@ def compute_loss(self, y_score, y_true): float Loss value """ - raise NotImplementedError( - "users must define compute_loss to use this base class" - ) + raise NotImplementedError("users must define compute_loss to use this base class") @abstractmethod def prepare_target(self, y): @@ -821,6 +798,4 @@ def prepare_target(self, y): `torch.Tensor` Converted target matrix. 
""" - raise NotImplementedError( - "users must define prepare_target to use this base class" - ) + raise NotImplementedError("users must define prepare_target to use this base class") diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py index 95c2bae2..4cc67f55 100644 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py +++ b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py @@ -115,13 +115,9 @@ def __init__( shared_feat_transform = torch.nn.ModuleList() for i in range(self.n_shared): if i == 0: - shared_feat_transform.append( - Linear(self.input_dim, 2 * (n_d + n_a), bias=False) - ) + shared_feat_transform.append(Linear(self.input_dim, 2 * (n_d + n_a), bias=False)) else: - shared_feat_transform.append( - Linear(n_d + n_a, 2 * (n_d + n_a), bias=False) - ) + shared_feat_transform.append(Linear(n_d + n_a, 2 * (n_d + n_a), bias=False)) else: shared_feat_transform = None @@ -170,9 +166,7 @@ def forward(self, x, prior=None): steps_output = [] for step in range(self.n_steps): M = self.att_transformers[step](prior, att) - M_loss += torch.mean( - torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1) - ) + M_loss += torch.mean(torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1)) # update prior prior = torch.mul(self.gamma - M, prior) # output @@ -337,15 +331,10 @@ def __init__( raise ValueError("n_shared and n_independent can't be both zero.") self.virtual_batch_size = virtual_batch_size - self.embedder = EmbeddingGenerator(input_dim, - cat_dims, - cat_idxs, - cat_emb_dim, - group_attention_matrix) + self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim, group_attention_matrix) self.post_embed_dim = self.embedder.post_embed_dim - self.masker = RandomObfuscator(self.pretraining_ratio, - group_matrix=self.embedder.embedding_group_matrix) + self.masker = RandomObfuscator(self.pretraining_ratio, group_matrix=self.embedder.embedding_group_matrix) self.encoder = TabNetEncoder( input_dim=self.post_embed_dim, output_dim=self.post_embed_dim, @@ -474,7 +463,7 @@ def __init__( virtual_batch_size=virtual_batch_size, momentum=momentum, mask_type=mask_type, - group_attention_matrix=group_attention_matrix + group_attention_matrix=group_attention_matrix, ) if self.is_multi_task: @@ -588,11 +577,7 @@ def __init__( raise ValueError("n_shared and n_independent can't be both zero.") self.virtual_batch_size = virtual_batch_size - self.embedder = EmbeddingGenerator(input_dim, - cat_dims, - cat_idxs, - cat_emb_dim, - group_attention_matrix) + self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim, group_attention_matrix) self.post_embed_dim = self.embedder.post_embed_dim self.tabnet = TabNetNoEmbeddings( @@ -608,7 +593,7 @@ def __init__( virtual_batch_size, momentum, mask_type, - self.embedder.embedding_group_matrix + self.embedder.embedding_group_matrix, ) def forward(self, x): @@ -649,9 +634,7 @@ def __init__( super(AttentiveTransformer, self).__init__() self.fc = Linear(input_dim, group_dim, bias=False) initialize_non_glu(self.fc, input_dim, group_dim) - self.bn = GBN( - group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum - ) + self.bn = GBN(group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) if mask_type == "sparsemax": # Sparsemax @@ -660,9 +643,7 @@ def __init__( # Entmax self.selector = sparsemax.Entmax15(dim=-1) else: - raise NotImplementedError( - "Please choose either sparsemax" + "or entmax as masktype" - ) 
+ raise NotImplementedError("Please choose either sparsemax" + "or entmax as masktype") def forward(self, priors, processed_feat): x = self.fc(processed_feat) @@ -729,9 +710,7 @@ def __init__( self.specifics = torch.nn.Identity() else: spec_input_dim = input_dim if is_first else output_dim - self.specifics = GLU_Block( - spec_input_dim, output_dim, first=is_first, **params - ) + self.specifics = GLU_Block(spec_input_dim, output_dim, first=is_first, **params) def forward(self, x): x = self.shared(x) @@ -783,9 +762,7 @@ def forward(self, x): class GLU_Layer(torch.nn.Module): - def __init__( - self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02 - ): + def __init__(self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02): super(GLU_Layer, self).__init__() self.output_dim = output_dim @@ -795,9 +772,7 @@ def __init__( self.fc = Linear(input_dim, 2 * output_dim, bias=False) initialize_glu(self.fc, input_dim, 2 * output_dim) - self.bn = GBN( - 2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum - ) + self.bn = GBN(2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) def forward(self, x): x = self.fc(x) @@ -852,20 +827,23 @@ def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dims, group_matrix): # update group matrix n_groups = group_matrix.shape[0] - self.embedding_group_matrix = torch.empty((n_groups, self.post_embed_dim), - device=group_matrix.device) + self.embedding_group_matrix = torch.empty((n_groups, self.post_embed_dim), device=group_matrix.device) for group_idx in range(n_groups): post_emb_idx = 0 cat_feat_counter = 0 for init_feat_idx in range(input_dim): if self.continuous_idx[init_feat_idx] == 1: # this means that no embedding is applied to this column - self.embedding_group_matrix[group_idx, post_emb_idx] = group_matrix[group_idx, init_feat_idx] # noqa + self.embedding_group_matrix[group_idx, post_emb_idx] = group_matrix[ + group_idx, init_feat_idx + ] # noqa post_emb_idx += 1 else: # this is a categorical feature which creates multiple embeddings n_embeddings = cat_emb_dims[cat_feat_counter] - self.embedding_group_matrix[group_idx, post_emb_idx:post_emb_idx+n_embeddings] = group_matrix[group_idx, init_feat_idx] / n_embeddings # noqa + self.embedding_group_matrix[group_idx, post_emb_idx : post_emb_idx + n_embeddings] = ( + group_matrix[group_idx, init_feat_idx] / n_embeddings + ) # noqa post_emb_idx += n_embeddings cat_feat_counter += 1 @@ -886,9 +864,7 @@ def forward(self, x): if is_continuous: cols.append(x[:, feat_init_idx].float().view(-1, 1)) else: - cols.append( - self.embeddings[cat_feat_counter](x[:, feat_init_idx].long()) - ) + cols.append(self.embeddings[cat_feat_counter](x[:, feat_init_idx].long())) cat_feat_counter += 1 # concat post_embeddings = torch.cat(cols, dim=1) @@ -913,7 +889,7 @@ def __init__(self, pretraining_ratio, group_matrix): super(RandomObfuscator, self).__init__() self.pretraining_ratio = pretraining_ratio # group matrix is set to boolean here to pass all posssible information - self.group_matrix = (group_matrix > 0) + 0. 
+ self.group_matrix = (group_matrix > 0) + 0.0 self.num_groups = group_matrix.shape[0] def forward(self, x): @@ -926,9 +902,7 @@ def forward(self, x): """ bs = x.shape[0] - obfuscated_groups = torch.bernoulli( - self.pretraining_ratio * torch.ones((bs, self.num_groups), device=x.device) - ) + obfuscated_groups = torch.bernoulli(self.pretraining_ratio * torch.ones((bs, self.num_groups), device=x.device)) obfuscated_vars = torch.matmul(obfuscated_groups, self.group_matrix) masked_input = torch.mul(1 - obfuscated_vars, x) return masked_input, obfuscated_groups, obfuscated_vars diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index 650a86e6..970e8b3e 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -511,3 +511,194 @@ class BasicCatEmbeddingFlat(BasicCatEmbedding): def __init__(self, *args, **kwargs): super(BasicCatEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class NLinearMemoryEfficient(nn.Module): + """Linear multi-dim embedding from https://github.com/yandex-research/tabular-dl-num-embeddings/tree/c1d9eb63c0685b51d7e1bc081cdce6ffdb8886a8. + + Args: + n : num of features. + d_in: input size. + d_out: output size. + """ + + def __init__(self, n: int, d_in: int, d_out: int) -> None: + super().__init__() + self.layers = nn.ModuleList([nn.Linear(d_in, d_out) for _ in range(n)]) + + def forward(self, x): + return torch.stack([l(x[:, i]) for i, l in enumerate(self.layers)], 1) + + +class Periodic(nn.Module): + """Periodic positional embedding for numeric features from https://github.com/yandex-research/tabular-dl-num-embeddings/tree/c1d9eb63c0685b51d7e1bc081cdce6ffdb8886a8. + + Args: + n_features: num of numeric features + emb_size: output size will be 2*emb_size + sigma: weights will be initialized with N(0,sigma) + flatten_output: if flatten output or not. + """ + + def __init__( + self, n_features: int, emb_size: int = 64, sigma: float = 0.05, flatten_output: bool = False, **kwargs + ) -> None: + super().__init__() + self.n_features = n_features + self.emb_size = emb_size + coefficients = torch.normal(0.0, sigma, (n_features, emb_size)) + self.coefficients = nn.Parameter(coefficients) + self.flatten_output = flatten_output + + @staticmethod + def _cos_sin(x: Tensor) -> Tensor: + return torch.cat([torch.cos(x), torch.sin(x)], -1) + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.emb_size * 2 * self.n_features + else: + return self.n_features + + def forward(self, x: Tensor) -> Tensor: + x = self._cos_sin(2 * torch.pi * self.coefficients[None] * x[..., None]) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class PLREmbedding(nn.Module): + """ReLU ◦ Linear ◦ Periodic embedding for numeric features from https://arxiv.org/pdf/2203.05556.pdf. 
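# --- Editor's note: illustrative sketch, not part of the patch. Periodic maps each
# numeric value x to [cos(2*pi*c*x), sin(2*pi*c*x)] with learned coefficients c, so the
# unflattened output has shape (batch, n_features, 2 * emb_size):
import torch
from lightautoml.text.embed import Periodic

pe = Periodic(n_features=4, emb_size=8, sigma=0.05)
out = pe(torch.randn(16, 4))
print(out.shape)  # torch.Size([16, 4, 16])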
+ + Args: + num_dims: number of numeric features. + embedding_size: output embedding size for each feature. + emb_size_periodic: emb_size of the underlying Periodic module. + sigma_periodic: sigma of the underlying Periodic module. + flatten_output: if flatten output or not. + """ + + def __init__( + self, + num_dims: int, + embedding_size: Union[int, Tuple[int, ...], List[int]] = 64, + emb_size_periodic: int = 64, + sigma_periodic: float = 0.05, + flatten_output: bool = False, + **kwargs, + ) -> None: + super().__init__() + self.num_dims = num_dims + self.embedding_size = embedding_size + self.layers: list[nn.Module] = [] + self.layers.append(Periodic(num_dims, emb_size_periodic, sigma_periodic)) + self.layers.append(NLinearMemoryEfficient(num_dims, 2 * emb_size_periodic, embedding_size)) + self.layers.append(nn.ReLU()) + self.layers = nn.Sequential(*self.layers) + self.flatten_output = flatten_output + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.num_dims * self.embedding_size + else: + return self.num_dims + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + x = self.layers(X) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class PLREmbeddingFlat(PLREmbedding): + """Flatten version of PLREmbedding.""" + + def __init__(self, *args, **kwargs): + super(PLREmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class SoftEmbedding(torch.nn.Module): + """ + Soft one-hot encoding embedding technique, from https://arxiv.org/pdf/1708.00065.pdf + In a nutshell, it represents a continuous feature as a weighted average of embeddings + """ + + def __init__(self, num_dims, embedding_size=10, flatten_output: bool = False, **kwargs) -> None: + """ + + Parameters + ---------- + num_dims: Number of numeric features (also the cardinality of the embedding table). + embedding_size: The dimension of the vector space for projecting each scalar value. + flatten_output: Whether to reshape the output to (batch, num_dims * embedding_size). + """ + super(SoftEmbedding, self).__init__() + self.embedding_table = torch.nn.Embedding(num_dims, embedding_size) + nn.init.xavier_uniform_(self.embedding_table.weight) + + self.projection_layer = torch.nn.Linear(1, num_dims, bias=True) + self.softmax = torch.nn.Softmax(dim=-1) + self.emb_size = embedding_size + self.num_dims = num_dims + self.flatten_output = flatten_output + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.num_dims * self.emb_size + else: + return self.num_dims + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. 
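# --- Editor's note: illustrative sketch, not part of the patch. PLREmbedding chains
# Periodic -> per-feature Linear -> ReLU, producing one embedding_size vector per
# numeric feature; the Flat variant reshapes the result to 2D:
import torch
from lightautoml.text.embed import PLREmbedding

plr = PLREmbedding(num_dims=4, embedding_size=32)
out = plr({"cont": torch.randn(16, 4)})
print(out.shape)  # torch.Size([16, 4, 32]); PLREmbeddingFlat gives (16, 128)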
+ + Args: + X : Dict + + Returns: + torch.Tensor + + """ + X = X["cont"] + input_numeric = X.unsqueeze(-1) + weights = self.softmax(self.projection_layer(input_numeric)) + x = (weights.unsqueeze(-1) * self.embedding_table.weight).sum(-2) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x + + +class SoftEmbeddingFlat(SoftEmbedding): + """Flatten version of SoftEmbedding.""" + + def __init__(self, *args, **kwargs): + super(SoftEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) From f58b4c5f49e5c3206d0f3eeb017dc773b9d723e6 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 28 Aug 2023 08:39:05 +0000 Subject: [PATCH 21/49] no-changes --- lightautoml/automl/presets/tabular_presets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index cac85a0e..ff97e075 100755 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -555,7 +555,6 @@ def create_automl(self, **fit_args): self.infer_auto_params(train_data, multilevel_avail) reader = PandasToPandasReader(task=self.task, **self.reader_params) pre_selector = self.get_selector() - levels = [] for n, names in enumerate(self.general_params["use_algos"]): From a50a90fef22a8bf8f7f01b2a210a737c7d45c8d1 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 29 Aug 2023 11:55:11 +0000 Subject: [PATCH 22/49] beautiful CV tutorial --- examples/tutorials/Tutorial_8_CV_preset.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tutorials/Tutorial_8_CV_preset.ipynb b/examples/tutorials/Tutorial_8_CV_preset.ipynb index 18f7c68a..e23668a5 100644 --- a/examples/tutorials/Tutorial_8_CV_preset.ipynb +++ b/examples/tutorials/Tutorial_8_CV_preset.ipynb @@ -3060,7 +3060,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "No we can choose another model from timm. So we will use resnet50.a1_in1k, by default it uses vit_base_patch16_224.augreg_in21k" + "### Now we can choose another model from ```timm```. So we will use ```tf_efficientnetv2_b0.in1k```, by default it uses ```vit_base_patch16_224.augreg_in21k```" ] }, { From c66b3c267e659e310aa57b9757ddb2becefcc482 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 29 Aug 2023 12:37:26 +0000 Subject: [PATCH 23/49] added some changes on comments --- lightautoml/automl/presets/image_config.yml | 6 +++--- lightautoml/automl/presets/tabular_config.yml | 4 ++-- .../tabular_configs/conf_0_sel_type_0.yml | 14 +++++++++---- .../tabular_configs/conf_1_sel_type_1.yml | 12 ++++++++--- .../conf_2_select_mode_1_no_typ.yml | 12 ++++++++--- .../conf_3_sel_type_1_no_inter_lgbm.yml | 12 ++++++++--- .../conf_4_sel_type_0_no_int.yml | 12 ++++++++--- .../conf_5_sel_type_1_tuning_full.yml | 12 ++++++++--- ...f_6_sel_type_1_tuning_full_no_int_lgbm.yml | 12 ++++++++--- lightautoml/ml_algo/dl_model.py | 20 ++++--------------- pyproject.toml | 2 +- 11 files changed, 74 insertions(+), 44 deletions(-) diff --git a/lightautoml/automl/presets/image_config.yml b/lightautoml/automl/presets/image_config.yml index 2ba5b4c7..fa206e3c 100755 --- a/lightautoml/automl/presets/image_config.yml +++ b/lightautoml/automl/presets/image_config.yml @@ -240,7 +240,7 @@ nn_params: # Look for NN train params here. 
- # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight # embedding_size if needed embedding_size: 10 @@ -266,7 +266,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -274,7 +274,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_config.yml b/lightautoml/automl/presets/tabular_config.yml index bcf6abbd..d391d5e8 100755 --- a/lightautoml/automl/presets/tabular_config.yml +++ b/lightautoml/automl/presets/tabular_config.yml @@ -126,7 +126,7 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. - # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight # embedding_size if needed embedding_size: 10 @@ -160,7 +160,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 256, 128] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_0_sel_type_0.yml b/lightautoml/automl/presets/tabular_configs/conf_0_sel_type_0.yml index dad3f314..95494f9b 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_0_sel_type_0.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_0_sel_type_0.yml @@ -98,9 +98,15 @@ linear_l2_params: # params for NN model nn_params: - # Look for NN train params here. - # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # Look for NN train params here. 
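
The act_fun option below accepts either the name of a torch.nn activation or a custom
nn.Module, per the comment in these configs; a sketch of the latter (ScaledReLU is a
made-up name for illustration):

    import torch
    from torch import nn

    class ScaledReLU(nn.Module):
        # Any nn.Module exposing forward(x) fits the act_fun slot.
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return 0.5 * torch.relu(x)

    nn_params = {"model": "mlp", "act_fun": ScaledReLU}  # class instead of the string name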
+ # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -119,7 +125,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -127,7 +133,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_1_sel_type_1.yml b/lightautoml/automl/presets/tabular_configs/conf_1_sel_type_1.yml index 6841b11e..bf202cd6 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_1_sel_type_1.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_1_sel_type_1.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. - # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_2_select_mode_1_no_typ.yml b/lightautoml/automl/presets/tabular_configs/conf_2_select_mode_1_no_typ.yml index cff3a6cd..eeaa2535 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_2_select_mode_1_no_typ.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_2_select_mode_1_no_typ.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. 
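
The 'weighted' choice listed for cat_embedder in these configs blends each category's
embedding with a shared default embedding using a frequency-based weight (its
implementation is reworked in patches 25-26 below); the core idea, sketched with
illustrative names:

    import torch

    def weighted_embedding(emb_cat: torch.Tensor, emb_default: torch.Tensor,
                           count: int, alpha: float = 20.0) -> torch.Tensor:
        # Rare categories (small count) fall back toward the shared default embedding.
        w = count / (count + alpha)  # same smoothing as WeightedCatEmbedding uses
        return w * emb_cat + (1 - w) * emb_default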
- # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_3_sel_type_1_no_inter_lgbm.yml b/lightautoml/automl/presets/tabular_configs/conf_3_sel_type_1_no_inter_lgbm.yml index 4af9e989..318af04b 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_3_sel_type_1_no_inter_lgbm.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_3_sel_type_1_no_inter_lgbm.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. - # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_4_sel_type_0_no_int.yml b/lightautoml/automl/presets/tabular_configs/conf_4_sel_type_0_no_int.yml index 07ba1a91..6cc47500 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_4_sel_type_0_no_int.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_4_sel_type_0_no_int.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. 
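
For continuous features, the PLREmbedding shown at the top of this section follows the
periodic-linear-ReLU embeddings of Gorishniy et al. (https://arxiv.org/abs/2203.05556).
A self-contained sketch of the periodic stage it builds on (standard formulation; the
in-tree Periodic class may differ in details):

    import torch
    from torch import nn

    class PeriodicSketch(nn.Module):
        # Maps each scalar feature to [cos(2*pi*c*x), sin(2*pi*c*x)] with learned c.
        def __init__(self, num_dims: int, emb_size_periodic: int = 64, sigma_periodic: float = 0.05):
            super().__init__()
            self.coefficients = nn.Parameter(torch.normal(0.0, sigma_periodic, (num_dims, emb_size_periodic)))

        def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (batch, num_dims)
            v = 2 * torch.pi * self.coefficients[None] * x[..., None]
            return torch.cat([torch.cos(v), torch.sin(v)], dim=-1)  # (batch, num_dims, 2 * emb_size_periodic)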
- # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_5_sel_type_1_tuning_full.yml b/lightautoml/automl/presets/tabular_configs/conf_5_sel_type_1_tuning_full.yml index f0175669..2c7b65fd 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_5_sel_type_1_tuning_full.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_5_sel_type_1_tuning_full.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. - # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/automl/presets/tabular_configs/conf_6_sel_type_1_tuning_full_no_int_lgbm.yml b/lightautoml/automl/presets/tabular_configs/conf_6_sel_type_1_tuning_full_no_int_lgbm.yml index 4af9e989..318af04b 100644 --- a/lightautoml/automl/presets/tabular_configs/conf_6_sel_type_1_tuning_full_no_int_lgbm.yml +++ b/lightautoml/automl/presets/tabular_configs/conf_6_sel_type_1_tuning_full_no_int_lgbm.yml @@ -100,8 +100,14 @@ linear_l2_params: # params for NN model nn_params: # Look for NN train params here. 
- # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn'] or custom torch model + # str in ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint'] or custom torch model model: denselight + # embedding_size if needed + embedding_size: 10 + # str in ['cat', 'cat_no_dropout', 'weighted'] + cat_embedder: "cat" + # str in ['cont', 'linear', 'dense'] + cont_embedder: "cont" # use model with custom embeddings model_with_emb: false # tune custom network @@ -120,7 +126,7 @@ nn_params: # add fc layer before model with certain dim num_init_features: null # activation function (str in torch.nn activation functions or custom nn.Module) - act_fun: ReLU + act_fun: LeakyReLU # add noise after dropout layer for more regularization use_noise: false # noise parameter @@ -128,7 +134,7 @@ nn_params: # use BatchNorm use_bn: true # define hidden layer dimensions for models in ['mlp', 'denselight', 'snn'] - hidden_size: [512, 512, 512] + hidden_size: [512, 256] # dim of intermediate fc is increased times this factor in ResnetModel layer hid_factor: [2, 2] # list of number of layers within each DenseModel block diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 8e6de1b9..22780343 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -113,31 +113,19 @@ def _get_embedder_cat(params): if input_type_by_name[params["model"]] == "seq": - try: - out = cat_embedder_by_name[params["cat_embedder"]] - except KeyError: - out = BasicCatEmbedding + out = cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding) return out else: - try: - out = cat_embedder_by_name_flat[params["cat_embedder"]] - except KeyError: - out = CatEmbedder + out = cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder) return out def _get_embedder_cont(params): if input_type_by_name[params["model"]] == "seq": - try: - out = cont_embedder_by_name[params["cont_embedder"]] - except KeyError: - out = LinearEmbedding + out = cont_embedder_by_name.get(params["cat_embedder"], LinearEmbedding) return out else: - try: - out = cont_embedder_by_name_flat[params["cont_embedder"]] - except KeyError: - out = ContEmbedder + out = cont_embedder_by_name_flat.get(params["cat_embedder"], ContEmbedder) return out diff --git a/pyproject.toml b/pyproject.toml index 352a0cad..85473d4a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,7 +71,7 @@ transformers = {version = ">=4", optional = true} # CV albumentations = {version = "<=1.0.3", optional = true} -timm = {version = "*", optional = true} +timm = {version = ">=0.9.0", optional = true} opencv-python = {version = "<=4.8.0.74", optional = true} PyWavelets = {version = "*", optional = true} torchvision = [ From d045b8e45b7507f0dff042c8232ffa1d8680b6bc Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 29 Aug 2023 13:46:40 +0000 Subject: [PATCH 24/49] removed useless function --- lightautoml/ml_algo/dl_model.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 22780343..844a983c 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -111,24 +111,6 @@ cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding} -def _get_embedder_cat(params): - if input_type_by_name[params["model"]] == "seq": - out = cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding) - return out - else: - out = 
cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder) - return out - - -def _get_embedder_cont(params): - if input_type_by_name[params["model"]] == "seq": - out = cont_embedder_by_name.get(params["cat_embedder"], LinearEmbedding) - return out - else: - out = cont_embedder_by_name_flat.get(params["cat_embedder"], ContEmbedder) - return out - - class TorchModel(TabularMLAlgo): """Neural net for tabular datasets. @@ -300,7 +282,11 @@ def _infer_params(self): net=TorchUniversalModel if not params["model_with_emb"] else params["model"], net_params={ "task": self.task, - "cont_embedder_": _get_embedder_cont(params) if is_cont else None, + "cont_embedder_": cont_embedder_by_name.get(params["cont_embedder"], LinearEmbedding) + if input_type_by_name[params["model"]] == "seq" + else cont_embedder_by_name_flat.get(params["cont_embedder"], ContEmbedder) + if is_cont + else None, "cont_params": { "num_dims": params["num_dims"], "input_bn": params["input_bn"], @@ -309,7 +295,11 @@ def _infer_params(self): } if is_cont else None, - "cat_embedder_": _get_embedder_cat(params) if is_cat else None, + "cat_embedder_": cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding) + if input_type_by_name[params["model"]] == "seq" + else cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder) + if is_cat + else None, "cat_params": { "cat_vc": params["cat_vc"], "cat_dims": params["cat_dims"], From 382b385a0e66292625d39aed901852ad0c9a45a9 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 29 Aug 2023 15:59:21 +0000 Subject: [PATCH 25/49] removed for-for --- lightautoml/ml_algo/dl_model.py | 5 ++- lightautoml/text/embed.py | 69 +++++++++++++++++---------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 844a983c..bc28fa77 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -405,7 +405,10 @@ def _init_params_on_input(self, train_valid_iterator) -> dict: ) + 1 ) - values, counts = np.unique(train_valid_iterator.train[:, cat_feature].data, return_counts=True) + values, counts = np.unique( + np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]), + return_counts=True, + ) cat_value_counts.append(dict(zip(values, counts))) cat_dims.append(num_unique_categories) new_params["cat_dims"] = cat_dims diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index 650a86e6..54a6ee64 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -266,6 +266,7 @@ class WeightedCatEmbedding(nn.Module): def __init__( self, + cat_dims: Sequence[int], cat_vc: Sequence[Dict], embedding_size: int = 10, alpha: int = 20, @@ -284,27 +285,29 @@ def __init__( self.num_values = 0 self.embedding: Optional[nn.Embedding] = None self.embedding_size = embedding_size - self._from_summary(cat_vc) + self._from_summary(cat_vc, cat_dims) self.cat_len = len(cat_vc) + self.cat_dims = cat_dims - def _from_summary(self, unique_counts: List[Dict[Any, int]]): - lookup = {} - lookup_default = {} - num_values = 0 + def _from_summary(self, unique_counts: List[Dict[Any, int]], cat_dims: Sequence[int]): + self.emb_layers = nn.ModuleList([nn.Embedding(int(x), self.embedding_size) for x in cat_dims]) + self.def_layers = nn.ModuleList([nn.Embedding(1, 1) for _ in cat_dims]) + weights_list = [] for fieldnum, counts in enumerate(unique_counts): - lookup_default[fieldnum] = (num_values, 0) - num_values += 1 - for value, count in counts.items(): - 
lookup[(fieldnum, value)] = (num_values, count) - num_values += 1 - + weights = [] + for i, vc in enumerate(sorted(counts.items())): + value, count = vc + if i == 0 and value != 0.0: + weights.append([0]) + weights.append([count / (count + self.alpha)]) + weights_list.append(weights) + self.w_emb_layers = nn.ModuleList( + [nn.Embedding.from_pretrained(torch.tensor(x, dtype=torch.float32)) for x in weights_list] + ) self.num_fields = len(unique_counts) self.output_size = self.num_fields * self.embedding_size - self.lookup = lookup - self.lookup_default = lookup_default - self.num_values = num_values - self.embedding = nn.Embedding(num_values, self.embedding_size) - nn.init.xavier_uniform_(self.embedding.weight) + for emb in self.emb_layers: + nn.init.xavier_uniform_(emb.weight) def get_out_shape(self) -> int: """Output shape. @@ -328,23 +331,23 @@ def forward(self, X: Dict) -> Tensor: torch.Tensor """ X = X["cat"] - list_weights: List[List[List[float]]] = [] - idxs_primary: List[List[int]] = [] - idxs_default: List[List[int]] = [] - for row in X: - list_weights.append([]) - idxs_primary.append([]) - idxs_default.append([]) - for col, val in enumerate(row): - val = val.item() - default = self.lookup_default[col] - idx, count = self.lookup.get((col, val), default) - list_weights[-1].append([count / (count + self.alpha)]) - idxs_primary[-1].append(idx) - idxs_default[-1].append(default[0]) - tsr_weights = torch.tensor(list_weights, dtype=torch.float32, device=self._device) - emb_primary = self.embedding(torch.tensor(idxs_primary, dtype=torch.int64, device=self._device)) - emb_default = self.embedding(torch.tensor(idxs_default, dtype=torch.int64, device=self._device)) + emb_primary = torch.stack( + [emb_layer(X[:, i]) for i, emb_layer in enumerate(self.emb_layers)], + dim=1, + ) + tsr_weights = torch.stack( + [emb_layer(X[:, i]) for i, emb_layer in enumerate(self.w_emb_layers)], + dim=1, + ) + + emb_default = torch.stack( + [ + emb_layer(torch.tensor([0] * len(X[:, i]), device=self._device)) + for i, emb_layer in enumerate(self.def_layers) + ], + dim=1, + ) + x = tsr_weights * emb_primary + (1 - tsr_weights) * emb_default if self.flatten_output: return x.view(x.shape[0], -1) From 4955b2eeff7d53a915d4dfccf2439526af61fa0d Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 30 Aug 2023 09:40:14 +0000 Subject: [PATCH 26/49] WeightedEmbedder bugfix --- lightautoml/text/embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index 54a6ee64..dcb63b6e 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -291,7 +291,7 @@ def __init__( def _from_summary(self, unique_counts: List[Dict[Any, int]], cat_dims: Sequence[int]): self.emb_layers = nn.ModuleList([nn.Embedding(int(x), self.embedding_size) for x in cat_dims]) - self.def_layers = nn.ModuleList([nn.Embedding(1, 1) for _ in cat_dims]) + self.def_layers = nn.ModuleList([nn.Embedding(1, self.embedding_size) for _ in cat_dims]) weights_list = [] for fieldnum, counts in enumerate(unique_counts): weights = [] From 1bdf9d52934ede8496fba3bb99c4108164ea08da Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 30 Aug 2023 09:45:07 +0000 Subject: [PATCH 27/49] delete unused import --- lightautoml/ml_algo/torch_based/nn_models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 47f7f43d..fe0c6575 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ 
b/lightautoml/ml_algo/torch_based/nn_models.py @@ -13,8 +13,6 @@ from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock, MeanPooling -from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock, MeanPooling - class GaussianNoise(nn.Module): """Adds gaussian noise. From b51e4deb1f9a36f53e5e096dd777aade092b95a3 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 30 Aug 2023 10:03:54 +0000 Subject: [PATCH 28/49] changed link --- lightautoml/ml_algo/torch_based/nn_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index fe0c6575..30676df8 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -857,7 +857,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class AutoInt(nn.Module): - """The NODE model from https://github.com/Qwicen. + """The AutoInt model from https://github.com/jrfiedler/xynn. Args: n_in: Input dim. From 6d4a74ea4e1e8fa16a0581c02bdfd1ebe11848d6 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 30 Aug 2023 13:46:47 +0000 Subject: [PATCH 29/49] add tabnet/plr/softemb --- lightautoml/ml_algo/tabnet/utils.py | 166 ++-- lightautoml/ml_algo/torch_based/nn_models.py | 45 +- .../pytorch_tabnet/abstract_model.py | 801 --------------- .../pytorch_tabnet/augmentations.py | 93 -- .../torch_based/pytorch_tabnet/callbacks.py | 287 ------ .../torch_based/pytorch_tabnet/metrics.py | 515 ---------- .../pytorch_tabnet/multiclass_utils.py | 402 -------- .../torch_based/pytorch_tabnet/multitask.py | 167 ---- .../torch_based/pytorch_tabnet/pretraining.py | 418 -------- .../pytorch_tabnet/pretraining_utils.py | 119 --- .../torch_based/pytorch_tabnet/sparsemax.py | 276 ------ .../torch_based/pytorch_tabnet/tab_model.py | 146 --- .../torch_based/pytorch_tabnet/tab_network.py | 908 ------------------ .../torch_based/pytorch_tabnet/utils.py | 529 ---------- lightautoml/text/embed.py | 17 +- 15 files changed, 141 insertions(+), 4748 deletions(-) delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py delete mode 100755 lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py delete mode 100644 lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/tabnet/utils.py index 40845a8b..a901b7c6 100644 --- a/lightautoml/ml_algo/tabnet/utils.py +++ b/lightautoml/ml_algo/tabnet/utils.py @@ -6,14 +6,14 @@ from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm -def initialize_non_glu(module, input_dim, output_dim): +def _initialize_non_glu(module, input_dim, output_dim): gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(4 * input_dim)) 
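    # Xavier-normal init with TabNet's custom gain, sqrt((fan_in + fan_out) / sqrt(4 * fan_in));
    # the bias init below stays commented out, matching the reference implementation.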
torch.nn.init.xavier_normal_(module.weight, gain=gain_value) # torch.nn.init.zeros_(module.bias) return -def initialize_glu(module, input_dim, output_dim): +def _initialize_glu(module, input_dim, output_dim): gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(input_dim)) torch.nn.init.xavier_normal_(module.weight, gain=gain_value) # torch.nn.init.zeros_(module.bias) @@ -21,27 +21,11 @@ def initialize_glu(module, input_dim, output_dim): class TabNetEncoder(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - group_attention_matrix=None, - ): - """ - Defines main part of the TabNet network without the embedding layers. + """Defines main part of the TabNet network without the embedding layers. - Parameters - ---------- + Code from https://github.com/dreamquark-ai/tabnet + + Args: input_dim : int Number of features output_dim : int or list of int for multi task classification @@ -69,7 +53,24 @@ def __init__( Either "sparsemax" or "entmax" : this is the masking function to use group_attention_matrix : torch matrix Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j - """ + """ + + def __init__( + self, + input_dim, + output_dim, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + group_attention_matrix=None, + ): super(TabNetEncoder, self).__init__() self.input_dim = input_dim self.output_dim = output_dim @@ -137,6 +138,15 @@ def __init__( self.att_transformers.append(attention) def forward(self, x, prior=None): + """Forward-pass of encoder. + + Args: + x : input Tensor + prior : mask for AttentiveTransformer + + Returns: + sequence of outputs, regulariztion loss + """ x = self.initial_bn(x) bs = x.shape[0] # batch size @@ -164,6 +174,14 @@ def forward(self, x, prior=None): return steps_output, M_loss def forward_masks(self, x): + """Magic forward-pass of encoder that returns masks. + + Args: + x : input Tensor + + Returns: + new and old masks. + """ x = self.initial_bn(x) bs = x.shape[0] # batch size prior = torch.ones((bs, self.attention_dim)).to(x.device) @@ -191,21 +209,9 @@ def forward_masks(self, x): class FeatTransformer(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - shared_layers, - n_glu_independent, - virtual_batch_size=128, - momentum=0.02, - ): - super(FeatTransformer, self).__init__() - """ - Initialize a feature transformer. + """Feature transformer from https://github.com/dreamquark-ai/tabnet. 
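+
+    Each GLU layer gates a doubled-width linear projection, ``out = a * sigmoid(b)``;
+    the shared layers are reused across all decision steps, the rest are step-specific.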
- Parameters - ---------- + Args: input_dim : int Input size output_dim : int @@ -218,8 +224,18 @@ def __init__( Batch size for Ghost Batch Normalization within GLU block(s) momentum : float Float value between 0 and 1 which will be used for momentum in batch norm - """ + """ + def __init__( + self, + input_dim, + output_dim, + shared_layers, + n_glu_independent, + virtual_batch_size=128, + momentum=0.02, + ): + super(FeatTransformer, self).__init__() params = { "n_glu": n_glu_independent, "virtual_batch_size": virtual_batch_size, @@ -250,14 +266,32 @@ def __init__( self.specifics = GLU_Block(spec_input_dim, output_dim, first=is_first, **params) def forward(self, x): + """Forward-pass.""" x = self.shared(x) x = self.specifics(x) return x class GLU_Block(torch.nn.Module): - """ - Independent GLU block, specific to each step + """Independent GLU block, specific to each step. + + Code from https://github.com/dreamquark-ai/tabnet. + + Args: + input_dim : int + Input size + output_dim : int + Output_size + shared_layers : torch.nn.ModuleList + The shared block that should be common to every step + n_glu : int + Number of independent GLU layers + virtual_batch_size : int + Batch size for Ghost Batch Normalization within GLU block(s) + momentum : float + Float value between 0 and 1 which will be used for momentum in batch norm + first : bool + if the first layer of the block has no scale multiplication or not """ def __init__( @@ -285,6 +319,7 @@ def __init__( self.glu_layers.append(GLU_Layer(output_dim, output_dim, fc=fc, **params)) def forward(self, x): + """Forward-pass.""" scale = torch.sqrt(torch.FloatTensor([0.5]).to(x.device)) if self.first: # the first layer of the block has no scale multiplication x = self.glu_layers[0](x) @@ -299,6 +334,22 @@ def forward(self, x): class GLU_Layer(torch.nn.Module): + """GLU layer implementation. + + Args: + input_dim : int + Input size + output_dim : int + Output_size + fc : torch.nn.Module + Optional fully-connected layer + virtual_batch_size : int + Batch size for Ghost Batch Normalization within GLU block(s) + momentum : float + Float value between 0 and 1 which will be used for momentum in batch norm + + """ + def __init__(self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02): super(GLU_Layer, self).__init__() @@ -307,11 +358,12 @@ def __init__(self, input_dim, output_dim, fc=None, virtual_batch_size=128, momen self.fc = fc else: self.fc = nn.Linear(input_dim, 2 * output_dim, bias=False) - initialize_glu(self.fc, input_dim, 2 * output_dim) + _initialize_glu(self.fc, input_dim, 2 * output_dim) self.bn = GhostBatchNorm(2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) def forward(self, x): + """Forward-pass.""" x = self.fc(x) x = self.bn(x) out = torch.mul(x[:, : self.output_dim], torch.sigmoid(x[:, self.output_dim :])) @@ -319,20 +371,11 @@ def forward(self, x): class AttentiveTransformer(torch.nn.Module): - def __init__( - self, - input_dim, - group_dim, - group_matrix, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - ): - """ - Initialize an attention transformer. + """Attention transformer. + + Code from https://github.com/dreamquark-ai/tabnet. 
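+
+    Produces the step's feature-selection mask, roughly
+    ``mask = sparsemax(prior * GhostBatchNorm(Linear(a)))``, where the prior
+    down-weights features already selected by earlier steps.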
- Parameters - ---------- + Args: input_dim : int Input size group_dim : int @@ -343,10 +386,20 @@ def __init__( Float value between 0 and 1 which will be used for momentum in batch norm mask_type : str Either "sparsemax" or "entmax" : this is the masking function to use - """ + """ + + def __init__( + self, + input_dim, + group_dim, + group_matrix, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + ): super(AttentiveTransformer, self).__init__() self.fc = nn.Linear(input_dim, group_dim, bias=False) - initialize_non_glu(self.fc, input_dim, group_dim) + _initialize_non_glu(self.fc, input_dim, group_dim) self.bn = GhostBatchNorm(group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) if mask_type == "sparsemax": @@ -359,6 +412,7 @@ def __init__( raise NotImplementedError("Please choose either sparsemax" + "or entmax as masktype") def forward(self, priors, processed_feat): + """Forward-pass.""" x = self.fc(processed_feat) x = self.bn(x) x = torch.mul(x, priors) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 44b57b56..cee78575 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -979,28 +979,9 @@ def forward(self, embedded: torch.Tensor) -> torch.Tensor: class TabNet(torch.nn.Module): - def __init__( - self, - n_in, - n_out, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - group_attention_matrix=None, - **kwargs, - ): - """ - Defines main part of the TabNet network without the embedding layers. + """Implementation of TabNet from https://github.com/dreamquark-ai/tabnet. - Parameters - ---------- + Args: input_dim : int Number of features output_dim : int or list of int for multi task classification @@ -1028,7 +1009,25 @@ def __init__( Either "sparsemax" or "entmax" : this is the masking function to use group_attention_matrix : torch matrix Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j - """ + """ + + def __init__( + self, + n_in, + n_out, + n_d=8, + n_a=8, + n_steps=3, + gamma=1.3, + n_independent=2, + n_shared=2, + epsilon=1e-15, + virtual_batch_size=128, + momentum=0.02, + mask_type="sparsemax", + group_attention_matrix=None, + **kwargs, + ): super(TabNet, self).__init__() self.input_dim = n_in self.output_dim = n_out @@ -1071,6 +1070,7 @@ def __init__( initialize_non_glu(self.final_mapping, n_d, n_out) def forward(self, x): + """Forward-pass.""" res = 0 steps_output, M_loss = self.encoder(x) res = torch.sum(torch.stack(steps_output, dim=0), dim=0) @@ -1085,4 +1085,5 @@ def forward(self, x): return out def forward_masks(self, x): + """Magic forward-pass of encoder that returns masks.""" return self.encoder.forward_masks(x) diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py deleted file mode 100644 index 76c4de53..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/abstract_model.py +++ /dev/null @@ -1,801 +0,0 @@ -from dataclasses import dataclass, field -from typing import List, Any, Dict -import torch -from torch.nn.utils import clip_grad_norm_ -import numpy as np -from scipy.sparse import csc_matrix -from abc import abstractmethod -from pytorch_tabnet import tab_network -from pytorch_tabnet.utils import ( - SparsePredictDataset, - PredictDataset, - create_explain_matrix, - 
validate_eval_set, - create_dataloaders, - define_device, - ComplexEncoder, - check_input, - check_warm_start, - create_group_matrix, - check_embedding_parameters, -) -from pytorch_tabnet.callbacks import ( - CallbackContainer, - History, - EarlyStopping, - LRSchedulerCallback, -) -from pytorch_tabnet.metrics import MetricContainer, check_metrics -from sklearn.base import BaseEstimator - -from torch.utils.data import DataLoader -import io -import json -from pathlib import Path -import shutil -import zipfile -import warnings -import copy -import scipy - - -@dataclass -class TabModel(BaseEstimator): - """ Class for TabNet model.""" - - n_d: int = 8 - n_a: int = 8 - n_steps: int = 3 - gamma: float = 1.3 - cat_idxs: List[int] = field(default_factory=list) - cat_dims: List[int] = field(default_factory=list) - cat_emb_dim: int = 1 - n_independent: int = 2 - n_shared: int = 2 - epsilon: float = 1e-15 - momentum: float = 0.02 - lambda_sparse: float = 1e-3 - seed: int = 0 - clip_value: int = 1 - verbose: int = 1 - optimizer_fn: Any = torch.optim.Adam - optimizer_params: Dict = field(default_factory=lambda: dict(lr=2e-2)) - scheduler_fn: Any = None - scheduler_params: Dict = field(default_factory=dict) - mask_type: str = "sparsemax" - input_dim: int = None - output_dim: int = None - device_name: str = "auto" - n_shared_decoder: int = 1 - n_indep_decoder: int = 1 - grouped_features: List[List[int]] = field(default_factory=list) - - def __post_init__(self): - # These are default values needed for saving model - self.batch_size = 1024 - self.virtual_batch_size = 128 - - torch.manual_seed(self.seed) - # Defining device - self.device = torch.device(define_device(self.device_name)) - if self.verbose != 0: - warnings.warn(f"Device used : {self.device}") - - # create deep copies of mutable parameters - self.optimizer_fn = copy.deepcopy(self.optimizer_fn) - self.scheduler_fn = copy.deepcopy(self.scheduler_fn) - - updated_params = check_embedding_parameters(self.cat_dims, self.cat_idxs, self.cat_emb_dim) - self.cat_dims, self.cat_idxs, self.cat_emb_dim = updated_params - - def __update__(self, **kwargs): - """ - Updates parameters. - If does not already exists, creates it. - Otherwise overwrite with warnings. - """ - update_list = [ - "cat_dims", - "cat_emb_dim", - "cat_idxs", - "input_dim", - "mask_type", - "n_a", - "n_d", - "n_independent", - "n_shared", - "n_steps", - "grouped_features", - ] - for var_name, value in kwargs.items(): - if var_name in update_list: - try: - exec(f"global previous_val; previous_val = self.{var_name}") - if previous_val != value: # noqa - wrn_msg = f"Pretraining: {var_name} changed from {previous_val} to {value}" # noqa - warnings.warn(wrn_msg) - exec(f"self.{var_name} = value") - except AttributeError: - exec(f"self.{var_name} = value") - - def fit( - self, - X_train, - y_train, - eval_set=None, - eval_name=None, - eval_metric=None, - loss_fn=None, - weights=0, - max_epochs=100, - patience=10, - batch_size=1024, - virtual_batch_size=128, - num_workers=0, - drop_last=True, - callbacks=None, - pin_memory=True, - from_unsupervised=None, - warm_start=False, - augmentations=None, - compute_importance=True, - ): - """Train a neural network stored in self.network - Using train_dataloader for training data and - valid_dataloader for validation. - - Parameters - ---------- - X_train : np.ndarray - Train set - y_train : np.array - Train targets - eval_set : list of tuple - List of eval tuple set (X, y). 
- The last one is used for early stopping - eval_name : list of str - List of eval set names. - eval_metric : list of str - List of evaluation metrics. - The last metric is used for early stopping. - loss_fn : callable or None - a PyTorch loss function - weights : bool or dictionnary - 0 for no balancing - 1 for automated balancing - dict for custom weights per class - max_epochs : int - Maximum number of epochs during training - patience : int - Number of consecutive non improving epoch before early stopping - batch_size : int - Training batch size - virtual_batch_size : int - Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size) - num_workers : int - Number of workers used in torch.utils.data.DataLoader - drop_last : bool - Whether to drop last batch during training - callbacks : list of callback function - List of custom callbacks - pin_memory: bool - Whether to set pin_memory to True or False during training - from_unsupervised: unsupervised trained model - Use a previously self supervised model as starting weights - warm_start: bool - If True, current model parameters are used to start training - compute_importance : bool - Whether to compute feature importance - """ - # update model name - - self.max_epochs = max_epochs - self.patience = patience - self.batch_size = batch_size - self.virtual_batch_size = virtual_batch_size - self.num_workers = num_workers - self.drop_last = drop_last - self.input_dim = X_train.shape[1] - self._stop_training = False - self.pin_memory = pin_memory and (self.device.type != "cpu") - self.augmentations = augmentations - self.compute_importance = compute_importance - - if self.augmentations is not None: - # This ensure reproducibility - self.augmentations._set_seed() - - eval_set = eval_set if eval_set else [] - - if loss_fn is None: - self.loss_fn = self._default_loss - else: - self.loss_fn = loss_fn - - check_input(X_train) - check_warm_start(warm_start, from_unsupervised) - - self.update_fit_params( - X_train, - y_train, - eval_set, - weights, - ) - - # Validate and reformat eval set depending on training data - eval_names, eval_set = validate_eval_set(eval_set, eval_name, X_train, y_train) - - train_dataloader, valid_dataloaders = self._construct_loaders(X_train, y_train, eval_set) - - if from_unsupervised is not None: - # Update parameters to match self pretraining - self.__update__(**from_unsupervised.get_params()) - - if not hasattr(self, "network") or not warm_start: - # model has never been fitted before of warm_start is False - self._set_network() - self._update_network_params() - self._set_metrics(eval_metric, eval_names) - self._set_optimizer() - self._set_callbacks(callbacks) - - if from_unsupervised is not None: - self.load_weights_from_unsupervised(from_unsupervised) - warnings.warn("Loading weights from unsupervised pretraining") - # Call method on_train_begin for all callbacks - self._callback_container.on_train_begin() - - # Training loop over epochs - for epoch_idx in range(self.max_epochs): - - # Call method on_epoch_begin for all callbacks - self._callback_container.on_epoch_begin(epoch_idx) - - self._train_epoch(train_dataloader) - - # Apply predict epoch to all eval sets - for eval_name, valid_dataloader in zip(eval_names, valid_dataloaders): - self._predict_epoch(eval_name, valid_dataloader) - - # Call method on_epoch_end for all callbacks - self._callback_container.on_epoch_end(epoch_idx, logs=self.history.epoch_metrics) - - if self._stop_training: - break - - # Call method on_train_end for all callbacks - 
self._callback_container.on_train_end() - self.network.eval() - - if self.compute_importance: - # compute feature importance once the best model is defined - self.feature_importances_ = self._compute_feature_importances(X_train) - - def predict(self, X): - """ - Make predictions on a batch (valid) - - Parameters - ---------- - X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - - Returns - ------- - predictions : np.array - Predictions of the regression problem - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - results = [] - for batch_nb, data in enumerate(dataloader): - data = data.to(self.device).float() - output, M_loss = self.network(data) - predictions = output.cpu().detach().numpy() - results.append(predictions) - res = np.vstack(results) - return self.predict_func(res) - - def explain(self, X, normalize=False): - """ - Return local explanation - - Parameters - ---------- - X : tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - normalize : bool (default False) - Wheter to normalize so that sum of features are equal to 1 - - Returns - ------- - M_explain : matrix - Importance per sample, per columns. - masks : matrix - Sparse matrix showing attention masks used by network. - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - res_explain = [] - - for batch_nb, data in enumerate(dataloader): - data = data.to(self.device).float() - - M_explain, masks = self.network.forward_masks(data) - for key, value in masks.items(): - masks[key] = csc_matrix.dot(value.cpu().detach().numpy(), self.reducing_matrix) - original_feat_explain = csc_matrix.dot(M_explain.cpu().detach().numpy(), self.reducing_matrix) - res_explain.append(original_feat_explain) - - if batch_nb == 0: - res_masks = masks - else: - for key, value in masks.items(): - res_masks[key] = np.vstack([res_masks[key], value]) - - res_explain = np.vstack(res_explain) - - if normalize: - res_explain /= np.sum(res_explain, axis=1)[:, None] - - return res_explain, res_masks - - def load_weights_from_unsupervised(self, unsupervised_model): - update_state_dict = copy.deepcopy(self.network.state_dict()) - for param, weights in unsupervised_model.network.state_dict().items(): - if param.startswith("encoder"): - # Convert encoder's layers name to match - new_param = "tabnet." + param - else: - new_param = param - if self.network.state_dict().get(new_param) is not None: - # update only common layers - update_state_dict[new_param] = weights - - self.network.load_state_dict(update_state_dict) - - def load_class_attrs(self, class_attrs): - for attr_name, attr_value in class_attrs.items(): - setattr(self, attr_name, attr_value) - - def save_model(self, path): - """Saving TabNet model in two distinct files. - - Parameters - ---------- - path : str - Path of the model. 
- - Returns - ------- - str - input filepath with ".zip" appended - - """ - saved_params = {} - init_params = {} - for key, val in self.get_params().items(): - if isinstance(val, type): - # Don't save torch specific params - continue - else: - init_params[key] = val - saved_params["init_params"] = init_params - - class_attrs = {"preds_mapper": self.preds_mapper} - saved_params["class_attrs"] = class_attrs - - # Create folder - Path(path).mkdir(parents=True, exist_ok=True) - - # Save models params - with open(Path(path).joinpath("model_params.json"), "w", encoding="utf8") as f: - json.dump(saved_params, f, cls=ComplexEncoder) - - # Save state_dict - torch.save(self.network.state_dict(), Path(path).joinpath("network.pt")) - shutil.make_archive(path, "zip", path) - shutil.rmtree(path) - print(f"Successfully saved model at {path}.zip") - return f"{path}.zip" - - def load_model(self, filepath): - """Load TabNet model. - - Parameters - ---------- - filepath : str - Path of the model. - """ - try: - with zipfile.ZipFile(filepath) as z: - with z.open("model_params.json") as f: - loaded_params = json.load(f) - loaded_params["init_params"]["device_name"] = self.device_name - with z.open("network.pt") as f: - try: - saved_state_dict = torch.load(f, map_location=self.device) - except io.UnsupportedOperation: - # In Python <3.7, the returned file object is not seekable (which at least - # some versions of PyTorch require) - so we'll try buffering it in to a - # BytesIO instead: - saved_state_dict = torch.load( - io.BytesIO(f.read()), - map_location=self.device, - ) - except KeyError: - raise KeyError("Your zip file is missing at least one component") - - self.__init__(**loaded_params["init_params"]) - - self._set_network() - self.network.load_state_dict(saved_state_dict) - self.network.eval() - self.load_class_attrs(loaded_params["class_attrs"]) - - return - - def _train_epoch(self, train_loader): - """ - Trains one epoch of the network in self.network - - Parameters - ---------- - train_loader : a :class: `torch.utils.data.Dataloader` - DataLoader with train set - """ - self.network.train() - - for batch_idx, (X, y) in enumerate(train_loader): - self._callback_container.on_batch_begin(batch_idx) - - batch_logs = self._train_batch(X, y) - - self._callback_container.on_batch_end(batch_idx, batch_logs) - - epoch_logs = {"lr": self._optimizer.param_groups[-1]["lr"]} - self.history.epoch_metrics.update(epoch_logs) - - return - - def _train_batch(self, X, y): - """ - Trains one batch of data - - Parameters - ---------- - X : torch.Tensor - Train matrix - y : torch.Tensor - Target matrix - - Returns - ------- - batch_outs : dict - Dictionnary with "y": target and "score": prediction scores. - batch_logs : dict - Dictionnary with "batch_size" and "loss". - """ - batch_logs = {"batch_size": X.shape[0]} - - X = X.to(self.device).float() - y = y.to(self.device).float() - - if self.augmentations is not None: - X, y = self.augmentations(X, y) - - for param in self.network.parameters(): - param.grad = None - - output, M_loss = self.network(X) - - loss = self.compute_loss(output, y) - # Add the overall sparsity loss - loss = loss - self.lambda_sparse * M_loss - - # Perform backward pass and optimization - loss.backward() - if self.clip_value: - clip_grad_norm_(self.network.parameters(), self.clip_value) - self._optimizer.step() - - batch_logs["loss"] = loss.cpu().detach().numpy().item() - - return batch_logs - - def _predict_epoch(self, name, loader): - """ - Predict an epoch and update metrics. 
- - Parameters - ---------- - name : str - Name of the validation set - loader : torch.utils.data.Dataloader - DataLoader with validation set - """ - # Setting network on evaluation mode - self.network.eval() - - list_y_true = [] - list_y_score = [] - - # Main loop - for batch_idx, (X, y) in enumerate(loader): - scores = self._predict_batch(X) - list_y_true.append(y) - list_y_score.append(scores) - - y_true, scores = self.stack_batches(list_y_true, list_y_score) - - metrics_logs = self._metric_container_dict[name](y_true, scores) - self.network.train() - self.history.epoch_metrics.update(metrics_logs) - return - - def _predict_batch(self, X): - """ - Predict one batch of data. - - Parameters - ---------- - X : torch.Tensor - Owned products - - Returns - ------- - np.array - model scores - """ - X = X.to(self.device).float() - - # compute model output - scores, _ = self.network(X) - - if isinstance(scores, list): - scores = [x.cpu().detach().numpy() for x in scores] - else: - scores = scores.cpu().detach().numpy() - - return scores - - def _set_network(self): - """Setup the network and explain matrix.""" - torch.manual_seed(self.seed) - - self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) - - self.network = tab_network.TabNet( - self.input_dim, - self.output_dim, - n_d=self.n_d, - n_a=self.n_a, - n_steps=self.n_steps, - gamma=self.gamma, - cat_idxs=self.cat_idxs, - cat_dims=self.cat_dims, - cat_emb_dim=self.cat_emb_dim, - n_independent=self.n_independent, - n_shared=self.n_shared, - epsilon=self.epsilon, - virtual_batch_size=self.virtual_batch_size, - momentum=self.momentum, - mask_type=self.mask_type, - group_attention_matrix=self.group_matrix.to(self.device), - ).to(self.device) - - self.reducing_matrix = create_explain_matrix( - self.network.input_dim, - self.network.cat_emb_dim, - self.network.cat_idxs, - self.network.post_embed_dim, - ) - - def _set_metrics(self, metrics, eval_names): - """Set attributes relative to the metrics. - - Parameters - ---------- - metrics : list of str - List of eval metric names. - eval_names : list of str - List of eval set names. - - """ - metrics = metrics or [self._default_metric] - - metrics = check_metrics(metrics) - # Set metric container for each sets - self._metric_container_dict = {} - for name in eval_names: - self._metric_container_dict.update({name: MetricContainer(metrics, prefix=f"{name}_")}) - - self._metrics = [] - self._metrics_names = [] - for _, metric_container in self._metric_container_dict.items(): - self._metrics.extend(metric_container.metrics) - self._metrics_names.extend(metric_container.names) - - # Early stopping metric is the last eval metric - self.early_stopping_metric = self._metrics_names[-1] if len(self._metrics_names) > 0 else None - - def _set_callbacks(self, custom_callbacks): - """Setup the callbacks functions. - - Parameters - ---------- - custom_callbacks : list of func - List of callback functions. - - """ - # Setup default callbacks history, early stopping and scheduler - callbacks = [] - self.history = History(self, verbose=self.verbose) - callbacks.append(self.history) - if (self.early_stopping_metric is not None) and (self.patience > 0): - early_stopping = EarlyStopping( - early_stopping_metric=self.early_stopping_metric, - is_maximize=(self._metrics[-1]._maximize if len(self._metrics) > 0 else None), - patience=self.patience, - ) - callbacks.append(early_stopping) - else: - wrn_msg = "No early stopping will be performed, last training weights will be used." 
- warnings.warn(wrn_msg) - - if self.scheduler_fn is not None: - # Add LR Scheduler call_back - is_batch_level = self.scheduler_params.pop("is_batch_level", False) - scheduler = LRSchedulerCallback( - scheduler_fn=self.scheduler_fn, - scheduler_params=self.scheduler_params, - optimizer=self._optimizer, - early_stopping_metric=self.early_stopping_metric, - is_batch_level=is_batch_level, - ) - callbacks.append(scheduler) - - if custom_callbacks: - callbacks.extend(custom_callbacks) - self._callback_container = CallbackContainer(callbacks) - self._callback_container.set_trainer(self) - - def _set_optimizer(self): - """Setup optimizer.""" - self._optimizer = self.optimizer_fn(self.network.parameters(), **self.optimizer_params) - - def _construct_loaders(self, X_train, y_train, eval_set): - """Generate dataloaders for train and eval set. - - Parameters - ---------- - X_train : np.array - Train set. - y_train : np.array - Train targets. - eval_set : list of tuple - List of eval tuple set (X, y). - - Returns - ------- - train_dataloader : `torch.utils.data.Dataloader` - Training dataloader. - valid_dataloaders : list of `torch.utils.data.Dataloader` - List of validation dataloaders. - - """ - # all weights are not allowed for this type of model - y_train_mapped = self.prepare_target(y_train) - for i, (X, y) in enumerate(eval_set): - y_mapped = self.prepare_target(y) - eval_set[i] = (X, y_mapped) - - train_dataloader, valid_dataloaders = create_dataloaders( - X_train, - y_train_mapped, - eval_set, - self.updated_weights, - self.batch_size, - self.num_workers, - self.drop_last, - self.pin_memory, - ) - return train_dataloader, valid_dataloaders - - def _compute_feature_importances(self, X): - """Compute global feature importance. - - Parameters - ---------- - loader : `torch.utils.data.Dataloader` - Pytorch dataloader. - - """ - M_explain, _ = self.explain(X, normalize=False) - sum_explain = M_explain.sum(axis=0) - feature_importances_ = sum_explain / np.sum(sum_explain) - return feature_importances_ - - def _update_network_params(self): - self.network.virtual_batch_size = self.virtual_batch_size - - @abstractmethod - def update_fit_params(self, X_train, y_train, eval_set, weights): - """ - Set attributes relative to fit function. - - Parameters - ---------- - X_train : np.ndarray - Train set - y_train : np.array - Train targets - eval_set : list of tuple - List of eval tuple set (X, y). - weights : bool or dictionnary - 0 for no balancing - 1 for automated balancing - """ - raise NotImplementedError("users must define update_fit_params to use this base class") - - @abstractmethod - def compute_loss(self, y_score, y_true): - """ - Compute the loss. - - Parameters - ---------- - y_score : a :tensor: `torch.Tensor` - Score matrix - y_true : a :tensor: `torch.Tensor` - Target matrix - - Returns - ------- - float - Loss value - """ - raise NotImplementedError("users must define compute_loss to use this base class") - - @abstractmethod - def prepare_target(self, y): - """ - Prepare target before training. - - Parameters - ---------- - y : a :tensor: `torch.Tensor` - Target matrix. - - Returns - ------- - `torch.Tensor` - Converted target matrix. 
- """ - raise NotImplementedError("users must define prepare_target to use this base class") diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py deleted file mode 100644 index b520c0b0..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/augmentations.py +++ /dev/null @@ -1,93 +0,0 @@ -import torch -from pytorch_tabnet.utils import define_device -import numpy as np - - -class RegressionSMOTE: - """ - Apply SMOTE - - This will average a percentage p of the elements in the batch with other elements. - The target will be averaged as well (this might work with binary classification - and certain loss), following a beta distribution. - """ - - def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): - "" - self.seed = seed - self._set_seed() - self.device = define_device(device_name) - self.alpha = alpha - self.beta = beta - self.p = p - if (p < 0.0) or (p > 1.0): - raise ValueError("Value of p should be between 0. and 1.") - - def _set_seed(self): - torch.manual_seed(self.seed) - np.random.seed(self.seed) - return - - def __call__(self, X, y): - batch_size = X.shape[0] - random_values = torch.rand(batch_size, device=self.device) - idx_to_change = random_values < self.p - - # ensure that first element to switch has probability > 0.5 - np_betas = np.random.beta(self.alpha, self.beta, batch_size) / 2 + 0.5 - random_betas = torch.from_numpy(np_betas).to(self.device).float() - index_permute = torch.randperm(batch_size, device=self.device) - - X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] - X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view( - X[idx_to_change].size() - ) # noqa - - y[idx_to_change] = random_betas[idx_to_change, None] * y[idx_to_change] - y[idx_to_change] += (1 - random_betas[idx_to_change, None]) * y[index_permute][idx_to_change].view( - y[idx_to_change].size() - ) # noqa - - return X, y - - -class ClassificationSMOTE: - """ - Apply SMOTE for classification tasks. - - This will average a percentage p of the elements in the batch with other elements. - The target will stay unchanged and keep the value of the most important row in the mix. - """ - - def __init__(self, device_name="auto", p=0.8, alpha=0.5, beta=0.5, seed=0): - "" - self.seed = seed - self._set_seed() - self.device = define_device(device_name) - self.alpha = alpha - self.beta = beta - self.p = p - if (p < 0.0) or (p > 1.0): - raise ValueError("Value of p should be between 0. 
and 1.") - - def _set_seed(self): - torch.manual_seed(self.seed) - np.random.seed(self.seed) - return - - def __call__(self, X, y): - batch_size = X.shape[0] - random_values = torch.rand(batch_size, device=self.device) - idx_to_change = random_values < self.p - - # ensure that first element to switch has probability > 0.5 - np_betas = np.random.beta(self.alpha, self.beta, batch_size) / 2 + 0.5 - random_betas = torch.from_numpy(np_betas).to(self.device).float() - index_permute = torch.randperm(batch_size, device=self.device) - - X[idx_to_change] = random_betas[idx_to_change, None] * X[idx_to_change] - X[idx_to_change] += (1 - random_betas[idx_to_change, None]) * X[index_permute][idx_to_change].view( - X[idx_to_change].size() - ) # noqa - - return X, y diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py deleted file mode 100644 index 5c266502..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/callbacks.py +++ /dev/null @@ -1,287 +0,0 @@ -import time -import datetime -import copy -import numpy as np -from dataclasses import dataclass, field -from typing import List, Any -import warnings - - -class Callback: - """ - Abstract base class used to build new callbacks. - """ - - def __init__(self): - pass - - def set_params(self, params): - self.params = params - - def set_trainer(self, model): - self.trainer = model - - def on_epoch_begin(self, epoch, logs=None): - pass - - def on_epoch_end(self, epoch, logs=None): - pass - - def on_batch_begin(self, batch, logs=None): - pass - - def on_batch_end(self, batch, logs=None): - pass - - def on_train_begin(self, logs=None): - pass - - def on_train_end(self, logs=None): - pass - - -@dataclass -class CallbackContainer: - """ - Container holding a list of callbacks. - """ - - callbacks: List[Callback] = field(default_factory=list) - - def append(self, callback): - self.callbacks.append(callback) - - def set_params(self, params): - for callback in self.callbacks: - callback.set_params(params) - - def set_trainer(self, trainer): - self.trainer = trainer - for callback in self.callbacks: - callback.set_trainer(trainer) - - def on_epoch_begin(self, epoch, logs=None): - logs = logs or {} - for callback in self.callbacks: - callback.on_epoch_begin(epoch, logs) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - for callback in self.callbacks: - callback.on_epoch_end(epoch, logs) - - def on_batch_begin(self, batch, logs=None): - logs = logs or {} - for callback in self.callbacks: - callback.on_batch_begin(batch, logs) - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - for callback in self.callbacks: - callback.on_batch_end(batch, logs) - - def on_train_begin(self, logs=None): - logs = logs or {} - logs["start_time"] = time.time() - for callback in self.callbacks: - callback.on_train_begin(logs) - - def on_train_end(self, logs=None): - logs = logs or {} - for callback in self.callbacks: - callback.on_train_end(logs) - - -@dataclass -class EarlyStopping(Callback): - """EarlyStopping callback to exit the training loop if early_stopping_metric - does not improve by a certain amount for a certain - number of epochs. - - Parameters - --------- - early_stopping_metric : str - Early stopping metric name - is_maximize : bool - Whether to maximize or not early_stopping_metric - tol : float - minimum change in monitored value to qualify as improvement. - This number should be positive. 
- patience : integer - number of epochs to wait for improvement before terminating. - the counter will be reset after each improvement - - """ - - early_stopping_metric: str - is_maximize: bool - tol: float = 0.0 - patience: int = 5 - - def __post_init__(self): - self.best_epoch = 0 - self.stopped_epoch = 0 - self.wait = 0 - self.best_weights = None - self.best_loss = np.inf - if self.is_maximize: - self.best_loss = -self.best_loss - super().__init__() - - def on_epoch_end(self, epoch, logs=None): - current_loss = logs.get(self.early_stopping_metric) - if current_loss is None: - return - - loss_change = current_loss - self.best_loss - max_improved = self.is_maximize and loss_change > self.tol - min_improved = (not self.is_maximize) and (-loss_change > self.tol) - if max_improved or min_improved: - self.best_loss = current_loss - self.best_epoch = epoch - self.wait = 1 - self.best_weights = copy.deepcopy(self.trainer.network.state_dict()) - else: - if self.wait >= self.patience: - self.stopped_epoch = epoch - self.trainer._stop_training = True - self.wait += 1 - - def on_train_end(self, logs=None): - self.trainer.best_epoch = self.best_epoch - self.trainer.best_cost = self.best_loss - - if self.best_weights is not None: - self.trainer.network.load_state_dict(self.best_weights) - - if self.stopped_epoch > 0: - msg = f"\nEarly stopping occurred at epoch {self.stopped_epoch}" - msg += ( - f" with best_epoch = {self.best_epoch} and " - + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}" - ) - print(msg) - else: - msg = ( - f"Stop training because you reached max_epochs = {self.trainer.max_epochs}" - + f" with best_epoch = {self.best_epoch} and " - + f"best_{self.early_stopping_metric} = {round(self.best_loss, 5)}" - ) - print(msg) - wrn_msg = "Best weights from best epoch are automatically used!" - warnings.warn(wrn_msg) - - -@dataclass -class History(Callback): - """Callback that records events into a `History` object. - This callback is automatically applied to - every SuperModule.
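For reviewers of this removal: the tol/patience contract implemented by the EarlyStopping callback above reduces to a few lines. A minimal sketch (semantics inferred from the deleted code, not part of the patch; plain Python, no torch):

    best, wait, patience, tol = float("inf"), 0, 5, 0.0
    for epoch, loss in enumerate([0.9, 0.8, 0.81, 0.80, 0.80, 0.80, 0.80, 0.80]):
        if best - loss > tol:      # counts as improvement only if it beats best by more than tol
            best, wait = loss, 1   # the counter resets after each improvement
        else:
            if wait >= patience:   # patience exhausted -> request stop
                print(f"early stop at epoch {epoch}, best={best}")
                break
            wait += 1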
- - Parameters - --------- - trainer : DeepRecoModel - Model class to train - verbose : int - Print results every verbose iteration - - """ - - trainer: Any - verbose: int = 1 - - def __post_init__(self): - super().__init__() - self.samples_seen = 0.0 - self.total_time = 0.0 - - def on_train_begin(self, logs=None): - self.history = {"loss": []} - self.history.update({"lr": []}) - self.history.update({name: [] for name in self.trainer._metrics_names}) - self.start_time = logs["start_time"] - self.epoch_loss = 0.0 - - def on_epoch_begin(self, epoch, logs=None): - self.epoch_metrics = {"loss": 0.0} - self.samples_seen = 0.0 - - def on_epoch_end(self, epoch, logs=None): - self.epoch_metrics["loss"] = self.epoch_loss - for metric_name, metric_value in self.epoch_metrics.items(): - self.history[metric_name].append(metric_value) - if self.verbose == 0: - return - if epoch % self.verbose != 0: - return - msg = f"epoch {epoch:<3}" - for metric_name, metric_value in self.epoch_metrics.items(): - if metric_name != "lr": - msg += f"| {metric_name:<3}: {np.round(metric_value, 5):<8}" - self.total_time = int(time.time() - self.start_time) - msg += f"| {str(datetime.timedelta(seconds=self.total_time)) + 's':<6}" - print(msg) - - def on_batch_end(self, batch, logs=None): - batch_size = logs["batch_size"] - self.epoch_loss = (self.samples_seen * self.epoch_loss + batch_size * logs["loss"]) / ( - self.samples_seen + batch_size - ) - self.samples_seen += batch_size - - def __getitem__(self, name): - return self.history[name] - - def __repr__(self): - return str(self.history) - - def __str__(self): - return str(self.history) - - -@dataclass -class LRSchedulerCallback(Callback): - """Wrapper for most torch scheduler functions. - - Parameters - --------- - scheduler_fn : torch.optim.lr_scheduler - Torch scheduling class - scheduler_params : dict - Dictionnary containing all parameters for the scheduler_fn - is_batch_level : bool (default = False) - If set to False : lr updates will happen at every epoch - If set to True : lr updates happen at every batch - Set this to True for OneCycleLR for example - """ - - scheduler_fn: Any - optimizer: Any - scheduler_params: dict - early_stopping_metric: str - is_batch_level: bool = False - - def __post_init__( - self, - ): - self.is_metric_related = hasattr(self.scheduler_fn, "is_better") - self.scheduler = self.scheduler_fn(self.optimizer, **self.scheduler_params) - super().__init__() - - def on_batch_end(self, batch, logs=None): - if self.is_batch_level: - self.scheduler.step() - else: - pass - - def on_epoch_end(self, epoch, logs=None): - current_loss = logs.get(self.early_stopping_metric) - if current_loss is None: - return - if self.is_batch_level: - pass - else: - if self.is_metric_related: - self.scheduler.step(current_loss) - else: - self.scheduler.step() diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py deleted file mode 100644 index ae716f33..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/metrics.py +++ /dev/null @@ -1,515 +0,0 @@ -from dataclasses import dataclass -from typing import List -import numpy as np -from sklearn.metrics import ( - roc_auc_score, - mean_squared_error, - mean_absolute_error, - accuracy_score, - log_loss, - balanced_accuracy_score, - mean_squared_log_error, -) -import torch - - -def UnsupervisedLoss(y_pred, embedded_x, obf_vars, eps=1e-9): - """ - Implements unsupervised loss function. 
- This differs from original paper as it's scaled to be batch size independent - and number of features reconstructed independent (by taking the mean) - - Parameters - ---------- - y_pred : torch.Tensor or np.array - Reconstructed prediction (with embeddings) - embedded_x : torch.Tensor - Original input embedded by network - obf_vars : torch.Tensor - Binary mask for obfuscated variables. - 1 means the variable was obfuscated so reconstruction is based on this. - eps : float - A small floating point to avoid ZeroDivisionError - This can happen in a degenerate case when a feature has only one value - - Returns - ------- - loss : torch float - Unsupervised loss, average value over batch samples. - """ - errors = y_pred - embedded_x - reconstruction_errors = torch.mul(errors, obf_vars) ** 2 - batch_means = torch.mean(embedded_x, dim=0) - batch_means[batch_means == 0] = 1 - - batch_stds = torch.std(embedded_x, dim=0) ** 2 - batch_stds[batch_stds == 0] = batch_means[batch_stds == 0] - features_loss = torch.matmul(reconstruction_errors, 1 / batch_stds) - # compute the number of obfuscated variables to reconstruct - nb_reconstructed_variables = torch.sum(obf_vars, dim=1) - # take the mean of the reconstructed variable errors - features_loss = features_loss / (nb_reconstructed_variables + eps) - # here we take the mean per batch, contrary to the paper - loss = torch.mean(features_loss) - return loss - - -def UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars, eps=1e-9): - errors = y_pred - embedded_x - reconstruction_errors = np.multiply(errors, obf_vars) ** 2 - batch_means = np.mean(embedded_x, axis=0) - batch_means = np.where(batch_means == 0, 1, batch_means) - - batch_stds = np.std(embedded_x, axis=0, ddof=1) ** 2 - batch_stds = np.where(batch_stds == 0, batch_means, batch_stds) - features_loss = np.matmul(reconstruction_errors, 1 / batch_stds) - # compute the number of obfuscated variables to reconstruct - nb_reconstructed_variables = np.sum(obf_vars, axis=1) - # take the mean of the reconstructed variable errors - features_loss = features_loss / (nb_reconstructed_variables + eps) - # here we take the mean per batch, contrary to the paper - loss = np.mean(features_loss) - return loss - - -@dataclass -class UnsupMetricContainer: - """Container holding a list of metrics. - - Parameters - ---------- - y_pred : torch.Tensor or np.array - Reconstructed prediction (with embeddings) - embedded_x : torch.Tensor - Original input embedded by network - obf_vars : torch.Tensor - Binary mask for obfuscated variables. - 1 means the variable was obfuscated so reconstruction is based on this. - - """ - - metric_names: List[str] - prefix: str = "" - - def __post_init__(self): - self.metrics = Metric.get_metrics_by_names(self.metric_names) - self.names = [self.prefix + name for name in self.metric_names] - - def __call__(self, y_pred, embedded_x, obf_vars): - """Compute all metrics and store into a dict. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_pred : np.ndarray - Score matrix or vector - - Returns - ------- - dict - Dict of metrics ({metric_name: metric_value}). - - """ - logs = {} - for metric in self.metrics: - res = metric(y_pred, embedded_x, obf_vars) - logs[self.prefix + metric._name] = res - return logs - - -@dataclass -class MetricContainer: - """Container holding a list of metrics. - - Parameters - ---------- - metric_names : list of str - List of metric names. - prefix : str - Prefix of metric names.
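For intuition about the loss defined above: the squared reconstruction error of each masked cell is normalized by that feature's batch variance, then averaged over the masked cells of each row. A toy check with numpy (illustrative only, mirroring the formulas of the deleted UnsupervisedLossNumpy):

    import numpy as np

    emb = np.array([[1.0, 2.0], [3.0, 6.0]])   # embedded_x
    pred = np.array([[1.5, 2.0], [3.0, 5.0]])  # reconstruction
    obf = np.array([[1.0, 0.0], [0.0, 1.0]])   # mask of obfuscated cells
    err = ((pred - emb) * obf) ** 2            # error counted on masked cells only
    var = np.std(emb, axis=0, ddof=1) ** 2     # per-feature batch variance
    per_sample = (err / var).sum(axis=1) / (obf.sum(axis=1) + 1e-9)
    print(per_sample.mean())                   # 0.125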
- - """ - - metric_names: List[str] - prefix: str = "" - - def __post_init__(self): - self.metrics = Metric.get_metrics_by_names(self.metric_names) - self.names = [self.prefix + name for name in self.metric_names] - - def __call__(self, y_true, y_pred): - """Compute all metrics and store into a dict. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_pred : np.ndarray - Score matrix or vector - - Returns - ------- - dict - Dict of metrics ({metric_name: metric_value}). - - """ - logs = {} - for metric in self.metrics: - if isinstance(y_pred, list): - res = np.mean([metric(y_true[:, i], y_pred[i]) for i in range(len(y_pred))]) - else: - res = metric(y_true, y_pred) - logs[self.prefix + metric._name] = res - return logs - - -class Metric: - def __call__(self, y_true, y_pred): - raise NotImplementedError("Custom Metrics must implement this function") - - @classmethod - def get_metrics_by_names(cls, names): - """Get list of metric classes. - - Parameters - ---------- - cls : Metric - Metric class. - names : list - List of metric names. - - Returns - ------- - metrics : list - List of metric classes. - - """ - available_metrics = cls.__subclasses__() - available_names = [metric()._name for metric in available_metrics] - metrics = [] - for name in names: - assert name in available_names, f"{name} is not available, choose in {available_names}" - idx = available_names.index(name) - metric = available_metrics[idx]() - metrics.append(metric) - return metrics - - -class AUC(Metric): - """ - AUC. - """ - - def __init__(self): - self._name = "auc" - self._maximize = True - - def __call__(self, y_true, y_score): - """ - Compute AUC of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - AUC of predictions vs targets. - """ - return roc_auc_score(y_true, y_score[:, 1]) - - -class Accuracy(Metric): - """ - Accuracy. - """ - - def __init__(self): - self._name = "accuracy" - self._maximize = True - - def __call__(self, y_true, y_score): - """ - Compute Accuracy of predictions. - - Parameters - ---------- - y_true: np.ndarray - Target matrix or vector - y_score: np.ndarray - Score matrix or vector - - Returns - ------- - float - Accuracy of predictions vs targets. - """ - y_pred = np.argmax(y_score, axis=1) - return accuracy_score(y_true, y_pred) - - -class BalancedAccuracy(Metric): - """ - Balanced Accuracy. - """ - - def __init__(self): - self._name = "balanced_accuracy" - self._maximize = True - - def __call__(self, y_true, y_score): - """ - Compute Accuracy of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - Accuracy of predictions vs targets. - """ - y_pred = np.argmax(y_score, axis=1) - return balanced_accuracy_score(y_true, y_pred) - - -class LogLoss(Metric): - """ - LogLoss. - """ - - def __init__(self): - self._name = "logloss" - self._maximize = False - - def __call__(self, y_true, y_score): - """ - Compute LogLoss of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - LogLoss of predictions vs targets. - """ - return log_loss(y_true, y_score) - - -class MAE(Metric): - """ - Mean Absolute Error. 
- """ - - def __init__(self): - self._name = "mae" - self._maximize = False - - def __call__(self, y_true, y_score): - """ - Compute MAE (Mean Absolute Error) of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - MAE of predictions vs targets. - """ - return mean_absolute_error(y_true, y_score) - - -class MSE(Metric): - """ - Mean Squared Error. - """ - - def __init__(self): - self._name = "mse" - self._maximize = False - - def __call__(self, y_true, y_score): - """ - Compute MSE (Mean Squared Error) of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - MSE of predictions vs targets. - """ - return mean_squared_error(y_true, y_score) - - -class RMSLE(Metric): - """ - Root Mean squared logarithmic error regression loss. - Scikit-implementation: - https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html - Note: In order to avoid error, negative predictions are clipped to 0. - This means that you should clip negative predictions manually after calling predict. - """ - - def __init__(self): - self._name = "rmsle" - self._maximize = False - - def __call__(self, y_true, y_score): - """ - Compute RMSLE of predictions. - - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - RMSLE of predictions vs targets. - """ - y_score = np.clip(y_score, a_min=0, a_max=None) - return np.sqrt(mean_squared_log_error(y_true, y_score)) - - -class UnsupervisedMetric(Metric): - """ - Unsupervised metric - """ - - def __init__(self): - self._name = "unsup_loss" - self._maximize = False - - def __call__(self, y_pred, embedded_x, obf_vars): - """ - Compute MSE (Mean Squared Error) of predictions. - - Parameters - ---------- - y_pred : torch.Tensor or np.array - Reconstructed prediction (with embeddings) - embedded_x : torch.Tensor - Original input embedded by network - obf_vars : torch.Tensor - Binary mask for obfuscated variables. - 1 means the variables was obfuscated so reconstruction is based on this. - - Returns - ------- - float - MSE of predictions vs targets. - """ - loss = UnsupervisedLoss(y_pred, embedded_x, obf_vars) - return loss.item() - - -class UnsupervisedNumpyMetric(Metric): - """ - Unsupervised metric - """ - - def __init__(self): - self._name = "unsup_loss_numpy" - self._maximize = False - - def __call__(self, y_pred, embedded_x, obf_vars): - """ - Compute MSE (Mean Squared Error) of predictions. - - Parameters - ---------- - y_pred : torch.Tensor or np.array - Reconstructed prediction (with embeddings) - embedded_x : torch.Tensor - Original input embedded by network - obf_vars : torch.Tensor - Binary mask for obfuscated variables. - 1 means the variables was obfuscated so reconstruction is based on this. - - Returns - ------- - float - MSE of predictions vs targets. - """ - return UnsupervisedLossNumpy(y_pred, embedded_x, obf_vars) - - -class RMSE(Metric): - """ - Root Mean Squared Error. - """ - - def __init__(self): - self._name = "rmse" - self._maximize = False - - def __call__(self, y_true, y_score): - """ - Compute RMSE (Root Mean Squared Error) of predictions. 
- - Parameters - ---------- - y_true : np.ndarray - Target matrix or vector - y_score : np.ndarray - Score matrix or vector - - Returns - ------- - float - RMSE of predictions vs targets. - """ - return np.sqrt(mean_squared_error(y_true, y_score)) - - -def check_metrics(metrics): - """Check if custom metrics are provided. - - Parameters - ---------- - metrics : list of str or classes - List with built-in metrics (str) or custom metrics (classes). - - Returns - ------- - val_metrics : list of str - List of metric names. - - """ - val_metrics = [] - for metric in metrics: - if isinstance(metric, str): - val_metrics.append(metric) - elif issubclass(metric, Metric): - val_metrics.append(metric()._name) - else: - raise TypeError("You need to provide a valid metric format") - return val_metrics diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py deleted file mode 100644 index b6fa2ef3..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multiclass_utils.py +++ /dev/null @@ -1,402 +0,0 @@ -# Author: Arnaud Joly, Joel Nothman, Hamzeh Alsalhi -# -# License: BSD 3 clause -""" -Multi-class / multi-label utility function -========================================== - -""" -from collections.abc import Sequence -from itertools import chain - -from scipy.sparse import issparse -from scipy.sparse.base import spmatrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix -import scipy.sparse as sp - -import numpy as np -import pandas as pd - - -def _assert_all_finite(X, allow_nan=False): - """Like assert_all_finite, but only for ndarray.""" - - X = np.asanyarray(X) - # First try an O(n) time, O(1) space solution for the common case that - # everything is finite; fall back to O(n) space np.isfinite to prevent - # false positives from overflow in sum method. The sum is also calculated - # safely to reduce dtype induced overflows. - is_float = X.dtype.kind in "fc" - if is_float and (np.isfinite(np.sum(X))): - pass - elif is_float: - msg_err = "Input contains {} or a value too large for {!r}." - if allow_nan and np.isinf(X).any() or not allow_nan and not np.isfinite(X).all(): - type_err = "infinity" if allow_nan else "NaN, infinity" - raise ValueError(msg_err.format(type_err, X.dtype)) - # for object dtype data, we only check for NaNs (GH-13254) - elif X.dtype == np.dtype("object") and not allow_nan: - if np.isnan(X).any(): - raise ValueError("Input contains NaN") - - -def assert_all_finite(X, allow_nan=False): - """Throw a ValueError if X contains NaN or infinity. 
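The O(1)-space fast path used by _assert_all_finite above deserves a note: a finite sum guarantees every entry is finite, while a non-finite sum may also be caused by overflow of large finite values, hence the elementwise re-check. A standalone illustration (not part of the patch):

    import numpy as np

    X = np.array([1.0, 2.0, np.inf])
    # Fast path: if the sum is finite, all entries are finite (O(1) extra space).
    # A non-finite sum can also come from overflow, so re-check elementwise.
    if not np.isfinite(np.sum(X)) and not np.isfinite(X).all():
        print(f"Input contains NaN or a value too large for {X.dtype!r}.")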
- - Parameters - ---------- - X : array or sparse matrix - allow_nan : bool - """ - _assert_all_finite(X.data if sp.issparse(X) else X, allow_nan) - - -def _unique_multiclass(y): - if hasattr(y, "__array__"): - return np.unique(np.asarray(y)) - else: - return set(y) - - -def _unique_indicator(y): - """ - Not implemented - """ - raise IndexError( - f"""Given labels are of size {y.shape} while they should be (n_samples,) \n""" - + """If attempting multilabel classification, try using TabNetMultiTaskClassification """ - + """or TabNetRegressor""" - ) - - -_FN_UNIQUE_LABELS = { - "binary": _unique_multiclass, - "multiclass": _unique_multiclass, - "multilabel-indicator": _unique_indicator, -} - - -def unique_labels(*ys): - """Extract an ordered array of unique labels - - We don't allow: - - mix of multilabel and multiclass (single label) targets - - mix of label indicator matrix and anything else, - (because there are no explicit labels) - - mix of label indicator matrices of different sizes - - mix of string and integer labels - - At the moment, we also don't allow "multiclass-multioutput" input type. - - Parameters - ---------- - *ys : array-likes - - Returns - ------- - out : numpy array of shape [n_unique_labels] - An ordered array of unique labels. - - Examples - -------- - >>> from sklearn.utils.multiclass import unique_labels - >>> unique_labels([3, 5, 5, 5, 7, 7]) - array([3, 5, 7]) - >>> unique_labels([1, 2, 3, 4], [2, 2, 3, 4]) - array([1, 2, 3, 4]) - >>> unique_labels([1, 2, 10], [5, 11]) - array([ 1, 2, 5, 10, 11]) - """ - if not ys: - raise ValueError("No argument has been passed.") - # Check that we don't mix label format - - ys_types = set(type_of_target(x) for x in ys) - if ys_types == {"binary", "multiclass"}: - ys_types = {"multiclass"} - - if len(ys_types) > 1: - raise ValueError("Mix type of y not allowed, got types %s" % ys_types) - - label_type = ys_types.pop() - - # Get the unique set of labels - _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None) - if not _unique_labels: - raise ValueError("Unknown label type: %s" % repr(ys)) - - ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) - - # Check that we don't mix string type with number type - if len(set(isinstance(label, str) for label in ys_labels)) > 1: - raise ValueError("Mix of label input types (string and number)") - - return np.array(sorted(ys_labels)) - - -def _is_integral_float(y): - return y.dtype.kind == "f" and np.all(y.astype(int) == y) - - -def is_multilabel(y): - """Check if ``y`` is in a multilabel format. - - Parameters - ---------- - y : numpy array of shape [n_samples] - Target values. - - Returns - ------- - out : bool - Return ``True`` if ``y`` is in a multilabel format, else ``False``.
- - Examples - -------- - >>> import numpy as np - >>> from sklearn.utils.multiclass import is_multilabel - >>> is_multilabel([0, 1, 0, 1]) - False - >>> is_multilabel([[1], [0, 2], []]) - False - >>> is_multilabel(np.array([[1, 0], [0, 0]])) - True - >>> is_multilabel(np.array([[1], [0], [0]])) - False - >>> is_multilabel(np.array([[1, 0, 0]])) - True - """ - if hasattr(y, "__array__"): - y = np.asarray(y) - if not (hasattr(y, "shape") and y.ndim == 2 and y.shape[1] > 1): - return False - - if issparse(y): - if isinstance(y, (dok_matrix, lil_matrix)): - y = y.tocsr() - return ( - len(y.data) == 0 - or np.unique(y.data).size == 1 - and (y.dtype.kind in "biu" or _is_integral_float(np.unique(y.data))) # bool, int, uint - ) - else: - labels = np.unique(y) - - return len(labels) < 3 and (y.dtype.kind in "biu" or _is_integral_float(labels)) # bool, int, uint - - -def check_classification_targets(y): - """Ensure that target y is of a non-regression type. - - Only the following target types (as defined in type_of_target) are allowed: - 'binary', 'multiclass', 'multiclass-multioutput', - 'multilabel-indicator', 'multilabel-sequences' - - Parameters - ---------- - y : array-like - """ - y_type = type_of_target(y) - if y_type not in [ - "binary", - "multiclass", - "multiclass-multioutput", - "multilabel-indicator", - "multilabel-sequences", - ]: - raise ValueError("Unknown label type: %r" % y_type) - - -def type_of_target(y): - """Determine the type of data indicated by the target. - - Note that this type is the most specific type that can be inferred. - For example: - - * ``binary`` is more specific but compatible with ``multiclass``. - * ``multiclass`` of integers is more specific but compatible with - ``continuous``. - * ``multilabel-indicator`` is more specific but compatible with - ``multiclass-multioutput``. - - Parameters - ---------- - y : array-like - - Returns - ------- - target_type : string - One of: - - * 'continuous': `y` is an array-like of floats that are not all - integers, and is 1d or a column vector. - * 'continuous-multioutput': `y` is a 2d array of floats that are - not all integers, and both dimensions are of size > 1. - * 'binary': `y` contains <= 2 discrete values and is 1d or a column - vector. - * 'multiclass': `y` contains more than two discrete values, is not a - sequence of sequences, and is 1d or a column vector. - * 'multiclass-multioutput': `y` is a 2d array that contains more - than two discrete values, is not a sequence of sequences, and both - dimensions are of size > 1. - * 'multilabel-indicator': `y` is a label indicator matrix, an array - of two dimensions with at least two columns, and at most 2 unique - values. - * 'unknown': `y` is array-like but none of the above, such as a 3d - array, sequence of sequences, or an array of non-sequence objects. 
- - Examples - -------- - >>> import numpy as np - >>> type_of_target([0.1, 0.6]) - 'continuous' - >>> type_of_target([1, -1, -1, 1]) - 'binary' - >>> type_of_target(['a', 'b', 'a']) - 'binary' - >>> type_of_target([1.0, 2.0]) - 'binary' - >>> type_of_target([1, 0, 2]) - 'multiclass' - >>> type_of_target([1.0, 0.0, 3.0]) - 'multiclass' - >>> type_of_target(['a', 'b', 'c']) - 'multiclass' - >>> type_of_target(np.array([[1, 2], [3, 1]])) - 'multiclass-multioutput' - >>> type_of_target([[1, 2]]) - 'multiclass-multioutput' - >>> type_of_target(np.array([[1.5, 2.0], [3.0, 1.6]])) - 'continuous-multioutput' - >>> type_of_target(np.array([[0, 1], [1, 1]])) - 'multilabel-indicator' - """ - valid = (isinstance(y, (Sequence, spmatrix)) or hasattr(y, "__array__")) and not isinstance(y, str) - - if not valid: - raise ValueError("Expected array-like (array or non-string sequence), " "got %r" % y) - - sparseseries = y.__class__.__name__ == "SparseSeries" - if sparseseries: - raise ValueError("y cannot be class 'SparseSeries'.") - - if is_multilabel(y): - return "multilabel-indicator" - - try: - y = np.asarray(y) - except ValueError: - # Known to fail in numpy 1.3 for array of arrays - return "unknown" - - # The old sequence of sequences format - try: - if not hasattr(y[0], "__array__") and isinstance(y[0], Sequence) and not isinstance(y[0], str): - raise ValueError( - "You appear to be using a legacy multi-label data" - " representation. Sequence of sequences are no" - " longer supported; use a binary array or sparse" - " matrix instead - the MultiLabelBinarizer" - " transformer can convert to this format." - ) - except IndexError: - pass - - # Invalid inputs - if y.ndim > 2 or (y.dtype == object and len(y) and not isinstance(y.flat[0], str)): - return "unknown" # [[[1, 2]]] or [obj_1] and not ["label_1"] - - if y.ndim == 2 and y.shape[1] == 0: - return "unknown" # [[]] - - if y.ndim == 2 and y.shape[1] > 1: - suffix = "-multioutput" # [[1, 2], [1, 2]] - else: - suffix = "" # [1, 2, 3] or [[1], [2], [3]] - - # check float and contains non-integer float values - if y.dtype.kind == "f" and np.any(y != y.astype(int)): - # [.1, .2, 3] or [[.1, .2, 3]] or [[1., .2]] and not [1., 2., 3.] - _assert_all_finite(y) - return "continuous" + suffix - - if (len(np.unique(y)) > 2) or (y.ndim >= 2 and len(y[0]) > 1): - return "multiclass" + suffix # [1, 2, 3] or [[1., 2., 3]] or [[1, 2]] - else: - return "binary" # [1, 2] or [["a"], ["b"]] - - -def check_unique_type(y): - target_types = pd.Series(y).map(type).unique() - if len(target_types) != 1: - raise TypeError(f"Values on the target must have the same type. Target has types {target_types}") - - -def infer_output_dim(y_train): - """ - Infer output_dim from targets - - Parameters - ---------- - y_train : np.array - Training targets - - Returns - ------- - output_dim : int - Number of classes for output - train_labels : list - Sorted list of initial classes - """ - check_unique_type(y_train) - train_labels = unique_labels(y_train) - output_dim = len(train_labels) - - return output_dim, train_labels - - -def check_output_dim(labels, y): - if y is not None: - check_unique_type(y) - valid_labels = unique_labels(y) - if not set(valid_labels).issubset(set(labels)): - raise ValueError( - f"""Valid set -- {set(valid_labels)} -- - contains unknown targets from training -- - {set(labels)}""" - ) - return - - -def infer_multitask_output(y_train): - """ - Infer output_dim from targets - This is for multiple tasks.
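As a pointer for readers: for a plain array, the class count and sorted label set that infer_output_dim above produces come down to np.unique (illustration only; the deleted helpers add type-uniqueness and label checks on top):

    import numpy as np

    y = np.array(["b", "a", "b", "c"])
    labels = np.unique(y)        # sorted unique labels, like unique_labels(y)
    print(len(labels), labels)   # 3 ['a' 'b' 'c'] -> output_dim, train_labels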
- - Parameters - ---------- - y_train : np.ndarray - Training targets - - Returns - ------- - tasks_dims : list - Number of classes for output - tasks_labels : list - List of sorted list of initial classes - """ - - if len(y_train.shape) < 2: - raise ValueError("y_train should be of shape (n_examples, n_tasks)" + f"but got {y_train.shape}") - nb_tasks = y_train.shape[1] - tasks_dims = [] - tasks_labels = [] - for task_idx in range(nb_tasks): - try: - output_dim, train_labels = infer_output_dim(y_train[:, task_idx]) - tasks_dims.append(output_dim) - tasks_labels.append(train_labels) - except ValueError as err: - raise ValueError(f"""Error for task {task_idx} : {err}""") - return tasks_dims, tasks_labels diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py deleted file mode 100644 index 309c0e39..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/multitask.py +++ /dev/null @@ -1,167 +0,0 @@ -import torch -import numpy as np -from scipy.special import softmax -from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights -from pytorch_tabnet.abstract_model import TabModel -from pytorch_tabnet.multiclass_utils import infer_multitask_output, check_output_dim -from torch.utils.data import DataLoader -import scipy - - -class TabNetMultiTaskClassifier(TabModel): - def __post_init__(self): - super(TabNetMultiTaskClassifier, self).__post_init__() - self._task = "classification" - self._default_loss = torch.nn.functional.cross_entropy - self._default_metric = "logloss" - - def prepare_target(self, y): - y_mapped = y.copy() - for task_idx in range(y.shape[1]): - task_mapper = self.target_mapper[task_idx] - y_mapped[:, task_idx] = np.vectorize(task_mapper.get)(y[:, task_idx]) - return y_mapped - - def compute_loss(self, y_pred, y_true): - """ - Computes the loss according to network output and targets - - Parameters - ---------- - y_pred : list of tensors - Output of network - y_true : LongTensor - Targets label encoded - - Returns - ------- - loss : torch.Tensor - output of loss function(s) - - """ - loss = 0 - y_true = y_true.long() - if isinstance(self.loss_fn, list): - # if you specify a different loss for each task - for task_loss, task_output, task_id in zip(self.loss_fn, y_pred, range(len(self.loss_fn))): - loss += task_loss(task_output, y_true[:, task_id]) - else: - # same loss function is applied to all tasks - for task_id, task_output in enumerate(y_pred): - loss += self.loss_fn(task_output, y_true[:, task_id]) - - loss /= len(y_pred) - return loss - - def stack_batches(self, list_y_true, list_y_score): - y_true = np.vstack(list_y_true) - y_score = [] - for i in range(len(self.output_dim)): - score = np.vstack([x[i] for x in list_y_score]) - score = softmax(score, axis=1) - y_score.append(score) - return y_true, y_score - - def update_fit_params(self, X_train, y_train, eval_set, weights): - output_dim, train_labels = infer_multitask_output(y_train) - for _, y in eval_set: - for task_idx in range(y.shape[1]): - check_output_dim(train_labels[task_idx], y[:, task_idx]) - self.output_dim = output_dim - self.classes_ = train_labels - self.target_mapper = [ - {class_label: index for index, class_label in enumerate(classes)} for classes in self.classes_ - ] - self.preds_mapper = [ - {str(index): str(class_label) for index, class_label in enumerate(classes)} for classes in self.classes_ - ] - self.updated_weights = weights - filter_weights(self.updated_weights) - - def predict(self, 
X): - """ - Make predictions on a batch (valid) - - Parameters - ---------- - X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - - Returns - ------- - results : np.array - Predictions of the most probable class - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - results = {} - for data in dataloader: - data = data.to(self.device).float() - output, _ = self.network(data) - predictions = [ - torch.argmax(torch.nn.Softmax(dim=1)(task_output), dim=1).cpu().detach().numpy().reshape(-1) - for task_output in output - ] - - for task_idx in range(len(self.output_dim)): - results[task_idx] = results.get(task_idx, []) + [predictions[task_idx]] - # stack all task individually - results = [np.hstack(task_res) for task_res in results.values()] - # map all task individually - results = [ - np.vectorize(self.preds_mapper[task_idx].get)(task_res.astype(str)) - for task_idx, task_res in enumerate(results) - ] - return results - - def predict_proba(self, X): - """ - Make predictions for classification on a batch (valid) - - Parameters - ---------- - X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - - Returns - ------- - res : list of np.ndarray - - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - results = {} - for data in dataloader: - data = data.to(self.device).float() - output, _ = self.network(data) - predictions = [torch.nn.Softmax(dim=1)(task_output).cpu().detach().numpy() for task_output in output] - for task_idx in range(len(self.output_dim)): - results[task_idx] = results.get(task_idx, []) + [predictions[task_idx]] - res = [np.vstack(task_res) for task_res in results.values()] - return res diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py deleted file mode 100644 index 9044d497..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining.py +++ /dev/null @@ -1,418 +0,0 @@ -import torch -import numpy as np -from torch.utils.data import DataLoader -from pytorch_tabnet import tab_network -from pytorch_tabnet.utils import ( - create_explain_matrix, - filter_weights, - SparsePredictDataset, - PredictDataset, - check_input, - create_group_matrix, -) -from torch.nn.utils import clip_grad_norm_ -from pytorch_tabnet.pretraining_utils import ( - create_dataloaders, - validate_eval_set, -) -from pytorch_tabnet.metrics import ( - UnsupMetricContainer, - check_metrics, - UnsupervisedLoss, -) -from pytorch_tabnet.abstract_model import TabModel -import scipy - - -class TabNetPretrainer(TabModel): - def __post_init__(self): - super(TabNetPretrainer, self).__post_init__() - self._task = "unsupervised" - self._default_loss = UnsupervisedLoss - self._default_metric = "unsup_loss_numpy" - - def prepare_target(self, y): - return y - - def compute_loss(self, output, embedded_x, obf_vars): - return self.loss_fn(output, embedded_x, obf_vars) - - def update_fit_params( - self, - weights, - ): - self.updated_weights = weights - filter_weights(self.updated_weights) - self.preds_mapper = None - - def fit( - self, - 
X_train, - eval_set=None, - eval_name=None, - loss_fn=None, - pretraining_ratio=0.5, - weights=0, - max_epochs=100, - patience=10, - batch_size=1024, - virtual_batch_size=128, - num_workers=0, - drop_last=True, - callbacks=None, - pin_memory=True, - warm_start=False, - ): - """Train a neural network stored in self.network - Using train_dataloader for training data and - valid_dataloader for validation. - - Parameters - ---------- - X_train : np.ndarray - Train set to reconstruct in self supervision - eval_set : list of np.array - List of evaluation set - The last one is used for early stopping - eval_name : list of str - List of eval set names. - eval_metric : list of str - List of evaluation metrics. - The last metric is used for early stopping. - loss_fn : callable or None - a PyTorch loss function - should be left to None for self supervised and non experts - pretraining_ratio : float - Between 0 and 1, percentage of feature to mask for reconstruction - weights : np.array - Sampling weights for each example. - max_epochs : int - Maximum number of epochs during training - patience : int - Number of consecutive non improving epoch before early stopping - batch_size : int - Training batch size - virtual_batch_size : int - Batch size for Ghost Batch Normalization (virtual_batch_size < batch_size) - num_workers : int - Number of workers used in torch.utils.data.DataLoader - drop_last : bool - Whether to drop last batch during training - callbacks : list of callback function - List of custom callbacks - pin_memory: bool - Whether to set pin_memory to True or False during training - """ - # update model name - - self.max_epochs = max_epochs - self.patience = patience - self.batch_size = batch_size - self.virtual_batch_size = virtual_batch_size - self.num_workers = num_workers - self.drop_last = drop_last - self.input_dim = X_train.shape[1] - self._stop_training = False - self.pin_memory = pin_memory and (self.device.type != "cpu") - self.pretraining_ratio = pretraining_ratio - eval_set = eval_set if eval_set else [] - - if loss_fn is None: - self.loss_fn = self._default_loss - else: - self.loss_fn = loss_fn - - check_input(X_train) - - self.update_fit_params( - weights, - ) - - # Validate and reformat eval set depending on training data - eval_names = validate_eval_set(eval_set, eval_name, X_train) - train_dataloader, valid_dataloaders = self._construct_loaders(X_train, eval_set) - - if not hasattr(self, "network") or not warm_start: - # model has never been fitted before of warm_start is False - self._set_network() - - self._update_network_params() - self._set_metrics(eval_names) - self._set_optimizer() - self._set_callbacks(callbacks) - - # Call method on_train_begin for all callbacks - self._callback_container.on_train_begin() - - # Training loop over epochs - for epoch_idx in range(self.max_epochs): - - # Call method on_epoch_begin for all callbacks - self._callback_container.on_epoch_begin(epoch_idx) - - self._train_epoch(train_dataloader) - - # Apply predict epoch to all eval sets - for eval_name, valid_dataloader in zip(eval_names, valid_dataloaders): - self._predict_epoch(eval_name, valid_dataloader) - - # Call method on_epoch_end for all callbacks - self._callback_container.on_epoch_end(epoch_idx, logs=self.history.epoch_metrics) - - if self._stop_training: - break - - # Call method on_train_end for all callbacks - self._callback_container.on_train_end() - self.network.eval() - - def _set_network(self): - """Setup the network and explain matrix.""" - if not hasattr(self, 
"pretraining_ratio"): - self.pretraining_ratio = 0.5 - torch.manual_seed(self.seed) - - self.group_matrix = create_group_matrix(self.grouped_features, self.input_dim) - - self.network = tab_network.TabNetPretraining( - self.input_dim, - pretraining_ratio=self.pretraining_ratio, - n_d=self.n_d, - n_a=self.n_a, - n_steps=self.n_steps, - gamma=self.gamma, - cat_idxs=self.cat_idxs, - cat_dims=self.cat_dims, - cat_emb_dim=self.cat_emb_dim, - n_independent=self.n_independent, - n_shared=self.n_shared, - n_shared_decoder=self.n_shared_decoder, - n_indep_decoder=self.n_indep_decoder, - epsilon=self.epsilon, - virtual_batch_size=self.virtual_batch_size, - momentum=self.momentum, - mask_type=self.mask_type, - group_attention_matrix=self.group_matrix.to(self.device), - ).to(self.device) - - self.reducing_matrix = create_explain_matrix( - self.network.input_dim, - self.network.cat_emb_dim, - self.network.cat_idxs, - self.network.post_embed_dim, - ) - - def _update_network_params(self): - self.network.virtual_batch_size = self.virtual_batch_size - self.network.pretraining_ratio = self.pretraining_ratio - - def _set_metrics(self, eval_names): - """Set attributes relative to the metrics. - - Parameters - ---------- - metrics : list of str - List of eval metric names. - eval_names : list of str - List of eval set names. - - """ - metrics = [self._default_metric] - - metrics = check_metrics(metrics) - # Set metric container for each sets - self._metric_container_dict = {} - for name in eval_names: - self._metric_container_dict.update({name: UnsupMetricContainer(metrics, prefix=f"{name}_")}) - - self._metrics = [] - self._metrics_names = [] - for _, metric_container in self._metric_container_dict.items(): - self._metrics.extend(metric_container.metrics) - self._metrics_names.extend(metric_container.names) - - # Early stopping metric is the last eval metric - self.early_stopping_metric = self._metrics_names[-1] if len(self._metrics_names) > 0 else None - - def _construct_loaders(self, X_train, eval_set): - """Generate dataloaders for unsupervised train and eval set. - - Parameters - ---------- - X_train : np.array - Train set. - eval_set : list of tuple - List of eval tuple set (X, y). - - Returns - ------- - train_dataloader : `torch.utils.data.Dataloader` - Training dataloader. - valid_dataloaders : list of `torch.utils.data.Dataloader` - List of validation dataloaders. - - """ - train_dataloader, valid_dataloaders = create_dataloaders( - X_train, - eval_set, - self.updated_weights, - self.batch_size, - self.num_workers, - self.drop_last, - self.pin_memory, - ) - return train_dataloader, valid_dataloaders - - def _train_epoch(self, train_loader): - """ - Trains one epoch of the network in self.network - - Parameters - ---------- - train_loader : a :class: `torch.utils.data.Dataloader` - DataLoader with train set - """ - self.network.train() - - for batch_idx, X in enumerate(train_loader): - self._callback_container.on_batch_begin(batch_idx) - - batch_logs = self._train_batch(X) - - self._callback_container.on_batch_end(batch_idx, batch_logs) - - epoch_logs = {"lr": self._optimizer.param_groups[-1]["lr"]} - self.history.epoch_metrics.update(epoch_logs) - - return - - def _train_batch(self, X): - """ - Trains one batch of data - - Parameters - ---------- - X : torch.Tensor - Train matrix - - Returns - ------- - batch_outs : dict - Dictionnary with "y": target and "score": prediction scores. - batch_logs : dict - Dictionnary with "batch_size" and "loss". 
- """ - batch_logs = {"batch_size": X.shape[0]} - - X = X.to(self.device).float() - - for param in self.network.parameters(): - param.grad = None - - output, embedded_x, obf_vars = self.network(X) - loss = self.compute_loss(output, embedded_x, obf_vars) - - # Perform backward pass and optimization - loss.backward() - if self.clip_value: - clip_grad_norm_(self.network.parameters(), self.clip_value) - self._optimizer.step() - - batch_logs["loss"] = loss.cpu().detach().numpy().item() - - return batch_logs - - def _predict_epoch(self, name, loader): - """ - Predict an epoch and update metrics. - - Parameters - ---------- - name : str - Name of the validation set - loader : torch.utils.data.Dataloader - DataLoader with validation set - """ - # Setting network on evaluation mode - self.network.eval() - - list_output = [] - list_embedded_x = [] - list_obfuscation = [] - # Main loop - for batch_idx, X in enumerate(loader): - output, embedded_x, obf_vars = self._predict_batch(X) - list_output.append(output.cpu().detach().numpy()) - list_embedded_x.append(embedded_x.cpu().detach().numpy()) - list_obfuscation.append(obf_vars.cpu().detach().numpy()) - - output, embedded_x, obf_vars = self.stack_batches(list_output, list_embedded_x, list_obfuscation) - - metrics_logs = self._metric_container_dict[name](output, embedded_x, obf_vars) - self.network.train() - self.history.epoch_metrics.update(metrics_logs) - return - - def _predict_batch(self, X): - """ - Predict one batch of data. - - Parameters - ---------- - X : torch.Tensor - Owned products - - Returns - ------- - np.array - model scores - """ - X = X.to(self.device).float() - return self.network(X) - - def stack_batches(self, list_output, list_embedded_x, list_obfuscation): - output = np.vstack(list_output) - embedded_x = np.vstack(list_embedded_x) - obf_vars = np.vstack(list_obfuscation) - return output, embedded_x, obf_vars - - def predict(self, X): - """ - Make predictions on a batch (valid) - - Parameters - ---------- - X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - - Returns - ------- - predictions : np.array - Predictions of the regression problem - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - results = [] - embedded_res = [] - for batch_nb, data in enumerate(dataloader): - data = data.to(self.device).float() - output, embeded_x, _ = self.network(data) - predictions = output.cpu().detach().numpy() - results.append(predictions) - embedded_res.append(embeded_x.cpu().detach().numpy()) - res_output = np.vstack(results) - embedded_inputs = np.vstack(embedded_res) - return res_output, embedded_inputs diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py deleted file mode 100644 index d35e34f2..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/pretraining_utils.py +++ /dev/null @@ -1,119 +0,0 @@ -from torch.utils.data import DataLoader -from pytorch_tabnet.utils import create_sampler, SparsePredictDataset, PredictDataset, check_input -import scipy - - -def create_dataloaders(X_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory): - """ - Create dataloaders with or without subsampling depending on weights and balanced. 
- - Parameters - ---------- - X_train : np.ndarray or scipy.sparse.csr_matrix - Training data - eval_set : list of np.array (for Xs and ys) or scipy.sparse.csr_matrix (for Xs) - List of eval sets - weights : either 0, 1, dict or iterable - if 0 (default) : no weights will be applied - if 1 : classification only, will balanced class with inverse frequency - if dict : keys are corresponding class values are sample weights - if iterable : list or np array must be of length equal to nb elements - in the training set - batch_size : int - how many samples per batch to load - num_workers : int - how many subprocesses to use for data loading. 0 means that the data - will be loaded in the main process - drop_last : bool - set to True to drop the last incomplete batch, if the dataset size is not - divisible by the batch size. If False and the size of dataset is not - divisible by the batch size, then the last batch will be smaller - pin_memory : bool - Whether to pin GPU memory during training - - Returns - ------- - train_dataloader, valid_dataloader : torch.DataLoader, torch.DataLoader - Training and validation dataloaders - """ - need_shuffle, sampler = create_sampler(weights, X_train) - - if scipy.sparse.issparse(X_train): - train_dataloader = DataLoader( - SparsePredictDataset(X_train), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - else: - train_dataloader = DataLoader( - PredictDataset(X_train), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - - valid_dataloaders = [] - for X in eval_set: - if scipy.sparse.issparse(X): - valid_dataloaders.append( - DataLoader( - SparsePredictDataset(X), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - ) - else: - valid_dataloaders.append( - DataLoader( - PredictDataset(X), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - ) - - return train_dataloader, valid_dataloaders - - -def validate_eval_set(eval_set, eval_name, X_train): - """Check if the shapes of eval_set are compatible with X_train. - - Parameters - ---------- - eval_set : List of numpy array - The list evaluation set. - The last one is used for early stopping - X_train : np.ndarray - Train owned products - - Returns - ------- - eval_names : list of str - Validated list of eval_names. 
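The weights=1 option documented above means inverse class-frequency sampling weights. A self-contained sketch of the idea (assumed to match what create_sampler builds; that helper is not shown in this patch):

    import numpy as np

    y = np.array([0, 0, 0, 1])            # imbalanced labels
    freq = np.bincount(y) / len(y)        # class frequencies: [0.75, 0.25]
    sample_w = 1.0 / freq[y]              # weights=1: inverse frequency per sample
    print(sample_w)                       # [1.3333... 1.3333... 1.3333... 4.]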
- - """ - eval_names = eval_name or [f"val_{i}" for i in range(len(eval_set))] - assert len(eval_set) == len(eval_names), "eval_set and eval_name have not the same length" - - for set_nb, X in enumerate(eval_set): - check_input(X) - msg = ( - f"Number of columns is different between eval set {set_nb}" - + f"({X.shape[1]}) and X_train ({X_train.shape[1]})" - ) - assert X.shape[1] == X_train.shape[1], msg - return eval_names diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py deleted file mode 100644 index 53a71792..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/sparsemax.py +++ /dev/null @@ -1,276 +0,0 @@ -from torch import nn -from torch.autograd import Function -import torch.nn.functional as F - -import torch - -""" -Other possible implementations: -https://github.com/KrisKorrel/sparsemax-pytorch/blob/master/sparsemax.py -https://github.com/msobroza/SparsemaxPytorch/blob/master/mnist/sparsemax.py -https://github.com/vene/sparse-structured-attention/blob/master/pytorch/torchsparseattn/sparsemax.py -""" - - -# credits to Yandex https://github.com/Qwicen/node/blob/master/lib/nn_utils.py -def _make_ix_like(input, dim=0): - d = input.size(dim) - rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype) - view = [1] * input.dim() - view[0] = -1 - return rho.view(view).transpose(0, dim) - - -class SparsemaxFunction(Function): - """ - An implementation of sparsemax (Martins & Astudillo, 2016). See - :cite:`DBLP:journals/corr/MartinsA16` for detailed description. - By Ben Peters and Vlad Niculae - """ - - @staticmethod - def forward(ctx, input, dim=-1): - """sparsemax: normalizing sparse transform (a la softmax) - - Parameters - ---------- - ctx : torch.autograd.function._ContextMethodMixin - input : torch.Tensor - any shape - dim : int - dimension along which to apply sparsemax - - Returns - ------- - output : torch.Tensor - same shape as input - - """ - ctx.dim = dim - max_val, _ = input.max(dim=dim, keepdim=True) - input -= max_val # same numerical stability trick as for softmax - tau, supp_size = SparsemaxFunction._threshold_and_support(input, dim=dim) - output = torch.clamp(input - tau, min=0) - ctx.save_for_backward(supp_size, output) - return output - - @staticmethod - def backward(ctx, grad_output): - supp_size, output = ctx.saved_tensors - dim = ctx.dim - grad_input = grad_output.clone() - grad_input[output == 0] = 0 - - v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze() - v_hat = v_hat.unsqueeze(dim) - grad_input = torch.where(output != 0, grad_input - v_hat, grad_input) - return grad_input, None - - @staticmethod - def _threshold_and_support(input, dim=-1): - """Sparsemax building block: compute the threshold - - Parameters - ---------- - input: torch.Tensor - any dimension - dim : int - dimension along which to apply the sparsemax - - Returns - ------- - tau : torch.Tensor - the threshold value - support_size : torch.Tensor - - """ - - input_srt, _ = torch.sort(input, descending=True, dim=dim) - input_cumsum = input_srt.cumsum(dim) - 1 - rhos = _make_ix_like(input, dim) - support = rhos * input_srt > input_cumsum - - support_size = support.sum(dim=dim).unsqueeze(dim) - tau = input_cumsum.gather(dim, support_size - 1) - tau /= support_size.to(input.dtype) - return tau, support_size - - -sparsemax = SparsemaxFunction.apply - - -class Sparsemax(nn.Module): - def __init__(self, dim=-1): - self.dim = dim - super(Sparsemax, self).__init__() - - def forward(self, 
input): - return sparsemax(input, self.dim) - - -class Entmax15Function(Function): - """ - An implementation of exact Entmax with alpha=1.5 (B. Peters, V. Niculae, A. Martins). See - :cite:`https://arxiv.org/abs/1905.05702` for detailed description. - Source: https://github.com/deep-spin/entmax - """ - - @staticmethod - def forward(ctx, input, dim=-1): - ctx.dim = dim - - max_val, _ = input.max(dim=dim, keepdim=True) - input = input - max_val # same numerical stability trick as for softmax - input = input / 2 # divide by 2 to solve actual Entmax - - tau_star, _ = Entmax15Function._threshold_and_support(input, dim) - output = torch.clamp(input - tau_star, min=0) ** 2 - ctx.save_for_backward(output) - return output - - @staticmethod - def backward(ctx, grad_output): - (Y,) = ctx.saved_tensors - gppr = Y.sqrt() # = 1 / g'' (Y) - dX = grad_output * gppr - q = dX.sum(ctx.dim) / gppr.sum(ctx.dim) - q = q.unsqueeze(ctx.dim) - dX -= q * gppr - return dX, None - - @staticmethod - def _threshold_and_support(input, dim=-1): - Xsrt, _ = torch.sort(input, descending=True, dim=dim) - - rho = _make_ix_like(input, dim) - mean = Xsrt.cumsum(dim) / rho - mean_sq = (Xsrt ** 2).cumsum(dim) / rho - ss = rho * (mean_sq - mean ** 2) - delta = (1 - ss) / rho - - # NOTE this is not exactly the same as in reference algo - # Fortunately it seems the clamped values never wrongly - # get selected by tau <= sorted_z. Prove this! - delta_nz = torch.clamp(delta, 0) - tau = mean - torch.sqrt(delta_nz) - - support_size = (tau <= Xsrt).sum(dim).unsqueeze(dim) - tau_star = tau.gather(dim, support_size - 1) - return tau_star, support_size - - -class Entmoid15(Function): - """ A highly optimized equivalent of lambda x: Entmax15([x, 0]) """ - - @staticmethod - def forward(ctx, input): - output = Entmoid15._forward(input) - ctx.save_for_backward(output) - return output - - @staticmethod - def _forward(input): - input, is_pos = abs(input), input >= 0 - tau = (input + torch.sqrt(F.relu(8 - input ** 2))) / 2 - tau.masked_fill_(tau <= input, 2.0) - y_neg = 0.25 * F.relu(tau - input, inplace=True) ** 2 - return torch.where(is_pos, 1 - y_neg, y_neg) - - @staticmethod - def backward(ctx, grad_output): - return Entmoid15._backward(ctx.saved_tensors[0], grad_output) - - @staticmethod - def _backward(output, grad_output): - gppr0, gppr1 = output.sqrt(), (1 - output).sqrt() - grad_input = grad_output * gppr0 - q = grad_input / (gppr0 + gppr1) - grad_input -= q * gppr0 - return grad_input - - -entmax15 = Entmax15Function.apply -entmoid15 = Entmoid15.apply - - -class Entmax15(nn.Module): - def __init__(self, dim=-1): - self.dim = dim - super(Entmax15, self).__init__() - - def forward(self, input): - return entmax15(input, self.dim) - - -# Credits were lost...
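Since sparsemax is the core of the file removed here, a compact numpy reference may help reviewers (a sketch of Martins & Astudillo, 2016, independent of the deleted torch implementation):

    import numpy as np

    def sparsemax(z):
        # Euclidean projection of z onto the probability simplex.
        z_sorted = np.sort(z)[::-1]
        cssv = np.cumsum(z_sorted) - 1
        rho = np.arange(1, len(z) + 1)
        support = rho * z_sorted > cssv             # coordinates kept in the solution
        k = rho[support][-1]                        # support size
        tau = cssv[support][-1] / k                 # threshold
        return np.maximum(z - tau, 0.0)

    print(sparsemax(np.array([2.0, 1.0, -1.0])))    # [1. 0. 0.] -- exactly sparse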
-# def _make_ix_like(input, dim=0): -# d = input.size(dim) -# rho = torch.arange(1, d + 1, device=input.device, dtype=input.dtype) -# view = [1] * input.dim() -# view[0] = -1 -# return rho.view(view).transpose(0, dim) -# -# -# def _threshold_and_support(input, dim=0): -# """Sparsemax building block: compute the threshold -# Args: -# input: any dimension -# dim: dimension along which to apply the sparsemax -# Returns: -# the threshold value -# """ -# -# input_srt, _ = torch.sort(input, descending=True, dim=dim) -# input_cumsum = input_srt.cumsum(dim) - 1 -# rhos = _make_ix_like(input, dim) -# support = rhos * input_srt > input_cumsum -# -# support_size = support.sum(dim=dim).unsqueeze(dim) -# tau = input_cumsum.gather(dim, support_size - 1) -# tau /= support_size.to(input.dtype) -# return tau, support_size -# -# -# class SparsemaxFunction(Function): -# -# @staticmethod -# def forward(ctx, input, dim=0): -# """sparsemax: normalizing sparse transform (a la softmax) -# Parameters: -# input (Tensor): any shape -# dim: dimension along which to apply sparsemax -# Returns: -# output (Tensor): same shape as input -# """ -# ctx.dim = dim -# max_val, _ = input.max(dim=dim, keepdim=True) -# input -= max_val # same numerical stability trick as for softmax -# tau, supp_size = _threshold_and_support(input, dim=dim) -# output = torch.clamp(input - tau, min=0) -# ctx.save_for_backward(supp_size, output) -# return output -# -# @staticmethod -# def backward(ctx, grad_output): -# supp_size, output = ctx.saved_tensors -# dim = ctx.dim -# grad_input = grad_output.clone() -# grad_input[output == 0] = 0 -# -# v_hat = grad_input.sum(dim=dim) / supp_size.to(output.dtype).squeeze() -# v_hat = v_hat.unsqueeze(dim) -# grad_input = torch.where(output != 0, grad_input - v_hat, grad_input) -# return grad_input, None -# -# -# sparsemax = SparsemaxFunction.apply -# -# -# class Sparsemax(nn.Module): -# -# def __init__(self, dim=0): -# self.dim = dim -# super(Sparsemax, self).__init__() -# -# def forward(self, input): -# return sparsemax(input, self.dim) diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py deleted file mode 100755 index 32115c8c..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_model.py +++ /dev/null @@ -1,146 +0,0 @@ -import torch -import numpy as np -from scipy.special import softmax -from pytorch_tabnet.utils import SparsePredictDataset, PredictDataset, filter_weights -from pytorch_tabnet.abstract_model import TabModel -from pytorch_tabnet.multiclass_utils import infer_output_dim, check_output_dim -from torch.utils.data import DataLoader -import scipy - - -class TabNetClassifier(TabModel): - def __post_init__(self): - super(TabNetClassifier, self).__post_init__() - self._task = "classification" - self._default_loss = torch.nn.functional.cross_entropy - self._default_metric = "accuracy" - - def weight_updater(self, weights): - """ - Updates weights dictionary according to target_mapper. - - Parameters - ---------- - weights : bool or dict - Given weights for balancing training. - - Returns - ------- - bool or dict - Same bool if weights are bool, updated dict otherwise. 
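        Example (an illustrative sketch; the mapper below is assumed for the
        demonstration and is normally built by ``update_fit_params``)::

            >>> clf.target_mapper = {"cat": 0, "dog": 1}
            >>> clf.weight_updater({"cat": 2.0, "dog": 1.0})
            {0: 2.0, 1: 1.0}
            >>> clf.weight_updater(1)  # int flags pass through unchanged
            1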
- - """ - if isinstance(weights, int): - return weights - elif isinstance(weights, dict): - return {self.target_mapper[key]: value for key, value in weights.items()} - else: - return weights - - def prepare_target(self, y): - return np.vectorize(self.target_mapper.get)(y) - - def compute_loss(self, y_pred, y_true): - return self.loss_fn(y_pred, y_true.long()) - - def update_fit_params( - self, - X_train, - y_train, - eval_set, - weights, - ): - output_dim, train_labels = infer_output_dim(y_train) - for X, y in eval_set: - check_output_dim(train_labels, y) - self.output_dim = output_dim - self._default_metric = "auc" if self.output_dim == 2 else "accuracy" - self.classes_ = train_labels - self.target_mapper = {class_label: index for index, class_label in enumerate(self.classes_)} - self.preds_mapper = {str(index): class_label for index, class_label in enumerate(self.classes_)} - self.updated_weights = self.weight_updater(weights) - - def stack_batches(self, list_y_true, list_y_score): - y_true = np.hstack(list_y_true) - y_score = np.vstack(list_y_score) - y_score = softmax(y_score, axis=1) - return y_true, y_score - - def predict_func(self, outputs): - outputs = np.argmax(outputs, axis=1) - return np.vectorize(self.preds_mapper.get)(outputs.astype(str)) - - def predict_proba(self, X): - """ - Make predictions for classification on a batch (valid) - - Parameters - ---------- - X : a :tensor: `torch.Tensor` or matrix: `scipy.sparse.csr_matrix` - Input data - - Returns - ------- - res : np.ndarray - - """ - self.network.eval() - - if scipy.sparse.issparse(X): - dataloader = DataLoader( - SparsePredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - else: - dataloader = DataLoader( - PredictDataset(X), - batch_size=self.batch_size, - shuffle=False, - ) - - results = [] - for batch_nb, data in enumerate(dataloader): - data = data.to(self.device).float() - - output, M_loss = self.network(data) - predictions = torch.nn.Softmax(dim=1)(output).cpu().detach().numpy() - results.append(predictions) - res = np.vstack(results) - return res - - -class TabNetRegressor(TabModel): - def __post_init__(self): - super(TabNetRegressor, self).__post_init__() - self._task = "regression" - self._default_loss = torch.nn.functional.mse_loss - self._default_metric = "mse" - - def prepare_target(self, y): - return y - - def compute_loss(self, y_pred, y_true): - return self.loss_fn(y_pred, y_true) - - def update_fit_params(self, X_train, y_train, eval_set, weights): - if len(y_train.shape) != 2: - msg = ( - "Targets should be 2D : (n_samples, n_regression) " - + f"but y_train.shape={y_train.shape} given.\n" - + "Use reshape(-1, 1) for single regression." 
- ) - raise ValueError(msg) - self.output_dim = y_train.shape[1] - self.preds_mapper = None - - self.updated_weights = weights - filter_weights(self.updated_weights) - - def predict_func(self, outputs): - return outputs - - def stack_batches(self, list_y_true, list_y_score): - y_true = np.vstack(list_y_true) - y_score = np.vstack(list_y_score) - return y_true, y_score diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py deleted file mode 100644 index 4cc67f55..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/tab_network.py +++ /dev/null @@ -1,908 +0,0 @@ -import torch -from torch.nn import Linear, BatchNorm1d, ReLU -import numpy as np -from pytorch_tabnet import sparsemax - - -def initialize_non_glu(module, input_dim, output_dim): - gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(4 * input_dim)) - torch.nn.init.xavier_normal_(module.weight, gain=gain_value) - # torch.nn.init.zeros_(module.bias) - return - - -def initialize_glu(module, input_dim, output_dim): - gain_value = np.sqrt((input_dim + output_dim) / np.sqrt(input_dim)) - torch.nn.init.xavier_normal_(module.weight, gain=gain_value) - # torch.nn.init.zeros_(module.bias) - return - - -class GBN(torch.nn.Module): - """ - Ghost Batch Normalization - https://arxiv.org/abs/1705.08741 - """ - - def __init__(self, input_dim, virtual_batch_size=128, momentum=0.01): - super(GBN, self).__init__() - - self.input_dim = input_dim - self.virtual_batch_size = virtual_batch_size - self.bn = BatchNorm1d(self.input_dim, momentum=momentum) - - def forward(self, x): - chunks = x.chunk(int(np.ceil(x.shape[0] / self.virtual_batch_size)), 0) - res = [self.bn(x_) for x_ in chunks] - - return torch.cat(res, dim=0) - - -class TabNetEncoder(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - group_attention_matrix=None, - ): - """ - Defines main part of the TabNet network without the embedding layers. - - Parameters - ---------- - input_dim : int - Number of features - output_dim : int or list of int for multi task classification - Dimension of network output - examples : one for regression, 2 for binary classification etc... 
- n_d : int - Dimension of the prediction layer (usually between 4 and 64) - n_a : int - Dimension of the attention layer (usually between 4 and 64) - n_steps : int - Number of successive steps in the network (usually between 3 and 10) - gamma : float - Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) - n_independent : int - Number of independent GLU layer in each GLU block (default 2) - n_shared : int - Number of independent GLU layer in each GLU block (default 2) - epsilon : float - Avoid log(0), this should be kept very low - virtual_batch_size : int - Batch size for Ghost Batch Normalization - momentum : float - Float value between 0 and 1 which will be used for momentum in all batch norm - mask_type : str - Either "sparsemax" or "entmax" : this is the masking function to use - group_attention_matrix : torch matrix - Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j - """ - super(TabNetEncoder, self).__init__() - self.input_dim = input_dim - self.output_dim = output_dim - self.is_multi_task = isinstance(output_dim, list) - self.n_d = n_d - self.n_a = n_a - self.n_steps = n_steps - self.gamma = gamma - self.epsilon = epsilon - self.n_independent = n_independent - self.n_shared = n_shared - self.virtual_batch_size = virtual_batch_size - self.mask_type = mask_type - self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01) - self.group_attention_matrix = group_attention_matrix - - if self.group_attention_matrix is None: - # no groups - self.group_attention_matrix = torch.eye(self.input_dim) - self.attention_dim = self.input_dim - else: - self.attention_dim = self.group_attention_matrix.shape[0] - - if self.n_shared > 0: - shared_feat_transform = torch.nn.ModuleList() - for i in range(self.n_shared): - if i == 0: - shared_feat_transform.append(Linear(self.input_dim, 2 * (n_d + n_a), bias=False)) - else: - shared_feat_transform.append(Linear(n_d + n_a, 2 * (n_d + n_a), bias=False)) - - else: - shared_feat_transform = None - - self.initial_splitter = FeatTransformer( - self.input_dim, - n_d + n_a, - shared_feat_transform, - n_glu_independent=self.n_independent, - virtual_batch_size=self.virtual_batch_size, - momentum=momentum, - ) - - self.feat_transformers = torch.nn.ModuleList() - self.att_transformers = torch.nn.ModuleList() - - for step in range(n_steps): - transformer = FeatTransformer( - self.input_dim, - n_d + n_a, - shared_feat_transform, - n_glu_independent=self.n_independent, - virtual_batch_size=self.virtual_batch_size, - momentum=momentum, - ) - attention = AttentiveTransformer( - n_a, - self.attention_dim, - group_matrix=group_attention_matrix, - virtual_batch_size=self.virtual_batch_size, - momentum=momentum, - mask_type=self.mask_type, - ) - self.feat_transformers.append(transformer) - self.att_transformers.append(attention) - - def forward(self, x, prior=None): - x = self.initial_bn(x) - - bs = x.shape[0] # batch size - if prior is None: - prior = torch.ones((bs, self.attention_dim)).to(x.device) - - M_loss = 0 - att = self.initial_splitter(x)[:, self.n_d :] - steps_output = [] - for step in range(self.n_steps): - M = self.att_transformers[step](prior, att) - M_loss += torch.mean(torch.sum(torch.mul(M, torch.log(M + self.epsilon)), dim=1)) - # update prior - prior = torch.mul(self.gamma - M, prior) - # output - M_feature_level = torch.matmul(M, self.group_attention_matrix) - masked_x = torch.mul(M_feature_level, x) - out = self.feat_transformers[step](masked_x) - d = ReLU()(out[:, : self.n_d]) - 
steps_output.append(d) - # update attention - att = out[:, self.n_d :] - - M_loss /= self.n_steps - return steps_output, M_loss - - def forward_masks(self, x): - x = self.initial_bn(x) - bs = x.shape[0] # batch size - prior = torch.ones((bs, self.attention_dim)).to(x.device) - M_explain = torch.zeros(x.shape).to(x.device) - att = self.initial_splitter(x)[:, self.n_d :] - masks = {} - - for step in range(self.n_steps): - M = self.att_transformers[step](prior, att) - M_feature_level = torch.matmul(M, self.group_attention_matrix) - masks[step] = M_feature_level - # update prior - prior = torch.mul(self.gamma - M, prior) - # output - masked_x = torch.mul(M_feature_level, x) - out = self.feat_transformers[step](masked_x) - d = ReLU()(out[:, : self.n_d]) - # explain - step_importance = torch.sum(d, dim=1) - M_explain += torch.mul(M_feature_level, step_importance.unsqueeze(dim=1)) - # update attention - att = out[:, self.n_d :] - - return M_explain, masks - - -class TabNetDecoder(torch.nn.Module): - def __init__( - self, - input_dim, - n_d=8, - n_steps=3, - n_independent=1, - n_shared=1, - virtual_batch_size=128, - momentum=0.02, - ): - """ - Defines main part of the TabNet network without the embedding layers. - - Parameters - ---------- - input_dim : int - Number of features - output_dim : int or list of int for multi task classification - Dimension of network output - examples : one for regression, 2 for binary classification etc... - n_d : int - Dimension of the prediction layer (usually between 4 and 64) - n_steps : int - Number of successive steps in the network (usually between 3 and 10) - gamma : float - Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) - n_independent : int - Number of independent GLU layer in each GLU block (default 1) - n_shared : int - Number of independent GLU layer in each GLU block (default 1) - virtual_batch_size : int - Batch size for Ghost Batch Normalization - momentum : float - Float value between 0 and 1 which will be used for momentum in all batch norm - """ - super(TabNetDecoder, self).__init__() - self.input_dim = input_dim - self.n_d = n_d - self.n_steps = n_steps - self.n_independent = n_independent - self.n_shared = n_shared - self.virtual_batch_size = virtual_batch_size - - self.feat_transformers = torch.nn.ModuleList() - - if self.n_shared > 0: - shared_feat_transform = torch.nn.ModuleList() - for i in range(self.n_shared): - shared_feat_transform.append(Linear(n_d, 2 * n_d, bias=False)) - else: - shared_feat_transform = None - - for step in range(n_steps): - transformer = FeatTransformer( - n_d, - n_d, - shared_feat_transform, - n_glu_independent=self.n_independent, - virtual_batch_size=self.virtual_batch_size, - momentum=momentum, - ) - self.feat_transformers.append(transformer) - - self.reconstruction_layer = Linear(n_d, self.input_dim, bias=False) - initialize_non_glu(self.reconstruction_layer, n_d, self.input_dim) - - def forward(self, steps_output): - res = 0 - for step_nb, step_output in enumerate(steps_output): - x = self.feat_transformers[step_nb](step_output) - res = torch.add(res, x) - res = self.reconstruction_layer(res) - return res - - -class TabNetPretraining(torch.nn.Module): - def __init__( - self, - input_dim, - pretraining_ratio=0.2, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - cat_idxs=[], - cat_dims=[], - cat_emb_dim=1, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - n_shared_decoder=1, - n_indep_decoder=1, - 
group_attention_matrix=None, - ): - super(TabNetPretraining, self).__init__() - - self.cat_idxs = cat_idxs or [] - self.cat_dims = cat_dims or [] - self.cat_emb_dim = cat_emb_dim - - self.input_dim = input_dim - self.n_d = n_d - self.n_a = n_a - self.n_steps = n_steps - self.gamma = gamma - self.epsilon = epsilon - self.n_independent = n_independent - self.n_shared = n_shared - self.mask_type = mask_type - self.pretraining_ratio = pretraining_ratio - self.n_shared_decoder = n_shared_decoder - self.n_indep_decoder = n_indep_decoder - - if self.n_steps <= 0: - raise ValueError("n_steps should be a positive integer.") - if self.n_independent == 0 and self.n_shared == 0: - raise ValueError("n_shared and n_independent can't be both zero.") - - self.virtual_batch_size = virtual_batch_size - self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim, group_attention_matrix) - self.post_embed_dim = self.embedder.post_embed_dim - - self.masker = RandomObfuscator(self.pretraining_ratio, group_matrix=self.embedder.embedding_group_matrix) - self.encoder = TabNetEncoder( - input_dim=self.post_embed_dim, - output_dim=self.post_embed_dim, - n_d=n_d, - n_a=n_a, - n_steps=n_steps, - gamma=gamma, - n_independent=n_independent, - n_shared=n_shared, - epsilon=epsilon, - virtual_batch_size=virtual_batch_size, - momentum=momentum, - mask_type=mask_type, - group_attention_matrix=self.embedder.embedding_group_matrix, - ) - self.decoder = TabNetDecoder( - self.post_embed_dim, - n_d=n_d, - n_steps=n_steps, - n_independent=self.n_indep_decoder, - n_shared=self.n_shared_decoder, - virtual_batch_size=virtual_batch_size, - momentum=momentum, - ) - - def forward(self, x): - """ - Returns: res, embedded_x, obf_vars - res : output of reconstruction - embedded_x : embedded input - obf_vars : which variable where obfuscated - """ - embedded_x = self.embedder(x) - if self.training: - masked_x, obfuscated_groups, obfuscated_vars = self.masker(embedded_x) - # set prior of encoder with obfuscated groups - prior = 1 - obfuscated_groups - steps_out, _ = self.encoder(masked_x, prior=prior) - res = self.decoder(steps_out) - return res, embedded_x, obfuscated_vars - else: - steps_out, _ = self.encoder(embedded_x) - res = self.decoder(steps_out) - return res, embedded_x, torch.ones(embedded_x.shape).to(x.device) - - def forward_masks(self, x): - embedded_x = self.embedder(x) - return self.encoder.forward_masks(embedded_x) - - -class TabNetNoEmbeddings(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - group_attention_matrix=None, - ): - """ - Defines main part of the TabNet network without the embedding layers. - - Parameters - ---------- - input_dim : int - Number of features - output_dim : int or list of int for multi task classification - Dimension of network output - examples : one for regression, 2 for binary classification etc... 
- n_d : int - Dimension of the prediction layer (usually between 4 and 64) - n_a : int - Dimension of the attention layer (usually between 4 and 64) - n_steps : int - Number of successive steps in the network (usually between 3 and 10) - gamma : float - Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) - n_independent : int - Number of independent GLU layer in each GLU block (default 2) - n_shared : int - Number of independent GLU layer in each GLU block (default 2) - epsilon : float - Avoid log(0), this should be kept very low - virtual_batch_size : int - Batch size for Ghost Batch Normalization - momentum : float - Float value between 0 and 1 which will be used for momentum in all batch norm - mask_type : str - Either "sparsemax" or "entmax" : this is the masking function to use - group_attention_matrix : torch matrix - Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j - """ - super(TabNetNoEmbeddings, self).__init__() - self.input_dim = input_dim - self.output_dim = output_dim - self.is_multi_task = isinstance(output_dim, list) - self.n_d = n_d - self.n_a = n_a - self.n_steps = n_steps - self.gamma = gamma - self.epsilon = epsilon - self.n_independent = n_independent - self.n_shared = n_shared - self.virtual_batch_size = virtual_batch_size - self.mask_type = mask_type - self.initial_bn = BatchNorm1d(self.input_dim, momentum=0.01) - - self.encoder = TabNetEncoder( - input_dim=input_dim, - output_dim=output_dim, - n_d=n_d, - n_a=n_a, - n_steps=n_steps, - gamma=gamma, - n_independent=n_independent, - n_shared=n_shared, - epsilon=epsilon, - virtual_batch_size=virtual_batch_size, - momentum=momentum, - mask_type=mask_type, - group_attention_matrix=group_attention_matrix, - ) - - if self.is_multi_task: - self.multi_task_mappings = torch.nn.ModuleList() - for task_dim in output_dim: - task_mapping = Linear(n_d, task_dim, bias=False) - initialize_non_glu(task_mapping, n_d, task_dim) - self.multi_task_mappings.append(task_mapping) - else: - self.final_mapping = Linear(n_d, output_dim, bias=False) - initialize_non_glu(self.final_mapping, n_d, output_dim) - - def forward(self, x): - res = 0 - steps_output, M_loss = self.encoder(x) - res = torch.sum(torch.stack(steps_output, dim=0), dim=0) - - if self.is_multi_task: - # Result will be in list format - out = [] - for task_mapping in self.multi_task_mappings: - out.append(task_mapping(res)) - else: - out = self.final_mapping(res) - return out, M_loss - - def forward_masks(self, x): - return self.encoder.forward_masks(x) - - -class TabNet(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - n_d=8, - n_a=8, - n_steps=3, - gamma=1.3, - cat_idxs=[], - cat_dims=[], - cat_emb_dim=1, - n_independent=2, - n_shared=2, - epsilon=1e-15, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - group_attention_matrix=[], - ): - """ - Defines TabNet network - - Parameters - ---------- - input_dim : int - Initial number of features - output_dim : int - Dimension of network output - examples : one for regression, 2 for binary classification etc... 
- n_d : int - Dimension of the prediction layer (usually between 4 and 64) - n_a : int - Dimension of the attention layer (usually between 4 and 64) - n_steps : int - Number of successive steps in the network (usually between 3 and 10) - gamma : float - Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0) - cat_idxs : list of int - Index of each categorical column in the dataset - cat_dims : list of int - Number of categories in each categorical column - cat_emb_dim : int or list of int - Size of the embedding of categorical features - if int, all categorical features will have same embedding size - if list of int, every corresponding feature will have specific size - n_independent : int - Number of independent GLU layer in each GLU block (default 2) - n_shared : int - Number of independent GLU layer in each GLU block (default 2) - epsilon : float - Avoid log(0), this should be kept very low - virtual_batch_size : int - Batch size for Ghost Batch Normalization - momentum : float - Float value between 0 and 1 which will be used for momentum in all batch norm - mask_type : str - Either "sparsemax" or "entmax" : this is the masking function to use - group_attention_matrix : torch matrix - Matrix of size (n_groups, input_dim), m_ij = importance within group i of feature j - """ - super(TabNet, self).__init__() - self.cat_idxs = cat_idxs or [] - self.cat_dims = cat_dims or [] - self.cat_emb_dim = cat_emb_dim - - self.input_dim = input_dim - self.output_dim = output_dim - self.n_d = n_d - self.n_a = n_a - self.n_steps = n_steps - self.gamma = gamma - self.epsilon = epsilon - self.n_independent = n_independent - self.n_shared = n_shared - self.mask_type = mask_type - - if self.n_steps <= 0: - raise ValueError("n_steps should be a positive integer.") - if self.n_independent == 0 and self.n_shared == 0: - raise ValueError("n_shared and n_independent can't be both zero.") - - self.virtual_batch_size = virtual_batch_size - self.embedder = EmbeddingGenerator(input_dim, cat_dims, cat_idxs, cat_emb_dim, group_attention_matrix) - self.post_embed_dim = self.embedder.post_embed_dim - - self.tabnet = TabNetNoEmbeddings( - self.post_embed_dim, - output_dim, - n_d, - n_a, - n_steps, - gamma, - n_independent, - n_shared, - epsilon, - virtual_batch_size, - momentum, - mask_type, - self.embedder.embedding_group_matrix, - ) - - def forward(self, x): - x = self.embedder(x) - return self.tabnet(x) - - def forward_masks(self, x): - x = self.embedder(x) - return self.tabnet.forward_masks(x) - - -class AttentiveTransformer(torch.nn.Module): - def __init__( - self, - input_dim, - group_dim, - group_matrix, - virtual_batch_size=128, - momentum=0.02, - mask_type="sparsemax", - ): - """ - Initialize an attention transformer. 
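        In the forward pass it applies a linear layer and ghost batch
        normalization to the shared attention representation, multiplies the
        result by the running prior, and feeds it through the sparsemax/entmax
        selector to produce the step's feature-selection mask.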
- - Parameters - ---------- - input_dim : int - Input size - group_dim : int - Number of groups for features - virtual_batch_size : int - Batch size for Ghost Batch Normalization - momentum : float - Float value between 0 and 1 which will be used for momentum in batch norm - mask_type : str - Either "sparsemax" or "entmax" : this is the masking function to use - """ - super(AttentiveTransformer, self).__init__() - self.fc = Linear(input_dim, group_dim, bias=False) - initialize_non_glu(self.fc, input_dim, group_dim) - self.bn = GBN(group_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) - - if mask_type == "sparsemax": - # Sparsemax - self.selector = sparsemax.Sparsemax(dim=-1) - elif mask_type == "entmax": - # Entmax - self.selector = sparsemax.Entmax15(dim=-1) - else: - raise NotImplementedError("Please choose either sparsemax" + "or entmax as masktype") - - def forward(self, priors, processed_feat): - x = self.fc(processed_feat) - x = self.bn(x) - x = torch.mul(x, priors) - x = self.selector(x) - return x - - -class FeatTransformer(torch.nn.Module): - def __init__( - self, - input_dim, - output_dim, - shared_layers, - n_glu_independent, - virtual_batch_size=128, - momentum=0.02, - ): - super(FeatTransformer, self).__init__() - """ - Initialize a feature transformer. - - Parameters - ---------- - input_dim : int - Input size - output_dim : int - Output_size - shared_layers : torch.nn.ModuleList - The shared block that should be common to every step - n_glu_independent : int - Number of independent GLU layers - virtual_batch_size : int - Batch size for Ghost Batch Normalization within GLU block(s) - momentum : float - Float value between 0 and 1 which will be used for momentum in batch norm - """ - - params = { - "n_glu": n_glu_independent, - "virtual_batch_size": virtual_batch_size, - "momentum": momentum, - } - - if shared_layers is None: - # no shared layers - self.shared = torch.nn.Identity() - is_first = True - else: - self.shared = GLU_Block( - input_dim, - output_dim, - first=True, - shared_layers=shared_layers, - n_glu=len(shared_layers), - virtual_batch_size=virtual_batch_size, - momentum=momentum, - ) - is_first = False - - if n_glu_independent == 0: - # no independent layers - self.specifics = torch.nn.Identity() - else: - spec_input_dim = input_dim if is_first else output_dim - self.specifics = GLU_Block(spec_input_dim, output_dim, first=is_first, **params) - - def forward(self, x): - x = self.shared(x) - x = self.specifics(x) - return x - - -class GLU_Block(torch.nn.Module): - """ - Independent GLU block, specific to each step - """ - - def __init__( - self, - input_dim, - output_dim, - n_glu=2, - first=False, - shared_layers=None, - virtual_batch_size=128, - momentum=0.02, - ): - super(GLU_Block, self).__init__() - self.first = first - self.shared_layers = shared_layers - self.n_glu = n_glu - self.glu_layers = torch.nn.ModuleList() - - params = {"virtual_batch_size": virtual_batch_size, "momentum": momentum} - - fc = shared_layers[0] if shared_layers else None - self.glu_layers.append(GLU_Layer(input_dim, output_dim, fc=fc, **params)) - for glu_id in range(1, self.n_glu): - fc = shared_layers[glu_id] if shared_layers else None - self.glu_layers.append(GLU_Layer(output_dim, output_dim, fc=fc, **params)) - - def forward(self, x): - scale = torch.sqrt(torch.FloatTensor([0.5]).to(x.device)) - if self.first: # the first layer of the block has no scale multiplication - x = self.glu_layers[0](x) - layers_left = range(1, self.n_glu) - else: - layers_left = 
range(self.n_glu) - - for glu_id in layers_left: - x = torch.add(x, self.glu_layers[glu_id](x)) - x = x * scale - return x - - -class GLU_Layer(torch.nn.Module): - def __init__(self, input_dim, output_dim, fc=None, virtual_batch_size=128, momentum=0.02): - super(GLU_Layer, self).__init__() - - self.output_dim = output_dim - if fc: - self.fc = fc - else: - self.fc = Linear(input_dim, 2 * output_dim, bias=False) - initialize_glu(self.fc, input_dim, 2 * output_dim) - - self.bn = GBN(2 * output_dim, virtual_batch_size=virtual_batch_size, momentum=momentum) - - def forward(self, x): - x = self.fc(x) - x = self.bn(x) - out = torch.mul(x[:, : self.output_dim], torch.sigmoid(x[:, self.output_dim :])) - return out - - -class EmbeddingGenerator(torch.nn.Module): - """ - Classical embeddings generator - """ - - def __init__(self, input_dim, cat_dims, cat_idxs, cat_emb_dims, group_matrix): - """This is an embedding module for an entire set of features - - Parameters - ---------- - input_dim : int - Number of features coming as input (number of columns) - cat_dims : list of int - Number of modalities for each categorial features - If the list is empty, no embeddings will be done - cat_idxs : list of int - Positional index for each categorical features in inputs - cat_emb_dim : list of int - Embedding dimension for each categorical features - If int, the same embedding dimension will be used for all categorical features - group_matrix : torch matrix - Original group matrix before embeddings - """ - super(EmbeddingGenerator, self).__init__() - - if cat_dims == [] and cat_idxs == []: - self.skip_embedding = True - self.post_embed_dim = input_dim - self.embedding_group_matrix = group_matrix.to(group_matrix.device) - return - else: - self.skip_embedding = False - - self.post_embed_dim = int(input_dim + np.sum(cat_emb_dims) - len(cat_emb_dims)) - - self.embeddings = torch.nn.ModuleList() - - for cat_dim, emb_dim in zip(cat_dims, cat_emb_dims): - self.embeddings.append(torch.nn.Embedding(cat_dim, emb_dim)) - - # record continuous indices - self.continuous_idx = torch.ones(input_dim, dtype=torch.bool) - self.continuous_idx[cat_idxs] = 0 - - # update group matrix - n_groups = group_matrix.shape[0] - self.embedding_group_matrix = torch.empty((n_groups, self.post_embed_dim), device=group_matrix.device) - for group_idx in range(n_groups): - post_emb_idx = 0 - cat_feat_counter = 0 - for init_feat_idx in range(input_dim): - if self.continuous_idx[init_feat_idx] == 1: - # this means that no embedding is applied to this column - self.embedding_group_matrix[group_idx, post_emb_idx] = group_matrix[ - group_idx, init_feat_idx - ] # noqa - post_emb_idx += 1 - else: - # this is a categorical feature which creates multiple embeddings - n_embeddings = cat_emb_dims[cat_feat_counter] - self.embedding_group_matrix[group_idx, post_emb_idx : post_emb_idx + n_embeddings] = ( - group_matrix[group_idx, init_feat_idx] / n_embeddings - ) # noqa - post_emb_idx += n_embeddings - cat_feat_counter += 1 - - def forward(self, x): - """ - Apply embeddings to inputs - Inputs should be (batch_size, input_dim) - Outputs will be of size (batch_size, self.post_embed_dim) - """ - if self.skip_embedding: - # no embeddings required - return x - - cols = [] - cat_feat_counter = 0 - for feat_init_idx, is_continuous in enumerate(self.continuous_idx): - # Enumerate through continuous idx boolean mask to apply embeddings - if is_continuous: - cols.append(x[:, feat_init_idx].float().view(-1, 1)) - else: - cols.append(self.embeddings[cat_feat_counter](x[:, 
feat_init_idx].long())) - cat_feat_counter += 1 - # concat - post_embeddings = torch.cat(cols, dim=1) - return post_embeddings - - -class RandomObfuscator(torch.nn.Module): - """ - Create and applies obfuscation masks. - The obfuscation is done at group level to match attention. - """ - - def __init__(self, pretraining_ratio, group_matrix): - """ - This create random obfuscation for self suppervised pretraining - Parameters - ---------- - pretraining_ratio : float - Ratio of feature to randomly discard for reconstruction - - """ - super(RandomObfuscator, self).__init__() - self.pretraining_ratio = pretraining_ratio - # group matrix is set to boolean here to pass all posssible information - self.group_matrix = (group_matrix > 0) + 0.0 - self.num_groups = group_matrix.shape[0] - - def forward(self, x): - """ - Generate random obfuscation mask. - - Returns - ------- - masked input and obfuscated variables. - """ - bs = x.shape[0] - - obfuscated_groups = torch.bernoulli(self.pretraining_ratio * torch.ones((bs, self.num_groups), device=x.device)) - obfuscated_vars = torch.matmul(obfuscated_groups, self.group_matrix) - masked_input = torch.mul(1 - obfuscated_vars, x) - return masked_input, obfuscated_groups, obfuscated_vars diff --git a/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py b/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py deleted file mode 100644 index 52d15a72..00000000 --- a/lightautoml/ml_algo/torch_based/pytorch_tabnet/utils.py +++ /dev/null @@ -1,529 +0,0 @@ -from torch.utils.data import Dataset -from torch.utils.data import DataLoader, WeightedRandomSampler -import torch -import numpy as np -import scipy -import json -from sklearn.utils import check_array -import pandas as pd -import warnings - - -class TorchDataset(Dataset): - """ - Format for numpy array - - Parameters - ---------- - X : 2D array - The input matrix - y : 2D array - The one-hot encoded target - """ - - def __init__(self, x, y): - self.x = x - self.y = y - - def __len__(self): - return len(self.x) - - def __getitem__(self, index): - x, y = self.x[index], self.y[index] - return x, y - - -class SparseTorchDataset(Dataset): - """ - Format for csr_matrix - - Parameters - ---------- - X : CSR matrix - The input matrix - y : 2D array - The one-hot encoded target - """ - - def __init__(self, x, y): - self.x = x - self.y = y - - def __len__(self): - return self.x.shape[0] - - def __getitem__(self, index): - x = torch.from_numpy(self.x[index].toarray()[0]).float() - y = self.y[index] - return x, y - - -class PredictDataset(Dataset): - """ - Format for numpy array - - Parameters - ---------- - X : 2D array - The input matrix - """ - - def __init__(self, x): - self.x = x - - def __len__(self): - return len(self.x) - - def __getitem__(self, index): - x = self.x[index] - return x - - -class SparsePredictDataset(Dataset): - """ - Format for csr_matrix - - Parameters - ---------- - X : CSR matrix - The input matrix - """ - - def __init__(self, x): - self.x = x - - def __len__(self): - return self.x.shape[0] - - def __getitem__(self, index): - x = torch.from_numpy(self.x[index].toarray()[0]).float() - return x - - -def create_sampler(weights, y_train): - """ - This creates a sampler from the given weights - - Parameters - ---------- - weights : either 0, 1, dict or iterable - if 0 (default) : no weights will be applied - if 1 : classification only, will balanced class with inverse frequency - if dict : keys are corresponding class values are sample weights - if iterable : list or np array must be of length equal 
to nb elements - in the training set - y_train : np.array - Training targets - """ - if isinstance(weights, int): - if weights == 0: - need_shuffle = True - sampler = None - elif weights == 1: - need_shuffle = False - class_sample_count = np.array([len(np.where(y_train == t)[0]) for t in np.unique(y_train)]) - - weights = 1.0 / class_sample_count - - samples_weight = np.array([weights[t] for t in y_train]) - - samples_weight = torch.from_numpy(samples_weight) - samples_weight = samples_weight.double() - sampler = WeightedRandomSampler(samples_weight, len(samples_weight)) - else: - raise ValueError("Weights should be either 0, 1, dictionnary or list.") - elif isinstance(weights, dict): - # custom weights per class - need_shuffle = False - samples_weight = np.array([weights[t] for t in y_train]) - sampler = WeightedRandomSampler(samples_weight, len(samples_weight)) - else: - # custom weights - if len(weights) != len(y_train): - raise ValueError("Custom weights should match number of train samples.") - need_shuffle = False - samples_weight = np.array(weights) - sampler = WeightedRandomSampler(samples_weight, len(samples_weight)) - return need_shuffle, sampler - - -def create_dataloaders(X_train, y_train, eval_set, weights, batch_size, num_workers, drop_last, pin_memory): - """ - Create dataloaders with or without subsampling depending on weights and balanced. - - Parameters - ---------- - X_train : np.ndarray - Training data - y_train : np.array - Mapped Training targets - eval_set : list of tuple - List of eval tuple set (X, y) - weights : either 0, 1, dict or iterable - if 0 (default) : no weights will be applied - if 1 : classification only, will balanced class with inverse frequency - if dict : keys are corresponding class values are sample weights - if iterable : list or np array must be of length equal to nb elements - in the training set - batch_size : int - how many samples per batch to load - num_workers : int - how many subprocesses to use for data loading. 0 means that the data - will be loaded in the main process - drop_last : bool - set to True to drop the last incomplete batch, if the dataset size is not - divisible by the batch size. 
If False and the size of dataset is not - divisible by the batch size, then the last batch will be smaller - pin_memory : bool - Whether to pin GPU memory during training - - Returns - ------- - train_dataloader, valid_dataloader : torch.DataLoader, torch.DataLoader - Training and validation dataloaders - """ - need_shuffle, sampler = create_sampler(weights, y_train) - - if scipy.sparse.issparse(X_train): - train_dataloader = DataLoader( - SparseTorchDataset(X_train.astype(np.float32), y_train), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - else: - train_dataloader = DataLoader( - TorchDataset(X_train.astype(np.float32), y_train), - batch_size=batch_size, - sampler=sampler, - shuffle=need_shuffle, - num_workers=num_workers, - drop_last=drop_last, - pin_memory=pin_memory, - ) - - valid_dataloaders = [] - for X, y in eval_set: - if scipy.sparse.issparse(X): - valid_dataloaders.append( - DataLoader( - SparseTorchDataset(X.astype(np.float32), y), - batch_size=batch_size, - shuffle=False, - num_workers=num_workers, - pin_memory=pin_memory, - ) - ) - else: - valid_dataloaders.append( - DataLoader( - TorchDataset(X.astype(np.float32), y), - batch_size=batch_size, - shuffle=False, - num_workers=num_workers, - pin_memory=pin_memory, - ) - ) - - return train_dataloader, valid_dataloaders - - -def create_explain_matrix(input_dim, cat_emb_dim, cat_idxs, post_embed_dim): - """ - This is a computational trick. - In order to rapidly sum importances from same embeddings - to the initial index. - - Parameters - ---------- - input_dim : int - Initial input dim - cat_emb_dim : int or list of int - if int : size of embedding for all categorical feature - if list of int : size of embedding for each categorical feature - cat_idxs : list of int - Initial position of categorical features - post_embed_dim : int - Post embedding inputs dimension - - Returns - ------- - reducing_matrix : np.array - Matrix of dim (post_embed_dim, input_dim) to performe reduce - """ - - if isinstance(cat_emb_dim, int): - all_emb_impact = [cat_emb_dim - 1] * len(cat_idxs) - else: - all_emb_impact = [emb_dim - 1 for emb_dim in cat_emb_dim] - - acc_emb = 0 - nb_emb = 0 - indices_trick = [] - for i in range(input_dim): - if i not in cat_idxs: - indices_trick.append([i + acc_emb]) - else: - indices_trick.append(range(i + acc_emb, i + acc_emb + all_emb_impact[nb_emb] + 1)) - acc_emb += all_emb_impact[nb_emb] - nb_emb += 1 - - reducing_matrix = np.zeros((post_embed_dim, input_dim)) - for i, cols in enumerate(indices_trick): - reducing_matrix[cols, i] = 1 - - return scipy.sparse.csc_matrix(reducing_matrix) - - -def create_group_matrix(list_groups, input_dim): - """ - Create the group matrix corresponding to the given list_groups - - Parameters - ---------- - - list_groups : list of list of int - Each element is a list representing features in the same group. - One feature should appear in maximum one group. - Feature that don't get assigned a group will be in their own group of one feature. - - input_dim : number of feature in the initial dataset - - Returns - ------- - - group_matrix : torch matrix - A matrix of size (n_groups, input_dim) - where m_ij represents the importance of feature j in group i - The rows must some to 1 as each group is equally important a priori. 
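        Example (an illustrative sketch; with four features and one group
        ``[0, 1]``, features 2 and 3 fall into singleton groups, and every
        row sums to 1)::

            >>> create_group_matrix([[0, 1]], input_dim=4)
            tensor([[0.5000, 0.5000, 0.0000, 0.0000],
                    [0.0000, 0.0000, 1.0000, 0.0000],
                    [0.0000, 0.0000, 0.0000, 1.0000]])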
- - """ - check_list_groups(list_groups, input_dim) - - if len(list_groups) == 0: - group_matrix = torch.eye(input_dim) - return group_matrix - else: - n_groups = input_dim - int(np.sum([len(gp) - 1 for gp in list_groups])) - group_matrix = torch.zeros((n_groups, input_dim)) - - remaining_features = [feat_idx for feat_idx in range(input_dim)] - - current_group_idx = 0 - for group in list_groups: - group_size = len(group) - for elem_idx in group: - # add importrance of element in group matrix and corresponding group - group_matrix[current_group_idx, elem_idx] = 1 / group_size - # remove features from list of features - remaining_features.remove(elem_idx) - # move to next group - current_group_idx += 1 - # features not mentionned in list_groups get assigned their own group of singleton - for remaining_feat_idx in remaining_features: - group_matrix[current_group_idx, remaining_feat_idx] = 1 - current_group_idx += 1 - return group_matrix - - -def check_list_groups(list_groups, input_dim): - """ - Check that list groups: - - is a list of list - - does not contain twice the same feature in different groups - - does not contain unknown features (>= input_dim) - - does not contain empty groups - Parameters - ---------- - - list_groups : list of list of int - Each element is a list representing features in the same group. - One feature should appear in maximum one group. - Feature that don't get assign a group will be in their own group of one feature. - - input_dim : number of feature in the initial dataset - """ - assert isinstance(list_groups, list), "list_groups must be a list of list." - - if len(list_groups) == 0: - return - else: - for group_pos, group in enumerate(list_groups): - msg = f"Groups must be given as a list of list, but found {group} in position {group_pos}." # noqa - assert isinstance(group, list), msg - assert len(group) > 0, "Empty groups are forbidding please remove empty groups []" - - n_elements_in_groups = np.sum([len(group) for group in list_groups]) - flat_list = [] - for group in list_groups: - flat_list.extend(group) - unique_elements = np.unique(flat_list) - n_unique_elements_in_groups = len(unique_elements) - msg = f"One feature can only appear in one group, please check your grouped_features." - assert n_unique_elements_in_groups == n_elements_in_groups, msg - - highest_feat = np.max(unique_elements) - assert highest_feat < input_dim, f"Number of features is {input_dim} but one group contains {highest_feat}." # noqa - return - - -def filter_weights(weights): - """ - This function makes sure that weights are in correct format for - regression and multitask TabNet - - Parameters - ---------- - weights : int, dict or list - Initial weights parameters given by user - - Returns - ------- - None : This function will only throw an error if format is wrong - """ - err_msg = """Please provide a list or np.array of weights for """ - err_msg += """regression, multitask or pretraining: """ - if isinstance(weights, int): - if weights == 1: - raise ValueError(err_msg + "1 given.") - if isinstance(weights, dict): - raise ValueError(err_msg + "Dict given.") - return - - -def validate_eval_set(eval_set, eval_name, X_train, y_train): - """Check if the shapes of eval_set are compatible with (X_train, y_train). - - Parameters - ---------- - eval_set : list of tuple - List of eval tuple set (X, y). - The last one is used for early stopping - eval_name : list of str - List of eval set names. 
- X_train : np.ndarray - Train owned products - y_train : np.array - Train targeted products - - Returns - ------- - eval_names : list of str - Validated list of eval_names. - eval_set : list of tuple - Validated list of eval_set. - - """ - eval_name = eval_name or [f"val_{i}" for i in range(len(eval_set))] - - assert len(eval_set) == len(eval_name), "eval_set and eval_name have not the same length" - if len(eval_set) > 0: - assert all(len(elem) == 2 for elem in eval_set), "Each tuple of eval_set need to have two elements" - for name, (X, y) in zip(eval_name, eval_set): - check_input(X) - msg = f"Dimension mismatch between X_{name} " + f"{X.shape} and X_train {X_train.shape}" - assert len(X.shape) == len(X_train.shape), msg - - msg = f"Dimension mismatch between y_{name} " + f"{y.shape} and y_train {y_train.shape}" - assert len(y.shape) == len(y_train.shape), msg - - msg = f"Number of columns is different between X_{name} " + f"({X.shape[1]}) and X_train ({X_train.shape[1]})" - assert X.shape[1] == X_train.shape[1], msg - - if len(y_train.shape) == 2: - msg = ( - f"Number of columns is different between y_{name} " + f"({y.shape[1]}) and y_train ({y_train.shape[1]})" - ) - assert y.shape[1] == y_train.shape[1], msg - msg = f"You need the same number of rows between X_{name} " + f"({X.shape[0]}) and y_{name} ({y.shape[0]})" - assert X.shape[0] == y.shape[0], msg - - return eval_name, eval_set - - -def define_device(device_name): - """ - Define the device to use during training and inference. - If auto it will detect automatically whether to use cuda or cpu - - Parameters - ---------- - device_name : str - Either "auto", "cpu" or "cuda" - - Returns - ------- - str - Either "cpu" or "cuda" - """ - if device_name == "auto": - if torch.cuda.is_available(): - return "cuda" - else: - return "cpu" - elif device_name == "cuda" and not torch.cuda.is_available(): - return "cpu" - else: - return device_name - - -class ComplexEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, (np.generic, np.ndarray)): - return obj.tolist() - # Let the base class default method raise the TypeError - return json.JSONEncoder.default(self, obj) - - -def check_input(X): - """ - Raise a clear error if X is a pandas dataframe - and check array according to scikit rules - """ - if isinstance(X, (pd.DataFrame, pd.Series)): - err_message = "Pandas DataFrame are not supported: apply X.values when calling fit" - raise TypeError(err_message) - check_array(X, accept_sparse=True) - - -def check_warm_start(warm_start, from_unsupervised): - """ - Gives a warning about ambiguous usage of the two parameters. - """ - if warm_start and from_unsupervised is not None: - warn_msg = "warm_start=True and from_unsupervised != None: " - warn_msg = "warm_start will be ignore, training will start from unsupervised weights" - warnings.warn(warn_msg) - return - - -def check_embedding_parameters(cat_dims, cat_idxs, cat_emb_dim): - """ - Check parameters related to embeddings and rearrange them in a unique manner. - """ - if (cat_dims == []) ^ (cat_idxs == []): - if cat_dims == []: - msg = "If cat_idxs is non-empty, cat_dims must be defined as a list of same length." - else: - msg = "If cat_dims is non-empty, cat_idxs must be defined as a list of same length." - raise ValueError(msg) - elif len(cat_dims) != len(cat_idxs): - msg = "The lists cat_dims and cat_idxs must have the same length." 
- raise ValueError(msg) - - if isinstance(cat_emb_dim, int): - cat_emb_dims = [cat_emb_dim] * len(cat_idxs) - else: - cat_emb_dims = cat_emb_dim - - # check that all embeddings are provided - if len(cat_emb_dims) != len(cat_dims): - msg = f"""cat_emb_dim and cat_dims must be lists of same length, got {len(cat_emb_dims)} - and {len(cat_dims)}""" - raise ValueError(msg) - - # Rearrange to get reproducible seeds with different ordering - if len(cat_idxs) > 0: - sorted_idxs = np.argsort(cat_idxs) - cat_dims = [cat_dims[i] for i in sorted_idxs] - cat_emb_dims = [cat_emb_dims[i] for i in sorted_idxs] - - return cat_dims, cat_idxs, cat_emb_dims diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index fb22b91e..e8e46afa 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -530,6 +530,7 @@ def __init__(self, n: int, d_in: int, d_out: int) -> None: self.layers = nn.ModuleList([nn.Linear(d_in, d_out) for _ in range(n)]) def forward(self, x): + """Forward-pass.""" return torch.stack([l(x[:, i]) for i, l in enumerate(self.layers)], 1) @@ -570,6 +571,7 @@ def get_out_shape(self) -> int: return self.n_features def forward(self, x: Tensor) -> Tensor: + """Forward-pass.""" x = self._cos_sin(2 * torch.pi * self.coefficients[None] * x[..., None]) if self.flatten_output: return x.view(x.shape[0], -1) @@ -642,23 +644,20 @@ def __init__(self, *args, **kwargs): class SoftEmbedding(torch.nn.Module): - """ - Soft-one hot encoding embedding technique, from https://arxiv.org/pdf/1708.00065.pdf - In a nutshell, it represents a continuous feature as a weighted average of embeddings - """ + """Soft-one hot encoding embedding technique, from https://arxiv.org/pdf/1708.00065.pdf. - def __init__(self, num_dims, embedding_size=10, flatten_output: bool = False, **kwargs) -> None: - """ + In a nutshell, it represents a continuous feature as a weighted average of embeddings - Parameters - ---------- + Args: num_embeddings: Number of embeddings to use (cardinality of the embedding table). embeddings_dim: The dimension of the vector space for projecting the scalar value. embeddings_init_std: The standard deviation factor for normal initialization of the embedding matrix weights. 
emb_initializer: Dict where keys are feature names and values are callable to initialize embedding tables - """ + """ + + def __init__(self, num_dims, embedding_size=10, flatten_output: bool = False, **kwargs) -> None: super(SoftEmbedding, self).__init__() self.embedding_table = torch.nn.Embedding(num_dims, embedding_size) nn.init.xavier_uniform_(self.embedding_table.weight) From 2557c4c82a53e7559d442742629c39ca6f31da5d Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Fri, 1 Sep 2023 13:48:45 +0000 Subject: [PATCH 30/49] bugfix --- lightautoml/ml_algo/torch_based/nn_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index cee78575..17f8afff 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -8,7 +8,7 @@ import numpy as np import torch import torch.nn as nn -from lightautoml.ml_algo.tabnet.utils import TabNetEncoder, initialize_non_glu +from lightautoml.ml_algo.tabnet.utils import TabNetEncoder, _initialize_non_glu from lightautoml.ml_algo.torch_based.autoint.autoint_utils import AttnInteractionBlock, LeakyGate from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm @@ -1063,11 +1063,11 @@ def __init__( self.multi_task_mappings = torch.nn.ModuleList() for task_dim in n_out: task_mapping = nn.Linear(n_d, task_dim, bias=False) - initialize_non_glu(task_mapping, n_d, task_dim) + _initialize_non_glu(task_mapping, n_d, task_dim) self.multi_task_mappings.append(task_mapping) else: self.final_mapping = nn.Linear(n_d, n_out, bias=False) - initialize_non_glu(self.final_mapping, n_d, n_out) + _initialize_non_glu(self.final_mapping, n_d, n_out) def forward(self, x): """Forward-pass.""" From 42fd85fe9a3298f468c28712cbb67b388fc6041b Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Fri, 1 Sep 2023 13:55:32 +0000 Subject: [PATCH 31/49] changed import links --- lightautoml/ml_algo/torch_based/nn_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 30676df8..99914e32 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -8,10 +8,10 @@ import numpy as np import torch import torch.nn as nn -from lightautoml.ml_algo.torch_based.autoint.autoint_utils import AttnInteractionBlock, LeakyGate -from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm +from .autoint.autoint_utils import AttnInteractionBlock, LeakyGate +from .autoint.ghost_norm import GhostBatchNorm -from lightautoml.ml_algo.torch_based.node_nn_model import DenseODSTBlock, MeanPooling +from .node_nn_model import DenseODSTBlock, MeanPooling class GaussianNoise(nn.Module): From 7a8bf65debc1d30cd7b1bd0f979230cd5caa2f46 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 6 Sep 2023 13:11:28 +0000 Subject: [PATCH 32/49] changed import links --- lightautoml/ml_algo/tabnet/utils.py | 4 ++-- lightautoml/ml_algo/torch_based/nn_models.py | 21 +++----------------- lightautoml/ml_algo/tuning/base.py | 6 +++--- lightautoml/ml_algo/tuning/optuna.py | 14 ++++++------- 4 files changed, 15 insertions(+), 30 deletions(-) diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/tabnet/utils.py index a901b7c6..8530be5d 100644 --- a/lightautoml/ml_algo/tabnet/utils.py +++ b/lightautoml/ml_algo/tabnet/utils.py @@ -2,8 +2,8 @@ import torch 
import numpy as np import torch.nn as nn -from lightautoml.ml_algo.torch_based.node_nn_model import Entmax15, Sparsemax -from lightautoml.ml_algo.torch_based.autoint.ghost_norm import GhostBatchNorm +from ..torch_based.node_nn_model import Entmax15, Sparsemax +from ..torch_based.autoint.ghost_norm import GhostBatchNorm def _initialize_non_glu(module, input_dim, output_dim): diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 56813004..f380ef38 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -1143,7 +1143,6 @@ def __init__( super(TabNet, self).__init__() self.input_dim = n_in self.output_dim = n_out - self.is_multi_task = isinstance(n_out, list) self.n_d = n_d self.n_a = n_a self.n_steps = n_steps @@ -1171,29 +1170,15 @@ def __init__( group_attention_matrix=group_attention_matrix, ) - if self.is_multi_task: - self.multi_task_mappings = torch.nn.ModuleList() - for task_dim in n_out: - task_mapping = nn.Linear(n_d, task_dim, bias=False) - _initialize_non_glu(task_mapping, n_d, task_dim) - self.multi_task_mappings.append(task_mapping) - else: - self.final_mapping = nn.Linear(n_d, n_out, bias=False) - _initialize_non_glu(self.final_mapping, n_d, n_out) + self.final_mapping = nn.Linear(n_d, n_out, bias=True) + _initialize_non_glu(self.final_mapping, n_d, n_out) def forward(self, x): """Forward-pass.""" res = 0 steps_output, M_loss = self.encoder(x) res = torch.sum(torch.stack(steps_output, dim=0), dim=0) - - if self.is_multi_task: - # Result will be in list format - out = [] - for task_mapping in self.multi_task_mappings: - out.append(task_mapping(res)) - else: - out = self.final_mapping(res) + out = self.final_mapping(res) return out def forward_masks(self, x): diff --git a/lightautoml/ml_algo/tuning/base.py b/lightautoml/ml_algo/tuning/base.py index 5c1a803e..692c5080 100644 --- a/lightautoml/ml_algo/tuning/base.py +++ b/lightautoml/ml_algo/tuning/base.py @@ -7,11 +7,11 @@ from typing import Tuple from typing import overload -from lightautoml.dataset.base import LAMLDataset +from ...dataset.base import LAMLDataset # if TYPE_CHECKING: -from lightautoml.ml_algo.base import MLAlgo -from lightautoml.validation.base import TrainValidIterator +from ...ml_algo.base import MLAlgo +from ...validation.base import TrainValidIterator class DistributionBase(ABC): diff --git a/lightautoml/ml_algo/tuning/optuna.py b/lightautoml/ml_algo/tuning/optuna.py index 3e86e4dd..eade5d12 100644 --- a/lightautoml/ml_algo/tuning/optuna.py +++ b/lightautoml/ml_algo/tuning/optuna.py @@ -12,13 +12,13 @@ import optuna -from lightautoml.dataset.base import LAMLDataset -from lightautoml.ml_algo.base import MLAlgo -from lightautoml.ml_algo.tuning.base import Choice -from lightautoml.ml_algo.tuning.base import ParamsTuner -from lightautoml.ml_algo.tuning.base import Uniform -from lightautoml.validation.base import HoldoutIterator -from lightautoml.validation.base import TrainValidIterator +from ...dataset.base import LAMLDataset +from ..base import MLAlgo +from .base import Choice +from .base import ParamsTuner +from .base import Uniform +from ...validation.base import HoldoutIterator +from ...validation.base import TrainValidIterator logger = logging.getLogger(__name__) From 94fdd763ac2d88feef9780201977521c7c09147c Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 6 Sep 2023 14:41:19 +0000 Subject: [PATCH 33/49] bugfix --- lightautoml/ml_algo/dl_model.py | 2 -- 1 file changed, 2 deletions(-) diff 
--git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 010a4a28..bff1eedc 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -125,8 +125,6 @@ "soft": SoftEmbeddingFlat, } cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding} -cont_embedder_by_name_flat = {"cont": ContEmbedder, "linear": LinearEmbeddingFlat, "dense": DenseEmbeddingFlat} -cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding} class TorchModel(TabularMLAlgo): From 1c4170e8d1d5d75eae1671393d7de2bd711422e8 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 7 Sep 2023 12:52:28 +0000 Subject: [PATCH 34/49] some new changes --- lightautoml/ml_algo/base.py | 4 +- lightautoml/ml_algo/torch_based/nn_models.py | 15 +++--- lightautoml/text/embed.py | 3 +- lightautoml/text/nn_model.py | 57 +++++++++++++++----- 4 files changed, 57 insertions(+), 22 deletions(-) diff --git a/lightautoml/ml_algo/base.py b/lightautoml/ml_algo/base.py index 904b1e0a..0dec5aba 100755 --- a/lightautoml/ml_algo/base.py +++ b/lightautoml/ml_algo/base.py @@ -240,7 +240,9 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset: iterator_len = len(train_valid_iterator) if iterator_len > 1: logger.info("Start fitting \x1b[1m{}\x1b[0m ...".format(self._name)) - logger.debug(f"Training params: {self.params}") + stop_params = ["cat_features", "cont_features", "cat_dims", "cat_vc"] + printable_params = {key: value for key, value in self.params.items() if key not in stop_params} + logger.debug(f"Training params: {printable_params}") # save features names self._features = train_valid_iterator.features diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index f380ef38..485d0ac4 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -154,8 +154,8 @@ def __init__( dropout_first: bool = True, bn_momentum: float = 0.1, ghost_batch: Optional[int] = 64, - leaky_gate: bool = True, use_skip: bool = True, + leaky_gate: bool = True, weighted_sum: bool = True, device: torch.device = torch.device("cuda:0"), **kwargs, @@ -180,7 +180,7 @@ def __init__( self.features.add_module("dense0", nn.Linear(n_in, num_features)) if leaky_gate: - self.features.add_module("leakygate0", LeakyGate(n_in)) + self.features.add_module("leakygate0", LeakyGate(num_features)) if dropout_first and drop_rate[0] > 0: self.features.add_module("dropout0", nn.Dropout(drop_rate[0])) @@ -228,7 +228,7 @@ def forward(self, X: torch.Tensor) -> torch.Tensor: x = X input = x.detach().clone() for name, layer in self.features.named_children(): - if name != "denseblock1" and name != "dense0" and self.concat_input: + if name not in ["dropout0", "leakygate0", "denseblock1", "dense0"] and self.concat_input: x = torch.cat([x, input], 1) x = layer(x) out = self.fc(x) @@ -976,6 +976,7 @@ def __init__( use_skip=mlp_use_skip, device=device, ) + self.use_skip = True if weighted_sum: self.mix = nn.Parameter(torch.tensor([0.0], device=device)) else: @@ -1127,16 +1128,16 @@ def __init__( self, n_in, n_out, - n_d=8, - n_a=8, - n_steps=3, + n_d=32, + n_a=32, + n_steps=1, gamma=1.3, n_independent=2, n_shared=2, epsilon=1e-15, virtual_batch_size=128, momentum=0.02, - mask_type="sparsemax", + mask_type="entemax", group_attention_matrix=None, **kwargs, ): diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index e8e46afa..0fbe062d 100644 
--- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -12,6 +12,7 @@ import torch.nn as nn from torch import Tensor import operator +import numpy as np try: from transformers import AutoModel @@ -572,7 +573,7 @@ def get_out_shape(self) -> int: def forward(self, x: Tensor) -> Tensor: """Forward-pass.""" - x = self._cos_sin(2 * torch.pi * self.coefficients[None] * x[..., None]) + x = self._cos_sin(2 * np.pi * self.coefficients[None] * x[..., None]) if self.flatten_output: return x.view(x.shape[0], -1) return x diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index dc4db2ae..58fa1574 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -162,13 +162,34 @@ def __init__( ) if bias is not None: - try: - last_layer = list( - filter( - lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), - list(self.torch_model.children()), - ) - )[-1] + self._set_last_layer(self.torch_model, bias) + + self.сlump = Clump() + self.sig = nn.Sigmoid() + self.softmax = nn.Softmax(dim=1) + + def _set_last_layer(self, torch_model, bias): + try: + use_skip = torch_model.use_skip + self._init_last_layers(torch_model, bias, use_skip) + except: + self._init_last_layers(torch_model, bias, False) + + def _init_last_layers(self, torch_model, bias, use_skip=False): + try: + all_layers = list(torch_model.children()) + layers = list( + filter( + lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), + all_layers, + ) + ) + if len(layers) == 0: + last_layer = all_layers[-1] + self._set_last_layer(last_layer, bias) + + else: + last_layer = layers[-1] while isinstance(last_layer, nn.Sequential): last_layer = list( filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), last_layer) @@ -177,12 +198,22 @@ def __init__( last_layer.bias.data = bias shape = last_layer.weight.data.shape last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True) - except: - logger.info3("Last linear layer not founded, so init_bias=False") - - self.сlump = Clump() - self.sig = nn.Sigmoid() - self.softmax = nn.Softmax(dim=1) + if use_skip: + if len(layers) <= 1: + last_layer = all_layers[-2] + self._set_last_layer(last_layer, bias) + else: + pre_last_layer = layers[-2] + while isinstance(last_layer, nn.Sequential): + pre_last_layer = list( + filter(lambda x: isinstance(x, nn.Linear) or isinstance(x, nn.Sequential), pre_last_layer) + )[-1] + bias = torch.Tensor(bias) + pre_last_layer.bias.data = bias + shape = pre_last_layer.weight.data.shape + pre_last_layer.weight.data = torch.zeros(shape[0], shape[1], requires_grad=True) + except: + logger.info3("Last linear layer not founded, so init_bias=False") def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: """Forward-pass of model with embeddings.""" From 203511350ed81aae9b4f28c7829dd35a194f9417 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 7 Sep 2023 13:06:20 +0000 Subject: [PATCH 35/49] now we dont count VC for cat features for every embedding --- lightautoml/ml_algo/dl_model.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index bff1eedc..e8051613 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -421,11 +421,12 @@ def _init_params_on_input(self, train_valid_iterator) -> dict: ) + 1 ) - values, counts = np.unique( - np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]), - 
return_counts=True, - ) - cat_value_counts.append(dict(zip(values, counts))) + if params["cat_embedder"] == "weighted": + values, counts = np.unique( + np.concatenate([train_valid_iterator.train[:, cat_feature].data, valid[:, cat_feature].data]), + return_counts=True, + ) + cat_value_counts.append(dict(zip(values, counts))) cat_dims.append(num_unique_categories) new_params["cat_dims"] = cat_dims new_params["cat_vc"] = cat_value_counts From 0afe07f7e92076e541a5bfaff5101c1bbcff59a4 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Fri, 8 Sep 2023 14:28:01 +0000 Subject: [PATCH 36/49] no embedder bugfix --- lightautoml/ml_algo/dl_model.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index e8051613..0a207ce1 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -115,7 +115,7 @@ "cat_no_dropout": BasicCatEmbeddingFlat, "weighted": WeightedCatEmbeddingFlat, } -cat_embedder_by_name = {"cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding} +cat_embedder_by_name = {"cat": BasicCatEmbedding, "cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding} cont_embedder_by_name_flat = { "cont": ContEmbedder, @@ -124,7 +124,13 @@ "plr": PLREmbeddingFlat, "soft": SoftEmbeddingFlat, } -cont_embedder_by_name = {"linear": LinearEmbedding, "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding} +cont_embedder_by_name = { + "cont": LinearEmbedding, + "linear": LinearEmbedding, + "dense": DenseEmbedding, + "plr": PLREmbedding, + "soft": SoftEmbedding, +} class TorchModel(TabularMLAlgo): @@ -299,7 +305,7 @@ def _infer_params(self): net_params={ "task": self.task, "cont_embedder_": cont_embedder_by_name.get(params["cont_embedder"], LinearEmbedding) - if input_type_by_name[params["model"]] == "seq" + if input_type_by_name[params["model"]] == "seq" and is_cont else cont_embedder_by_name_flat.get(params["cont_embedder"], ContEmbedder) if is_cont else None, @@ -312,7 +318,7 @@ def _infer_params(self): if is_cont else None, "cat_embedder_": cat_embedder_by_name.get(params["cat_embedder"], BasicCatEmbedding) - if input_type_by_name[params["model"]] == "seq" + if input_type_by_name[params["model"]] == "seq" and is_cat else cat_embedder_by_name_flat.get(params["cat_embedder"], CatEmbedder) if is_cat else None, From 39beb9ec0ff92c2bb7036b844d017e44e0729453 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Fri, 8 Sep 2023 15:58:32 +0000 Subject: [PATCH 37/49] scheduler params --- lightautoml/automl/presets/base.py | 3 ++- lightautoml/automl/presets/tabular_config.yml | 2 +- lightautoml/ml_algo/dl_model.py | 6 +++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lightautoml/automl/presets/base.py b/lightautoml/automl/presets/base.py index 01c3f01f..975358ef 100644 --- a/lightautoml/automl/presets/base.py +++ b/lightautoml/automl/presets/base.py @@ -37,8 +37,9 @@ def upd_params(old: dict, new: dict) -> dict: Updated parameters. 
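
    Example:
        A toy sketch of the merge semantics (values invented for
        illustration): nested dicts are merged key by key, except for keys
        named in ``not_updatable_params`` (such as ``scheduler_params``),
        which are replaced wholesale by the new value.

        >>> old = {"bs": 128, "opt_params": {"lr": 1e-3, "wd": 0.0}}
        >>> new = {"opt_params": {"lr": 3e-4}, "scheduler_params": {"patience": 10}}
        >>> _ = upd_params(old, new)
        >>> old["opt_params"]
        {'lr': 0.0003, 'wd': 0.0}
        >>> old["scheduler_params"]
        {'patience': 10}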
""" + not_updatable_params = ["scheduler_params"] for k in new: - if type(new[k]) is dict and k in old and type(old[k]) is dict: + if type(new[k]) is dict and k in old and type(old[k]) is dict and k not in not_updatable_params: upd_params(old[k], new[k]) else: old[k] = new[k] diff --git a/lightautoml/automl/presets/tabular_config.yml b/lightautoml/automl/presets/tabular_config.yml index d391d5e8..cecb9a9d 100755 --- a/lightautoml/automl/presets/tabular_config.yml +++ b/lightautoml/automl/presets/tabular_config.yml @@ -184,7 +184,7 @@ nn_params: # scheduler sch: ReduceLROnPlateau # params of ReduceLROnPlateau scheduler - scheduler_params: {} #{ 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 } + scheduler_params: { 'patience': 5, 'factor': 0.5, 'min_lr': 0.00001 } # using snapshot ensembles # https://arxiv.org/abs/1704.00109 is_snap: false diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 0a207ce1..4fe7240c 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -115,7 +115,11 @@ "cat_no_dropout": BasicCatEmbeddingFlat, "weighted": WeightedCatEmbeddingFlat, } -cat_embedder_by_name = {"cat": BasicCatEmbedding, "cat_no_dropout": BasicCatEmbedding, "weighted": WeightedCatEmbedding} +cat_embedder_by_name = { + "cat_no_dropout": BasicCatEmbedding, + "cat_no_dropout": BasicCatEmbedding, + "weighted": WeightedCatEmbedding, +} cont_embedder_by_name_flat = { "cont": ContEmbedder, From 6294e9f51ed38bcb2a79f5cf0056ef98ab6637c2 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 11 Sep 2023 09:29:53 +0000 Subject: [PATCH 38/49] bfixs --- lightautoml/ml_algo/dl_model.py | 24 ++++++++++--------- lightautoml/ml_algo/torch_based/nn_models.py | 9 ++++--- .../ml_algo/torch_based/node_nn_model.py | 14 ++++++++++- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 4fe7240c..4338659d 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -314,10 +314,11 @@ def _infer_params(self): if is_cont else None, "cont_params": { - "num_dims": params["num_dims"], - "input_bn": params["input_bn"], - "device": params["device"], - "embedding_size": params["embedding_size"], + # "num_dims": params["num_dims"], + # "input_bn": params["input_bn"], + # "device": params["device"], + # "embedding_size": params["embedding_size"], + **params } if is_cont else None, @@ -327,13 +328,14 @@ def _infer_params(self): if is_cat else None, "cat_params": { - "cat_vc": params["cat_vc"], - "cat_dims": params["cat_dims"], - "emb_dropout": params["emb_dropout"], - "emb_ratio": params["emb_ratio"], - "max_emb_size": params["max_emb_size"], - "embedding_size": params["embedding_size"], - "device": params["device"], + # "cat_vc": params["cat_vc"], + # "cat_dims": params["cat_dims"], + # "emb_dropout": params["emb_dropout"], + # "emb_ratio": params["emb_ratio"], + # "max_emb_size": params["max_emb_size"], + # "embedding_size": params["embedding_size"], + # "device": params["device"], + **params } if is_cat else None, diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index 485d0ac4..752b1b77 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -153,9 +153,9 @@ def __init__( concat_input: bool = True, dropout_first: bool = True, bn_momentum: float = 0.1, - ghost_batch: Optional[int] = 64, - use_skip: bool = True, - leaky_gate: bool = True, + ghost_batch: 
Optional[int] = None, + use_skip: bool = False, + leaky_gate: bool = False, weighted_sum: bool = True, device: torch.device = torch.device("cuda:0"), **kwargs, @@ -828,6 +828,7 @@ class NODE(nn.Module): layer_dim: num trees in one layer. num_layers: number of forests. tree_dim: number of response channels in the response of individual tree. + choice_function: str `entmax` or `sparsmax` use_original_head use averaging as a head or put linear layer instead. depth: number of splits in every tree. drop_rate: Dropout rate for each layer altogether. @@ -843,6 +844,7 @@ def __init__( layer_dim: int = 2048, num_layers: int = 1, tree_dim: int = 1, + choice_function="entmax", use_original_head: bool = False, depth: int = 6, drop_rate: float = 0.0, @@ -861,6 +863,7 @@ def __init__( num_layers=num_layers, tree_dim=tree_dim if not use_original_head else n_out, depth=depth, + choice_function=choice_function, input_dropout=drop_rate, flatten_output=not use_original_head, ) diff --git a/lightautoml/ml_algo/torch_based/node_nn_model.py b/lightautoml/ml_algo/torch_based/node_nn_model.py index e57f5125..e3f3f6da 100644 --- a/lightautoml/ml_algo/torch_based/node_nn_model.py +++ b/lightautoml/ml_algo/torch_based/node_nn_model.py @@ -554,6 +554,7 @@ class DenseODSTBlock(nn.Sequential): max_features: maximum number of features per input depth: number of splits in every tree. input_dropout: Dropout rate forest layer. + choice_function: str `entmax` or `sparsmax`. flatten_output: flatten output or not. """ @@ -565,12 +566,23 @@ def __init__( tree_dim=1, max_features=None, input_dropout=0.0, + choice_function="entmax", flatten_output=True, **kwargs ): layers = [] + ch_f = Sparsemax() if choice_function == "sparsmax" else Entmax15() + bin_f = Sparsemoid() if choice_function == "sparsmax" else Entmoid15() for i in range(num_layers): - oddt = ODST(input_dim, layer_dim, tree_dim=tree_dim, flatten_output=True, **kwargs) + oddt = ODST( + input_dim, + layer_dim, + tree_dim=tree_dim, + flatten_output=True, + choice_function=ch_f, + bin_function=bin_f, + **kwargs + ) input_dim = min(input_dim + layer_dim * tree_dim, max_features or float("inf")) layers.append(oddt) From 81fab518d71ebc98f8fe181b6443ab89a72913f1 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 11 Sep 2023 09:35:55 +0000 Subject: [PATCH 39/49] bfixs --- lightautoml/text/nn_model.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 58fa1574..3ad54bb9 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -169,11 +169,8 @@ def __init__( self.softmax = nn.Softmax(dim=1) def _set_last_layer(self, torch_model, bias): - try: - use_skip = torch_model.use_skip - self._init_last_layers(torch_model, bias, use_skip) - except: - self._init_last_layers(torch_model, bias, False) + use_skip = getattr(torch_model, "use_skip", False) + self._init_last_layers(torch_model, bias, use_skip) def _init_last_layers(self, torch_model, bias, use_skip=False): try: From 99d77f8eee459a4cd068955c820f62e023424fc5 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Tue, 12 Sep 2023 10:40:11 +0000 Subject: [PATCH 40/49] mlp embedder --- lightautoml/text/embed.py | 58 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index 0fbe062d..fa0ea834 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -705,3 +705,61 @@ class SoftEmbeddingFlat(SoftEmbedding): def 
__init__(self, *args, **kwargs): super(SoftEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) + + +class MLPContEmbedding(nn.Module): + """MLP multi-dim embedding. + + Args: + num_dims : num of features. + d_in: input size. + d_out: output size. + d_hidden: hidden size. + """ + + def __init__( + self, + num_dims: int, + embedding_size: int = 10, + d_hidden: int = 64, + flatten_output: bool = False, + **kwargs, + ) -> None: + super().__init__() + self.flatten_output = flatten_output + self.embedding_size = embedding_size + self.num_dims = num_dims + self.layers = nn.ModuleList( + [ + nn.Sequential(nn.Linear(1, d_hidden), nn.ReLU(), nn.Linear(d_hidden, embedding_size)) + for _ in range(num_dims) + ] + ) + + def get_out_shape(self) -> int: + """Output shape. + + Returns: + int with module output shape. + + """ + if self.flatten_output: + return self.num_dims * self.embedding_size + else: + return self.num_dims + + def forward(self, X: Dict) -> Tensor: + """Produce embedding for each value in input. + + Args: + X : Dict + + Returns: + torch.Tensor + + """ + x = X["cont"] + x = torch.stack([l(x[:, i]) for i, l in enumerate(self.layers)], 1) + if self.flatten_output: + return x.view(x.shape[0], -1) + return x From 41e547f279ecbbcd389d98e3daeacfdcc9e798fd Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 20 Sep 2023 14:21:04 +0000 Subject: [PATCH 41/49] no descr --- lightautoml/automl/presets/tabular_presets.py | 1 + lightautoml/ml_algo/dl_model.py | 60 ++++-- lightautoml/ml_algo/torch_based/nn_models.py | 90 +++++++++ .../ml_algo/torch_based/saint/saint.py | 144 +++++++++++++++ lightautoml/text/nn_model.py | 6 +- lightautoml/text/trainer.py | 171 +++++++++++++++++- lightautoml/text/utils.py | 2 +- 7 files changed, 445 insertions(+), 29 deletions(-) create mode 100644 lightautoml/ml_algo/torch_based/saint/saint.py diff --git a/lightautoml/automl/presets/tabular_presets.py b/lightautoml/automl/presets/tabular_presets.py index 166cb653..4b46e1da 100755 --- a/lightautoml/automl/presets/tabular_presets.py +++ b/lightautoml/automl/presets/tabular_presets.py @@ -609,6 +609,7 @@ def create_automl(self, **fit_args): "autoint", "tabnet", "fttransformer", + "saint", ] available_nn_models = available_nn_models + [x + "_tuned" for x in available_nn_models] nn_models = [ diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 4338659d..38b1f521 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -1,6 +1,7 @@ """Neural net for tabular datasets.""" +from itertools import cycle from lightautoml.utils.installation import __validate_extra_deps @@ -73,6 +74,7 @@ from .torch_based.nn_models import MLP, TabNet from .torch_based.nn_models import NODE from .torch_based.nn_models import SNN +from .torch_based.nn_models import SAINT from .torch_based.nn_models import DenseLightModel from .torch_based.nn_models import DenseModel from .torch_based.nn_models import LinearLayer @@ -84,6 +86,8 @@ logger = logging.getLogger(__name__) +models_dependent_on_training_data = ["saint"] + model_by_name = { "denselight": DenseLightModel, "dense": DenseModel, @@ -96,7 +100,9 @@ "autoint": AutoInt, "tabnet": TabNet, "fttransformer": FTTransformer, + "saint":SAINT, } + input_type_by_name = { "denselight": "flat", "dense": "flat", @@ -109,6 +115,7 @@ "autoint": "seq", "tabnet": "flat", "fttransformer": "seq", + "saint": "seq", } cat_embedder_by_name_flat = { "cat": CatEmbedder, @@ -255,7 +262,7 @@ class TorchModel(TabularMLAlgo): 
**_default_models_params, } - def _infer_params(self): + def _infer_params(self, train = None): if self.params["path_to_save"] is not None: self.path_to_save = os.path.relpath(self.params["path_to_save"]) if not os.path.exists(self.path_to_save): @@ -304,6 +311,22 @@ def _infer_params(self): params[p_name] = getattr(module, params[p_name]) # params = self._select_params(params) + if params['model'] in models_dependent_on_training_data: + self.use_sampler = True + if train is not None: + self.train = train + else: + self.use_sampler = False + + self.train_params = { + "dataset": params["dataset"], + "bs": params["bs"], + "num_workers": params["num_workers"], + "pin_memory": params["pin_memory"], + "tokenizer": AutoTokenizer.from_pretrained(params["bert_name"], use_fast=False) if is_text else None, + "max_length": params["max_length"], + } + model = Trainer( net=TorchUniversalModel if not params["model_with_emb"] else params["model"], net_params={ @@ -349,18 +372,11 @@ def _infer_params(self): "torch_model": torch_model, **params, }, - **{"apex": False, **params}, + + **{"apex": False, + **params}, ) - self.train_params = { - "dataset": params["dataset"], - "bs": params["bs"], - "num_workers": params["num_workers"], - "pin_memory": params["pin_memory"], - "tokenizer": AutoTokenizer.from_pretrained(params["bert_name"], use_fast=False) if is_text else None, - "max_length": params["max_length"], - } - return model @staticmethod @@ -553,8 +569,8 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset: self.params = self.init_params_on_input(train_valid_iterator) self.params = self._init_params_on_input(train_valid_iterator) return super().fit_predict(train_valid_iterator) - - def fit_predict_single_fold(self, train, valid): + + def fit_predict_single_fold(self, train: TabularDataset, valid: TabularDataset): """Implements training and prediction on single fold. 
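
        For models that attend across rows (``models_dependent_on_training_data``,
        currently ``saint``), an extra ``sampler`` dataloader built from the
        train set is attached so every batch can be padded with reference rows;
        for all other models the ``sampler`` slot is set to ``None``. A rough,
        runnable sketch of the resulting dict (toy tensors, not the real
        pipeline objects):

            import torch
            from torch.utils.data import DataLoader, TensorDataset

            ds = TensorDataset(torch.randn(64, 8), torch.randint(0, 2, (64,)))
            dataloaders = {
                "train": DataLoader(ds, batch_size=16, shuffle=True),
                "val": DataLoader(ds, batch_size=16),
                "sampler": DataLoader(ds, batch_size=16, shuffle=True),  # None for row-independent models
            }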
Args: @@ -570,14 +586,17 @@ def fit_predict_single_fold(self, train, valid): target = train.target self.params["bias"] = self.get_mean_target(target, task_name) if self.params["init_bias"] else None - model = self._infer_params() + model = self._infer_params(train) model_path = ( os.path.join(self.path_to_save, f"{uuid.uuid4()}.pickle") if self.path_to_save is not None else None ) # init datasets - dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas()}) - + if self.use_sampler: + dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas(),"sampler": train.to_pandas()}) + else: + dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas()}) + dataloaders['sampler'] = None val_pred = model.fit(dataloaders) if model_path is None: @@ -603,12 +622,17 @@ def predict_single_fold(self, model: any, dataset: TabularDataset) -> np.ndarray """ seed_everything(self.params["random_state"], self.params["deterministic"]) - dataloaders = self.get_dataloaders_from_dicts({"test": dataset.to_pandas()}) + if self.use_sampler: + dataloaders = self.get_dataloaders_from_dicts({"test": dataset.to_pandas(),"sampler": self.train.to_pandas()}) + else: + dataloaders = self.get_dataloaders_from_dicts({"test": dataset.to_pandas()}) + dataloaders['sampler'] = None + if isinstance(model, (str, dict)): model = self._infer_params().load_state(model) - pred = model.predict(dataloaders["test"], "test") + pred = model.predict(dataloaders, "test") model.clean() del dataloaders, model diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index d1834506..f2368853 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -8,6 +8,8 @@ import numpy as np import torch import torch.nn as nn + +from .saint.saint import ColTransformer, RowColTransformer from ..tabnet.utils import TabNetEncoder, _initialize_non_glu from .autoint.autoint_utils import AttnInteractionBlock, LeakyGate from .autoint.ghost_norm import GhostBatchNorm @@ -1187,3 +1189,91 @@ def forward(self, x): def forward_masks(self, x): """Magic forward-pass of encoder that returns masks.""" return self.encoder.forward_masks(x) + + +class SAINT(nn.Module): + def __init__( + self, + n_in: int, + n_out: int = 1, + embedding_size: int = 10, + depth: int =2, + heads: int = 8, + dim_head = 16, + mlp_hidden_mults = (4, 2), + ffn_mult = 4, + attn_dropout = 0., + ff_dropout = 0., + mlp_dropout =0., + attentiontype = 'colrow', + device: torch.device = torch.device("cuda:0"), + **kwargs + ): + super().__init__() + self.device = device + self.cls_token = nn.Embedding(2, embedding_size) + self.attentiontype = attentiontype + if attentiontype == 'col': + self.transformer = ColTransformer( + dim = embedding_size, + depth = depth, + heads = heads, + dim_head = dim_head, + attn_dropout = attn_dropout, + ff_dropout = ff_dropout + ) + elif attentiontype in ['row','colrow'] : + self.transformer = RowColTransformer( + dim = embedding_size, + nfeats= n_in+1, #num featurs + depth = depth, + heads = heads, + dim_head = dim_head, + ffn_mult = ffn_mult, + attn_dropout = attn_dropout, + ff_dropout = ff_dropout, + style = attentiontype + ) + + l = (n_in+1) // 8 #input_size = (dim * self.num_categories) + (dim * num_continuous) + hidden_dimensions = list(map(lambda t: l * t, mlp_hidden_mults)) + + self.mlp = MLP(n_in = embedding_size, + n_out = n_out, + hidden_size = 
hidden_dimensions, + drop_rate=mlp_dropout, + use_bn = False, + dropout_first= False) + # self.embeds = nn.Embedding(self.total_tokens, self.dim) #.to(device) + + + + def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor: + """Transform the input tensor. + + Args: + embedded : torch.Tensor + embedded fields + + Returns: + torch.Tensor + + """ + mask = torch.zeros((len(embedded),len(embedded)), device=self.device, dtype=torch.bool) + mask[torch.arange(bs), torch.arange(bs)] = 1 + mask[:bs, bs:] = 1 + mask[bs:, bs:] = 1 + + cls_token = torch.unsqueeze( + self.cls_token(torch.ones(embedded.shape[0], dtype=torch.int).to(self.device)), dim=1 + ) + x = torch.cat((cls_token, embedded), dim=1) + x = self.transformer(x, mask_samples=mask) + + # NOTE modified to simple X -> Y supervised model + + # cat_outs = self.mlp1(x[:,:self.num_categories,:]) + # con_outs = self.mlp2(x[:,self.num_categories:,:]) + # return cat_outs, con_outs + + return self.mlp(x[:,0,:]) diff --git a/lightautoml/ml_algo/torch_based/saint/saint.py b/lightautoml/ml_algo/torch_based/saint/saint.py new file mode 100644 index 00000000..03761aae --- /dev/null +++ b/lightautoml/ml_algo/torch_based/saint/saint.py @@ -0,0 +1,144 @@ + +import numpy as np +import torch +import torch.nn.functional as F +from einops import rearrange +from torch import einsum, nn + +def exists(val): + return val is not None + +def default(val, d): + return val if exists(val) else d + +def ff_encodings(x,B): + x_proj = (2. * np.pi * x.unsqueeze(-1)) @ B.t() + return torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) + + +class Residual(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(x, **kwargs) + x + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + +# attention + +class GEGLU(nn.Module): + def forward(self, x): + x, gates = x.chunk(2, dim = -1) + return x * F.gelu(gates) + +class FeedForward(nn.Module): + def __init__(self, dim, mult = 4, dropout = 0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, int(dim * mult) * 2), + GEGLU(), + nn.Dropout(dropout), + nn.Linear(int(dim * mult), dim) + ) + + def forward(self, x, **kwargs): + return self.net(x, **kwargs) + +class Attention(nn.Module): + def __init__( + self, + dim, + heads = 8, + dim_head = 16, + dropout = 0. 
+ ): + super().__init__() + inner_dim = dim_head * heads + self.heads = heads + self.scale = dim_head ** -0.5 + + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False) + self.to_out = nn.Linear(inner_dim, dim) + + self.dropout = nn.Dropout(dropout) + + def forward(self, x, mask=None): + h = self.heads + q, k, v = self.to_qkv(x).chunk(3, dim = -1) + q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v)) + sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale + if mask is not None: + sim[~mask[None, None].expand_as(sim)] = float('-inf') + attn = sim.softmax(dim = -1) + out = einsum('b h i j, b h j d -> b h i d', attn, v) + out = rearrange(out, 'b h n d -> b n (h d)', h = h) + return self.to_out(out) + + +class RowColTransformer(nn.Module): + def __init__(self, dim, nfeats, depth, heads, dim_head, ffn_mult, attn_dropout, ff_dropout, style='col'): + super().__init__() + self.layers = nn.ModuleList([]) + self.mask_embed = nn.Embedding(nfeats, dim) + self.style = style + for _ in range(depth): + if self.style == 'colrow': + self.layers.append(nn.ModuleList([ + PreNorm(dim, Residual(Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout))), + PreNorm(dim, Residual(FeedForward(dim, mult=ffn_mult, dropout = ff_dropout))), + PreNorm(dim*nfeats, Residual(Attention(dim*nfeats, heads = heads, dim_head = dim_head, dropout = attn_dropout))), + PreNorm(dim*nfeats, Residual(FeedForward(dim*nfeats, mult=ffn_mult, dropout = ff_dropout))), + ])) + else: + self.layers.append(nn.ModuleList([ + PreNorm(dim*nfeats, Residual(Attention(dim*nfeats, heads = heads, dim_head = 64, dropout = attn_dropout))), + PreNorm(dim*nfeats, Residual(FeedForward(dim*nfeats, mult=ffn_mult, dropout = ff_dropout))), + ])) + + def forward(self, x, mask_features=None, mask_samples=None): + + _, n, _ = x.shape + if self.style == 'colrow': + for attn1, ff1, attn2, ff2 in self.layers: # type: ignore[code] + x = attn1(x, mask=mask_features) + x = ff1(x) + x = rearrange(x, 'b n d -> 1 b (n d)') + x = attn2(x, mask=mask_samples) + x = ff2(x) + x = rearrange(x, '1 b (n d) -> b n d', n = n) + else: + for attn1, ff1 in self.layers: # type: ignore[code] + x = rearrange(x, 'b n d -> 1 b (n d)') + x = attn1(x) + x = ff1(x) + x = rearrange(x, '1 b (n d) -> b n d', n = n) + return x + + +# transformer +class ColTransformer(nn.Module): + def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout): + super().__init__() + self.layers = nn.ModuleList([]) + + + for _ in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, Residual(Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout))), + PreNorm(dim, Residual(FeedForward(dim, dropout = ff_dropout))), + ])) + + def forward(self, x, mask_features=None, mask_samples=None): + for attn, ff in self.layers: + x = attn(x) + x = ff(x) + return x diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 3ad54bb9..7508aead 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -228,8 +228,10 @@ def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: output = torch.cat(outputs, dim=1) else: output = outputs[0] - - logits = self.torch_model(output) + if 'batch_size' in inp.keys(): + logits = self.torch_model(output,inp['batch_size']) + else: + logits = self.torch_model(output) return logits def get_preds_from_logits(self, logits: torch.Tensor) -> torch.Tensor: diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index 240af7e0..79f60438 
100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -3,7 +3,7 @@ import logging from copy import deepcopy -from typing import Any +from typing import Any, Iterable from typing import Callable from typing import Dict from typing import List @@ -290,6 +290,7 @@ def __init__( stop_by_metric: bool = False, clip_grad: bool = False, clip_grad_params: Optional[Dict] = None, + **kwargs ): self.net = net @@ -312,7 +313,7 @@ def __init__( self.stop_by_metric = stop_by_metric self.clip_grad = clip_grad self.clip_grad_params = clip_grad_params if clip_grad_params is not None else {} - + self.dataloader = None self.model = None self.optimizer = None @@ -433,10 +434,16 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: for epoch in range(self.n_epochs): self.epoch = epoch # train - train_loss = self.train(dataloaders=dataloaders) + if dataloaders['sampler'] is not None: + train_loss = self.train_with_sampler(dataloaders=dataloaders) + else: + train_loss = self.train(dataloaders=dataloaders) train_log.extend(train_loss) # test - val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) + if dataloaders['sampler'] is not None: + val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"], sampler = dataloaders["sampler"] ) + else: + val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) if self.stop_by_metric: cond = -1 * self.metric(*val_data, weights) else: @@ -461,14 +468,20 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: self.se.set_best_params(self.model) if self.is_snap: - val_loss, val_data, weights = self.test(dataloader=dataloaders["val"], snap=True, stage="val") + if dataloaders['sampler'] is not None: + val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"],sampler=dataloaders["sampler"], snap=True, stage="val") + else: + val_loss, val_data, weights = self.test(dataloader=dataloaders["val"], snap=True, stage="val") logger.info3( "Result SE, val loss: {vl}, val metric: {me}".format( me=self.metric(*val_data, weights), vl=np.mean(val_loss) ) ) elif self.se.swa: - val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) + if dataloaders['sampler'] is not None: + val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"], sampler=dataloaders["sampler"]) + else: + val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) logger.info3( "Early stopping: val loss: {vl}, val metric: {me}".format( me=self.metric(*val_data, weights), vl=np.mean(val_loss) @@ -479,6 +492,75 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: return val_data[1] + + def train_with_sampler(self, dataloaders: Dict[str, DataLoader]) -> List[float]: + """Training loop. + + Args: + dataloaders: Dict with torch dataloaders. + + Returns: + Loss. 
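+
+        Note:
+            Every real batch is concatenated with a batch of candidate rows
+            drawn from the ``sampler`` loader, and ``data["batch_size"]``
+            marks where the real rows end so downstream code can tell them
+            apart. A toy sketch of the layout (hypothetical tensors):
+
+                import torch
+                real = torch.randn(16, 8)        # current training batch
+                candidates = torch.randn(16, 8)  # reference rows for intersample attention
+                joint = torch.cat([real, candidates])  # the model sees both
+                batch_size = len(real)           # where the real rows end in the joint batch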
+ + """ + ################## + loss_log = [] + self.model.train() + running_loss = 0 + c = 0 + + logging_level = get_stdout_level() + if logging_level < logging.INFO and self.verbose and self.verbose_bar: + loader = tqdm(zip(dataloaders["train"],dataloaders['sampler']), desc="train", disable=False) + else: + loader = zip(dataloaders["train"],dataloaders['sampler']) + for sample, candidate_sample in loader: + data = { + i: torch.cat([(sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)), + (candidate_sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.device))]) + for i in sample.keys() + } + ### NOTE, HERE WE CAN ADD TORCH.UNIQUE + data['batch_size'] = len(sample['label']) + + loss = self.model(data).mean() + if self.apex: + with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + + if self.clip_grad: + torch.nn.utils.clip_grad_norm_(self.model.parameters(), **self.clip_grad_params) + self.optimizer.step() + self.optimizer.zero_grad() + + loss = loss.data.cpu().numpy() + loss_log.append(loss) + running_loss += loss + + c += 1 + if self.verbose and self.verbose_bar and logging_level < logging.INFO: + if self.verbose_inside and c % self.verbose_inside == 0: + val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"],sampler=dataloaders['sampler']) + if self.stop_by_metric: + cond = -1 * self.metric(*val_data, weights) + else: + cond = np.mean(val_loss) + self.se.update(self.model, cond) + + logger.info3( + "Epoch: {e}, iter: {c}, val loss: {vl}, val metric: {me}".format( + me=self.metric(*val_data, weights), + e=self.epoch, + c=c, + vl=np.mean(val_loss), + ) + ) + loader.set_description("train (loss=%g)" % (running_loss / c)) + + return loss_log + def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: """Training loop. @@ -489,6 +571,7 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: Loss. """ + ################## loss_log = [] self.model.train() running_loss = 0 @@ -558,6 +641,7 @@ def test( Loss, (Target, OOF). """ + ##################### loss_log = [] weights_log = [] self.model.eval() @@ -609,7 +693,75 @@ def test( np.array(weights_log), ) - def predict(self, dataloader: DataLoader, stage: str) -> np.ndarray: + def test_with_sampler( + self, dataloader: DataLoader, sampler: DataLoader,stage: str = "val", snap: bool = False + ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: + """Testing loop. + + Args: + dataloader: Torch dataloader. + stage: Train, val or test. + snap: Use snapshots. + + Returns: + Loss, (Target, OOF). 
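+
+        Note:
+            Outputs and targets are sliced back to ``[:len(sample["label"])]``
+            so the candidate rows appended for intersample attention never
+            leak into the metric. A minimal sketch (hypothetical shapes):
+
+                import torch
+                bs = 16
+                joint_output = torch.randn(2 * bs, 1)  # real batch + candidates
+                real_output = joint_output[:bs]        # only the real rows are scored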
+ + """ + ##################### + loss_log = [] + weights_log = [] + self.model.eval() + pred = [] + target = [] + logging_level = get_stdout_level() + if logging_level < logging.INFO and self.verbose and self.verbose_bar: + loader = tqdm(zip(dataloader,sampler), desc=stage, disable=False) + else: + loader = zip(dataloader,sampler) + + with torch.no_grad(): + for sample, candidate_sample in loader: + data = { + i: torch.cat([(sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)), + (candidate_sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.device))]) + for i in sample.keys() + } + ### NOTE, HERE WE CAN ADD TORCH.UNIQUE + data['batch_size'] = len(sample['label']) + + if snap: + output = self.se.predict(data) + loss = self.se.forward(data) if stage != "test" else None + else: + output = self.model.predict(data) + loss = self.model(data) if stage != "test" else None + + if stage != "test": + loss = loss.mean().data.cpu().numpy() + + loss_log.append(loss) + + output = output.data.cpu().numpy()[:len(sample['label'])] + target_data = data["label"].data.cpu().numpy()[:len(sample['label'])] + weights = data.get("weight", None) + if weights is not None: + weights = weights.data.cpu().numpy()[:len(sample['label'])] + + pred.append(output) + target.append(target_data) + weights_log.extend(weights) + + self.model.train() + + return ( + loss_log, + ( + np.vstack(target) if len(target[0].shape) == 2 else np.hstack(target), + np.vstack(pred) if len(pred[0].shape) == 2 else np.hstack(pred), + ), + np.array(weights_log), + ) + def predict(self, dataloaders: DataLoader, stage: str) -> np.ndarray: """Predict model. Args: @@ -620,5 +772,8 @@ def predict(self, dataloader: DataLoader, stage: str) -> np.ndarray: Prediction. """ - loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloader=dataloader) + if dataloaders['sampler'] is not None: + loss, (target, pred), _ = self.test_with_sampler(stage=stage, snap=self.is_snap, dataloader=dataloaders[stage],sampler=dataloaders['sampler']) + else: + loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloader=dataloaders[stage]) return pred diff --git a/lightautoml/text/utils.py b/lightautoml/text/utils.py index 18a8fe70..fe91f806 100644 --- a/lightautoml/text/utils.py +++ b/lightautoml/text/utils.py @@ -66,7 +66,7 @@ def is_shuffle(stage: str) -> bool: Bool value. 
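
        Note:
            When the ``sampler`` entry of the dict is not ``None``, prediction
            routes through ``test_with_sampler`` so SAINT-style models still
            see reference rows at inference time; otherwise the plain ``test``
            path is used, as the body below shows.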
""" - is_sh = {"train": True, "val": False, "test": False} + is_sh = {"train": True, "val": False, "test": False, "sampler": True} return is_sh[stage] From 799ded248d417d67426c438eb1f9544c1d577f6e Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 2 Oct 2023 10:21:02 +0000 Subject: [PATCH 42/49] no-verify --- lightautoml/ml_algo/dl_model.py | 4 ++++ lightautoml/text/embed.py | 13 ++++++++++++- lightautoml/text/trainer.py | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 38b1f521..bde69ff2 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -52,6 +52,8 @@ DenseEmbeddingFlat, LinearEmbedding, LinearEmbeddingFlat, + MLPContEmbedding, + MLPContEmbeddingFlat, PLREmbedding, PLREmbeddingFlat, SoftEmbedding, @@ -134,6 +136,7 @@ "dense": DenseEmbeddingFlat, "plr": PLREmbeddingFlat, "soft": SoftEmbeddingFlat, + "mlp": MLPContEmbeddingFlat } cont_embedder_by_name = { "cont": LinearEmbedding, @@ -141,6 +144,7 @@ "dense": DenseEmbedding, "plr": PLREmbedding, "soft": SoftEmbedding, + "mlp": MLPContEmbedding, } diff --git a/lightautoml/text/embed.py b/lightautoml/text/embed.py index fa0ea834..b4974d3d 100644 --- a/lightautoml/text/embed.py +++ b/lightautoml/text/embed.py @@ -759,7 +759,18 @@ def forward(self, X: Dict) -> Tensor: """ x = X["cont"] - x = torch.stack([l(x[:, i]) for i, l in enumerate(self.layers)], 1) + # ans = [] + # for i, l in enumerate(self.layers): + # temp = x[:,i].view(x.size(0),-1) + # temp = l(temp) + # x = torch.stack(ans,1) + x = torch.stack([l(x[:, i].view(-1,1)) for i, l in enumerate(self.layers)], 1) if self.flatten_output: return x.view(x.shape[0], -1) return x + +class MLPContEmbeddingFlat(MLPContEmbedding): + """Flatten version of BasicCatEmbedding.""" + + def __init__(self, *args, **kwargs): + super(MLPContEmbeddingFlat, self).__init__(*args, **{**kwargs, **{"flatten_output": True}}) \ No newline at end of file diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index 79f60438..1825a54a 100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -1,5 +1,6 @@ """Main pytorch training and prediction class with Snapshots Ensemble.""" +from itertools import cycle import logging from copy import deepcopy @@ -715,9 +716,9 @@ def test_with_sampler( target = [] logging_level = get_stdout_level() if logging_level < logging.INFO and self.verbose and self.verbose_bar: - loader = tqdm(zip(dataloader,sampler), desc=stage, disable=False) + loader = tqdm(zip(dataloader,cycle(sampler)), desc=stage, disable=False) else: - loader = zip(dataloader,sampler) + loader = zip(dataloader,cycle(sampler)) with torch.no_grad(): for sample, candidate_sample in loader: From 57aa2a5e0c1bb0ddd7d1bbf886c74e0ce26cfaef Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 2 Oct 2023 10:24:14 +0000 Subject: [PATCH 43/49] no-verify --- lightautoml/ml_algo/dl_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index bde69ff2..483bcf84 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -1,7 +1,6 @@ """Neural net for tabular datasets.""" -from itertools import cycle from lightautoml.utils.installation import __validate_extra_deps From ef7316b8e3ca76e0cc758b1bbd5779f8559506c7 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Wed, 4 Oct 2023 09:33:04 +0000 Subject: [PATCH 44/49] starting changing --- 
lightautoml/ml_algo/torch_based/nn_models.py | 2 +- .../ml_algo/{ => torch_based}/tabnet/utils.py | 0 lightautoml/text/trainer.py | 293 +++++++++--------- 3 files changed, 146 insertions(+), 149 deletions(-) rename lightautoml/ml_algo/{ => torch_based}/tabnet/utils.py (100%) diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index f2368853..de698791 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -10,7 +10,7 @@ import torch.nn as nn from .saint.saint import ColTransformer, RowColTransformer -from ..tabnet.utils import TabNetEncoder, _initialize_non_glu +from .tabnet.utils import TabNetEncoder, _initialize_non_glu from .autoint.autoint_utils import AttnInteractionBlock, LeakyGate from .autoint.ghost_norm import GhostBatchNorm from .fttransformer.fttransformer_utils import Transformer diff --git a/lightautoml/ml_algo/tabnet/utils.py b/lightautoml/ml_algo/torch_based/tabnet/utils.py similarity index 100% rename from lightautoml/ml_algo/tabnet/utils.py rename to lightautoml/ml_algo/torch_based/tabnet/utils.py diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index 1825a54a..a68d13cc 100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -494,74 +494,6 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: return val_data[1] - def train_with_sampler(self, dataloaders: Dict[str, DataLoader]) -> List[float]: - """Training loop. - - Args: - dataloaders: Dict with torch dataloaders. - - Returns: - Loss. - - """ - ################## - loss_log = [] - self.model.train() - running_loss = 0 - c = 0 - - logging_level = get_stdout_level() - if logging_level < logging.INFO and self.verbose and self.verbose_bar: - loader = tqdm(zip(dataloaders["train"],dataloaders['sampler']), desc="train", disable=False) - else: - loader = zip(dataloaders["train"],dataloaders['sampler']) - for sample, candidate_sample in loader: - data = { - i: torch.cat([(sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)), - (candidate_sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.device))]) - for i in sample.keys() - } - ### NOTE, HERE WE CAN ADD TORCH.UNIQUE - data['batch_size'] = len(sample['label']) - - loss = self.model(data).mean() - if self.apex: - with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: - scaled_loss.backward() - else: - loss.backward() - - if self.clip_grad: - torch.nn.utils.clip_grad_norm_(self.model.parameters(), **self.clip_grad_params) - self.optimizer.step() - self.optimizer.zero_grad() - - loss = loss.data.cpu().numpy() - loss_log.append(loss) - running_loss += loss - - c += 1 - if self.verbose and self.verbose_bar and logging_level < logging.INFO: - if self.verbose_inside and c % self.verbose_inside == 0: - val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"],sampler=dataloaders['sampler']) - if self.stop_by_metric: - cond = -1 * self.metric(*val_data, weights) - else: - cond = np.mean(val_loss) - self.se.update(self.model, cond) - - logger.info3( - "Epoch: {e}, iter: {c}, val loss: {vl}, val metric: {me}".format( - me=self.metric(*val_data, weights), - e=self.epoch, - c=c, - vl=np.mean(val_loss), - ) - ) - loader.set_description("train (loss=%g)" % (running_loss / c)) - - return loss_log - def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: """Training loop. 
@@ -583,16 +515,18 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: loader = tqdm(dataloaders["train"], desc="train", disable=False) else: loader = dataloaders["train"] - for sample in loader: data = { i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) for i in sample.keys() } + data['batch_size'] = len(sample['label']) + if dataloaders['sampler'] is not None: + data['sampler'] = dataloaders['sampler'] loss = self.model(data).mean() if self.apex: - with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: + with self.amp.scale_loss(loss, self .optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() @@ -609,7 +543,7 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: c += 1 if self.verbose and self.verbose_bar and logging_level < logging.INFO: if self.verbose_inside and c % self.verbose_inside == 0: - val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) + val_loss, val_data, weights = self.test(dataloader=dataloaders) if self.stop_by_metric: cond = -1 * self.metric(*val_data, weights) else: @@ -628,74 +562,140 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: return loss_log - def test( - self, dataloader: DataLoader, stage: str = "val", snap: bool = False - ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: - """Testing loop. - - Args: - dataloader: Torch dataloader. - stage: Train, val or test. - snap: Use snapshots. - - Returns: - Loss, (Target, OOF). - - """ - ##################### - loss_log = [] - weights_log = [] - self.model.eval() - pred = [] - target = [] - logging_level = get_stdout_level() - if logging_level < logging.INFO and self.verbose and self.verbose_bar: - loader = tqdm(dataloader, desc=stage, disable=False) - else: - loader = dataloader - - with torch.no_grad(): - for sample in loader: - data = { - i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) - for i in sample.keys() - } - - if snap: - output = self.se.predict(data) - loss = self.se.forward(data) if stage != "test" else None - else: - output = self.model.predict(data) - loss = self.model(data) if stage != "test" else None - - if stage != "test": - loss = loss.mean().data.cpu().numpy() - - loss_log.append(loss) - - output = output.data.cpu().numpy() - target_data = data["label"].data.cpu().numpy() - weights = data.get("weight", None) - if weights is not None: - weights = weights.data.cpu().numpy() - - pred.append(output) - target.append(target_data) - weights_log.extend(weights) - - self.model.train() + # def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: + # """Training loop. + + # Args: + # dataloaders: Dict with torch dataloaders. + + # Returns: + # Loss. 
+ + # """ + # ################## + # loss_log = [] + # self.model.train() + # running_loss = 0 + # c = 0 + + # logging_level = get_stdout_level() + # if logging_level < logging.INFO and self.verbose and self.verbose_bar: + # loader = tqdm(dataloaders["train"], desc="train", disable=False) + # else: + # loader = dataloaders["train"] + + # for sample in loader: + # data = { + # i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) + # for i in sample.keys() + # } + + # loss = self.model(data).mean() + # if self.apex: + # with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: + # scaled_loss.backward() + # else: + # loss.backward() + + # if self.clip_grad: + # torch.nn.utils.clip_grad_norm_(self.model.parameters(), **self.clip_grad_params) + # self.optimizer.step() + # self.optimizer.zero_grad() + + # loss = loss.data.cpu().numpy() + # loss_log.append(loss) + # running_loss += loss + + # c += 1 + # if self.verbose and self.verbose_bar and logging_level < logging.INFO: + # if self.verbose_inside and c % self.verbose_inside == 0: + # val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) + # if self.stop_by_metric: + # cond = -1 * self.metric(*val_data, weights) + # else: + # cond = np.mean(val_loss) + # self.se.update(self.model, cond) + + # logger.info3( + # "Epoch: {e}, iter: {c}, val loss: {vl}, val metric: {me}".format( + # me=self.metric(*val_data, weights), + # e=self.epoch, + # c=c, + # vl=np.mean(val_loss), + # ) + # ) + # loader.set_description("train (loss=%g)" % (running_loss / c)) + + # return loss_log + + # def test( + # self, dataloader: DataLoader, stage: str = "val", snap: bool = False + # ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: + # """Testing loop. + + # Args: + # dataloader: Torch dataloader. + # stage: Train, val or test. + # snap: Use snapshots. + + # Returns: + # Loss, (Target, OOF). 
+ + # """ + # ##################### + # loss_log = [] + # weights_log = [] + # self.model.eval() + # pred = [] + # target = [] + # logging_level = get_stdout_level() + # if logging_level < logging.INFO and self.verbose and self.verbose_bar: + # loader = tqdm(dataloader, desc=stage, disable=False) + # else: + # loader = dataloader + + # with torch.no_grad(): + # for sample in loader: + # data = { + # i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) + # for i in sample.keys() + # } + + # if snap: + # output = self.se.predict(data) + # loss = self.se.forward(data) if stage != "test" else None + # else: + # output = self.model.predict(data) + # loss = self.model(data) if stage != "test" else None + + # if stage != "test": + # loss = loss.mean().data.cpu().numpy() + + # loss_log.append(loss) + + # output = output.data.cpu().numpy() + # target_data = data["label"].data.cpu().numpy() + # weights = data.get("weight", None) + # if weights is not None: + # weights = weights.data.cpu().numpy() + + # pred.append(output) + # target.append(target_data) + # weights_log.extend(weights) + + # self.model.train() + + # return ( + # loss_log, + # ( + # np.vstack(target) if len(target[0].shape) == 2 else np.hstack(target), + # np.vstack(pred) if len(pred[0].shape) == 2 else np.hstack(pred), + # ), + # np.array(weights_log), + # ) - return ( - loss_log, - ( - np.vstack(target) if len(target[0].shape) == 2 else np.hstack(target), - np.vstack(pred) if len(pred[0].shape) == 2 else np.hstack(pred), - ), - np.array(weights_log), - ) - - def test_with_sampler( - self, dataloader: DataLoader, sampler: DataLoader,stage: str = "val", snap: bool = False + def test( + self, dataloaders: DataLoader,stage: str = "val", snap: bool = False ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: """Testing loop. @@ -716,20 +716,20 @@ def test_with_sampler( target = [] logging_level = get_stdout_level() if logging_level < logging.INFO and self.verbose and self.verbose_bar: - loader = tqdm(zip(dataloader,cycle(sampler)), desc=stage, disable=False) + loader = tqdm(dataloaders[stage], desc=stage, disable=False) else: - loader = zip(dataloader,cycle(sampler)) + loader = dataloaders[stage] with torch.no_grad(): for sample, candidate_sample in loader: data = { - i: torch.cat([(sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)), - (candidate_sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.device))]) + i: sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device) for i in sample.keys() } ### NOTE, HERE WE CAN ADD TORCH.UNIQUE data['batch_size'] = len(sample['label']) - + if dataloaders['sampler'] is not None: + data['sampler'] = dataloaders['sampler'] if snap: output = self.se.predict(data) loss = self.se.forward(data) if stage != "test" else None @@ -745,7 +745,7 @@ def test_with_sampler( output = output.data.cpu().numpy()[:len(sample['label'])] target_data = data["label"].data.cpu().numpy()[:len(sample['label'])] weights = data.get("weight", None) - if weights is not None: + if weights is not None: weights = weights.data.cpu().numpy()[:len(sample['label'])] pred.append(output) @@ -773,8 +773,5 @@ def predict(self, dataloaders: DataLoader, stage: str) -> np.ndarray: Prediction. 
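
        Note:
            Prediction now always goes through the unified ``test`` loop;
            whether candidate rows are drawn is decided there from the
            ``sampler`` entry of the dataloaders dict.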
""" - if dataloaders['sampler'] is not None: - loss, (target, pred), _ = self.test_with_sampler(stage=stage, snap=self.is_snap, dataloader=dataloaders[stage],sampler=dataloaders['sampler']) - else: - loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloader=dataloaders[stage]) + loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloader=dataloaders) return pred From 50962ddbcaa8ed72d033700e590fe5652d1061b3 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Thu, 12 Oct 2023 15:09:28 +0000 Subject: [PATCH 45/49] more changes --- lightautoml/dataset/base.py | 48 ++++++++ lightautoml/dataset/np_pd_dataset.py | 36 ++++++ lightautoml/dataset/utils.py | 115 ++++++++++++++++++ lightautoml/ml_algo/base.py | 6 +- lightautoml/ml_algo/dl_model.py | 17 ++- .../ml_algo/torch_based/tabnet/utils.py | 4 +- lightautoml/text/nn_model.py | 40 ++++-- lightautoml/text/trainer.py | 63 ++++++---- lightautoml/text/utils.py | 1 + 9 files changed, 285 insertions(+), 45 deletions(-) diff --git a/lightautoml/dataset/base.py b/lightautoml/dataset/base.py index a033e7db..5a107f19 100644 --- a/lightautoml/dataset/base.py +++ b/lightautoml/dataset/base.py @@ -365,6 +365,19 @@ def shape(self) -> Tuple[Optional[int], Optional[int]]: return rows, cols # static methods - how to make 1d slice, 2s slice, concat of feature matrix etc ... + @staticmethod + def _vstack(datasets: Sequence[Any]) -> Any: + """Abstract method - define horizontal stack of feature arrays. + + Args: + datasets: Sequence of feature arrays. + + Returns: # noqa DAR202 + Single feature array. + + """ + raise NotImplementedError("Horizontal Stack not implemented.") + @staticmethod def _hstack(datasets: Sequence[Any]) -> Any: """Abstract method - define horizontal stack of feature arrays. @@ -472,7 +485,42 @@ def concat(cls, datasets: Sequence["LAMLDataset"]) -> "LAMLDataset": dataset.set_data(data, features, roles) return dataset + @classmethod + def vconcat(cls, datasets: Sequence["LAMLDataset"]) -> "LAMLDataset": + """Concat multiple dataset. + + Default behavior - takes empty dataset from datasets[0] + and concat all features from others. + + Args: + datasets: Sequence of datasets. + + Returns: + Concated dataset. + + """ + for check in cls._concat_checks: + check(datasets) + + dataset = datasets[0].empty() + data = [] + features = [*datasets[0].features] + roles = {**datasets[0].roles} + atrs = set(dataset._array_like_attrs) + for ds in datasets: + data.append(ds.data) + for atr in ds._array_like_attrs: + if atr not in atrs: + dataset._array_like_attrs.append(atr) + dataset.__dict__[atr] = ds.__dict__[atr] + atrs.update({atr}) + + data = cls._vstack(data) + dataset.set_data(data, features, roles) + + return dataset + def drop_features(self, droplist: Sequence[str]): """Inplace drop columns from dataset. diff --git a/lightautoml/dataset/np_pd_dataset.py b/lightautoml/dataset/np_pd_dataset.py index 3ec8789c..bffc37c4 100644 --- a/lightautoml/dataset/np_pd_dataset.py +++ b/lightautoml/dataset/np_pd_dataset.py @@ -212,6 +212,18 @@ def _hstack(datasets: Sequence[np.ndarray]) -> np.ndarray: """ return np.hstack(datasets) + @staticmethod + def _vstack(datasets: Sequence[np.ndarray]) -> np.ndarray: + """Concatenate function for numpy arrays. + + Args: + datasets: Sequence of np.ndarray. + + Returns: + Stacked features array. 
+ + """ + return np.vstack(datasets) @staticmethod def _get_rows(data: np.ndarray, k: IntIdx) -> np.ndarray: @@ -400,6 +412,17 @@ def _hstack(datasets: Sequence[Union[sparse.csr_matrix, np.ndarray]]) -> sparse. """ return sparse.hstack(datasets, format="csr") + def _vstack(datasets: Sequence[Union[sparse.csr_matrix, np.ndarray]]) -> sparse.csr_matrix: + """Concatenate function for sparse and numpy arrays. + + Args: + datasets: Sequence of csr_matrix or np.ndarray. + + Returns: + Sparse matrix. + + """ + return sparse.vstack(datasets, format="csr") def __init__( self, @@ -609,6 +632,19 @@ def _hstack(datasets: Sequence[DataFrame]) -> DataFrame: """ return pd.concat(datasets, axis=1) + + @staticmethod + def _vstack(datasets: Sequence[DataFrame]) -> DataFrame: + """Define how to concat features arrays. + + Args: + datasets: Sequence of tables. + + Returns: + concatenated table. + + """ + return pd.concat(datasets, axis=0) @staticmethod def _get_rows(data: DataFrame, k: IntIdx) -> FrameOrSeries: diff --git a/lightautoml/dataset/utils.py b/lightautoml/dataset/utils.py index 5f3410e5..158e9fa0 100644 --- a/lightautoml/dataset/utils.py +++ b/lightautoml/dataset/utils.py @@ -158,3 +158,118 @@ def concatenate(datasets: Sequence[LAMLDataset]) -> LAMLDataset: datasets = [datasets[n]] + [x for (y, x) in enumerate(datasets) if n != y] return conc(datasets) + + + +def get_common_vconcat( + datasets: Sequence[LAMLDataset], +) -> Tuple[Callable, Optional[type]]: + """Get concatenation function for datasets of different types. + + Takes multiple datasets as input and check, + if is's ok to concatenate it and return function. + + Args: + datasets: Sequence of datasets. + + Returns: + Function, that is able to concatenate datasets. + + """ + # TODO: Add pandas + numpy via transforming to numpy? + dataset_types = set([type(x) for x in datasets]) + + # general - if single type, concatenation for that type + if len(dataset_types) == 1: + klass = list(dataset_types)[0] + return klass.vconcat, None + + # np and sparse goes to sparse + elif dataset_types == {NumpyDataset, CSRSparseDataset}: + return CSRSparseDataset.vconcat, CSRSparseDataset + + elif dataset_types == {NumpyDataset, PandasDataset}: + return numpy_and_pandas_vconcat, None + + elif (dataset_types == {NumpyDataset, SeqNumpyPandasDataset}) or ( + dataset_types == {PandasDataset, SeqNumpyPandasDataset} + ): + return numpy_or_pandas_and_seq_vconcat, None + + raise TypeError("Unable to concatenate dataset types {0}".format(list(dataset_types))) + + +def numpy_and_pandas_vconcat(datasets: Sequence[Union[NumpyDataset, PandasDataset]]) -> PandasDataset: + """Concat of numpy and pandas dataset. + + Args: + datasets: Sequence of datasets to concatenate. + + Returns: + Concatenated dataset. + + """ + datasets = [x.to_pandas() for x in datasets] + + return PandasDataset.vconcat(datasets) + + +def numpy_or_pandas_and_seq_vconcat( + datasets: Sequence[Union[NumpyDataset, PandasDataset, SeqNumpyPandasDataset]] +) -> Union[NumpyDataset, PandasDataset]: + """Concat plain and sequential dataset. + + If both datasets have same size then concat them as plain, otherwise include seq dataset inside plain one. + + Args: + datasets: one plain and one seq dataset. + + Returns: + Concatenated dataset. 
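+
+    Example:
+        A toy mirror of the size check that drives the dispatch (plain lists
+        standing in for datasets):
+
+            >>> seq, plain = [1, 2, 3], ["a", "b", "c"]
+            >>> len(seq) == len(plain)  # equal -> row-wise concat, else nest seq inside plain
+            True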
+ + """ + assert len(datasets) == 2, "should be 1 sequential and 1 plain dataset" + # get 1 numpy / pandas dataset + for n, dataset in enumerate(datasets): + if type(dataset) == SeqNumpyPandasDataset: + seq_dataset = dataset + else: + plain_dataset = dataset + + if len(seq_dataset.data) == len(plain_dataset): + return SeqNumpyPandasDataset.vconcat([seq_dataset, plain_dataset.to_pandas()]) + else: + if hasattr(plain_dataset, "seq_data"): + plain_dataset.seq_data[seq_dataset.name] = seq_dataset + else: + plain_dataset.seq_data = {seq_dataset.name: seq_dataset} + + return plain_dataset + + +def vconcatenate(datasets: Sequence[LAMLDataset]) -> LAMLDataset: + """Dataset concatenation function. + + Check if datasets have common concat function and then apply. + Assume to take target/folds/weights etc from first one. + + Args: + datasets: Sequence of datasets. + + Returns: + Dataset with concatenated features. + + """ + conc, klass = get_common_vconcat([ds for ds in datasets if ds is not None]) + + # this part is made to avoid setting first dataset of required type + if klass is not None: + + n = 0 + for n, ds in enumerate(datasets): + if type(ds) is klass: + break + + datasets = [datasets[n]] + [x for (y, x) in enumerate(datasets) if n != y] + + return conc(datasets) \ No newline at end of file diff --git a/lightautoml/ml_algo/base.py b/lightautoml/ml_algo/base.py index 0dec5aba..74c3c6da 100755 --- a/lightautoml/ml_algo/base.py +++ b/lightautoml/ml_algo/base.py @@ -16,7 +16,7 @@ import numpy as np -from lightautoml.validation.base import TrainValidIterator +from lightautoml.validation.base import HoldoutIterator, TrainValidIterator from ..dataset.base import LAMLDataset from ..dataset.np_pd_dataset import CSRSparseDataset @@ -271,8 +271,8 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset: "===== Start working with \x1b[1mfold {}\x1b[0m for \x1b[1m{}\x1b[0m =====".format(n, self._name) ) self.timer.set_control_point() - - model, pred = self.fit_predict_single_fold(train, valid) + self.params['is_holdout'] = isinstance(train_valid_iterator,HoldoutIterator) + model, pred = self.fit_predict_single_fold(train, valid, 0) self.models.append(model) preds_arr[idx] += pred.reshape((pred.shape[0], -1)) counter_arr[idx] += 1 diff --git a/lightautoml/ml_algo/dl_model.py b/lightautoml/ml_algo/dl_model.py index 483bcf84..118b564a 100644 --- a/lightautoml/ml_algo/dl_model.py +++ b/lightautoml/ml_algo/dl_model.py @@ -1,6 +1,8 @@ """Neural net for tabular datasets.""" +from lightautoml.dataset.base import LAMLDataset +from lightautoml.dataset.utils import vconcatenate from lightautoml.utils.installation import __validate_extra_deps @@ -506,7 +508,7 @@ def init_params_on_input(self, train_valid_iterator) -> dict: ) return suggested_params - def get_dataloaders_from_dicts(self, data_dict: Dict): + def get_dataloaders_from_dicts(self, data_dict: Dict, n : int =0): """Construct dataloaders depending on stage. 
Args: @@ -532,6 +534,7 @@ def get_dataloaders_from_dicts(self, data_dict: Dict): } datasets[stage] = self.train_params["dataset"]( + fold = n, data=data, y=value.target.values if stage != "test" else np.ones(len(value.data)), w=value.weights.values if value.weights is not None else np.ones(len(value.data)), @@ -573,7 +576,7 @@ def fit_predict(self, train_valid_iterator: TrainValidIterator) -> NumpyDataset: self.params = self._init_params_on_input(train_valid_iterator) return super().fit_predict(train_valid_iterator) - def fit_predict_single_fold(self, train: TabularDataset, valid: TabularDataset): + def fit_predict_single_fold(self, train: TabularDataset, valid: TabularDataset, n=0): """Implements training and prediction on single fold. Args: @@ -589,16 +592,20 @@ def fit_predict_single_fold(self, train: TabularDataset, valid: TabularDataset): target = train.target self.params["bias"] = self.get_mean_target(target, task_name) if self.params["init_bias"] else None - model = self._infer_params(train) + if self.params['is_holdout']: + ds = train + else: + ds = vconcatenate([train,valid]) + model = self._infer_params(ds) model_path = ( os.path.join(self.path_to_save, f"{uuid.uuid4()}.pickle") if self.path_to_save is not None else None ) # init datasets if self.use_sampler: - dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas(),"sampler": train.to_pandas()}) + dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas(),"sampler": train.to_pandas()},n) else: - dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas()}) + dataloaders = self.get_dataloaders_from_dicts({"train": train.to_pandas(), "val": valid.to_pandas()},n) dataloaders['sampler'] = None val_pred = model.fit(dataloaders) diff --git a/lightautoml/ml_algo/torch_based/tabnet/utils.py b/lightautoml/ml_algo/torch_based/tabnet/utils.py index 8530be5d..dc2f9d75 100644 --- a/lightautoml/ml_algo/torch_based/tabnet/utils.py +++ b/lightautoml/ml_algo/torch_based/tabnet/utils.py @@ -2,8 +2,8 @@ import torch import numpy as np import torch.nn as nn -from ..torch_based.node_nn_model import Entmax15, Sparsemax -from ..torch_based.autoint.ghost_norm import GhostBatchNorm +from ..node_nn_model import Entmax15, Sparsemax +from ..autoint.ghost_norm import GhostBatchNorm def _initialize_non_glu(module, input_dim, output_dim): diff --git a/lightautoml/text/nn_model.py b/lightautoml/text/nn_model.py index 7508aead..131901d1 100644 --- a/lightautoml/text/nn_model.py +++ b/lightautoml/text/nn_model.py @@ -12,7 +12,7 @@ import torch.nn as nn from ..tasks.base import Task - +from .utils import _dtypes_mapping logger = logging.getLogger(__name__) @@ -31,6 +31,7 @@ class UniversalDataset: def __init__( self, + fold: int, data: Dict[str, np.ndarray], y: np.ndarray, w: Optional[np.ndarray] = None, @@ -38,6 +39,7 @@ def __init__( max_length: int = 256, stage: str = "test", ): + self.fold = fold self.data = data self.y = y self.w = w @@ -49,7 +51,7 @@ def __len__(self) -> int: return len(self.y) def __getitem__(self, index: int) -> Dict[str, np.ndarray]: - res = {"label": self.y[index]} + res = {"fold":self.fold ,"label": self.y[index]} res.update({key: value[index] for key, value in self.data.items() if key != "text"}) if (self.tokenizer is not None) and ("text" in self.data): sent = self.data["text"][index, 0] # only one column @@ -85,7 +87,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward-pass.""" x = 
torch.clamp(x, self.min_v, self.max_v) return x - + class TorchUniversalModel(nn.Module): """Mixed data model. @@ -133,6 +135,7 @@ def __init__( self.cont_embedder = None self.cat_embedder = None self.text_embedder = None + self.sampler = None n_in = 0 if cont_embedder_ is not None: @@ -212,9 +215,12 @@ def _init_last_layers(self, torch_model, bias, use_skip=False): except: logger.info3("Last linear layer not founded, so init_bias=False") - def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: + + def get_logits(self, inp: Dict[str, torch.Tensor],efficient_bs:int = None) -> torch.Tensor: """Forward-pass of model with embeddings.""" outputs = [] + + if self.cont_embedder is not None: outputs.append(self.cont_embedder(inp)) @@ -228,8 +234,8 @@ def get_logits(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: output = torch.cat(outputs, dim=1) else: output = outputs[0] - if 'batch_size' in inp.keys(): - logits = self.torch_model(output,inp['batch_size']) + if efficient_bs is not None: + logits = self.torch_model(output,efficient_bs) else: logits = self.torch_model(output) return logits @@ -248,7 +254,16 @@ def get_preds_from_logits(self, logits: torch.Tensor) -> torch.Tensor: def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: """Forward-pass with output loss.""" - x = self.get_logits(inp) + efficient_bs = None + if inp['sampler'] is not None: + efficient_bs = len(inp['label']) + candidate_sample = next(inp['sampler']) + inp = { + i: torch.cat([inp[i], + (candidate_sample[i].long().to(self.torch_model.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.torch_model.device))]) + for i in set(inp.keys())-set(['sampler']) + } + x = self.get_logits(inp,efficient_bs) if not self.loss_on_logits: x = self.get_preds_from_logits(x) @@ -257,6 +272,15 @@ def forward(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: def predict(self, inp: Dict[str, torch.Tensor]) -> torch.Tensor: """Prediction.""" - x = self.get_logits(inp) + efficient_bs = None + if inp['sampler'] is not None: + efficient_bs = len(inp['label']) + candidate_sample = next(inp['sampler']) + inp = { + i: torch.cat([inp[i], + (candidate_sample[i].long().to(self.torch_model.device) if _dtypes_mapping[i] == "long" else candidate_sample[i].to(self.torch_model.device))]) + for i in set(inp.keys())-set(['sampler']) + } + x = self.get_logits(inp,efficient_bs) x = self.get_preds_from_logits(x) return x diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index a68d13cc..dc127c1b 100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -237,6 +237,20 @@ def load_state_dict(self, weights: Dict, model: nn.Module): return self +class InfIterator(object): + def __init__(self, dataloader): + self.dl = dataloader + self.it = iter(self.dl) + + def __iter__(self): + return self + + def __next__(self): + try: + return next(self.it) + except StopIteration: + self.it = iter(self.dl) + return next(self.it) class Trainer: """Torch main trainer class. 
@@ -435,16 +449,11 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray:
         for epoch in range(self.n_epochs):
             self.epoch = epoch
             # train
-            if dataloaders['sampler'] is not None:
-                train_loss = self.train_with_sampler(dataloaders=dataloaders)
-            else:
-                train_loss = self.train(dataloaders=dataloaders)
+            train_loss = self.train(dataloaders=dataloaders)
             train_log.extend(train_loss)
             # test
-            if dataloaders['sampler'] is not None:
-                val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"], sampler = dataloaders["sampler"] )
-            else:
-                val_loss, val_data, weights = self.test(dataloader=dataloaders["val"])
+
+            val_loss, val_data, weights = self.test(dataloaders=dataloaders)
             if self.stop_by_metric:
                 cond = -1 * self.metric(*val_data, weights)
             else:
@@ -469,20 +478,14 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray:
                 self.se.set_best_params(self.model)
 
         if self.is_snap:
-            if dataloaders['sampler'] is not None:
-                val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"],sampler=dataloaders["sampler"], snap=True, stage="val")
-            else:
-                val_loss, val_data, weights = self.test(dataloader=dataloaders["val"], snap=True, stage="val")
+            val_loss, val_data, weights = self.test(dataloaders=dataloaders, snap=True, stage="val")
             logger.info3(
                 "Result SE, val loss: {vl}, val metric: {me}".format(
                     me=self.metric(*val_data, weights), vl=np.mean(val_loss)
                 )
             )
         elif self.se.swa:
-            if dataloaders['sampler'] is not None:
-                val_loss, val_data, weights = self.test_with_sampler(dataloader=dataloaders["val"], sampler=dataloaders["sampler"])
-            else:
-                val_loss, val_data, weights = self.test(dataloader=dataloaders["val"])
+            val_loss, val_data, weights = self.test(dataloaders=dataloaders)
             logger.info3(
                 "Early stopping: val loss: {vl}, val metric: {me}".format(
                     me=self.metric(*val_data, weights), vl=np.mean(val_loss)
@@ -515,15 +518,20 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]:
             loader = tqdm(dataloaders["train"], desc="train", disable=False)
         else:
             loader = dataloaders["train"]
+        sampler = None
+        if dataloaders['sampler'] is not None:
+            # data['batch_size'] = len(sample['label'])
+            sampler = InfIterator(dataloaders['sampler'])
         for sample in loader:
             data = {
                 i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device))
                 for i in sample.keys()
             }
-            data['batch_size'] = len(sample['label'])
-            if dataloaders['sampler'] is not None:
-                data['sampler'] = dataloaders['sampler']
-
+            # data['batch_size'] = len(sample['label'])
+            # if dataloaders['sampler'] is not None:
+            #     # data['batch_size'] = len(sample['label'])
+            #     data['sampler'] = dataloaders['sampler']
+            data['sampler'] = sampler
             loss = self.model(data).mean()
             if self.apex:
                 with self.amp.scale_loss(loss, self.optimizer) as scaled_loss:
                     scaled_loss.backward()
             else:
                 loss.backward()
@@ -543,7 +551,7 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]:
                 c += 1
                 if self.verbose and self.verbose_bar and logging_level < logging.INFO:
                     if self.verbose_inside and c % self.verbose_inside == 0:
-                        val_loss, val_data, weights = self.test(dataloader=dataloaders)
+                        val_loss, val_data, weights = self.test(dataloaders=dataloaders)
                         if self.stop_by_metric:
                             cond = -1 * self.metric(*val_data, weights)
                         else:
@@ -719,17 +727,18 @@ def test(
             loader = tqdm(dataloaders[stage], desc=stage, disable=False)
         else:
             loader = dataloaders[stage]
-
+        sampler = None
+        if dataloaders['sampler'] is not None:
+            # data['batch_size'] = len(sample['label'])
+            sampler = InfIterator(dataloaders['sampler'])
         with
torch.no_grad(): - for sample, candidate_sample in loader: + for sample in loader: data = { i: sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device) for i in sample.keys() } + data['sampler'] = sampler ### NOTE, HERE WE CAN ADD TORCH.UNIQUE - data['batch_size'] = len(sample['label']) - if dataloaders['sampler'] is not None: - data['sampler'] = dataloaders['sampler'] if snap: output = self.se.predict(data) loss = self.se.forward(data) if stage != "test" else None @@ -773,5 +782,5 @@ def predict(self, dataloaders: DataLoader, stage: str) -> np.ndarray: Prediction. """ - loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloader=dataloaders) + loss, (target, pred), _ = self.test(stage=stage, snap=self.is_snap, dataloaders=dataloaders) return pred diff --git a/lightautoml/text/utils.py b/lightautoml/text/utils.py index fe91f806..d1cc3d0d 100644 --- a/lightautoml/text/utils.py +++ b/lightautoml/text/utils.py @@ -23,6 +23,7 @@ "token_type_ids": "long", "text": "float", # embeddings "length": "long", + "fold": "long" } From 586ae5946645b8a472633b5cc31db12fd0a86d1f Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 16 Oct 2023 10:19:20 +0000 Subject: [PATCH 46/49] Descr --- lightautoml/ml_algo/torch_based/nn_models.py | 121 ++++++--- .../ml_algo/torch_based/saint/saint.py | 144 ---------- .../ml_algo/torch_based/saint/saint_utils.py | 256 ++++++++++++++++++ 3 files changed, 334 insertions(+), 187 deletions(-) delete mode 100644 lightautoml/ml_algo/torch_based/saint/saint.py create mode 100644 lightautoml/ml_algo/torch_based/saint/saint_utils.py diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py index de698791..c4dc20fa 100644 --- a/lightautoml/ml_algo/torch_based/nn_models.py +++ b/lightautoml/ml_algo/torch_based/nn_models.py @@ -9,7 +9,7 @@ import torch import torch.nn as nn -from .saint.saint import ColTransformer, RowColTransformer +from .saint.saint_utils import ColTransformer, RowColTransformer from .tabnet.utils import TabNetEncoder, _initialize_non_glu from .autoint.autoint_utils import AttnInteractionBlock, LeakyGate from .autoint.ghost_norm import GhostBatchNorm @@ -1141,7 +1141,7 @@ def __init__( epsilon=1e-15, virtual_batch_size=128, momentum=0.02, - mask_type="entemax", + mask_type="entmax", group_attention_matrix=None, **kwargs, ): @@ -1192,61 +1192,91 @@ def forward_masks(self, x): class SAINT(nn.Module): + """Implementation of Saint from https://github.com/yandex-research/tabular-dl-tabr. + + Args: + n_in : int + Number of features + n_out : int or list of int for multi task classification + Dimension of network output + embedding_size : embedding_size + Dimension of the embedding + depth : int + Number of Attention Blocks. + heads : int + Number of heads in Attention. + dim_head : int + Attention head dimension. + mlp_hidden_mults : int | tuple[int] + Multiply hidden state of MLP. + ffn_mult : int + Multiply hidden state of feed forward layer. + attn_dropout : float + Post-Attention dropout. + ff_dropout : int + Feed-Forward Dropout. + mlp_dropout : float + MLP Dropout. 
+ attentiontype : str + Either "colrow" or "row" : this is the masking attention to use + device : torch.device + kwargs : kwargs + """ + def __init__( self, n_in: int, n_out: int = 1, embedding_size: int = 10, - depth: int =2, + depth: int = 2, heads: int = 8, - dim_head = 16, - mlp_hidden_mults = (4, 2), - ffn_mult = 4, - attn_dropout = 0., - ff_dropout = 0., - mlp_dropout =0., - attentiontype = 'colrow', + dim_head=16, + mlp_hidden_mults=(4, 2), + ffn_mult=4, + attn_dropout=0.0, + ff_dropout=0.0, + mlp_dropout=0.0, + attentiontype="colrow", device: torch.device = torch.device("cuda:0"), - **kwargs - ): + **kwargs, + ): super().__init__() self.device = device self.cls_token = nn.Embedding(2, embedding_size) self.attentiontype = attentiontype - if attentiontype == 'col': + if attentiontype == "col": self.transformer = ColTransformer( - dim = embedding_size, - depth = depth, - heads = heads, - dim_head = dim_head, - attn_dropout = attn_dropout, - ff_dropout = ff_dropout + dim=embedding_size, + depth=depth, + heads=heads, + dim_head=dim_head, + attn_dropout=attn_dropout, + ff_dropout=ff_dropout, ) - elif attentiontype in ['row','colrow'] : + elif attentiontype in ["row", "colrow"]: self.transformer = RowColTransformer( - dim = embedding_size, - nfeats= n_in+1, #num featurs - depth = depth, - heads = heads, - dim_head = dim_head, - ffn_mult = ffn_mult, - attn_dropout = attn_dropout, - ff_dropout = ff_dropout, - style = attentiontype + dim=embedding_size, + nfeats=n_in + 1, # num featurs + depth=depth, + heads=heads, + dim_head=dim_head, + ffn_mult=ffn_mult, + attn_dropout=attn_dropout, + ff_dropout=ff_dropout, + style=attentiontype, ) - - l = (n_in+1) // 8 #input_size = (dim * self.num_categories) + (dim * num_continuous) - hidden_dimensions = list(map(lambda t: l * t, mlp_hidden_mults)) - - self.mlp = MLP(n_in = embedding_size, - n_out = n_out, - hidden_size = hidden_dimensions, - drop_rate=mlp_dropout, - use_bn = False, - dropout_first= False) - # self.embeds = nn.Embedding(self.total_tokens, self.dim) #.to(device) + l_rate = (n_in + 1) // 8 # input_size = (dim * self.num_categories) + (dim * num_continuous) + hidden_dimensions = list(map(lambda t: l_rate * t, mlp_hidden_mults)) + self.mlp = MLP( + n_in=embedding_size, + n_out=n_out, + hidden_size=hidden_dimensions, + drop_rate=mlp_dropout, + use_bn=False, + dropout_first=False, + ) def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor: """Transform the input tensor. 
@@ -1254,15 +1284,20 @@ def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor: Args: embedded : torch.Tensor embedded fields + bs : batch size Returns: torch.Tensor """ - mask = torch.zeros((len(embedded),len(embedded)), device=self.device, dtype=torch.bool) + mask = torch.zeros((len(embedded), len(embedded)), device=self.device, dtype=torch.bool) mask[torch.arange(bs), torch.arange(bs)] = 1 + # NOTE that it was: + # mask[:bs, bs:] = 1 + # mask[bs:, bs:] = 1 + # probably misprint mask[:bs, bs:] = 1 - mask[bs:, bs:] = 1 + mask[bs:, :bs] = 1 cls_token = torch.unsqueeze( self.cls_token(torch.ones(embedded.shape[0], dtype=torch.int).to(self.device)), dim=1 @@ -1276,4 +1311,4 @@ def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor: # con_outs = self.mlp2(x[:,self.num_categories:,:]) # return cat_outs, con_outs - return self.mlp(x[:,0,:]) + return self.mlp(x[:, 0, :]) diff --git a/lightautoml/ml_algo/torch_based/saint/saint.py b/lightautoml/ml_algo/torch_based/saint/saint.py deleted file mode 100644 index 03761aae..00000000 --- a/lightautoml/ml_algo/torch_based/saint/saint.py +++ /dev/null @@ -1,144 +0,0 @@ - -import numpy as np -import torch -import torch.nn.functional as F -from einops import rearrange -from torch import einsum, nn - -def exists(val): - return val is not None - -def default(val, d): - return val if exists(val) else d - -def ff_encodings(x,B): - x_proj = (2. * np.pi * x.unsqueeze(-1)) @ B.t() - return torch.cat([torch.sin(x_proj), torch.cos(x_proj)], dim=-1) - - -class Residual(nn.Module): - def __init__(self, fn): - super().__init__() - self.fn = fn - - def forward(self, x, **kwargs): - return self.fn(x, **kwargs) + x - -class PreNorm(nn.Module): - def __init__(self, dim, fn): - super().__init__() - self.norm = nn.LayerNorm(dim) - self.fn = fn - - def forward(self, x, **kwargs): - return self.fn(self.norm(x), **kwargs) - -# attention - -class GEGLU(nn.Module): - def forward(self, x): - x, gates = x.chunk(2, dim = -1) - return x * F.gelu(gates) - -class FeedForward(nn.Module): - def __init__(self, dim, mult = 4, dropout = 0.): - super().__init__() - self.net = nn.Sequential( - nn.Linear(dim, int(dim * mult) * 2), - GEGLU(), - nn.Dropout(dropout), - nn.Linear(int(dim * mult), dim) - ) - - def forward(self, x, **kwargs): - return self.net(x, **kwargs) - -class Attention(nn.Module): - def __init__( - self, - dim, - heads = 8, - dim_head = 16, - dropout = 0. 
- ): - super().__init__() - inner_dim = dim_head * heads - self.heads = heads - self.scale = dim_head ** -0.5 - - self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False) - self.to_out = nn.Linear(inner_dim, dim) - - self.dropout = nn.Dropout(dropout) - - def forward(self, x, mask=None): - h = self.heads - q, k, v = self.to_qkv(x).chunk(3, dim = -1) - q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v)) - sim = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale - if mask is not None: - sim[~mask[None, None].expand_as(sim)] = float('-inf') - attn = sim.softmax(dim = -1) - out = einsum('b h i j, b h j d -> b h i d', attn, v) - out = rearrange(out, 'b h n d -> b n (h d)', h = h) - return self.to_out(out) - - -class RowColTransformer(nn.Module): - def __init__(self, dim, nfeats, depth, heads, dim_head, ffn_mult, attn_dropout, ff_dropout, style='col'): - super().__init__() - self.layers = nn.ModuleList([]) - self.mask_embed = nn.Embedding(nfeats, dim) - self.style = style - for _ in range(depth): - if self.style == 'colrow': - self.layers.append(nn.ModuleList([ - PreNorm(dim, Residual(Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout))), - PreNorm(dim, Residual(FeedForward(dim, mult=ffn_mult, dropout = ff_dropout))), - PreNorm(dim*nfeats, Residual(Attention(dim*nfeats, heads = heads, dim_head = dim_head, dropout = attn_dropout))), - PreNorm(dim*nfeats, Residual(FeedForward(dim*nfeats, mult=ffn_mult, dropout = ff_dropout))), - ])) - else: - self.layers.append(nn.ModuleList([ - PreNorm(dim*nfeats, Residual(Attention(dim*nfeats, heads = heads, dim_head = 64, dropout = attn_dropout))), - PreNorm(dim*nfeats, Residual(FeedForward(dim*nfeats, mult=ffn_mult, dropout = ff_dropout))), - ])) - - def forward(self, x, mask_features=None, mask_samples=None): - - _, n, _ = x.shape - if self.style == 'colrow': - for attn1, ff1, attn2, ff2 in self.layers: # type: ignore[code] - x = attn1(x, mask=mask_features) - x = ff1(x) - x = rearrange(x, 'b n d -> 1 b (n d)') - x = attn2(x, mask=mask_samples) - x = ff2(x) - x = rearrange(x, '1 b (n d) -> b n d', n = n) - else: - for attn1, ff1 in self.layers: # type: ignore[code] - x = rearrange(x, 'b n d -> 1 b (n d)') - x = attn1(x) - x = ff1(x) - x = rearrange(x, '1 b (n d) -> b n d', n = n) - return x - - -# transformer -class ColTransformer(nn.Module): - def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout): - super().__init__() - self.layers = nn.ModuleList([]) - - - for _ in range(depth): - self.layers.append(nn.ModuleList([ - PreNorm(dim, Residual(Attention(dim, heads = heads, dim_head = dim_head, dropout = attn_dropout))), - PreNorm(dim, Residual(FeedForward(dim, dropout = ff_dropout))), - ])) - - def forward(self, x, mask_features=None, mask_samples=None): - for attn, ff in self.layers: - x = attn(x) - x = ff(x) - return x diff --git a/lightautoml/ml_algo/torch_based/saint/saint_utils.py b/lightautoml/ml_algo/torch_based/saint/saint_utils.py new file mode 100644 index 00000000..d9dea227 --- /dev/null +++ b/lightautoml/ml_algo/torch_based/saint/saint_utils.py @@ -0,0 +1,256 @@ +"""Saint utils.""" + +from einops import rearrange +from torch import einsum, nn + +from ..fttransformer.fttransformer_utils import GEGLU + + +class Residual(nn.Module): + """Residual connection layer. 
+ + Args: + fn : function to apply + """ + + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x, **kwargs): + """Forward-pass.""" + return self.fn(x, **kwargs) + x + + +class PreNorm(nn.Module): + """Normalization connection layer. + + Args: + fn : function to apply + """ + + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + """Forward-pass.""" + return self.fn(self.norm(x), **kwargs) + + +# attention + + +class FeedForward(nn.Module): + """Feedforward for Transformer block. + + Args: + dim: Embeddings dimension. + mult: multiply hidden state dim. + dropout: Post-Attention dropout. + """ + + def __init__(self, dim, mult=4, dropout=0.0): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, int(dim * mult) * 2), GEGLU(), nn.Dropout(dropout), nn.Linear(int(dim * mult), dim) + ) + + def forward(self, x, **kwargs): + """Forward-pass. + + Args: + x : torch.Tensor + 3-d tensor; for example, embedded numeric and/or categorical values, + or the output of a previous attention layer. + kwargs: kwargs + + Returns: + torch.Tensor + + """ + return self.net(x, **kwargs) + + +class Attention(nn.Module): + """Attention Block. + + Args: + dim: Embeddings dimension. + heads: Number of heads in Attention. + dim_head: Attention head dimension. + dropout: Post-Attention dropout. + """ + + def __init__(self, dim, heads=8, dim_head=16, dropout=0.0): + super().__init__() + inner_dim = dim_head * heads + self.heads = heads + self.scale = dim_head ** -0.5 + + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + self.to_out = nn.Linear(inner_dim, dim) + + self.dropout = nn.Dropout(dropout) + + def forward(self, x, mask=None): + """Transform the input tensor with attention. + + Args: + x : torch.Tensor + 3-d tensor; for example, embedded numeric and/or categorical values, + or the output of a previous attention layer. + mask: torch.Tensor + + Returns: + torch.Tensor + + """ + h = self.heads + q, k, v = self.to_qkv(x).chunk(3, dim=-1) + q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v)) + sim = einsum("b h i d, b h j d -> b h i j", q, k) * self.scale + if mask is not None: + sim[~mask[None, None].expand_as(sim)] = float("-inf") + attn = sim.softmax(dim=-1) + out = einsum("b h i j, b h j d -> b h i d", attn, v) + out = rearrange(out, "b h n d -> b n (h d)", h=h) + return self.to_out(out) + + +class RowColTransformer(nn.Module): + """Transformer Block. + + Args: + dim: Embeddings dimension. + nfeats: Number of features. + depth: Number of Attention Blocks. + heads: Number of heads in Attention. + dim_head: Attention head dimension. + ffn_mult: multiply hidden state of feed forward layer. + attn_dropout: Post-Attention dropout. + ff_dropout: Feed-Forward Dropout. 
+ style: attention style: 'col' or 'colrow' + """ + + def __init__(self, dim, nfeats, depth, heads, dim_head, ffn_mult, attn_dropout, ff_dropout, style="col"): + super().__init__() + self.layers = nn.ModuleList([]) + self.mask_embed = nn.Embedding(nfeats, dim) + self.style = style + for _ in range(depth): + if self.style == "colrow": + self.layers.append( + nn.ModuleList( + [ + PreNorm( + dim, Residual(Attention(dim, heads=heads, dim_head=dim_head, dropout=attn_dropout)) + ), + PreNorm(dim, Residual(FeedForward(dim, mult=ffn_mult, dropout=ff_dropout))), + PreNorm( + dim * nfeats, + Residual(Attention(dim * nfeats, heads=heads, dim_head=dim_head, dropout=attn_dropout)), + ), + PreNorm( + dim * nfeats, Residual(FeedForward(dim * nfeats, mult=ffn_mult, dropout=ff_dropout)) + ), + ] + ) + ) + else: + self.layers.append( + nn.ModuleList( + [ + PreNorm( + dim * nfeats, + Residual(Attention(dim * nfeats, heads=heads, dim_head=64, dropout=attn_dropout)), + ), + PreNorm( + dim * nfeats, Residual(FeedForward(dim * nfeats, mult=ffn_mult, dropout=ff_dropout)) + ), + ] + ) + ) + + def forward(self, x, mask_features=None, mask_samples=None): + """Transform the input embeddings tensor with Transformer module. + + Args: + x : torch.Tensor + 3-d tensor; embedded numeric and/or categorical values, + or the output of a previous Transformer layer. + mask_features: torch.Tensor + mask for the first attention + mask_samples: torch.Tensor + mask for the second attention + + Returns: + torch.Tensor + + """ + _, n, _ = x.shape + if self.style == "colrow": + for attn1, ff1, attn2, ff2 in self.layers: # type: ignore[code] + x = attn1(x, mask=mask_features) + x = ff1(x) + x = rearrange(x, "b n d -> 1 b (n d)") + x = attn2(x, mask=mask_samples) + x = ff2(x) + x = rearrange(x, "1 b (n d) -> b n d", n=n) + else: + for attn1, ff1 in self.layers: # type: ignore[code] + x = rearrange(x, "b n d -> 1 b (n d)") + x = attn1(x) + x = ff1(x) + x = rearrange(x, "1 b (n d) -> b n d", n=n) + return x + + +# transformer +class ColTransformer(nn.Module): + """Transformer Block. + + Args: + dim: Embeddings dimension. + depth: Number of Attention Blocks. + heads: Number of heads in Attention. + dim_head: Attention head dimension. + attn_dropout: Post-Attention dropout. + ff_dropout: Feed-Forward Dropout. + """ + + def __init__(self, dim, depth, heads, dim_head, attn_dropout, ff_dropout): + super().__init__() + self.layers = nn.ModuleList([]) + + for _ in range(depth): + self.layers.append( + nn.ModuleList( + [ + PreNorm(dim, Residual(Attention(dim, heads=heads, dim_head=dim_head, dropout=attn_dropout))), + PreNorm(dim, Residual(FeedForward(dim, dropout=ff_dropout))), + ] + ) + ) + + def forward(self, x, mask_features=None, mask_samples=None): + """Transform the input embeddings tensor with Transformer module. + + Args: + x : torch.Tensor + 3-d tensor; embedded numeric and/or categorical values, + or the output of a previous Transformer layer. 
+ mask_features: torch.Tensor + not used + mask_samples: torch.Tensor + not used + + Returns: + torch.Tensor + + """ + for attn, ff in self.layers: + x = attn(x) + x = ff(x) + return x From 45c716657abb7f2922a91105a557246c2a06de47 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 16 Oct 2023 10:23:13 +0000 Subject: [PATCH 47/49] Descr --- lightautoml/text/trainer.py | 132 ------------------------------------ 1 file changed, 132 deletions(-) diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index dc127c1b..55be9cdb 100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -570,138 +570,6 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: return loss_log - # def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: - # """Training loop. - - # Args: - # dataloaders: Dict with torch dataloaders. - - # Returns: - # Loss. - - # """ - # ################## - # loss_log = [] - # self.model.train() - # running_loss = 0 - # c = 0 - - # logging_level = get_stdout_level() - # if logging_level < logging.INFO and self.verbose and self.verbose_bar: - # loader = tqdm(dataloaders["train"], desc="train", disable=False) - # else: - # loader = dataloaders["train"] - - # for sample in loader: - # data = { - # i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) - # for i in sample.keys() - # } - - # loss = self.model(data).mean() - # if self.apex: - # with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: - # scaled_loss.backward() - # else: - # loss.backward() - - # if self.clip_grad: - # torch.nn.utils.clip_grad_norm_(self.model.parameters(), **self.clip_grad_params) - # self.optimizer.step() - # self.optimizer.zero_grad() - - # loss = loss.data.cpu().numpy() - # loss_log.append(loss) - # running_loss += loss - - # c += 1 - # if self.verbose and self.verbose_bar and logging_level < logging.INFO: - # if self.verbose_inside and c % self.verbose_inside == 0: - # val_loss, val_data, weights = self.test(dataloader=dataloaders["val"]) - # if self.stop_by_metric: - # cond = -1 * self.metric(*val_data, weights) - # else: - # cond = np.mean(val_loss) - # self.se.update(self.model, cond) - - # logger.info3( - # "Epoch: {e}, iter: {c}, val loss: {vl}, val metric: {me}".format( - # me=self.metric(*val_data, weights), - # e=self.epoch, - # c=c, - # vl=np.mean(val_loss), - # ) - # ) - # loader.set_description("train (loss=%g)" % (running_loss / c)) - - # return loss_log - - # def test( - # self, dataloader: DataLoader, stage: str = "val", snap: bool = False - # ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: - # """Testing loop. - - # Args: - # dataloader: Torch dataloader. - # stage: Train, val or test. - # snap: Use snapshots. - - # Returns: - # Loss, (Target, OOF). 
- - # """ - # ##################### - # loss_log = [] - # weights_log = [] - # self.model.eval() - # pred = [] - # target = [] - # logging_level = get_stdout_level() - # if logging_level < logging.INFO and self.verbose and self.verbose_bar: - # loader = tqdm(dataloader, desc=stage, disable=False) - # else: - # loader = dataloader - - # with torch.no_grad(): - # for sample in loader: - # data = { - # i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) - # for i in sample.keys() - # } - - # if snap: - # output = self.se.predict(data) - # loss = self.se.forward(data) if stage != "test" else None - # else: - # output = self.model.predict(data) - # loss = self.model(data) if stage != "test" else None - - # if stage != "test": - # loss = loss.mean().data.cpu().numpy() - - # loss_log.append(loss) - - # output = output.data.cpu().numpy() - # target_data = data["label"].data.cpu().numpy() - # weights = data.get("weight", None) - # if weights is not None: - # weights = weights.data.cpu().numpy() - - # pred.append(output) - # target.append(target_data) - # weights_log.extend(weights) - - # self.model.train() - - # return ( - # loss_log, - # ( - # np.vstack(target) if len(target[0].shape) == 2 else np.hstack(target), - # np.vstack(pred) if len(pred[0].shape) == 2 else np.hstack(pred), - # ), - # np.array(weights_log), - # ) - def test( self, dataloaders: DataLoader,stage: str = "val", snap: bool = False ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: From 81e9db4a3651f86f94950e6280d7aed7be6f8c69 Mon Sep 17 00:00:00 2001 From: Vasilev Dmitriy Date: Mon, 16 Oct 2023 10:25:57 +0000 Subject: [PATCH 48/49] Descr --- lightautoml/text/trainer.py | 50 +++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/lightautoml/text/trainer.py b/lightautoml/text/trainer.py index 55be9cdb..d8a3bf73 100644 --- a/lightautoml/text/trainer.py +++ b/lightautoml/text/trainer.py @@ -1,10 +1,9 @@ """Main pytorch training and prediction class with Snapshots Ensemble.""" -from itertools import cycle import logging from copy import deepcopy -from typing import Any, Iterable +from typing import Any from typing import Callable from typing import Dict from typing import List @@ -237,14 +236,21 @@ def load_state_dict(self, weights: Dict, model: nn.Module): return self + class InfIterator(object): + """Infinite Iterator. + + Args: + dataloader : torch.utils.dataloader + """ + def __init__(self, dataloader): self.dl = dataloader self.it = iter(self.dl) def __iter__(self): return self - + def __next__(self): try: return next(self.it) @@ -252,6 +258,7 @@ def __next__(self): self.it = iter(self.dl) return next(self.it) + class Trainer: """Torch main trainer class. 
@@ -305,7 +312,6 @@ def __init__( stop_by_metric: bool = False, clip_grad: bool = False, clip_grad_params: Optional[Dict] = None, - **kwargs ): self.net = net @@ -328,7 +334,7 @@ def __init__( self.stop_by_metric = stop_by_metric self.clip_grad = clip_grad self.clip_grad_params = clip_grad_params if clip_grad_params is not None else {} - + self.dataloader = None self.model = None self.optimizer = None @@ -452,7 +458,7 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: train_loss = self.train(dataloaders=dataloaders) train_log.extend(train_loss) # test - + val_loss, val_data, weights = self.test(dataloaders=dataloaders) if self.stop_by_metric: cond = -1 * self.metric(*val_data, weights) @@ -496,7 +502,6 @@ def fit(self, dataloaders: Dict[str, DataLoader]) -> np.ndarray: return val_data[1] - def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: """Training loop. @@ -519,9 +524,9 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: else: loader = dataloaders["train"] sampler = None - if dataloaders['sampler'] is not None: + if dataloaders["sampler"] is not None: # data['batch_size'] = len(sample['label']) - sampler = InfIterator(dataloaders['sampler']) + sampler = InfIterator(dataloaders["sampler"]) for sample in loader: data = { i: (sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)) @@ -531,10 +536,10 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: # if dataloaders['sampler'] is not None: # # data['batch_size'] = len(sample['label']) # data['sampler'] = dataloaders['sampler'] - data['sampler'] = sampler + data["sampler"] = sampler loss = self.model(data).mean() if self.apex: - with self.amp.scale_loss(loss, self .optimizer) as scaled_loss: + with self.amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() else: loss.backward() @@ -571,12 +576,12 @@ def train(self, dataloaders: Dict[str, DataLoader]) -> List[float]: return loss_log def test( - self, dataloaders: DataLoader,stage: str = "val", snap: bool = False + self, dataloaders: DataLoader, stage: str = "val", snap: bool = False ) -> Tuple[List[float], Tuple[np.ndarray, np.ndarray]]: """Testing loop. Args: - dataloader: Torch dataloader. + dataloaders: Torch dataloader. stage: Train, val or test. snap: Use snapshots. 
@@ -596,17 +601,17 @@ def test(
         else:
             loader = dataloaders[stage]
         sampler = None
-        if dataloaders['sampler'] is not None:
+        if dataloaders["sampler"] is not None:
             # data['batch_size'] = len(sample['label'])
-            sampler = InfIterator(dataloaders['sampler'])
+            sampler = InfIterator(dataloaders["sampler"])
         with torch.no_grad():
             for sample in loader:
                 data = {
                     i: sample[i].long().to(self.device) if _dtypes_mapping[i] == "long" else sample[i].to(self.device)
                     for i in sample.keys()
                 }
-                data['sampler'] = sampler
-                ### NOTE, HERE WE CAN ADD TORCH.UNIQUE
+                data["sampler"] = sampler
+                # NOTE, HERE WE CAN ADD TORCH.UNIQUE
                 if snap:
                     output = self.se.predict(data)
                     loss = self.se.forward(data) if stage != "test" else None
@@ -619,11 +624,11 @@ def test(
                 loss_log.append(loss)
 
-                output = output.data.cpu().numpy()[:len(sample['label'])]
-                target_data = data["label"].data.cpu().numpy()[:len(sample['label'])]
+                output = output.data.cpu().numpy()[: len(sample["label"])]
+                target_data = data["label"].data.cpu().numpy()[: len(sample["label"])]
                 weights = data.get("weight", None)
-                if weights is not None:
-                    weights = weights.data.cpu().numpy()[:len(sample['label'])]
+                if weights is not None:
+                    weights = weights.data.cpu().numpy()[: len(sample["label"])]
 
                 pred.append(output)
                 target.append(target_data)
@@ -639,11 +644,12 @@ def test(
             ),
             np.array(weights_log),
         )
+
     def predict(self, dataloaders: DataLoader, stage: str) -> np.ndarray:
         """Predict model.
 
         Args:
-            dataloader: Torch dataloader.
+            dataloaders: Torch dataloader.
             stage: Train, val or test.
 
         Returns:

From 2238f7e66d1641010f6ebec7c3ef81523fc15f66 Mon Sep 17 00:00:00 2001
From: Vasilev Dmitriy
Date: Mon, 16 Oct 2023 11:37:06 +0000
Subject: [PATCH 49/49] added poolings

---
 lightautoml/ml_algo/torch_based/nn_models.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lightautoml/ml_algo/torch_based/nn_models.py b/lightautoml/ml_algo/torch_based/nn_models.py
index c4dc20fa..0ee8c0b4 100644
--- a/lightautoml/ml_algo/torch_based/nn_models.py
+++ b/lightautoml/ml_algo/torch_based/nn_models.py
@@ -1237,6 +1237,7 @@ def __init__(
         ff_dropout=0.0,
         mlp_dropout=0.0,
         attentiontype="colrow",
+        pooling: str = "cls",
         device: torch.device = torch.device("cuda:0"),
         **kwargs,
     ):
@@ -1268,9 +1269,9 @@ def __init__(
 
         l_rate = (n_in + 1) // 8  # input_size = (dim * self.num_categories) + (dim * num_continuous)
         hidden_dimensions = list(map(lambda t: l_rate * t, mlp_hidden_mults))
-
+        self.pooling = pooling_by_name[pooling]()
         self.mlp = MLP(
-            n_in=embedding_size,
+            n_in=embedding_size * 2 if pooling == "concat" else embedding_size,
             n_out=n_out,
             hidden_size=hidden_dimensions,
             drop_rate=mlp_dropout,
@@ -1284,7 +1285,7 @@ def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor:
         Args:
             embedded : torch.Tensor
                 embedded fields
-            bs : batch size
+            bs : batch size without sampler's part
 
         Returns:
             torch.Tensor
@@ -1310,5 +1311,7 @@ def forward(self, embedded: torch.Tensor, bs: int) -> torch.Tensor:
         # cat_outs = self.mlp1(x[:,:self.num_categories,:])
         # con_outs = self.mlp2(x[:,self.num_categories:,:])
         # return cat_outs, con_outs
-
-        return self.mlp(x[:, 0, :])
+        x_mask = torch.ones(x.shape, dtype=torch.bool).to(self.device)
+        pool_tokens = self.pooling(x=x, x_mask=x_mask)
+        logits = self.mlp(pool_tokens)
+        return logits
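The vertical concatenation introduced in [PATCH 45/49] is the row-wise counterpart of the pre-existing horizontal stack: vconcat keeps the feature set fixed and stacks samples. A minimal sketch with plain numpy arrays standing in for the dataset payloads (all shapes here are illustrative):

    import numpy as np

    train = np.random.rand(100, 5)  # 100 rows, 5 features
    valid = np.random.rand(25, 5)   # 25 rows, the same 5 features

    stacked = np.vstack([train, valid])  # what NumpyDataset._vstack delegates to
    assert stacked.shape == (125, 5)     # rows add up, features stay fixed

    extra = np.random.rand(125, 3)
    wide = np.hstack([stacked, extra])   # the pre-existing _hstack, for contrast
    assert wide.shape == (125, 8)        # features add up, rows stay fixed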
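The InfIterator added to lightautoml/text/trainer.py exists so that drawing candidate rows from the sampler never raises StopIteration mid-epoch: it silently restarts its dataloader. A self-contained sketch (the class body matches the patch; the toy dataloader is illustrative):

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    class InfIterator(object):
        def __init__(self, dataloader):
            self.dl = dataloader
            self.it = iter(self.dl)

        def __iter__(self):
            return self

        def __next__(self):
            try:
                return next(self.it)
            except StopIteration:
                self.it = iter(self.dl)  # wrap around instead of stopping
                return next(self.it)

    loader = DataLoader(TensorDataset(torch.arange(4)), batch_size=3)
    it = InfIterator(loader)
    batches = [next(it)[0] for _ in range(3)]
    # tensor([0, 1, 2]), tensor([3]), tensor([0, 1, 2]) - it keeps cycling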
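Several places in [PATCH 45/49] concatenate sampler rows after the real mini-batch and then slice the outputs back, either via efficient_bs in get_logits or via [: len(sample["label"])] in Trainer.test. A sketch of that pattern with a stand-in linear model (the model and shapes are assumptions, not the library API):

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 1)          # stand-in for the wrapped torch model
    batch_x = torch.randn(3, 4)      # rows served by the train/val dataloader
    candidate_x = torch.randn(5, 4)  # rows drawn from the infinite sampler

    efficient_bs = batch_x.shape[0]             # only these rows carry loss/metrics
    merged = torch.cat([batch_x, candidate_x])  # the forward pass sees all 8 rows
    out = model(merged)[:efficient_bs]          # slice back to the real batch
    assert out.shape == (3, 1)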
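The attention mask built in SAINT.forward (settled in [PATCH 46/49], see the "probably misprint" note in the diff) lets each real row attend to itself and to the appended sampler rows, while each sampler row attends back to the real rows. A sketch of the mask and of how the Attention block applies it before the softmax:

    import torch

    bs, extra = 3, 2  # real batch rows, rows appended by the sampler
    n = bs + extra
    mask = torch.zeros((n, n), dtype=torch.bool)
    mask[torch.arange(bs), torch.arange(bs)] = True  # real rows see themselves
    mask[:bs, bs:] = True                            # real rows see sampled rows
    mask[bs:, :bs] = True                            # sampled rows see real rows

    sim = torch.randn(1, 1, n, n)  # (batch, heads, query, key) attention scores
    sim[~mask[None, None].expand_as(sim)] = float("-inf")
    attn = sim.softmax(dim=-1)     # masked positions receive zero weight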
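The row ("intersample") attention in RowColTransformer works by flattening each sample's token embeddings and treating the batch axis as the sequence, which is what the rearrange calls in saint_utils.py do. A small round-trip sketch of that reshape:

    import torch
    from einops import rearrange

    b, n, d = 4, 3, 8                          # batch, tokens per sample, embedding size
    x = torch.randn(b, n, d)
    rows = rearrange(x, "b n d -> 1 b (n d)")  # samples become the sequence axis
    assert rows.shape == (1, b, n * d)         # attention now mixes whole rows
    back = rearrange(rows, "1 b (n d) -> b n d", n=n)
    assert torch.equal(back, x)                # the reshape is lossless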
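FeedForward in saint_utils.py imports GEGLU from the fttransformer utils, which is why its first linear layer emits int(dim * mult) * 2 units: half of them act as a GELU gate. The gating itself, as it was defined in the deleted saint.py:

    import torch
    import torch.nn.functional as F

    def geglu(x: torch.Tensor) -> torch.Tensor:
        x, gates = x.chunk(2, dim=-1)  # split the doubled hidden dimension
        return x * F.gelu(gates)

    h = torch.randn(8, 2 * 64)  # output of nn.Linear(dim, int(dim * mult) * 2)
    assert geglu(h).shape == (8, 64)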
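[PATCH 49/49] replaces the hard-coded CLS readout with a configurable pooling. The pooling_by_name registry itself is not shown in the diff, so the "concat" variant below (mean pooling joined with max pooling, which would explain the doubled n_in of the MLP) is an assumption about its behavior, not the library's definition:

    import torch

    x = torch.randn(8, 11, 16)  # (batch, cls token + 10 features, embedding_size)

    cls_pool = x[:, 0, :]       # the old hard-coded readout, shape (8, 16)

    # hypothetical "concat" pooling: mean over tokens joined with max over tokens
    concat_pool = torch.cat([x.mean(dim=1), x.max(dim=1).values], dim=1)
    assert concat_pool.shape == (8, 32)  # matches embedding_size * 2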