# Reconstructed from the git patch "add predict method"
# (commit 3b0bf1ea9b74577ff0452f5d87a8d4f7fca48e76, author "manuel",
# Thu, 11 Jun 2020 19:46:01 +0100) against mlbox/prediction/predictor.py.
#
# Besides adding `predict`, the same patch changes `fit_predict` so that it
# keeps the two attributes `predict` reads:
#   * `self.pp = pp`  right after `pp.fit(df['train'], df['target'])`
#   * `self.df = df`  just before `return self`
#
# `predict` is a method (it takes `self`); the enclosing class header is not
# part of the patch, so the definition is shown here without class indentation.
def predict(self, new_df, dump=False):
    """Predict the target for new rows with the already fitted pipeline.

    Must be called after ``fit_predict``: it reads the fitted pipeline from
    ``self.pp`` and the training data dictionary from ``self.df``. The task
    is chosen from the dtype of ``self.df['target']``: ``'int'`` selects
    classification, ``'float'`` selects regression.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Rows to predict. Handed unchanged to ``self.pp.predict_proba``
        (classification) or ``self.pp.predict`` (regression); its index
        becomes the index of the result.

    dump : bool, default = False
        If True, the whole prediction frame is written to
        ``<self.to_path>/<target name>_predictions.csv``.

    Returns
    -------
    pandas.Series or None
        The ``<target name>_predicted`` column, indexed like ``new_df``.
        None (after a warning) when ``new_df`` has no rows or when the
        target dtype is neither ``'int'`` nor ``'float'``.

    Raises
    ------
    ValueError
        If ``target_encoder.obj`` cannot be loaded from ``self.to_path``
        (classification only), or if the pipeline fails to predict.
    """
    if new_df.shape[0] == 0:
        warnings.warn("You have no test dataset. Cannot predict !")
        return None

    target = self.df['target']
    pred_col = target.name + "_predicted"
    start_time = time.time()

    ##########################################
    #             Classification
    ##########################################

    if target.dtype == 'int':
        print("Predicting CLASSIFICATION target")
        enc_name = "target_encoder.obj"

        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # self.to_path at a directory whose contents you trust.
        try:
            with open(self.to_path + "/" + enc_name, 'rb') as fhand:
                enc = pickle.load(fhand)
        except Exception as e:
            print(e)
            raise ValueError("Unable to load '" + enc_name
                             + "' from directory : " + self.to_path) from e

        try:
            if self.verbose:
                print("")
                print("predicting ...")

            # One probability column per class, labelled with the decoded
            # class values. The frame must be indexed by new_df.index (not
            # by new_df itself) so predictions line up with the input rows.
            pred = pd.DataFrame(
                self.pp.predict_proba(new_df),
                columns=enc.inverse_transform(range(len(enc.classes_))),
                index=new_df.index)

            # The predicted class is the label of the most probable column;
            # computed before the new column is added to the frame.
            pred[pred_col] = pred.idxmax(axis=1)

            # Best effort: labels that are not integer-like are kept as is.
            try:
                pred[pred_col] = pred[pred_col].apply(int)
            except Exception as cast_err:
                warnings.warn(str(cast_err))

        except Exception as e:
            print(e)
            raise ValueError("Can not predict") from e

    ##########################################
    #               Regression
    ##########################################

    elif target.dtype == 'float':
        print("Predicting REGRESSION target")

        pred = pd.DataFrame(index=new_df.index, columns=[pred_col])

        try:
            if self.verbose:
                print("")
                print("predicting...")

            pred[pred_col] = self.pp.predict(new_df)

        except Exception as e:
            print(e)
            raise ValueError("Can not predict") from e

    else:
        # Without this guard `pred` would be unbound further down.
        warnings.warn("Target dtype is neither 'int' nor 'float'. "
                      "Cannot predict !")
        return None

    if self.verbose:
        print("CPU time: %s seconds" % (time.time() - start_time))

    ##########################################
    #           Dumping predictions
    ##########################################

    if dump:
        if self.verbose:
            print("")
            print("dumping predictions into directory : "
                  + self.to_path + " ...")

        pred.to_csv(self.to_path
                    + "/"
                    + target.name
                    + "_predictions.csv",
                    index=True)

    return pred[pred_col]