@@ -123,6 +123,7 @@ def test_string_indexer(self):
123123 # single input
124124 si = StringIndexer (inputCol = "label1" , outputCol = "index1" )
125125 model = si .fit (df .select ("label1" ))
126+ self .assertEqual (si .uid , model .uid )
126127
127128 # read/write
128129 with tempfile .TemporaryDirectory (prefix = "string_indexer" ) as tmp_dir :
@@ -183,6 +184,7 @@ def test_pca(self):
183184 pca = PCA (k = 2 , inputCol = "features" , outputCol = "pca_features" )
184185
185186 model = pca .fit (df )
187+ self .assertEqual (pca .uid , model .uid )
186188 self .assertEqual (model .getK (), 2 )
187189 self .assertTrue (
188190 np .allclose (model .explainedVariance .toArray (), [0.79439 , 0.20560 ], atol = 1e-4 )
@@ -272,6 +274,7 @@ def test_standard_scaler(self):
272274 self .assertEqual (scaler .getOutputCol (), "scaled" )
273275
274276 model = scaler .fit (df )
277+ self .assertEqual (scaler .uid , model .uid )
275278 self .assertTrue (np .allclose (model .mean .toArray (), [1.66666667 ], atol = 1e-4 ))
276279 self .assertTrue (np .allclose (model .std .toArray (), [1.52752523 ], atol = 1e-4 ))
277280
@@ -311,6 +314,7 @@ def test_maxabs_scaler(self):
311314 self .assertEqual (scaler .getOutputCol (), "scaled" )
312315
313316 model = scaler .fit (df )
317+ self .assertEqual (scaler .uid , model .uid )
314318 self .assertTrue (np .allclose (model .maxAbs .toArray (), [3.0 ], atol = 1e-4 ))
315319
316320 output = model .transform (df )
@@ -349,6 +353,7 @@ def test_minmax_scaler(self):
349353 self .assertEqual (scaler .getOutputCol (), "scaled" )
350354
351355 model = scaler .fit (df )
356+ self .assertEqual (scaler .uid , model .uid )
352357 self .assertTrue (np .allclose (model .originalMax .toArray (), [3.0 ], atol = 1e-4 ))
353358 self .assertTrue (np .allclose (model .originalMin .toArray (), [0.0 ], atol = 1e-4 ))
354359
@@ -388,6 +393,7 @@ def test_robust_scaler(self):
388393 self .assertEqual (scaler .getOutputCol (), "scaled" )
389394
390395 model = scaler .fit (df )
396+ self .assertEqual (scaler .uid , model .uid )
391397 self .assertTrue (np .allclose (model .range .toArray (), [3.0 ], atol = 1e-4 ))
392398 self .assertTrue (np .allclose (model .median .toArray (), [2.0 ], atol = 1e-4 ))
393399
@@ -422,6 +428,7 @@ def test_chi_sq_selector(self):
422428 self .assertEqual (selector .getOutputCol (), "selectedFeatures" )
423429
424430 model = selector .fit (df )
431+ self .assertEqual (selector .uid , model .uid )
425432 self .assertEqual (model .selectedFeatures , [2 ])
426433
427434 output = model .transform (df )
@@ -456,6 +463,7 @@ def test_univariate_selector(self):
456463 self .assertEqual (selector .getSelectionThreshold (), 1 )
457464
458465 model = selector .fit (df )
466+ self .assertEqual (selector .uid , model .uid )
459467 self .assertEqual (model .selectedFeatures , [3 ])
460468
461469 output = model .transform (df )
@@ -487,6 +495,7 @@ def test_variance_threshold_selector(self):
487495 self .assertEqual (selector .getOutputCol (), "selectedFeatures" )
488496
489497 model = selector .fit (df )
498+ self .assertEqual (selector .uid , model .uid )
490499 self .assertEqual (model .selectedFeatures , [2 ])
491500
492501 output = model .transform (df )
@@ -516,6 +525,7 @@ def test_word2vec(self):
516525 self .assertEqual (w2v .getMaxIter (), 1 )
517526
518527 model = w2v .fit (df )
528+ self .assertEqual (w2v .uid , model .uid )
519529 self .assertEqual (model .getVectors ().columns , ["word" , "vector" ])
520530 self .assertEqual (model .getVectors ().count (), 3 )
521531
@@ -567,6 +577,7 @@ def test_imputer(self):
567577 self .assertEqual (imputer .getOutputCols (), ["out_a" , "out_b" ])
568578
569579 model = imputer .fit (df )
580+ self .assertEqual (imputer .uid , model .uid )
570581 self .assertEqual (model .surrogateDF .columns , ["a" , "b" ])
571582 self .assertEqual (model .surrogateDF .count (), 1 )
572583 self .assertEqual (list (model .surrogateDF .head ()), [3.0 , 4.0 ])
@@ -598,6 +609,7 @@ def test_count_vectorizer(self):
598609 self .assertEqual (cv .getOutputCol (), "vectors" )
599610
600611 model = cv .fit (df )
612+ self .assertEqual (cv .uid , model .uid )
601613 self .assertEqual (sorted (model .vocabulary ), ["a" , "b" , "c" ])
602614
603615 output = model .transform (df )
@@ -624,6 +636,7 @@ def test_one_hot_encoder(self):
624636 self .assertEqual (encoder .getOutputCols (), ["output" ])
625637
626638 model = encoder .fit (df )
639+ self .assertEqual (encoder .uid , model .uid )
627640 self .assertEqual (model .categorySizes , [3 ])
628641
629642 output = model .transform (df )
@@ -900,6 +913,7 @@ def test_idf(self):
900913 self .assertListEqual (idf .params , [idf .inputCol , idf .minDocFreq , idf .outputCol ])
901914
902915 model = idf .fit (df , {idf .outputCol : "idf" })
916+ self .assertEqual (idf .uid , model .uid )
903917 # self.assertEqual(
904918 # model.uid, idf.uid, "Model should inherit the UID from its parent estimator."
905919 # )
@@ -1012,6 +1026,7 @@ def test_count_vectorizer_with_binary(self):
10121026 )
10131027 cv = CountVectorizer (binary = True , inputCol = "words" , outputCol = "features" )
10141028 model = cv .fit (dataset )
1029+ self .assertEqual (cv .uid , model .uid )
10151030
10161031 transformedList = model .transform (dataset ).select ("features" , "expected" ).collect ()
10171032
@@ -1047,6 +1062,8 @@ def test_count_vectorizer_with_maxDF(self):
10471062 )
10481063 cv = CountVectorizer (inputCol = "words" , outputCol = "features" )
10491064 model1 = cv .setMaxDF (3 ).fit (dataset )
1065+ self .assertEqual (cv .uid , model1 .uid )
1066+
10501067 self .assertEqual (model1 .vocabulary , ["b" , "c" , "d" ])
10511068
10521069 transformedList1 = model1 .transform (dataset ).select ("features" , "expected" ).collect ()
@@ -1119,6 +1136,8 @@ def test_rformula_force_index_label(self):
11191136 # Does not index label by default since it's numeric type.
11201137 rf = RFormula (formula = "y ~ x + s" )
11211138 model = rf .fit (df )
1139+ self .assertEqual (rf .uid , model .uid )
1140+
11221141 transformedDF = model .transform (df )
11231142 self .assertEqual (transformedDF .head ().label , 1.0 )
11241143 # Force to index label.
0 commit comments