Skip to content
This repository was archived by the owner on Feb 11, 2023. It is now read-only.

Commit f3955cf

Browse files
committed
update classif. (#15)
* CrossVal - allow larger test than train * fix classif. nb searches
1 parent 41cd661 commit f3955cf

File tree

10 files changed

+106
-56
lines changed

10 files changed

+106
-56
lines changed

experiments_ovary_centres/run_center_candidate_training.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
import matplotlib
3939
if os.environ.get('DISPLAY', '') == '' \
4040
and matplotlib.rcParams['backend'] != 'agg':
41-
# logging.warning('No display found. Using non-interactive Agg backend.')
41+
print('No display found. Using non-interactive Agg backend.')
4242
matplotlib.use('Agg')
4343

4444
import matplotlib.pyplot as plt

experiments_ovary_centres/run_center_clustering.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import matplotlib
2121
if os.environ.get('DISPLAY', '') == '' \
2222
and matplotlib.rcParams['backend'] != 'agg':
23-
# logging.warning('No display found. Using non-interactive Agg backend.')
23+
print('No display found. Using non-interactive Agg backend.')
2424
matplotlib.use('Agg')
2525

2626
import matplotlib.pylab as plt

experiments_ovary_detect/run_ovary_egg-segmentation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
import matplotlib
3838
if os.environ.get('DISPLAY', '') == '' \
3939
and matplotlib.rcParams['backend'] != 'agg':
40-
# logging.warning('No display found. Using non-interactive Agg backend.')
40+
print('No display found. Using non-interactive Agg backend.')
4141
matplotlib.use('Agg')
4242

4343
import numpy as np

experiments_segmentation/run_compute_stat_annot_segm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import matplotlib
2222
if os.environ.get('DISPLAY', '') == '' \
2323
and matplotlib.rcParams['backend'] != 'agg':
24-
# logging.warning('No display found. Using non-interactive Agg backend.')
24+
print('No display found. Using non-interactive Agg backend.')
2525
matplotlib.use('Agg')
2626

2727
import pandas as pd

handling_annotations/run_overlap_images_segms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import matplotlib
2424
if os.environ.get('DISPLAY','') == '' \
2525
and matplotlib.rcParams['backend'] != 'agg':
26-
# logging.warning('No display found. Using non-interactive Agg backend.')
26+
print('No display found. Using non-interactive Agg backend.')
2727
matplotlib.use('Agg')
2828

2929
import numpy as np

imsegm/classification.py

Lines changed: 96 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from scipy import interp
2121
from scipy.stats import randint as sp_randint
2222
from scipy.stats import uniform as sp_random
23+
from sklearn.base import clone
2324
from sklearn import preprocessing, feature_selection, decomposition
2425
from sklearn import cluster, metrics
2526
from sklearn import ensemble, neighbors, svm, tree
@@ -897,9 +898,10 @@ def eval_classif_cross_val_roc(clf_name, classif, features, labels,
897898
cross_val = cross_val.split(features, labels)
898899
count = 0.
899900
for train, test in cross_val:
900-
classif.fit(np.copy(features[train], order='C'),
901-
np.copy(labels[train], order='C'))
902-
proba = classif.predict_proba(np.copy(features[test], order='C'))
901+
classif_cv = clone(classif)
902+
classif_cv.fit(np.copy(features[train], order='C'),
903+
np.copy(labels[train], order='C'))
904+
proba = classif_cv.predict_proba(np.copy(features[test], order='C'))
903905
# Compute ROC curve and area the curve
904906
for i, lb in enumerate(unique_labels):
905907
fpr, tpr, _ = metrics.roc_curve(labels_bin[test, lb], proba[:, i])
@@ -934,6 +936,12 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
934936
:param clf_parameters: {str: ...}
935937
:param nb_iter: int, nb of random tries
936938
:return: int
939+
940+
>>> clf_params = create_clf_param_search_grid(DEFAULT_CLASSIF_NAME)
941+
>>> search_params_cut_down_max_nb_iter(clf_params, 100)
942+
100
943+
>>> search_params_cut_down_max_nb_iter(clf_params, 1e6)
944+
1450
937945
"""
938946
counts = []
939947
for k in clf_parameters:
@@ -944,7 +952,7 @@ def search_params_cut_down_max_nb_iter(clf_parameters, nb_iter):
944952
return nb_iter
945953
count = np.product(counts)
946954
if count < nb_iter:
947-
nb_iter < count
955+
nb_iter = count
948956
return nb_iter
949957

950958

@@ -1384,32 +1392,40 @@ class HoldOut:
13841392
13851393
Example
13861394
-------
1387-
>>> ho = HoldOut(10, 7)
1395+
>>> ho = HoldOut(10, 7, rand_seed=None)
13881396
>>> len(ho)
13891397
1
13901398
>>> list(ho)
13911399
[([0, 1, 2, 3, 4, 5, 6], [7, 8, 9])]
1400+
>>> ho = HoldOut(10, 7, rand_seed=0)
1401+
>>> list(ho)
1402+
[([2, 8, 4, 9, 1, 6, 7], [3, 0, 5])]
13921403
"""
1393-
def __init__(self, nb, hold_idx, random_state=0):
1404+
def __init__(self, nb_samples, hold_out, rand_seed=0):
13941405
"""
13951406
1396-
:param int nb: total number of samples
1397-
:param int hold_idx: index where the test starts
1398-
:param obj random_state: Seed for the random number generator.
1407+
:param int nb_samples: total number of samples
1408+
:param int hold_out: index where the test starts
1409+
:param obj rand_seed: Seed for the random number generator.
13991410
"""
1400-
self.total = nb
1401-
self.hold_idx = hold_idx
1402-
self.random_state = random_state
1403-
assert self.total > self.hold_idx, \
1404-
'total %i should be higher than hold Idx %i' % (self.total, self.hold_idx)
1411+
assert nb_samples > hold_out, \
1412+
'total %i should be higher than hold Idx %i' % (nb_samples, hold_out)
1413+
1414+
self._total = nb_samples
1415+
self.hold_out = hold_out
1416+
self._indexes = list(range(nb_samples))
1417+
1418+
if rand_seed is not None and rand_seed is not False:
1419+
np.random.seed(rand_seed)
1420+
np.random.shuffle(self._indexes)
14051421

14061422
def __iter__(self):
14071423
""" iterate the folds
14081424
14091425
:return ([int], [int]):
14101426
"""
1411-
ind_train = list(range(self.hold_idx))
1412-
ind_test = list(range(self.hold_idx, self.total))
1427+
ind_train = self._indexes[:self.hold_out]
1428+
ind_test = self._indexes[self.hold_out:]
14131429
yield ind_train, ind_test
14141430

14151431
def __len__(self):
@@ -1438,22 +1454,29 @@ class CrossValidatePOut:
14381454
>>> len(cv)
14391455
2
14401456
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1441-
[([3, 4, 5], [0, 1, 2]), \
1457+
[([3, 4, 5], [0, 1, 2]),
14421458
([0, 1, 2], [3, 4, 5])]
14431459
14441460
Example 2
14451461
---------
14461462
>>> cv = CrossValidatePOut(7, 3, rand_seed=0)
14471463
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1448-
[([3, 0, 5, 4], [6, 2, 1]), \
1449-
([6, 2, 1, 4], [3, 0, 5]), \
1464+
[([3, 0, 5, 4], [6, 2, 1]),
1465+
([6, 2, 1, 4], [3, 0, 5]),
14501466
([6, 2, 1, 3, 0, 5], [4])]
1451-
1452-
14531467
>>> len(list(cv))
14541468
3
14551469
>>> cv.indexes
14561470
[6, 2, 1, 3, 0, 5, 4]
1471+
1472+
Example 3
1473+
---------
1474+
>>> cv = CrossValidatePOut(7, 5, rand_seed=0)
1475+
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1476+
[([6, 2], [1, 3, 0, 5, 4]),
1477+
([1, 3], [6, 2, 0, 5, 4]),
1478+
([0, 5], [6, 2, 1, 3, 4]),
1479+
([4], [6, 2, 1, 3, 0, 5])]
14571480
"""
14581481

14591482
def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
@@ -1464,13 +1487,21 @@ def __init__(self, nb_samples, nb_hold_out, rand_seed=None):
14641487
:param obj rand_seed: int or None
14651488
"""
14661489
assert nb_samples > nb_hold_out, \
1467-
'number of holdout has to be smaller then total size'
1468-
self.nb_samples = nb_samples
1469-
self.nb_hold_out = nb_hold_out
1490+
'number of holdout has to be smaller then _total size'
1491+
self._nb_samples = nb_samples
1492+
self._nb_hold_out = nb_hold_out
1493+
1494+
self._revert = False # sets the sizes
1495+
if self._nb_hold_out > (self._nb_samples / 2.):
1496+
logging.debug('WARNING: you are running in reverse mode, '
1497+
'while using all training examples '
1498+
'there are much more yield test cases.')
1499+
self._nb_hold_out = self._nb_samples - self._nb_hold_out
1500+
self._revert = True
14701501

1471-
self.indexes = list(range(self.nb_samples))
1502+
self.indexes = list(range(self._nb_samples))
14721503

1473-
if rand_seed is not False:
1504+
if rand_seed is not None and rand_seed is not False:
14741505
np.random.seed(rand_seed)
14751506
np.random.shuffle(self.indexes)
14761507
logging.debug('sets ordering: %s', repr(self.indexes))
@@ -1482,17 +1513,19 @@ def __iter__(self):
14821513
14831514
:return ([int], [int]):
14841515
"""
1485-
for i in range(0, self.nb_samples, self.nb_hold_out):
1486-
inds_test = self.indexes[i:i + self.nb_hold_out]
1516+
for i in range(0, self._nb_samples, self._nb_hold_out):
1517+
inds_test = self.indexes[i:i + self._nb_hold_out]
14871518
inds_train = [i for i in self.indexes if i not in inds_test]
1519+
if self._revert:
1520+
inds_train, inds_test = inds_test, inds_train
14881521
yield inds_train, inds_test
14891522

14901523
def __len__(self):
14911524
""" number of folds
14921525
14931526
:return int:
14941527
"""
1495-
return int(np.ceil(self.nb_samples / float(self.nb_hold_out)))
1528+
return int(np.ceil(self._nb_samples / float(self._nb_hold_out)))
14961529

14971530

14981531
class CrossValidatePSetsOut:
@@ -1513,7 +1546,7 @@ class CrossValidatePSetsOut:
15131546
>>> len(cv)
15141547
2
15151548
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1516-
[([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]), \
1549+
[([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]),
15171550
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])]
15181551
15191552
Example 2
@@ -1522,13 +1555,21 @@ class CrossValidatePSetsOut:
15221555
>>> cv.set_indexes
15231556
[[0, 1], [2, 3], [4], [5, 6], [7]]
15241557
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1525-
[([2, 3, 5, 6, 7], [4, 0, 1]), \
1526-
([4, 0, 1, 7], [2, 3, 5, 6]), \
1558+
[([2, 3, 5, 6, 7], [4, 0, 1]),
1559+
([4, 0, 1, 7], [2, 3, 5, 6]),
15271560
([4, 0, 1, 2, 3, 5, 6], [7])]
15281561
>>> len(cv)
15291562
3
15301563
>>> cv.sets_order
15311564
[2, 0, 1, 3, 4]
1565+
1566+
Example 3
1567+
---------
1568+
>>> cv = CrossValidatePSetsOut([2, 2, 1, 2, 1, 1], 4, rand_seed=0)
1569+
>>> list(cv) # doctest: +NORMALIZE_WHITESPACE
1570+
[([8, 4], [2, 3, 5, 6, 0, 1, 7]),
1571+
([2, 3, 5, 6], [8, 4, 0, 1, 7]),
1572+
([0, 1, 7], [8, 4, 2, 3, 5, 6])]
15321573
"""
15331574

15341575
def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
@@ -1539,24 +1580,32 @@ def __init__(self, set_sizes, nb_hold_out, rand_seed=None):
15391580
:param obj rand_seed: int or None
15401581
"""
15411582
assert len(set_sizes) > nb_hold_out, \
1542-
'nb of hold out (%i) has to be smaller then total size %i' \
1583+
'nb of hold out (%i) has to be smaller then _total size %i' \
15431584
% (nb_hold_out, len(set_sizes))
1544-
self.set_sizes = list(set_sizes)
1545-
self.total = np.sum(self.set_sizes)
1546-
self.nb_hold_out = nb_hold_out
1585+
self._set_sizes = list(set_sizes)
1586+
self._total = np.sum(self._set_sizes)
1587+
self._nb_hold_out = nb_hold_out
1588+
1589+
self._revert = False # sets the sizes
1590+
if self._nb_hold_out > (len(self._set_sizes) / 2.):
1591+
logging.debug('WARNING: you are running in reverse mode, '
1592+
'while using all training examples '
1593+
'there are much more yield test cases.')
1594+
self._nb_hold_out = len(self._set_sizes) - self._nb_hold_out
1595+
self._revert = True
15471596

15481597
self.set_indexes = []
1549-
for i, size in enumerate(self.set_sizes):
1550-
start = int(np.sum(self.set_sizes[:i]))
1598+
for i, size in enumerate(self._set_sizes):
1599+
start = int(np.sum(self._set_sizes[:i]))
15511600
inds = range(start, start + size)
15521601
self.set_indexes.append(list(inds))
15531602

1554-
assert np.sum(len(i) for i in self.set_indexes) == self.total, \
1555-
'all indexes should sum to total count %i' % self.total
1603+
assert np.sum(len(i) for i in self.set_indexes) == self._total, \
1604+
'all indexes should sum to _total count %i' % self._total
15561605

1557-
self.sets_order = list(range(len(self.set_sizes)))
1606+
self.sets_order = list(range(len(self._set_sizes)))
15581607

1559-
if rand_seed is not False:
1608+
if rand_seed is not None and rand_seed is not False:
15601609
np.random.seed(rand_seed)
15611610
np.random.shuffle(self.sets_order)
15621611
logging.debug('sets ordering: %s', repr(self.sets_order))
@@ -1568,20 +1617,22 @@ def __iter__(self):
15681617
15691618
:return ([int], [int]):
15701619
"""
1571-
for i in range(0, len(self.set_sizes), self.nb_hold_out):
1572-
test = self.sets_order[i:i + self.nb_hold_out]
1620+
for i in range(0, len(self._set_sizes), self._nb_hold_out):
1621+
test = self.sets_order[i:i + self._nb_hold_out]
15731622
inds_train = list(itertools.chain.from_iterable(
15741623
self.set_indexes[i] for i in self.sets_order if i not in test))
15751624
inds_test = list(itertools.chain.from_iterable(
15761625
self.set_indexes[i] for i in self.sets_order if i in test))
1626+
if self._revert:
1627+
inds_train, inds_test = inds_test, inds_train
15771628
yield inds_train, inds_test
15781629

15791630
def __len__(self):
15801631
""" number of folds
15811632
15821633
:return int:
15831634
"""
1584-
nb = len(self.set_sizes) / float(self.nb_hold_out)
1635+
nb = len(self._set_sizes) / float(self._nb_hold_out)
15851636
return int(np.ceil(nb))
15861637

15871638

imsegm/descriptors.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1734,7 +1734,7 @@ def interpolate_ray_dist(ray_dists, order='spline'):
17341734
""" interpolate ray distances
17351735
17361736
:param [float] ray_dists:
1737-
:param str order: degree of interpolation
1737+
:param str|int order: degree of interpolation
17381738
:return [float]:
17391739
17401740
>>> interpolate_ray_dist([-1] * 5)
@@ -1799,7 +1799,7 @@ def _fn_cos_residual(x, t, y):
17991799
def reconstruct_ray_features_2d(position, ray_features, shift=0):
18001800
""" reconstruct ray features for 2D image
18011801
1802-
:param (int, int) position:
1802+
:param (int, int)|(float, float) position:
18031803
:param [float] ray_features:
18041804
:param float shift:
18051805
:return [[float, float]]:

imsegm/ellipse_fitting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def split_segm_background_foreground(seg, sel_bg=STRUC_ELEM_BG,
401401
""" smoothing segmentation with morphological operation
402402
403403
:param ndarray seg: input segmentation
404-
:param int sel_bg: smoothing background with morphological operation
404+
:param int|float sel_bg: smoothing background with morphological operation
405405
:param int sel_fg: smoothing foreground with morphological operation
406406
:return:
407407

imsegm/utilities/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
import logging
32

43
import matplotlib
54
import numpy as np
@@ -8,7 +7,7 @@
87
# in case you are running on machine without display, e.g. server
98
if os.environ.get('DISPLAY', '') == '' \
109
and matplotlib.rcParams['backend'] != 'agg':
11-
# logging.warning('No display found. Using non-interactive Agg backend.')
10+
print('No display found. Using non-interactive Agg backend.')
1211
# https://matplotlib.org/faq/usage_faq.html
1312
matplotlib.use('Agg')
1413

imsegm/utilities/drawing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import matplotlib
1111
if os.environ.get('DISPLAY', '') == '' \
1212
and matplotlib.rcParams['backend'] != 'agg':
13-
# logging.warning('No display found. Using non-interactive Agg backend.')
13+
print('No display found. Using non-interactive Agg backend.')
1414
matplotlib.use('Agg')
1515

1616
import numpy as np

0 commit comments

Comments
 (0)