Skip to content

Commit 43fdb15

Browse files
committed
OWVolcanoPlot: general improvements, use of GeneScoring component
1 parent 6a7d577 commit 43fdb15

File tree

6 files changed

+398
-75
lines changed

6 files changed

+398
-75
lines changed

orangecontrib/bioinformatics/tests/widgets/ow_components/__init__.py

Whitespace-only changes.
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import unittest
2+
3+
from AnyQt.QtTest import QSignalSpy
4+
5+
from Orange.data import Table
6+
from Orange.widgets.widget import OWWidget
7+
from Orange.widgets.settings import SettingProvider
8+
from Orange.widgets.tests.base import WidgetTest
9+
from Orange.widgets.tests.utils import simulate
10+
11+
from orangecontrib.bioinformatics.utils.statistics import score_hypergeometric_test
12+
from orangecontrib.bioinformatics.widgets.ow_components import GeneScoringComponent
13+
14+
15+
class MockWidget(OWWidget):
    """Bare host widget that embeds a ``GeneScoringComponent`` for testing."""

    name = "Mock"
    # Declared as a SettingProvider, the same way OWVolcanoPlot exposes the component.
    scoring_component = SettingProvider(GeneScoringComponent)

    def __init__(self):
        # The component is built with the widget's main area as its second argument.
        host_area = self.mainArea
        self.scoring_component = GeneScoringComponent(self, host_area)
21+
22+
23+
class TestGeneScoringComponent(WidgetTest):
    """GUI-level tests for the controls exposed by ``GeneScoringComponent``."""

    def setUp(self):
        self.widget = MockWidget()
        self.component = self.widget.scoring_component

    def test_scoring_methods_combobox(self):
        """The combo box mirrors ``score_methods`` and signals every activation."""
        combo = self.component.score_method_combo
        labels = [combo.itemText(row) for row in range(combo.count())]
        self.assertTrue(len(labels) > 0)

        expected_labels = [name for name, _ in self.component.score_methods]
        self.assertEqual(expected_labels, labels)

        spy = QSignalSpy(self.component.score_method_changed)
        simulate.combobox_run_through_all(combo)

        # after running through all items the last one stays selected
        self.assertEqual(combo.currentIndex(), self.component.current_method_index)
        self.assertEqual(self.component.current_method_index, len(labels) - 1)

        # number of signals combobox emits should be equal to the length of available scoring methods
        self.assertEqual(len(labels), len(spy))

    def test_expression_threshold_spinbox(self):
        """The threshold box is hidden until the hypergeometric test is selected."""
        # find index of item in combobox for hypergeometric test
        matching_rows = [
            row
            for row, (_, scorer) in enumerate(self.component.score_methods)
            if scorer == score_hypergeometric_test
        ]
        method_index, *_ = matching_rows

        # check if spinbox appears after hypergeometric test is selected
        threshold_box = self.component.expression_threshold_box
        self.assertTrue(threshold_box.isHidden())
        simulate.combobox_activate_index(self.component.score_method_combo, method_index)
        self.assertFalse(threshold_box.isHidden())

    def test_group_values(self):
        """Initializing with iris fills the group combo and the list of group values."""
        self.assertIsNone(self.component.data)
        self.component.initialize(Table('iris'))
        self.assertIsNotNone(self.component.data)

        # only the first combo entry is checked: the 'iris' class attribute
        group_combo = self.component.group_combo
        group_names = [group_combo.itemText(row) for row in range(group_combo.count())]
        first_group, *_ = group_names
        self.assertEqual(first_group, 'iris')

        value_list = self.component.list_widget
        shown_values = [value_list.item(row).text() for row in range(value_list.count())]
        self.assertEqual(shown_values, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'])
70+
71+
72+
if __name__ == "__main__":
73+
unittest.main()

orangecontrib/bioinformatics/utils/statistics.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
ALTERNATIVES = [ALT_GREATER, ALT_TWO, ALT_LESS]
1313

1414

15-
def score_t_test(a, b, axis=0, alternative=ALT_TWO):
16-
# type: (np.array, np.array, int, str) -> Tuple[Union[float, np.array], Union[float, np.array]]
15+
def score_t_test(
16+
a: np.array, b: np.array, axis: int = 0, **kwargs
17+
) -> Tuple[Union[float, np.array], Union[float, np.array]]:
1718
""" Run t-test. Enable setting different alternative hypothesis.
1819
Probabilities are exact due to symmetry of the test.
1920
@@ -24,7 +25,7 @@ def score_t_test(a, b, axis=0, alternative=ALT_TWO):
2425
scipy.stats.ttest_ind
2526
2627
"""
27-
# alt = kwargs.get("alternative", ALT_TWO)
28+
alternative = kwargs.get("alternative", ALT_TWO)
2829
assert alternative in ALTERNATIVES
2930
scores, pvalues = scipy.stats.ttest_ind(a, b, axis=axis)
3031

@@ -41,7 +42,7 @@ def score_t_test(a, b, axis=0, alternative=ALT_TWO):
4142
return scores, 1.0 - pvalues
4243

4344

44-
def score_mann_whitney(a, b, **kwargs):
45+
def score_mann_whitney(a: np.array, b: np.array, **kwargs) -> Tuple[np.array, np.array]:
4546
axis = kwargs.get('axis', 0)
4647
a, b = np.asarray(a, dtype=float), np.asarray(b, dtype=float)
4748

@@ -71,12 +72,15 @@ def score_mann_whitney(a, b, **kwargs):
7172
return np.array(statistics), np.array(p_values)
7273

7374

74-
def score_hypergeometric_test(a, b, threshold=1, **kwargs):
75+
def score_hypergeometric_test(a: np.array, b: np.array, threshold: float = 1.0, **kwargs) -> Tuple[np.array, np.array]:
7576
"""
7677
Run a hypergeometric test. The probability in a two-sided test is approximated
7778
with the symmetric distribution with more extreme of the tails.
7879
"""
79-
# type: (np.ndarray, np.ndarray, float) -> np.ndarray
80+
axis = kwargs.get('axis', 0)
81+
82+
if axis == 1:
83+
a, b = a.T, b.T
8084

8185
# Binary expression matrices
8286
_a = (a >= threshold).astype(int)
Lines changed: 82 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
from typing import Optional
2+
13
import numpy as np
24

5+
from Orange.data import Table
36
from Orange.widgets import gui, settings
47
from Orange.widgets.widget import Msg
5-
from Orange.widgets.settings import SettingProvider
8+
from Orange.widgets.settings import SettingProvider, DomainContextHandler
69
from Orange.widgets.visualize.owscatterplot import OWScatterPlotBase, OWDataProjectionWidget
710

8-
from orangecontrib.bioinformatics.utils.statistics import score_t_test, score_fold_change
9-
from orangecontrib.bioinformatics.widgets.utils.gui import label_selection
10-
from orangecontrib.bioinformatics.widgets.utils.data import GENE_ID_COLUMN, GENE_AS_ATTRIBUTE_NAME
11+
from orangecontrib.bioinformatics.utils.statistics import score_fold_change
12+
from orangecontrib.bioinformatics.widgets.utils.data import TableAnnotation
13+
from orangecontrib.bioinformatics.widgets.ow_components import GeneScoringComponent
1114

1215

1316
class VolcanoGraph(OWScatterPlotBase):
@@ -28,117 +31,127 @@ class Warning(OWDataProjectionWidget.Warning):
2831
'Insufficient data to compute statistics.' 'More than one measurement per class should be provided '
2932
)
3033

31-
gene_enrichment = Msg('{}, {}.')
32-
no_selected_gene_sets = Msg('No gene set selected, select them from Gene Sets box.')
33-
3434
class Error(OWDataProjectionWidget.Error):
    # Error messages shown by the widget; the wording of the first and third
    # message was corrected ("must" instead of "most", "data" instead of "date").
    exclude_error = Msg('Target labels must exclude/include at least one value.')
    negative_values = Msg('Negative values in the input. The inputs cannot be in ratio scale.')
    data_not_annotated = Msg('The input data is not annotated as expected. Please refer to documentation.')
    gene_column_id_missing = Msg('Can not identify genes column. Please refer to documentation.')
3939

40-
GRAPH_CLASS = VolcanoGraph
40+
settingsHandler = DomainContextHandler()
4141
graph = SettingProvider(VolcanoGraph)
42-
embedding_variables_names = ('log2 (ratio)', '-log10 (P_value)')
42+
scoring_component = SettingProvider(GeneScoringComponent)
4343

44-
stored_selections = settings.ContextSetting([])
45-
current_group_index = settings.ContextSetting(0)
44+
GRAPH_CLASS = VolcanoGraph
45+
embedding_variables_names = ('log2 (ratio)', '-log10 (P_value)')
4646

4747
def __init__(self):
    """Set up the widget and the per-input state filled in by later calls."""
    super().__init__()

    # Input table as received; stored by set_data only when it gets transposed.
    self._data: Optional[Table] = None

    # Table annotations read from ``data.attributes`` in set_data.
    self.genes_in_columns: Optional[str] = None
    self.gene_id_column: Optional[str] = None
    self.gene_id_attribute: Optional[str] = None

    # Plot coordinates written by _compute and the finite-value mask written
    # by get_embedding. ``np.ndarray`` is the array type; ``np.array`` is only
    # the factory function and is not a valid annotation.
    self.fold: Optional[np.ndarray] = None
    self.log_p_values: Optional[np.ndarray] = None
    self.valid_data: Optional[np.ndarray] = None
4957

5058
def _add_controls(self):
    """Create the gene scoring controls, then the standard projection controls."""
    container = gui.vBox(self.controlArea, True, margin=0)
    component = GeneScoringComponent(self, container)
    self.scoring_component = component

    # Every change in the scoring set-up triggers a full re-plot.
    for signal in (
        component.group_changed,
        component.selection_changed,
        component.score_method_changed,
        component.expression_threshold_changed,
    ):
        signal.connect(self.setup_plot)

    super()._add_controls()
    self.gui.add_widgets([self.gui.ShowGridLines], self._plot_box)
5968

60-
def get_embedding(self):
69+
def _compute(self):
    """Recompute fold change and p-values for the current group selection.

    On success the results are stored in ``self.fold`` (output of
    ``score_fold_change(..., log=True)``) and ``self.log_p_values``
    (``log10`` of the p-values returned by the selected score method).
    When there is no data, or the selection does not split the columns into
    two non-empty groups, both attributes are left as ``None`` so that
    ``get_embedding`` has nothing to plot.
    """
    self.Error.exclude_error.clear()
    self.Warning.insufficient_data.clear()

    # Forget the previous run first: otherwise an early exit below would leave
    # the old coordinates in place and the plot would show stale points.
    self.fold = None
    self.log_p_values = None

    if not self.data:
        return

    x = self.data.X
    score_method = self.scoring_component.get_score_method()
    i1 = self.scoring_component.get_selection_mask()
    i2 = ~i1

    n1, n2 = np.count_nonzero(i1), np.count_nonzero(i2)
    if not n1 or not n2:
        # One of the two groups is empty, there is nothing to compare.
        self.Error.exclude_error()
        return

    if n1 < 2 and n2 < 2:
        self.Warning.insufficient_data()

    x1, x2 = x[:, i1], x[:, i2]
    if np.any(x1 < 0.0) or np.any(x2 < 0.0):
        self.Error.negative_values()
        # Replace the values with NaN so that every score comes out non-finite.
        x1 = np.full_like(x1, np.nan)
        x2 = np.full_like(x2, np.nan)

    # Zero or NaN values are expected here; silence the floating point warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        self.fold = score_fold_change(x1, x2, axis=1, log=True)
        _, p_values = score_method(
            x1, x2, axis=1, threshold=self.scoring_component.get_expression_threshold()
        )
        self.log_p_values = np.log10(p_values)
97+
def get_embedding(self):
    """Return the plot coordinates as an (n, 2) array, or None when unavailable.

    The first column holds ``self.fold``, the second the negated
    ``self.log_p_values``. ``self.valid_data`` is updated to flag the rows in
    which both values are finite.
    """
    if self.data is None or self.fold is None or self.log_p_values is None:
        return None

    fold, log_p = self.fold, self.log_p_values
    self.valid_data = np.isfinite(fold) & np.isfinite(log_p)

    coordinates = np.array([fold, -log_p])
    return coordinates.T
106+
107+
def send_data(self):
    """Send the selected rows, transposed again when genes came in columns."""
    data = self._get_projection_data()
    graph = self.graph

    group_sel = None
    if graph.selection is not None:
        # Spread the graph selection over all rows; rows not marked in
        # ``valid_data`` keep group 0.
        group_sel = np.zeros(len(data), dtype=int)
        group_sel[self.valid_data] = graph.selection

    selected_data = self._get_selected_data(data, graph.get_selection(), group_sel)

    # set_data transposes such input before plotting, so transpose the selection again.
    if self.genes_in_columns and selected_data:
        selected_data = Table.transpose(selected_data, feature_names_column='Feature name')

    self.Outputs.selected_data.send(selected_data)
97119

98120
def setup_plot(self):
    """Recompute the scores, redraw the plot and title both axes."""
    self._compute()
    super().setup_plot()

    axis_titles = {
        "bottom": 'log<sub>2</sub> (ratio)',
        "left": '-log<sub>10</sub> (P_value)',
    }
    for axis, title in axis_titles.items():
        self.graph.set_axis_title(axis, title)
102125

103-
def on_target_values_changed(self, index):
104-
# Save the current selection to persistent settings
105-
self.current_group_index = index
106-
selected_indices = [ind.row() for ind in self.group_selection_widget.currentGroupSelection().indexes()]
107-
108-
if self.current_group_index != -1 and selected_indices:
109-
self.stored_selections[self.current_group_index] = selected_indices
110-
111-
self.setup_plot()
112-
113126
def set_data(self, data):
    """Accept a new input table, transposing it first when genes are in columns."""
    self.Warning.clear()
    self.Error.clear()

    if data:
        annotations = data.attributes
        self.genes_in_columns = annotations.get(TableAnnotation.gene_as_attr_name)
        self.gene_id_column = annotations.get(TableAnnotation.gene_id_column)
        self.gene_id_attribute = annotations.get(TableAnnotation.gene_id_attribute)

        if self.genes_in_columns:
            # keep the table as received before replacing it with its transpose
            self._data = data
            # override default meta_attr_name value to avoid unexpected changes.
            data = Table.transpose(data, meta_attr_name='Feature name')

    super().set_data(data)
    self.scoring_component.initialize(self.data)
123142

124143
def check_data(self):
    """Validate ``self.data`` and drop it when it cannot be plotted.

    Empty tables (no rows or an empty domain) are discarded silently. A
    non-empty table without the gene annotation raises
    ``Error.data_not_annotated`` and is discarded as well.
    """
    self.clear_messages()

    if self.data is not None and (len(self.data) == 0 or len(self.data.domain) == 0):
        self.data = None

    # Only complain about the annotation when there is data to annotate;
    # otherwise the error would be shown even when no input is connected.
    if self.data is not None and self.genes_in_columns is None:
        # Note: input data is not annotated properly.
        self.Error.data_not_annotated()
        self.data = None
141-
142153

143154
if __name__ == "__main__":
144-
pass
155+
from Orange.widgets.utils.widgetpreview import WidgetPreview
156+
157+
WidgetPreview(OWVolcanoPlot).run()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .gene_scoring import GeneScoringComponent
2+
3+
__all__ = ('GeneScoringComponent',)

0 commit comments

Comments
 (0)