Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ RUN python3 -m pip install -r requirements.txt
# Copy the build context into the image's current working directory.
COPY . .

# Install the deepcelltypes-kit package from the directory copied above.
RUN python3 -m pip install deepcelltypes-kit/

# Download the crosswalk CSV that main.py reads from /opt at runtime.
# --fail aborts the build on an HTTP error instead of saving the server's
# error page as the CSV; -sS hides the progress meter but still prints errors.
RUN curl -fsSL -o /opt/deepcelltypes-hubmap-crosswalk.csv https://cdn.humanatlas.io/digital-objects/ctann/deepcelltypes-hubmap/v1.0/assets/deepcelltypes-hubmap-crosswalk.csv

48 changes: 44 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List, Tuple

import numpy as np
import pandas as pd
import scipy as sp
import tensorflow as tf
import tifffile as tff
Expand Down Expand Up @@ -260,6 +261,39 @@ def predict(expr_file: Path, mask_file: Path) -> List[Tuple[int, int]]:
return prediction_list


def read_clid_mapping(crosswalk_path="/opt/deepcelltypes-hubmap-crosswalk.csv"):
    """Load the crosswalk CSV and return two lookup dicts keyed by annotation label.

    Args:
        crosswalk_path: Location of the crosswalk CSV. Defaults to the path
            this function has always read from, so calls without arguments
            behave exactly as before.

    Returns:
        A ``(label_to_cl_label, label_to_cl_id)`` tuple. Both dicts are keyed
        by the ``Annotation_Label`` column; the values come from the
        ``CL_Label`` and ``CL_ID`` columns respectively. When an annotation
        label occurs more than once, the last row wins.
    """
    # header=10: the column names are taken from row index 10 (0-based); the
    # rows above it are discarded by pandas rather than parsed as data.
    reference = pd.read_csv(crosswalk_path, header=10)
    annotation_labels = reference['Annotation_Label']
    label_to_cl_label = dict(zip(annotation_labels, reference['CL_Label']))
    label_to_cl_id = dict(zip(annotation_labels, reference['CL_ID']))
    return label_to_cl_label, label_to_cl_id


def map_to_clid(prediction_df: pd.DataFrame) -> pd.DataFrame:
    """Add ``DeepCellTypes_CL_Label`` and ``DeepCellTypes_CL_ID`` columns.

    Both columns are derived from ``DeepCellTypes_CellType`` through the
    lookups returned by ``read_clid_mapping``. Cell types missing from the
    lookup keep a missing CL label and receive ``'CL:0000000'`` as their CL ID.

    The frame is modified in place, printed to stdout, and the same object is
    returned.
    """
    label_lookup, id_lookup = read_clid_mapping()

    cell_types = prediction_df['DeepCellTypes_CellType']
    cl_labels = cell_types.map(label_lookup)
    # Only the ID column gets a placeholder for unmapped cell types.
    cl_ids = cell_types.map(id_lookup).fillna('CL:0000000')

    prediction_df['DeepCellTypes_CL_Label'] = cl_labels
    prediction_df['DeepCellTypes_CL_ID'] = cl_ids
    print(prediction_df)
    return prediction_df


def create_cell_type_manifest(prediction_df, outdir):
    """Write per-value cell counts to ``<outdir>/cell_type_manifest.json``.

    Args:
        prediction_df: DataFrame containing the ``DeepCellTypes_CellType`` and
            ``DeepCellTypes_CL_ID`` columns.
        outdir: Directory (``str`` or path-like) that receives the JSON file.

    The JSON object has one entry per column above, each mapping every
    distinct non-missing value of that column to the number of rows holding
    it. Keys appear in order of first occurrence in the column.
    """
    cell_type_manifest_dict = {}

    for column_header in ['DeepCellTypes_CellType', 'DeepCellTypes_CL_ID']:
        column = prediction_df[column_header]
        # One grouped pass instead of a full column scan per unique value.
        # sort=False keeps first-appearance order; groupby drops missing keys,
        # so no separate NaN clean-up is needed.
        sizes = column.groupby(column, sort=False).size()
        # int() converts numpy integers into values json can serialise.
        cell_type_manifest_dict[column_header] = {
            value: int(count) for value, count in sizes.items()
        }

    with open(f'{outdir}/cell_type_manifest.json', 'w') as f:
        json.dump(cell_type_manifest_dict, f)


def main(data_dir: Path):
pipeline_output_dir = data_dir / "pipeline_output"
expr_files = sorted(find_ome_tiffs(pipeline_output_dir / "expr"))
Expand All @@ -271,10 +305,16 @@ def main(data_dir: Path):
pred_csv_file = output_path / f"{expr_file.stem}-predictions.csv"
predictions = predict(expr_file, mask_file)
logger.info("Saving predictions from %s to %s", expr_file, pred_csv_file)
with open(pred_csv_file, "w") as fh:
print("ID,DeepCellTypes_CellType", file=fh)
for idx, ct in predictions:
print(f"{idx},{ct}", file=fh)
idxs, deepcelltypes_cells = zip(*predictions)
predictions_df = pd.DataFrame({'ID': idxs,
'DeepCellTypes_CellType': deepcelltypes_cells})
predictions_df.to_csv(pred_csv_file)
predictions_df_with_clid = map_to_clid(predictions_df)
create_cell_type_manifest(predictions_df_with_clid, output_path)
json_path = output_path / 'cl-mapping.json'
preds_dict=predictions_df_with_clid.to_dict(orient='records')
with open(json_path, 'w') as f:
json.dump(preds_dict, f)


if __name__ == "__main__":
Expand Down