hubmapconsortium · pennycuda · Aug 12, 2025 · Oct 3, 2025 · Mar 13, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -11,3 +11,6 @@ RUN python3 -m pip install -r requirements.txt
 COPY . .
 
 RUN python3 -m pip install deepcelltypes-kit/
+
+# --fail makes the build stop on an HTTP error instead of saving the error page as the CSV.
+RUN curl --fail -L -o /opt/deepcelltypes-hubmap-crosswalk.csv https://cdn.humanatlas.io/digital-objects/ctann/deepcelltypes-hubmap/v1.0/assets/deepcelltypes-hubmap-crosswalk.csv
+
diff --git a/main.py b/main.py
@@ -6,6 +6,7 @@
 from typing import List, Tuple
 
 import numpy as np
+import pandas as pd
 import scipy as sp
 import tensorflow as tf
 import tifffile as tff
@@ -260,6 +261,39 @@ def predict(expr_file: Path, mask_file: Path) -> List[Tuple[int, int]]:
     return prediction_list
 
 
+def read_clid_mapping(
+    crosswalk_path="/opt/deepcelltypes-hubmap-crosswalk.csv",
+    header_row=10,
+):
+    """Load the crosswalk CSV and return two label lookup dictionaries.
+
+    Args:
+        crosswalk_path: Path of the crosswalk CSV to read. The default is
+            the location this function originally had hard-coded.
+        header_row: Value passed to ``pd.read_csv`` as ``header``, i.e. the
+            row that holds the column names; rows before it are ignored.
+
+    Returns:
+        A ``(label_to_cl_label, label_to_cl_id)`` tuple. Both dicts are
+        keyed by the ``Annotation_Label`` column and hold the matching
+        ``CL_Label`` and ``CL_ID`` values respectively. If a label occurs
+        more than once, the last row wins.
+
+    Raises:
+        FileNotFoundError: if ``crosswalk_path`` does not exist.
+        KeyError: if one of the three required columns is missing.
+    """
+    reference = pd.read_csv(crosswalk_path, header=header_row)
+    annotation_labels = reference['Annotation_Label']
+    label_to_cl_label = dict(zip(annotation_labels, reference['CL_Label']))
+    label_to_cl_id = dict(zip(annotation_labels, reference['CL_ID']))
+    return label_to_cl_label, label_to_cl_id
+
+
+def map_to_clid(prediction_df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of *prediction_df* with Cell Ontology columns appended.
+
+    The ``DeepCellTypes_CellType`` column is looked up in the crosswalk
+    returned by ``read_clid_mapping()`` and two columns are added:
+    ``DeepCellTypes_CL_Label`` and ``DeepCellTypes_CL_ID``. Rows whose cell
+    type has no ``CL_ID`` in the crosswalk receive the fallback ID
+    ``CL:0000000`` and the fallback label ``cell``, so neither column holds
+    NaN for those rows (NaN would be written to JSON as the non-standard
+    token ``NaN``).
+
+    Args:
+        prediction_df: Frame with a ``DeepCellTypes_CellType`` column. It is
+            not modified.
+
+    Returns:
+        A new DataFrame holding the input columns plus the two CL columns.
+
+    Raises:
+        KeyError: if ``DeepCellTypes_CellType`` is missing.
+    """
+    import logging
+
+    fallback_cl_id = 'CL:0000000'
+    # NOTE(review): 'cell' is taken to be the Cell Ontology name of
+    # CL:0000000 -- confirm against the ontology release in use.
+    fallback_cl_label = 'cell'
+
+    cl_label_map, cl_id_map = read_clid_mapping()
+    cell_types = prediction_df['DeepCellTypes_CellType']
+
+    cl_ids = cell_types.map(cl_id_map)
+    unmapped = cl_ids.isna()
+    # Only rows without a CL_ID get the fallback label; a label that the
+    # crosswalk itself provides is never overwritten.
+    cl_labels = cell_types.map(cl_label_map).mask(unmapped, fallback_cl_label)
+
+    if unmapped.any():
+        # Replaces a leftover debug print of the whole frame with a summary.
+        logging.getLogger(__name__).warning(
+            "%d of %d predictions have no CL mapping; using %s",
+            int(unmapped.sum()), len(prediction_df), fallback_cl_id,
+        )
+
+    # assign() builds a new frame, leaving the caller's DataFrame untouched.
+    return prediction_df.assign(
+        DeepCellTypes_CL_Label=cl_labels,
+        DeepCellTypes_CL_ID=cl_ids.fillna(fallback_cl_id),
+    )
+
+
+def create_cell_type_manifest(prediction_df, outdir):
+    """Write ``cell_type_manifest.json`` with per-label cell counts.
+
+    For each of the columns ``DeepCellTypes_CellType`` and
+    ``DeepCellTypes_CL_ID`` the manifest holds a mapping from every distinct
+    non-missing value to the number of rows carrying it. Values are listed
+    in order of first appearance in the column.
+
+    Args:
+        prediction_df: Frame containing both columns.
+        outdir: Directory in which ``cell_type_manifest.json`` is written.
+
+    Raises:
+        KeyError: if either column is missing from ``prediction_df``.
+    """
+    cell_type_manifest_dict = {}
+
+    for column_header in ['DeepCellTypes_CellType', 'DeepCellTypes_CL_ID']:
+        # One grouped pass instead of rescanning the column per distinct
+        # value. sort=False keeps first-appearance order, and groupby drops
+        # NaN keys by default, so no separate NaN filter is needed.
+        counts = prediction_df.groupby(column_header, sort=False).size()
+        # int() converts numpy integers, which json cannot serialise.
+        cell_type_manifest_dict[column_header] = {
+            label: int(count) for label, count in counts.items()
+        }
+
+    with open(f'{outdir}/cell_type_manifest.json', 'w') as f:
+        json.dump(cell_type_manifest_dict, f)
+
+
 def main(data_dir: Path):
     pipeline_output_dir = data_dir / "pipeline_output"
     expr_files = sorted(find_ome_tiffs(pipeline_output_dir / "expr"))
@@ -271,10 +305,16 @@ def main(data_dir: Path):
         pred_csv_file = output_path / f"{expr_file.stem}-predictions.csv"
         predictions = predict(expr_file, mask_file)
         logger.info("Saving predictions from %s to %s", expr_file, pred_csv_file)
-        with open(pred_csv_file, "w") as fh:
-            print("ID,DeepCellTypes_CellType", file=fh)
-            for idx, ct in predictions:
-                print(f"{idx},{ct}", file=fh)
+        idxs, deepcelltypes_cells = zip(*predictions)
+        predictions_df = pd.DataFrame({'ID': idxs,
+                                      'DeepCellTypes_CellType': deepcelltypes_cells})
+        predictions_df.to_csv(pred_csv_file)
+        predictions_df_with_clid = map_to_clid(predictions_df)
+        create_cell_type_manifest(predictions_df_with_clid, output_path)
+        json_path = output_path / 'cl-mapping.json'
+        preds_dict=predictions_df_with_clid.to_dict(orient='records')
+        with open(json_path, 'w') as f:
+            json.dump(preds_dict, f)
 
 
 if __name__ == "__main__":
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,3 +11,6 @@ RUN python3 -m pip install -r requirements.txt
		COPY . .

		RUN python3 -m pip install deepcelltypes-kit/

		RUN curl -L -o /opt/deepcelltypes-hubmap-crosswalk.csv https://cdn.humanatlas.io/digital-objects/ctann/deepcelltypes-hubmap/v1.0/assets/deepcelltypes-hubmap-crosswalk.csv