diff --git a/src/analysis/as_analysis_sc.py b/src/analysis/as_analysis_sc.py
index 416daa6b..4efc6fa1 100644
--- a/src/analysis/as_analysis_sc.py
+++ b/src/analysis/as_analysis_sc.py
@@ -58,7 +58,6 @@ def adata_count_qc(
 
         adata.obs["index"] = adata.obs.index  # Replace index column
 
-    # TODO add options to identify and filter GT errors
     if gt_error is not None:
         pass
 
@@ -175,9 +174,6 @@ def opt_disp(rho: float, ref_data: NDArray[np.uint16], n_data: NDArray[np.uint16
 
         df_dict[group_name] = df
 
-    # Should I return something?
-    # Maybe compile all of the dataframes?
-
     return df_dict
 
 
@@ -281,7 +277,6 @@ def get_imbalance_per_group(
         group_results.append((region, snp_count, mu, null_ll, alt_ll, pval))
 
     # Create allelic imbalance df
-    # Polars vs pandas??
     df: pd.DataFrame = pd.DataFrame(
         group_results, columns=["region", "num_snps", "mu", "null_ll", "alt_ll", "pval"]
     )
diff --git a/src/analysis/compare_ai.py b/src/analysis/compare_ai.py
index eb4259e5..60822c5e 100644
--- a/src/analysis/compare_ai.py
+++ b/src/analysis/compare_ai.py
@@ -19,7 +19,6 @@
 logger = logging.getLogger(__name__)
 
 
-# Use these functions to figure out how to optimize per group
 def get_imbalance_func(
     ref_count: NDArray[np.integer[Any]],
     n_count: NDArray[np.integer[Any]],
@@ -105,7 +104,6 @@ def get_compared_imbalance(
     if sample is None:
         phased = False
 
-    # Should I be comparing all combos by default??? Seems like a lot
     if groups is None:
         groups = list(adata.var["group"].dropna().unique())
         logger.info("Comparing all combinations of available groups")
@@ -154,14 +152,12 @@ def opt_disp(rho: float, ref_data: NDArray[np.uint16], n_data: NDArray[np.uint16
 
     # process counts on a per group basis to avoid recalculating
     group_dict: dict[str, Any] = {}
-    # group_data = namedtuple("group_data", ["ref_counts", "n_counts", "phase_data", "region_snp_dict"]) # Maybe include the gt_array instead of min_idx
     group_data = namedtuple("group_data", ["ref_counts", "n_counts", "region_snp_df"])
 
     for group_name in groups:
         # Subset by group
         adata_sub = adata[:, adata.var["group"] == group_name]
 
-        # Create count data per group, should i do pseudocount now or later?
         ref_counts_group = adata_sub.layers["ref"].sum(axis=1, dtype=np.uint16).T.A1 + pseudocount
         alt_counts_group = adata_sub.layers["alt"].sum(axis=1, dtype=np.uint16).T.A1 + pseudocount
         n_counts_group = ref_counts_group + alt_counts_group
@@ -239,7 +235,6 @@ def opt_disp(rho: float, ref_data: NDArray[np.uint16], n_data: NDArray[np.uint16
 
             continue
 
-        # This sub function name kinda long...find better name maybe?
         df = compare_imbalance_between_groups(
             disp, ref_counts1, n_counts1, ref_counts2, n_counts2, region_snp_dict, gt_array
         )
@@ -368,7 +363,6 @@ def compare_imbalance_between_groups(
 
         # Add data to output list
 
-        # How should i format this, lots of possible outputs
         group_results.append(
             (region, len(snp_list), combined_mu, alt_mu1, alt_mu2, null_ll, alt_ll, pval)
         )
@@ -412,7 +406,6 @@ def get_compared_imbalance_diff_snps(
     if sample is None:
         phased = False
 
-    # Should I be comparing all combos by default??? Seems like a lot
     if groups is None:
         groups = list(adata.var["group"].dropna().unique())
         logger.info("Comparing all combinations of available groups")
@@ -469,13 +462,12 @@ def opt_disp_filt(
     group_dict: dict[str, Any] = {}
     group_data = namedtuple(
         "group_data", ["ref_counts", "n_counts", "phase_data", "region_snp_dict"]
-    )  # Maybe include the gt_array instead of min_idx
+    )
 
     for group_name in groups:
         # Subset by group
         adata_sub = adata[:, adata.var["group"] == group_name]
 
-        # Create count data per group, should i do pseudocount now or later?
         ref_counts_group = adata_sub.layers["ref"].sum(axis=1, dtype=np.uint16).T.A1 + pseudocount
         alt_counts_group = adata_sub.layers["alt"].sum(axis=1, dtype=np.uint16).T.A1 + pseudocount
         n_counts_group = ref_counts_group + alt_counts_group
@@ -519,8 +511,6 @@ def opt_disp_filt(
 
     df_dict: dict[tuple[str, str], pd.DataFrame] = {}
     for group1, group2 in group_combos:
-        # Might be smart to create a cache to prevent repeating calculations
-        # This sub function name kinda long...find better name maybe?
         df = compare_imbalance_between_groups_diff_snps(
             disp, *group_dict[group1], *group_dict[group2]
         )
@@ -632,7 +622,6 @@ def compare_imbalance_between_groups_diff_snps(
 
         # Add data to output list
 
-        # How should i format this, lots of possible outputs
         group_results.append(
             (
                 region,
diff --git a/src/analysis/run_analysis.py b/src/analysis/run_analysis.py
index 39bb2ee9..7dcfff19 100644
--- a/src/analysis/run_analysis.py
+++ b/src/analysis/run_analysis.py
@@ -59,10 +59,9 @@ def __init__(
         # User input data
         self.count_file = count_file
         self.region_col = region_col
-        self.groupby = groupby  # group by region or parent?
+        self.groupby = groupby
         self.out_file = out_file
 
-        # TODO parse vcf for phased instead of default unphased
         self.phased: bool = bool(phased)
 
         # Default to single dispersion model
@@ -108,15 +107,14 @@ def __init__(
                 "Parent",
                 "parent",
             }:
-                self.groupby = count_cols[region_idx + 1]  # Set group
+                self.groupby = count_cols[region_idx + 1]
             else:
-                # Maybe throw error instead
                 logger.warning("%s not found in columns %s", self.groupby, count_cols)
                 self.groupby = None
 
         # Create default outfile
         if self.out_file is None:
-            self.out_file = str(Path.cwd() / "ai_results.tsv")  # do this after
+            self.out_file = str(Path.cwd() / "ai_results.tsv")
 
 
 def run_ai_analysis(
@@ -192,7 +190,6 @@ def run_ai_analysis(
     )
     ai_df = pd.DataFrame(results)
 
-    # Maybe give option to sort or not sort by pval
     if "fdr_pval" in ai_df.columns:
         ai_df = ai_df.sort_values(by="fdr_pval", ascending=True)
 
diff --git a/src/analysis/run_analysis_sc.py b/src/analysis/run_analysis_sc.py
index 6eb3255a..3bf98f72 100644
--- a/src/analysis/run_analysis_sc.py
+++ b/src/analysis/run_analysis_sc.py
@@ -36,10 +36,9 @@ def __init__(
         self.model = model
         self.out_file = out_file
         self.phased = phased
-        self.z_cutoff = z_cutoff  # Should i default to something like 4 or 5?
+        self.z_cutoff = z_cutoff
 
         # Default to single dispersion model
-        # TODO ADD GROUP DISP and other model types
         if (self.model is None) or (self.model not in {"single"}):
             self.model = "single"
 
@@ -48,7 +47,6 @@ def __init__(
             self.min_count = 10
 
         if self.pseudocount is None:
-            # self.pseudocount = 0 # either 0 or 1 for default
             self.pseudocount = 1
 
         # Make sure min and pseudocounts are valid
@@ -132,13 +130,10 @@ def process_adata_inputs(
                 f"Please input a sample from list: {sample_list}"
             )
         else:
-            # We gotta subset to include het genotypes now
             if not any(i in ["1|0", "0|1", "1/0", "0/1"] for i in adata.obs[sample].unique()):
                 raise ValueError(f"Heterozygous genotypes not found for sample: {sample}.")
 
-            # adata = adata[adata.obs[sample].isin(['1|0', '0|1', '1/0', '0/1'])]
-
-            # Using copy instead of view stops implicit mod warning, need to check memory usage
+            # Copy (not view) avoids implicit-modification warning downstream.
             adata = adata[adata.obs[sample].isin(["1|0", "0|1", "1/0", "0/1"])].copy()
             adata.obs = adata.obs.reset_index(
                 drop=True
@@ -234,14 +229,9 @@ def run_ai_analysis_sc(
 
     adata_inputs = process_adata_inputs(ad.read_h5ad(ai_files.adata_file), ai_files=ai_files)
 
-    # print(*vars(ai_files).items(), sep="\n") # For debugging
-    # print(adata_inputs) # For debugging
-
     # Update class attributes
     ai_files.update_data(adata_inputs)
 
-    # adata = adata_inputs.adata # Hold parsed adata file obj in memory
-
     # Prefilter and hold adata data in memory
     adata = adata_count_qc(adata_inputs.adata, z_cutoff=ai_files.z_cutoff, gt_error=None)
 
diff --git a/src/analysis/run_compare_ai.py b/src/analysis/run_compare_ai.py
index 70c2e645..463564b9 100644
--- a/src/analysis/run_compare_ai.py
+++ b/src/analysis/run_compare_ai.py
@@ -23,7 +23,6 @@ def run_ai_comparison(
     out_file: str | Path | None = None,
     z_cutoff: float | None = None,
 ) -> None:
-    # Might be smart to change some of the defaults in the class
     # Create data class that holds input data
     ai_files: WaspAnalysisSC = WaspAnalysisSC(
         adata_file=count_file,
diff --git a/src/counting/count_alleles.py b/src/counting/count_alleles.py
index c6e1ccee..e18ddd46 100644
--- a/src/counting/count_alleles.py
+++ b/src/counting/count_alleles.py
@@ -155,13 +155,10 @@ def make_count_df(bam_file: str, df: pl.DataFrame, use_rust: bool = True) -> pl.
         orient="row",
     )
 
-    # possibly find better solution
     df = df.with_columns([pl.col("chrom").cast(chrom_enum)]).join(
         count_df, on=["chrom", "pos"], how="left"
     )
 
-    # df = df.join(count_df, on=["chrom", "pos"], how="left")
-
     return df
 
 
diff --git a/src/counting/count_alleles_sc.py b/src/counting/count_alleles_sc.py
index 104f8ef8..437b8b67 100644
--- a/src/counting/count_alleles_sc.py
+++ b/src/counting/count_alleles_sc.py
@@ -108,10 +108,8 @@ def make_count_matrix(
         AnnData object with count matrices in layers (ref, alt, other).
     """
     chrom_list = df.get_column("chrom").unique(maintain_order=True)
-    # chrom_list = chrom_list[:3] # Testing purposes
 
     # Add genotypes annotations
-    # Maybe do this automatically and parse feature col instead?
     snp_df_cols = ["chrom", "pos", "ref", "alt"]
     if include_samples is not None:
         sample_cols = list(include_samples)
@@ -121,7 +119,6 @@ def make_count_matrix(
             df = df.with_columns(pl.col("GT").alias(sample_name))
         snp_df_cols.extend(sample_cols)
 
-    # Might be more memory efficient to use pandas index instead...
     snp_df = df.select(snp_df_cols).unique(maintain_order=True).with_row_index()
 
     sc_counts = CountStatsSC()  # Class that holds total count data
@@ -152,7 +149,6 @@ def make_count_matrix(
                 )
 
     # Create sparse matrices
-    # sparse array is recommended...but doesnt work with adata
     matrix_shape = (snp_df.shape[0], len(bc_dict))
     sparse_ref = _sparse_from_counts(sc_counts.ref_count, matrix_shape)
     sparse_alt = _sparse_from_counts(sc_counts.alt_count, matrix_shape)
@@ -164,8 +160,7 @@ def make_count_matrix(
         layers={"ref": sparse_ref, "alt": sparse_alt, "other": sparse_other},
     )
 
-    # Annotate adata: Figure out what to add to adata here vs later
-    adata.obs = snp_df.to_pandas()  # Maybe just switch to pandas? Should i set no copy?
+    adata.obs = snp_df.to_pandas()
     adata.obs["ref_count"] = adata.layers["ref"].sum(axis=1, dtype=np.uint16).T.A1
     adata.obs["alt_count"] = adata.layers["alt"].sum(axis=1, dtype=np.uint16).T.A1
 
diff --git a/src/counting/filter_variant_data.py b/src/counting/filter_variant_data.py
index 43bfc21e..21768e7d 100644
--- a/src/counting/filter_variant_data.py
+++ b/src/counting/filter_variant_data.py
@@ -105,8 +105,6 @@ def gtf_to_bed(
         .select(["seqname", "start", "end", attribute])
     )
 
-    # TODO Extra validation and may want to return some data?
-
     # Write to BED
     df.write_csv(out_bed, separator="\t", include_header=False)
 
@@ -129,7 +127,6 @@ def intersect_vcf_region(
     out_file : str | Path
         Output intersection file path.
     """
-    # Parse region file before or after???
     intersect_cmd = ["bedtools", "intersect", "-a", str(vcf_file), "-b", str(region_file), "-wb"]
 
     # write intersect out
diff --git a/src/counting/parse_gene_data.py b/src/counting/parse_gene_data.py
index 682192c1..3df9ac8d 100644
--- a/src/counting/parse_gene_data.py
+++ b/src/counting/parse_gene_data.py
@@ -129,17 +129,10 @@ def parse_gene_file(
         elif df.get_column("attribute").str.contains("Name").any():
             attribute = "Name"
         else:
-            # TODO return an error
-            # TODO maybe just use region or coords as a feature
             logger.warning("No 'ID', '%s_id' or 'Name' attribute found. Please include ID", feature)
 
-    # TODO: Figure out best way to handle parent attribute
-
     # Parse parent attributes
-    # Figure out best way to parse and handle this
     if parent_attribute is None:
-        # Defaults to gene(possibly transcript???)
-
         if df.get_column("attribute").str.contains("Parent").any():
             parent_attribute = "Parent"
         elif df.get_column("attribute").str.contains("transcript_id").any():
@@ -149,7 +142,6 @@ def parse_gene_file(
         else:
             parent_attribute = attribute
 
-    # TODO: Allow for count output without parent column
     assert attribute is not None, "Could not determine attribute from gene file"
     assert parent_attribute is not None, "Could not determine parent_attribute from gene file"
     if parent_attribute == attribute:
@@ -258,12 +250,8 @@ def parse_intersect_genes(
             }
         )
 
-    # AFTER performing gtf_to_bed and intersecting!
     df = pl.scan_csv(intersect_file, separator="\t", has_header=False, infer_schema_length=0)
 
-    # Should i poossibly consider diff number of cols?
-
-    # Might want to do checks for wrong number of columns
     subset_cols = [df.columns[i] for i in [0, 2, 3, 4, -2, -1]]
     new_cols = ["chrom", "pos", "ref", "alt", attribute, parent_attribute]
 
diff --git a/src/counting/run_counting.py b/src/counting/run_counting.py
index 651fa479..196b23ed 100644
--- a/src/counting/run_counting.py
+++ b/src/counting/run_counting.py
@@ -115,7 +115,7 @@ def __init__(
         self.skip_vcf_to_bed = precomputed_vcf_bed is not None
 
         # Parse region file
-        self.region_type = None  # maybe use a boolean flag instead
+        self.region_type = None
 
         if self.region_file is not None:
             f_ext = "".join(Path(self.region_file).suffixes)
@@ -162,7 +162,6 @@ def __init__(
             )
         self.skip_intersect = precomputed_intersect is not None
 
-        # TODO UPDATE THIS WHEN I ADD AUTOPARSERS
         if self.is_gene_file:
             # Possible edge case of vcf and gtf prefix conflict
             if self.vcf_bed == self.gtf_bed:
@@ -249,15 +248,10 @@ def run_count_variants(
         precomputed_intersect=precomputed_intersect,
     )
 
-    # print(*vars(count_files).items(), sep="\n") # For debugging
     with_gt = False
     if (count_files.samples is not None) and (len(count_files.samples) == 1):
         with_gt = True
 
-        # temporarily disable for ASE
-        # if not count_files.is_gene_file:
-        #     with_gt = True
-
     # Create Intermediary Files
     if not count_files.skip_vcf_to_bed:
         vcf_to_bed(
@@ -268,7 +262,6 @@ def run_count_variants(
             include_indels=include_indels,
         )
 
-    # TODO PARSE GENE FEATURES AND ATTRIBUTES
     region_col_name = None  # Defaults to 'region' as region name
     intersect_genes = False
 
@@ -276,7 +269,6 @@ def run_count_variants(
     if count_files.region_file is not None:
         # Check if we need to prepare genes for intersection
         if count_files.gtf_bed is not None:
-            # TODO UPDATE THIS WHEN I ADD AUTOPARSERS AND VALIDATORS
             gene_data = make_gene_data(
                 gene_file=count_files.region_file,
                 out_bed=count_files.gtf_bed,
@@ -300,7 +292,6 @@ def run_count_variants(
             )
 
     # Create Variant Dataframe
-    # TODO validate
     if intersect_genes:
         df = parse_intersect_genes_new(
             intersect_file=count_files.intersect_file,
@@ -321,18 +312,9 @@ def run_count_variants(
             use_region_names=count_files.use_region_names,
             region_col=region_col_name,
         )
-        # df = parse_intersect_region(
-        #     intersect_file=count_files.intersect_file,
-        #     use_region_names=count_files.use_region_names,
-        #     region_col=region_col_name)
-
-    # Should I include a filt bam step???
 
     # Count
     count_df = make_count_df(bam_file=count_files.bam_file, df=df, use_rust=use_rust)
 
     # Write counts
     count_df.write_csv(count_files.out_file, include_header=True, separator="\t")
-
-    # Should i return for use in analysis pipeline?
-    # return count_df
diff --git a/src/counting/run_counting_sc.py b/src/counting/run_counting_sc.py
index c9c4c20b..637df8aa 100644
--- a/src/counting/run_counting_sc.py
+++ b/src/counting/run_counting_sc.py
@@ -43,12 +43,10 @@ def __init__(
         out_file: str | None = None,
         temp_loc: str | None = None,
     ) -> None:
-        # TODO: ALSO ACCEPT .h5
-
         # User input files
         self.bam_file = bam_file
         self.variant_file = variant_file
-        self.barcode_file = barcode_file  # Maybe could be optional?
+        self.barcode_file = barcode_file
 
         self.feature_file = feature_file
         self.use_region_names = use_region_names
@@ -89,7 +87,7 @@ def __init__(
         self.vcf_bed = str(Path(self.temp_loc) / f"{variant_prefix}.bed")
 
         # Parse feature file
-        self.feature_type = None  # maybe use a boolean flag instead
+        self.feature_type = None
 
         if self.feature_file is not None:
             f_ext = "".join(Path(self.feature_file).suffixes)
@@ -158,7 +156,6 @@ def run_count_variants_sc(
     )
 
     # Create intermediary files
-    # Maybe change include_gt based on preparse?
     vcf_to_bed(
         vcf_file=count_files.variant_file,
         out_bed=count_files.vcf_bed,
@@ -191,7 +188,6 @@ def run_count_variants_sc(
         ad.AnnData().write_h5ad(count_files.out_file)
         return
 
-    # TODO: handle case where barcode file contains multiple columns
     with open(count_files.barcode_file) as file:
         bc_dict = {line.rstrip(): i for i, line in enumerate(file)}
 
@@ -205,4 +201,3 @@ def run_count_variants_sc(
 
     # Write outputs
     adata.write_h5ad(count_files.out_file)
-    # TODO: include output options, (ie MTX, dense?)
diff --git a/src/mapping/intersect_variant_data.py b/src/mapping/intersect_variant_data.py
index 973965c1..db182539 100644
--- a/src/mapping/intersect_variant_data.py
+++ b/src/mapping/intersect_variant_data.py
@@ -217,9 +217,6 @@ def make_intersect_df(
 
     df = df.with_columns(expr_list).unnest([*samples, "read"]).with_columns(cast_list)
 
-    # should i remove instead of keep first?
-    df = df.unique(
-        ["chrom", "read", "mate", "start", "stop"], keep="first"
-    )  # Doesnt remove dup snp in pair?
+    df = df.unique(["chrom", "read", "mate", "start", "stop"], keep="first")
 
     return df.collect()
diff --git a/src/mapping/remap_utils.py b/src/mapping/remap_utils.py
index 416ebde7..f6b7af00 100644
--- a/src/mapping/remap_utils.py
+++ b/src/mapping/remap_utils.py
@@ -36,7 +36,6 @@ def paired_read_gen_stat(
     read_dict = {}
     discard_set = set()
 
-    # DO I need multiple iterators???
     for read in bam.fetch(chrom, multiple_iterators=False):
         if not read.is_proper_pair:
             discard_set.add(read.query_name)
diff --git a/src/mapping/run_mapping.py b/src/mapping/run_mapping.py
index 6e76382b..b5bf5e7b 100644
--- a/src/mapping/run_mapping.py
+++ b/src/mapping/run_mapping.py
@@ -282,8 +282,6 @@ def run_make_remap_reads(
         temp_loc=temp_loc,
     )
 
-    # print(*vars(wasp_files).items(), sep="\n")
-
     # Create Checks for not integrated options
     if not wasp_files.is_paired:
         raise ValueError("Single-End not Implemented")
@@ -300,7 +298,6 @@ def run_make_remap_reads(
     assert isinstance(wasp_files.samples, list), "samples should be normalized to list"
     assert wasp_files.remap_fq2 is not None, "remap_fq2 should be set when is_paired is True"
 
-    # Should I create cache that checks for premade files??
     Path(str(wasp_files.out_dir)).mkdir(parents=True, exist_ok=True)
 
     # Create Intermediary Files
@@ -329,10 +326,6 @@ def run_make_remap_reads(
         num_samples=len(wasp_files.samples),
     )
 
-    # print("INTERSECTION COMPLETE")
-
-    # If a tempdir already exists??
-
     # Create remap fq
     write_remap_bam(
         wasp_files.to_remap_bam,
@@ -345,10 +338,7 @@ def run_make_remap_reads(
         max_seqs=max_seqs,
     )
 
-    # print("WROTE READS TO BE REMAPPED")
-
     wasp_files.write_data(out_file=out_json)  # export json
-    # print(f"File Data written to JSON...\n{out_json}")
 
 
 # Decorator and Parser for post remap filtering
diff --git a/src/mapping/wasp_data_files.py b/src/mapping/wasp_data_files.py
index 97aa0a44..69cc3f38 100644
--- a/src/mapping/wasp_data_files.py
+++ b/src/mapping/wasp_data_files.py
@@ -55,14 +55,11 @@ def __init__(
 
             else:
                 self.samples = [s.strip() for s in self.samples.split(",")]
-                # self.samples = self.samples.split(",") # should i strip spaces?
 
         # At this point, self.samples is normalized to Optional[List[str]]
 
         # Check if variant file is phased (only works for VCF/BCF, not PGEN)
         if self.is_phased is None and self.samples is not None:
-            # TODO GOTTA FIX THIS TO CHECK IF PHASED
-            # Note: This only works for VCF/BCF files, PGEN doesn't store phase in the same way
             variant_path = Path(self.variant_file)
             suffix = variant_path.suffix.lower()
             if suffix in (".vcf", ".bcf") or str(variant_path).lower().endswith(".vcf.gz"):
@@ -79,23 +76,18 @@ def __init__(
                     if n_checked > 0 and n_phased > n_checked // 2:
                         self.is_phased = True
                     else:
-                        # TODO GOTTA WARN UNPHASED BAD
-                        # TODO WARN SOME UNPHASED WHILE OTHERS PHASED
                         self.is_phased = False
             else:
                 # PGEN format - assume phased (user should specify if not)
                 self.is_phased = True
 
         if self.out_dir is None:
-            self.out_dir = Path(bam_file).parent  # change to cwd?
+            self.out_dir = Path(bam_file).parent
 
-        # TODO handle temp loc, maybe make default if temp not made?
-        # Temporary workaround until figure out temp dir options
         if self.temp_loc is None:
             self.temp_loc = self.out_dir
 
         # Generate intermediate files
-        # Maybe use easy defalt names if temp loc in use
 
         # Handle different variant file extensions for prefix extraction
         variant_name = Path(self.variant_file).name