diff --git a/bugbug/comment_features.py b/bugbug/comment_features.py
new file mode 100644
index 0000000000..814b9109c5
--- /dev/null
+++ b/bugbug/comment_features.py
@@ -0,0 +1,229 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import sys
+from datetime import datetime
+from typing import Any
+
+import pandas as pd
+from sklearn.base import BaseEstimator, TransformerMixin
+
+from bugbug.utils import extract_urls_and_domains
+
+
+class CommentFeature(object):
+    pass
+
+
+class CommentExtractor(BaseEstimator, TransformerMixin):
+    def __init__(
+        self,
+        feature_extractors,
+        cleanup_functions,
+    ):
+        assert len(set(type(fe) for fe in feature_extractors)) == len(
+            feature_extractors
+        ), "Duplicate Feature Extractors"
+        self.feature_extractors = feature_extractors
+
+        assert len(set(type(cf) for cf in cleanup_functions)) == len(
+            cleanup_functions
+        ), "Duplicate Cleanup Functions"
+        self.cleanup_functions = cleanup_functions
+
+    def fit(self, x, y=None):
+        for feature in self.feature_extractors:
+            if hasattr(feature, "fit"):
+                feature.fit(x())
+
+        return self
+
+    def transform(self, items):
+        items_iter = iter(items())
+
+        def apply_transform(item):
+            _, comment = item
+            data = {}
+
+            for feature_extractor in self.feature_extractors:
+                res = feature_extractor(item)
+
+                if hasattr(feature_extractor, "name"):
+                    feature_extractor_name = feature_extractor.name
+                else:
+                    feature_extractor_name = feature_extractor.__class__.__name__
+
+                if res is None:
+                    continue
+
+                if isinstance(res, dict):
+                    for key, value in res.items():
+                        data[sys.intern(key)] = value
+                    continue
+
+                if isinstance(res, (list, set)):
+                    # Don't rebind `item` here: the (bug, comment) tuple is
+                    # still needed by the remaining feature extractors.
+                    for element in res:
+                        data[sys.intern(f"{element} in {feature_extractor_name}")] = True
+                    continue
+
+                data[feature_extractor_name] = res
+
+            comment_text = comment["text"]
+            for cleanup_function in self.cleanup_functions:
+                comment_text = cleanup_function(comment_text)
+
+            return {
+                "data": data,
+                "comment_text": comment_text,
+            }
+
+        return pd.DataFrame(apply_transform(item) for item in items_iter)
+
+
+class CommentCreatorIsBugCreator(CommentFeature):
+    name = "Comment Creator is the Bug Creator"
+
+    def __call__(self, item, **kwargs) -> Any:
+        bug, comment = item
+
+        return bug["creator"] == comment["creator"]
+
+
+class NumberOfLinks(CommentFeature):
+    name = "Number of Links in the comment"
+
+    def __init__(self, domains_to_ignore=set()):
+        self.known_domains = domains_to_ignore
+
+    def __call__(self, item, **kwargs) -> Any:
+        _, comment = item
+
+        domains = extract_urls_and_domains(comment["text"])["domains"]
+
+        return {
+            "# of Known links": sum(domain in self.known_domains for domain in domains),
+            "# of Unknown links": sum(
+                domain not in self.known_domains for domain in domains
+            ),
+            "Total # of links": len(domains),
+        }
+
+
+class CharacterCount(CommentFeature):
+    name = "# of Characters in the Comment"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        return len(comment["text"])
+
+
+class WordCount(CommentFeature):
+    name = "# of Words in the Comment"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        return len(comment["text"].split())
+
+
+class UnknownLinkAtBeginning(CommentFeature):
+    name = "Unknown Link found at Beginning of the Comment"
+
+    def __init__(self, domains_to_ignore=set()):
+        self.known_domains = domains_to_ignore
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        urls = extract_urls_and_domains(comment["text"], self.known_domains)["urls"]
+
+        words = comment["text"].split()
+        return words[0] in urls if words else False
+
+
+class UnknownLinkAtEnd(CommentFeature):
+    name = "Unknown Link found at End of the Comment"
+
+    def __init__(self, domains_to_ignore=set()):
+        self.known_domains = domains_to_ignore
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        urls = extract_urls_and_domains(comment["text"], self.known_domains)["urls"]
+
+        words = comment["text"].split()
+        return words[-1] in urls if words else False
+
+
+class HourOfDay(CommentFeature):
+    name = "Hour of the Day (0-23)"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        comment_time = datetime.strptime(comment["creation_time"], "%Y-%m-%dT%H:%M:%SZ")
+        return comment_time.hour
+
+
+class Weekday(CommentFeature):
+    name = "Day of the Week (1-7)"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        comment_time = datetime.strptime(comment["creation_time"], "%Y-%m-%dT%H:%M:%SZ")
+        return comment_time.isoweekday()
+
+
+class PostedOnWeekend(CommentFeature):
+    name = "Comment was Posted on Weekend"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        comment_time = datetime.strptime(comment["creation_time"], "%Y-%m-%dT%H:%M:%SZ")
+        # isoweekday(): Monday is 1 and Sunday is 7, so the weekend is (6, 7).
+        return comment_time.isoweekday() in (6, 7)
+
+
+class DayOfYear(CommentFeature):
+    name = "Day of the Year (1-366)"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        comment_time = datetime.strptime(comment["creation_time"], "%Y-%m-%dT%H:%M:%SZ")
+        return comment_time.timetuple().tm_yday
+
+
+class WeekOfYear(CommentFeature):
+    name = "Week of Year"
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        comment_time = datetime.strptime(comment["creation_time"], "%Y-%m-%dT%H:%M:%SZ")
+        return comment_time.isocalendar()[1]
+
+
+class CommentTags(CommentFeature):
+    name = "Comment Tags"
+
+    def __init__(self, to_ignore=set()):
+        self.to_ignore = to_ignore
+
+    def __call__(self, item, **kwargs):
+        _, comment = item
+
+        return [tag for tag in comment["tags"] if tag not in self.to_ignore]
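A minimal sketch of how the extractor above can be driven. The toy bug/comment dicts and the particular extractor choices are illustrative, not part of the patch; only the fields the feature classes actually read (creator, text, creation_time, tags) are filled in:

```python
from bugbug import comment_features, feature_cleanup

bug = {"creator": "reporter@example.com"}
comment = {
    "creator": "reporter@example.com",
    "text": "See https://spam.example.net for a great offer",
    "creation_time": "2024-03-02T14:05:00Z",  # a Saturday
    "tags": [],
}

extractor = comment_features.CommentExtractor(
    feature_extractors=[
        comment_features.WordCount(),
        comment_features.PostedOnWeekend(),
        comment_features.CommentCreatorIsBugCreator(),
    ],
    cleanup_functions=[feature_cleanup.url()],
)

# fit() and transform() both expect a callable that returns the items, so the
# underlying generator can be re-created for each pass over the data.
def items():
    return [(bug, comment)]

df = extractor.fit(items).transform(items)
print(df.iloc[0]["data"])  # feature name -> value mapping for this comment
```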
diff --git a/bugbug/model.py b/bugbug/model.py
index 92de0c34f2..a0699979f6 100644
--- a/bugbug/model.py
+++ b/bugbug/model.py
@@ -174,6 +174,8 @@ def __init__(self, lemmatization=False):
 
         self.store_dataset = False
 
+        self.use_scale_pos_weight = False
+
         self.entire_dataset_training = False
 
         # DBs required for training.
@@ -216,6 +218,8 @@ def get_human_readable_feature_names(self):
                 feature_name = f"Comments contain '{feature_name}'"
             elif type_ == "text":
                 feature_name = f"Combined text contains '{feature_name}'"
+            elif type_ == "comment_text":
+                feature_name = f"Comment text contains '{feature_name}'"
             elif type_ == "files":
                 feature_name = f"File '{feature_name}'"
             elif type_ not in ("data", "couple_data"):
@@ -388,6 +392,21 @@ def train(self, importance_cutoff=0.15, limit=None):
 
         # Split dataset in training and test.
         X_train, X_test, y_train, y_test = self.train_test_split(X, y)
 
+        # Use scale_pos_weight to help in extremely imbalanced datasets.
+        if self.use_scale_pos_weight and is_binary:
+            negative_samples = sum(label == 0 for label in y_train)
+            positive_samples = sum(label == 1 for label in y_train)
+            logger.info("Negative Samples: %d", negative_samples)
+            logger.info("Positive Samples: %d", positive_samples)
+
+            scale_pos_weight = (negative_samples / positive_samples) / 10
+
+            logger.info("Scale Pos Weight: %f", scale_pos_weight)
+
+            self.clf.named_steps["estimator"].set_params(
+                scale_pos_weight=scale_pos_weight
+            )
+
         tracking_metrics = {}
 
         # Use k-fold cross validation to evaluate results.
@@ -803,3 +822,18 @@ def items_gen(self, classes):
                 continue
 
             yield issue, classes[issue_number]
+
+
+class CommentModel(Model):
+    def __init__(self, lemmatization=False):
+        Model.__init__(self, lemmatization)
+        self.training_dbs = [bugzilla.BUGS_DB]
+
+    def items_gen(self, classes):
+        for bug in bugzilla.get_bugs():
+            for comment in bug["comments"]:
+                comment_id = comment["id"]
+                if comment_id not in classes:
+                    continue
+
+                yield (bug, comment), classes[comment_id]
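For intuition, here is the `scale_pos_weight` computation from `train()` applied to made-up label counts (990 negatives vs. 10 positives; the numbers are only illustrative):

```python
import xgboost

y_train = [0] * 990 + [1] * 10
negative_samples = sum(label == 0 for label in y_train)
positive_samples = sum(label == 1 for label in y_train)

# The patch uses a tenth of the raw negative/positive ratio, rather than the
# full ratio that the XGBoost docs suggest as a starting point.
scale_pos_weight = (negative_samples / positive_samples) / 10  # (990 / 10) / 10 == 9.9

clf = xgboost.XGBClassifier(scale_pos_weight=scale_pos_weight)
```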
diff --git a/bugbug/models/__init__.py b/bugbug/models/__init__.py
index 1441e59bea..cc48fcca64 100644
--- a/bugbug/models/__init__.py
+++ b/bugbug/models/__init__.py
@@ -29,6 +29,7 @@
     "regressionrange": "bugbug.models.regressionrange.RegressionRangeModel",
     "regressor": "bugbug.models.regressor.RegressorModel",
     "spambug": "bugbug.models.spambug.SpamBugModel",
+    "spamcomment": "bugbug.models.spamcomment.SpamCommentModel",
     "stepstoreproduce": "bugbug.models.stepstoreproduce.StepsToReproduceModel",
     "testlabelselect": "bugbug.models.testselect.TestLabelSelectModel",
     "testgroupselect": "bugbug.models.testselect.TestGroupSelectModel",
diff --git a/bugbug/models/spamcomment.py b/bugbug/models/spamcomment.py
new file mode 100644
index 0000000000..6d24a07ed9
--- /dev/null
+++ b/bugbug/models/spamcomment.py
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import logging
+
+import xgboost
+from imblearn.over_sampling import BorderlineSMOTE
+from imblearn.pipeline import Pipeline as ImblearnPipeline
+from sklearn.compose import ColumnTransformer
+from sklearn.feature_extraction import DictVectorizer
+from sklearn.pipeline import Pipeline
+
+from bugbug import bugzilla, comment_features, feature_cleanup, repository, utils
+from bugbug.model import CommentModel
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+SAFE_DOMAINS = {"github.com", "mozilla.com", "mozilla.org"}
+
+
+class SpamCommentModel(CommentModel):
+    def __init__(self, lemmatization=True):
+        CommentModel.__init__(self, lemmatization)
+
+        self.calculate_importance = False
+
+        self.use_scale_pos_weight = True
+
+        self.commit_emails = {
+            commit["author_email"]
+            for commit in repository.get_commits(include_backouts=True)
+        }
+
+        feature_extractors = [
+            comment_features.NumberOfLinks(SAFE_DOMAINS),
+            comment_features.WordCount(),
+            comment_features.HourOfDay(),
+            comment_features.DayOfYear(),
+            comment_features.Weekday(),
+            comment_features.UnknownLinkAtBeginning(SAFE_DOMAINS),
+            comment_features.UnknownLinkAtEnd(SAFE_DOMAINS),
+            comment_features.CommentCreatorIsBugCreator(),
+        ]
+
+        cleanup_functions = [
+            feature_cleanup.fileref(),
+            feature_cleanup.url(),
+            feature_cleanup.synonyms(),
+        ]
+
+        self.extraction_pipeline = Pipeline(
+            [
+                (
+                    "comment_extractor",
+                    comment_features.CommentExtractor(
+                        feature_extractors, cleanup_functions
+                    ),
+                ),
+            ]
+        )
+
+        self.clf = ImblearnPipeline(
+            [
+                (
+                    "union",
+                    ColumnTransformer(
+                        [
+                            ("data", DictVectorizer(), "data"),
+                            (
+                                "comment_text",
+                                self.text_vectorizer(min_df=0.0001),
+                                "comment_text",
+                            ),
+                        ]
+                    ),
+                ),
+                (
+                    "sampler",
+                    BorderlineSMOTE(random_state=0),
+                ),
+                (
+                    "estimator",
+                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
+                ),
+            ]
+        )
+    @staticmethod
+    def __download_older_bugs_with_spam_comments() -> None:
+        """Retrieve older bugs that have comments tagged as spam.
+
+        This function provides an option to extend the dataset used for model
+        training by including older spam comments.
+        """
+        params = {
+            "f1": "comment_tag",
+            "o1": "substring",
+            "v1": "spam",
+            "product": bugzilla.PRODUCTS,
+        }
+
+        logger.info("Downloading older bugs...")
+        bugs_ids = bugzilla.get_ids(params)
+        older_bugs = bugzilla.download_bugs(bugs_ids)
+
+        logger.info("%d older bugs have been downloaded.", len(older_bugs))
+
+    def is_safe_comment(self, comment) -> bool:
+        """Determine whether a comment is certainly safe (not spam).
+
+        This function applies filtering rules to identify comments that are
+        likely authored by legitimate contributors or bots. Such comments are
+        definitely not spam.
+        """
+        return any(
+            [
+                comment["creator"] in self.commit_emails,
+                "@mozilla" in comment["creator"],
+                "@softvision" in comment["creator"],
+            ]
+        )
+
+    def get_labels(self):
+        classes = {}
+
+        self.__download_older_bugs_with_spam_comments()
+
+        for bug in bugzilla.get_bugs():
+            for comment in bug["comments"]:
+                comment_id = comment["id"]
+
+                if any(
+                    [
+                        # Comment 0 is the bug's description, not a real comment.
+                        comment["count"] == 0,
+                        self.is_safe_comment(comment),
+                        "[redacted -" in comment["text"],
+                        "(comment removed)" in comment["text"],
+                    ]
+                ):
+                    continue
+
+                if "spam" in comment["tags"]:
+                    classes[comment_id] = 1
+                else:
+                    classes[comment_id] = 0
+
+        logger.info(
+            "%d comments are classified as non-spam",
+            sum(label == 0 for label in classes.values()),
+        )
+        logger.info(
+            "%d comments are classified as spam",
+            sum(label == 1 for label in classes.values()),
+        )
+
+        return classes, [0, 1]
+
+    def items_gen(self, classes):
+        return (
+            ((bug, comment), classes[comment["id"]])
+            for bug in bugzilla.get_bugs()
+            for comment in bug["comments"]
+            if comment["id"] in classes
+        )
+
+    def get_feature_names(self):
+        return self.clf.named_steps["union"].get_feature_names_out()
+
+    def overwrite_classes(self, comments, classes, probabilities):
+        for i, comment in enumerate(comments):
+            if self.is_safe_comment(comment):
+                if probabilities:
+                    classes[i] = [1.0, 0.0]
+                else:
+                    classes[i] = 0
+
+        return classes
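One reason the classifier is wrapped in an imblearn pipeline rather than a plain sklearn one: the resampling step only runs during fit and is bypassed at prediction time. A standalone sketch on synthetic data (none of these names come from the patch):

```python
import xgboost
from imblearn.over_sampling import BorderlineSMOTE
from imblearn.pipeline import Pipeline as ImblearnPipeline
from sklearn.datasets import make_classification

# Synthetic, heavily imbalanced data (~5% positives).
X, y = make_classification(n_samples=500, weights=[0.95], random_state=0)

pipeline = ImblearnPipeline(
    [
        ("sampler", BorderlineSMOTE(random_state=0)),
        ("estimator", xgboost.XGBClassifier(n_jobs=1)),
    ]
)

pipeline.fit(X, y)       # the sampler synthesizes minority examples here
pipeline.predict(X[:5])  # the sampler is skipped at inference time
```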
+ """ + return any( + [ + comment["creator"] in self.commit_emails, + "@mozilla" in comment["creator"], + "@softvision" in comment["creator"], + ] + ) + + def get_labels(self): + classes = {} + + self.__download_older_bugs_with_spam_comments() + + for bug in bugzilla.get_bugs(): + for comment in bug["comments"]: + comment_id = comment["id"] + + if any( + [ + comment["count"] == "0", + self.is_safe_comment(comment), + "[redacted -" in comment["text"], + "(comment removed)" in comment["text"], + ] + ): + continue + + if "spam" in comment["tags"]: + classes[comment_id] = 1 + else: + classes[comment_id] = 0 + + logger.info( + "%d comments are classified as non-spam", + sum(label == 0 for label in classes.values()), + ) + logger.info( + "%d comments are classified as spam", + sum(label == 1 for label in classes.values()), + ) + + return classes, [0, 1] + + def items_gen(self, classes): + return ( + ((bug, comment), classes[comment["id"]]) + for bug in bugzilla.get_bugs() + for comment in bug["comments"] + if comment["id"] in classes + ) + + def get_feature_names(self): + return self.clf.named_steps["union"].get_feature_names_out() + + def overwrite_classes(self, comments, classes, probabilities): + for i, comment in enumerate(comments): + if self.is_safe_comment(comment): + if probabilities: + classes[i] = [1.0, 0.0] + else: + classes[i] = 0 + + return classes diff --git a/bugbug/utils.py b/bugbug/utils.py index d04778bd4f..e7bba56f19 100644 --- a/bugbug/utils.py +++ b/bugbug/utils.py @@ -27,6 +27,7 @@ import requests import scipy import taskcluster +import tldextract import zstandard from pkg_resources import DistributionNotFound from requests.packages.urllib3.util.retry import Retry @@ -558,3 +559,32 @@ def escape_markdown(text: str) -> str: def keep_as_is(x): """A tokenizer that does nothing.""" return x + + +def extract_urls_and_domains(text: str, domains_to_ignore: set = set()) -> dict: + """Extracts URLs and domains from a given text, optionally filtering out ignored domains. + + Args: + - text: The input text string where URLs and domains need to be found. + - domains_to_ignore: A set of domain names to exclude from the results. e.g. mozilla.com + + Returns: + A dictionary containing: + - "urls": A list of extracted URLs. + - "domains": A list of extracted domain names (excluding ignored domains if provided). 
diff --git a/requirements.txt b/requirements.txt
index 26ad78dc73..a8cdf8968d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -28,6 +28,7 @@ shap[plots]==0.44.1
 tabulate==0.9.0
 taskcluster==60.4.2
 tenacity==8.2.3
+tldextract==5.1.1
 tqdm==4.66.2
 xgboost==2.0.3
 zstandard==0.22.0
diff --git a/tests/test_utils.py b/tests/test_utils.py
index fb1e700eab..6b93f62293 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -466,3 +466,111 @@ def test_StructuredColumnTransformer() -> None:
         .view(np.dtype("int64")),
         ColumnTransformer(transformers).fit_transform(df),
     )
+
+
+@pytest.mark.parametrize(
+    "test_input, expected_urls, expected_domains",
+    [
+        ("This is a sample text without any links.", [], []),
+        (
+            "Visit https://www.testdomain.com for more info.",
+            ["https://www.testdomain.com"],
+            ["testdomain.com"],
+        ),
+        (
+            "Links: http://www.example.com but ignore https://www.mozilla.com",
+            ["http://www.example.com"],
+            ["example.com"],
+        ),
+        (
+            "Check out https://example.org ,sign up on www.anothersite.net and proceed to https://firefox.mozilla.org",
+            ["https://example.org", "www.anothersite.net"],
+            ["example.org", "anothersite.net"],
+        ),
+        (
+            "Visit https://www.example.org.uk ,sign up on www.anothersite.net.ac and proceed to www.test.mozilla.org",
+            ["https://www.example.org.uk", "www.anothersite.net.ac"],
+            ["example.org.uk", "anothersite.net.ac"],
+        ),
+        (
+            "Check out http://example.com/a/abc/cat.jpg ,sign up on www.anothersite.net/abc/cde and proceed to https://firefox.mozilla.com/download/macos",
+            ["http://example.com/a/abc/cat.jpg", "www.anothersite.net/abc/cde"],
+            ["example.com", "anothersite.net"],
+        ),
+        (
+            "Visit https://www.example.org.uk/a/abc/cat.jpg ,sign up on www.anothersite.net.ac/abc/cde and visit https://www.mozilla.com/signup",
+            [
+                "https://www.example.org.uk/a/abc/cat.jpg",
+                "www.anothersite.net.ac/abc/cde",
+            ],
+            ["example.org.uk", "anothersite.net.ac"],
+        ),
+    ],
+)
+def test_url_extraction_ignore_domains(test_input, expected_urls, expected_domains):
+    """Tests extraction of URLs and domains while ignoring some domains."""
+    domains_to_ignore = {"mozilla.com", "mozilla.org"}
+    result = utils.extract_urls_and_domains(test_input, domains_to_ignore)
+
+    assert result["urls"] == expected_urls
+    assert result["domains"] == expected_domains
+
+
+@pytest.mark.parametrize(
+    "test_input, expected_urls, expected_domains",
+    [
+        ("This is a sample text without any links.", [], []),
+        (
+            "Visit https://www.testdomain.com for more info.",
+            ["https://www.testdomain.com"],
+            ["testdomain.com"],
+        ),
+        (
+            "Links: http://www.example.com , but do not ignore https://www.mozilla.com",
+            ["http://www.example.com", "https://www.mozilla.com"],
+            ["example.com", "mozilla.com"],
+        ),
+        (
+            "Check out https://example.org ,sign up on www.anothersite.net and proceed to https://firefox.mozilla.org",
+            [
+                "https://example.org",
+                "www.anothersite.net",
+                "https://firefox.mozilla.org",
+            ],
+            ["example.org", "anothersite.net", "mozilla.org"],
+        ),
+        (
+            "Visit https://www.example.org.uk ,sign up on www.anothersite.net.ac and proceed to www.test.mozilla.org",
+            [
+                "https://www.example.org.uk",
+                "www.anothersite.net.ac",
+                "www.test.mozilla.org",
+            ],
+            ["example.org.uk", "anothersite.net.ac", "mozilla.org"],
+        ),
+        (
+            "Check out http://example.com/a/abc/cat.jpg ,sign up on www.anothersite.net/abc/cde and proceed to https://firefox.mozilla.com/download/macos",
+            [
+                "http://example.com/a/abc/cat.jpg",
+                "www.anothersite.net/abc/cde",
+                "https://firefox.mozilla.com/download/macos",
+            ],
+            ["example.com", "anothersite.net", "mozilla.com"],
+        ),
+        (
+            "Visit http://www.example.org.uk/a/abc/cat.jpg ,sign up on www.anothersite.net.ac/abc/cde and visit https://www.mozilla.com/signup",
+            [
+                "http://www.example.org.uk/a/abc/cat.jpg",
+                "www.anothersite.net.ac/abc/cde",
+                "https://www.mozilla.com/signup",
+            ],
+            ["example.org.uk", "anothersite.net.ac", "mozilla.com"],
+        ),
+    ],
+)
+def test_url_extraction(test_input, expected_urls, expected_domains):
+    """Tests extraction of URLs and domains without ignoring domains."""
+    result = utils.extract_urls_and_domains(test_input)
+
+    assert result["urls"] == expected_urls
+    assert result["domains"] == expected_domains
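A note on the expectations above: the tests expect "example.org.uk" rather than "org.uk" because tldextract consults the Public Suffix List, so multi-part suffixes are recognized. For instance:

```python
import tldextract

ext = tldextract.extract("https://www.example.org.uk/a/abc/cat.jpg")
print(ext.subdomain, ext.domain, ext.suffix)  # www example org.uk
print(ext.registered_domain)                  # example.org.uk
```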