From eab9d915ce0f55fc57385039b463cb362297c2db Mon Sep 17 00:00:00 2001 From: Odin H B Date: Tue, 27 Jun 2023 23:58:05 +0200 Subject: [PATCH 1/5] these names are never used anymore --- commit.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/commit.py b/commit.py index 8f5a1e2..b2b2326 100644 --- a/commit.py +++ b/commit.py @@ -16,11 +16,6 @@ define("port", default=5000, help="run on the given port", type=int) -names = ['Nick', 'Steve', 'Andy', 'Qi', 'Fanny', 'Sarah', 'Cord', 'Todd', - 'Chris', 'Pasha', 'Gabe', 'Tony', 'Jason', 'Randal', 'Ali', 'Kim', - 'Rainer', 'Guillaume', 'Kelan', 'David', 'John', 'Stephen', 'Tom', 'Steven', - 'Jen', 'Marcus', 'Edy', 'Rachel', 'Ethan', 'Dan', 'Darren', 'Greg'] - humans_file = os.path.join(os.path.dirname(__file__), 'static', 'humans.txt') messages_file = os.path.join(os.path.dirname(__file__), 'commit_messages.txt') messages: Dict[str, str] = {} From d833c0f62ac37d1da8cc90246e955c40b4abc4bc Mon Sep 17 00:00:00 2001 From: Odin H B Date: Wed, 28 Jun 2023 00:47:01 +0200 Subject: [PATCH 2/5] add self to humans i wanna be in the messages! 
:D --- static/humans.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/static/humans.txt b/static/humans.txt index 002f3f6..f40f8a4 100644 --- a/static/humans.txt +++ b/static/humans.txt @@ -82,6 +82,7 @@ Name: github:KneeNinetySeven Name: Rafael Reis (github:reisraff) Name: github:ShanTulshi Name: Jonatha Daguerre (github:jonathadv) +Name: Odin Heggvold Bekkelund (github: odinhb) /* SITE */ Last update: 2014/04/17 From f24e7d858ade96e251e0a5f958f761ef91357136 Mon Sep 17 00:00:00 2001 From: Odin H B Date: Wed, 28 Jun 2023 00:59:59 +0200 Subject: [PATCH 3/5] Split generator into separate python module --- .gitignore | 1 + commit.py | 79 +++++++++-------------------------------------------- messages.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 66 deletions(-) create mode 100644 .gitignore create mode 100644 messages.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a348e50 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/__pycache__/ diff --git a/commit.py b/commit.py index b2b2326..e2d09c7 100644 --- a/commit.py +++ b/commit.py @@ -1,12 +1,6 @@ import os -import sys -import random -import re import json import signal -from typing import Dict, List - -from hashlib import md5 import tornado.httpserver import tornado.ioloop @@ -14,71 +8,22 @@ from tornado.escape import xhtml_unescape from tornado.options import define, options -define("port", default=5000, help="run on the given port", type=int) - -humans_file = os.path.join(os.path.dirname(__file__), 'static', 'humans.txt') -messages_file = os.path.join(os.path.dirname(__file__), 'commit_messages.txt') -messages: Dict[str, str] = {} - -# Create a hash table of all commit messages -with open(messages_file, 'r', encoding='utf-8') as messages_input: - for line in messages_input.readlines(): - messages[md5(line.encode('utf-8')).hexdigest()] = line - -names: List[str] = [] - -with open(humans_file, 'r', encoding='utf-8') as humans_input: - 
humans_content = humans_input.read() - for line in humans_content.split("\n"): - if "Name:" in line: - data = line[6:].rstrip() - if data.find("github:") == 0: - names.append(data[7:]) - else: - names.append(data.split(" ")[0]) - -num_re = re.compile(r"XNUM([0-9,]*)X") - -def fill_line(message): - message = message.replace('XNAMEX', random.choice(names)) - message = message.replace('XUPPERNAMEX', random.choice(names).upper()) - message = message.replace('XLOWERNAMEX', random.choice(names).lower()) - - nums = num_re.findall(message) - - while nums: - start = 1 - end = 999 - value = nums.pop(0) or str(end) - if "," in value: - position = value.index(",") - if position == 0: # XNUM,5X - end = int(value[1:]) - elif position == len(value) - 1: # XNUM5,X - start = int(value[:position]) - else: # XNUM1,5X - start = int(value[:position]) - end = int(value[position+1:]) - else: - end = int(value) - if start > end: - end = start * 2 - - randint = random.randint(start, end) - message = num_re.sub(str(randint), message, count=1) +import messages - return message +define("port", default=5000, help="run on the given port", type=int) class MainHandler(tornado.web.RequestHandler): def get(self, message_hash=None): - if not message_hash: - message_hash = random.choice(list(messages.keys())) - elif message_hash not in messages: - raise tornado.web.HTTPError(404) + found_message = messages.find_by_md5(message_hash) - message = fill_line(messages[message_hash]) + if message_hash and not found_message: + raise tornado.web.HTTPError(404) - self.output_message(message, message_hash) + if found_message: + self.output_message(found_message, message_hash) + else: + message, generated_message_hash = messages.generate() + self.output_message(message, generated_message_hash) def output_message(self, message, message_hash): self.set_header('X-Message-Hash', message_hash) @@ -129,6 +74,8 @@ def try_exit(self): tornado.options.parse_command_line() signal.signal(signal.SIGINT, 
application.signal_handler) http_server = tornado.httpserver.HTTPServer(application) - http_server.listen(os.environ.get("PORT", 5000)) + port = os.environ.get("PORT", 5000) + print("ready for requests (on port %s)" % (port)) + http_server.listen(port) tornado.ioloop.PeriodicCallback(application.try_exit, 100).start() tornado.ioloop.IOLoop.instance().start() diff --git a/messages.py b/messages.py new file mode 100644 index 0000000..9606bb7 --- /dev/null +++ b/messages.py @@ -0,0 +1,76 @@ +import os +import random +import re +from typing import Dict, List +from hashlib import md5 + +def generate(): + digest = _pick_random_key(templates) + msg = templates[digest] + return (msg, digest) + +def find_by_md5(md5): + if md5 not in templates: + return None + else: + t = templates[md5] + return _fill_template(t) + +this_file = os.path.dirname(__file__) +humans_file = os.path.join(this_file, 'static', 'humans.txt') +all_messages_file = os.path.join(this_file, 'commit_messages.txt') +tmp = os.path.join(this_file, 'tmp') +os.makedirs(tmp, exist_ok=True) + +templates: Dict[str, str] = {} +names: List[str] = [] + +# Create a hash table of all commit message templates +print("hashing messages...") +with open(all_messages_file, 'r', encoding='utf-8') as f: + for line in f: + templates[md5(line.encode('utf-8')).hexdigest()] = line + +with open(humans_file, 'r', encoding='utf-8') as f: + for line in f: + if "Name:" in line: + data = line[6:].rstrip() + if data.find("github:") == 0: + names.append(data[7:]) + else: + names.append(data.split(" ")[0]) + +def _pick_random_key(templates): + return random.choice(list(templates.keys())) + +num_re = re.compile(r"XNUM([0-9,]*)X") + +def _fill_template(txt): + txt = txt.replace('XNAMEX', random.choice(names)) + txt = txt.replace('XUPPERNAMEX', random.choice(names).upper()) + txt = txt.replace('XLOWERNAMEX', random.choice(names).lower()) + + nums = num_re.findall(txt) + + while nums: + start = 1 + end = 999 + value = nums.pop(0) or str(end) + 
if "," in value: + position = value.index(",") + if position == 0: # XNUM,5X + end = int(value[1:]) + elif position == len(value) - 1: # XNUM5,X + start = int(value[:position]) + else: # XNUM1,5X + start = int(value[:position]) + end = int(value[position+1:]) + else: + end = int(value) + if start > end: + end = start * 2 + + randint = random.randint(start, end) + txt = num_re.sub(str(randint), txt, count=1) + + return txt From 826905b69cfab844385f201220dca272caec58e0 Mon Sep 17 00:00:00 2001 From: Odin H B Date: Fri, 7 Jul 2023 21:59:12 +0200 Subject: [PATCH 4/5] holy contrast batman i don't have poor eyesight but staring at the page for long enough i noticed that the link was really hard to see --- CONTRIBUTING.md | 2 +- index.html | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3fa4a27..b16c740 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ ## Sorting -Don't bother sorting the file. Every now and then I'll sort and clean it up. +Don't bother sorting the files. Every now and then I'll sort and clean up. 
## License diff --git a/index.html b/index.html index a9dda9b..9dd32df 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,8 @@ } .permalink { font-size: .5em; - color: #ddd; + /* ensure compliance w/ AAA WCAG standards for accessible text */ + color: #595959; line-height: 1em; } .permalink a { From 61f715b853a79d4d19c0e662db21c358df6c2e6f Mon Sep 17 00:00:00 2001 From: Odin H B Date: Fri, 7 Jul 2023 23:07:37 +0200 Subject: [PATCH 5/5] add ?censor and ?safe params so we can use it at work enabling everyone to shit up their workplaces :D --- README.markdown | 6 ++ commit.py | 10 +- commit_messages/README.md | 58 ++++++++++++ commit_messages/censorable.txt | 44 +++++++++ .../safe.txt | 46 --------- commit_messages/unsafe.txt | 2 + index.html | 18 +++- messages.py | 94 +++++++++++++++---- 8 files changed, 210 insertions(+), 68 deletions(-) create mode 100644 commit_messages/README.md create mode 100644 commit_messages/censorable.txt rename commit_messages.txt => commit_messages/safe.txt (92%) create mode 100644 commit_messages/unsafe.txt diff --git a/README.markdown b/README.markdown index e3b5d7d..8020446 100644 --- a/README.markdown +++ b/README.markdown @@ -15,6 +15,12 @@ Some interesting usage for that can be: git config --global alias.yolo '!git commit -m "$(curl -s https://whatthecommit.com/index.txt)"' ``` +## Make it safe for work + + https://whatthecommit.com/?safe filters out any unsafe or swearing messages + https://whatthecommit.com/?censor censors swear words instead, and filters out messages that wouldn't be funny if censored + https://whatthecommit.com/?censor=* censors using the characters you supply instead of the default grawlix + Or use one of the following VSCode Extensions: - [WhatTheCommit](https://marketplace.visualstudio.com/items?itemName=Gaardsholt.vscode-whatthecommit) diff --git a/commit.py b/commit.py index e2d09c7..493184e 100644 --- a/commit.py +++ b/commit.py @@ -14,7 +14,12 @@ class MainHandler(tornado.web.RequestHandler): def get(self, message_hash=None): - found_message = 
messages.find_by_md5(message_hash) + safe_only = self.get_argument("safe", default=False) != False + censor = self.get_argument("censor", default=False) + if censor == "": + censor = True + + found_message = messages.find_by_md5(message_hash, censor=censor) if message_hash and not found_message: raise tornado.web.HTTPError(404) @@ -22,7 +27,8 @@ def get(self, message_hash=None): if found_message: self.output_message(found_message, message_hash) else: - message, generated_message_hash = messages.generate() + message, generated_message_hash = ( + messages.generate(safe_only=safe_only, censor=censor)) self.output_message(message, generated_message_hash) def output_message(self, message, message_hash): diff --git a/commit_messages/README.md b/commit_messages/README.md new file mode 100644 index 0000000..94cbcb8 --- /dev/null +++ b/commit_messages/README.md @@ -0,0 +1,58 @@ +These are the source text files from which all the commit messages +are generated. + +- put commit messages with swear words in censorable.txt; censoring +them with [grawlixes](https://en.wikipedia.org/wiki/Grawlix) keeps them funny + +- put other NSFW messages in unsafe.txt, because they cannot be made funny and safe +by using grawlixes + +- put safe for work messages in safe.txt. + +## Script for filtering of swear words + +This was used to create the initial split of commit_messages.txt into safe.txt and unsafe.txt. 
+ +```py +BAD_WORDS = [ + "shit", + "piss", + "fuck", + "cunt", + "cocksucker", + "motherfucker", + "tits", + + "cock", + "fucker", + + "fart", + "turd", + "twat", + + "dicksucker", + "fucking", + + "sex", + "sexy", +] + +safe_f = open("commit_messages/safe.txt", "w") +unsafe_f = open("commit_messages/unsafe.txt", "w") + +with open("./commit_messages.txt") as original_f: + for line in original_f: + bad = False + + for bad_word in BAD_WORDS: + if bad_word in line.lower(): + bad = True + + if bad: + unsafe_f.write(line) + else: + safe_f.write(line) + +safe_f.close() +unsafe_f.close() +``` diff --git a/commit_messages/censorable.txt b/commit_messages/censorable.txt new file mode 100644 index 0000000..0f2a21c --- /dev/null +++ b/commit_messages/censorable.txt @@ -0,0 +1,44 @@ +"Get that shit outta my master." +Code was clean until manager requested to fuck it up +Derp search/replace fuckup +Either Hot Shit or Total Bollocks +FUCKING XUPPERNAMEX +Fixed some shit +Fixed the fuck out of #XNUMX! +Fuck it, YOLO! +Fucking egotistical bastard. adds expandtab to vimrc +Fucking submodule bull shit +Fucking templates. +Herping the fucking derp right here and now. +I don't get paid enough for this shit. +I hate this fucking language. +I'm too old for this shit! +It fucking compiles \:D/ +Merge pull request #67 from Lazersmoke/fix-andys-shit Fix andys shit +My boss forced me to build this feature... Pure shit. +REALLY FUCKING FIXED +Revert "fuckup". +SHIT ===> GOLD +SOAP is a piece of shit +Shit code! +Some shit. +WHO THE FUCK CAME UP WITH MAKE? +Why The Fuck? +a lot of shit +clarify further the brokenness of C++. why the fuck are we using C++? +fix some fucking errors +fixed shit that havent been fixed in last commit +fixing project shit +fuckup. +holy shit it's functional +if you're not using et, fuck off +include shit +refuckulated the carbonator +someday I gonna kill someone for this shit... +this is how we generate our shit. 
+I don't give a damn 'bout my reputation +arrgghh... damn this thing for not working. +download half the damn internet to parse a pdf +fix that damn sign!!! +hopefully going to get a successful build got damn it +still trying to render a damn cube diff --git a/commit_messages.txt b/commit_messages/safe.txt similarity index 92% rename from commit_messages.txt rename to commit_messages/safe.txt index 9393edc..6624b91 100644 --- a/commit_messages.txt +++ b/commit_messages/safe.txt @@ -1,5 +1,4 @@ ¯\_(ツ)_/¯ -"Get that shit outta my master." #GrammarNazi $(init 0) $(rm -rvf .) @@ -12,7 +11,6 @@ $(rm -rvf .) /sigh 50/50 640K ought to be enough for anybody -8==========D :(:( :q! ??! what the ... @@ -51,7 +49,6 @@ COMMIT ALL THE FILES! Can someone review this commit, please ? Check next commit for message. Chuck Norris Emailed Me This Patch... I'm Not Going To Question It -Code was clean until manager requested to fuck it up Commit committed Commit committed.... Committed some changes @@ -67,7 +64,6 @@ DEAL WITH IT DNS_PROBE_FINISHED_NXDOMAIN Deleted API file Derp -Derp search/replace fuckup Derp, asset redirection in dev mode Derp. Fix missing constant post rename Derpy hooves @@ -83,11 +79,9 @@ Don’t even try to refactor it. Don’t mess with Voodoo Duh Easteregg -Either Hot Shit or Total Bollocks Errare humanum est. FONDLED THE CODE FOR REAL. -FUCKING XUPPERNAMEX Feed. You. Stuff. No time. Final commit, ready for tagging Fingers crossed! @@ -108,9 +102,7 @@ Fixed errors Fixed everything. Fixed mispeling Fixed so the code compiles -Fixed some shit Fixed the build. -Fixed the fuck out of #XNUMX! Fixed unnecessary bug. Fixed what was broken. Fixing XNAMEX's bug. @@ -120,10 +112,6 @@ For real, this time. For the sake of my sanity, just ignore this... For the statistics only Friday 5pm -Fuck it, YOLO! -Fucking egotistical bastard. adds expandtab to vimrc -Fucking submodule bull shit -Fucking templates. 
Future self, please forgive me and don't hit me with the baseball bat again! GIT :/ General commit (no IDs open) - Modifications for bad implementations @@ -138,7 +126,6 @@ Herp derp I left the debug in there and forgot to reset errors. Herpderp, shoulda check if it does really compile. Herping the derp Herping the derp derp (silly scoping error) -Herping the fucking derp right here and now. Herpy dooves. Hide those navs, boi! Hiding API key hahaha @@ -155,8 +142,6 @@ I can't believe it took so long to fix this. I cannot believe that it took this long to write a test for this. I did it for the lulz! I don't believe it -I don't get paid enough for this shit. -I don't give a damn 'bout my reputation I don't know what the hell I was thinking. I don't know what these changes are supposed to accomplish but somebody told me to make them. I don't know why. Just move on. @@ -164,7 +149,6 @@ I dont know what I am doing I expected something different. I forgot to commit... So here you go. I had a cup of tea and now it's fixed -I hate this fucking language. I have no idea what I'm doing here. I honestly wish I could remember what was going on here... I immediately regret this commit. @@ -197,7 +181,6 @@ I'm hungry I'm just a grunt. Don't blame me for this awful PoS. I'm sorry. I'm too foo for this bar -I'm too old for this shit! I'm totally adding this to epic win. +300 ID:10T Error IEize @@ -212,7 +195,6 @@ Is there an award for this? Issue #XNUM10X is now Issue #XNUM30X It Compiles! 50 Points For Gryffindor. It compiles! Ship it! -It fucking compiles \:D/ It only compiles every XNUM2,5X tries... good luck. It was the best of times, it was the worst of times It worked for me... @@ -242,7 +224,6 @@ Major fixup. Make Sure You Are Square With Your God Before Trying To Merge This Make that it works in 90% of the cases. 3:30. 
Merge pull my finger request -Merge pull request #67 from Lazersmoke/fix-andys-shit Fix andys shit Merging 'WIP: Do Not Merge This Branch' Into Master Merging the merge Minor updates @@ -251,7 +232,6 @@ Mongo.db was empty, filled now with good stuff More ignore Moved something to somewhere... goodnight... My bad -My boss forced me to build this feature... Pure shit. NOJIRA: No cry NSA backdoor - ignore Never Run This Commit As Root @@ -298,7 +278,6 @@ Programming the flux capacitor Push poorly written test can down the road another ten years Put everything in its right place QuickFix. -REALLY FUCKING FIXED Refactor factories, revisit visitors Refactored configuration. Reinventing the wheel. Again. @@ -309,22 +288,16 @@ Removing unecessary stuff Replace all whitespaces with tabs. Reset error count between rows. herpderp Reticulating splines... -Revert "fuckup". Revert "git please work" Revert "just testing, remember to revert" Revert this commit Rush B! -SEXY RUSSIAN CODES WAITING FOR YOU TO CALL -SHIT ===> GOLD -SOAP is a piece of shit Saint Pipeline, please give me the green light Same as last commit with changes See last commit -Shit code! Shovelling coal into the server... So my boss wanted this button ... Some bugs fixed -Some shit. Somebody set up us the bomb. Something fixed Spinning up the hamster... @@ -387,7 +360,6 @@ Updated framework to the lattest version Use a real JS construct, WTF knows why this works in chromium. Useful text Version control is awful -WHO THE FUCK CAME UP WITH MAKE? WIP, always WIPTF WTF is this. @@ -409,7 +381,6 @@ Who Let the Bugs Out?? Who has two thumbs and remembers the rudiments of his linear algebra courses? Apparently, this guy. Who knows WTF?! Who knows... -Why The Fuck? Working on WIP Working on tests (haha) Wubbalubbadubdub! 
@@ -433,7 +404,6 @@ Your heart's been aching but you're too shy to say it [skip ci] I'll fix the build monday _ a few bits tried to escape, but we caught them -a lot of shit accidental commit add actual words add dirty scripts from the dark side of the universe @@ -448,7 +418,6 @@ and a comma and so the crazy refactoring process sees the sunlight after some months in the dark! another big bag of changes apparently i did something… -arrgghh... damn this thing for not working. arrrggghhhhh fixed! asdfasdfasdfasdfasdfasdfadsf assorted changes @@ -470,7 +439,6 @@ c&p fail changed things... changes ci test -clarify further the brokenness of C++. why the fuck are we using C++? commented out failing tests commit copy and paste is not a design pattern @@ -487,7 +455,6 @@ does it work? maybe. will I check? no. doh. done. going to bed now. dope -download half the damn internet to parse a pdf enabled ultra instinct epic eppic fail XNAMEX @@ -501,24 +468,19 @@ first blush fix fix /sigh fix bug, for realz -fix some fucking errors fix tpyo fixed conflicts (LOL merge -s ours; push -f) fixed errors in the previous commit fixed mistaken bug -fixed shit that havent been fixed in last commit fixed some minor stuff, might need some additional work. -fix that damn sign!!! fixed the israeli-palestinian conflict fixes -fixing project shit foo forgot a contact page woops haha forgot to save that file forgot we're not using a smart language formatted all freemasonry -fuckup. gave up and used tables. giggle. git + ipynb = :( @@ -537,17 +499,13 @@ herpderp (redux) hey, look over there! hey, what's that over there?! hmmm -holy shit it's functional hoo boy -hopefully going to get a successful build got damn it i dunno, maybe this works i hid an easter egg in the code. can you find it? i need therapy i think i fixed a bug... 
-if you're not using et, fuck off implemented missing semicolon improved function -include shit increased loading time by a bit it is hump day _^_ it's friday @@ -600,7 +558,6 @@ project lead is allergic to changes... put code that worked where the code that didn't used to be rats really ignore ignored worsd -refuckulated the carbonator remove certain things and added stuff remove debug
all good removed echo and die statements, lolz. @@ -616,13 +573,11 @@ small is a real HTML tag, who knew. some brief changes some stuff working haha somebody keeps erasing my changes. -someday I gonna kill someone for this shit... someone fails and it isn't me sometimes you just herp the derp so hard it herpderps speling is difikult squash me starting the service is always better -still trying to render a damn cube stopped caring XNUM8,23X commits ago stuff syntax @@ -636,7 +591,6 @@ things occurred third time's a charm this doesn't really make things faster, but I tried this is Spartaaaaaaaa -this is how we generate our shit. this is my quickfix branch and i will use to do my quickfixes this is why docs are important this should fix it diff --git a/commit_messages/unsafe.txt b/commit_messages/unsafe.txt new file mode 100644 index 0000000..2373677 --- /dev/null +++ b/commit_messages/unsafe.txt @@ -0,0 +1,2 @@ +8==========D +SEXY RUSSIAN CODES WAITING FOR YOU TO CALL diff --git a/index.html b/index.html index 9dd32df..8301290 100644 --- a/index.html +++ b/index.html @@ -15,17 +15,18 @@ text-align: left; margin: 1em auto; } - .permalink { + .link-wrapper { + display: inline-block; font-size: .5em; /* ensure compliance w/ AAA WCAG standards for accessible text */ color: #595959; line-height: 1em; } - .permalink a { + .link-wrapper a { text-decoration: none; color: inherit; } - .permalink a:hover { + .link-wrapper a:hover { text-decoration: underline; } @@ -33,9 +34,18 @@

{% raw message %}

- + + +
diff --git a/messages.py b/messages.py index 9606bb7..990fa0b 100644 --- a/messages.py +++ b/messages.py @@ -3,33 +3,68 @@ import re from typing import Dict, List from hashlib import md5 +from itertools import cycle, islice + +def generate(censor=False, safe_only=True): + templates = safe_templates + if censor: + templates = safe_templates | censorable_templates + elif not safe_only: + templates = (unsafe_templates | + safe_templates | censorable_templates) -def generate(): digest = _pick_random_key(templates) - msg = templates[digest] + msg = _fill_template(templates[digest], censor=censor) return (msg, digest) -def find_by_md5(md5): - if md5 not in templates: - return None - else: - t = templates[md5] - return _fill_template(t) +def find_by_md5(md5, censor=False): + t = (unsafe_templates | + safe_templates | censorable_templates).get(md5) + + if not t: return None + + return _fill_template(t, censor=censor) this_file = os.path.dirname(__file__) humans_file = os.path.join(this_file, 'static', 'humans.txt') -all_messages_file = os.path.join(this_file, 'commit_messages.txt') -tmp = os.path.join(this_file, 'tmp') -os.makedirs(tmp, exist_ok=True) -templates: Dict[str, str] = {} +def _template_file(name): + return os.path.join(this_file, 'commit_messages', name) + +safe_templates_file = _template_file('safe.txt') +unsafe_templates_file = _template_file('unsafe.txt') +censorable_templates_file = _template_file('censorable.txt') + +censorable_templates: Dict[str, str] = {} +safe_templates: Dict[str, str] = {} +unsafe_templates: Dict[str, str] = {} names: List[str] = [] -# Create a hash table of all commit message templates +def _hash_template(template_text): + return md5(template_text.encode('utf-8')).hexdigest() + +def _hash_and_store(template_text, _dict): + digest = _hash_template(template_text) + _dict[digest] = template_text + print("hashing messages...") -with open(all_messages_file, 'r', encoding='utf-8') as f: +with open(safe_templates_file, 'r', 
encoding='utf-8') as f: + for line in f: + _hash_and_store(line, safe_templates) +with open(unsafe_templates_file, 'r', encoding='utf-8') as f: for line in f: - templates[md5(line.encode('utf-8')).hexdigest()] = line + _hash_and_store(line, unsafe_templates) +with open(censorable_templates_file, 'r', encoding='utf-8') as f: + for line in f: + _hash_and_store(line, censorable_templates) + +def check_for_collisions(): + all_template_digests = (list(safe_templates.keys()) + + list(unsafe_templates.keys()) + + list(censorable_templates.keys())) + if len(all_template_digests) != len(set(all_template_digests)): + raise Exception("uniqueness problem with source data") +check_for_collisions() with open(humans_file, 'r', encoding='utf-8') as f: for line in f: @@ -45,10 +80,12 @@ def _pick_random_key(templates): num_re = re.compile(r"XNUM([0-9,]*)X") -def _fill_template(txt): +def _fill_template(txt, censor=False): txt = txt.replace('XNAMEX', random.choice(names)) txt = txt.replace('XUPPERNAMEX', random.choice(names).upper()) txt = txt.replace('XLOWERNAMEX', random.choice(names).lower()) + if censor: + txt = _censor_swearing(txt, censor=censor) nums = num_re.findall(txt) @@ -74,3 +111,28 @@ def _fill_template(txt): txt = num_re.sub(str(randint), txt, count=1) return txt + +SWEARING = [ + "refuckulated", # funnier censored + "motherfucker", + "cocksucker", + "bollocks", # may be safe but funnier/safer censored + "fucking", + "cunts", + "fuck", + "shit", + "damn", # also funnier censored +] +# https://en.wikipedia.org/wiki/Grawlix +GRAWLIX = "!@#$%&*" + +def _censor_swearing(txt, censor=False): + grawlix_chars = censor if type(censor) == str else GRAWLIX + grawlix_chars = list(grawlix_chars) + + for swearword in SWEARING: + random.shuffle(grawlix_chars) + + grawlix = "".join(list(islice(cycle(grawlix_chars), len(swearword)))) + txt = re.sub(swearword, grawlix, txt, flags=re.IGNORECASE) + return txt