diff --git a/installation_and_upgrade/part_truncate_archive.bat b/installation_and_upgrade/part_truncate_archive.bat index b1e8759..20a2f31 100644 --- a/installation_and_upgrade/part_truncate_archive.bat +++ b/installation_and_upgrade/part_truncate_archive.bat @@ -15,7 +15,7 @@ IF %errorlevel% neq 0 ( REM Matches current MySQL version uv pip install mysql-connector-python==8.4.0 -python -u part_truncate_archive.py %* +python -u %~dp0part_truncate_archive.py %* IF %errorlevel% neq 0 goto ERROR call rmdir /s /q %UV_TEMP_VENV% diff --git a/installation_and_upgrade/part_truncate_archive.py b/installation_and_upgrade/part_truncate_archive.py index be497a2..fbf15eb 100644 --- a/installation_and_upgrade/part_truncate_archive.py +++ b/installation_and_upgrade/part_truncate_archive.py @@ -1,4 +1,6 @@ import argparse +import datetime +import subprocess import time from contextlib import closing @@ -41,49 +43,110 @@ def main() -> None: default=0.5, ) parser.add_argument( - "--history", - dest="history", + "--days", + dest="days", action="store", type=int, help="How many days to keep (default: 7)", default=7, ) parser.add_argument( - "--password", dest="password", action="store", help="mysql root password", default="" + "--hours", + dest="hours", + action="store", + type=int, + help="How many hours to keep (default: 0)", + default=0, + ) + parser.add_argument( + "--minutes", + dest="minutes", + action="store", + type=int, + help="How many minutes to keep (default: 0)", + default=0, + ) + parser.add_argument("--user", dest="user", action="store", help="mysql user", default="root") + parser.add_argument( + "--password", dest="password", action="store", help="mysql password", default="" + ) + parser.add_argument( + "--backup", dest="backup", action="store", help="backup data before deleting to file" ) parser.add_argument("--dry-run", dest="dry_run", action="store_true", help="dry run") args = parser.parse_args() - + sample_id_max = None + sample_id_min = None + count_sample_id = None + time_interval = datetime.timedelta(days=args.days, hours=args.hours, minutes=args.minutes) + cutoff_time = datetime.datetime.now() - time_interval + cutoff_isotime = cutoff_time.isoformat(" ", "seconds") # ignore pyright checking as oracle bug in type signature of close() method with closing( mysql.connector.connect( - user="root", password=args.password, host=args.host, database="archive" + user=args.user, password=args.password, host=args.host, database="archive" ) # pyright: ignore ) as conn: # this is so we don't cache query results and keep getting the same answer conn.autocommit = True with closing(conn.cursor(prepared=True)) as c: - c.execute("SET SQL_LOG_BIN=0") # disable any binary logging for this session - print(f"Looking for sample_id corresponding to {args.history} days ago") - c.execute( - "SELECT MAX(sample_id) FROM sample WHERE smpl_time < TIMESTAMPADD(DAY, -?, NOW())", - (args.history,), - ) - sample_id = c.fetchone()[0] - c.execute( - "SELECT COUNT(sample_id) FROM sample " - "WHERE smpl_time < TIMESTAMPADD(DAY, -?, NOW())", - (args.history,), - ) + print(f"Looking for sample_id corresponding to {cutoff_isotime}") + c.execute(f"SELECT MAX(sample_id) FROM sample WHERE smpl_time < '{cutoff_isotime}'") + sample_id_max = c.fetchone()[0] + c.execute(f"SELECT MIN(sample_id) FROM sample WHERE smpl_time < '{cutoff_isotime}'") + sample_id_min = c.fetchone()[0] + c.execute(f"SELECT COUNT(sample_id) FROM sample WHERE smpl_time < '{cutoff_isotime}'") count_sample_id = c.fetchone()[0] print( - f"ID of last row to delete is {sample_id} and there are {count_sample_id} rows " + f"ID range to delete is {sample_id_min} to {sample_id_max} " + f"and there are {count_sample_id} rows " f"-> {int(1 + count_sample_id / args.limit)} delete operations" ) + + if args.backup: + command = [ + r"C:\Instrument\Apps\MySQL\bin\mysqldump.exe", + f"--user={args.user}", + f"--password={args.password}", + f"--host={args.host}", + "--single-transaction", + f"--result-file={args.backup}", + "--no-create-db", + "--no-create-info", + "--skip-triggers", + "--quick", + f"--where=sample_id >= {sample_id_min} AND sample_id <= {sample_id_max} " + f"AND smpl_time < '{cutoff_isotime}'", + "archive", + "sample", + ] + if args.dry_run: + print(command) + else: + subprocess.run(command, check=True) + + # ignore pyright checking as oracle bug in type signature of close() method + with closing( + mysql.connector.connect( + user=args.user, password=args.password, host=args.host, database="archive" + ) # pyright: ignore + ) as conn: + # this is so we don't cache query results and keep getting the same answer + conn.autocommit = True + + with closing(conn.cursor(prepared=True)) as c: + c.execute("SET SQL_LOG_BIN=0") # disable any binary logging for this session + delete_ops = int(1 + count_sample_id / args.limit) + + print( + f"ID range to delete is {sample_id_min} to {sample_id_max} " + f"and there are {count_sample_id} rows " + f"-> {delete_ops} delete operations" + ) print( - f"This will take at least {args.sleep * count_sample_id / args.limit:.1f} " + f"This will take at least {args.sleep * delete_ops:.1f} " "seconds based on sleep time alone" ) if args.dry_run: @@ -91,10 +154,18 @@ def main() -> None: return rowcount = 1 it = 0 + progress = 0 while rowcount > 0: - c.execute(f"DELETE FROM sample WHERE sample_id < {sample_id} LIMIT {args.limit}") + c.execute( + f"DELETE FROM sample WHERE sample_id >= {sample_id_min} AND " + f"sample_id <= {sample_id_max} AND smpl_time < '{cutoff_isotime}' " + f"LIMIT {args.limit}" + ) rowcount = c.rowcount - print(f"{it % 10}", end="", flush=True) + so_far = 100.0 * it / delete_ops + if so_far > progress: + print(f"{progress}% ", end="", flush=True) + progress += 5 it += 1 time.sleep(args.sleep) print("")