Skip to content

Commit 1b5f01f

Browse files
committed
add script for offsite backups
1 parent e23939d commit 1b5f01f

File tree

3 files changed

+267
-0
lines changed

3 files changed

+267
-0
lines changed

backend/backup_to_dropbox.py

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
#!/usr/bin/env python
2+
"""Backup Heroku PostgreSQL database to Dropbox.
3+
4+
This script:
5+
1. Captures a Heroku database backup
6+
2. Downloads it
7+
3. Uploads to Dropbox
8+
4. Cleans up old backups (keeps the last BACKUP_RETENTION_DAYS days; default 60)
9+
10+
Usage:
11+
python backend/backup_to_dropbox.py
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import os
17+
import subprocess
18+
import sys
19+
from datetime import UTC, datetime, timedelta
20+
21+
import dropbox
22+
from dropbox.exceptions import ApiError
23+
from dropbox.files import WriteMode
24+
25+
# Configuration
26+
# All settings are read from the environment once, at import time.
# Heroku app whose database is backed up.
HEROKU_APP_NAME = os.getenv("HEROKU_APP_NAME", "fpbase")
# Dropbox API token; stays None when unset (upload then fails, cleanup is skipped).
DROPBOX_ACCESS_TOKEN = os.getenv("DROPBOX_ACCESS_TOKEN")
# Dropbox folder that receives the dump files.
DROPBOX_BACKUP_PATH = os.getenv("DROPBOX_BACKUP_PATH", "/heroku-backups/fpbase")
# Age in days after which files in DROPBOX_BACKUP_PATH are deleted.
BACKUP_RETENTION_DAYS = int(os.getenv("BACKUP_RETENTION_DAYS", "60"))
30+
31+
32+
def log(message: str) -> None:
33+
"""Print timestamped log message."""
34+
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
35+
print(f"[{timestamp}] {message}")
36+
37+
38+
def run_command(cmd: list[str]) -> tuple[int, str, str]:
    """Execute *cmd* as a subprocess and report how it went.

    The argument list is passed straight to ``subprocess.run`` (no shell is
    involved) and a non-zero exit status does not raise.

    Returns:
        A ``(exit_code, stdout, stderr)`` tuple, with both streams decoded as text.
    """
    completed = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,
    )
    return (completed.returncode, completed.stdout, completed.stderr)
42+
43+
44+
def capture_heroku_backup() -> str | None:
    """Ask the Heroku CLI to capture a new backup of ``DATABASE_URL``.

    Returns:
        The fixed marker ``"latest"`` when the CLI exits with status 0, or
        ``None`` (after logging the CLI's stderr) when it does not.
    """
    log("Capturing Heroku database backup...")
    status, _out, err = run_command(
        ["heroku", "pg:backups:capture", "DATABASE_URL", "--app", HEROKU_APP_NAME]
    )

    if status == 0:
        log("Backup captured successfully")
        return "latest"

    log(f"ERROR: Failed to capture backup: {err}")
    return None
56+
57+
58+
def get_backup_url() -> str | None:
    """Ask the Heroku CLI for the download URL of the latest backup.

    Returns:
        The URL printed by ``heroku pg:backups:url`` with surrounding
        whitespace stripped, or ``None`` if the command failed or printed
        nothing. The reason is logged in both failure cases.
    """
    log("Getting backup download URL...")
    cmd = ["heroku", "pg:backups:url", "--app", HEROKU_APP_NAME]
    exit_code, stdout, stderr = run_command(cmd)

    if exit_code != 0:
        log(f"ERROR: Failed to get backup URL: {stderr}")
        return None

    url = stdout.strip()
    if not url:
        # A zero exit status with no output must not be reported as success;
        # without this the caller aborts with no explanation in the log.
        log("ERROR: Heroku returned an empty backup URL")
        return None

    log("Got backup URL")
    return url
71+
72+
73+
def download_backup(url: str, local_path: str) -> bool:
    """Download the backup at *url* to *local_path* using ``curl``.

    Args:
        url: Download URL for the backup.
        local_path: Destination file path on the local filesystem.

    Returns:
        True if curl exited with status 0 and a non-empty file exists at
        *local_path*; False otherwise (the reason is logged).
    """
    log(f"Downloading backup to {local_path}...")
    # --fail: exit non-zero on an HTTP error status instead of saving the
    #   server's error page as if it were the dump.
    # --location: follow redirects.
    # --silent --show-error: suppress the progress meter so the captured
    #   stderr contains only real error messages.
    cmd = [
        "curl",
        "--fail",
        "--location",
        "--silent",
        "--show-error",
        "-o",
        local_path,
        url,
    ]
    exit_code, _stdout, stderr = run_command(cmd)

    if exit_code != 0:
        log(f"ERROR: Failed to download backup: {stderr}")
        return False

    try:
        size_bytes = os.path.getsize(local_path)
    except OSError:
        # A missing or unreadable file is handled like an empty one.
        size_bytes = 0

    if size_bytes == 0:
        log("ERROR: Downloaded file is empty or doesn't exist")
        return False

    file_size_mb = size_bytes / (1024 * 1024)
    log(f"Download complete ({file_size_mb:.2f} MB)")
    return True
91+
92+
93+
def upload_to_dropbox(local_path: str, dropbox_path: str) -> bool:
    """Send the file at *local_path* to *dropbox_path*, overwriting any existing file.

    Files no larger than 4 MB go up in a single request; larger files are
    streamed through a Dropbox upload session in 4 MB pieces.

    Returns:
        True when the upload finished; False when the access token is not
        configured or any error occurred (the error is logged).
    """
    if not DROPBOX_ACCESS_TOKEN:
        log("ERROR: DROPBOX_ACCESS_TOKEN environment variable not set")
        return False

    log(f"Uploading to Dropbox: {dropbox_path}...")

    try:
        client = dropbox.Dropbox(DROPBOX_ACCESS_TOKEN)

        piece_bytes = 4 * 1024 * 1024  # size of each request body
        total_bytes = os.path.getsize(local_path)

        with open(local_path, "rb") as fh:
            if total_bytes > piece_bytes:
                # Multi-request path: start a session with the first piece.
                started = client.files_upload_session_start(fh.read(piece_bytes))
                cursor = dropbox.files.UploadSessionCursor(
                    session_id=started.session_id,
                    offset=fh.tell(),
                )
                commit = dropbox.files.CommitInfo(path=dropbox_path, mode=WriteMode("overwrite"))

                while fh.tell() < total_bytes:
                    remaining = total_bytes - fh.tell()
                    if remaining > piece_bytes:
                        client.files_upload_session_append_v2(fh.read(piece_bytes), cursor)
                        # The cursor must track how many bytes the session holds.
                        cursor.offset = fh.tell()
                    else:
                        # Last piece: close the session and commit the file.
                        client.files_upload_session_finish(fh.read(piece_bytes), cursor, commit)
            else:
                # Whole file fits in one request.
                client.files_upload(fh.read(), dropbox_path, mode=WriteMode("overwrite"))

        log(f"Upload complete: {dropbox_path}")
        return True

    except ApiError as e:
        log(f"ERROR: Dropbox API error: {e}")
        return False
    except Exception as e:
        log(f"ERROR: Unexpected error during upload: {e}")
        return False
137+
138+
139+
def cleanup_old_backups() -> None:
    """Delete files in DROPBOX_BACKUP_PATH older than BACKUP_RETENTION_DAYS.

    Best-effort: does nothing when DROPBOX_ACCESS_TOKEN is unset, and every
    failure is logged rather than raised, so a cleanup problem does not fail
    the backup run. Every file directly inside the folder is considered,
    whatever its name.
    """
    if not DROPBOX_ACCESS_TOKEN:
        return

    log(f"Cleaning up backups older than {BACKUP_RETENTION_DAYS} days...")

    try:
        dbx = dropbox.Dropbox(DROPBOX_ACCESS_TOKEN)
        # Compare as naive UTC datetimes: tzinfo is dropped on both sides so
        # the comparison cannot raise on a naive/aware mismatch.
        cutoff = datetime.now(UTC).replace(tzinfo=None) - timedelta(days=BACKUP_RETENTION_DAYS)

        try:
            result = dbx.files_list_folder(DROPBOX_BACKUP_PATH)
        except ApiError as e:
            # The path-not-found probe is only valid for list-folder errors,
            # so it is kept local to the listing call.
            if e.error.is_path() and e.error.get_path().is_not_found():
                log(f"Backup directory doesn't exist yet: {DROPBOX_BACKUP_PATH}")
            else:
                log(f"ERROR: Failed to clean up old backups: {e}")
            return

        # Phase 1: walk every page of the listing and collect expired files.
        # A single files_list_folder call may return only part of the folder.
        expired = []
        while True:
            expired.extend(
                entry
                for entry in result.entries
                if isinstance(entry, dropbox.files.FileMetadata)
                and entry.server_modified.replace(tzinfo=None) < cutoff
            )
            if not result.has_more:
                break
            result = dbx.files_list_folder_continue(result.cursor)

        # Phase 2: delete. One failed delete is logged and skipped so the
        # remaining expired files are still removed.
        deleted_count = 0
        for entry in expired:
            try:
                dbx.files_delete_v2(entry.path_display)
            except ApiError as e:
                log(f"ERROR: Failed to delete old backup {entry.name}: {e}")
                continue
            log(f"Deleted old backup: {entry.name}")
            deleted_count += 1

        if deleted_count > 0:
            log(f"Cleaned up {deleted_count} old backup(s)")
        else:
            log("No old backups to clean up")

    except ApiError as e:
        log(f"ERROR: Failed to clean up old backups: {e}")
    except Exception as e:
        log(f"ERROR: Unexpected error during cleanup: {e}")
175+
176+
177+
def main() -> int:
    """Run capture -> URL lookup -> download -> upload -> cleanup.

    Returns:
        Process exit status: 0 when every step up to and including the upload
        succeeded, 1 when a step failed or an unexpected exception was raised.
        The local dump file is removed on every path.
    """
    banner = "=" * 60
    log(banner)
    log("Starting Heroku PostgreSQL -> Dropbox backup")
    log(banner)

    # The UTC timestamp in the name keeps each run's dump distinct.
    stamp = datetime.now(UTC).strftime("%Y-%m-%d-%H%M%S")
    filename = "fpbase-" + stamp + ".dump"
    local_path = "/tmp/" + filename
    dropbox_path = DROPBOX_BACKUP_PATH + "/" + filename

    try:
        # Step 1: capture a fresh backup on Heroku.
        if not capture_heroku_backup():
            return 1

        # Step 2: look up where to download it from.
        url = get_backup_url()
        if not url:
            return 1

        # Step 3: fetch the dump to local disk.
        if not download_backup(url, local_path):
            return 1

        # Step 4: push the dump to Dropbox.
        if not upload_to_dropbox(local_path, dropbox_path):
            return 1

        # Step 5: prune expired backups (its outcome does not affect the status).
        cleanup_old_backups()

        log(banner)
        log("Backup completed successfully!")
        log(banner)
        return 0

    except Exception as e:
        log(f"ERROR: Unexpected error: {e}")
        return 1

    finally:
        # Remove the local dump whether or not the run succeeded.
        if os.path.exists(local_path):
            os.remove(local_path)
            log(f"Cleaned up local file: {local_path}")
225+
226+
227+
if __name__ == "__main__":
    # Exit the interpreter with main()'s return value as the process status.
    raise SystemExit(main())

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ dependencies = [
5151
"structlog>=24.1.0",
5252
"whitenoise[brotli]>=6.11.0",
5353
"django-vite>=3.1.0",
54+
"dropbox>=12.0.2", # for heroku-scheduler database backups
5455
]
5556

5657

uv.lock

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)