186 lines
6.0 KiB
Python
186 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""Download all Gmail emails as .eml files organized by label/folder."""
|
|
|
|
import email
|
|
import email.header
|
|
import imaplib
|
|
import os
|
|
import re
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
|
|
def decode_header(raw):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        raw: Header value (a ``str`` or ``email.header.Header``), or a falsy
            value such as ``None`` or ``""``.

    Returns:
        The decoded text, or ``""`` when *raw* is falsy. Bytes that cannot be
        decoded with the declared charset are replaced rather than raising.
    """
    if not raw:
        return ""
    decoded = []
    for data, charset in email.header.decode_header(raw):
        if isinstance(data, bytes):
            try:
                decoded.append(data.decode(charset or "utf-8", errors="replace"))
            except (LookupError, UnicodeDecodeError):
                # Unknown or bogus charset label: fall back to UTF-8.
                decoded.append(data.decode("utf-8", errors="replace"))
        else:
            decoded.append(data)
    # email.header.decode_header() keeps the whitespace around unencoded text
    # and drops the whitespace between adjacent encoded-words, so the parts
    # must be concatenated as-is; joining with " " doubles or invents spaces.
    return "".join(decoded)
|
|
|
|
|
|
def sanitize_filename(name, max_len=100):
    """Turn arbitrary text into a string usable as a file name component.

    Characters that are reserved on common filesystems, plus ASCII control
    characters, become underscores; leading and trailing dots and spaces are
    removed; the result is cut to at most *max_len* characters.

    Args:
        name: Text to clean up.
        max_len: Maximum length of the returned string.

    Returns:
        The cleaned text, or ``"no_subject"`` if nothing is left after
        stripping.
    """
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name).strip('. ')
    if not cleaned:
        return "no_subject"
    return cleaned[:max_len]
|
|
|
|
|
|
def _connect_imap(email_addr, app_password, host, port, starttls):
    """Open an IMAP connection to host:port, log in, and return the client.

    With *starttls* the session starts unencrypted and is upgraded with
    STARTTLS; otherwise implicit TLS (``IMAP4_SSL``) is used.
    """
    if starttls:
        import ssl

        # NOTE(review): certificate and hostname verification are disabled on
        # this path — presumably for a local bridge with a self-signed
        # certificate; confirm before pointing it at a remote host.
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        imap = imaplib.IMAP4(host, port)
        imap.starttls(ssl_context=ctx)
    else:
        imap = imaplib.IMAP4_SSL(host, port)
    imap.login(email_addr, app_password)
    return imap


def _save_message(imap, seq_num, folder_dir):
    """Fetch one message from the selected folder and store it as an .eml file.

    Returns True if a new file was written, False if the target file already
    existed, and None if the server returned no usable message body.
    """
    status, msg_data = imap.fetch(seq_num, "(RFC822)")
    # A successful FETCH yields (envelope, payload) tuples; anything else
    # (None, a bare b")" ...) carries no message body.
    if status != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
        return None

    raw_email = msg_data[0][1]
    msg = email.message_from_bytes(raw_email)

    # Build the file name from the Message-ID and the subject.
    subject = sanitize_filename(decode_header(msg.get("Subject", "no_subject")))
    # Message.get() may hand back a Header object for non-ASCII values;
    # re.sub() needs a plain string.
    msg_id = str(msg.get("Message-ID", f"uid_{seq_num.decode()}"))
    safe_id = sanitize_filename(re.sub(r'[<>@.]', '_', msg_id), 40)

    filepath = folder_dir / f"{safe_id}_{subject}.eml"
    if filepath.exists():
        return False

    filepath.write_bytes(raw_email)
    return True


def backup_account(email_addr, app_password, output_dir, host="imap.gmail.com", port=993, starttls=False):
    """Download every message of one IMAP account as .eml files.

    Each non-empty folder is mirrored into a sub-directory of *output_dir*.
    Messages whose target file already exists are counted as skipped, so the
    function can be re-run to resume an interrupted backup.

    Args:
        email_addr: Login name of the account.
        app_password: Password used for the IMAP login.
        output_dir: Directory that receives one sub-directory per folder.
        host: IMAP server host name.
        port: IMAP server port.
        starttls: If true, connect in plain text and upgrade with STARTTLS
            instead of using implicit TLS.

    Returns:
        The number of messages newly written to disk.

    Raises:
        imaplib.IMAP4.error, OSError: If connecting, logging in, listing
            folders, or re-connecting after a dropped connection fails.
    """
    print(f"\n{'='*60}")
    print(f"Backing up: {email_addr}")
    print(f"Output: {output_dir}")
    print(f"{'='*60}")

    imap = _connect_imap(email_addr, app_password, host, port, starttls)
    total_downloaded = 0
    total_skipped = 0

    try:
        # List all folders.
        status, folders = imap.list()
        folder_names = []
        for entry in folders:
            # imaplib returns [None] when the server lists nothing.
            if not isinstance(entry, bytes):
                continue
            # The folder name is whatever follows the "/" hierarchy delimiter.
            match = re.search(r'"/" "(.*)"$|"/" (.*)$', entry.decode())
            if not match:
                continue
            name = match.group(1) or match.group(2)
            if name:
                folder_names.append(name.strip('"'))

        print(f"Found {len(folder_names)} folders")

        for folder in folder_names:
            try:
                status, data = imap.select(f'"{folder}"', readonly=True)
                if status != "OK":
                    continue
                msg_count = int(data[0])
                if msg_count == 0:
                    continue
            except Exception as e:
                print(f" Skipping {folder}: {e}")
                continue

            # Create the folder directory.
            safe_folder = folder.replace("/", "_").replace("[Gmail]_", "gmail_")
            folder_dir = Path(output_dir) / safe_folder
            folder_dir.mkdir(parents=True, exist_ok=True)

            print(f"\n {folder}: {msg_count} messages")

            # SEARCH (as opposed to UID SEARCH) returns message sequence
            # numbers, which is what FETCH below expects.
            status, data = imap.search(None, "ALL")
            if status != "OK":
                continue
            seq_nums = (data[0] or b"").split()
            total = len(seq_nums)

            for i, seq_num in enumerate(seq_nums, 1):
                written = None
                # Two attempts: if the connection drops mid-message, reconnect
                # and fetch the same message again instead of losing it.
                for _attempt in range(2):
                    try:
                        written = _save_message(imap, seq_num, folder_dir)
                        break
                    except (imaplib.IMAP4.abort, imaplib.IMAP4.error, ConnectionError, OSError) as e:
                        print(f" Connection lost at {i}/{total}: {e}")
                        # Reconnect and re-select the folder.
                        try:
                            imap.logout()
                        except Exception:
                            # Best effort: the old connection is already broken.
                            pass
                        time.sleep(2)
                        imap = _connect_imap(email_addr, app_password, host, port, starttls)
                        imap.select(f'"{folder}"', readonly=True)
                        print(" Reconnected, continuing...")

                if written is None:
                    continue
                if not written:
                    total_skipped += 1
                    continue

                total_downloaded += 1
                if i % 50 == 0 or i == total:
                    print(f" {i}/{total} processed")
    finally:
        # Always release the server session, even when an error propagates.
        try:
            imap.logout()
        except (imaplib.IMAP4.error, OSError):
            # The connection is already gone; nothing left to clean up.
            pass

    print(f"\n Done: {total_downloaded} downloaded, {total_skipped} skipped (already exist)")
    return total_downloaded
|
|
|
|
|
|
if __name__ == "__main__":
    # Passwords are taken from the environment instead of being stored in
    # this file: "password_env" names the variable holding each account's
    # password. Any password that was previously committed here should be
    # treated as compromised and rotated.
    accounts = [
        {
            "email": "your-email@example.com",
            "password_env": "EMAIL_BACKUP_PASSWORD_DVISH92",
            "dir": "dvish92",
        },
        {
            "email": "lzbellina92@gmail.com",
            "password_env": "EMAIL_BACKUP_PASSWORD_LZBELLINA92",
            "dir": "lzbellina92",
        },
        {
            "email": "admin@thevish.io",
            "password_env": "EMAIL_BACKUP_PASSWORD_PROTON_ADMIN",
            "dir": "proton_admin",
            "host": "127.0.0.1",
            "port": 1143,
            "starttls": True,
        },
    ]

    # Optional first command-line argument: the backup root directory.
    base_dir = sys.argv[1] if len(sys.argv) > 1 else "/tmp/gmail_backup"

    print(f"Email Backup — downloading all emails to {base_dir}")
    total = 0
    for acct in accounts:
        password = os.environ.get(acct["password_env"])
        if not password:
            print(f"Skipping {acct['email']}: set {acct['password_env']} to back up this account")
            continue

        output = os.path.join(base_dir, acct["dir"])
        os.makedirs(output, exist_ok=True)
        total += backup_account(
            acct["email"], password, output,
            host=acct.get("host", "imap.gmail.com"),
            port=acct.get("port", 993),
            starttls=acct.get("starttls", False),
        )

    print(f"\n{'='*60}")
    print(f"BACKUP COMPLETE: {total} emails downloaded to {base_dir}")
    print(f"{'='*60}")
|