Files
IMAPSYNC/worker/worker.py
T
Sebastian Serfling f600cd52f3 fix: recover stale jobs on worker restart, persist active page on reload
- Worker resets running/cancelling jobs to idle on startup to fix jobs stuck after Docker restart
- Frontend saves current page to localStorage so reload returns to last visited page instead of always dashboard

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 14:07:13 +02:00

274 lines
8.6 KiB
Python

#!/usr/bin/env python3
"""
ImapSync Worker
---------------
Polls SQLite for queued jobs, executes imapsync, parses output,
updates run records and job status. Also handles cron schedules.
"""
import os
import re
import sqlite3
import subprocess
import time
import logging
from datetime import datetime
# Runtime configuration, read from environment variables with fallbacks.
# Path of the SQLite database file opened by get_db().
DB_PATH = os.environ.get("DB_PATH", "/data/imapsync.db")
# Directory that receives one log file per job run.
LOG_DIR = os.environ.get("LOG_DIR", "/data/logs")
# Seconds the main loop sleeps when no job is queued or after a loop error.
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "15"))

# Configure root logging so every record is tagged with [WORKER].
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [WORKER] %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
log = logging.getLogger(__name__)

# Create the log directory at import time; no error if it already exists.
os.makedirs(LOG_DIR, exist_ok=True)
def get_db():
    """Open and return a new SQLite connection to ``DB_PATH``.

    The connection waits up to 30 seconds on a locked database, yields
    ``sqlite3.Row`` rows (columns readable by name) and has its journal
    mode set to WAL. The connection is returned open; closing it is left
    to the caller.
    """
    db = sqlite3.connect(DB_PATH, check_same_thread=False, timeout=30)
    db.execute("PRAGMA journal_mode=WAL")
    db.row_factory = sqlite3.Row
    return db
def parse_imapsync_output(text: str) -> dict:
    """Extract summary counters from imapsync's textual output.

    Args:
        text: Complete captured output of an imapsync run (may be empty).

    Returns:
        A dict with the integer keys ``messages_synced``,
        ``messages_skipped`` and ``errors``. A counter whose line is not
        found stays at 0.
    """
    stats = {"messages_synced": 0, "messages_skipped": 0, "errors": 0}

    # imapsync pads the label of its statistics lines so that the colons
    # line up ("Messages transferred      : 12"), therefore whitespace is
    # allowed on both sides of the colon. The compact "label: N" form
    # still matches.
    counters = (
        ("messages_synced", r"Messages transferred\s*:\s*(\d+)"),
        ("messages_skipped", r"Messages skipped\s*:\s*(\d+)"),
    )
    for key, pattern in counters:
        found = re.search(pattern, text)
        if found:
            stats[key] = int(found.group(1))

    # Count lines that begin (after optional whitespace) with the word
    # "error" or "err", case-insensitively.
    stats["errors"] = len(
        re.findall(r"^\s*(error|err)\b", text, re.IGNORECASE | re.MULTILINE)
    )
    return stats
def check_due_schedules():
    """Queue every enabled job whose cron schedule has come due.

    Jobs with a non-empty ``schedule`` that are neither ``running`` nor
    ``queued`` are examined. A job is set to ``queued`` when the most
    recent cron occurrence before "now" is later than its ``last_run``
    (jobs that never ran are treated as last run on 2000-01-01).

    All timestamps are naive UTC, matching the ``last_run`` values that
    ``run_job`` stores with ``datetime.utcnow().isoformat()``.

    If the optional ``croniter`` package is not installed, nothing is
    scheduled; a warning is logged the first time this happens.
    """
    try:
        from croniter import croniter
    except ImportError:
        # Warn once instead of on every poll so the log is not flooded.
        if not getattr(check_due_schedules, "_missing_croniter_logged", False):
            log.warning("croniter is not installed; cron schedules will not be evaluated")
            check_due_schedules._missing_croniter_logged = True
        return

    conn = get_db()
    try:
        # One reference time for the whole pass, in the same naive-UTC
        # convention as the stored last_run values.
        now = datetime.utcnow()
        rows = conn.execute(
            "SELECT id, schedule, last_run FROM sync_jobs "
            "WHERE enabled=1 AND schedule IS NOT NULL AND schedule != '' "
            "AND status NOT IN ('running', 'queued')"
        ).fetchall()
        for row in rows:
            try:
                cron = croniter(row["schedule"], now)
                if row["last_run"]:
                    last_run = datetime.fromisoformat(row["last_run"])
                else:
                    last_run = datetime(2000, 1, 1)
                prev_due = cron.get_prev(datetime)
                if prev_due > last_run:
                    conn.execute(
                        "UPDATE sync_jobs SET status='queued' WHERE id=?",
                        (row["id"],)
                    )
                    log.info(f"Job {row['id']} scheduled run queued (cron: {row['schedule']})")
            except Exception as e:
                # A bad expression or timestamp on one job must not stop
                # the remaining jobs from being evaluated.
                log.warning(f"Cron parse error for job {row['id']}: {e}")
        conn.commit()
    finally:
        conn.close()
def _mask_passwords(cmd):
    """Return a copy of *cmd* with every --password1/--password2 value replaced by ``****``."""
    masked = []
    hide_next = False
    for arg in cmd:
        if hide_next:
            masked.append("****")
            hide_next = False
        else:
            masked.append(arg)
            hide_next = arg in ("--password1", "--password2")
    return masked


def _current_job_status(job_id):
    """Return the ``status`` of sync job *job_id*, or ``None`` when the row does not exist."""
    conn = get_db()
    try:
        row = conn.execute(
            "SELECT status FROM sync_jobs WHERE id=?", (job_id,)
        ).fetchone()
    finally:
        conn.close()
    return row["status"] if row else None


def run_job(job: sqlite3.Row):
    """Run imapsync for one queued job and record the result.

    Steps:
      1. Mark the job ``running`` (also setting ``last_run``) and insert a
         ``job_runs`` row that points at a fresh log file in ``LOG_DIR``.
      2. Start imapsync with its stdout/stderr redirected to that file.
      3. Every 3 seconds check whether the job status became
         ``cancelling`` (terminate, then kill after 10 s) or whether the
         run exceeded 2 hours (kill).
      4. Parse the log for counters, store them with the final run status
         (``done``, ``failed`` or ``cancelled``) and set the job back to
         ``idle``.

    Args:
        job: A ``sync_jobs`` row joined with source/destination server
            columns (``src_host``, ``src_port``, ``src_ssl``, ``dst_*``).

    Any exception raised while starting or supervising the process is
    logged, appended to the log file and recorded as a failed run with
    exit code -2; a still-running imapsync child is killed in that case.
    """
    job_id = job["id"]
    log.info(f"Starting job {job_id}: {job['name']}")

    timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    log_filename = f"job_{job_id}_{timestamp}.log"
    log_path = os.path.join(LOG_DIR, log_filename)

    conn = get_db()
    try:
        conn.execute(
            "UPDATE sync_jobs SET status='running', last_run=? WHERE id=?",
            (datetime.utcnow().isoformat(), job_id),
        )
        cur = conn.execute(
            "INSERT INTO job_runs (job_id, log_file, status) VALUES (?, ?, 'running')",
            (job_id, log_filename),
        )
        run_id = cur.lastrowid
        conn.commit()
    finally:
        conn.close()

    ssl1 = "--ssl1" if job["src_ssl"] else "--nossl1"
    ssl2 = "--ssl2" if job["dst_ssl"] else "--nossl2"

    # NOTE(review): the passwords are passed as process arguments and are
    # therefore visible in the process list of the host/container.
    cmd = [
        "imapsync",
        "--host1", job["src_host"],
        "--port1", str(job["src_port"]),
        ssl1,
        "--user1", job["src_user"],
        "--password1", job["src_password"],
        "--host2", job["dst_host"],
        "--port2", str(job["dst_port"]),
        ssl2,
        "--user2", job["dst_user"],
        "--password2", job["dst_password"],
        "--nolog",
    ]
    if job["extra_args"]:
        # Plain whitespace splitting: quoted arguments are not supported.
        cmd += job["extra_args"].split()

    started = time.time()
    exit_code = 0
    output = ""
    cancelled = False
    proc = None

    try:
        with open(log_path, "w") as lf:
            lf.write(f"# ImapSync Job: {job['name']}\n")
            lf.write(f"# Started: {datetime.utcnow().isoformat()}\n")
            # The first 20 arguments contain both passwords, so they are
            # masked before the command is written to the log file.
            lf.write(f"# Command: {' '.join(_mask_passwords(cmd)[:20])}...\n\n")
            # Flush the header before the child starts writing to the same file.
            lf.flush()

            proc = subprocess.Popen(
                cmd,
                stdout=lf,
                stderr=subprocess.STDOUT,
            )

            while proc.poll() is None:
                if _current_job_status(job_id) == "cancelling":
                    log.info(f"Job {job_id} cancel requested, terminating process")
                    proc.terminate()
                    try:
                        proc.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        proc.kill()
                        proc.wait()
                    cancelled = True
                    break

                time.sleep(3)

                elapsed = time.time() - started
                if elapsed > 7200:
                    log.error(f"Job {job_id} timed out after 2h")
                    proc.kill()
                    proc.wait()
                    break

            # A cancelled run keeps exit_code 0; its status is decided below.
            if not cancelled and proc.returncode is not None:
                exit_code = proc.returncode

        with open(log_path, "r", errors="replace") as lf:
            output = lf.read()

    except Exception as e:
        log.error(f"Job {job_id} exception: {e}")
        exit_code = -2
        output = str(e)
        # The run is about to be recorded as failed and the job set to
        # idle, so imapsync must not keep running unsupervised.
        if proc is not None and proc.poll() is None:
            proc.kill()
            proc.wait()
        try:
            with open(log_path, "a") as lf:
                lf.write(f"\n\nEXCEPTION: {e}\n")
        except OSError as log_err:
            # E.g. the log file could not be created in the first place;
            # the run result must still be written to the database.
            log.error(f"Job {job_id} could not write to log file: {log_err}")

    duration = int(time.time() - started)
    stats = parse_imapsync_output(output)

    # A cancel request may have arrived just as the process ended on its own.
    final_status = _current_job_status(job_id)
    if not cancelled and final_status == "cancelling":
        cancelled = True

    if cancelled:
        job_status = "cancelled"
        with open(log_path, "a") as lf:
            lf.write("\n\nCANCELLED: Job was cancelled by user\n")
    elif exit_code == 0:
        job_status = "done"
    else:
        job_status = "failed"

    conn = get_db()
    try:
        # Rows already marked 'cancelled' (by something other than this
        # function) are not overwritten.
        conn.execute("""
            UPDATE job_runs SET
                status=?, finished_at=?, messages_synced=?,
                messages_skipped=?, errors=?, duration_sec=?
            WHERE id=? AND status != 'cancelled'
        """, (job_status, datetime.utcnow().isoformat(),
              stats["messages_synced"], stats["messages_skipped"],
              stats["errors"], duration, run_id))

        if job_status == "cancelled":
            # Record the duration even when the guarded update above
            # skipped an already-cancelled row.
            conn.execute("UPDATE job_runs SET duration_sec=? WHERE id=?", (duration, run_id))

        conn.execute(
            "UPDATE sync_jobs SET status='idle' WHERE id=?",
            (job_id,)
        )
        conn.commit()
    finally:
        conn.close()

    log.info(
        f"Job {job_id} finished [{job_status}] in {duration}s — "
        f"synced={stats['messages_synced']} skipped={stats['messages_skipped']} "
        f"errors={stats['errors']}"
    )
def recover_stale_jobs():
    """Return jobs stuck in ``running`` or ``cancelling`` to ``idle``.

    For each such job, its ``job_runs`` rows that are still ``running``
    are marked ``failed`` with the current UTC time as ``finished_at``,
    the job itself is set to ``idle`` and a warning is logged. All
    changes are committed together at the end.
    """
    db = get_db()
    try:
        cursor = db.execute(
            "SELECT id FROM sync_jobs WHERE status IN ('running', 'cancelling')"
        )
        stale_ids = [record["id"] for record in cursor.fetchall()]

        for job_id in stale_ids:
            finished_at = datetime.utcnow().isoformat()
            db.execute(
                "UPDATE job_runs SET status='failed', finished_at=? "
                "WHERE job_id=? AND status='running'",
                (finished_at, job_id),
            )
            db.execute(
                "UPDATE sync_jobs SET status='idle' WHERE id=?",
                (job_id,),
            )
            log.warning(f"Recovered stale job {job_id} → idle (worker restart)")

        db.commit()
    finally:
        db.close()
def main():
    """Run the worker loop forever.

    Startup: wait up to 30 x 2 seconds for the database file to appear.

    Loop: on the first successful pass, reset stale jobs via
    ``recover_stale_jobs()``; then queue due cron jobs, pick the oldest
    enabled job with status ``queued`` (by ``created_at``) and run it. If
    no job is queued, sleep ``POLL_INTERVAL`` seconds.

    Any exception inside the loop is logged with a traceback and followed
    by a ``POLL_INTERVAL`` sleep. This includes a failing stale-job
    recovery (for example when the schema is not ready yet), which is
    retried on the next pass before any job is picked up.
    """
    log.info(f"Worker started. DB={DB_PATH} LOG_DIR={LOG_DIR} POLL={POLL_INTERVAL}s")

    for i in range(30):
        if os.path.exists(DB_PATH):
            break
        log.info(f"Waiting for DB... ({i+1}/30)")
        time.sleep(2)
    else:
        if not os.path.exists(DB_PATH):
            log.warning(f"DB file {DB_PATH} not found after waiting; continuing anyway")

    recovered = False
    while True:
        try:
            if not recovered:
                # Must succeed once before jobs are processed, so that
                # jobs left 'running' by a previous worker are reset.
                recover_stale_jobs()
                recovered = True

            check_due_schedules()

            conn = get_db()
            try:
                job = conn.execute("""
                    SELECT j.*,
                        s1.host as src_host, s1.port as src_port, s1.ssl as src_ssl,
                        s2.host as dst_host, s2.port as dst_port, s2.ssl as dst_ssl
                    FROM sync_jobs j
                    JOIN servers s1 ON j.src_server_id = s1.id
                    JOIN servers s2 ON j.dst_server_id = s2.id
                    WHERE j.status='queued' AND j.enabled=1
                    ORDER BY j.created_at ASC LIMIT 1
                """).fetchone()
            finally:
                # Close the polling connection even when the query fails.
                conn.close()

            if job:
                run_job(job)
            else:
                time.sleep(POLL_INTERVAL)

        except Exception as e:
            log.error(f"Worker loop error: {e}", exc_info=True)
            time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()