From 888e6bfa563cabad345b42ea3326e634d13b1c55 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Thu, 11 Sep 2025 14:28:06 +0200 Subject: [PATCH 01/31] move delete_inactive_users to new implementation --- chatmaild/pyproject.toml | 2 +- .../src/chatmaild/delete_inactive_users.py | 31 --- chatmaild/src/chatmaild/expire.py | 194 ++++++++++++++++++ .../tests/test_delete_inactive_users.py | 4 +- chatmaild/src/chatmaild/tests/test_expire.py | 39 ++++ cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 | 4 +- 6 files changed, 239 insertions(+), 35 deletions(-) delete mode 100644 chatmaild/src/chatmaild/delete_inactive_users.py create mode 100644 chatmaild/src/chatmaild/expire.py create mode 100644 chatmaild/src/chatmaild/tests/test_expire.py diff --git a/chatmaild/pyproject.toml b/chatmaild/pyproject.toml index 8fa212f8..5e431b04 100644 --- a/chatmaild/pyproject.toml +++ b/chatmaild/pyproject.toml @@ -27,7 +27,7 @@ chatmail-metadata = "chatmaild.metadata:main" filtermail = "chatmaild.filtermail:main" echobot = "chatmaild.echo:main" chatmail-metrics = "chatmaild.metrics:main" -delete_inactive_users = "chatmaild.delete_inactive_users:main" +expire = "chatmaild.expire:main" lastlogin = "chatmaild.lastlogin:main" turnserver = "chatmaild.turnserver:main" diff --git a/chatmaild/src/chatmaild/delete_inactive_users.py b/chatmaild/src/chatmaild/delete_inactive_users.py deleted file mode 100644 index 81467852..00000000 --- a/chatmaild/src/chatmaild/delete_inactive_users.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Remove inactive users -""" - -import os -import shutil -import sys -import time - -from .config import read_config - - -def delete_inactive_users(config): - cutoff_date = time.time() - config.delete_inactive_users_after * 86400 - for addr in os.listdir(config.mailboxes_dir): - try: - user = config.get_user(addr) - except ValueError: - continue - - read_timestamp = user.get_last_login_timestamp() - if read_timestamp and read_timestamp < cutoff_date: - path = config.mailboxes_dir.joinpath(addr) - assert path == user.maildir - shutil.rmtree(path, ignore_errors=True) - - -def main(): - (cfgpath,) = sys.argv[1:] - config = read_config(cfgpath) - delete_inactive_users(config) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py new file mode 100644 index 00000000..c0e85085 --- /dev/null +++ b/chatmaild/src/chatmaild/expire.py @@ -0,0 +1,194 @@ +import sys +import os +import shutil +import logging +import time +from stat import S_ISREG +from pathlib import Path +from datetime import datetime +from collections import namedtuple + +# delete already seen big mails after 7 days, in the INBOX +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete +# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete +## or in any IMAP subfolder +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete +## even if they are unseen +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete +## or only temporary (but then they shouldn't be around after {{ config.delete_mails_after }} days anyway). +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete +# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete +# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete + + +FileEntry = namedtuple("FileEntry", ["relpath", "mtime", "size"]) +dayseconds = 24 * 60 * 60 +monthseconds = dayseconds * 30 + + +def joinpath(name, extra): + return name + "/" + extra + + +def D(timestamp, now=datetime.utcnow().timestamp()): + diff_seconds = int(now) - int(timestamp) + # assert diff_seconds >= 0, (int(timestamp), int(now)) + return f"{int(diff_seconds / dayseconds):2.0f}d" + + +def K(size): + return f"{int(size/1000):6.0f}K" + + +def M(size): + return f"{int(size/1000000):6.0f}M" + + +now = datetime.utcnow().timestamp() + + +class Stats: + def __init__(self, basedir): + self.basedir = str(basedir) + self.mailboxes = [] + + def iter_mailboxes(self, maxnum=None): + for mailbox in os.listdir(self.basedir)[:maxnum]: + if "@" in mailbox: + mailboxdir = joinpath(self.basedir, mailbox) + self.mailboxes.append(MailboxStat(mailboxdir)) + + +class MailboxStat: + def __init__(self, mailboxdir): + self.mailboxdir = mailboxdir = str(mailboxdir) + self.messages = [] + self.extrafiles = [] + + for name in os.listdir(mailboxdir): + fpath = joinpath(mailboxdir, name) + if name in ("cur", "new", "tmp"): + for msg_name in os.listdir(fpath): + msg_path = joinpath(fpath, msg_name) + st = os.stat(msg_path) + relpath = joinpath(name, msg_name) + self.messages.append( + FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) + ) + else: + st = os.stat(fpath) + if S_ISREG(st.st_mode): + self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) + self.extrafiles.sort(key=lambda x: x.size, reverse=True) + + @property + def last_login(self): + for entry in self.extrafiles: + if entry.relpath == "password": + return entry.mtime + + def get_messages(self, prefix=""): + l = [] + for entry in self.messages: + if entry.relpath.startswith(prefix): + l.append(entry) + return l + + def get_extra_files(self): + return list(self.extrafiles) + + +class XXXStats: + def __init__(self): + self.sum_extra = 0 + self.sum_all_messages = 0 + self.logins = [] + self.messages = [] + + def analyze(self, statscache): + print("start") + for mailbox in statscache.cache: + mbox_cache = statscache.cache[mailbox] + if "password" not in mbox_cache: + continue + self.logins.append(mbox_cache["password"][0]) + for relpath, (mtime, size) in mbox_cache.items(): + if relpath[:4] in ("cur/", "new/", "tmp/"): + self.sum_all_messages += size + entry = FileEntry(relpath=relpath, mtime=mtime, size=size) + self.messages.append(entry) + else: + self.sum_extra += size + + def dump_summary(self): + print(f"size of everything: {M(self.sum_extra + self.sum_all_messages)}") + print(f"size all messages: {M(self.sum_all_messages)}") + percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100 + print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)") + for size in (100000, 200000, 500000, 1000000, 5000000): + all_of_size = sum( + x.size + for x in self.messages + if x.size > size and x.relpath.startswith("cur") + ) + percent = all_of_size / self.sum_all_messages * 100 + print(f"size seen {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)") + for size in (100000, 200000, 500000, 1000000, 5000000): + all_of_size = sum( + x.size + for x in self.messages + if x.size > size and x.mtime < now - 2 * dayseconds + ) + percent = all_of_size / self.sum_all_messages * 100 + print( + f"size 2day-old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)" + ) + for size in (100000, 200000, 500000, 1000000, 5000000): + all_of_size = sum( + x.size + for x in self.messages + if x.size > size + and x.relpath.startswith("cur") + and x.mtime < now - 7 * dayseconds + ) + percent = all_of_size / self.sum_all_messages * 100 + print( + f"size seen 7-day old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)" + ) + + print() + + num_logins = len(self.logins) + monthly_active = len([x for x in self.logins if x >= now - monthseconds]) + daily_active = len([x for x in self.logins if x >= now - dayseconds]) + stale = num_logins - monthly_active + + def p(num): + return f"({num/num_logins * 100:.2f}%)" + + print(f"all logins: {K(num_logins)}") + print(f"monthly active: {K(monthly_active)} {p(monthly_active)}") + print(f">1m old logins: {K(stale)} {p(stale)}") + print(f"daily active: {K(daily_active)} {p(daily_active)}") + + +def run_expire(config, basedir): + stat = Stats(basedir) + stat.iter_mailboxes() + cutoff_date = time.time() - config.delete_inactive_users_after * 86400 + + num = 0 + for mbox in stat.mailboxes: + if mbox.last_login < cutoff_date: + logging.info("removing outdated mailbox %s", mbox.mailboxdir) + shutil.rmtree(mbox.mailboxdir, ignore_errors=True) + num += 1 + print(f"expired {num} mailboxes") + + +if __name__ == "__main__": + cfgpath, basedir = sys.argv[1:] + config = read_config(cfgpath) + run_expire(config, basedir) diff --git a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py index 937237b4..3eb21d12 100644 --- a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py +++ b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py @@ -1,6 +1,6 @@ import time -from chatmaild.delete_inactive_users import delete_inactive_users +from chatmaild.expire import run_expire from chatmaild.doveauth import AuthDictProxy @@ -45,7 +45,7 @@ def create_user(addr, last_login): for addr in to_remove: assert example_config.get_user(addr).maildir.exists() - delete_inactive_users(example_config) + run_expire(example_config, example_config.mailboxes_dir) for p in example_config.mailboxes_dir.iterdir(): assert not p.name.startswith("old") diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py new file mode 100644 index 00000000..a2a40a57 --- /dev/null +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -0,0 +1,39 @@ +from chatmaild.expire import MailboxStat + + +def test_stats_mailbox(tmp_path): + mailboxdir = tmp_path + password = mailboxdir.joinpath("password") + password.write_text("xxx") + + garbagedir = mailboxdir.joinpath("garbagedir") + garbagedir.mkdir() + + cur = mailboxdir.joinpath("cur") + new = mailboxdir.joinpath("new") + cur.mkdir() + msg_cur = cur.joinpath("msg1") + msg_cur.write_text("xxx") + new.mkdir() + msg_new = new.joinpath("msg2") + msg_new.write_text("xxx123") + + mbox = MailboxStat(tmp_path) + assert mbox.last_login == password.stat().st_mtime + assert len(mbox.messages) == 2 + + seen = mbox.get_messages("cur") + assert len(seen) == 1 + assert seen[0].size == 3 + + new = mbox.get_messages("new") + assert len(new) == 1 + assert new[0].size == 6 + + extra = mailboxdir.joinpath("large") + extra.write_text("x" * 1000) + mailboxdir.joinpath("index-something").write_text("123") + mbox = MailboxStat(tmp_path) + extrafiles = mbox.get_extra_files() + assert len(extrafiles) == 3 + assert extrafiles[0].size == 1000 diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 index 9eb27182..a9f10e8e 100644 --- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 +++ b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 @@ -11,4 +11,6 @@ 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete -4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/delete_inactive_users /usr/local/lib/chatmaild/chatmail.ini + +# ported +4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} From f86360006a78de14e2d57ff6423e3c757115c625 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Thu, 11 Sep 2025 15:32:30 +0200 Subject: [PATCH 02/31] do all expunging in python --- chatmaild/src/chatmaild/expire.py | 94 ++++++++++++------- .../tests/test_delete_inactive_users.py | 2 +- cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 | 18 +--- 3 files changed, 65 insertions(+), 49 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index c0e85085..01b375f0 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -1,25 +1,16 @@ -import sys import os import shutil -import logging +import sys import time -from stat import S_ISREG -from pathlib import Path -from datetime import datetime from collections import namedtuple +from datetime import datetime +from stat import S_ISREG + +from chatmaild.config import read_config # delete already seen big mails after 7 days, in the INBOX # 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete # # delete all mails after {{ config.delete_mails_after }} days, in the Inbox -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete -## or in any IMAP subfolder -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete -## even if they are unseen -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete -## or only temporary (but then they shouldn't be around after {{ config.delete_mails_after }} days anyway). -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete # 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete @@ -46,16 +37,14 @@ def M(size): return f"{int(size/1000000):6.0f}M" -now = datetime.utcnow().timestamp() - - class Stats: - def __init__(self, basedir): + def __init__(self, basedir, maxnum=None): self.basedir = str(basedir) self.mailboxes = [] + self.maxnum = maxnum - def iter_mailboxes(self, maxnum=None): - for mailbox in os.listdir(self.basedir)[:maxnum]: + def iter_mailboxes(self): + for mailbox in os.listdir(self.basedir)[: self.maxnum]: if "@" in mailbox: mailboxdir = joinpath(self.basedir, mailbox) self.mailboxes.append(MailboxStat(mailboxdir)) @@ -99,6 +88,11 @@ def get_messages(self, prefix=""): def get_extra_files(self): return list(self.extrafiles) + def get_file_entry(self, name): + for entry in self.extrafiles: + if name == entry.relapth: + return entry + class XXXStats: def __init__(self): @@ -123,6 +117,8 @@ def analyze(self, statscache): self.sum_extra += size def dump_summary(self): + now = datetime.utcnow().timestamp() + print(f"size of everything: {M(self.sum_extra + self.sum_all_messages)}") print(f"size all messages: {M(self.sum_all_messages)}") percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100 @@ -174,21 +170,55 @@ def p(num): print(f"daily active: {K(daily_active)} {p(daily_active)}") -def run_expire(config, basedir): - stat = Stats(basedir) +def run_expire(config, basedir, dry=False, maxnum=None): + now = time.time() + + stat = Stats(basedir, maxnum=maxnum) stat.iter_mailboxes() - cutoff_date = time.time() - config.delete_inactive_users_after * 86400 + cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400 + cutoff_date_mails = now - int(config.delete_mails_after) * 86400 + cutoff_date_large_mails = now - int(config.delete_large_after) * 86400 + + def rmtree(path): + if dry: + print("would remove mailbox", path) + else: + shutil.rmtree(path, ignore_errors=True) + + def unlink(mailboxdir, message): + if dry: + relpath = os.path.basename(mailboxdir) + message.relpath + print(f"would remove {D(message.mtime)} {K(message.size)} {relpath}") + else: + os.unlink(path) - num = 0 for mbox in stat.mailboxes: - if mbox.last_login < cutoff_date: - logging.info("removing outdated mailbox %s", mbox.mailboxdir) - shutil.rmtree(mbox.mailboxdir, ignore_errors=True) - num += 1 - print(f"expired {num} mailboxes") + changed = False + if mbox.last_login and mbox.last_login < cutoff_date_without_login: + rmtree(mbox.mailboxdir) + continue + for message in mbox.messages: + path = joinpath(mbox.mailboxdir, message.relpath) + if message.mtime < cutoff_date_mails: + unlink(mbox.mailboxdir, message) + elif message.size > 200000 and message.mtime < cutoff_date_large_mails: + unlink(mbox.mailboxdir, message) + else: + continue + changed = True + if changed and not dry: + p = joinpath(mbox.mailboxdir, "maildirsize") + try: + os.unlink(p) + except FileNotFoundError: + pass -if __name__ == "__main__": - cfgpath, basedir = sys.argv[1:] +def main(): + cfgpath, basedir, maxnum = sys.argv[1:] config = read_config(cfgpath) - run_expire(config, basedir) + run_expire(config, basedir, dry=True, maxnum=int(maxnum)) + + +if __name__ == "__main__": + main() diff --git a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py index 3eb21d12..a7c74a9e 100644 --- a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py +++ b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py @@ -1,7 +1,7 @@ import time -from chatmaild.expire import run_expire from chatmaild.doveauth import AuthDictProxy +from chatmaild.expire import run_expire def test_login_timestamps(example_config): diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 index a9f10e8e..dc9af7e0 100644 --- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 +++ b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 @@ -1,16 +1,2 @@ -# delete already seen big mails after 7 days, in the INBOX -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete -# delete all mails after {{ config.delete_mails_after }} days, in the Inbox -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete -# or in any IMAP subfolder -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/cur/*' -mtime +{{ config.delete_mails_after }} -type f -delete -# even if they are unseen -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/new/*' -mtime +{{ config.delete_mails_after }} -type f -delete -# or only temporary (but then they shouldn't be around after {{ config.delete_mails_after }} days anyway). -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete -2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/.*/tmp/*' -mtime +{{ config.delete_mails_after }} -type f -delete -3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete - -# ported -4 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} +# expire mailboxes and old or too large messages +2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} From 31ac558f06e5f7d1f37753e38d9475dc2cb12290 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Thu, 11 Sep 2025 16:17:50 +0200 Subject: [PATCH 03/31] add summary reporting, rework expiry logic --- chatmaild/pyproject.toml | 1 + chatmaild/src/chatmaild/expire.py | 161 +++++-------------- chatmaild/src/chatmaild/fsreport.py | 150 +++++++++++++++++ chatmaild/src/chatmaild/tests/test_expire.py | 27 ++-- 4 files changed, 211 insertions(+), 128 deletions(-) create mode 100644 chatmaild/src/chatmaild/fsreport.py diff --git a/chatmaild/pyproject.toml b/chatmaild/pyproject.toml index 5e431b04..a09ed122 100644 --- a/chatmaild/pyproject.toml +++ b/chatmaild/pyproject.toml @@ -28,6 +28,7 @@ filtermail = "chatmaild.filtermail:main" echobot = "chatmaild.echo:main" chatmail-metrics = "chatmaild.metrics:main" expire = "chatmaild.expire:main" +fsreport = "chatmaild.fsreport:main" lastlogin = "chatmaild.lastlogin:main" turnserver = "chatmaild.turnserver:main" diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 01b375f0..341cfbae 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -1,40 +1,45 @@ +""" +Expire old messages and addresses. + +""" + import os import shutil import sys -import time -from collections import namedtuple from datetime import datetime from stat import S_ISREG from chatmaild.config import read_config -# delete already seen big mails after 7 days, in the INBOX -# 2 0 * * * vmail find {{ config.mailboxes_dir }} -path '*/cur/*' -mtime +{{ config.delete_large_after }} -size +200k -type f -delete -# # delete all mails after {{ config.delete_mails_after }} days, in the Inbox -# 3 0 * * * vmail find {{ config.mailboxes_dir }} -name 'maildirsize' -type f -delete - +# XXX maildirsize (used by dovecot quota) needs to be removed after removing files -FileEntry = namedtuple("FileEntry", ["relpath", "mtime", "size"]) -dayseconds = 24 * 60 * 60 -monthseconds = dayseconds * 30 +class FileEntry: + def __init__(self, relpath, mtime, size): + self.relpath = relpath + self.mtime = mtime + self.size = size -def joinpath(name, extra): - return name + "/" + extra + def __repr__(self): + return f"" + def fmt_size(self): + return f"{int(self.size/1000):5.0f}K" -def D(timestamp, now=datetime.utcnow().timestamp()): - diff_seconds = int(now) - int(timestamp) - # assert diff_seconds >= 0, (int(timestamp), int(now)) - return f"{int(diff_seconds / dayseconds):2.0f}d" + def fmt_since(self, now): + diff_seconds = int(now) - int(self.mtime) + return f"{int(diff_seconds / 86400):2.0f}d" + def __eq__(self, other): + return ( + self.relpath == other.relpath + and self.size == other.size + and self.mtime == other.mtime + ) -def K(size): - return f"{int(size/1000):6.0f}K" - -def M(size): - return f"{int(size/1000000):6.0f}M" +def joinpath(name, extra): + return name + "/" + extra class Stats: @@ -53,9 +58,16 @@ def iter_mailboxes(self): class MailboxStat: def __init__(self, mailboxdir): self.mailboxdir = mailboxdir = str(mailboxdir) + # all detected messages in cur/new/tmp folders self.messages = [] + + # all detected files in mailbox top dir self.extrafiles = [] + # total size of all detected files + self.totalsize = 0 + + # scan all relevant files (without recursion) for name in os.listdir(mailboxdir): fpath = joinpath(mailboxdir, name) if name in ("cur", "new", "tmp"): @@ -66,11 +78,13 @@ def __init__(self, mailboxdir): self.messages.append( FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) ) + self.totalsize += st.st_size else: st = os.stat(fpath) if S_ISREG(st.st_mode): self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) - self.extrafiles.sort(key=lambda x: x.size, reverse=True) + self.totalsize += st.st_size + self.extrafiles.sort(key=lambda x: -x.size) @property def last_login(self): @@ -78,101 +92,8 @@ def last_login(self): if entry.relpath == "password": return entry.mtime - def get_messages(self, prefix=""): - l = [] - for entry in self.messages: - if entry.relpath.startswith(prefix): - l.append(entry) - return l - - def get_extra_files(self): - return list(self.extrafiles) - - def get_file_entry(self, name): - for entry in self.extrafiles: - if name == entry.relapth: - return entry - - -class XXXStats: - def __init__(self): - self.sum_extra = 0 - self.sum_all_messages = 0 - self.logins = [] - self.messages = [] - - def analyze(self, statscache): - print("start") - for mailbox in statscache.cache: - mbox_cache = statscache.cache[mailbox] - if "password" not in mbox_cache: - continue - self.logins.append(mbox_cache["password"][0]) - for relpath, (mtime, size) in mbox_cache.items(): - if relpath[:4] in ("cur/", "new/", "tmp/"): - self.sum_all_messages += size - entry = FileEntry(relpath=relpath, mtime=mtime, size=size) - self.messages.append(entry) - else: - self.sum_extra += size - - def dump_summary(self): - now = datetime.utcnow().timestamp() - - print(f"size of everything: {M(self.sum_extra + self.sum_all_messages)}") - print(f"size all messages: {M(self.sum_all_messages)}") - percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100 - print(f"size extra files: {M(self.sum_extra)} ({percent:.2f}%)") - for size in (100000, 200000, 500000, 1000000, 5000000): - all_of_size = sum( - x.size - for x in self.messages - if x.size > size and x.relpath.startswith("cur") - ) - percent = all_of_size / self.sum_all_messages * 100 - print(f"size seen {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)") - for size in (100000, 200000, 500000, 1000000, 5000000): - all_of_size = sum( - x.size - for x in self.messages - if x.size > size and x.mtime < now - 2 * dayseconds - ) - percent = all_of_size / self.sum_all_messages * 100 - print( - f"size 2day-old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)" - ) - for size in (100000, 200000, 500000, 1000000, 5000000): - all_of_size = sum( - x.size - for x in self.messages - if x.size > size - and x.relpath.startswith("cur") - and x.mtime < now - 7 * dayseconds - ) - percent = all_of_size / self.sum_all_messages * 100 - print( - f"size seen 7-day old {K(size)} messages: {M(all_of_size)} ({percent:.2f}%)" - ) - - print() - - num_logins = len(self.logins) - monthly_active = len([x for x in self.logins if x >= now - monthseconds]) - daily_active = len([x for x in self.logins if x >= now - dayseconds]) - stale = num_logins - monthly_active - - def p(num): - return f"({num/num_logins * 100:.2f}%)" - - print(f"all logins: {K(num_logins)}") - print(f"monthly active: {K(monthly_active)} {p(monthly_active)}") - print(f">1m old logins: {K(stale)} {p(stale)}") - print(f"daily active: {K(daily_active)} {p(daily_active)}") - - -def run_expire(config, basedir, dry=False, maxnum=None): - now = time.time() +def run_expire(config, basedir, now, dry=True, maxnum=None): stat = Stats(basedir, maxnum=maxnum) stat.iter_mailboxes() cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400 @@ -188,7 +109,9 @@ def rmtree(path): def unlink(mailboxdir, message): if dry: relpath = os.path.basename(mailboxdir) + message.relpath - print(f"would remove {D(message.mtime)} {K(message.size)} {relpath}") + print( + f"would remove {message.fmt_since(now)} {message.fmt_size()} {relpath}" + ) else: os.unlink(path) @@ -217,7 +140,9 @@ def unlink(mailboxdir, message): def main(): cfgpath, basedir, maxnum = sys.argv[1:] config = read_config(cfgpath) - run_expire(config, basedir, dry=True, maxnum=int(maxnum)) + now = datetime.utcnow().timestamp() + now = datetime(2025, 9, 9).timestamp() + run_expire(config, basedir, maxnum=int(maxnum), now=now) if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py new file mode 100644 index 00000000..91a59302 --- /dev/null +++ b/chatmaild/src/chatmaild/fsreport.py @@ -0,0 +1,150 @@ +import os +import sys +from datetime import datetime + +from chatmaild.config import read_config +from chatmaild.expire import FileEntry, Stats, joinpath + +DAYSECONDS = 24 * 60 * 60 +MONTHSECONDS = DAYSECONDS * 30 + + +def D(timestamp, now=datetime.utcnow().timestamp()): + diff_seconds = int(now) - int(timestamp) + # assert diff_seconds >= 0, (int(timestamp), int(now)) + return f"{int(diff_seconds / DAYSECONDS):2.0f}d" + + +def K(size): + if size < 1000: + return f"{size:5.0f}" + return f"{int(size/1000):5.0f}K" + + +def M(size): + return f"{int(size/1000000):5.0f}M" + + +def H(size): + if size < 1000 * 1000: + return K(size) + if size < 1000 * 1000 * 1000: + return M(size) + return f"{size/1000000000:2.2f}G" + + +class Report: + def __init__(self, stats, now): + self.sum_extra = 0 + self.sum_all_messages = 0 + self.messages = [] + self.user_logins = [] + self.ci_logins = [] + self.stats = stats + self.now = now + + for mailbox in stats.mailboxes: + last_login = mailbox.last_login + if last_login: + if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": + self.ci_logins.append(last_login) + else: + self.user_logins.append(last_login) + for entry in mailbox.messages: + new = FileEntry( + relpath=joinpath( + os.path.basename(mailbox.mailboxdir), entry.relpath + ), + mtime=entry.mtime, + size=entry.size, + ) + self.messages.append(new) + self.sum_all_messages += entry.size + + for entry in mailbox.extrafiles: + self.sum_extra += entry.size + + def dump_summary(self): + reports = [] + + def print_messages(title, messages, num, rep=True): + print() + allsize = sum(x.size for x in messages) + if rep: + reports.append((title, allsize)) + + print(f"## {title} [total: {H(allsize)}]") + for entry in messages[:num]: + print(f"{K(entry.size)} {D(entry.mtime)} {entry.relpath}") + + for kind in ("cur", "new"): + biggest = list(self.messages) + biggest.sort(key=lambda x: (-x.size, x.mtime)) + print_messages(f"Biggest {kind} messages", biggest, 10, rep=False) + + oldest = self.messages + mode = "cur" + for maxsize in (160000, 500000, 2000000, 10000000): + oldest = [x for x in oldest if x.size > maxsize and mode in x.relpath] + oldest.sort(key=lambda x: x.mtime) + print_messages(f"{mode} folders oldest > {K(maxsize)} messages", oldest, 10) + + # list all 160K files of people who haven't logged in for a while + messages = [] + cutoff_date_login = self.now - 30 * DAYSECONDS + for mstat in self.stats.mailboxes: + if mstat.last_login and mstat.last_login < cutoff_date_login: + for msg in mstat.messages: + if msg.size > 160000: + messages.append(msg) + + messages.sort(key=lambda x: x.size) + print_messages(">30-day last_login new >160K", messages, 10) + + print() + print("## Overall mailbox storage use analysis") + print(f"Mailbox data: {M(self.sum_extra + self.sum_all_messages)}") + print(f"Messages : {M(self.sum_all_messages)}") + percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100 + print(f"Extra files : {M(self.sum_extra)} ({percent:.2f}%)") + + for title, size in reports: + percent = size / self.sum_all_messages * 100 + print(f"{title:38} {M(size)} ({percent:.2f}%)") + + all_logins = len(self.user_logins) + len(self.ci_logins) + num_logins = len(self.user_logins) + ci_logins = len(self.ci_logins) + + def p(num): + return f"({num/num_logins * 100:2.2f}%)" + + print() + print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") + print(f"all: {K(all_logins)}") + print(f"non-ci: {K(num_logins)}") + print(f"ci: {K(ci_logins)}") + for days in (1, 10, 30, 40, 80, 100, 150): + active = len( + [x for x in self.user_logins if x >= self.now - days * DAYSECONDS] + ) + print(f"last {days:3} days: {K(active)} {p(active)}") + + +def run_report(config, basedir, maxnum=None, now=None): + stats = Stats(basedir, maxnum=maxnum) + stats.iter_mailboxes() + rep = Report(stats, now=now) + rep.dump_summary() + + +def main(): + cfgpath, basedir, maxnum = sys.argv[1:] + config = read_config(cfgpath) + now = datetime.utcnow().timestamp() + now = datetime(2025, 9, 9).timestamp() + run_report(config, basedir, maxnum=int(maxnum), now=now) + + +if __name__ == "__main__": + main() diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index a2a40a57..3b290a79 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -1,4 +1,14 @@ -from chatmaild.expire import MailboxStat +import random + +from chatmaild.expire import FileEntry, MailboxStat + + +def test_filentry_ordering(): + l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] + sorted = list(l) + random.shuffle(l) + l.sort(key=lambda x: x.size) + assert l == sorted def test_stats_mailbox(tmp_path): @@ -22,18 +32,15 @@ def test_stats_mailbox(tmp_path): assert mbox.last_login == password.stat().st_mtime assert len(mbox.messages) == 2 - seen = mbox.get_messages("cur") - assert len(seen) == 1 - assert seen[0].size == 3 + msgs = list(mbox.messages) + assert len(msgs) == 2 + assert msgs[0].size == 3 # cur - new = mbox.get_messages("new") - assert len(new) == 1 - assert new[0].size == 6 + assert msgs[1].size == 6 # new extra = mailboxdir.joinpath("large") extra.write_text("x" * 1000) mailboxdir.joinpath("index-something").write_text("123") mbox = MailboxStat(tmp_path) - extrafiles = mbox.get_extra_files() - assert len(extrafiles) == 3 - assert extrafiles[0].size == 1000 + assert len(mbox.extrafiles) == 3 + assert mbox.extrafiles[0].size == 1000 From 64a46d69d45bf807f54bc64a408416a663657b47 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Sun, 14 Sep 2025 15:02:45 +0200 Subject: [PATCH 04/31] refactor and write tests for overall expiry/report runs --- chatmaild/src/chatmaild/expire.py | 105 +++++++++++-------- chatmaild/src/chatmaild/fsreport.py | 56 +++++----- chatmaild/src/chatmaild/tests/test_expire.py | 82 +++++++++++---- 3 files changed, 143 insertions(+), 100 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 341cfbae..93b46bb0 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -11,8 +11,6 @@ from chatmaild.config import read_config -# XXX maildirsize (used by dovecot quota) needs to be removed after removing files - class FileEntry: def __init__(self, relpath, mtime, size): @@ -48,14 +46,19 @@ def __init__(self, basedir, maxnum=None): self.mailboxes = [] self.maxnum = maxnum - def iter_mailboxes(self): - for mailbox in os.listdir(self.basedir)[: self.maxnum]: - if "@" in mailbox: - mailboxdir = joinpath(self.basedir, mailbox) - self.mailboxes.append(MailboxStat(mailboxdir)) + def iter_mailboxes(self, callback=None): + for name in os.listdir(self.basedir)[: self.maxnum]: + if "@" in name: + mailboxdir = joinpath(self.basedir, name) + mailbox = MailboxStat(mailboxdir) + self.mailboxes.append(mailbox) + if callback is not None: + callback(mailbox) class MailboxStat: + last_login = None + def __init__(self, mailboxdir): self.mailboxdir = mailboxdir = str(mailboxdir) # all detected messages in cur/new/tmp folders @@ -83,66 +86,76 @@ def __init__(self, mailboxdir): st = os.stat(fpath) if S_ISREG(st.st_mode): self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) + if name == "password": + self.last_login = st.st_mtime self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) - @property - def last_login(self): - for entry in self.extrafiles: - if entry.relpath == "password": - return entry.mtime - -def run_expire(config, basedir, now, dry=True, maxnum=None): - stat = Stats(basedir, maxnum=maxnum) - stat.iter_mailboxes() - cutoff_date_without_login = now - int(config.delete_inactive_users_after) * 86400 - cutoff_date_mails = now - int(config.delete_mails_after) * 86400 - cutoff_date_large_mails = now - int(config.delete_large_after) * 86400 +class Expiry: + def __init__(self, config, stat, dry, now): + self.config = config + self.dry = dry + self.now = now - def rmtree(path): - if dry: + def rmtree(self, path): + if not self.dry: print("would remove mailbox", path) else: shutil.rmtree(path, ignore_errors=True) - def unlink(mailboxdir, message): - if dry: - relpath = os.path.basename(mailboxdir) + message.relpath - print( - f"would remove {message.fmt_since(now)} {message.fmt_size()} {relpath}" - ) + def unlink(self, mailboxdir, relpath): + path = joinpath(mailboxdir, relpath) + if not self.dry: + for message in self.messages: + if relpath == message.relpath: + print( + f"would remove {message.fmt_since(self.now)} {message.fmt_size()} {path}" + ) + break else: - os.unlink(path) + try: + os.unlink(path) + except FileNotFoundError: + pass # it's gone already, fine + + def process_mailbox_stat(self, mbox): + cutoff_without_login = ( + self.now - int(self.config.delete_inactive_users_after) * 86400 + ) + cutoff_mails = self.now - int(self.config.delete_mails_after) * 86400 + cutoff_large_mails = self.now - int(self.config.delete_large_after) * 86400 - for mbox in stat.mailboxes: changed = False - if mbox.last_login and mbox.last_login < cutoff_date_without_login: - rmtree(mbox.mailboxdir) - continue + if mbox.last_login and mbox.last_login < cutoff_without_login: + self.rmtree(mbox.mailboxdir) + return + for message in mbox.messages: - path = joinpath(mbox.mailboxdir, message.relpath) - if message.mtime < cutoff_date_mails: - unlink(mbox.mailboxdir, message) - elif message.size > 200000 and message.mtime < cutoff_date_large_mails: - unlink(mbox.mailboxdir, message) + if message.mtime < cutoff_mails: + self.unlink(mbox.mailboxdir, message.relpath) + elif message.size > 200000 and message.mtime < cutoff_large_mails: + self.unlink(mbox.mailboxdir, message.relpath) else: continue changed = True - if changed and not dry: - p = joinpath(mbox.mailboxdir, "maildirsize") - try: - os.unlink(p) - except FileNotFoundError: - pass + if changed: + self.unlink(mbox.mailboxdir, "maildirsize") -def main(): - cfgpath, basedir, maxnum = sys.argv[1:] +def main(args=None): + if args is None: + args = sys.argv[1:] + else: + args = list(map(str, args)) + cfgpath, basedir, maxnum = args config = read_config(cfgpath) now = datetime.utcnow().timestamp() now = datetime(2025, 9, 9).timestamp() - run_expire(config, basedir, maxnum=int(maxnum), now=now) + + stat = Stats(basedir, maxnum=int(maxnum)) + exp = Expiry(config, stat, dry=True, now=now) + stat.iter_mailboxes(exp.process_mailbox_stat) if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 91a59302..009a2af2 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -2,7 +2,6 @@ import sys from datetime import datetime -from chatmaild.config import read_config from chatmaild.expire import FileEntry, Stats, joinpath DAYSECONDS = 24 * 60 * 60 @@ -18,6 +17,8 @@ def D(timestamp, now=datetime.utcnow().timestamp()): def K(size): if size < 1000: return f"{size:5.0f}" + elif size < 10000: + return f"{size/1000:3.2f}K" return f"{int(size/1000):5.0f}K" @@ -43,26 +44,24 @@ def __init__(self, stats, now): self.stats = stats self.now = now - for mailbox in stats.mailboxes: - last_login = mailbox.last_login - if last_login: - if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": - self.ci_logins.append(last_login) - else: - self.user_logins.append(last_login) - for entry in mailbox.messages: - new = FileEntry( - relpath=joinpath( - os.path.basename(mailbox.mailboxdir), entry.relpath - ), - mtime=entry.mtime, - size=entry.size, - ) - self.messages.append(new) - self.sum_all_messages += entry.size - - for entry in mailbox.extrafiles: - self.sum_extra += entry.size + def process_mailbox_stat(self, mailbox): + last_login = mailbox.last_login + if last_login: + if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": + self.ci_logins.append(last_login) + else: + self.user_logins.append(last_login) + for entry in mailbox.messages: + new = FileEntry( + relpath=joinpath(os.path.basename(mailbox.mailboxdir), entry.relpath), + mtime=entry.mtime, + size=entry.size, + ) + self.messages.append(new) + self.sum_all_messages += entry.size + + for entry in mailbox.extrafiles: + self.sum_extra += entry.size def dump_summary(self): reports = [] @@ -131,19 +130,14 @@ def p(num): print(f"last {days:3} days: {K(active)} {p(active)}") -def run_report(config, basedir, maxnum=None, now=None): - stats = Stats(basedir, maxnum=maxnum) - stats.iter_mailboxes() - rep = Report(stats, now=now) - rep.dump_summary() - - def main(): - cfgpath, basedir, maxnum = sys.argv[1:] - config = read_config(cfgpath) + basedir, maxnum = sys.argv[1:] now = datetime.utcnow().timestamp() now = datetime(2025, 9, 9).timestamp() - run_report(config, basedir, maxnum=int(maxnum), now=now) + stats = Stats(basedir, maxnum=int(maxnum)) + rep = Report(stats, now=now) + stats.iter_mailboxes(rep.process_mailbox_stat) + rep.dump_summary() if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 3b290a79..2649fce5 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -1,46 +1,82 @@ import random +from datetime import datetime +from pathlib import Path + +import pytest from chatmaild.expire import FileEntry, MailboxStat +from chatmaild.expire import main as expiry_main +from chatmaild.fsreport import Report, Stats +# XXX maildirsize (used by dovecot quota) needs to be removed after removing files -def test_filentry_ordering(): - l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] - sorted = list(l) - random.shuffle(l) - l.sort(key=lambda x: x.size) - assert l == sorted - -def test_stats_mailbox(tmp_path): - mailboxdir = tmp_path - password = mailboxdir.joinpath("password") +@pytest.fixture +def mailboxdir1(tmp_path): + mailboxdir1 = tmp_path.joinpath("mailbox1@example.org") + mailboxdir1.mkdir() + password = mailboxdir1.joinpath("password") password.write_text("xxx") - garbagedir = mailboxdir.joinpath("garbagedir") + garbagedir = mailboxdir1.joinpath("garbagedir") garbagedir.mkdir() - cur = mailboxdir.joinpath("cur") - new = mailboxdir.joinpath("new") + cur = mailboxdir1.joinpath("cur") + new = mailboxdir1.joinpath("new") cur.mkdir() msg_cur = cur.joinpath("msg1") msg_cur.write_text("xxx") new.mkdir() msg_new = new.joinpath("msg2") msg_new.write_text("xxx123") + return mailboxdir1 + + +@pytest.fixture +def mbox1(mailboxdir1): + return MailboxStat(mailboxdir1) - mbox = MailboxStat(tmp_path) - assert mbox.last_login == password.stat().st_mtime - assert len(mbox.messages) == 2 - msgs = list(mbox.messages) +def test_filentry_ordering(): + l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] + sorted = list(l) + random.shuffle(l) + l.sort(key=lambda x: x.size) + assert l == sorted + + +def test_stats_mailbox(mbox1): + password = Path(mbox1.mailboxdir).joinpath("password") + assert mbox1.last_login == password.stat().st_mtime + assert len(mbox1.messages) == 2 + + msgs = list(mbox1.messages) assert len(msgs) == 2 assert msgs[0].size == 3 # cur - assert msgs[1].size == 6 # new - extra = mailboxdir.joinpath("large") + extra = Path(mbox1.mailboxdir).joinpath("large-extra") extra.write_text("x" * 1000) - mailboxdir.joinpath("index-something").write_text("123") - mbox = MailboxStat(tmp_path) - assert len(mbox.extrafiles) == 3 - assert mbox.extrafiles[0].size == 1000 + Path(mbox1.mailboxdir).joinpath("index-something").write_text("123") + mbox2 = MailboxStat(mbox1.mailboxdir) + assert len(mbox2.extrafiles) == 3 + assert mbox2.extrafiles[0].size == 1000 + + # cope well with mailbox dirs that have no password (for whatever reason) + Path(mbox1.mailboxdir).joinpath("password").unlink() + mbox3 = MailboxStat(mbox1.mailboxdir) + assert mbox3.last_login is None + + +def test_report(mbox1): + now = datetime.utcnow().timestamp() + mailboxes_dir = Path(mbox1.mailboxdir).parent + stats = Stats(str(mailboxes_dir), maxnum=None) + rep = Report(stats, now=now) + stats.iter_mailboxes(rep.process_mailbox_stat) + rep.dump_summary() + + +def test_expiry(example_config, mbox1): + args = example_config._inipath, mbox1.mailboxdir, 10000 + expiry_main(args) From 8d9371e01b1410bf0b759e1f5f67ec4c95427414 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Sun, 14 Sep 2025 15:13:25 +0200 Subject: [PATCH 05/31] add basic command line parsing for expire + some streamlining --- chatmaild/src/chatmaild/expire.py | 130 ++++++++++++------- chatmaild/src/chatmaild/fsreport.py | 18 +-- chatmaild/src/chatmaild/tests/test_expire.py | 105 ++++++++++----- 3 files changed, 157 insertions(+), 96 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 93b46bb0..8cee2ab3 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -4,8 +4,8 @@ """ import os -import shutil import sys +from argparse import ArgumentParser from datetime import datetime from stat import S_ISREG @@ -13,13 +13,20 @@ class FileEntry: - def __init__(self, relpath, mtime, size): + def __init__(self, basedir, relpath, mtime, size): + self.basedir = basedir self.relpath = relpath self.mtime = mtime self.size = size def __repr__(self): - return f"" + return f"" + + def __str__(self): + return self.get_path() + + def get_path(self): + return joinpath(self.basedir, self.relpath) def fmt_size(self): return f"{int(self.size/1000):5.0f}K" @@ -49,8 +56,8 @@ def __init__(self, basedir, maxnum=None): def iter_mailboxes(self, callback=None): for name in os.listdir(self.basedir)[: self.maxnum]: if "@" in name: - mailboxdir = joinpath(self.basedir, name) - mailbox = MailboxStat(mailboxdir) + basedir = joinpath(self.basedir, name) + mailbox = MailboxStat(basedir) self.mailboxes.append(mailbox) if callback is not None: callback(mailbox) @@ -59,8 +66,8 @@ def iter_mailboxes(self, callback=None): class MailboxStat: last_login = None - def __init__(self, mailboxdir): - self.mailboxdir = mailboxdir = str(mailboxdir) + def __init__(self, basedir): + self.basedir = basedir = str(basedir) # all detected messages in cur/new/tmp folders self.messages = [] @@ -71,55 +78,58 @@ def __init__(self, mailboxdir): self.totalsize = 0 # scan all relevant files (without recursion) - for name in os.listdir(mailboxdir): - fpath = joinpath(mailboxdir, name) + for name in os.listdir(basedir): + fpath = joinpath(basedir, name) if name in ("cur", "new", "tmp"): for msg_name in os.listdir(fpath): msg_path = joinpath(fpath, msg_name) st = os.stat(msg_path) relpath = joinpath(name, msg_name) self.messages.append( - FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) + FileEntry( + self.basedir, relpath, mtime=st.st_mtime, size=st.st_size + ) ) self.totalsize += st.st_size else: st = os.stat(fpath) if S_ISREG(st.st_mode): - self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) + self.extrafiles.append( + FileEntry(self.basedir, name, st.st_mtime, st.st_size) + ) if name == "password": self.last_login = st.st_mtime self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) +def print_info(msg): + print(msg, file=sys.stderr) + + class Expiry: def __init__(self, config, stat, dry, now): self.config = config self.dry = dry self.now = now - - def rmtree(self, path): - if not self.dry: - print("would remove mailbox", path) - else: - shutil.rmtree(path, ignore_errors=True) - - def unlink(self, mailboxdir, relpath): - path = joinpath(mailboxdir, relpath) - if not self.dry: - for message in self.messages: - if relpath == message.relpath: - print( - f"would remove {message.fmt_since(self.now)} {message.fmt_size()} {path}" - ) - break - else: - try: - os.unlink(path) - except FileNotFoundError: - pass # it's gone already, fine + self.del_files = [] + self.del_mailboxes = [] + + def perform_removes(self): + for mboxdir in self.del_mailboxes: + print_info(f"removing {mboxdir}") + if not self.dry: + self.rmtree(mboxdir) + for path in self.del_files: + print_info(f"removing {path}") + if not self.dry: + try: + os.unlink(path) + except FileNotFoundError: + pass # it's gone already, fine def process_mailbox_stat(self, mbox): + print_info(f"processing expiry for {mbox.basedir}") cutoff_without_login = ( self.now - int(self.config.delete_inactive_users_after) * 86400 ) @@ -128,35 +138,55 @@ def process_mailbox_stat(self, mbox): changed = False if mbox.last_login and mbox.last_login < cutoff_without_login: - self.rmtree(mbox.mailboxdir) + self.del_mailboxes.append(mbox.basedir) return for message in mbox.messages: if message.mtime < cutoff_mails: - self.unlink(mbox.mailboxdir, message.relpath) + self.del_files.append(message.get_path()) elif message.size > 200000 and message.mtime < cutoff_large_mails: - self.unlink(mbox.mailboxdir, message.relpath) + self.del_files.append(message.get_path()) else: continue changed = True if changed: - self.unlink(mbox.mailboxdir, "maildirsize") - - -def main(args=None): - if args is None: - args = sys.argv[1:] - else: - args = list(map(str, args)) - cfgpath, basedir, maxnum = args - config = read_config(cfgpath) + self.del_files.append(joinpath(mbox.basedir, "maildirsize")) + + +def main(args): + """Expire mailboxes and messages according to chatmail config""" + parser = ArgumentParser(description=main.__doc__) + parser.add_argument( + "chatmail_ini", action="store", help="path pointing to chatmail.ini file" + ) + parser.add_argument( + "mailboxes_dir", + action="store", + help="path pointing to directory containing all mailbox directories", + ) + parser.add_argument( + "--maxnum", + default=None, + action="store", + help="maximum number of mailbxoes to iterate on", + ) + + parser.add_argument( + "--remove", + dest="remove", + action="store_true", + help="actually remove all expired files and dirs", + ) + args = parser.parse_args([str(x) for x in args]) + + config = read_config(args.chatmail_ini) now = datetime.utcnow().timestamp() - now = datetime(2025, 9, 9).timestamp() - - stat = Stats(basedir, maxnum=int(maxnum)) - exp = Expiry(config, stat, dry=True, now=now) + maxnum = int(args.maxnum) if args.maxnum else None + stat = Stats(args.mailboxes_dir, maxnum=maxnum) + exp = Expiry(config, stat, dry=not args.remove, now=now) stat.iter_mailboxes(exp.process_mailbox_stat) + exp.perform_removes() if __name__ == "__main__": - main() + main(sys.argv[1:]) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 009a2af2..3d846420 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -2,7 +2,7 @@ import sys from datetime import datetime -from chatmaild.expire import FileEntry, Stats, joinpath +from chatmaild.expire import Stats DAYSECONDS = 24 * 60 * 60 MONTHSECONDS = DAYSECONDS * 30 @@ -47,21 +47,13 @@ def __init__(self, stats, now): def process_mailbox_stat(self, mailbox): last_login = mailbox.last_login if last_login: - if os.path.basename(mailbox.mailboxdir)[:3] == "ci-": + if os.path.basename(mailbox.basedir)[:3] == "ci-": self.ci_logins.append(last_login) else: self.user_logins.append(last_login) - for entry in mailbox.messages: - new = FileEntry( - relpath=joinpath(os.path.basename(mailbox.mailboxdir), entry.relpath), - mtime=entry.mtime, - size=entry.size, - ) - self.messages.append(new) - self.sum_all_messages += entry.size - - for entry in mailbox.extrafiles: - self.sum_extra += entry.size + self.messages.extend(mailbox.messages) + self.sum_all_messages += sum(msg.size for msg in mailbox.messages) + self.sum_extra += sum(entry.size for entry in mailbox.extrafiles) def dump_summary(self): reports = [] diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 2649fce5..ee6fb78b 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -1,5 +1,7 @@ +import os import random from datetime import datetime +from fnmatch import fnmatch from pathlib import Path import pytest @@ -8,37 +10,46 @@ from chatmaild.expire import main as expiry_main from chatmaild.fsreport import Report, Stats -# XXX maildirsize (used by dovecot quota) needs to be removed after removing files +# XXX basedirsize (used by dovecot quota) needs to be removed after removing files @pytest.fixture -def mailboxdir1(tmp_path): - mailboxdir1 = tmp_path.joinpath("mailbox1@example.org") - mailboxdir1.mkdir() - password = mailboxdir1.joinpath("password") +def basedir1(tmp_path): + basedir1 = tmp_path.joinpath("mailbox1@example.org") + basedir1.mkdir() + password = basedir1.joinpath("password") password.write_text("xxx") + basedir1.joinpath("maildirsize").write_text("xxx") - garbagedir = mailboxdir1.joinpath("garbagedir") + garbagedir = basedir1.joinpath("garbagedir") garbagedir.mkdir() - cur = mailboxdir1.joinpath("cur") - new = mailboxdir1.joinpath("new") - cur.mkdir() - msg_cur = cur.joinpath("msg1") - msg_cur.write_text("xxx") - new.mkdir() - msg_new = new.joinpath("msg2") - msg_new.write_text("xxx123") - return mailboxdir1 + create_new_messages(basedir1, ["cur/msg1"], size=500) + create_new_messages(basedir1, ["new/msg2"], size=600) + return basedir1 + + +def create_new_messages(basedir, relpaths, size=1000, days=0): + now = datetime.utcnow().timestamp() + + for relpath in relpaths: + msg_path = Path(basedir).joinpath(relpath) + msg_path.parent.mkdir(parents=True, exist_ok=True) + msg_path.write_text("x" * size) + # accessed now, modified N days ago + os.utime(msg_path, (now, now - days * 86400)) @pytest.fixture -def mbox1(mailboxdir1): - return MailboxStat(mailboxdir1) +def mbox1(basedir1): + return MailboxStat(basedir1) -def test_filentry_ordering(): - l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] +def test_filentry_ordering(tmp_path): + l = [ + FileEntry(str(tmp_path), f"x{i}", size=i + 10, mtime=1000 - i) + for i in range(10) + ] sorted = list(l) random.shuffle(l) l.sort(key=lambda x: x.size) @@ -46,37 +57,65 @@ def test_filentry_ordering(): def test_stats_mailbox(mbox1): - password = Path(mbox1.mailboxdir).joinpath("password") + password = Path(mbox1.basedir).joinpath("password") assert mbox1.last_login == password.stat().st_mtime assert len(mbox1.messages) == 2 msgs = list(mbox1.messages) assert len(msgs) == 2 - assert msgs[0].size == 3 # cur - assert msgs[1].size == 6 # new - - extra = Path(mbox1.mailboxdir).joinpath("large-extra") - extra.write_text("x" * 1000) - Path(mbox1.mailboxdir).joinpath("index-something").write_text("123") - mbox2 = MailboxStat(mbox1.mailboxdir) - assert len(mbox2.extrafiles) == 3 + assert msgs[0].size == 500 # cur + assert msgs[1].size == 600 # new + + create_new_messages(mbox1.basedir, ["large-extra"], size=1000) + create_new_messages(mbox1.basedir, ["index-something"], size=3) + mbox2 = MailboxStat(mbox1.basedir) + assert len(mbox2.extrafiles) == 4 assert mbox2.extrafiles[0].size == 1000 # cope well with mailbox dirs that have no password (for whatever reason) - Path(mbox1.mailboxdir).joinpath("password").unlink() - mbox3 = MailboxStat(mbox1.mailboxdir) + Path(mbox1.basedir).joinpath("password").unlink() + mbox3 = MailboxStat(mbox1.basedir) assert mbox3.last_login is None def test_report(mbox1): now = datetime.utcnow().timestamp() - mailboxes_dir = Path(mbox1.mailboxdir).parent + mailboxes_dir = Path(mbox1.basedir).parent stats = Stats(str(mailboxes_dir), maxnum=None) rep = Report(stats, now=now) stats.iter_mailboxes(rep.process_mailbox_stat) rep.dump_summary() -def test_expiry(example_config, mbox1): - args = example_config._inipath, mbox1.mailboxdir, 10000 +def test_expiry_cli_basic(example_config, mbox1): + args = example_config._inipath, Path(mbox1.basedir).parent + expiry_main(args) + + +def test_expiry_cli_old_files(capsys, example_config, mbox1): + args = example_config._inipath, Path(mbox1.basedir).parent + + relpaths_old = ["cur/msg_old1", "cur/msg_old1"] + cutoff_days = int(example_config.delete_mails_after) + 1 + create_new_messages(mbox1.basedir, relpaths_old, size=1000, days=cutoff_days) + + relpaths_large = ["cur/msg_old_large1", "new/msg_old_large2"] + cutoff_days = int(example_config.delete_large_after) + 1 + create_new_messages( + mbox1.basedir, relpaths_large, size=1000 * 300, days=cutoff_days + ) + + create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1) + expiry_main(args) + out, err = capsys.readouterr() + + allpaths = relpaths_old + relpaths_large + ["maildirsize"] + for path in allpaths: + for line in err.split("\n"): + if fnmatch(line, f"removing*{path}"): + break + else: + pytest.fail(f"failed to remove {path}\n{err}") + + assert "shouldstay" not in err From 4be2bebcd7847d5ac3d776cfa10925b69e35d67c Mon Sep 17 00:00:00 2001 From: holger krekel Date: Sun, 14 Sep 2025 17:12:52 +0200 Subject: [PATCH 06/31] add argument parsing for reporting --- chatmaild/src/chatmaild/expire.py | 11 +++++++--- chatmaild/src/chatmaild/fsreport.py | 33 ++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 8cee2ab3..2d3a4dd4 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -160,10 +160,12 @@ def main(args): "chatmail_ini", action="store", help="path pointing to chatmail.ini file" ) parser.add_argument( - "mailboxes_dir", - action="store", - help="path pointing to directory containing all mailbox directories", + "mailboxes_dir", action="store", help="path to directory of mailboxes" ) + parser.add_argument( + "--days", action="store", help="assume date to be days older than now" + ) + parser.add_argument( "--maxnum", default=None, @@ -181,6 +183,9 @@ def main(args): config = read_config(args.chatmail_ini) now = datetime.utcnow().timestamp() + if args.days: + now = now - 86400 * int(args.days) + maxnum = int(args.maxnum) if args.maxnum else None stat = Stats(args.mailboxes_dir, maxnum=maxnum) exp = Expiry(config, stat, dry=not args.remove, now=now) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 3d846420..2f4a1f98 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -1,5 +1,5 @@ import os -import sys +from argparse import ArgumentParser from datetime import datetime from chatmaild.expire import Stats @@ -122,11 +122,34 @@ def p(num): print(f"last {days:3} days: {K(active)} {p(active)}") -def main(): - basedir, maxnum = sys.argv[1:] +def main(args=None): + """Report about filesystem storage usage of all mailboxes and messages""" + parser = ArgumentParser(description=main.__doc__) + # parser.add_argument( + # "chatmail_ini", action="store", help="path pointing to chatmail.ini file" + # ) + parser.add_argument( + "mailboxes_dir", action="store", help="path to directory of mailboxes" + ) + parser.add_argument( + "--days", action="store", help="assume date to be days older than now" + ) + + parser.add_argument( + "--maxnum", + default=None, + action="store", + help="maximum number of mailbxoes to iterate on", + ) + + args = parser.parse_args([str(x) for x in args] if args else args) + now = datetime.utcnow().timestamp() - now = datetime(2025, 9, 9).timestamp() - stats = Stats(basedir, maxnum=int(maxnum)) + if args.days: + now = now - 86400 * int(args.days) + + maxnum = int(args.maxnum) if args.maxnum else None + stats = Stats(args.mailboxes_dir, maxnum=maxnum) rep = Report(stats, now=now) stats.iter_mailboxes(rep.process_mailbox_stat) rep.dump_summary() From 0dc319617ed9c5ad772780f97ad6cd86787bf44d Mon Sep 17 00:00:00 2001 From: holger krekel Date: Sun, 14 Sep 2025 22:48:17 +0200 Subject: [PATCH 07/31] fix comment --- chatmaild/src/chatmaild/fsreport.py | 2 +- cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 2f4a1f98..c5f33e38 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -132,7 +132,7 @@ def main(args=None): "mailboxes_dir", action="store", help="path to directory of mailboxes" ) parser.add_argument( - "--days", action="store", help="assume date to be days older than now" + "--days", default=0, action="store", help="assume date to be days older than now" ) parser.add_argument( diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 index dc9af7e0..0b2821ba 100644 --- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 +++ b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 @@ -1,2 +1,2 @@ -# expire mailboxes and old or too large messages -2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} +# expire mailboxes, old and large messages +2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} --remove From e77215cf176e1c7ae884281ce8e4b48acd53fe7f Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 13:09:44 +0200 Subject: [PATCH 08/31] strike superflous code --- chatmaild/src/chatmaild/expire.py | 47 +++++++------------ chatmaild/src/chatmaild/fsreport.py | 9 +++- .../tests/test_delete_inactive_users.py | 10 +++- chatmaild/src/chatmaild/tests/test_expire.py | 8 +--- 4 files changed, 35 insertions(+), 39 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 2d3a4dd4..f12b21b3 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -4,6 +4,7 @@ """ import os +import shutil import sys from argparse import ArgumentParser from datetime import datetime @@ -13,27 +14,19 @@ class FileEntry: - def __init__(self, basedir, relpath, mtime, size): - self.basedir = basedir + def __init__(self, relpath, mtime, size): self.relpath = relpath self.mtime = mtime self.size = size + def __hash__(self): + return hash(self.relpath) + def __repr__(self): return f"" - def __str__(self): - return self.get_path() - - def get_path(self): - return joinpath(self.basedir, self.relpath) - - def fmt_size(self): - return f"{int(self.size/1000):5.0f}K" - - def fmt_since(self, now): - diff_seconds = int(now) - int(self.mtime) - return f"{int(diff_seconds / 86400):2.0f}d" + def get_path(self, basedir): + return joinpath(basedir, self.relpath) def __eq__(self, other): return ( @@ -50,7 +43,6 @@ def joinpath(name, extra): class Stats: def __init__(self, basedir, maxnum=None): self.basedir = str(basedir) - self.mailboxes = [] self.maxnum = maxnum def iter_mailboxes(self, callback=None): @@ -58,7 +50,6 @@ def iter_mailboxes(self, callback=None): if "@" in name: basedir = joinpath(self.basedir, name) mailbox = MailboxStat(basedir) - self.mailboxes.append(mailbox) if callback is not None: callback(mailbox) @@ -86,17 +77,13 @@ def __init__(self, basedir): st = os.stat(msg_path) relpath = joinpath(name, msg_name) self.messages.append( - FileEntry( - self.basedir, relpath, mtime=st.st_mtime, size=st.st_size - ) + FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) ) self.totalsize += st.st_size else: st = os.stat(fpath) if S_ISREG(st.st_mode): - self.extrafiles.append( - FileEntry(self.basedir, name, st.st_mtime, st.st_size) - ) + self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) if name == "password": self.last_login = st.st_mtime self.totalsize += st.st_size @@ -108,8 +95,9 @@ def print_info(msg): class Expiry: - def __init__(self, config, stat, dry, now): + def __init__(self, config, stats, dry, now): self.config = config + self.stats = stats self.dry = dry self.now = now self.del_files = [] @@ -119,13 +107,14 @@ def perform_removes(self): for mboxdir in self.del_mailboxes: print_info(f"removing {mboxdir}") if not self.dry: - self.rmtree(mboxdir) + shutil.rmtree(mboxdir) for path in self.del_files: print_info(f"removing {path}") if not self.dry: try: os.unlink(path) except FileNotFoundError: + print_info(f"delete failed, file vanished? {path}") pass # it's gone already, fine def process_mailbox_stat(self, mbox): @@ -143,9 +132,9 @@ def process_mailbox_stat(self, mbox): for message in mbox.messages: if message.mtime < cutoff_mails: - self.del_files.append(message.get_path()) + self.del_files.append(joinpath(mbox.basedir, message.relpath)) elif message.size > 200000 and message.mtime < cutoff_large_mails: - self.del_files.append(message.get_path()) + self.del_files.append(joinpath(mbox.basedir, message.relpath)) else: continue changed = True @@ -187,9 +176,9 @@ def main(args): now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None - stat = Stats(args.mailboxes_dir, maxnum=maxnum) - exp = Expiry(config, stat, dry=not args.remove, now=now) - stat.iter_mailboxes(exp.process_mailbox_stat) + stats = Stats(args.mailboxes_dir, maxnum=maxnum) + exp = Expiry(config, stats, dry=not args.remove, now=now) + stats.iter_mailboxes(exp.process_mailbox_stat) exp.perform_removes() diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index c5f33e38..e8cb7e6b 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -39,6 +39,7 @@ def __init__(self, stats, now): self.sum_extra = 0 self.sum_all_messages = 0 self.messages = [] + self.mailboxes = [] self.user_logins = [] self.ci_logins = [] self.stats = stats @@ -52,6 +53,7 @@ def process_mailbox_stat(self, mailbox): else: self.user_logins.append(last_login) self.messages.extend(mailbox.messages) + self.mailboxes.append(mailbox) self.sum_all_messages += sum(msg.size for msg in mailbox.messages) self.sum_extra += sum(entry.size for entry in mailbox.extrafiles) @@ -83,7 +85,7 @@ def print_messages(title, messages, num, rep=True): # list all 160K files of people who haven't logged in for a while messages = [] cutoff_date_login = self.now - 30 * DAYSECONDS - for mstat in self.stats.mailboxes: + for mstat in self.mailboxes: if mstat.last_login and mstat.last_login < cutoff_date_login: for msg in mstat.messages: if msg.size > 160000: @@ -132,7 +134,10 @@ def main(args=None): "mailboxes_dir", action="store", help="path to directory of mailboxes" ) parser.add_argument( - "--days", default=0, action="store", help="assume date to be days older than now" + "--days", + default=0, + action="store", + help="assume date to be days older than now", ) parser.add_argument( diff --git a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py index a7c74a9e..7257fb85 100644 --- a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py +++ b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py @@ -1,7 +1,7 @@ import time from chatmaild.doveauth import AuthDictProxy -from chatmaild.expire import run_expire +from chatmaild.expire import main as main_expire def test_login_timestamps(example_config): @@ -45,7 +45,13 @@ def create_user(addr, last_login): for addr in to_remove: assert example_config.get_user(addr).maildir.exists() - run_expire(example_config, example_config.mailboxes_dir) + main_expire( + args=[ + "--remove", + str(example_config._inipath), + str(example_config.mailboxes_dir), + ] + ) for p in example_config.mailboxes_dir.iterdir(): assert not p.name.startswith("old") diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index ee6fb78b..9c760064 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -46,10 +46,7 @@ def mbox1(basedir1): def test_filentry_ordering(tmp_path): - l = [ - FileEntry(str(tmp_path), f"x{i}", size=i + 10, mtime=1000 - i) - for i in range(10) - ] + l = [FileEntry(f"x{i}", size=i + 10, mtime=1000 - i) for i in range(10)] sorted = list(l) random.shuffle(l) l.sort(key=lambda x: x.size) @@ -93,8 +90,6 @@ def test_expiry_cli_basic(example_config, mbox1): def test_expiry_cli_old_files(capsys, example_config, mbox1): - args = example_config._inipath, Path(mbox1.basedir).parent - relpaths_old = ["cur/msg_old1", "cur/msg_old1"] cutoff_days = int(example_config.delete_mails_after) + 1 create_new_messages(mbox1.basedir, relpaths_old, size=1000, days=cutoff_days) @@ -107,6 +102,7 @@ def test_expiry_cli_old_files(capsys, example_config, mbox1): create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1) + args = example_config._inipath, Path(mbox1.basedir).parent, "--remove" expiry_main(args) out, err = capsys.readouterr() From a0349863a82cfc49cd4758cbd1662ddb54cadfea Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 13:15:15 +0200 Subject: [PATCH 09/31] more streamline --- chatmaild/src/chatmaild/expire.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index f12b21b3..0bd9aa12 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -25,9 +25,6 @@ def __hash__(self): def __repr__(self): return f"" - def get_path(self, basedir): - return joinpath(basedir, self.relpath) - def __eq__(self, other): return ( self.relpath == other.relpath @@ -118,7 +115,6 @@ def perform_removes(self): pass # it's gone already, fine def process_mailbox_stat(self, mbox): - print_info(f"processing expiry for {mbox.basedir}") cutoff_without_login = ( self.now - int(self.config.delete_inactive_users_after) * 86400 ) From f32c5c5c1af16f1ebec8d4f98189519b228c7983 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 13:22:03 +0200 Subject: [PATCH 10/31] don't globally collect files anymore to avoid using growing-with-number-of-mailboxes ram --- chatmaild/src/chatmaild/expire.py | 39 ++++++++++++++----------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 0bd9aa12..5a67b26c 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -97,22 +97,19 @@ def __init__(self, config, stats, dry, now): self.stats = stats self.dry = dry self.now = now - self.del_files = [] - self.del_mailboxes = [] - - def perform_removes(self): - for mboxdir in self.del_mailboxes: - print_info(f"removing {mboxdir}") - if not self.dry: - shutil.rmtree(mboxdir) - for path in self.del_files: - print_info(f"removing {path}") - if not self.dry: - try: - os.unlink(path) - except FileNotFoundError: - print_info(f"delete failed, file vanished? {path}") - pass # it's gone already, fine + + def remove_mailbox(self, mboxdir): + print_info(f"removing {mboxdir}") + if not self.dry: + shutil.rmtree(mboxdir) + + def remove_file(self, path): + print_info(f"removing {path}") + if not self.dry: + try: + os.unlink(path) + except FileNotFoundError: + print_info(f"file not found/vanished {path}") def process_mailbox_stat(self, mbox): cutoff_without_login = ( @@ -123,19 +120,20 @@ def process_mailbox_stat(self, mbox): changed = False if mbox.last_login and mbox.last_login < cutoff_without_login: - self.del_mailboxes.append(mbox.basedir) + self.remove_mailbox(mbox.basedir) return + os.chdir(mbox.basedir) for message in mbox.messages: if message.mtime < cutoff_mails: - self.del_files.append(joinpath(mbox.basedir, message.relpath)) + self.remove_file(message.relpath) elif message.size > 200000 and message.mtime < cutoff_large_mails: - self.del_files.append(joinpath(mbox.basedir, message.relpath)) + self.remove_file(message.relpath) else: continue changed = True if changed: - self.del_files.append(joinpath(mbox.basedir, "maildirsize")) + self.remove_file("maildirsize") def main(args): @@ -175,7 +173,6 @@ def main(args): stats = Stats(args.mailboxes_dir, maxnum=maxnum) exp = Expiry(config, stats, dry=not args.remove, now=now) stats.iter_mailboxes(exp.process_mailbox_stat) - exp.perform_removes() if __name__ == "__main__": From 01a2a878134f7a60fc91568a62f47e13a5fbb1bc Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 13:41:16 +0200 Subject: [PATCH 11/31] no reporting by default, and adding a summary line --- chatmaild/src/chatmaild/expire.py | 38 +++++++++++++++++--- chatmaild/src/chatmaild/tests/test_expire.py | 2 +- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 5a67b26c..81d1b4ea 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -6,6 +6,7 @@ import os import shutil import sys +import time from argparse import ArgumentParser from datetime import datetime from stat import S_ISREG @@ -92,24 +93,34 @@ def print_info(msg): class Expiry: - def __init__(self, config, stats, dry, now): + def __init__(self, config, stats, dry, now, verbose): self.config = config self.stats = stats self.dry = dry self.now = now + self.verbose = verbose + self.del_mboxes = 0 + self.all_mboxes = 0 + self.del_files = 0 + self.all_files = 0 + self.start = time.time() def remove_mailbox(self, mboxdir): - print_info(f"removing {mboxdir}") + if self.verbose: + print_info(f"removing {mboxdir}") if not self.dry: shutil.rmtree(mboxdir) + self.del_mboxes += 1 def remove_file(self, path): - print_info(f"removing {path}") + if self.verbose: + print_info(f"removing {path}") if not self.dry: try: os.unlink(path) except FileNotFoundError: print_info(f"file not found/vanished {path}") + self.del_files += 1 def process_mailbox_stat(self, mbox): cutoff_without_login = ( @@ -118,12 +129,15 @@ def process_mailbox_stat(self, mbox): cutoff_mails = self.now - int(self.config.delete_mails_after) * 86400 cutoff_large_mails = self.now - int(self.config.delete_large_after) * 86400 + self.all_mboxes += 1 changed = False if mbox.last_login and mbox.last_login < cutoff_without_login: self.remove_mailbox(mbox.basedir) return + # all to-be-removed files are relative to the mailbox basedir os.chdir(mbox.basedir) + self.all_files += len(mbox.messages) for message in mbox.messages: if message.mtime < cutoff_mails: self.remove_file(message.relpath) @@ -135,6 +149,13 @@ def process_mailbox_stat(self, mbox): if changed: self.remove_file("maildirsize") + def get_summary(self): + return ( + f"Removed {self.del_mboxes} out of {self.all_mboxes} mailboxes " + f"and {self.del_files} out of {self.all_files} files " + f"in {time.time()-self.start:2.2f} seconds" + ) + def main(args): """Expire mailboxes and messages according to chatmail config""" @@ -155,6 +176,12 @@ def main(args): action="store", help="maximum number of mailbxoes to iterate on", ) + parser.add_argument( + "-v", + dest="verbose", + action="store_true", + help="print out removed files and mailboxes", + ) parser.add_argument( "--remove", @@ -170,9 +197,10 @@ def main(args): now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None - stats = Stats(args.mailboxes_dir, maxnum=maxnum) - exp = Expiry(config, stats, dry=not args.remove, now=now) + stats = Stats(os.path.abspath(args.mailboxes_dir), maxnum=maxnum) + exp = Expiry(config, stats, dry=not args.remove, now=now, verbose=args.verbose) stats.iter_mailboxes(exp.process_mailbox_stat) + print(exp.get_summary()) if __name__ == "__main__": diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 9c760064..af8dc7a1 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -102,7 +102,7 @@ def test_expiry_cli_old_files(capsys, example_config, mbox1): create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1) - args = example_config._inipath, Path(mbox1.basedir).parent, "--remove" + args = example_config._inipath, Path(mbox1.basedir).parent, "--remove", "-v" expiry_main(args) out, err = capsys.readouterr() From a18c582f170b1db7835930a732cc038d628d25a3 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 14:05:16 +0200 Subject: [PATCH 12/31] further reduce code --- chatmaild/src/chatmaild/expire.py | 46 ++++++++----------------------- 1 file changed, 11 insertions(+), 35 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 81d1b4ea..3f7b3a9c 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -8,34 +8,13 @@ import sys import time from argparse import ArgumentParser +from collections import namedtuple from datetime import datetime from stat import S_ISREG from chatmaild.config import read_config - -class FileEntry: - def __init__(self, relpath, mtime, size): - self.relpath = relpath - self.mtime = mtime - self.size = size - - def __hash__(self): - return hash(self.relpath) - - def __repr__(self): - return f"" - - def __eq__(self, other): - return ( - self.relpath == other.relpath - and self.size == other.size - and self.mtime == other.mtime - ) - - -def joinpath(name, extra): - return name + "/" + extra +FileEntry = namedtuple("FileEntry", ("relpath", "mtime", "size")) class Stats: @@ -46,7 +25,7 @@ def __init__(self, basedir, maxnum=None): def iter_mailboxes(self, callback=None): for name in os.listdir(self.basedir)[: self.maxnum]: if "@" in name: - basedir = joinpath(self.basedir, name) + basedir = self.basedir + "/" + name mailbox = MailboxStat(basedir) if callback is not None: callback(mailbox) @@ -56,7 +35,7 @@ class MailboxStat: last_login = None def __init__(self, basedir): - self.basedir = basedir = str(basedir) + self.basedir = str(basedir) # all detected messages in cur/new/tmp folders self.messages = [] @@ -67,19 +46,16 @@ def __init__(self, basedir): self.totalsize = 0 # scan all relevant files (without recursion) - for name in os.listdir(basedir): - fpath = joinpath(basedir, name) + os.chdir(self.basedir) + for name in os.listdir("."): if name in ("cur", "new", "tmp"): - for msg_name in os.listdir(fpath): - msg_path = joinpath(fpath, msg_name) - st = os.stat(msg_path) - relpath = joinpath(name, msg_name) - self.messages.append( - FileEntry(relpath, mtime=st.st_mtime, size=st.st_size) - ) + for msg_name in os.listdir(name): + relpath = name + "/" + msg_name + st = os.stat(relpath) + self.messages.append(FileEntry(relpath, st.st_mtime, st.st_size)) self.totalsize += st.st_size else: - st = os.stat(fpath) + st = os.stat(name) if S_ISREG(st.st_mode): self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) if name == "password": From c59f6c760a20df97a18d14f39fcb3abd6d6c94b1 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 14:21:15 +0200 Subject: [PATCH 13/31] remove superflous Stats class --- chatmaild/src/chatmaild/expire.py | 25 +++++++------------- chatmaild/src/chatmaild/fsreport.py | 11 ++++----- chatmaild/src/chatmaild/tests/test_expire.py | 10 ++++---- 3 files changed, 18 insertions(+), 28 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 3f7b3a9c..4e5b4d6f 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -17,18 +17,10 @@ FileEntry = namedtuple("FileEntry", ("relpath", "mtime", "size")) -class Stats: - def __init__(self, basedir, maxnum=None): - self.basedir = str(basedir) - self.maxnum = maxnum - - def iter_mailboxes(self, callback=None): - for name in os.listdir(self.basedir)[: self.maxnum]: - if "@" in name: - basedir = self.basedir + "/" + name - mailbox = MailboxStat(basedir) - if callback is not None: - callback(mailbox) +def iter_mailboxes(basedir, maxnum): + for name in os.listdir(basedir)[:maxnum]: + if "@" in name: + yield MailboxStat(basedir + "/" + name) class MailboxStat: @@ -69,9 +61,8 @@ def print_info(msg): class Expiry: - def __init__(self, config, stats, dry, now, verbose): + def __init__(self, config, dry, now, verbose): self.config = config - self.stats = stats self.dry = dry self.now = now self.verbose = verbose @@ -173,9 +164,9 @@ def main(args): now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None - stats = Stats(os.path.abspath(args.mailboxes_dir), maxnum=maxnum) - exp = Expiry(config, stats, dry=not args.remove, now=now, verbose=args.verbose) - stats.iter_mailboxes(exp.process_mailbox_stat) + exp = Expiry(config, dry=not args.remove, now=now, verbose=args.verbose) + for mailbox in iter_mailboxes(os.path.abspath(args.mailboxes_dir), maxnum=maxnum): + exp.process_mailbox_stat(mailbox) print(exp.get_summary()) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index e8cb7e6b..68bf371a 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -2,7 +2,7 @@ from argparse import ArgumentParser from datetime import datetime -from chatmaild.expire import Stats +from chatmaild.expire import iter_mailboxes DAYSECONDS = 24 * 60 * 60 MONTHSECONDS = DAYSECONDS * 30 @@ -35,14 +35,13 @@ def H(size): class Report: - def __init__(self, stats, now): + def __init__(self, now): self.sum_extra = 0 self.sum_all_messages = 0 self.messages = [] self.mailboxes = [] self.user_logins = [] self.ci_logins = [] - self.stats = stats self.now = now def process_mailbox_stat(self, mailbox): @@ -154,9 +153,9 @@ def main(args=None): now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None - stats = Stats(args.mailboxes_dir, maxnum=maxnum) - rep = Report(stats, now=now) - stats.iter_mailboxes(rep.process_mailbox_stat) + rep = Report(now=now) + for mbox in iter_mailboxes(os.path.abspath(args.mailboxes_dir), maxnum=maxnum): + rep.process_mailbox_stat(mbox) rep.dump_summary() diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index af8dc7a1..b7301399 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -8,7 +8,7 @@ from chatmaild.expire import FileEntry, MailboxStat from chatmaild.expire import main as expiry_main -from chatmaild.fsreport import Report, Stats +from chatmaild.fsreport import Report, iter_mailboxes # XXX basedirsize (used by dovecot quota) needs to be removed after removing files @@ -58,7 +58,7 @@ def test_stats_mailbox(mbox1): assert mbox1.last_login == password.stat().st_mtime assert len(mbox1.messages) == 2 - msgs = list(mbox1.messages) + msgs = list(sorted(mbox1.messages, key=lambda x: x.size)) assert len(msgs) == 2 assert msgs[0].size == 500 # cur assert msgs[1].size == 600 # new @@ -78,9 +78,9 @@ def test_stats_mailbox(mbox1): def test_report(mbox1): now = datetime.utcnow().timestamp() mailboxes_dir = Path(mbox1.basedir).parent - stats = Stats(str(mailboxes_dir), maxnum=None) - rep = Report(stats, now=now) - stats.iter_mailboxes(rep.process_mailbox_stat) + rep = Report(now=now) + for mailbox in iter_mailboxes(str(mailboxes_dir), maxnum=None): + rep.process_mailbox_stat(mailbox) rep.dump_summary() From 5956c51d66fdab4e8848d05ec5ce92ad31446023 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 14:40:44 +0200 Subject: [PATCH 14/31] some renaming --- chatmaild/src/chatmaild/fsreport.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 68bf371a..4dd854a6 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -36,10 +36,11 @@ def H(size): class Report: def __init__(self, now): - self.sum_extra = 0 - self.sum_all_messages = 0 - self.messages = [] + self.size_extra = 0 + self.size_messages = 0 + self.mailboxes = [] + self.messages = [] self.user_logins = [] self.ci_logins = [] self.now = now @@ -53,8 +54,8 @@ def process_mailbox_stat(self, mailbox): self.user_logins.append(last_login) self.messages.extend(mailbox.messages) self.mailboxes.append(mailbox) - self.sum_all_messages += sum(msg.size for msg in mailbox.messages) - self.sum_extra += sum(entry.size for entry in mailbox.extrafiles) + self.size_messages += sum(msg.size for msg in mailbox.messages) + self.size_extra += sum(entry.size for entry in mailbox.extrafiles) def dump_summary(self): reports = [] @@ -95,13 +96,13 @@ def print_messages(title, messages, num, rep=True): print() print("## Overall mailbox storage use analysis") - print(f"Mailbox data: {M(self.sum_extra + self.sum_all_messages)}") - print(f"Messages : {M(self.sum_all_messages)}") - percent = self.sum_extra / (self.sum_extra + self.sum_all_messages) * 100 - print(f"Extra files : {M(self.sum_extra)} ({percent:.2f}%)") + print(f"Mailbox data: {M(self.size_extra + self.size_messages)}") + print(f"Messages : {M(self.size_messages)}") + percent = self.size_extra / (self.size_extra + self.size_messages) * 100 + print(f"Extra files : {M(self.size_extra)} ({percent:.2f}%)") for title, size in reports: - percent = size / self.sum_all_messages * 100 + percent = size / self.size_messages * 100 print(f"{title:38} {M(size)} ({percent:.2f}%)") all_logins = len(self.user_logins) + len(self.ci_logins) From 101ffca509e38230b1c8b31f6cd90509773cbfe8 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 14:44:10 +0200 Subject: [PATCH 15/31] fix lint issues --- chatmaild/src/chatmaild/expire.py | 2 +- chatmaild/src/chatmaild/fsreport.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 4e5b4d6f..05628db1 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -120,7 +120,7 @@ def get_summary(self): return ( f"Removed {self.del_mboxes} out of {self.all_mboxes} mailboxes " f"and {self.del_files} out of {self.all_files} files " - f"in {time.time()-self.start:2.2f} seconds" + f"in {time.time() - self.start:2.2f} seconds" ) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 4dd854a6..a507e391 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -18,12 +18,12 @@ def K(size): if size < 1000: return f"{size:5.0f}" elif size < 10000: - return f"{size/1000:3.2f}K" - return f"{int(size/1000):5.0f}K" + return f"{size / 1000:3.2f}K" + return f"{int(size / 1000):5.0f}K" def M(size): - return f"{int(size/1000000):5.0f}M" + return f"{int(size / 1000000):5.0f}M" def H(size): @@ -31,7 +31,7 @@ def H(size): return K(size) if size < 1000 * 1000 * 1000: return M(size) - return f"{size/1000000000:2.2f}G" + return f"{size / 1000000000:2.2f}G" class Report: @@ -52,6 +52,7 @@ def process_mailbox_stat(self, mailbox): self.ci_logins.append(last_login) else: self.user_logins.append(last_login) + self.messages.extend(mailbox.messages) self.mailboxes.append(mailbox) self.size_messages += sum(msg.size for msg in mailbox.messages) @@ -110,7 +111,7 @@ def print_messages(title, messages, num, rep=True): ci_logins = len(self.ci_logins) def p(num): - return f"({num/num_logins * 100:2.2f}%)" + return f"({num / num_logins * 100:2.2f}%)" print() print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") From d1941f07dbd55e080252b781bc7290de7fbc26b0 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 15 Sep 2025 15:02:24 +0200 Subject: [PATCH 16/31] during fsreport (reporting) don't store all mailbxoes but categorize them immediately, provide a few command line options to select --- CHANGELOG.md | 7 + chatmaild/src/chatmaild/expire.py | 2 + chatmaild/src/chatmaild/fsreport.py | 143 ++++++++++--------- chatmaild/src/chatmaild/tests/test_expire.py | 16 ++- 4 files changed, 93 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5f92e84..57ecea8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,13 @@ - Add `--skip-dns-check` argument to `cmdeploy run` command, which disables DNS record checking before installation. ([#661](https://github.com/chatmail/relay/pull/661)) +- Rework expiry of message files and mailboxes in Python + to only do a single iteration over sometimes millions of messages + instead of doing "find" commands that iterate 9 times over the messages. + Provide an "fsreport" CLI for more fine grained analysis of message files. + ([#637](https://github.com/chatmail/relay/pull/632)) + + ## 1.7.0 2025-09-11 - Make www upload path configurable diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 05628db1..64f4dbca 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -38,6 +38,7 @@ def __init__(self, basedir): self.totalsize = 0 # scan all relevant files (without recursion) + old_cwd = os.getcwd() os.chdir(self.basedir) for name in os.listdir("."): if name in ("cur", "new", "tmp"): @@ -54,6 +55,7 @@ def __init__(self, basedir): self.last_login = st.st_mtime self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) + os.chdir(old_cwd) def print_info(msg): diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index a507e391..8546cbf4 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -1,3 +1,20 @@ +""" +command line tool to analyze mailbox message storage + +example invocation: + + python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org + +to show storage summaries for all "cur" folders + + python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org --mdir cur + +to show storage summaries only for first 1000 mailboxes + + python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org --maxnum 1000 + +""" + import os from argparse import ArgumentParser from datetime import datetime @@ -16,7 +33,7 @@ def D(timestamp, now=datetime.utcnow().timestamp()): def K(size): if size < 1000: - return f"{size:5.0f}" + return f"{size:6.0f}" elif size < 10000: return f"{size / 1000:3.2f}K" return f"{int(size / 1000):5.0f}K" @@ -35,93 +52,71 @@ def H(size): class Report: - def __init__(self, now): + def __init__(self, now, min_login_age, mdir): self.size_extra = 0 self.size_messages = 0 - - self.mailboxes = [] - self.messages = [] - self.user_logins = [] - self.ci_logins = [] self.now = now + self.min_login_age = min_login_age + self.mdir = mdir + + self.num_ci_logins = self.num_all_logins = 0 + self.login_buckets = dict((x, 0) for x in (1, 10, 30, 40, 80, 100, 150)) + self.message_buckets = dict((x, 0) for x in (0, 160000, 500000, 2000000)) def process_mailbox_stat(self, mailbox): + # categorize login times last_login = mailbox.last_login if last_login: + self.num_all_logins += 1 if os.path.basename(mailbox.basedir)[:3] == "ci-": - self.ci_logins.append(last_login) + self.num_ci_logins += 1 else: - self.user_logins.append(last_login) - - self.messages.extend(mailbox.messages) - self.mailboxes.append(mailbox) - self.size_messages += sum(msg.size for msg in mailbox.messages) + for days in self.login_buckets: + if last_login >= self.now - days * DAYSECONDS: + self.login_buckets[days] += 1 + + cutoff_login_date = self.now - self.min_login_age * DAYSECONDS + if last_login and last_login <= cutoff_login_date: + # categorize message sizes + for size in self.message_buckets: + for msg in mailbox.messages: + if msg.size >= size: + if self.mdir and not msg.relpath.startswith(self.mdir): + continue + self.message_buckets[size] += msg.size + + self.size_messages += sum(entry.size for entry in mailbox.messages) self.size_extra += sum(entry.size for entry in mailbox.extrafiles) def dump_summary(self): - reports = [] - - def print_messages(title, messages, num, rep=True): - print() - allsize = sum(x.size for x in messages) - if rep: - reports.append((title, allsize)) - - print(f"## {title} [total: {H(allsize)}]") - for entry in messages[:num]: - print(f"{K(entry.size)} {D(entry.mtime)} {entry.relpath}") - - for kind in ("cur", "new"): - biggest = list(self.messages) - biggest.sort(key=lambda x: (-x.size, x.mtime)) - print_messages(f"Biggest {kind} messages", biggest, 10, rep=False) - - oldest = self.messages - mode = "cur" - for maxsize in (160000, 500000, 2000000, 10000000): - oldest = [x for x in oldest if x.size > maxsize and mode in x.relpath] - oldest.sort(key=lambda x: x.mtime) - print_messages(f"{mode} folders oldest > {K(maxsize)} messages", oldest, 10) - - # list all 160K files of people who haven't logged in for a while - messages = [] - cutoff_date_login = self.now - 30 * DAYSECONDS - for mstat in self.mailboxes: - if mstat.last_login and mstat.last_login < cutoff_date_login: - for msg in mstat.messages: - if msg.size > 160000: - messages.append(msg) - - messages.sort(key=lambda x: x.size) - print_messages(">30-day last_login new >160K", messages, 10) - + all_messages = self.size_messages print() - print("## Overall mailbox storage use analysis") - print(f"Mailbox data: {M(self.size_extra + self.size_messages)}") - print(f"Messages : {M(self.size_messages)}") - percent = self.size_extra / (self.size_extra + self.size_messages) * 100 + print("## Mailbox storage use analysis") + print(f"Mailbox data total size: {M(self.size_extra + all_messages)}") + print(f"Messages total size : {M(all_messages)}") + percent = self.size_extra / (self.size_extra + all_messages) * 100 print(f"Extra files : {M(self.size_extra)} ({percent:.2f}%)") - for title, size in reports: - percent = size / self.size_messages * 100 - print(f"{title:38} {M(size)} ({percent:.2f}%)") + print() + if self.min_login_age: + print(f"### Message storage for {self.min_login_age} days old logins") + + pref = f"[{self.mdir}] " if self.mdir else "" + for minsize, sumsize in self.message_buckets.items(): + percent = sumsize / all_messages * 100 + print(f"{pref}larger than {K(minsize)}: {M(sumsize)} ({percent:.2f}%)") - all_logins = len(self.user_logins) + len(self.ci_logins) - num_logins = len(self.user_logins) - ci_logins = len(self.ci_logins) + user_logins = self.num_all_logins - self.num_ci_logins def p(num): - return f"({num / num_logins * 100:2.2f}%)" + return f"({num / user_logins * 100:2.2f}%)" print() print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") - print(f"all: {K(all_logins)}") - print(f"non-ci: {K(num_logins)}") - print(f"ci: {K(ci_logins)}") - for days in (1, 10, 30, 40, 80, 100, 150): - active = len( - [x for x in self.user_logins if x >= self.now - days * DAYSECONDS] - ) + print(f"all: {K(self.num_all_logins)}") + print(f"non-ci: {K(user_logins)}") + print(f"ci: {K(self.num_ci_logins)}") + for days, active in self.login_buckets.items(): print(f"last {days:3} days: {K(active)} {p(active)}") @@ -140,6 +135,18 @@ def main(args=None): action="store", help="assume date to be days older than now", ) + parser.add_argument( + "--min-login-age", + default=0, + dest="min_login_age", + action="store", + help="only sum up message size if last login is at least min-login-age days old", + ) + parser.add_argument( + "--mdir", + action="store", + help="only consider 'cur' or 'new' or 'tmp' messages for summary", + ) parser.add_argument( "--maxnum", @@ -155,7 +162,7 @@ def main(args=None): now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None - rep = Report(now=now) + rep = Report(now=now, min_login_age=int(args.min_login_age), mdir=args.mdir) for mbox in iter_mailboxes(os.path.abspath(args.mailboxes_dir), maxnum=maxnum): rep.process_mailbox_stat(mbox) rep.dump_summary() diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index b7301399..0d85eb34 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -8,7 +8,7 @@ from chatmaild.expire import FileEntry, MailboxStat from chatmaild.expire import main as expiry_main -from chatmaild.fsreport import Report, iter_mailboxes +from chatmaild.fsreport import main as report_main # XXX basedirsize (used by dovecot quota) needs to be removed after removing files @@ -76,12 +76,14 @@ def test_stats_mailbox(mbox1): def test_report(mbox1): - now = datetime.utcnow().timestamp() - mailboxes_dir = Path(mbox1.basedir).parent - rep = Report(now=now) - for mailbox in iter_mailboxes(str(mailboxes_dir), maxnum=None): - rep.process_mailbox_stat(mailbox) - rep.dump_summary() + args = (str(Path(mbox1.basedir).parent),) + report_main(args) + args = list(args) + "--days 1".split() + report_main(args) + args = list(args) + "--min-login-age 1".split() + report_main(args) + args = list(args) + "--mdir cur".split() + report_main(args) def test_expiry_cli_basic(example_config, mbox1): From 21efffa59559b476b2dda6ff2972b08219b0cb8b Mon Sep 17 00:00:00 2001 From: holger krekel Date: Tue, 16 Sep 2025 12:54:33 +0200 Subject: [PATCH 17/31] remove superflous totalsize attribute --- chatmaild/src/chatmaild/expire.py | 5 ----- chatmaild/src/chatmaild/tests/test_expire.py | 2 -- 2 files changed, 7 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 64f4dbca..d9cae413 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -34,9 +34,6 @@ def __init__(self, basedir): # all detected files in mailbox top dir self.extrafiles = [] - # total size of all detected files - self.totalsize = 0 - # scan all relevant files (without recursion) old_cwd = os.getcwd() os.chdir(self.basedir) @@ -46,14 +43,12 @@ def __init__(self, basedir): relpath = name + "/" + msg_name st = os.stat(relpath) self.messages.append(FileEntry(relpath, st.st_mtime, st.st_size)) - self.totalsize += st.st_size else: st = os.stat(name) if S_ISREG(st.st_mode): self.extrafiles.append(FileEntry(name, st.st_mtime, st.st_size)) if name == "password": self.last_login = st.st_mtime - self.totalsize += st.st_size self.extrafiles.sort(key=lambda x: -x.size) os.chdir(old_cwd) diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 0d85eb34..041b734a 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -10,8 +10,6 @@ from chatmaild.expire import main as expiry_main from chatmaild.fsreport import main as report_main -# XXX basedirsize (used by dovecot quota) needs to be removed after removing files - @pytest.fixture def basedir1(tmp_path): From c3ed0eea5a3cacb399a9471e86321264660e51e4 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 09:54:31 +0200 Subject: [PATCH 18/31] prefix new commands --- chatmaild/pyproject.toml | 4 ++-- cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chatmaild/pyproject.toml b/chatmaild/pyproject.toml index a09ed122..1194db55 100644 --- a/chatmaild/pyproject.toml +++ b/chatmaild/pyproject.toml @@ -27,8 +27,8 @@ chatmail-metadata = "chatmaild.metadata:main" filtermail = "chatmaild.filtermail:main" echobot = "chatmaild.echo:main" chatmail-metrics = "chatmaild.metrics:main" -expire = "chatmaild.expire:main" -fsreport = "chatmaild.fsreport:main" +chatmail-expire = "chatmaild.expire:main" +chatmail-fsreport = "chatmaild.fsreport:main" lastlogin = "chatmaild.lastlogin:main" turnserver = "chatmaild.turnserver:main" diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 index 0b2821ba..538251a3 100644 --- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 +++ b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 @@ -1,2 +1,2 @@ # expire mailboxes, old and large messages -2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} --remove +2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/chatmail-expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} --remove From fcea25c5becf4b53c15c93b8e1ff31d8675f3564 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 09:59:47 +0200 Subject: [PATCH 19/31] address four review comments from link2xt --- chatmaild/src/chatmaild/fsreport.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 8546cbf4..8ad884b0 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -3,15 +3,15 @@ example invocation: - python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org + python -m chatmaild.fsreport /home/vmail/mail/example.org to show storage summaries for all "cur" folders - python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org --mdir cur + python -m chatmaild.fsreport /home/vmail/mail/example.org --mdir cur to show storage summaries only for first 1000 mailboxes - python -m chatmaild.fsreport /home/vmail/mail/nine.testrun.org --maxnum 1000 + python -m chatmaild.fsreport /home/vmail/mail/example.org --maxnum 1000 """ @@ -60,8 +60,9 @@ def __init__(self, now, min_login_age, mdir): self.mdir = mdir self.num_ci_logins = self.num_all_logins = 0 - self.login_buckets = dict((x, 0) for x in (1, 10, 30, 40, 80, 100, 150)) - self.message_buckets = dict((x, 0) for x in (0, 160000, 500000, 2000000)) + self.login_buckets = {x: 0 for x in (1, 10, 30, 40, 80, 100, 150)} + + self.message_buckets = {x: 0 for x in (0, 160000, 500000, 2000000)} def process_mailbox_stat(self, mailbox): # categorize login times @@ -123,9 +124,6 @@ def p(num): def main(args=None): """Report about filesystem storage usage of all mailboxes and messages""" parser = ArgumentParser(description=main.__doc__) - # parser.add_argument( - # "chatmail_ini", action="store", help="path pointing to chatmail.ini file" - # ) parser.add_argument( "mailboxes_dir", action="store", help="path to directory of mailboxes" ) @@ -155,7 +153,7 @@ def main(args=None): help="maximum number of mailbxoes to iterate on", ) - args = parser.parse_args([str(x) for x in args] if args else args) + args = parser.parse_args(args) now = datetime.utcnow().timestamp() if args.days: From 6e26a62bf114f82dcb7b5662181ca3f6dad520f8 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 10:13:24 +0200 Subject: [PATCH 20/31] cosmetic: refine summary and fix typo --- chatmaild/src/chatmaild/expire.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index d9cae413..d248eeda 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -116,7 +116,7 @@ def process_mailbox_stat(self, mbox): def get_summary(self): return ( f"Removed {self.del_mboxes} out of {self.all_mboxes} mailboxes " - f"and {self.del_files} out of {self.all_files} files " + f"and {self.del_files} out of {self.all_files} files in existing mailboxes " f"in {time.time() - self.start:2.2f} seconds" ) @@ -138,7 +138,7 @@ def main(args): "--maxnum", default=None, action="store", - help="maximum number of mailbxoes to iterate on", + help="maximum number of mailboxes to iterate on", ) parser.add_argument( "-v", From aeec17c058e3cb8ae6b11b31aa136e95c833872c Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 13:43:40 +0200 Subject: [PATCH 21/31] unify chatmail-fsreport and chatmail-expire to both just require a chatmail.ini file --- chatmaild/src/chatmaild/expire.py | 9 +++------ chatmaild/src/chatmaild/fsreport.py | 13 ++++++++----- .../tests/test_delete_inactive_users.py | 1 - chatmaild/src/chatmaild/tests/test_expire.py | 16 ++++++++-------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index d248eeda..507d5798 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -121,15 +121,12 @@ def get_summary(self): ) -def main(args): +def main(args=None): """Expire mailboxes and messages according to chatmail config""" parser = ArgumentParser(description=main.__doc__) parser.add_argument( "chatmail_ini", action="store", help="path pointing to chatmail.ini file" ) - parser.add_argument( - "mailboxes_dir", action="store", help="path to directory of mailboxes" - ) parser.add_argument( "--days", action="store", help="assume date to be days older than now" ) @@ -153,7 +150,7 @@ def main(args): action="store_true", help="actually remove all expired files and dirs", ) - args = parser.parse_args([str(x) for x in args]) + args = parser.parse_args(args) config = read_config(args.chatmail_ini) now = datetime.utcnow().timestamp() @@ -162,7 +159,7 @@ def main(args): maxnum = int(args.maxnum) if args.maxnum else None exp = Expiry(config, dry=not args.remove, now=now, verbose=args.verbose) - for mailbox in iter_mailboxes(os.path.abspath(args.mailboxes_dir), maxnum=maxnum): + for mailbox in iter_mailboxes(str(config.mailboxes_dir), maxnum=maxnum): exp.process_mailbox_stat(mailbox) print(exp.get_summary()) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 8ad884b0..e4f7380f 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -3,15 +3,15 @@ example invocation: - python -m chatmaild.fsreport /home/vmail/mail/example.org + python -m chatmaild.fsreport /path/to/chatmail.ini to show storage summaries for all "cur" folders - python -m chatmaild.fsreport /home/vmail/mail/example.org --mdir cur + python -m chatmaild.fsreport /path/to/chatmail.ini --mdir cur to show storage summaries only for first 1000 mailboxes - python -m chatmaild.fsreport /home/vmail/mail/example.org --maxnum 1000 + python -m chatmaild.fsreport /path/to/chatmail.ini --maxnum 1000 """ @@ -19,6 +19,7 @@ from argparse import ArgumentParser from datetime import datetime +from chatmaild.config import read_config from chatmaild.expire import iter_mailboxes DAYSECONDS = 24 * 60 * 60 @@ -125,7 +126,7 @@ def main(args=None): """Report about filesystem storage usage of all mailboxes and messages""" parser = ArgumentParser(description=main.__doc__) parser.add_argument( - "mailboxes_dir", action="store", help="path to directory of mailboxes" + "chatmail_ini", action="store", help="path pointing to chatmail.ini file" ) parser.add_argument( "--days", @@ -155,13 +156,15 @@ def main(args=None): args = parser.parse_args(args) + config = read_config(args.chatmail_ini) + now = datetime.utcnow().timestamp() if args.days: now = now - 86400 * int(args.days) maxnum = int(args.maxnum) if args.maxnum else None rep = Report(now=now, min_login_age=int(args.min_login_age), mdir=args.mdir) - for mbox in iter_mailboxes(os.path.abspath(args.mailboxes_dir), maxnum=maxnum): + for mbox in iter_mailboxes(str(config.mailboxes_dir), maxnum=maxnum): rep.process_mailbox_stat(mbox) rep.dump_summary() diff --git a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py index 7257fb85..5e662e4e 100644 --- a/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py +++ b/chatmaild/src/chatmaild/tests/test_delete_inactive_users.py @@ -49,7 +49,6 @@ def create_user(addr, last_login): args=[ "--remove", str(example_config._inipath), - str(example_config.mailboxes_dir), ] ) diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 041b734a..7ef7b51f 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -11,9 +11,8 @@ from chatmaild.fsreport import main as report_main -@pytest.fixture -def basedir1(tmp_path): - basedir1 = tmp_path.joinpath("mailbox1@example.org") +def fill_mbox(basedir): + basedir1 = basedir.joinpath("mailbox1@example.org") basedir1.mkdir() password = basedir1.joinpath("password") password.write_text("xxx") @@ -39,7 +38,8 @@ def create_new_messages(basedir, relpaths, size=1000, days=0): @pytest.fixture -def mbox1(basedir1): +def mbox1(example_config): + basedir1 = fill_mbox(example_config.mailboxes_dir) return MailboxStat(basedir1) @@ -73,8 +73,8 @@ def test_stats_mailbox(mbox1): assert mbox3.last_login is None -def test_report(mbox1): - args = (str(Path(mbox1.basedir).parent),) +def test_report(mbox1, example_config): + args = (str(example_config._inipath),) report_main(args) args = list(args) + "--days 1".split() report_main(args) @@ -85,7 +85,7 @@ def test_report(mbox1): def test_expiry_cli_basic(example_config, mbox1): - args = example_config._inipath, Path(mbox1.basedir).parent + args = (example_config._inipath,) expiry_main(args) @@ -102,7 +102,7 @@ def test_expiry_cli_old_files(capsys, example_config, mbox1): create_new_messages(mbox1.basedir, ["cur/shouldstay"], size=1000 * 300, days=1) - args = example_config._inipath, Path(mbox1.basedir).parent, "--remove", "-v" + args = str(example_config._inipath), "--remove", "-v" expiry_main(args) out, err = capsys.readouterr() From 3fd56cea2c5820e32a48fdea018f6d0936d750ff Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 14:42:04 +0200 Subject: [PATCH 22/31] fix another invocation --- chatmaild/src/chatmaild/tests/test_expire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 7ef7b51f..0a7f68e2 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -85,7 +85,7 @@ def test_report(mbox1, example_config): def test_expiry_cli_basic(example_config, mbox1): - args = (example_config._inipath,) + args = (str(example_config._inipath),) expiry_main(args) From b159b60822fa0ab5c487d595daa916ec7824f20a Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 15:08:31 +0200 Subject: [PATCH 23/31] always use "H" for printing numbers, and make "chatmail.ini" file optional, defaulting to where it is on chatmail relays --- chatmaild/src/chatmaild/expire.py | 7 ++++++- chatmaild/src/chatmaild/fsreport.py | 29 +++++++++++++++++------------ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 507d5798..28a6f45b 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -124,8 +124,13 @@ def get_summary(self): def main(args=None): """Expire mailboxes and messages according to chatmail config""" parser = ArgumentParser(description=main.__doc__) + ini = "/usr/local/lib/chatmaild/chatmail.ini" parser.add_argument( - "chatmail_ini", action="store", help="path pointing to chatmail.ini file" + "chatmail_ini", + action="store", + nargs="?", + help=f"path pointing to chatmail.ini file, default: {ini}", + default=ini, ) parser.add_argument( "--days", action="store", help="assume date to be days older than now" diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index e4f7380f..cfc78bd5 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -36,7 +36,7 @@ def K(size): if size < 1000: return f"{size:6.0f}" elif size < 10000: - return f"{size / 1000:3.2f}K" + return f"{size / 1000:5.2f}K" return f"{int(size / 1000):5.0f}K" @@ -49,7 +49,7 @@ def H(size): return K(size) if size < 1000 * 1000 * 1000: return M(size) - return f"{size / 1000000000:2.2f}G" + return f"{size / 1000000000:5.2f}G" class Report: @@ -94,10 +94,10 @@ def dump_summary(self): all_messages = self.size_messages print() print("## Mailbox storage use analysis") - print(f"Mailbox data total size: {M(self.size_extra + all_messages)}") - print(f"Messages total size : {M(all_messages)}") + print(f"Mailbox data total size: {H(self.size_extra + all_messages)}") + print(f"Messages total size : {H(all_messages)}") percent = self.size_extra / (self.size_extra + all_messages) * 100 - print(f"Extra files : {M(self.size_extra)} ({percent:.2f}%)") + print(f"Extra files : {H(self.size_extra)} ({percent:.2f}%)") print() if self.min_login_age: @@ -105,8 +105,8 @@ def dump_summary(self): pref = f"[{self.mdir}] " if self.mdir else "" for minsize, sumsize in self.message_buckets.items(): - percent = sumsize / all_messages * 100 - print(f"{pref}larger than {K(minsize)}: {M(sumsize)} ({percent:.2f}%)") + percent = (sumsize / all_messages * 100) if all_messages else 0 + print(f"{pref}larger than {H(minsize)}: {H(sumsize)} ({percent:.2f}%)") user_logins = self.num_all_logins - self.num_ci_logins @@ -115,18 +115,23 @@ def p(num): print() print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") - print(f"all: {K(self.num_all_logins)}") - print(f"non-ci: {K(user_logins)}") - print(f"ci: {K(self.num_ci_logins)}") + print(f"all: {H(self.num_all_logins)}") + print(f"non-ci: {H(user_logins)}") + print(f"ci: {H(self.num_ci_logins)}") for days, active in self.login_buckets.items(): - print(f"last {days:3} days: {K(active)} {p(active)}") + print(f"last {days:3} days: {H(active)} {p(active)}") def main(args=None): """Report about filesystem storage usage of all mailboxes and messages""" parser = ArgumentParser(description=main.__doc__) + ini = "/usr/local/lib/chatmaild/chatmail.ini" parser.add_argument( - "chatmail_ini", action="store", help="path pointing to chatmail.ini file" + "chatmail_ini", + action="store", + nargs="?", + help=f"path pointing to chatmail.ini file, default: {ini}", + default=ini, ) parser.add_argument( "--days", From 39adf4afb09da019866722ebcc90ae9dcbf57180 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 15:15:42 +0200 Subject: [PATCH 24/31] unify K output --- chatmaild/src/chatmaild/fsreport.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index cfc78bd5..a233c828 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -33,11 +33,9 @@ def D(timestamp, now=datetime.utcnow().timestamp()): def K(size): - if size < 1000: - return f"{size:6.0f}" - elif size < 10000: + if size < 10000: return f"{size / 1000:5.2f}K" - return f"{int(size / 1000):5.0f}K" + return f"{size / 1000:5.0f}K" def M(size): From 0e4899ecf9d4f5fd7b992e1103b7de273ac307a9 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 17:38:12 +0200 Subject: [PATCH 25/31] use systemd timer instead of cron-job for expiry (tested by hand on c2) --- cmdeploy/src/cmdeploy/__init__.py | 30 ++++++++++--------- cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 | 2 -- .../service/chatmail-expire.service.f | 9 ++++++ .../cmdeploy/service/chatmail-expire.timer.f | 9 ++++++ 4 files changed, 34 insertions(+), 16 deletions(-) delete mode 100644 cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 create mode 100644 cmdeploy/src/cmdeploy/service/chatmail-expire.service.f create mode 100644 cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f diff --git a/cmdeploy/src/cmdeploy/__init__.py b/cmdeploy/src/cmdeploy/__init__.py index ea192a07..0064800f 100644 --- a/cmdeploy/src/cmdeploy/__init__.py +++ b/cmdeploy/src/cmdeploy/__init__.py @@ -129,6 +129,8 @@ def _install_remote_venv_with_chatmaild(config) -> None: "chatmail-metadata", "lastlogin", "turnserver", + "chatmail-expire", + "chatmail-expire.timer", ): execpath = fn if fn != "filtermail-incoming" else "filtermail" params = dict( @@ -137,20 +139,21 @@ def _install_remote_venv_with_chatmaild(config) -> None: remote_venv_dir=remote_venv_dir, mail_domain=config.mail_domain, ) - source_path = importlib.resources.files(__package__).joinpath( - "service", f"{fn}.service.f" - ) + + basename = fn if "." in fn else f"{fn}.service" + + source_path = importlib.resources.files(__package__).joinpath("service", f"{basename}.f") content = source_path.read_text().format(**params).encode() files.put( - name=f"Upload {fn}.service", + name=f"Upload {basename}", src=io.BytesIO(content), - dest=f"/etc/systemd/system/{fn}.service", + dest=f"/etc/systemd/system/{basename}", **root_owned, ) systemd.service( - name=f"Setup {fn} service", - service=f"{fn}.service", + name=f"Setup {basename}", + service=basename, running=True, enabled=True, restarted=True, @@ -158,6 +161,7 @@ def _install_remote_venv_with_chatmaild(config) -> None: ) + def _configure_opendkim(domain: str, dkim_selector: str = "dkim") -> bool: """Configures OpenDKIM""" need_restart = False @@ -387,13 +391,11 @@ def _configure_dovecot(config: Config, debug: bool = False) -> bool: ) need_restart |= lua_push_notification_script.changed - files.template( - src=importlib.resources.files(__package__).joinpath("dovecot/expunge.cron.j2"), - dest="/etc/cron.d/expunge", - user="root", - group="root", - mode="644", - config=config, + # remove historic expunge script + # which is now implemented through a systemd chatmail-expire service/timer + files.file( + path="/etc/cron.d/expunge", + present=False, ) # as per https://doc.dovecot.org/configuration_manual/os/ diff --git a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 b/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 deleted file mode 100644 index 538251a3..00000000 --- a/cmdeploy/src/cmdeploy/dovecot/expunge.cron.j2 +++ /dev/null @@ -1,2 +0,0 @@ -# expire mailboxes, old and large messages -2 0 * * * vmail /usr/local/lib/chatmaild/venv/bin/chatmail-expire /usr/local/lib/chatmaild/chatmail.ini {config.mailboxes_dir} --remove diff --git a/cmdeploy/src/cmdeploy/service/chatmail-expire.service.f b/cmdeploy/src/cmdeploy/service/chatmail-expire.service.f new file mode 100644 index 00000000..4387b20a --- /dev/null +++ b/cmdeploy/src/cmdeploy/service/chatmail-expire.service.f @@ -0,0 +1,9 @@ +[Unit] +Description=chatmail mail storage expiration job +After=network.target + +[Service] +Type=oneshot +User=vmail +ExecStart=/usr/local/lib/chatmaild/venv/bin/chatmail-expire /usr/local/lib/chatmaild/chatmail.ini --remove + diff --git a/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f b/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f new file mode 100644 index 00000000..9ae7119f --- /dev/null +++ b/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f @@ -0,0 +1,9 @@ +[Unit] +Description=Run Daily chatmail-expire job + +[Timer] +OnCalendar=*-*-* 00:02:00 +Persistent=true + +[Install] +WantedBy=timers.target From b834c43e2a0c9fd8e59f1089f0a780f916d00e92 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 18:01:16 +0200 Subject: [PATCH 26/31] also run fsreport --- cmdeploy/src/cmdeploy/__init__.py | 2 ++ .../src/cmdeploy/service/chatmail-fsreport.service.f | 9 +++++++++ cmdeploy/src/cmdeploy/service/chatmail-fsreport.timer.f | 9 +++++++++ 3 files changed, 20 insertions(+) create mode 100644 cmdeploy/src/cmdeploy/service/chatmail-fsreport.service.f create mode 100644 cmdeploy/src/cmdeploy/service/chatmail-fsreport.timer.f diff --git a/cmdeploy/src/cmdeploy/__init__.py b/cmdeploy/src/cmdeploy/__init__.py index 0064800f..8d7af04d 100644 --- a/cmdeploy/src/cmdeploy/__init__.py +++ b/cmdeploy/src/cmdeploy/__init__.py @@ -131,6 +131,8 @@ def _install_remote_venv_with_chatmaild(config) -> None: "turnserver", "chatmail-expire", "chatmail-expire.timer", + "chatmail-fsreport", + "chatmail-fsreport.timer", ): execpath = fn if fn != "filtermail-incoming" else "filtermail" params = dict( diff --git a/cmdeploy/src/cmdeploy/service/chatmail-fsreport.service.f b/cmdeploy/src/cmdeploy/service/chatmail-fsreport.service.f new file mode 100644 index 00000000..3bd630fb --- /dev/null +++ b/cmdeploy/src/cmdeploy/service/chatmail-fsreport.service.f @@ -0,0 +1,9 @@ +[Unit] +Description=chatmail file system storage reporting job +After=network.target + +[Service] +Type=oneshot +User=vmail +ExecStart=/usr/local/lib/chatmaild/venv/bin/chatmail-fsreport /usr/local/lib/chatmaild/chatmail.ini + diff --git a/cmdeploy/src/cmdeploy/service/chatmail-fsreport.timer.f b/cmdeploy/src/cmdeploy/service/chatmail-fsreport.timer.f new file mode 100644 index 00000000..b47d1b03 --- /dev/null +++ b/cmdeploy/src/cmdeploy/service/chatmail-fsreport.timer.f @@ -0,0 +1,9 @@ +[Unit] +Description=Run Daily Chatmail fsreport Job + +[Timer] +OnCalendar=*-*-* 08:02:00 +Persistent=true + +[Install] +WantedBy=timers.target From a5fa8cb3696ed885437eb05970e9e52e301d4cd6 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 20:46:14 +0200 Subject: [PATCH 27/31] simplify and beautify formatting and sizes --- chatmaild/src/chatmaild/fsreport.py | 39 +++++++++++------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index a233c828..9570018b 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -26,27 +26,14 @@ MONTHSECONDS = DAYSECONDS * 30 -def D(timestamp, now=datetime.utcnow().timestamp()): - diff_seconds = int(now) - int(timestamp) - # assert diff_seconds >= 0, (int(timestamp), int(now)) - return f"{int(diff_seconds / DAYSECONDS):2.0f}d" - - -def K(size): +def HSize(size: int): + """Format a size integer as a Human-readable string Kilobyte, Megabyte or Gigabyte""" if size < 10000: return f"{size / 1000:5.2f}K" - return f"{size / 1000:5.0f}K" - - -def M(size): - return f"{int(size / 1000000):5.0f}M" - - -def H(size): if size < 1000 * 1000: - return K(size) + return f"{size / 1000:5.0f}K" if size < 1000 * 1000 * 1000: - return M(size) + return f"{int(size / 1000000):5.0f}M" return f"{size / 1000000000:5.2f}G" @@ -92,10 +79,10 @@ def dump_summary(self): all_messages = self.size_messages print() print("## Mailbox storage use analysis") - print(f"Mailbox data total size: {H(self.size_extra + all_messages)}") - print(f"Messages total size : {H(all_messages)}") + print(f"Mailbox data total size: {HSize(self.size_extra + all_messages)}") + print(f"Messages total size : {HSize(all_messages)}") percent = self.size_extra / (self.size_extra + all_messages) * 100 - print(f"Extra files : {H(self.size_extra)} ({percent:.2f}%)") + print(f"Extra files : {HSize(self.size_extra)} ({percent:.2f}%)") print() if self.min_login_age: @@ -104,7 +91,9 @@ def dump_summary(self): pref = f"[{self.mdir}] " if self.mdir else "" for minsize, sumsize in self.message_buckets.items(): percent = (sumsize / all_messages * 100) if all_messages else 0 - print(f"{pref}larger than {H(minsize)}: {H(sumsize)} ({percent:.2f}%)") + print( + f"{pref}larger than {HSize(minsize)}: {HSize(sumsize)} ({percent:.2f}%)" + ) user_logins = self.num_all_logins - self.num_ci_logins @@ -113,11 +102,11 @@ def p(num): print() print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") - print(f"all: {H(self.num_all_logins)}") - print(f"non-ci: {H(user_logins)}") - print(f"ci: {H(self.num_ci_logins)}") + print(f"all: {HSize(self.num_all_logins)}") + print(f"non-ci: {HSize(user_logins)}") + print(f"ci: {HSize(self.num_ci_logins)}") for days, active in self.login_buckets.items(): - print(f"last {days:3} days: {H(active)} {p(active)}") + print(f"last {days:3} days: {HSize(active)} {p(active)}") def main(args=None): From 97ddeb32079d7e9060332c8ea02e57a5b0278a97 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 20:48:04 +0200 Subject: [PATCH 28/31] Apply suggestions from code review fix typo Co-authored-by: l --- chatmaild/src/chatmaild/fsreport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index 9570018b..deb498ab 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -143,7 +143,7 @@ def main(args=None): "--maxnum", default=None, action="store", - help="maximum number of mailbxoes to iterate on", + help="maximum number of mailboxes to iterate on", ) args = parser.parse_args(args) From 4d69543542e7451ccbf55bd018795ad133c4a864 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Mon, 20 Oct 2025 20:59:11 +0200 Subject: [PATCH 29/31] replace expunge mentioning in architecture --- ARCHITECTURE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index a4191cae..9e42c007 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -19,7 +19,6 @@ graph LR; /var/lib/acme`")] --> nginx-internal; cron --- chatmail-metrics; cron --- acmetool; - cron --- expunge; chatmail-metrics --- website; acmetool --> certs[("`TLS certs /var/lib/acme`")]; @@ -35,7 +34,8 @@ graph LR; dovecot --- users; dovecot --- |metadata.socket|chatmail-metadata; doveauth --- users; - expunge --- users; + chatmail-expire-daily --- users; + chatmail-fsreport-daily --- users; chatmail-metadata --- iroh-relay; certs-nginx --> postfix; certs-nginx --> dovecot; From aa0aa7c7bc426bcc94ced3991b0bd30d95f789bd Mon Sep 17 00:00:00 2001 From: holger krekel Date: Tue, 21 Oct 2025 16:49:23 +0200 Subject: [PATCH 30/31] try fix CI --- chatmaild/src/chatmaild/expire.py | 4 ++++ chatmaild/src/chatmaild/tests/test_expire.py | 8 +++++++- cmdeploy/src/cmdeploy/__init__.py | 7 ++++++- cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f | 1 - 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/chatmaild/src/chatmaild/expire.py b/chatmaild/src/chatmaild/expire.py index 28a6f45b..30a10547 100644 --- a/chatmaild/src/chatmaild/expire.py +++ b/chatmaild/src/chatmaild/expire.py @@ -18,6 +18,10 @@ def iter_mailboxes(basedir, maxnum): + if not os.path.exists(basedir): + print_info(f"no mailboxes found at: {basedir}") + return + for name in os.listdir(basedir)[:maxnum]: if "@" in name: yield MailboxStat(basedir + "/" + name) diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index 0a7f68e2..be6cc642 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -6,7 +6,7 @@ import pytest -from chatmaild.expire import FileEntry, MailboxStat +from chatmaild.expire import FileEntry, MailboxStat, iter_mailboxes from chatmaild.expire import main as expiry_main from chatmaild.fsreport import main as report_main @@ -51,6 +51,12 @@ def test_filentry_ordering(tmp_path): assert l == sorted +def test_no_mailbxoes(tmp_path, capsys): + assert [] == list(iter_mailboxes(str(tmp_path.joinpath("notexists")), maxnum=10)) + out, err = capsys.readouterr() + assert "no mailboxes" in err + + def test_stats_mailbox(mbox1): password = Path(mbox1.basedir).joinpath("password") assert mbox1.last_login == password.stat().st_mtime diff --git a/cmdeploy/src/cmdeploy/__init__.py b/cmdeploy/src/cmdeploy/__init__.py index 8d7af04d..140d6a44 100644 --- a/cmdeploy/src/cmdeploy/__init__.py +++ b/cmdeploy/src/cmdeploy/__init__.py @@ -153,11 +153,16 @@ def _install_remote_venv_with_chatmaild(config) -> None: dest=f"/etc/systemd/system/{basename}", **root_owned, ) + if fn == "chatmail-expire" or fn == "chatmail-fsreport": + # don't auto-start but let the corresponding timer trigger execution + enabled = False + else: + enabled = True systemd.service( name=f"Setup {basename}", service=basename, running=True, - enabled=True, + enabled=enabled, restarted=True, daemon_reload=True, ) diff --git a/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f b/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f index 9ae7119f..9520a67c 100644 --- a/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f +++ b/cmdeploy/src/cmdeploy/service/chatmail-expire.timer.f @@ -3,7 +3,6 @@ [Timer] OnCalendar=*-*-* 00:02:00 -Persistent=true [Install] WantedBy=timers.target From 5e01aa3836607bfbc1ce79683bee6dd0747d8502 Mon Sep 17 00:00:00 2001 From: holger krekel Date: Tue, 21 Oct 2025 17:19:54 +0200 Subject: [PATCH 31/31] make sure fsreport can run on empty mailbox dir --- chatmaild/src/chatmaild/fsreport.py | 7 +++++-- chatmaild/src/chatmaild/tests/test_expire.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/chatmaild/src/chatmaild/fsreport.py b/chatmaild/src/chatmaild/fsreport.py index deb498ab..375800f8 100644 --- a/chatmaild/src/chatmaild/fsreport.py +++ b/chatmaild/src/chatmaild/fsreport.py @@ -81,7 +81,10 @@ def dump_summary(self): print("## Mailbox storage use analysis") print(f"Mailbox data total size: {HSize(self.size_extra + all_messages)}") print(f"Messages total size : {HSize(all_messages)}") - percent = self.size_extra / (self.size_extra + all_messages) * 100 + try: + percent = self.size_extra / (self.size_extra + all_messages) * 100 + except ZeroDivisionError: + percent = 100 print(f"Extra files : {HSize(self.size_extra)} ({percent:.2f}%)") print() @@ -98,7 +101,7 @@ def dump_summary(self): user_logins = self.num_all_logins - self.num_ci_logins def p(num): - return f"({num / user_logins * 100:2.2f}%)" + return f"({num / user_logins * 100:2.2f}%)" if user_logins else "100%" print() print(f"## Login stats, from date reference {datetime.fromtimestamp(self.now)}") diff --git a/chatmaild/src/chatmaild/tests/test_expire.py b/chatmaild/src/chatmaild/tests/test_expire.py index be6cc642..dd848c00 100644 --- a/chatmaild/src/chatmaild/tests/test_expire.py +++ b/chatmaild/src/chatmaild/tests/test_expire.py @@ -79,6 +79,11 @@ def test_stats_mailbox(mbox1): assert mbox3.last_login is None +def test_report_no_mailboxes(example_config): + args = (str(example_config._inipath),) + report_main(args) + + def test_report(mbox1, example_config): args = (str(example_config._inipath),) report_main(args)