From 36e6a6b67c7f64f524770a852587f8db072604a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9lestin=20Matte?= <gitlab@cmatte.me>
Date: Wed, 3 Nov 2021 17:09:01 +0100
Subject: [PATCH] parser: handle messages in which Message-ID is missing

Message-ID is not mandatory in emails. When such a message is imported,
attempt to use Resent-Message-ID instead if it exists, or generate a new
one.
---
 loader/lib/parser.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/loader/lib/parser.py b/loader/lib/parser.py
index 171f197..21e1e48 100644
--- a/loader/lib/parser.py
+++ b/loader/lib/parser.py
@@ -1,6 +1,7 @@
 import re
 import datetime
 import dateutil.parser
+import random
 
 from email.parser import BytesParser
 from email.header import decode_header, Header
@@ -28,13 +29,13 @@ class ArchivesParser(object):
         # Look for a specific messageid. This means we might parse it twice,
         # but so be it. Any exception means we know it's not this one...
         try:
-            if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
+            if self.clean_messageid(self.decode_mime_header(self.get_or_generate_messageid())) == msgid:
                 return True
         except Exception:
             return False
 
     def analyze(self, date_override=None):
-        self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID')))
+        self.msgid = self.clean_messageid(self.decode_mime_header(self.get_or_generate_messageid()))
         self._from = self.decode_mime_header(self.get_mandatory('From'), True)
         self.to = self.decode_mime_header(self.get_optional('To'), True)
         self.cc = self.decode_mime_header(self.get_optional('CC'), True)
@@ -547,6 +548,25 @@ class ArchivesParser(object):
         except ValueError as ve:
             raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))
 
+    def get_or_generate_messageid(self):
+        """Return Message-ID, else Resent-Message-ID, else a forged id."""
+        x = self.msg["Message-ID"]
+        if x is None:
+            # If Message-ID is missing, try using Resent-Message-ID instead
+            x = self.msg["Resent-Message-ID"]
+        if x is None:
+            # If Resent-Message-ID is missing too, forge a new Message-ID
+            # following a simpler version of
+            # https://datatracker.ietf.org/doc/html/draft-ietf-usefor-message-id-00#section-3
+            # NOTE: the id is random, so it differs on every call/import.
+            date_part = re.sub('[^A-Z0-9]', '', str(self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date')))))
+            # From is usually 'Name <user@host>': keep only the host name,
+            # a plain split('@') would leave the closing '>' in the id.
+            hosts = re.findall(r'@([A-Za-z0-9.-]+)', self.decode_mime_header(self.get_mandatory('From'), True))
+            fqdn = hosts[-1] if hosts else ""
+            x = "<%s.%d@%s>" % (date_part, random.getrandbits(64), fqdn)
+        return x
+
     def get_mandatory(self, fieldname):
         try:
             x = self.msg[fieldname]
-- 
2.33.1

