From 8228c2222fcf5791fe5643252e4d248839c199e9 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 16 Nov 2023 10:42:10 -0600
Subject: [PATCH] Try both utf-8 and windows-1252 for decoding email

Recent submissions from Cirrus were classified as spam by the lore
analysis robot script.  This is because Cirrus used windows-1252 for
the encoding, which failed to decode as utf-8.

Try both encodings when decoding email.

Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
 contrib/process_linux_firmware.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/contrib/process_linux_firmware.py b/contrib/process_linux_firmware.py
index 668e35c0..ea108391 100755
--- a/contrib/process_linux_firmware.py
+++ b/contrib/process_linux_firmware.py
@@ -34,6 +34,8 @@ content_types = {
 def classify_content(content):
     # load content into the email library
     msg = email.message_from_string(content)
+    decoded = None
+    body = None
 
     # check the subject
     subject = msg["Subject"]
@@ -42,17 +44,28 @@ def classify_content(content):
     if "PATCH" in subject:
         return ContentType.PATCH
 
-    for part in msg.walk():
-        if part.get_content_type() == "text/plain":
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain":
+                body = part.get_payload(decode=True)
+    else:
+        body = msg.get_payload(decode=True)
+
+    if body:
+        for encoding in ["utf-8", "windows-1252"]:
             try:
-                body = part.get_payload(decode=True).decode("utf-8")
-                for key in content_types.keys():
-                    if key in body:
-                        return content_types[key]
-                break
-            except UnicodeDecodeError as e:
-                logging.warning("Failed to decode email: %s, treating as SPAM" % e)
+                decoded = body.decode(encoding)
                 break
+            except UnicodeDecodeError:
+                pass
+
+    if decoded:
+        for key in content_types.keys():
+            if key in decoded:
+                return content_types[key]
+    else:
+        logging.warning("Failed to decode email: %s, treating as SPAM", body)
+
     return ContentType.SPAM