Trap internal AssertionError from python libraries
author Magnus Hagander <magnus@hagander.net>
Thu, 3 Jan 2019 10:14:30 +0000 (11:14 +0100)
committer Magnus Hagander <magnus@hagander.net>
Fri, 4 Jan 2019 11:23:39 +0000 (12:23 +0100)
For some really broken messages, we end up in a cannot-happen codepath.
Trap this one and just consider that MIME part empty, and try again
later.

In passing, also change it so we continue loading after failures of parsing.
We continued in the mode where we just generated diffs, but not when
making updates. Now continue in both cases, but of course don't do the
actual update if the parsing failed.

loader/lib/parser.py
loader/reparse_message.py

index 8ee25c5ad3a339467916d1efe98f688738c8be53..b97c8b338bba8c0b86792ad06d7aa1747d416f55 100644 (file)
@@ -124,7 +124,14 @@ class ArchivesParser(object):
                return charset
 
        def get_payload_as_unicode(self, msg):
-               b = msg.get_payload(decode=True)
+               try:
+                       b = msg.get_payload(decode=True)
+               except AssertionError:
+                       # Badly encoded data can throw an exception here, where the python
+                       # libraries fail to handle it and enters a cannot-happen path.
+                       # In which case we just ignore it and hope for a better MIME part later.
+                       b = None
+
                if b:
                        # Find out if there is a charset
                        charset = None
@@ -303,8 +310,15 @@ class ArchivesParser(object):
                                return
                        # For now, accept anything not text/plain
                        if container.get_content_type() != 'text/plain':
-                               self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                               try:
+                                       self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                               except AssertionError:
+                                       # Badly encoded data can throw an exception here, where the python
+                                       # libraries fail to handle it and enters a cannot-happen path.
+                                       # In which case we just ignore this attachment.
+                                       return
                                return
+
                        # It's a text/plain, it might be worthwhile.
                        # If it has a name, we consider it an attachments
                        if not container.get_params():
@@ -312,19 +326,42 @@ class ArchivesParser(object):
                        for k,v in container.get_params():
                                if k=='name' and v != '':
                                        # Yes, it has a name
-                                       self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                                       try:
+                                               self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                                       except AssertionError:
+                                               # Badly encoded data can throw an exception here, where the python
+                                               # libraries fail to handle it and enters a cannot-happen path.
+                                               # In which case we just ignore this attachment.
+                                               return
+
                                        return
+
                        # If it's content-disposition=attachment, we also want to save it
                        if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'):
-                               self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                               try:
+                                       self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+                               except AssertionError:
+                                       # Badly encoded data can throw an exception here, where the python
+                                       # libraries fail to handle it and enters a cannot-happen path.
+                                       # In which case we just ignore this attachment.
+                                       return
+
                                return
+
                        # If we have already found one text/plain part, make all
                        # further text/plain parts attachments
                        if self.attachments_found_first_plaintext:
                                # However, this will also *always* catch the MIME part added
                                # by majordomo with the footer. So if that one is present,
                                # we need to explicitly exclude it again.
-                               b = container.get_payload(decode=True)
+                               try:
+                                       b = container.get_payload(decode=True)
+                               except AssertionError:
+                                       # Badly encoded data can throw an exception here, where the python
+                                       # libraries fail to handle it and enters a cannot-happen path.
+                                       # In which case we just ignore this attachment.
+                                       return
+
                                if isinstance(b, str) and not self._re_footer.match(b):
                                        # We know there is no name for this one
                                        self.attachments.append((None, container.get_content_type(), b))
index df4501a307b0e4670a31eb690614d57b9ed1dd9e..ed4def2f0e000c88aa09ca6c10883f72c3765011 100755 (executable)
@@ -102,8 +102,9 @@ if __name__ == "__main__":
                        ap.analyze(date_override=opt.force_date)
                except IgnorableException as e:
                        if opt.update:
-                               raise e
-                       f.write("Exception loading %s: %s" % (id, e))
+                               print("Exception loading {0}: {1}".format(id, e))
+                       else:
+                               f.write("Exception loading %s: %s" % (id, e))
                        continue
 
                if opt.update: