import time
class AuthBackend(ModelBackend):
    """Fake authentication backend.

    All real logins go through the community-auth redirect flow, so any
    direct (username/password) authentication attempt is a bug and is
    refused unconditionally.
    """
    def authenticate(self, username=None, password=None):
        # Fail hard: nothing in this app should ever call this.
        raise Exception("Direct authentication not supported")
####
# Handle login requests by sending them off to the main site
def login(request):
    """Redirect the browser to the central community-auth login page.

    If PGAUTH_REDIRECT is not configured, fall back to Django's own login
    view so local installs still work.  If a 'next' URL is present, it is
    wrapped with a timestamp nonce and AES-CBC encrypted under a key
    derived from SECRET_KEY, so auth_receive() can recover it unmodified.
    """
    if not hasattr(settings, 'PGAUTH_REDIRECT'):
        # No pgauth installed, so allow local installs.
        from django.contrib.auth.views import login
        return login(request, template_name='admin.html')

    if 'next' in request.GET:
        # Put together an url-encoded dict of parameters we're getting back,
        # including a small nonce at the beginning to make sure it doesn't
        # encrypt the same way every time.
        s = "t=%s&%s" % (int(time.time()), urlencode({'r': request.GET['next']}))
        # Now encrypt it
        r = Random.new()
        iv = r.read(16)
        encryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16], AES.MODE_CBC, iv)
        # Pad with spaces to a multiple of 16 bytes (auth_receive strips
        # them with rstrip(b' ')), and encode to bytes -- the py3 AES API
        # rejects str plaintext, which this used to pass.
        cipher = encryptor.encrypt((s + ' ' * (16 - (len(s) % 16))).encode('utf8'))

        return HttpResponseRedirect("%s?d=%s$%s" % (
            settings.PGAUTH_REDIRECT,
            base64.b64encode(iv, b"-_").decode('utf8'),
            base64.b64encode(cipher, b"-_").decode('utf8'),
        ))
    else:
        return HttpResponseRedirect(settings.PGAUTH_REDIRECT)
# Handle logout requests by logging out of this site and then
# redirecting to log out from the main site as well.
def logout(request):
    """Terminate the local session, then bounce to the central logout."""
    if request.user.is_authenticated():
        # End the local Django session first.
        django_logout(request)
    # Always send the browser on to the central logout page.
    return HttpResponseRedirect(settings.PGAUTH_REDIRECT + "logout/")
# Receive an authentication response from the main website and try
# to log the user in.
def auth_receive(request):
    """Decrypt and validate an authentication callback from the central
    auth server, create or update the matching local User, log it in,
    and redirect.

    Expects GET parameters 'i' (IV) and 'd' (ciphertext), both
    urlsafe-base64 encoded, or 's=logout' for logout round-trips.
    Returns 400 responses for malformed or stale payloads.
    """
    if 's' in request.GET and request.GET['s'] == "logout":
        # This was a logout request
        return HttpResponseRedirect('/')

    if 'i' not in request.GET:
        return HttpResponse("Missing IV in url!", status=400)
    if 'd' not in request.GET:
        return HttpResponse("Missing data in url!", status=400)

    # Set up an AES object and decrypt the data we received
    decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY),
                        AES.MODE_CBC,
                        base64.b64decode(str(request.GET['i']), "-_"))
    s = decryptor.decrypt(base64.b64decode(str(request.GET['d']), "-_")).rstrip(b' ').decode('utf8')

    # Now un-urlencode it
    try:
        data = parse_qs(s, strict_parsing=True)
    except ValueError:
        return HttpResponse("Invalid encrypted data received.", status=400)

    # Check the timestamp in the authentication
    if (int(data['t'][0]) < time.time() - 10):
        return HttpResponse("Authentication token too old.", status=400)

    # Update the user record (if any)
    try:
        user = User.objects.get(username=data['u'][0])
        # User found, let's see if any important fields have changed
        changed = False
        if user.first_name != data['f'][0]:
            user.first_name = data['f'][0]
            changed = True
        if user.last_name != data['l'][0]:
            user.last_name = data['l'][0]
            changed = True
        if user.email != data['e'][0]:
            user.email = data['e'][0]
            changed = True
        if changed:
            user.save()
    except User.DoesNotExist:
        # User not found, create it!

        # NOTE! We have some legacy users where there is a user in
        # the database with a different userid. Instead of trying to
        # somehow fix that live, give a proper error message and
        # have somebody look at it manually.
        if User.objects.filter(email=data['e'][0]).exists():
            return HttpResponse("""A user with email %s already exists, but with
a different username than %s.
This is almost certainly caused by some legacy data in our database.
We apologize for the inconvenience.
""" % (data['e'][0], data['u'][0]), content_type='text/plain')

        if hasattr(settings, 'PGAUTH_CREATEUSER_CALLBACK'):
            res = getattr(settings, 'PGAUTH_CREATEUSER_CALLBACK')(
                data['u'][0],
                data['e'][0],
                # Bug fix: this used to pass ['f'][0] -- the literal
                # string 'f' -- instead of the user's first name.
                data['f'][0],
                data['l'][0],
            )
            # If anything is returned, we'll return that as our result.
            # If None is returned, it means go ahead and create the user.
            if res:
                return res

        user = User(username=data['u'][0],
                    first_name=data['f'][0],
                    last_name=data['l'][0],
                    email=data['e'][0],
                    password='setbypluginnotasha1',
                    )
        user.save()

    # Ok, we have a proper user record. Now tell django that
    # we're authenticated so it persists it in the session. Before
    # we do that, we have to annotate it with the backend information.
    user.backend = "%s.%s" % (AuthBackend.__module__, AuthBackend.__name__)
    django_login(request, user)

    # Finally, check if we have a data package that tells us where to
    # redirect the user.
    if 'd' in data:
        (ivs, datas) = data['d'][0].split('$')
        decryptor = AES.new(SHA.new(settings.SECRET_KEY.encode('ascii')).digest()[:16],
                            AES.MODE_CBC,
                            base64.b64decode(ivs, b"-_"))
        s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8')
        try:
            rdata = parse_qs(s, strict_parsing=True)
        except ValueError:
            return HttpResponse("Invalid encrypted data received.", status=400)
        if 'r' in rdata:
            # Redirect address
            return HttpResponseRedirect(rdata['r'][0])
    # No redirect specified, see if we have it in our settings
    if hasattr(settings, 'PGAUTH_REDIRECT_SUCCESS'):
        return HttpResponseRedirect(settings.PGAUTH_REDIRECT_SUCCESS)
    return HttpResponse("Authentication successful, but don't know where to redirect!", status=500)
# Perform a search in the central system. Note that the results are returned as an
# Unlike the authentication, searching does not involve the browser - we just make
# a direct http call.
def user_search(searchterm=None, userid=None):
    """Query the central auth server for users, either by free-text
    *searchterm* or by exact *userid*, and return the decrypted JSON
    payload (a list of dicts with keys u/f/l/e, per user_import)."""
    # If upsteam isn't responding quickly, it's not going to respond at all, and
    # 10 seconds is already quite long.
    socket.setdefaulttimeout(10)
    if userid:
        q = {'u': userid}
    else:
        q = {'s': searchterm}

    u = urllib.request.urlopen('%ssearch/?%s' % (
        settings.PGAUTH_REDIRECT,
        urlencode(q),
    ))
    # Bug fix: read() returns bytes, so the separator must be bytes too --
    # split('&') raised TypeError on Python 3.
    (ivs, datas) = u.read().split(b'&')
    u.close()

    # Decryption time
    decryptor = AES.new(base64.b64decode(settings.PGAUTH_KEY),
                        AES.MODE_CBC,
                        base64.b64decode(ivs, "-_"))
    # Bug fix: decrypt() also returns bytes; strip the space padding with
    # a bytes argument (rstrip(' ') raised TypeError) and decode before
    # parsing the JSON.
    s = decryptor.decrypt(base64.b64decode(datas, "-_")).rstrip(b' ').decode('utf8')
    j = json.loads(s)

    return j
# Import a user into the local authentication system. Will initially
# make a search for it, and if anything other than one entry is returned
# The call to this function should normally be wrapped in a transaction,
# and this function itself will make no attempt to do anything about that.
def user_import(uid):
    """Fetch user *uid* from the central system and create a local User.

    Raises an Exception when the upstream lookup does not return exactly
    one entry, or when the username already exists locally.
    """
    matches = user_search(userid=uid)
    if len(matches) != 1:
        raise Exception("Internal error, duplicate or no user found")

    entry = matches[0]

    if User.objects.filter(username=entry['u']).exists():
        raise Exception("User already exists")

    User(username=entry['u'],
         first_name=entry['f'],
         last_name=entry['l'],
         email=entry['e'],
         password='setbypluginnotsha1',
         ).save()
@cache(hours=4)
def listinfo(request):
    """JSON dump of all mailing lists, for whitelisted API clients on
    public archives only."""
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')

    if request.META['REMOTE_ADDR'] not in settings.API_CLIENTS:
        return HttpResponseForbidden('Invalid host')

    resp = HttpResponse(content_type='application/json')
    payload = [{
        'name': lst.listname,
        'shortdesc': lst.shortdesc,
        'description': lst.description,
        'active': lst.active,
        'group': lst.group.groupname,
    } for lst in List.objects.select_related('group').all()]
    json.dump(payload, resp)
    return resp
@cache(hours=4)
def latest(request, listname):
    """JSON list of the latest messages on *listname* ('*' for all lists).

    GET parameters: n (count, 1-100, default 50), a=1 (only messages with
    attachments), s (full text search term).
    """
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')

    if request.META['REMOTE_ADDR'] not in settings.API_CLIENTS:
        return HttpResponseForbidden('Invalid host')

    # Return the latest <n> messages on this list.
    # If <n> is not specified, return 50. Max value for <n> is 100.
    if 'n' in request.GET:
        try:
            limit = int(request.GET['n'])
        except ValueError:
            # Non-numeric: fall through to the default below.
            # (Was a bare except, which also hid unrelated errors.)
            limit = 0
    else:
        limit = 50
    if limit <= 0 or limit > 100:
        limit = 50

    extrawhere = []
    extraparams = []

    # Return only messages that have attachments?
    if 'a' in request.GET:
        if request.GET['a'] == '1':
            extrawhere.append("has_attachment")

    # Restrict by full text search
    if 's' in request.GET and request.GET['s']:
        extrawhere.append("fti @@ plainto_tsquery('public.pg', %s)")
        extraparams.append(request.GET['s'])

    if listname != '*':
        list = get_object_or_404(List, listname=listname)
        # Parameterized instead of string-interpolated into the SQL.
        extrawhere.append("threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)")
        extraparams.append(list.listid)
    else:
        list = None
        # Bug fix: this branch used to reset extrawhere to '', silently
        # dropping the attachment/search filters for the global view.

    mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().extra(where=extrawhere, params=extraparams).order_by('-date')[:limit]
    allyearmonths = set([(m.date.year, m.date.month) for m in mlist])

    resp = HttpResponse(content_type='application/json')
    json.dump([
        {'msgid': m.messageid,
         'date': m.date.isoformat(),
         'from': m.mailfrom,
         'subj': m.subject, }
        for m in mlist], resp)

    # Make sure this expires from the varnish cache when new entries show
    # up in this month.
    # XXX: need to deal with the global view, but for now API callers come in directly
    if list:
        resp['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (list.listid, year, month) for year, month in allyearmonths]))
    return resp
@cache(hours=4)
def thread(request, msgid):
    """JSON metadata for every message in the thread containing *msgid*,
    including basic attachment info. Sets X-pgthread for cache purging."""
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')

    if request.META['REMOTE_ADDR'] not in settings.API_CLIENTS:
        return HttpResponseForbidden('Invalid host')

    # Return metadata about a single thread. A list of all the emails
    # that are in the thread with their basic attributes are included.
    msg = get_object_or_404(Message, messageid=msgid)
    mlist = Message.objects.defer('bodytxt', 'cc', 'to').filter(threadid=msg.threadid)

    resp = HttpResponse(content_type='application/json')
    json.dump([
        {'msgid': m.messageid,
         'date': m.date.isoformat(),
         'from': m.mailfrom,
         'subj': m.subject,
         'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()],
         }
        for m in mlist], resp)
    # Bug fix: this used the comprehension variable 'm' after the list
    # comprehension, which does not leak in Python 3 and raised NameError.
    # All messages in mlist share msg's threadid, so use that.
    resp['X-pgthread'] = msg.threadid
    return resp
def thread_subscribe(request, msgid):
    """PUT endpoint: subscribe an API client (via X-APIKEY header) to the
    thread containing *msgid*. Returns 201 when newly created, 200 when
    the subscription already existed."""
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')
    if request.META['REMOTE_ADDR'] not in settings.API_CLIENTS:
        return HttpResponseForbidden('Invalid host')
    if 'HTTP_X_APIKEY' not in request.META:
        return HttpResponseForbidden('No API key')
    if request.method != 'PUT':
        return HttpResponseForbidden('Invalid HTTP verb')

    apiclient = get_object_or_404(ApiClient, apikey=request.META['HTTP_X_APIKEY'])
    msg = get_object_or_404(Message, messageid=msgid)

    obj, created = ThreadSubscription.objects.get_or_create(apiclient=apiclient,
                                                            threadid=msg.threadid)
    return HttpResponse(status=201 if created else 200)
# We're intentionally putting the prefix text in the array here, since
# we might need that flexibility in the future.
# Index corresponds to Message.hiddenstatus; 0 means "not hidden".
hide_reasons = [
    None,  # placeholder for 0
    'This message has been hidden because a virus was found in the message.',  # 1
    'This message has been hidden because the message violated policies.',  # 2
    # Bug fix: was the ungrammatical "hidden because for privacy reasons".
    'This message has been hidden for privacy reasons.',  # 3
    'This message was corrupt',  # 4
]
class Message(models.Model):
    """A single archived email message (table 'messages')."""
    threadid = models.IntegerField(null=False, blank=False)
    mailfrom = models.TextField(null=False, db_column='_from')
    to = models.TextField(null=False, db_column='_to')
    cc = models.TextField(null=False)
    subject = models.TextField(null=False)
    date = models.DateTimeField(null=False)
    messageid = models.TextField(null=False)
    bodytxt = models.TextField(null=False)
    # rawtxt is a bytea field, which django doesn't support (easily)
    parentid = models.IntegerField(null=False, blank=False)
    has_attachment = models.BooleanField(null=False, default=False)
    # Index into hide_reasons; NULL/0 means not hidden.
    hiddenstatus = models.IntegerField(null=True)
    # fti is a tsvector field, which django doesn't support (easily)

    class Meta:
        db_table = 'messages'

    @property
    def printdate(self):
        # Human-readable timestamp for templates.
        return self.date.strftime("%Y-%m-%d %H:%M:%S")

    @property
    def shortdate(self):
        # Compact numeric timestamp.
        return self.date.strftime("%Y%m%d%H%M")

    # We explicitly cache the attachments here, so we can use them
    # multiple times from templates without generating multiple queries
    # to the database.
    _attachments = None

    @property
    def attachments(self):
        # Bug fix: compare against None instead of falsiness -- an empty
        # result previously failed the "not self._attachments" test and
        # re-ran the query on every access.
        if self._attachments is None:
            self._attachments = self.attachment_set.extra(select={'len': 'length(attachment)'}).all()
        return self._attachments

    @property
    def hiddenreason(self):
        """Human-readable reason the message is hidden, or None."""
        if not self.hiddenstatus:
            return None
        # Bug fix: replaces a bare except -- a negative hiddenstatus used
        # to index from the end of hide_reasons and report the wrong
        # reason instead of the generic fallback.
        if 0 < self.hiddenstatus < len(hide_reasons):
            return hide_reasons[self.hiddenstatus]
        # Weird value
        return 'This message has been hidden.'
class ListGroup(models.Model):
    """A grouping of mailing lists (table 'listgroups')."""
    groupid = models.IntegerField(null=False, primary_key=True)
    # Display name of the group.
    groupname = models.CharField(max_length=200, null=False, blank=False)
    # Manual ordering key -- presumably for presentation order; not used
    # in this chunk.
    sortkey = models.IntegerField(null=False)

    class Meta:
        db_table = 'listgroups'
class List(models.Model):
    """A mailing list, belonging to a ListGroup (table 'lists')."""
    listid = models.IntegerField(null=False, primary_key=True)
    listname = models.CharField(max_length=200, null=False, blank=False, unique=True)
    shortdesc = models.TextField(null=False, blank=False)
    description = models.TextField(null=False, blank=False)
    active = models.BooleanField(null=False, blank=False)
    group = models.ForeignKey(ListGroup, db_column='groupid')
    subscriber_access = models.BooleanField(null=False, blank=False, default=False, help_text="Subscribers can access contents (default is admins only)")

    @property
    def maybe_shortdesc(self):
        # Prefer the short description, falling back to the list name.
        return self.shortdesc or self.listname

    class Meta:
        db_table = 'lists'
class Attachment(models.Model):
    """A file attached to a Message; the binary payload lives in a bytea
    column not mapped by django (table 'attachments')."""
    message = models.ForeignKey(Message, null=False, blank=False, db_column='message')
    filename = models.CharField(max_length=1000, null=False, blank=False)
    contenttype = models.CharField(max_length=1000, null=False, blank=False)
    # attachment = bytea, not supported by django at this point

    class Meta:
        db_table = 'attachments'
        # Predictable same-as-insert order
        ordering = ('id',)

    def inlineable(self):
        # Return True if this image should be inlined: known image types
        # under 75kB only. Note! len needs to be set with extra(select=).
        inline_types = ('image/png', 'image/gif', 'image/jpg', 'image/jpeg')
        return self.contenttype in inline_types and self.len < 75000
class ListSubscriber(models.Model):
    """Subscriber of a list, used for access control on private archives."""
    # Only used when public access is not allowed.
    # We set the username of the community account instead of a
    # foreign key, because the user might not exist.
    list = models.ForeignKey(List, null=False, blank=False)
    # Community-auth username (no FK on purpose, see above).
    username = models.CharField(max_length=30, null=False, blank=False)

    class Meta:
        unique_together = (('list', 'username'), )
        db_table = 'listsubscribers'
class ApiClient(models.Model):
    """An external API client (table 'apiclients')."""
    # Shared secret the client presents in the X-APIKEY header
    # (checked in thread_subscribe).
    apikey = models.CharField(max_length=100, null=False, blank=False)
    # Client URL -- presumably where notifications are posted back;
    # not used in this chunk, verify against the notifier code.
    postback = models.URLField(max_length=500, null=False, blank=False)

    class Meta:
        db_table = 'apiclients'
class ThreadSubscription(models.Model):
    """An API client's subscription to a thread, created by thread_subscribe."""
    apiclient = models.ForeignKey(ApiClient, null=False, blank=False)
    threadid = models.IntegerField(null=False, blank=False)

    class Meta:
        db_table = 'threadsubscriptions'
        # At most one subscription per (client, thread) pair.
        unique_together = (('apiclient', 'threadid'),)
from django import shortcuts
class ERedirect(Exception):
    """Exception used to force a redirect from deep inside view helpers.

    Caught by RedirectMiddleware, which turns it into an HTTP redirect
    to *url*.
    """
    def __init__(self, url):
        self.url = url
class RedirectMiddleware(object):
    """Middleware that turns ERedirect exceptions into HTTP redirects."""
    def process_exception(self, request, exception):
        # Any other exception type is left to the next handler
        # (returning None means "not handled").
        if not isinstance(exception, ERedirect):
            return None
        return shortcuts.redirect(exception.url)
# Registry for this module's custom template filters (hidemail etc).
register = template.Library()
def _rewrite_email(value):
- return value.replace('@', '(at)').replace('.','(dot)')
+ return value.replace('@', '(at)').replace('.','(dot)')
@register.filter(name='hidemail')
@stringfilter
def hidemail(value):
    """Template filter: obfuscate the @ and dots of an email address."""
    return _rewrite_email(value)
# A regular expression and replacement function to mangle email addresses.
#
# are mangled.
# NOTE(review): the comment above looks truncated in this copy.
# Matches an email address, optionally preceded by a /m/ or /message-id/
# path prefix; the optional group(1) lets _rewrite_email_match skip
# addresses that are part of a message-id URL.
_re_mail = re.compile('(/m(essage-id)?/)?[^()<>@,;:\/\s"\'&|]+@[^()<>@,;:\/\s"\'&|]+')
def _rewrite_email_match(match):
    """Replacement callback for _re_mail.

    group(1) is the '/m/' or '/message-id/' prefix; when present the
    match is part of a message-id link and must be left untouched.
    """
    return match.group(0) if match.group(1) else _rewrite_email(match.group(0))
@register.filter(name='hideallemail')
@stringfilter
def hideallemail(value):
    """Template filter: obfuscate every email address in *value*, except
    those appearing as part of /message-id/ style links."""
    # The replacement function can be passed directly; the lambda wrapper
    # added nothing.
    return _re_mail.sub(_rewrite_email_match, value)
@register.filter(name='nameonly')
@stringfilter
def nameonly(value):
    """Template filter: reduce 'Some Name <user@host>' to just the name,
    falling back to the local part of the address."""
    name, email = parseaddr(value)
    return name if name else email.split('@')[0]
@register.filter(name='md5')
@stringfilter
def md5(value):
    """Template filter: hex md5 digest of the utf8-encoded value."""
    digest = hashlib.md5(value.encode('utf8'))
    return digest.hexdigest()
# Ensure the user is logged in (if it's not public lists)
def ensure_logged_in(request):
    """Raise ERedirect to the login page unless the archives are public
    or the request carries an authenticated user."""
    if settings.PUBLIC_ARCHIVES or (
            hasattr(request, 'user') and request.user.is_authenticated()):
        return
    raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
# Ensure the user has permissions to access a list. If not, raise
# a permissions exception.
def ensure_list_permissions(request, l):
    """Allow access when archives are public, the user is a superuser, or
    subscriber access is enabled and the user subscribes to *l*.

    Raises PermissionDenied for authenticated users without access, and
    ERedirect (to the login page) for anonymous requests.
    """
    if settings.PUBLIC_ARCHIVES:
        return
    if not (hasattr(request, 'user') and request.user.is_authenticated()):
        # Not logged in at all -- redirect to a login page.
        raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
    if request.user.is_superuser:
        return
    if l.subscriber_access and ListSubscriber.objects.filter(list=l, username=request.user.username).exists():
        return
    # Logged in but no access
    raise PermissionDenied("Access denied.")
# Ensure the user has permissions to access a message. In order to view
# a message, the user must have permissions on *all* lists the thread
# appears on.
def ensure_message_permissions(request, msgid):
- if settings.PUBLIC_ARCHIVES:
- return
- if hasattr(request, 'user') and request.user.is_authenticated():
- if request.user.is_superuser:
- return
-
- curs = connection.cursor()
- curs.execute("""SELECT EXISTS (
+ if settings.PUBLIC_ARCHIVES:
+ return
+ if hasattr(request, 'user') and request.user.is_authenticated():
+ if request.user.is_superuser:
+ return
+
+ curs = connection.cursor()
+ curs.execute("""SELECT EXISTS (
SELECT 1 FROM list_threads
INNER JOIN messages ON messages.threadid=list_threads.threadid
WHERE messages.messageid=%(msgid)s
AND NOT EXISTS (
    SELECT 1 FROM listsubscribers
    WHERE listsubscribers.list_id = list_threads.listid
    AND listsubscribers.username=%(username)s
)
)""", {
- 'msgid': msgid,
- 'username': request.user.username,
- })
- if not curs.fetchone()[0]:
- # This thread is not on any list that the user does not have permissions on.
- return
+ 'msgid': msgid,
+ 'username': request.user.username,
+ })
+ if not curs.fetchone()[0]:
+ # This thread is not on any list that the user does not have permissions on.
+ return
- # Logged in but no access
- raise PermissionDenied("Access denied.")
+ # Logged in but no access
+ raise PermissionDenied("Access denied.")
- # Redirect to a login page
- raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
+ # Redirect to a login page
+ raise ERedirect('%s?next=%s' % (settings.LOGIN_URL, request.path))
# Decorator to set cache age
def cache(days=0, hours=0, minutes=0, seconds=0):
- "Set the server to cache object a specified time. td must be a timedelta object"
- def _cache(fn):
- def __cache(request, *_args, **_kwargs):
- resp = fn(request, *_args, **_kwargs)
- if settings.PUBLIC_ARCHIVES:
- # Only set cache headers on public archives
- td = timedelta(hours=hours, minutes=minutes, seconds=seconds)
- resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds)
- return resp
- return __cache
- return _cache
+ "Set the server to cache object a specified time. td must be a timedelta object"
+ def _cache(fn):
+ def __cache(request, *_args, **_kwargs):
+ resp = fn(request, *_args, **_kwargs)
+ if settings.PUBLIC_ARCHIVES:
+ # Only set cache headers on public archives
+ td = timedelta(hours=hours, minutes=minutes, seconds=seconds)
+ resp['Cache-Control'] = 's-maxage=%s' % (td.days*3600*24 + td.seconds)
+ return resp
+ return __cache
+ return _cache
def nocache(fn):
- def _nocache(request, *_args, **_kwargs):
- resp = fn(request, *_args, **_kwargs)
- if settings.PUBLIC_ARCHIVES:
- # Only set cache headers on public archives
- resp['Cache-Control'] = 's-maxage=0'
- return resp
- return _nocache
+ def _nocache(request, *_args, **_kwargs):
+ resp = fn(request, *_args, **_kwargs)
+ if settings.PUBLIC_ARCHIVES:
+ # Only set cache headers on public archives
+ resp['Cache-Control'] = 's-maxage=0'
+ return resp
+ return _nocache
# Decorator to require http auth
def antispam_auth(fn):
- def _antispam_auth(request, *_args, **_kwargs):
- if not settings.PUBLIC_ARCHIVES:
- return fn(request, *_args, **_kwargs)
-
- if 'HTTP_AUTHORIZATION' in request.META:
- auth = request.META['HTTP_AUTHORIZATION'].split()
- if len(auth) != 2:
- return HttpResponseForbidden("Invalid authentication")
- if auth[0].lower() == "basic":
- user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':')
- if user == 'archives' and pwd == 'antispam':
- # Actually run the function if auth is correct
- resp = fn(request, *_args, **_kwargs)
- return resp
- # Require authentication
- response = HttpResponse()
- response.status_code = 401
- response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"'
- return response
-
- return _antispam_auth
+ def _antispam_auth(request, *_args, **_kwargs):
+ if not settings.PUBLIC_ARCHIVES:
+ return fn(request, *_args, **_kwargs)
+
+ if 'HTTP_AUTHORIZATION' in request.META:
+ auth = request.META['HTTP_AUTHORIZATION'].split()
+ if len(auth) != 2:
+ return HttpResponseForbidden("Invalid authentication")
+ if auth[0].lower() == "basic":
+ user, pwd = base64.b64decode(auth[1]).decode('utf8', errors='ignore').split(':')
+ if user == 'archives' and pwd == 'antispam':
+ # Actually run the function if auth is correct
+ resp = fn(request, *_args, **_kwargs)
+ return resp
+ # Require authentication
+ response = HttpResponse()
+ response.status_code = 401
+ response['WWW-Authenticate'] = 'Basic realm="Please authenticate with user archives and password antispam"'
+ return response
+
+ return _antispam_auth
def get_all_groups_and_lists(request, listid=None):
- # Django doesn't (yet) support traversing the reverse relationship,
- # so we'll get all the lists and rebuild it backwards.
- if settings.PUBLIC_ARCHIVES or request.user.is_superuser:
- lists = List.objects.select_related('group').all().order_by('listname')
- else:
- lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname')
- listgroupid = None
- groups = {}
- for l in lists:
- if l.listid == listid:
- listgroupid = l.group.groupid
-
- if l.group.groupid in groups:
- groups[l.group.groupid]['lists'].append(l)
- else:
- groups[l.group.groupid] = {
- 'groupid': l.group.groupid,
- 'groupname': l.group.groupname,
- 'sortkey': l.group.sortkey,
- 'lists': [l,],
- 'homelink': 'list/group/%s' % l.group.groupid,
- }
-
- return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid)
+ # Django doesn't (yet) support traversing the reverse relationship,
+ # so we'll get all the lists and rebuild it backwards.
+ if settings.PUBLIC_ARCHIVES or request.user.is_superuser:
+ lists = List.objects.select_related('group').all().order_by('listname')
+ else:
+ lists = List.objects.select_related('group').filter(subscriber_access=True, listsubscriber__username=request.user.username).order_by('listname')
+ listgroupid = None
+ groups = {}
+ for l in lists:
+ if l.listid == listid:
+ listgroupid = l.group.groupid
+
+ if l.group.groupid in groups:
+ groups[l.group.groupid]['lists'].append(l)
+ else:
+ groups[l.group.groupid] = {
+ 'groupid': l.group.groupid,
+ 'groupname': l.group.groupname,
+ 'sortkey': l.group.sortkey,
+ 'lists': [l,],
+ 'homelink': 'list/group/%s' % l.group.groupid,
+ }
+
+ return (sorted(list(groups.values()), key=lambda g: g['sortkey']), listgroupid)
class NavContext(object):
- def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None):
- self.request = request
- self.ctx = {}
-
- if all_groups:
- groups = copy.deepcopy(all_groups)
- if expand_groupid:
- listgroupid = int(expand_groupid)
- else:
- (groups, listgroupid) = get_all_groups_and_lists(request, listid)
-
- for g in groups:
- # On the root page, remove *all* entries
- # On other lists, remove the entries in all groups other than our
- # own.
- if (not listid and not expand_groupid) or listgroupid != g['groupid']:
- # Root page, so remove *all* entries
- g['lists'] = []
-
- self.ctx.update({'listgroups': groups})
- if listname:
- self.ctx.update({'searchform_listname': listname})
+ def __init__(self, request, listid=None, listname=None, all_groups=None, expand_groupid=None):
+ self.request = request
+ self.ctx = {}
+
+ if all_groups:
+ groups = copy.deepcopy(all_groups)
+ if expand_groupid:
+ listgroupid = int(expand_groupid)
+ else:
+ (groups, listgroupid) = get_all_groups_and_lists(request, listid)
+
+ for g in groups:
+ # On the root page, remove *all* entries
+ # On other lists, remove the entries in all groups other than our
+ # own.
+ if (not listid and not expand_groupid) or listgroupid != g['groupid']:
+ # Root page, so remove *all* entries
+ g['lists'] = []
+
+ self.ctx.update({'listgroups': groups})
+ if listname:
+ self.ctx.update({'searchform_listname': listname})
def render_nav(navcontext, template, ctx):
- ctx.update(navcontext.ctx)
- return render(navcontext.request, template, ctx)
+ ctx.update(navcontext.ctx)
+ return render(navcontext.request, template, ctx)
@cache(hours=4)
def index(request):
- ensure_logged_in(request)
+ ensure_logged_in(request)
- (groups, listgroupid) = get_all_groups_and_lists(request)
- return render_nav(NavContext(request, all_groups=groups), 'index.html', {
- 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups],
- })
+ (groups, listgroupid) = get_all_groups_and_lists(request)
+ return render_nav(NavContext(request, all_groups=groups), 'index.html', {
+ 'groups': [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups],
+ })
@cache(hours=8)
def groupindex(request, groupid):
- (groups, listgroupid) = get_all_groups_and_lists(request)
- mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)]
- if len(mygroups) == 0:
- raise Http404('List group does not exist')
+ (groups, listgroupid) = get_all_groups_and_lists(request)
+ mygroups = [{'groupname': g['groupname'], 'lists': g['lists']} for g in groups if g['groupid']==int(groupid)]
+ if len(mygroups) == 0:
+ raise Http404('List group does not exist')
- return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', {
- 'groups': mygroups,
- })
+ return render_nav(NavContext(request, all_groups=groups, expand_groupid=groupid), 'index.html', {
+ 'groups': mygroups,
+ })
@cache(hours=8)
def monthlist(request, listname):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- curs = connection.cursor()
- curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid})
- months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()]
+ curs = connection.cursor()
+ curs.execute("SELECT year, month FROM list_months WHERE listid=%(listid)s ORDER BY year DESC, month DESC", {'listid': l.listid})
+ months=[{'year':r[0],'month':r[1], 'date':datetime(r[0],r[1],1)} for r in curs.fetchall()]
- return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', {
- 'list': l,
- 'months': months,
- })
+ return render_nav(NavContext(request, l.listid, l.listname), 'monthlist.html', {
+ 'list': l,
+ 'months': months,
+ })
def get_monthday_info(mlist, l, d):
- allmonths = set([m.date.month for m in mlist])
- monthdate = None
- daysinmonth = None
- if len(allmonths) == 1:
- # All hits are from one month, so generate month links
- monthdate = mlist[0].date
- elif len(allmonths) == 0:
- # No hits at all, so generate month links from the specified date
- monthdate = d
-
- if monthdate:
- curs = connection.cursor()
- curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", {
- 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1),
- 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]),
- 'listid': l.listid,
- })
- daysinmonth = [int(r[0]) for r in curs.fetchall()]
-
- yearmonth = None
- if monthdate:
- yearmonth = "%s%02d" % (monthdate.year, monthdate.month)
- return (yearmonth, daysinmonth)
+ allmonths = set([m.date.month for m in mlist])
+ monthdate = None
+ daysinmonth = None
+ if len(allmonths) == 1:
+ # All hits are from one month, so generate month links
+ monthdate = mlist[0].date
+ elif len(allmonths) == 0:
+ # No hits at all, so generate month links from the specified date
+ monthdate = d
+
+ if monthdate:
+ curs = connection.cursor()
+ curs.execute("SELECT DISTINCT extract(day FROM date) FROM messages WHERE date >= %(startdate)s AND date < %(enddate)s AND threadid IN (SELECT threadid FROM list_threads WHERE listid=%(listid)s) ORDER BY 1", {
+ 'startdate': datetime(year=monthdate.year, month=monthdate.month, day=1),
+ 'enddate': monthdate + timedelta(days=calendar.monthrange(monthdate.year, monthdate.month)[1]),
+ 'listid': l.listid,
+ })
+ daysinmonth = [int(r[0]) for r in curs.fetchall()]
+
+ yearmonth = None
+ if monthdate:
+ yearmonth = "%s%02d" % (monthdate.year, monthdate.month)
+ return (yearmonth, daysinmonth)
def _render_datelist(request, l, d, datefilter, title, queryproc):
- # NOTE! Basic permissions checks must be done before calling this function!
-
- if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser:
- mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(
- where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"],
- params=(l.listid, request.user.username),
- )
- else:
- # Else we return everything
- mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid])
- mlist = queryproc(mlist)
-
- allyearmonths = set([(m.date.year, m.date.month) for m in mlist])
- (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d)
-
- r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', {
- 'list': l,
- 'messages': mlist,
- 'title': title,
- 'daysinmonth': daysinmonth,
- 'yearmonth': yearmonth,
- })
- r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths]))
- return r
+ # NOTE! Basic permissions checks must be done before calling this function!
+
+ if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser:
+ mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(
+ where=["threadid IN (SELECT threadid FROM list_threads t WHERE listid=%s AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%s)))"],
+ params=(l.listid, request.user.username),
+ )
+ else:
+ # Else we return everything
+ mlist = Message.objects.defer('bodytxt', 'cc', 'to').select_related().filter(datefilter, hiddenstatus__isnull=True).extra(where=["threadid IN (SELECT threadid FROM list_threads WHERE listid=%s)" % l.listid])
+ mlist = queryproc(mlist)
+
+ allyearmonths = set([(m.date.year, m.date.month) for m in mlist])
+ (yearmonth, daysinmonth) = get_monthday_info(mlist, l, d)
+
+ r = render_nav(NavContext(request, l.listid, l.listname), 'datelist.html', {
+ 'list': l,
+ 'messages': mlist,
+ 'title': title,
+ 'daysinmonth': daysinmonth,
+ 'yearmonth': yearmonth,
+ })
+ r['X-pglm'] = ':%s:' % (':'.join(['%s/%s/%s' % (l.listid, year, month) for year,month in allyearmonths]))
+ return r
def render_datelist_from(request, l, d, title, to=None):
- # NOTE! Basic permissions checks must be done before calling this function!
- datefilter = Q(date__gte=d)
- if to:
- datefilter.add(Q(date__lt=to), Q.AND)
+ # NOTE! Basic permissions checks must be done before calling this function!
+ datefilter = Q(date__gte=d)
+ if to:
+ datefilter.add(Q(date__lt=to), Q.AND)
- return _render_datelist(request, l, d, datefilter, title,
- lambda x: list(x.order_by('date')[:200]))
+ return _render_datelist(request, l, d, datefilter, title,
+ lambda x: list(x.order_by('date')[:200]))
def render_datelist_to(request, l, d, title):
- # NOTE! Basic permissions checks must be done before calling this function!
+ # NOTE! Basic permissions checks must be done before calling this function!
- # Need to sort this backwards in the database to get the LIMIT applied
- # properly, and then manually resort it in the correct order. We can do
- # the second sort safely in python since it's not a lot of items..
+ # Need to sort this backwards in the database to get the LIMIT applied
+ # properly, and then manually resort it in the correct order. We can do
+ # the second sort safely in python since it's not a lot of items..
- return _render_datelist(request, l, d, Q(date__lte=d), title,
- lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date))
+ return _render_datelist(request, l, d, Q(date__lte=d), title,
+ lambda x: sorted(x.order_by('-date')[:200], key=lambda m: m.date))
@cache(hours=2)
def datelistsince(request, listname, msgid):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- msg = get_object_or_404(Message, messageid=msgid)
- return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
+ msg = get_object_or_404(Message, messageid=msgid)
+ return render_datelist_from(request, l, msg.date, "%s since %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
# Longer cache since this will be used for the fixed date links
@cache(hours=4)
def datelistsincetime(request, listname, year, month, day, hour, minute):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- try:
- d = datetime(int(year), int(month), int(day), int(hour), int(minute))
- except ValueError:
- raise Http404("Invalid date format, not found")
- return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
+ try:
+ d = datetime(int(year), int(month), int(day), int(hour), int(minute))
+ except ValueError:
+ raise Http404("Invalid date format, not found")
+ return render_datelist_from(request, l, d, "%s since %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
@cache(hours=2)
def datelistbefore(request, listname, msgid):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- msg = get_object_or_404(Message, messageid=msgid)
- return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
+ msg = get_object_or_404(Message, messageid=msgid)
+ return render_datelist_to(request, l, msg.date, "%s before %s" % (l.listname, msg.date.strftime("%Y-%m-%d %H:%M:%S")))
@cache(hours=2)
def datelistbeforetime(request, listname, year, month, day, hour, minute):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- try:
- d = datetime(int(year), int(month), int(day), int(hour), int(minute))
- except ValueError:
- raise Http404("Invalid date format, not found")
- return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
+ try:
+ d = datetime(int(year), int(month), int(day), int(hour), int(minute))
+ except ValueError:
+ raise Http404("Invalid date format, not found")
+ return render_datelist_to(request, l, d, "%s before %s" % (l.listname, d.strftime("%Y-%m-%d %H:%M")))
@cache(hours=4)
def datelist(request, listname, year, month):
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
- try:
- d = datetime(int(year), int(month), 1)
- except ValueError:
- raise Http404("Malformatted date, month not found")
+ try:
+ d = datetime(int(year), int(month), 1)
+ except ValueError:
+ raise Http404("Malformatted date, month not found")
- enddate = d+timedelta(days=31)
- enddate = datetime(enddate.year, enddate.month, 1)
- return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate)
+ enddate = d+timedelta(days=31)
+ enddate = datetime(enddate.year, enddate.month, 1)
+ return render_datelist_from(request, l, d, "%s - %s %s" % (l.listname, d.strftime("%B"), d.year), enddate)
@cache(hours=4)
def attachment(request, attid):
- # Use a direct query instead of django, since it has bad support for
- # bytea
- # XXX: minor information leak, because we load the whole attachment before we check
- # the thread permissions. Is that OK?
- curs = connection.cursor()
- curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)})
- r = curs.fetchall()
- if len(r) != 1:
- return HttpResponse("Attachment not found")
+ # Use a direct query instead of django, since it has bad support for
+ # bytea
+ # XXX: minor information leak, because we load the whole attachment before we check
+ # the thread permissions. Is that OK?
+ curs = connection.cursor()
+ curs.execute("SELECT filename, contenttype, messageid, attachment FROM attachments INNER JOIN messages ON messages.id=attachments.message AND attachments.id=%(id)s AND messages.hiddenstatus IS NULL", {'id': int(attid)})
+ r = curs.fetchall()
+ if len(r) != 1:
+ return HttpResponse("Attachment not found")
- ensure_message_permissions(request, r[0][2])
+ ensure_message_permissions(request, r[0][2])
- return HttpResponse(r[0][3], content_type=r[0][1])
+ return HttpResponse(r[0][3], content_type=r[0][1])
def _build_thread_structure(threadid):
- # Yeah, this is *way* too complicated for the django ORM
- curs = connection.cursor()
- curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS(
+ # Yeah, this is *way* too complicated for the django ORM
+ curs = connection.cursor()
+ curs.execute("""WITH RECURSIVE t(id, _from, subject, date, messageid, has_attachment, parentid, datepath) AS(
SELECT id,_from,subject,date,messageid,has_attachment,parentid,array[]::timestamptz[] FROM messages m WHERE m.threadid=%(threadid)s AND parentid IS NULL
UNION ALL
SELECT m.id,m._from,m.subject,m.date,m.messageid,m.has_attachment,m.parentid,t.datepath||t.date FROM messages m INNER JOIN t ON t.id=m.parentid WHERE m.threadid=%(threadid)s
)
SELECT id,_from,subject,date,messageid,has_attachment,parentid,datepath FROM t ORDER BY datepath||date
""", {'threadid': threadid})
- for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall():
- yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)}
+ for id,_from,subject,date,messageid,has_attachment,parentid,parentpath in curs.fetchall():
+ yield {'id':id, 'mailfrom':_from, 'subject': subject, 'date': date, 'printdate': date.strftime("%Y-%m-%d %H:%M:%S"), 'messageid': messageid, 'hasattachment': has_attachment, 'parentid': parentid, 'indent': " " * len(parentpath)}
def _get_nextprevious(listmap, dt):
- curs = connection.cursor()
- curs.execute("""WITH l(listid) AS (
+ curs = connection.cursor()
+ curs.execute("""WITH l(listid) AS (
SELECT unnest(%(lists)s)
)
SELECT l.listid,1,
(SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m
- INNER JOIN list_threads lt ON lt.threadid=m.threadid
- WHERE m.date>%(time)s AND lt.listid=l.listid
- ORDER BY m.date LIMIT 1
+ INNER JOIN list_threads lt ON lt.threadid=m.threadid
+ WHERE m.date>%(time)s AND lt.listid=l.listid
+ ORDER BY m.date LIMIT 1
) FROM l
UNION ALL
SELECT l.listid,0,
(SELECT ARRAY[messageid,to_char(date, 'yyyy-mm-dd hh24:mi:ss'),subject,_from] FROM messages m
- INNER JOIN list_threads lt ON lt.threadid=m.threadid
- WHERE m.date<%(time)s AND lt.listid=l.listid
- ORDER BY m.date DESC LIMIT 1
+ INNER JOIN list_threads lt ON lt.threadid=m.threadid
+ WHERE m.date<%(time)s AND lt.listid=l.listid
+ ORDER BY m.date DESC LIMIT 1
) FROM l""", {
- 'lists': list(listmap.keys()),
- 'time': dt,
- })
- retval = {}
- for listid, isnext, data in curs.fetchall():
- if data:
- # Can be NULL, but if not, it will always have all fields
- listname = listmap[listid]
- d = {
- 'msgid': data[0],
- 'date': data[1],
- 'subject': data[2],
- 'from': data[3],
- }
- if listname in retval:
- retval[listname][isnext and 'next' or 'prev'] = d
- else:
- retval[listname] = {
- isnext and 'next' or 'prev': d
- }
- return retval
+ 'lists': list(listmap.keys()),
+ 'time': dt,
+ })
+ retval = {}
+ for listid, isnext, data in curs.fetchall():
+ if data:
+ # Can be NULL, but if not, it will always have all fields
+ listname = listmap[listid]
+ d = {
+ 'msgid': data[0],
+ 'date': data[1],
+ 'subject': data[2],
+ 'from': data[3],
+ }
+ if listname in retval:
+ retval[listname][isnext and 'next' or 'prev'] = d
+ else:
+ retval[listname] = {
+ isnext and 'next' or 'prev': d
+ }
+ return retval
@cache(hours=4)
def message(request, msgid):
- ensure_message_permissions(request, msgid)
-
- try:
- m = Message.objects.get(messageid=msgid)
- except Message.DoesNotExist:
- raise Http404('Message does not exist')
-
- lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname')
- listmap = dict([(l.listid, l.listname) for l in lists])
- threadstruct = list(_build_thread_structure(m.threadid))
- newest = calendar.timegm(max(threadstruct, key=lambda x: x['date'])['date'].utctimetuple())
- if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG:
- ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE"))
- if ims >= newest:
- return HttpResponseNotModified()
-
- responses = [t for t in threadstruct if t['parentid']==m.id]
-
- if m.parentid:
- for t in threadstruct:
- if t['id'] == m.parentid:
- parent = t
- break
- else:
- parent = None
- nextprev = _get_nextprevious(listmap, m.date)
-
- r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', {
- 'msg': m,
- 'threadstruct': threadstruct,
- 'responses': responses,
- 'parent': parent,
- 'lists': lists,
- 'nextprev': nextprev,
- })
- r['X-pgthread'] = ":%s:" % m.threadid
- r['Last-Modified'] = http_date(newest)
- return r
+ ensure_message_permissions(request, msgid)
+
+ try:
+ m = Message.objects.get(messageid=msgid)
+ except Message.DoesNotExist:
+ raise Http404('Message does not exist')
+
+ lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % m.threadid]).order_by('listname')
+ listmap = dict([(l.listid, l.listname) for l in lists])
+ threadstruct = list(_build_thread_structure(m.threadid))
+ newest = calendar.timegm(max(threadstruct, key=lambda x: x['date'])['date'].utctimetuple())
+ if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG:
+ ims = parse_http_date_safe(request.META.get("HTTP_IF_MODIFIED_SINCE"))
+ if ims >= newest:
+ return HttpResponseNotModified()
+
+ responses = [t for t in threadstruct if t['parentid']==m.id]
+
+ if m.parentid:
+ for t in threadstruct:
+ if t['id'] == m.parentid:
+ parent = t
+ break
+ else:
+ parent = None
+ nextprev = _get_nextprevious(listmap, m.date)
+
+ r = render_nav(NavContext(request, lists[0].listid, lists[0].listname), 'message.html', {
+ 'msg': m,
+ 'threadstruct': threadstruct,
+ 'responses': responses,
+ 'parent': parent,
+ 'lists': lists,
+ 'nextprev': nextprev,
+ })
+ r['X-pgthread'] = ":%s:" % m.threadid
+ r['Last-Modified'] = http_date(newest)
+ return r
@cache(hours=4)
def message_flat(request, msgid):
- ensure_message_permissions(request, msgid)
-
- try:
- msg = Message.objects.get(messageid=msgid)
- except Message.DoesNotExist:
- raise Http404('Message does not exist')
- allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date'))
- lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname')
-
- isfirst = (msg == allmsg[0])
-
- newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple())
- if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG:
- ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE'))
- if ims >= newest:
- return HttpResponseNotModified()
-
- r = render_nav(NavContext(request), 'message_flat.html', {
- 'msg': msg,
- 'allmsg': allmsg,
- 'lists': lists,
- 'isfirst': isfirst,
- })
- r['X-pgthread'] = ":%s:" % msg.threadid
- r['Last-Modified'] = http_date(newest)
- return r
+ ensure_message_permissions(request, msgid)
+
+ try:
+ msg = Message.objects.get(messageid=msgid)
+ except Message.DoesNotExist:
+ raise Http404('Message does not exist')
+ allmsg = list(Message.objects.filter(threadid=msg.threadid).order_by('date'))
+ lists = List.objects.extra(where=["listid IN (SELECT listid FROM list_threads WHERE threadid=%s)" % msg.threadid]).order_by('listname')
+
+ isfirst = (msg == allmsg[0])
+
+ newest = calendar.timegm(max(allmsg, key=lambda x: x.date).date.utctimetuple())
+ if 'HTTP_IF_MODIFIED_SINCE' in request.META and not settings.DEBUG:
+ ims = parse_http_date_safe(request.META.get('HTTP_IF_MODIFIED_SINCE'))
+ if ims >= newest:
+ return HttpResponseNotModified()
+
+ r = render_nav(NavContext(request), 'message_flat.html', {
+ 'msg': msg,
+ 'allmsg': allmsg,
+ 'lists': lists,
+ 'isfirst': isfirst,
+ })
+ r['X-pgthread'] = ":%s:" % msg.threadid
+ r['Last-Modified'] = http_date(newest)
+ return r
@nocache
@antispam_auth
def message_raw(request, msgid):
- ensure_message_permissions(request, msgid)
+ ensure_message_permissions(request, msgid)
- curs = connection.cursor()
- curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", {
- 'messageid': msgid,
- })
- row = curs.fetchall()
- if len(row) != 1:
- raise Http404('Message does not exist')
+ curs = connection.cursor()
+ curs.execute("SELECT threadid, hiddenstatus, rawtxt FROM messages WHERE messageid=%(messageid)s", {
+ 'messageid': msgid,
+ })
+ row = curs.fetchall()
+ if len(row) != 1:
+ raise Http404('Message does not exist')
- if row[0][1]:
- r = HttpResponse('This message has been hidden.', content_type='text/plain')
- else:
- r = HttpResponse(row[0][2], content_type='text/plain')
- r['X-pgthread'] = ":%s:" % row[0][0]
- return r
+ if row[0][1]:
+ r = HttpResponse('This message has been hidden.', content_type='text/plain')
+ else:
+ r = HttpResponse(row[0][2], content_type='text/plain')
+ r['X-pgthread'] = ":%s:" % row[0][0]
+ return r
def _build_mbox(query, params, msgid=None):
- connection.ensure_connection()
+ connection.ensure_connection()
- # Rawmsg is not in the django model, so we have to query it separately
- curs = connection.connection.cursor(name='mbox', withhold=True)
- curs.itersize = 50
- curs.execute(query, params)
+ # Rawmsg is not in the django model, so we have to query it separately
+ curs = connection.connection.cursor(name='mbox', withhold=True)
+ curs.itersize = 50
+ curs.execute(query, params)
- firstmsg = curs.fetchone()
- if msgid and firstmsg[0] != msgid:
- # Always redirect to the first message in the thread when building
- # the mbox, to not generate potentially multiple copies in
- # the cache.
- return HttpResponsePermanentRedirect(firstmsg[0])
+ firstmsg = curs.fetchone()
+ if msgid and firstmsg[0] != msgid:
+ # Always redirect to the first message in the thread when building
+ # the mbox, to not generate potentially multiple copies in
+ # the cache.
+ return HttpResponsePermanentRedirect(firstmsg[0])
- def _one_message(raw):
- # Parse as a message to generate headers
- s = BytesIO(raw)
- parser = email.parser.BytesParser(policy=email.policy.compat32)
- msg = parser.parse(s)
- return msg.as_string(unixfrom=True)
+ def _one_message(raw):
+ # Parse as a message to generate headers
+ s = BytesIO(raw)
+ parser = email.parser.BytesParser(policy=email.policy.compat32)
+ msg = parser.parse(s)
+ return msg.as_string(unixfrom=True)
- def _message_stream(first):
- yield _one_message(first[1])
+ def _message_stream(first):
+ yield _one_message(first[1])
- for mid, raw in curs:
- yield _one_message(raw)
+ for mid, raw in curs:
+ yield _one_message(raw)
- # Close must be done inside this function. If we close it in the
- # main function, it won't let the iterator run to completion.
- curs.close()
+ # Close must be done inside this function. If we close it in the
+ # main function, it won't let the iterator run to completion.
+ curs.close()
- r = StreamingHttpResponse(_message_stream(firstmsg))
- r['Content-type'] = 'application/mbox'
- return r
+ r = StreamingHttpResponse(_message_stream(firstmsg))
+ r['Content-type'] = 'application/mbox'
+ return r
@nocache
@antispam_auth
def message_mbox(request, msgid):
- ensure_message_permissions(request, msgid)
+ ensure_message_permissions(request, msgid)
- msg = get_object_or_404(Message, messageid=msgid)
+ msg = get_object_or_404(Message, messageid=msgid)
- return _build_mbox(
- "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date",
- {
- 'thread': msg.threadid,
- },
- msgid)
+ return _build_mbox(
+ "SELECT messageid, rawtxt FROM messages WHERE threadid=%(thread)s AND hiddenstatus IS NULL ORDER BY date",
+ {
+ 'thread': msg.threadid,
+ },
+ msgid)
@nocache
@antispam_auth
def mbox(request, listname, listname2, mboxyear, mboxmonth):
- if (listname != listname2):
- raise Http404('List name mismatch')
- l = get_object_or_404(List, listname=listname)
- ensure_list_permissions(request, l)
-
- mboxyear = int(mboxyear)
- mboxmonth = int(mboxmonth)
-
- query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date"
- params = {
- 'listid': l.listid,
- 'startdate': date(mboxyear, mboxmonth, 1),
- 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59),
- }
-
- if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser:
- # Restrict to only view messages that the user has permissions on all threads they're on
- query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))')
- params['username'] = request.user.username
- else:
- # Just return the whole thing
- query = query.replace('%%%', '')
- return _build_mbox(query, params)
+ if (listname != listname2):
+ raise Http404('List name mismatch')
+ l = get_object_or_404(List, listname=listname)
+ ensure_list_permissions(request, l)
+
+ mboxyear = int(mboxyear)
+ mboxmonth = int(mboxmonth)
+
+ query = "SELECT messageid, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE listid=%(listid)s AND hiddenstatus IS NULL AND date >= %(startdate)s AND date <= %(enddate)s %%% ORDER BY date"
+ params = {
+ 'listid': l.listid,
+ 'startdate': date(mboxyear, mboxmonth, 1),
+ 'enddate': datetime(mboxyear, mboxmonth, calendar.monthrange(mboxyear, mboxmonth)[1], 23, 59, 59),
+ }
+
+ if not settings.PUBLIC_ARCHIVES and not request.user.is_superuser:
+ # Restrict to only view messages that the user has permissions on all threads they're on
+ query = query.replace('%%%', 'AND NOT EXISTS (SELECT 1 FROM list_threads t2 WHERE t2.threadid=t.threadid AND listid NOT IN (SELECT list_id FROM listsubscribers WHERE username=%(username)s))')
+ params['username'] = request.user.username
+ else:
+ # Just return the whole thing
+ query = query.replace('%%%', '')
+ return _build_mbox(query, params)
def search(request):
- if not settings.PUBLIC_ARCHIVES:
- # We don't support searching of non-public archives at all at this point.
- # XXX: room for future improvement
- return HttpResponseForbidden('Not public archives')
-
- # Only certain hosts are allowed to call the search API
- if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS:
- return HttpResponseForbidden('Invalid host')
-
- curs = connection.cursor()
-
- # Perform a search of the archives and return a JSON document.
- # Expects the following (optional) POST parameters:
- # q = query to search for
- # ln = comma separate list of listnames to search in
- # d = number of days back to search for, or -1 (or not specified)
- # to search the full archives
- # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date]
- if not request.method == 'POST':
- raise Http404('I only respond to POST')
-
- if 'q' not in request.POST:
- raise Http404('No search query specified')
- query = request.POST['q']
-
- if 'ln' in request.POST:
- try:
- curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", {
- 'names': request.POST['ln'].split(','),
- })
- lists = [x for x, in curs.fetchall()]
- except:
- # If failing to parse list of lists, just search all
- lists = None
- else:
- lists = None
-
- if 'd' in request.POST:
- days = int(request.POST['d'])
- if days < 1 or days > 365:
- firstdate = None
- else:
- firstdate = datetime.now() - timedelta(days=days)
- else:
- firstdate = None
-
- if 's' in request.POST:
- list_sort = request.POST['s']
- if not list_sort in ('d', 'r', 'i'):
- list_stort = 'r'
- else:
- list_sort = 'r'
-
- # Ok, we have all we need to do the search
-
- if query.find('@') > 0:
- # This could be a messageid. So try to get that one specifically first.
- # We don't do a more specific check if it's a messageid because doing
- # a key lookup is cheap...
- curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", {
- 'q': query,
- })
- a = curs.fetchall()
- if len(a) == 1:
- # Yup, this was a messageid
- resp = HttpResponse(content_type='application/json')
-
- json.dump({'messageidmatch': 1}, resp)
- return resp
- # If not found, fall through to a regular search
-
- curs.execute("SET gin_fuzzy_search_limit=10000")
- qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)"
- params = {
- 'q': query,
- }
- if lists:
- qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))"
- params['lists'] = lists
- if firstdate:
- qstr += " AND m.date > %(date)s"
- params['date'] = firstdate
- if list_sort == 'r':
- qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery(%(q)s)) DESC LIMIT 1000"
- elif list_sort == 'd':
- qstr += " ORDER BY date DESC LIMIT 1000"
- else:
- qstr += " ORDER BY date ASC LIMIT 1000"
-
- curs.execute(qstr, params)
-
- resp = HttpResponse(content_type='application/json')
-
- json.dump([{
- 'm': messageid,
- 'd': date.isoformat(),
- 's': subject,
- 'f': mailfrom,
- 'r': rank,
- 'a': abstract.replace("[[[[[[", "<b>").replace("]]]]]]","</b>"),
-
- } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()],
- resp)
- return resp
+ if not settings.PUBLIC_ARCHIVES:
+ # We don't support searching of non-public archives at all at this point.
+ # XXX: room for future improvement
+ return HttpResponseForbidden('Not public archives')
+
+ # Only certain hosts are allowed to call the search API
+ if not request.META['REMOTE_ADDR'] in settings.SEARCH_CLIENTS:
+ return HttpResponseForbidden('Invalid host')
+
+ curs = connection.cursor()
+
+ # Perform a search of the archives and return a JSON document.
+ # Expects the following (optional) POST parameters:
+ # q = query to search for
+ # ln = comma separate list of listnames to search in
+ # d = number of days back to search for, or -1 (or not specified)
+ # to search the full archives
+ # s = sort results by ['r'=rank, 'd'=date, 'i'=inverse date]
+ if not request.method == 'POST':
+ raise Http404('I only respond to POST')
+
+ if 'q' not in request.POST:
+ raise Http404('No search query specified')
+ query = request.POST['q']
+
+ if 'ln' in request.POST:
+ try:
+ curs.execute("SELECT listid FROM lists WHERE listname=ANY(%(names)s)", {
+ 'names': request.POST['ln'].split(','),
+ })
+ lists = [x for x, in curs.fetchall()]
+ except:
+ # If failing to parse list of lists, just search all
+ lists = None
+ else:
+ lists = None
+
+ if 'd' in request.POST:
+ days = int(request.POST['d'])
+ if days < 1 or days > 365:
+ firstdate = None
+ else:
+ firstdate = datetime.now() - timedelta(days=days)
+ else:
+ firstdate = None
+
+ if 's' in request.POST:
+ list_sort = request.POST['s']
+ if not list_sort in ('d', 'r', 'i'):
+ list_sort = 'r'
+ else:
+ list_sort = 'r'
+
+ # Ok, we have all we need to do the search
+
+ if query.find('@') > 0:
+ # This could be a messageid. So try to get that one specifically first.
+ # We don't do a more specific check if it's a messageid because doing
+ # a key lookup is cheap...
+ curs.execute("SELECT messageid FROM messages WHERE messageid=%(q)s", {
+ 'q': query,
+ })
+ a = curs.fetchall()
+ if len(a) == 1:
+ # Yup, this was a messageid
+ resp = HttpResponse(content_type='application/json')
+
+ json.dump({'messageidmatch': 1}, resp)
+ return resp
+ # If not found, fall through to a regular search
+
+ curs.execute("SET gin_fuzzy_search_limit=10000")
+ qstr = "SELECT messageid, date, subject, _from, ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)), ts_headline(bodytxt, plainto_tsquery('public.pg', %(q)s),'StartSel=\"[[[[[[\",StopSel=\"]]]]]]\"') FROM messages m WHERE fti @@ plainto_tsquery('public.pg', %(q)s)"
+ params = {
+ 'q': query,
+ }
+ if lists:
+ qstr += " AND EXISTS (SELECT 1 FROM list_threads lt WHERE lt.threadid=m.threadid AND lt.listid=ANY(%(lists)s))"
+ params['lists'] = lists
+ if firstdate:
+ qstr += " AND m.date > %(date)s"
+ params['date'] = firstdate
+ if list_sort == 'r':
+ qstr += " ORDER BY ts_rank_cd(fti, plainto_tsquery('public.pg', %(q)s)) DESC LIMIT 1000"
+ elif list_sort == 'd':
+ qstr += " ORDER BY date DESC LIMIT 1000"
+ else:
+ qstr += " ORDER BY date ASC LIMIT 1000"
+
+ curs.execute(qstr, params)
+
+ resp = HttpResponse(content_type='application/json')
+
+ json.dump([{
+ 'm': messageid,
+ 'd': date.isoformat(),
+ 's': subject,
+ 'f': mailfrom,
+ 'r': rank,
+ 'a': abstract.replace("[[[[[[", "<b>").replace("]]]]]]","</b>"),
+
+ } for messageid, date, subject, mailfrom, rank, abstract in curs.fetchall()],
+ resp)
+ return resp
@cache(seconds=10)
def web_sync_timestamp(request):
- s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n")
- r = HttpResponse(s, content_type='text/plain')
- r['Content-Length'] = len(s)
- return r
+ s = datetime.now().strftime("%Y-%m-%d %H:%M:%S\n")
+ r = HttpResponse(s, content_type='text/plain')
+ r['Content-Length'] = len(s)
+ return r
@cache(hours=8)
def legacy(request, listname, year, month, msgnum):
- curs = connection.cursor()
- curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", {
- 'list': listname,
- 'year': year,
- 'month': month,
- 'msgnum': msgnum,
- })
- r = curs.fetchall()
- if len(r) != 1:
- raise Http404('Message does not exist')
- return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0])
+ curs = connection.cursor()
+ curs.execute("SELECT msgid FROM legacymap WHERE listid=(SELECT listid FROM lists WHERE listname=%(list)s) AND year=%(year)s AND month=%(month)s AND msgnum=%(msgnum)s", {
+ 'list': listname,
+ 'year': year,
+ 'month': month,
+ 'msgnum': msgnum,
+ })
+ r = curs.fetchall()
+ if len(r) != 1:
+ raise Http404('Message does not exist')
+ return HttpResponsePermanentRedirect('/message-id/%s' % r[0][0])
# dynamic CSS serving, meaning we merge a number of different CSS into a
# single one, making sure it turns into a single http response. We do this
# dynamically, since the output will be cached.
_dynamic_cssmap = {
- 'base': ['media/css/main.css',
- 'media/css/normalize.css',],
- 'docs': ['media/css/global.css',
- 'media/css/table.css',
- 'media/css/text.css',
- 'media/css/docs.css'],
- }
+ 'base': ['media/css/main.css',
+ 'media/css/normalize.css',],
+ 'docs': ['media/css/global.css',
+ 'media/css/table.css',
+ 'media/css/text.css',
+ 'media/css/docs.css'],
+ }
@cache(hours=8)
def dynamic_css(request, css):
- if css not in _dynamic_cssmap:
- raise Http404('CSS not found')
- files = _dynamic_cssmap[css]
- resp = HttpResponse(content_type='text/css')
-
- # We honor if-modified-since headers by looking at the most recently
- # touched CSS file.
- latestmod = 0
- for fn in files:
- try:
- stime = os.stat(fn).st_mtime
- if latestmod < stime:
- latestmod = stime
- except OSError:
- # If we somehow referred to a file that didn't exist, or
- # one that we couldn't access.
- raise Http404('CSS (sub) not found')
- if 'HTTP_IF_MODIFIED_SINCE' in request.META:
- # This code is mostly stolen from django :)
- matches = re.match(r"^([^;]+)(; length=([0-9]+))?$",
- request.META.get('HTTP_IF_MODIFIED_SINCE'),
- re.IGNORECASE)
- header_mtime = parse_http_date_safe(matches.group(1))
- # We don't do length checking, just the date
- if int(latestmod) <= header_mtime:
- return HttpResponseNotModified(content_type='text/css')
- resp['Last-Modified'] = http_date(latestmod)
-
- for fn in files:
- with open(fn) as f:
- resp.write("/* %s */\n" % fn)
- resp.write(f.read())
- resp.write("\n")
-
- return resp
+ if css not in _dynamic_cssmap:
+ raise Http404('CSS not found')
+ files = _dynamic_cssmap[css]
+ resp = HttpResponse(content_type='text/css')
+
+ # We honor if-modified-since headers by looking at the most recently
+ # touched CSS file.
+ latestmod = 0
+ for fn in files:
+ try:
+ stime = os.stat(fn).st_mtime
+ if latestmod < stime:
+ latestmod = stime
+ except OSError:
+ # If we somehow referred to a file that didn't exist, or
+ # one that we couldn't access.
+ raise Http404('CSS (sub) not found')
+ if 'HTTP_IF_MODIFIED_SINCE' in request.META:
+ # This code is mostly stolen from django :)
+ matches = re.match(r"^([^;]+)(; length=([0-9]+))?$",
+ request.META.get('HTTP_IF_MODIFIED_SINCE'),
+ re.IGNORECASE)
+ header_mtime = parse_http_date_safe(matches.group(1)) if matches else None
+ # We don't do length checking, just the date
+ if header_mtime is not None and int(latestmod) <= header_mtime:
+ return HttpResponseNotModified(content_type='text/css')
+ resp['Last-Modified'] = http_date(latestmod)
+
+ for fn in files:
+ with open(fn) as f:
+ resp.write("/* %s */\n" % fn)
+ resp.write(f.read())
+ resp.write("\n")
+
+ return resp
# Redirect to the requested url, with a slash first. This is used to remove
# trailing slashes on messageid links by doing a permanent redirect. This is
# in the cache.
@cache(hours=8)
def slash_redirect(request, url):
- return HttpResponsePermanentRedirect("/%s" % url)
+ return HttpResponsePermanentRedirect("/%s" % url)
# Redirect the requested URL to whatever happens to be in the regexp capture.
# This is used for user agents that generate broken URLs that are easily
# captured using regexp.
@cache(hours=8)
def re_redirect(request, prefix, msgid):
- return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid))
+ return HttpResponsePermanentRedirect("/%s%s" % (prefix, msgid))
ROOT_URLCONF = 'archives.urls'
TEMPLATES = [{
- 'BACKEND': 'django.template.backends.django.DjangoTemplates',
- 'OPTIONS': {
- 'context_processors': [
- 'django.template.context_processors.request',
- 'django.contrib.messages.context_processors.messages',
- 'archives.util.PGWebContextProcessor',
- ],
- 'loaders': [
- 'django.template.loaders.filesystem.Loader',
- 'django.template.loaders.app_directories.Loader',
- ],
- },
+ 'BACKEND': 'django.template.backends.django.DjangoTemplates',
+ 'OPTIONS': {
+ 'context_processors': [
+ 'django.template.context_processors.request',
+ 'django.contrib.messages.context_processors.messages',
+ 'archives.util.PGWebContextProcessor',
+ ],
+ 'loaders': [
+ 'django.template.loaders.filesystem.Loader',
+ 'django.template.loaders.app_directories.Loader',
+ ],
+ },
}]
# 'django.contrib.admin',
# Uncomment the next line to enable admin documentation:
# 'django.contrib.admindocs',
- 'archives.mailarchives',
+ 'archives.mailarchives',
]
# A sample logging configuration. The only tangible logging
PUBLIC_ARCHIVES = False
try:
- from .settings_local import *
+ from .settings_local import *
except ImportError:
- pass
+ pass
# If this is a non-public site, enable middleware for handling logins etc
if not PUBLIC_ARCHIVES:
- MIDDLEWARE_CLASSES = [
- 'django.contrib.sessions.middleware.SessionMiddleware',
- 'django.contrib.auth.middleware.AuthenticationMiddleware',
- ] + MIDDLEWARE_CLASSES
- MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware')
-
- INSTALLED_APPS = [
- 'django.contrib.auth',
- 'django.contrib.contenttypes',
- 'django.contrib.sessions',
- ] + INSTALLED_APPS
-
- from archives.util import validate_new_user
- PGAUTH_CREATEUSER_CALLBACK=validate_new_user
+ MIDDLEWARE_CLASSES = [
+ 'django.contrib.sessions.middleware.SessionMiddleware',
+ 'django.contrib.auth.middleware.AuthenticationMiddleware',
+ ] + MIDDLEWARE_CLASSES
+ MIDDLEWARE_CLASSES.append('archives.mailarchives.redirecthandler.RedirectMiddleware')
+
+ INSTALLED_APPS = [
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ ] + INSTALLED_APPS
+
+ from archives.util import validate_new_user
+ PGAUTH_CREATEUSER_CALLBACK=validate_new_user
from django.utils.functional import SimpleLazyObject
def validate_new_user(username, email, firstname, lastname):
- # Only allow user creation if they are already a subscriber
- curs = connection.cursor()
- curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", {
- 'username': username,
- })
- if curs.fetchone()[0]:
- # User is subscribed to something, so allow creation
- return None
+ # Only allow user creation if they are already a subscriber
+ curs = connection.cursor()
+ curs.execute("SELECT EXISTS(SELECT 1 FROM listsubscribers WHERE username=%(username)s)", {
+ 'username': username,
+ })
+ if curs.fetchone()[0]:
+ # User is subscribed to something, so allow creation
+ return None
- return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.")
+ return HttpResponse("You are not currently subscribed to any mailing list on this server. Account not created.")
def _get_gitrev():
- # Return the current git revision, that is used for
- # cache-busting URLs.
- try:
- with open('../.git/refs/heads/master') as f:
- return f.readline()[:8]
- except IOError:
- # A "git gc" will remove the ref and replace it with a packed-refs.
- try:
- with open('../.git/packed-refs') as f:
- for l in f.readlines():
- if l.endswith("refs/heads/master\n"):
- return l[:8]
- # Not found in packed-refs. Meh, just make one up.
- return 'ffffffff'
- except IOError:
- # If packed-refs also can't be read, just give up
- return 'eeeeeeee'
+ # Return the current git revision, that is used for
+ # cache-busting URLs.
+ try:
+ with open('../.git/refs/heads/master') as f:
+ return f.readline()[:8]
+ except IOError:
+ # A "git gc" will remove the ref and replace it with a packed-refs.
+ try:
+ with open('../.git/packed-refs') as f:
+ for l in f.readlines():
+ if l.endswith("refs/heads/master\n"):
+ return l[:8]
+ # Not found in packed-refs. Meh, just make one up.
+ return 'ffffffff'
+ except IOError:
+ # If packed-refs also can't be read, just give up
+ return 'eeeeeeee'
# Template context processor to add information about the root link and
# the current git revision. git revision is returned as a lazy object so
# we don't spend effort trying to load it if we don't need it (though
# all general pages will need it since it's used to render the css urls)
def PGWebContextProcessor(request):
- gitrev = SimpleLazyObject(_get_gitrev)
- return {
- 'gitrev': gitrev,
- }
+ gitrev = SimpleLazyObject(_get_gitrev)
+ return {
+ 'gitrev': gitrev,
+ }
import psycopg2
def scan_message(messageid, olddate, curs):
- u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
- print("Scanning message at %s (date reported as %s)..." % (u, olddate))
+ u = "http://archives.postgresql.org/msgtxt.php?id=%s" % messageid
+ print("Scanning message at %s (date reported as %s)..." % (u, olddate))
- f = urlopen(u)
- p = Parser()
- msg = p.parse(f)
- f.close()
+ f = urlopen(u)
+ p = Parser()
+ msg = p.parse(f)
+ f.close()
- # Can be either one of them, but we really don't care...
- ds = None
- for k,r in list(msg.items()):
- if k != 'Received': continue
+ # Can be either one of them, but we really don't care...
+ ds = None
+ for k,r in list(msg.items()):
+ if k != 'Received': continue
- print("Trying on %s" % r)
- m = re.search(';\s*(.*)$', r)
- if m:
- ds = m.group(1)
- break
- m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r)
- if m:
- ds = m.group(1)
- break
+ print("Trying on %s" % r)
+ m = re.search(';\s*(.*)$', r)
+ if m:
+ ds = m.group(1)
+ break
+ m = re.search(';\s*(.*)\s*\(envelope-from [^\)]+\)$', r)
+ if m:
+ ds = m.group(1)
+ break
- if not ds:
- print("Could not find date. Sorry.")
- return False
- d = None
- try:
- d = dateutil.parser.parse(ds)
- except:
- print("Could not parse date '%s', sorry." % ds)
- return
+ if not ds:
+ print("Could not find date. Sorry.")
+ return False
+ d = None
+ try:
+ d = dateutil.parser.parse(ds)
+ except:
+ print("Could not parse date '%s', sorry." % ds)
+ return
- while True:
- x = input("Parsed this as date %s. Update? " % d)
- if x.upper() == 'Y':
- curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", {
- 'd': d,
- 'm': messageid,
- })
- print("Updated.")
- break
- elif x.upper() == 'N':
- break
-
+ while True:
+ x = input("Parsed this as date %s. Update? " % d)
+ if x.upper() == 'Y':
+ curs.execute("UPDATE messages SET date=%(d)s WHERE messageid=%(m)s", {
+ 'd': d,
+ 'm': messageid,
+ })
+ print("Updated.")
+ break
+ elif x.upper() == 'N':
+ break
+
if __name__ == "__main__":
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- connstr = cfg.get('db','connstr')
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ connstr = cfg.get('db','connstr')
- conn = psycopg2.connect(connstr)
+ conn = psycopg2.connect(connstr)
- curs = conn.cursor()
- curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'")
- for messageid, date in curs.fetchall():
- scan_message(messageid, date, curs)
+ curs = conn.cursor()
+ curs.execute("SELECT messageid, date FROM messages WHERE date>(CURRENT_TIMESTAMP+'1 day'::interval) OR date < '1997-01-01'")
+ for messageid, date in curs.fetchall():
+ scan_message(messageid, date, curs)
- conn.commit()
- print("Done.")
+ conn.commit()
+ print("Done.")
def generate_single_mbox(conn, listid, year, month, destination):
- curs = conn.cursor()
- curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
- 'listid': listid,
- 'startdate': date(year, month, 1),
- 'enddate': date(year, month, calendar.monthrange(year, month)[1]),
- })
- with open(destination, 'w', encoding='utf8') as f:
- for id, raw, in curs:
- s = BytesIO(raw)
- parser = email.parser.BytesParser(policy=email.policy.compat32)
- msg = parser.parse(s)
- try:
- x = msg.as_string(unixfrom=True)
- f.write(x)
- except UnicodeEncodeError as e:
- print("Not including {0}, unicode error".format(msg['message-id']))
- except Exception as e:
- print("Not including {0}, exception {1}".format(msg['message-id'], e))
+ curs = conn.cursor()
+ curs.execute("SELECT id, rawtxt FROM messages m INNER JOIN list_threads t ON t.threadid=m.threadid WHERE hiddenstatus IS NULL AND listid=%(listid)s AND date>=%(startdate)s AND date <= %(enddate)s ORDER BY date", {
+ 'listid': listid,
+ 'startdate': date(year, month, 1),
+ 'enddate': date(year, month, calendar.monthrange(year, month)[1]),
+ })
+ with open(destination, 'w', encoding='utf8') as f:
+ for id, raw, in curs:
+ s = BytesIO(raw)
+ parser = email.parser.BytesParser(policy=email.policy.compat32)
+ msg = parser.parse(s)
+ try:
+ x = msg.as_string(unixfrom=True)
+ f.write(x)
+ except UnicodeEncodeError as e:
+ print("Not including {0}, unicode error".format(msg['message-id']))
+ except Exception as e:
+ print("Not including {0}, exception {1}".format(msg['message-id'], e))
if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Generate mbox file(s)")
- parser.add_argument('--list', type=str, help='List to generate for')
- parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02')
- parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True)
- parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists')
- parser.add_argument('--quiet', action='store_true', help='Run quiet')
+ parser = argparse.ArgumentParser(description="Generate mbox file(s)")
+ parser.add_argument('--list', type=str, help='List to generate for')
+ parser.add_argument('--month', type=str, help='year-month to generate for, e.g. 2017-02')
+ parser.add_argument('--destination', type=str, help='File to write into (or directory for --auto)', required=True)
+ parser.add_argument('--auto', action='store_true', help='Auto-generate latest month mboxes for all lists')
+ parser.add_argument('--quiet', action='store_true', help='Run quiet')
- args = parser.parse_args()
+ args = parser.parse_args()
- if args.auto:
- if (args.list or args.month):
- print("Must not specify list and month when auto-generating!")
- sys.exit(1)
- if not os.path.isdir(args.destination):
- print("Destination must be a directory, and exist, when auto-generating")
- sys.exit(1)
- else:
- if not (args.list and args.month and args.destination):
- print("Must specify list, month and destination when generating a single mailbox")
- parser.print_help()
- sys.exit(1)
+ if args.auto:
+ if (args.list or args.month):
+ print("Must not specify list and month when auto-generating!")
+ sys.exit(1)
+ if not os.path.isdir(args.destination):
+ print("Destination must be a directory, and exist, when auto-generating")
+ sys.exit(1)
+ else:
+ if not (args.list and args.month and args.destination):
+ print("Must specify list, month and destination when generating a single mailbox")
+ parser.print_help()
+ sys.exit(1)
- # Arguments OK, now connect
- cfg = ConfigParser()
- cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini'))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
+ # Arguments OK, now connect
+ cfg = ConfigParser()
+ cfg.read(os.path.join(os.path.realpath(os.path.dirname(sys.argv[0])), 'archives.ini'))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
- conn = psycopg2.connect(connstr)
- curs = conn.cursor()
+ conn = psycopg2.connect(connstr)
+ curs = conn.cursor()
- if args.auto:
- curs.execute("SELECT listid, listname FROM lists WHERE active ORDER BY listname")
- all_lists = curs.fetchall()
- today = date.today()
- yesterday = today - timedelta(days=1)
- if today.month == yesterday.month:
- # Same month, so do it
- monthrange = ((today.year, today.month),)
- else:
- monthrange = ((today.year, today.month),(yesterday.year, yesterday.month))
- for lid, lname in all_lists:
- for year, month in monthrange:
- fullpath = os.path.join(args.destination, lname, 'files/public/archive')
- if not os.path.isdir(fullpath):
- os.makedirs(fullpath)
- if not args.quiet:
- print("Generating {0}-{1} for {2}".format(year, month, lname))
- generate_single_mbox(conn, lid, year, month,
- os.path.join(fullpath, "{0}.{0:04d}{1:02d}".format(year, month)))
- else:
- # Parse year and month
- m = re.match('^(\d{4})-(\d{2})$', args.month)
- if not m:
- print("Month must be specified on format YYYY-MM, not {0}".format(args.month))
- sys.exit(1)
- year = int(m.group(1))
- month = int(m.group(2))
+ if args.auto:
+ curs.execute("SELECT listid, listname FROM lists WHERE active ORDER BY listname")
+ all_lists = curs.fetchall()
+ today = date.today()
+ yesterday = today - timedelta(days=1)
+ if today.month == yesterday.month:
+ # Same month, so do it
+ monthrange = ((today.year, today.month),)
+ else:
+ monthrange = ((today.year, today.month),(yesterday.year, yesterday.month))
+ for lid, lname in all_lists:
+ for year, month in monthrange:
+ fullpath = os.path.join(args.destination, lname, 'files/public/archive')
+ if not os.path.isdir(fullpath):
+ os.makedirs(fullpath)
+ if not args.quiet:
+ print("Generating {0}-{1} for {2}".format(year, month, lname))
+ generate_single_mbox(conn, lid, year, month,
+ os.path.join(fullpath, "{0}.{1:04d}{2:02d}".format(lname, year, month)))
+ else:
+ # Parse year and month
+ m = re.match('^(\d{4})-(\d{2})$', args.month)
+ if not m:
+ print("Month must be specified on format YYYY-MM, not {0}".format(args.month))
+ sys.exit(1)
+ year = int(m.group(1))
+ month = int(m.group(2))
- curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", {
- 'name': args.list,
- })
- if curs.rowcount != 1:
- print("List {0} not found.".format(args.list))
- sys.exit(1)
+ curs.execute("SELECT listid FROM lists WHERE listname=%(name)s", {
+ 'name': args.list,
+ })
+ if curs.rowcount != 1:
+ print("List {0} not found.".format(args.list))
+ sys.exit(1)
- if not args.quiet:
- print("Generating {0}-{1} for {2}".format(year, month, args.list))
- generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination)
+ if not args.quiet:
+ print("Generating {0}-{1} for {2}".format(year, month, args.list))
+ generate_single_mbox(conn, curs.fetchone()[0], year, month, args.destination)
from lib.varnish import VarnishPurger
reasons = [
- None, # Placeholder for 0
- "virus",
- "violates policies",
- "privacy",
- "corrupt",
+ None, # Placeholder for 0
+ "virus",
+ "violates policies",
+ "privacy",
+ "corrupt",
]
if __name__ == "__main__":
- optparser = OptionParser()
- optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide')
-
- (opt, args) = optparser.parse_args()
-
- if (len(args)):
- print("No bare arguments accepted")
- optparser.print_help()
- sys.exit(1)
-
- if not opt.msgid:
- print("Message-id must be specified")
- optparser.print_help()
- sys.exit(1)
-
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
-
- conn = psycopg2.connect(connstr)
- curs = conn.cursor()
-
- curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", {
- 'msgid': opt.msgid,
- })
- if curs.rowcount <= 0:
- print("Message not found.")
- sys.exit(1)
-
- id, threadid, previous = curs.fetchone()
-
- # Message found, ask for reason
- reason = 0
- print("Current status: %s" % reasons[previous or 0])
- print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))))
- while True:
- reason = input('Reason for hiding message? ')
- try:
- reason = int(reason)
- except ValueError:
- continue
-
- if reason == 0:
- print("Un-hiding message")
- reason = None
- break
- else:
- try:
- print("Hiding message for reason: %s" % reasons[reason])
- except:
- continue
- break
- if previous == reason:
- print("No change in status, not updating")
- conn.close()
- sys.exit(0)
-
- curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", {
- 'new': reason,
- 'id': id,
- })
- if curs.rowcount != 1:
- print("Failed to update! Not hiding!")
- conn.rollback()
- sys.exit(0)
- conn.commit()
-
- VarnishPurger(cfg).purge([int(threadid), ])
- conn.close()
-
- print("Message hidden and varnish purge triggered.")
+ optparser = OptionParser()
+ optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to hide')
+
+ (opt, args) = optparser.parse_args()
+
+ if (len(args)):
+ print("No bare arguments accepted")
+ optparser.print_help()
+ sys.exit(1)
+
+ if not opt.msgid:
+ print("Message-id must be specified")
+ optparser.print_help()
+ sys.exit(1)
+
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
+
+ conn = psycopg2.connect(connstr)
+ curs = conn.cursor()
+
+ curs.execute("SELECT id, threadid, hiddenstatus FROM messages WHERE messageid=%(msgid)s", {
+ 'msgid': opt.msgid,
+ })
+ if curs.rowcount <= 0:
+ print("Message not found.")
+ sys.exit(1)
+
+ id, threadid, previous = curs.fetchone()
+
+ # Message found, ask for reason
+ reason = 0
+ print("Current status: %s" % reasons[previous or 0])
+ print("\n".join("%s - %s " % (n, reasons[n]) for n in range(len(reasons))))
+ while True:
+ reason = input('Reason for hiding message? ')
+ try:
+ reason = int(reason)
+ except ValueError:
+ continue
+
+ if reason == 0:
+ print("Un-hiding message")
+ reason = None
+ break
+ else:
+ try:
+ print("Hiding message for reason: %s" % reasons[reason])
+ except:
+ continue
+ break
+ if previous == reason:
+ print("No change in status, not updating")
+ conn.close()
+ sys.exit(0)
+
+ curs.execute("UPDATE messages SET hiddenstatus=%(new)s WHERE id=%(id)s", {
+ 'new': reason,
+ 'id': id,
+ })
+ if curs.rowcount != 1:
+ print("Failed to update! Not hiding!")
+ conn.rollback()
+ sys.exit(0)
+ conn.commit()
+
+ VarnishPurger(cfg).purge([int(threadid), ])
+ conn.close()
+
+ print("Message hidden and varnish purge triggered.")
hp = HTMLParser()
def get_messageid(fn):
- with open(fn) as f:
- for l in f:
- if l.startswith('<!--X-Message-Id: '):
- # Found it!
- return hp.unescape(l[18:-5])
- raise Exception("No messageid in %s" % fn)
+ with open(fn) as f:
+ for l in f:
+ if l.startswith('<!--X-Message-Id: '):
+ # Found it!
+ return hp.unescape(l[18:-5])
+ raise Exception("No messageid in %s" % fn)
dirre = re.compile("^(\d+)-(\d+)$")
fnre = re.compile("^msg(\d+)\.php$")
for (dirpath, dirnames, filenames) in os.walk(root):
- # Dirpath is the full pathname
- base = os.path.basename(dirpath)
- m = dirre.match(base)
- if m:
- # Directory with actual files in it
- listname = os.path.basename(os.path.dirname(dirpath))
- for fn in filenames:
- m2 = fnre.match(fn)
- if m2:
- print "%s;%s;%s;%s;\"%s\"" % (listmap[listname], m.group(1), m.group(2), m2.group(1), get_messageid("%s/%s" % (dirpath, fn)))
+ # Dirpath is the full pathname
+ base = os.path.basename(dirpath)
+ m = dirre.match(base)
+ if m:
+ # Directory with actual files in it
+ listname = os.path.basename(os.path.dirname(dirpath))
+ for fn in filenames:
+ m2 = fnre.match(fn)
+ if m2:
+ print "%s;%s;%s;%s;\"%s\"" % (listmap[listname], m.group(1), m.group(2), m2.group(1), get_messageid("%s/%s" % (dirpath, fn)))
class IgnorableException(Exception):
- pass
+ pass
class Log(object):
- def __init__(self):
- self.verbose = False
+ def __init__(self):
+ self.verbose = False
- def set(self, verbose):
- self.verbose = verbose
+ def set(self, verbose):
+ self.verbose = verbose
- def status(self, msg):
- if self.verbose:
- print(msg)
+ def status(self, msg):
+ if self.verbose:
+ print(msg)
- def log(self, msg):
- print(msg)
+ def log(self, msg):
+ print(msg)
- def error(self, msg):
- print(msg)
+ def error(self, msg):
+ print(msg)
- def print_status(self):
- opstatus.print_status()
+ def print_status(self):
+ opstatus.print_status()
class OpStatus(object):
- def __init__(self):
- self.stored = 0
- self.dupes = 0
- self.tagged = 0
- self.failed = 0
- self.overwritten = 0
-
- def print_status(self):
- print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten))
+ def __init__(self):
+ self.stored = 0
+ self.dupes = 0
+ self.tagged = 0
+ self.failed = 0
+ self.overwritten = 0
+
+ def print_status(self):
+ print("%s stored, %s new-list tagged, %s dupes, %s failed, %s overwritten" % (self.stored, self.tagged, self.dupes, self.failed, self.overwritten))
log = Log()
bSEPARATOR = bytes(SEPARATOR, 'ascii')
class MailboxBreakupParser(object):
- def __init__(self, fn):
- self.EOF = False
+ def __init__(self, fn):
+ self.EOF = False
- if fn.endswith(".gz"):
- cat = "zcat"
- else:
- cat = "cat"
- cmd = "%s %s | formail -s /bin/sh -c 'cat && echo %s'" % (cat, fn, SEPARATOR)
- self.pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
+ if fn.endswith(".gz"):
+ cat = "zcat"
+ else:
+ cat = "cat"
+ cmd = "%s %s | formail -s /bin/sh -c 'cat && echo %s'" % (cat, fn, SEPARATOR)
+ self.pipe = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
- def returncode(self):
- self.pipe.wait()
- return self.pipe.returncode
+ def returncode(self):
+ self.pipe.wait()
+ return self.pipe.returncode
- def stderr_output(self):
- return self.pipe.stderr.read()
+ def stderr_output(self):
+ return self.pipe.stderr.read()
- def __next__(self):
- sio = BytesIO()
- while True:
- try:
- l = next(self.pipe.stdout)
- except StopIteration:
- # End of file!
- self.EOF = True
- if sio.tell() == 0:
- # Nothing read yet, so return None instead of an empty
- # bytesio
- return None
- sio.seek(0)
- return sio
- if l.rstrip() == bSEPARATOR:
- # Reached a separator. Meaning we're not at end of file,
- # but we're at end of message.
- sio.seek(0)
- return sio
- # Otherwise, append it to where we are now
- sio.write(l)
+ def __next__(self):
+ sio = BytesIO()
+ while True:
+ try:
+ l = next(self.pipe.stdout)
+ except StopIteration:
+ # End of file!
+ self.EOF = True
+ if sio.tell() == 0:
+ # Nothing read yet, so return None instead of an empty
+ # bytesio
+ return None
+ sio.seek(0)
+ return sio
+ if l.rstrip() == bSEPARATOR:
+ # Reached a separator. Meaning we're not at end of file,
+ # but we're at end of message.
+ sio.seek(0)
+ return sio
+ # Otherwise, append it to where we are now
+ sio.write(l)
from lib.log import log
class ArchivesParser(object):
- def __init__(self):
- self.parser = BytesParser(policy=compat32)
-
- def parse(self, stream):
- self.rawtxt = stream.read()
- self.msg = self.parser.parse(io.BytesIO(self.rawtxt))
-
- def is_msgid(self, msgid):
- # Look for a specific messageid. This means we might parse it twice,
- # but so be it. Any exception means we know it's not this one...
- try:
- if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
- return True
- except Exception as e:
- return False
-
- def analyze(self, date_override=None):
- self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID')))
- self._from = self.decode_mime_header(self.get_mandatory('From'), True)
- self.to = self.decode_mime_header(self.get_optional('To'), True)
- self.cc = self.decode_mime_header(self.get_optional('CC'), True)
- self.subject = self.decode_mime_header(self.get_optional('Subject'))
- if date_override:
- self.date = self.forgiving_date_decode(date_override)
- else:
- self.date = self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date')))
- self.bodytxt = self.get_body()
- self.attachments = []
- self.get_attachments()
- if len(self.attachments) > 0:
- log.status("Found %s attachments" % len(self.attachments))
-
- # Build an list of the message id's we are interested in
- self.parents = []
- # The first one is in-reply-to, if it exists
- if self.get_optional('in-reply-to'):
- m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True)
- if m:
- self.parents.append(m)
-
- # Then we add all References values, in backwards order
- if self.get_optional('references'):
- cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())]
- # Can't do this with a simple self.parents.extend() due to broken
- # mailers that add the same reference more than once. And we can't
- # use a set() to make it unique, because order is very important
- for m in cleaned_msgids:
- if m and not m in self.parents:
- self.parents.append(m)
-
-
- def clean_charset(self, charset):
- lcharset = charset.lower()
- if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
- # Special case where we don't know... We'll assume
- # us-ascii and use replacements
- return 'us-ascii'
- if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset':
- # Seriously broken charset definitions, map to us-ascii
- # and throw away the rest with replacements
- return 'us-ascii'
- if lcharset == 'x-gbk':
- # Some MUAs set it to x-gbk, but there is a valid
- # declaratoin as gbk...
- return 'gbk'
- if lcharset == 'iso-8859-8-i':
- # -I is a special logical version, but should be the
- # same charset
- return 'iso-8859-8'
- if lcharset == 'windows-874':
- # This is an alias for iso-8859-11
- return 'iso-8859-11'
- if lcharset == 'iso-88-59-1' or lcharset == 'iso-8858-1':
- # Strange way of saying 8859....
- return 'iso-8859-1'
- if lcharset == 'iso885915':
- return 'iso-8859-15'
- if lcharset == 'iso-latin-2':
- return 'iso-8859-2'
- if lcharset == 'iso-850':
- # Strange spelling of cp850 (windows charset)
- return 'cp850'
- if lcharset == 'koi8r':
- return 'koi8-r'
- if lcharset == 'cp 1252':
- return 'cp1252'
- if lcharset == 'iso-8859-1,iso-8859-2' or lcharset == 'iso-8859-1:utf8:us-ascii':
- # Why did this show up more than once?!
- return 'iso-8859-1'
- if lcharset == 'x-windows-949':
- return 'ms949'
- if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de':
- # This is a locale, and not a charset, but most likely it's this one
- return 'iso-8859-1'
- if lcharset == 'iso-8858-15':
- # How is this a *common* mistake?
- return 'iso-8859-15'
- if lcharset == 'macintosh':
- return 'mac_roman'
- if lcharset == 'cn-big5':
- return 'big5'
- if lcharset == 'x-unicode-2-0-utf-7':
- return 'utf-7'
- if lcharset == 'tscii':
- # No support for this charset :S Map it down to ascii
- # and throw away all the rest. sucks, but we have to
- return 'us-ascii'
- return charset
-
- def get_payload_as_unicode(self, msg):
- try:
- b = msg.get_payload(decode=True)
- except AssertionError:
- # Badly encoded data can throw an exception here, where the python
- # libraries fail to handle it and enters a cannot-happen path.
- # In which case we just ignore it and hope for a better MIME part later.
- b = None
-
- if b:
- # Find out if there is a charset
- charset = None
- params = msg.get_params()
- if not params:
- # No content-type, so we assume us-ascii
- return str(b, 'us-ascii', errors='ignore')
- for k,v in params:
- if k.lower() == 'charset':
- charset = v
- break
- if charset:
- try:
- return str(b, self.clean_charset(charset), errors='ignore')
- except LookupError as e:
- raise IgnorableException("Failed to get unicode payload: %s" % e)
- else:
- # XXX: reasonable default?
- return str(b, errors='ignore')
- # Return None or empty string, depending on what we got back
- return b
-
- # Regular expression matching the PostgreSQL custom mail footer that
- # is appended to all emails.
- _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)
- def get_body(self):
- b = self._get_body()
- if b:
- # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will
- # later reject..
- if b.find('\udbff\n\udef8'):
- b = b.replace('\udbff\n\udef8', '')
-
- # Remove postgres specific mail footer - if it's there
- m = self._re_footer.match(b)
- if m:
- b = m.group(1)
-
- # Sometimes we end up with a trailing \0 when decoding long strings, so
- # replace it if it's there.
- # In fact, replace it everywhere, since it can also turn up in the middle
- # of a text when it's a really broken decoding.
- b = b.replace('\0', '')
-
- return b
-
- def _get_body(self):
- # This is where the magic happens - try to figure out what the body
- # of this message should render as.
- hasempty = False
-
- # First see if this is a single-part message that we can just
- # decode and go.
- b = self.get_payload_as_unicode(self.msg)
- if b: return b
- if b == '':
- # We found something, but it was empty. We'll keep looking as
- # there might be something better available, but make a note
- # that empty exists.
- hasempty = True
-
- # Ok, it's multipart. Find the first part that is text/plain,
- # and use that one. Do this recursively, since we may have something
- # like:
- # multipart/mixed:
- # multipart/alternative:
- # text/plain
- # text/html
- # application/octet-stream (attachment)
- b = self.recursive_first_plaintext(self.msg)
- if b: return b
- if b == '':
- hasempty = True
-
- # Couldn't find a plaintext. Look for the first HTML in that case.
- # Fallback, but what can we do at this point...
- b = self.recursive_first_plaintext(self.msg, True)
- if b:
- b = self.html_clean(b)
- if b: return b
- if b == '' or b is None:
- hasempty = True
-
- if hasempty:
- log.status('Found empty body in %s' % self.msgid)
- return ''
- raise IgnorableException("Don't know how to read the body from %s" % self.msgid)
-
- def recursive_first_plaintext(self, container, html_instead=False):
- pl = container.get_payload()
- if isinstance(pl, str):
- # This was not a multipart, but it leaked... Give up!
- return None
- for p in pl:
- if p.get_params() == None:
- # MIME multipart/mixed, but no MIME type on the part
- log.status("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid)
- return self.get_payload_as_unicode(p)
- if p.get_params()[0][0].lower() == 'text/plain':
- # Don't include it if it looks like an attachment
- if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
- continue
- return self.get_payload_as_unicode(p)
- if html_instead and p.get_params()[0][0].lower() == 'text/html':
- # Don't include it if it looks like an attachment
- if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
- continue
- return self.get_payload_as_unicode(p)
- if p.is_multipart():
- b = self.recursive_first_plaintext(p, html_instead)
- if b or b == '': return b
-
- # Yikes, nothing here! Hopefully we'll find something when
- # we continue looping at a higher level.
- return None
-
- def get_attachments(self):
- self.attachments_found_first_plaintext = False
- self.recursive_get_attachments(self.msg)
-
- # Clean a filenames encoding and return it as a unicode string
- def _clean_filename_encoding(self, filename):
- # If this is a header-encoded filename, start by decoding that
- if filename.startswith('=?'):
- decoded, encoding = decode_header(filename)[0]
- return str(decoded, encoding, errors='ignore')
-
- # If it's already unicode, just return it
- if isinstance(filename, str):
- return filename
-
- # Anything that's not UTF8, we just get rid of. We can live with
- # filenames slightly mangled in this case.
- return str(filename, 'utf-8', errors='ignore')
-
- def _extract_filename(self, container):
- # Try to get the filename for an attachment in the container.
- # If the standard library can figure one out, use that one.
- f = container.get_filename()
- if f: return self._clean_filename_encoding(f)
-
- # Failing that, some mailers set Content-Description to the
- # filename
- if 'Content-Description' in container:
- return self._clean_filename_encoding(container['Content-Description'])
- return None
-
- def recursive_get_attachments(self, container):
- # We start recursion in the "multipart" container if any
- if container.get_content_type() == 'multipart/mixed' or container.get_content_type() == 'multipart/signed':
- # Multipart - worth scanning into
- if not container.is_multipart():
- # Wow, this is broken. It's multipart/mixed, but doesn't
- # contain multiple parts.
- # Since we're just looking for attachments, let's just
- # ignore it...
- return
- for p in container.get_payload():
- if p.get_params() == None:
- continue
- self.recursive_get_attachments(p)
- elif container.get_content_type() == 'multipart/alternative':
- # Alternative is not an attachment (we decide)
- # It's typilcally plantext + html
- self.attachments_found_first_plaintext = True
- return
- elif container.is_multipart():
- # Other kinds of multipart, such as multipart/signed...
- return
- else:
- # Not a multipart.
- # Exclude specific contenttypes
- if container.get_content_type() == 'application/pgp-signature':
- return
- if container.get_content_type() in ('application/pkcs7-signature', 'application/x-pkcs7-signature'):
- return
- # For now, accept anything not text/plain
- if container.get_content_type() != 'text/plain':
- try:
- self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
- except AssertionError:
- # Badly encoded data can throw an exception here, where the python
- # libraries fail to handle it and enters a cannot-happen path.
- # In which case we just ignore this attachment.
- return
- return
-
- # It's a text/plain, it might be worthwhile.
- # If it has a name, we consider it an attachments
- if not container.get_params():
- return
- for k,v in container.get_params():
- if k=='name' and v != '':
- # Yes, it has a name
- try:
- self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
- except AssertionError:
- # Badly encoded data can throw an exception here, where the python
- # libraries fail to handle it and enters a cannot-happen path.
- # In which case we just ignore this attachment.
- return
-
- return
-
- # If it's content-disposition=attachment, we also want to save it
- if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'):
- try:
- self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
- except AssertionError:
- # Badly encoded data can throw an exception here, where the python
- # libraries fail to handle it and enters a cannot-happen path.
- # In which case we just ignore this attachment.
- return
-
- return
-
- # If we have already found one text/plain part, make all
- # further text/plain parts attachments
- if self.attachments_found_first_plaintext:
- # However, this will also *always* catch the MIME part added
- # by majordomo with the footer. So if that one is present,
- # we need to explicitly exclude it again.
- try:
- b = container.get_payload(decode=True)
- except AssertionError:
- # Badly encoded data can throw an exception here, where the python
- # libraries fail to handle it and enters a cannot-happen path.
- # In which case we just ignore this attachment.
- return
-
- if isinstance(b, str) and not self._re_footer.match(b):
- # We know there is no name for this one
- self.attachments.append((None, container.get_content_type(), b))
- return
-
- # Ok, so this was a plaintext that we ignored. Set the flag
- # that we have now ignored one, so we'll make the next one
- # an attachment.
- self.attachments_found_first_plaintext = True
- # No name, and text/plain, so ignore it
-
- re_msgid = re.compile('^\s*<(.*)>\s*')
- def clean_messageid(self, messageid, ignorebroken=False):
- m = self.re_msgid.match(messageid)
- if not m:
- if ignorebroken:
- log.status("Could not parse messageid '%s', ignoring it" % messageid)
- return None
- raise IgnorableException("Could not parse message id '%s'" % messageid)
- return m.groups(1)[0].replace(' ','')
-
-# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
- # Now using [^\s] instead of \w, to work with japanese chars
- _date_multi_re = re.compile(' \(([^\s]+\s[^\s]+(\s+[^\s]+)*|)\)$')
- _date_multi_re2 = re.compile(' ([\+-]\d{4}) \([^)]+\)$')
- _date_multiminus_re = re.compile(' -(-\d+)$')
- _date_offsetnoplus_re = re.compile(' (\d{4})$')
- def forgiving_date_decode(self, d):
- if d.strip() == '':
- raise IgnorableException("Failed to parse empty date")
- # Strange timezones requiring manual adjustments
- if d.endswith('-7700 (EST)'):
- d = d.replace('-7700 (EST)', 'EST')
- if d.endswith('+6700 (EST)'):
- d = d.replace('+6700 (EST)', 'EST')
- if d.endswith('+-4-30'):
- d = d.replace('+-4-30', '+0430')
- if d.endswith('+1.00'):
- d = d.replace('+1.00', '+0100')
- if d.endswith('+-100'):
- d = d.replace('+-100', '+0100')
- if d.endswith('+500'):
- d = d.replace('+500', '+0500')
- if d.endswith('-500'):
- d = d.replace('-500', '-0500')
- if d.endswith('-700'):
- d = d.replace('-700', '-0700')
- if d.endswith('-800'):
- d = d.replace('-800', '-0800')
- if d.endswith('+05-30'):
- d = d.replace('+05-30', '+0530')
- if d.endswith('+0-900'):
- d = d.replace('+0-900', '-0900')
- if d.endswith('Mexico/General'):
- d = d.replace('Mexico/General','CDT')
- if d.endswith('Pacific Daylight Time'):
- d = d.replace('Pacific Daylight Time', 'PDT')
- if d.endswith(' ZE2'):
- d = d.replace(' ZE2',' +0200')
- if d.find('-Juin-') > 0:
- d = d.replace('-Juin-','-Jun-')
- if d.find('-Juil-') > 0:
- d = d.replace('-Juil-','-Jul-')
- if d.find(' 0 (GMT)'):
- d = d.replace(' 0 (GMT)',' +0000')
-
- if self._date_multiminus_re.search(d):
- d = self._date_multiminus_re.sub(' \\1', d)
-
- if self._date_offsetnoplus_re.search(d):
- d = self._date_offsetnoplus_re.sub('+\\1', d)
-
-
- # We have a number of dates in the format
- # "<full datespace> +0200 (MET DST)"
- # or similar. The problem coming from the space within the
- # parenthesis, or if the contents of the parenthesis is
- # completely empty
- if self._date_multi_re.search(d):
- d = self._date_multi_re.sub('', d)
-
- # If the spec is instead
- # "<full datespace> +0200 (...)"
- # of any kind, we can just remove what's in the (), because the
- # parser is just going to rely on the fixed offset anyway.
- if self._date_multi_re2.search(d):
- d = self._date_multi_re2.sub(' \\1', d)
-
- try:
- dp = dateutil.parser.parse(d, fuzzy=True)
-
- # Some offsets are >16 hours, which postgresql will not
- # (for good reasons) accept
- if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1:
- # Convert it to a UTC timestamp using Python. It will give
- # us the right time, but the wrong timezone. Should be
- # enough...
- dp = datetime.datetime(*dp.utctimetuple()[:6])
- return dp
- except Exception as e:
- raise IgnorableException("Failed to parse date '%s': %s" % (d, e))
-
- def _maybe_decode(self, s, charset):
- if isinstance(s, str):
- return s.strip(' ')
- return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ')
-
- # Workaround for broken quoting in some MUAs (see below)
- _re_mailworkaround = re.compile('"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
- def _decode_mime_header(self, hdr, email_workaround):
- if hdr == None:
- return None
-
- # Per http://bugs.python.org/issue504152 (and lots of testing), it seems
- # we must get rid of the sequence \n\t at least in the header. If we
- # do this *before* doing any MIME decoding, we should be safe against
- # anybody *actually* putting that sequence in the header (since we
- # won't match the encoded contents)
- hdr = hdr.replace("\n\t"," ")
-
- # In at least some cases, at least gmail (and possibly other MUAs)
- # incorrectly put double quotes in the name/email field even when
- # it's encoded. That's not allowed - they have to be escaped - but
- # since there's a fair amount of those, we apply a regex to get
- # rid of them.
- m = self._re_mailworkaround.search(hdr)
- if m:
- hdr = self._re_mailworkaround.sub(r'\1', hdr)
-
- try:
- return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)])
- except HeaderParseError as e:
- # Parser error is typically someone specifying an encoding,
- # but then not actually using that encoding. We'll do the best
- # we can, which is cut it down to ascii and ignore errors
- return str(hdr, 'us-ascii', errors='ignore').strip(' ')
-
- def decode_mime_header(self, hdr, email_workaround=False):
- try:
- if isinstance(hdr, Header):
- hdr = hdr.encode()
-
- h = self._decode_mime_header(hdr, email_workaround)
- if h:
- return h.replace("\0", "")
- return ''
- except LookupError as e:
- raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
- except ValueError as ve:
- raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))
-
- def get_mandatory(self, fieldname):
- try:
- x = self.msg[fieldname]
- if x==None:
- raise Exception()
- return x
- except:
- raise IgnorableException("Mandatory field '%s' is missing" % fieldname)
-
- def get_optional(self, fieldname):
- try:
- return self.msg[fieldname]
- except:
- return ''
-
- def html_clean(self, html):
- # First we pass it through tidy
- (html, errors) = tidylib.tidy_document(html,
- options={
- 'drop-proprietary-attributes': 1,
- 'alt-text': '',
- 'hide-comments': 1,
- 'output-xhtml': 1,
- 'show-body-only': 1,
- 'clean': 1,
- 'char-encoding': 'utf8',
- 'show-warnings': 0,
- 'show-info': 0,
- })
- if errors:
- print(("HTML tidy failed for %s!" % self.msgid))
- print(errors)
- return None
-
- try:
- cleaner = HTMLCleaner()
- cleaner.feed(html)
- return cleaner.get_text()
- except Exception as e:
- # Failed to parse the html, thus failed to clean it. so we must
- # give up...
- return None
+ def __init__(self):
+ self.parser = BytesParser(policy=compat32)
+
+ def parse(self, stream):
+ self.rawtxt = stream.read()
+ self.msg = self.parser.parse(io.BytesIO(self.rawtxt))
+
+ def is_msgid(self, msgid):
+ # Look for a specific messageid. This means we might parse it twice,
+ # but so be it. Any exception means we know it's not this one...
+ try:
+ if self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID'))) == msgid:
+ return True
+ except Exception as e:
+ return False
+
+ def analyze(self, date_override=None):
+ self.msgid = self.clean_messageid(self.decode_mime_header(self.get_mandatory('Message-ID')))
+ self._from = self.decode_mime_header(self.get_mandatory('From'), True)
+ self.to = self.decode_mime_header(self.get_optional('To'), True)
+ self.cc = self.decode_mime_header(self.get_optional('CC'), True)
+ self.subject = self.decode_mime_header(self.get_optional('Subject'))
+ if date_override:
+ self.date = self.forgiving_date_decode(date_override)
+ else:
+ self.date = self.forgiving_date_decode(self.decode_mime_header(self.get_mandatory('Date')))
+ self.bodytxt = self.get_body()
+ self.attachments = []
+ self.get_attachments()
+ if len(self.attachments) > 0:
+ log.status("Found %s attachments" % len(self.attachments))
+
+        # Build a list of the message ids we are interested in
+ self.parents = []
+ # The first one is in-reply-to, if it exists
+ if self.get_optional('in-reply-to'):
+ m = self.clean_messageid(self.decode_mime_header(self.get_optional('in-reply-to')), True)
+ if m:
+ self.parents.append(m)
+
+ # Then we add all References values, in backwards order
+ if self.get_optional('references'):
+ cleaned_msgids = [self.clean_messageid(x, True) for x in reversed(self.decode_mime_header(self.get_optional('references')).split())]
+ # Can't do this with a simple self.parents.extend() due to broken
+ # mailers that add the same reference more than once. And we can't
+ # use a set() to make it unique, because order is very important
+ for m in cleaned_msgids:
+ if m and not m in self.parents:
+ self.parents.append(m)
+
+
+ def clean_charset(self, charset):
+ lcharset = charset.lower()
+ if lcharset == 'unknown-8bit' or lcharset == 'x-unknown' or lcharset == 'unknown':
+ # Special case where we don't know... We'll assume
+ # us-ascii and use replacements
+ return 'us-ascii'
+ if lcharset == '0' or lcharset == 'x-user-defined' or lcharset == '_autodetect_all' or lcharset == 'default_charset':
+ # Seriously broken charset definitions, map to us-ascii
+ # and throw away the rest with replacements
+ return 'us-ascii'
+ if lcharset == 'x-gbk':
+ # Some MUAs set it to x-gbk, but there is a valid
+            # declaration as gbk...
+ return 'gbk'
+ if lcharset == 'iso-8859-8-i':
+ # -I is a special logical version, but should be the
+ # same charset
+ return 'iso-8859-8'
+ if lcharset == 'windows-874':
+ # This is an alias for iso-8859-11
+ return 'iso-8859-11'
+ if lcharset == 'iso-88-59-1' or lcharset == 'iso-8858-1':
+ # Strange way of saying 8859....
+ return 'iso-8859-1'
+ if lcharset == 'iso885915':
+ return 'iso-8859-15'
+ if lcharset == 'iso-latin-2':
+ return 'iso-8859-2'
+ if lcharset == 'iso-850':
+ # Strange spelling of cp850 (windows charset)
+ return 'cp850'
+ if lcharset == 'koi8r':
+ return 'koi8-r'
+ if lcharset == 'cp 1252':
+ return 'cp1252'
+ if lcharset == 'iso-8859-1,iso-8859-2' or lcharset == 'iso-8859-1:utf8:us-ascii':
+ # Why did this show up more than once?!
+ return 'iso-8859-1'
+ if lcharset == 'x-windows-949':
+ return 'ms949'
+ if lcharset == 'pt_pt' or lcharset == 'de_latin' or lcharset == 'de':
+ # This is a locale, and not a charset, but most likely it's this one
+ return 'iso-8859-1'
+ if lcharset == 'iso-8858-15':
+ # How is this a *common* mistake?
+ return 'iso-8859-15'
+ if lcharset == 'macintosh':
+ return 'mac_roman'
+ if lcharset == 'cn-big5':
+ return 'big5'
+ if lcharset == 'x-unicode-2-0-utf-7':
+ return 'utf-7'
+ if lcharset == 'tscii':
+ # No support for this charset :S Map it down to ascii
+ # and throw away all the rest. sucks, but we have to
+ return 'us-ascii'
+ return charset
+
+ def get_payload_as_unicode(self, msg):
+ try:
+ b = msg.get_payload(decode=True)
+ except AssertionError:
+ # Badly encoded data can throw an exception here, where the python
+ # libraries fail to handle it and enters a cannot-happen path.
+ # In which case we just ignore it and hope for a better MIME part later.
+ b = None
+
+ if b:
+ # Find out if there is a charset
+ charset = None
+ params = msg.get_params()
+ if not params:
+ # No content-type, so we assume us-ascii
+ return str(b, 'us-ascii', errors='ignore')
+ for k,v in params:
+ if k.lower() == 'charset':
+ charset = v
+ break
+ if charset:
+ try:
+ return str(b, self.clean_charset(charset), errors='ignore')
+ except LookupError as e:
+ raise IgnorableException("Failed to get unicode payload: %s" % e)
+ else:
+ # XXX: reasonable default?
+ return str(b, errors='ignore')
+ # Return None or empty string, depending on what we got back
+ return b
+
+ # Regular expression matching the PostgreSQL custom mail footer that
+ # is appended to all emails.
+ _re_footer = re.compile('(.*)--\s+\nSent via [^\s]+ mailing list \([^\)]+\)\nTo make changes to your subscription:\nhttp://www\.postgresql\.org/mailpref/[^\s]+\s*$', re.DOTALL)
+ def get_body(self):
+ b = self._get_body()
+ if b:
+ # Python bug 9133, allows unicode surrogate pairs - which PostgreSQL will
+ # later reject..
+ if b.find('\udbff\n\udef8'):
+ b = b.replace('\udbff\n\udef8', '')
+
+ # Remove postgres specific mail footer - if it's there
+ m = self._re_footer.match(b)
+ if m:
+ b = m.group(1)
+
+ # Sometimes we end up with a trailing \0 when decoding long strings, so
+ # replace it if it's there.
+ # In fact, replace it everywhere, since it can also turn up in the middle
+ # of a text when it's a really broken decoding.
+ b = b.replace('\0', '')
+
+ return b
+
+ def _get_body(self):
+ # This is where the magic happens - try to figure out what the body
+ # of this message should render as.
+ hasempty = False
+
+ # First see if this is a single-part message that we can just
+ # decode and go.
+ b = self.get_payload_as_unicode(self.msg)
+ if b: return b
+ if b == '':
+ # We found something, but it was empty. We'll keep looking as
+ # there might be something better available, but make a note
+ # that empty exists.
+ hasempty = True
+
+ # Ok, it's multipart. Find the first part that is text/plain,
+ # and use that one. Do this recursively, since we may have something
+ # like:
+ # multipart/mixed:
+ # multipart/alternative:
+ # text/plain
+ # text/html
+ # application/octet-stream (attachment)
+ b = self.recursive_first_plaintext(self.msg)
+ if b: return b
+ if b == '':
+ hasempty = True
+
+ # Couldn't find a plaintext. Look for the first HTML in that case.
+ # Fallback, but what can we do at this point...
+ b = self.recursive_first_plaintext(self.msg, True)
+ if b:
+ b = self.html_clean(b)
+ if b: return b
+ if b == '' or b is None:
+ hasempty = True
+
+ if hasempty:
+ log.status('Found empty body in %s' % self.msgid)
+ return ''
+ raise IgnorableException("Don't know how to read the body from %s" % self.msgid)
+
+ def recursive_first_plaintext(self, container, html_instead=False):
+ pl = container.get_payload()
+ if isinstance(pl, str):
+ # This was not a multipart, but it leaked... Give up!
+ return None
+ for p in pl:
+ if p.get_params() == None:
+ # MIME multipart/mixed, but no MIME type on the part
+ log.status("Found multipart/mixed in message '%s', but no MIME type on part. Trying text/plain." % self.msgid)
+ return self.get_payload_as_unicode(p)
+ if p.get_params()[0][0].lower() == 'text/plain':
+ # Don't include it if it looks like an attachment
+ if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
+ continue
+ return self.get_payload_as_unicode(p)
+ if html_instead and p.get_params()[0][0].lower() == 'text/html':
+ # Don't include it if it looks like an attachment
+ if 'Content-Disposition' in p and p['Content-Disposition'].startswith('attachment'):
+ continue
+ return self.get_payload_as_unicode(p)
+ if p.is_multipart():
+ b = self.recursive_first_plaintext(p, html_instead)
+ if b or b == '': return b
+
+ # Yikes, nothing here! Hopefully we'll find something when
+ # we continue looping at a higher level.
+ return None
+
+ def get_attachments(self):
+ self.attachments_found_first_plaintext = False
+ self.recursive_get_attachments(self.msg)
+
+ # Clean a filename's encoding and return it as a unicode string
+ def _clean_filename_encoding(self, filename):
+ # If this is a header-encoded filename, start by decoding that
+ if filename.startswith('=?'):
+ decoded, encoding = decode_header(filename)[0]
+ return str(decoded, encoding or 'us-ascii', errors='ignore')
+
+ # If it's already unicode, just return it
+ if isinstance(filename, str):
+ return filename
+
+ # Anything that's not UTF8, we just get rid of. We can live with
+ # filenames slightly mangled in this case.
+ return str(filename, 'utf-8', errors='ignore')
+
+ def _extract_filename(self, container):
+ # Try to get the filename for an attachment in the container.
+ # If the standard library can figure one out, use that one.
+ f = container.get_filename()
+ if f: return self._clean_filename_encoding(f)
+
+ # Failing that, some mailers set Content-Description to the
+ # filename
+ if 'Content-Description' in container:
+ return self._clean_filename_encoding(container['Content-Description'])
+ return None
+
+ def recursive_get_attachments(self, container):
+ # We start recursion in the "multipart" container if any
+ if container.get_content_type() == 'multipart/mixed' or container.get_content_type() == 'multipart/signed':
+ # Multipart - worth scanning into
+ if not container.is_multipart():
+ # Wow, this is broken. It's multipart/mixed, but doesn't
+ # contain multiple parts.
+ # Since we're just looking for attachments, let's just
+ # ignore it...
+ return
+ for p in container.get_payload():
+ if p.get_params() == None:
+ continue
+ self.recursive_get_attachments(p)
+ elif container.get_content_type() == 'multipart/alternative':
+ # Alternative is not an attachment (we decide)
+ # It's typically plaintext + html
+ self.attachments_found_first_plaintext = True
+ return
+ elif container.is_multipart():
+ # Other kinds of multipart, such as multipart/related...
+ return
+ else:
+ # Not a multipart.
+ # Exclude specific contenttypes
+ if container.get_content_type() == 'application/pgp-signature':
+ return
+ if container.get_content_type() in ('application/pkcs7-signature', 'application/x-pkcs7-signature'):
+ return
+ # For now, accept anything not text/plain
+ if container.get_content_type() != 'text/plain':
+ try:
+ self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+ except AssertionError:
+ # Badly encoded data can throw an exception here, where the python
+ # libraries fail to handle it and enters a cannot-happen path.
+ # In which case we just ignore this attachment.
+ return
+ return
+
+ # It's a text/plain, it might be worthwhile.
+ # If it has a name, we consider it an attachment
+ if not container.get_params():
+ return
+ for k,v in container.get_params():
+ if k=='name' and v != '':
+ # Yes, it has a name
+ try:
+ self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+ except AssertionError:
+ # Badly encoded data can throw an exception here, where the python
+ # libraries fail to handle it and enters a cannot-happen path.
+ # In which case we just ignore this attachment.
+ return
+
+ return
+
+ # If it's content-disposition=attachment, we also want to save it
+ if 'Content-Disposition' in container and container['Content-Disposition'].startswith('attachment'):
+ try:
+ self.attachments.append((self._extract_filename(container), container.get_content_type(), container.get_payload(decode=True)))
+ except AssertionError:
+ # Badly encoded data can throw an exception here, where the python
+ # libraries fail to handle it and enters a cannot-happen path.
+ # In which case we just ignore this attachment.
+ return
+
+ return
+
+ # If we have already found one text/plain part, make all
+ # further text/plain parts attachments
+ if self.attachments_found_first_plaintext:
+ # However, this will also *always* catch the MIME part added
+ # by majordomo with the footer. So if that one is present,
+ # we need to explicitly exclude it again.
+ try:
+ b = container.get_payload(decode=True)
+ except AssertionError:
+ # Badly encoded data can throw an exception here, where the python
+ # libraries fail to handle it and enters a cannot-happen path.
+ # In which case we just ignore this attachment.
+ return
+
+ if isinstance(b, str) and not self._re_footer.match(b):
+ # We know there is no name for this one
+ self.attachments.append((None, container.get_content_type(), b))
+ return
+
+ # Ok, so this was a plaintext that we ignored. Set the flag
+ # that we have now ignored one, so we'll make the next one
+ # an attachment.
+ self.attachments_found_first_plaintext = True
+ # No name, and text/plain, so ignore it
+
+ re_msgid = re.compile(r'^\s*<(.*)>\s*')
+ def clean_messageid(self, messageid, ignorebroken=False):
+ m = self.re_msgid.match(messageid)
+ if not m:
+ if ignorebroken:
+ log.status("Could not parse messageid '%s', ignoring it" % messageid)
+ return None
+ raise IgnorableException("Could not parse message id '%s'" % messageid)
+ return m.groups(1)[0].replace(' ','')
+
+# _date_multi_re = re.compile(' \((\w+\s\w+(\s+\w+)*|)\)$')
+ # Now using [^\s] instead of \w, to work with japanese chars
+ _date_multi_re = re.compile(r' \(([^\s]+\s[^\s]+(\s+[^\s]+)*|)\)$')
+ _date_multi_re2 = re.compile(r' ([\+-]\d{4}) \([^)]+\)$')
+ _date_multiminus_re = re.compile(r' -(-\d+)$')
+ _date_offsetnoplus_re = re.compile(r' (\d{4})$')
+ def forgiving_date_decode(self, d):
+ if d.strip() == '':
+ raise IgnorableException("Failed to parse empty date")
+ # Strange timezones requiring manual adjustments
+ if d.endswith('-7700 (EST)'):
+ d = d.replace('-7700 (EST)', 'EST')
+ if d.endswith('+6700 (EST)'):
+ d = d.replace('+6700 (EST)', 'EST')
+ if d.endswith('+-4-30'):
+ d = d.replace('+-4-30', '+0430')
+ if d.endswith('+1.00'):
+ d = d.replace('+1.00', '+0100')
+ if d.endswith('+-100'):
+ d = d.replace('+-100', '+0100')
+ if d.endswith('+500'):
+ d = d.replace('+500', '+0500')
+ if d.endswith('-500'):
+ d = d.replace('-500', '-0500')
+ if d.endswith('-700'):
+ d = d.replace('-700', '-0700')
+ if d.endswith('-800'):
+ d = d.replace('-800', '-0800')
+ if d.endswith('+05-30'):
+ d = d.replace('+05-30', '+0530')
+ if d.endswith('+0-900'):
+ d = d.replace('+0-900', '-0900')
+ if d.endswith('Mexico/General'):
+ d = d.replace('Mexico/General','CDT')
+ if d.endswith('Pacific Daylight Time'):
+ d = d.replace('Pacific Daylight Time', 'PDT')
+ if d.endswith(' ZE2'):
+ d = d.replace(' ZE2',' +0200')
+ if d.find('-Juin-') > 0:
+ d = d.replace('-Juin-','-Jun-')
+ if d.find('-Juil-') > 0:
+ d = d.replace('-Juil-','-Jul-')
+ if d.find(' 0 (GMT)') > 0:
+ d = d.replace(' 0 (GMT)',' +0000')
+
+ if self._date_multiminus_re.search(d):
+ d = self._date_multiminus_re.sub(' \\1', d)
+
+ if self._date_offsetnoplus_re.search(d):
+ d = self._date_offsetnoplus_re.sub('+\\1', d)
+
+
+ # We have a number of dates in the format
+ # "<full datespace> +0200 (MET DST)"
+ # or similar. The problem coming from the space within the
+ # parenthesis, or if the contents of the parenthesis is
+ # completely empty
+ if self._date_multi_re.search(d):
+ d = self._date_multi_re.sub('', d)
+
+ # If the spec is instead
+ # "<full datespace> +0200 (...)"
+ # of any kind, we can just remove what's in the (), because the
+ # parser is just going to rely on the fixed offset anyway.
+ if self._date_multi_re2.search(d):
+ d = self._date_multi_re2.sub(' \\1', d)
+
+ try:
+ dp = dateutil.parser.parse(d, fuzzy=True)
+
+ # Some offsets are >16 hours, which postgresql will not
+ # (for good reasons) accept
+ if dp.utcoffset() and abs(dp.utcoffset().days * (24*60*60) + dp.utcoffset().seconds) > 60*60*16-1:
+ # Convert it to a UTC timestamp using Python. It will give
+ # us the right time, but the wrong timezone. Should be
+ # enough...
+ dp = datetime.datetime(*dp.utctimetuple()[:6])
+ return dp
+ except Exception as e:
+ raise IgnorableException("Failed to parse date '%s': %s" % (d, e))
+
+ def _maybe_decode(self, s, charset):
+ if isinstance(s, str):
+ return s.strip(' ')
+ return str(s, charset and self.clean_charset(charset) or 'us-ascii', errors='ignore').strip(' ')
+
+ # Workaround for broken quoting in some MUAs (see below)
+ _re_mailworkaround = re.compile(r'"(=\?[^\?]+\?[QB]\?[^\?]+\?=)"', re.IGNORECASE)
+ def _decode_mime_header(self, hdr, email_workaround):
+ if hdr == None:
+ return None
+
+ # Per http://bugs.python.org/issue504152 (and lots of testing), it seems
+ # we must get rid of the sequence \n\t at least in the header. If we
+ # do this *before* doing any MIME decoding, we should be safe against
+ # anybody *actually* putting that sequence in the header (since we
+ # won't match the encoded contents)
+ hdr = hdr.replace("\n\t"," ")
+
+ # In at least some cases, at least gmail (and possibly other MUAs)
+ # incorrectly put double quotes in the name/email field even when
+ # it's encoded. That's not allowed - they have to be escaped - but
+ # since there's a fair amount of those, we apply a regex to get
+ # rid of them.
+ m = self._re_mailworkaround.search(hdr)
+ if m:
+ hdr = self._re_mailworkaround.sub(r'\1', hdr)
+
+ try:
+ return " ".join([self._maybe_decode(s, charset) for s, charset in decode_header(hdr)])
+ except HeaderParseError as e:
+ # Parser error is typically someone specifying an encoding,
+ # but then not actually using that encoding. We'll do the best
+ # we can, which is cut it down to ascii and ignore errors
+ return hdr.encode('us-ascii', errors='ignore').decode('us-ascii').strip(' ')
+
+ def decode_mime_header(self, hdr, email_workaround=False):
+ try:
+ if isinstance(hdr, Header):
+ hdr = hdr.encode()
+
+ h = self._decode_mime_header(hdr, email_workaround)
+ if h:
+ return h.replace("\0", "")
+ return ''
+ except LookupError as e:
+ raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, e))
+ except ValueError as ve:
+ raise IgnorableException("Failed to decode header value '%s': %s" % (hdr, ve))
+
+ def get_mandatory(self, fieldname):
+ try:
+ x = self.msg[fieldname]
+ if x==None:
+ raise Exception()
+ return x
+ except:
+ raise IgnorableException("Mandatory field '%s' is missing" % fieldname)
+
+ def get_optional(self, fieldname):
+ try:
+ return self.msg[fieldname]
+ except:
+ return ''
+
+ def html_clean(self, html):
+ # First we pass it through tidy
+ (html, errors) = tidylib.tidy_document(html,
+ options={
+ 'drop-proprietary-attributes': 1,
+ 'alt-text': '',
+ 'hide-comments': 1,
+ 'output-xhtml': 1,
+ 'show-body-only': 1,
+ 'clean': 1,
+ 'char-encoding': 'utf8',
+ 'show-warnings': 0,
+ 'show-info': 0,
+ })
+ if errors:
+ print(("HTML tidy failed for %s!" % self.msgid))
+ print(errors)
+ return None
+
+ try:
+ cleaner = HTMLCleaner()
+ cleaner.feed(html)
+ return cleaner.get_text()
+ except Exception as e:
+ # Failed to parse the html, thus failed to clean it. so we must
+ # give up...
+ return None
class HTMLCleaner(HTMLParser):
- def __init__(self):
- HTMLParser.__init__(self)
- self.io = io.StringIO()
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.io = io.StringIO()
- def get_text(self):
- return self.io.getvalue()
+ def get_text(self):
+ return self.io.getvalue()
- def handle_data(self, data):
- self.io.write(data)
+ def handle_data(self, data):
+ self.io.write(data)
- def handle_starttag(self, tag, attrs):
- if tag == "p" or tag == "br":
- self.io.write("\n")
+ def handle_starttag(self, tag, attrs):
+ if tag == "p" or tag == "br":
+ self.io.write("\n")
from lib.log import log, opstatus
class ArchivesParserStorage(ArchivesParser):
- def __init__(self):
- super(ArchivesParserStorage, self).__init__()
- self.purges = set()
+ def __init__(self):
+ super(ArchivesParserStorage, self).__init__()
+ self.purges = set()
- def purge_list(self, listid, year, month):
- self.purges.add((int(listid), int(year), int(month)))
+ def purge_list(self, listid, year, month):
+ self.purges.add((int(listid), int(year), int(month)))
- def purge_thread(self, threadid):
- self.purges.add(int(threadid))
+ def purge_thread(self, threadid):
+ self.purges.add(int(threadid))
- def store(self, conn, listid, overwrite=False):
- curs = conn.cursor()
+ def store(self, conn, listid, overwrite=False):
+ curs = conn.cursor()
- # Potentially add the information that there exists a mail for
- # this month. We do that this early since we're always going to
- # make the check anyway, and this keeps the code in one place..
- if not overwrite:
- curs.execute("INSERT INTO list_months (listid, year, month) SELECT %(listid)s, %(year)s, %(month)s WHERE NOT EXISTS (SELECT listid FROM list_months WHERE listid=%(listid)s AND year=%(year)s AND month=%(month)s)", {
- 'listid': listid,
- 'year': self.date.year,
- 'month': self.date.month,
- })
+ # Potentially add the information that there exists a mail for
+ # this month. We do that this early since we're always going to
+ # make the check anyway, and this keeps the code in one place..
+ if not overwrite:
+ curs.execute("INSERT INTO list_months (listid, year, month) SELECT %(listid)s, %(year)s, %(month)s WHERE NOT EXISTS (SELECT listid FROM list_months WHERE listid=%(listid)s AND year=%(year)s AND month=%(month)s)", {
+ 'listid': listid,
+ 'year': self.date.year,
+ 'month': self.date.month,
+ })
- curs.execute("SELECT threadid, EXISTS(SELECT threadid FROM list_threads lt WHERE lt.listid=%(listid)s AND lt.threadid=m.threadid), id FROM messages m WHERE m.messageid=%(messageid)s", {
- 'messageid': self.msgid,
- 'listid': listid,
- })
- r = curs.fetchall()
- if len(r) > 0:
- # Has to be 1 row, since we have a unique index on id
- if not r[0][1] and not overwrite:
- log.status("Tagging message %s with list %s" % (self.msgid, listid))
- curs.execute("INSERT INTO list_threads (threadid, listid) VALUES (%(threadid)s, %(listid)s)", {
- 'threadid': r[0][0],
- 'listid': listid,
- })
- opstatus.tagged += 1
- self.purge_list(listid, self.date.year, self.date.month)
- self.purge_thread(r[0][0])
- else:
- opstatus.dupes += 1
+ curs.execute("SELECT threadid, EXISTS(SELECT threadid FROM list_threads lt WHERE lt.listid=%(listid)s AND lt.threadid=m.threadid), id FROM messages m WHERE m.messageid=%(messageid)s", {
+ 'messageid': self.msgid,
+ 'listid': listid,
+ })
+ r = curs.fetchall()
+ if len(r) > 0:
+ # Has to be 1 row, since we have a unique index on id
+ if not r[0][1] and not overwrite:
+ log.status("Tagging message %s with list %s" % (self.msgid, listid))
+ curs.execute("INSERT INTO list_threads (threadid, listid) VALUES (%(threadid)s, %(listid)s)", {
+ 'threadid': r[0][0],
+ 'listid': listid,
+ })
+ opstatus.tagged += 1
+ self.purge_list(listid, self.date.year, self.date.month)
+ self.purge_thread(r[0][0])
+ else:
+ opstatus.dupes += 1
- if overwrite:
- pk = r[0][2]
- self.purge_thread(r[0][0])
- # Overwrite an existing message. We do not attempt to
- # "re-thread" a message, we just update the contents. We
- # do remove all attachments and rewrite them. Of course, we
- # don't change the messageid (since it's our primary
- # identifyer), and we don't update the raw text of the message.
- # (since we are expected to have used that raw text to do
- # the re-parsing initially)
- # We update bodytext as a separate step so as not to rewrite
- # the TOAST table unnecessarily...
- curs.execute("UPDATE messages SET bodytxt=%(bodytxt)s WHERE id=%(id)s AND NOT (bodytxt=%(bodytxt)s) RETURNING id", {
- 'id': pk,
- 'bodytxt': self.bodytxt,
- })
- rc = curs.rowcount
- curs.execute("UPDATE messages SET _from=%(from)s, _to=%(to)s, cc=%(cc)s, subject=%(subject)s, date=%(date)s, has_attachment=%(has_attachment)s WHERE id=%(id)s AND NOT (_from=%(from)s AND _to=%(to)s AND cc=%(cc)s AND subject=%(subject)s AND date=%(date)s AND has_attachment=%(has_attachment)s) RETURNING id", {
- 'id': pk,
- 'from': self._from,
- 'to': self.to or '',
- 'cc': self.cc or '',
- 'subject': self.subject or '',
- 'date': self.date,
- 'has_attachment': len(self.attachments) > 0,
- })
- rc += curs.rowcount
- if rc == 0:
- log.status("Message %s unchanged" % self.msgid)
- return False
+ if overwrite:
+ pk = r[0][2]
+ self.purge_thread(r[0][0])
+ # Overwrite an existing message. We do not attempt to
+ # "re-thread" a message, we just update the contents. We
+ # do remove all attachments and rewrite them. Of course, we
+ # don't change the messageid (since it's our primary
+ # identifier), and we don't update the raw text of the message.
+ # (since we are expected to have used that raw text to do
+ # the re-parsing initially)
+ # We update bodytext as a separate step so as not to rewrite
+ # the TOAST table unnecessarily...
+ curs.execute("UPDATE messages SET bodytxt=%(bodytxt)s WHERE id=%(id)s AND NOT (bodytxt=%(bodytxt)s) RETURNING id", {
+ 'id': pk,
+ 'bodytxt': self.bodytxt,
+ })
+ rc = curs.rowcount
+ curs.execute("UPDATE messages SET _from=%(from)s, _to=%(to)s, cc=%(cc)s, subject=%(subject)s, date=%(date)s, has_attachment=%(has_attachment)s WHERE id=%(id)s AND NOT (_from=%(from)s AND _to=%(to)s AND cc=%(cc)s AND subject=%(subject)s AND date=%(date)s AND has_attachment=%(has_attachment)s) RETURNING id", {
+ 'id': pk,
+ 'from': self._from,
+ 'to': self.to or '',
+ 'cc': self.cc or '',
+ 'subject': self.subject or '',
+ 'date': self.date,
+ 'has_attachment': len(self.attachments) > 0,
+ })
+ rc += curs.rowcount
+ if rc == 0:
+ log.status("Message %s unchanged" % self.msgid)
+ return False
- curs.execute("DELETE FROM attachments WHERE message=%(message)s", {
- 'message': pk,
- })
- if len(self.attachments):
- curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
- 'message': pk,
- 'filename': a[0] or 'unknown_filename',
- 'contenttype': a[1],
- 'attachment': bytearray(a[2]),
- } for a in self.attachments])
- opstatus.overwritten += 1
- log.status("Message %s overwritten" % self.msgid)
- else:
- log.status("Message %s already stored" % self.msgid)
- return True
+ curs.execute("DELETE FROM attachments WHERE message=%(message)s", {
+ 'message': pk,
+ })
+ if len(self.attachments):
+ curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+ 'message': pk,
+ 'filename': a[0] or 'unknown_filename',
+ 'contenttype': a[1],
+ 'attachment': bytearray(a[2]),
+ } for a in self.attachments])
+ opstatus.overwritten += 1
+ log.status("Message %s overwritten" % self.msgid)
+ else:
+ log.status("Message %s already stored" % self.msgid)
+ return True
- if overwrite:
- raise Exception("Attempt to overwrite message (%s) that doesn't exist on list %s!" % (self.msgid, listid))
- # Always purge the primary list for this thread
- self.purge_list(listid, self.date.year, self.date.month)
+ if overwrite:
+ raise Exception("Attempt to overwrite message (%s) that doesn't exist on list %s!" % (self.msgid, listid))
+ # Always purge the primary list for this thread
+ self.purge_list(listid, self.date.year, self.date.month)
- # Resolve own thread
- curs.execute("SELECT id, messageid, threadid FROM messages WHERE messageid=ANY(%(parents)s)", {
- 'parents': self.parents,
- })
- all_parents = curs.fetchall()
- if len(all_parents):
- # At least one of the parents exist. Now try to figure out which one
- best_parent = len(self.parents)+1
- best_threadid = -1
- best_parentid = None
- for i in range(0,len(all_parents)):
- for j in range(0,len(self.parents)):
- if self.parents[j] == all_parents[i][1]:
- # This messageid found. Better than the last one?
- if j < best_parent:
- best_parent = j
- best_parentid = all_parents[i][0]
- best_threadid = all_parents[i][2]
- if best_threadid == -1:
- raise Exception("Message %s, resolve failed in a way it shouldn't :P" % selg.msgid)
- self.parentid = best_parentid
- self.threadid = best_threadid
- # Slice away all matches that are worse than the one we wanted
- self.parents = self.parents[:best_parent]
+ # Resolve own thread
+ curs.execute("SELECT id, messageid, threadid FROM messages WHERE messageid=ANY(%(parents)s)", {
+ 'parents': self.parents,
+ })
+ all_parents = curs.fetchall()
+ if len(all_parents):
+ # At least one of the parents exist. Now try to figure out which one
+ best_parent = len(self.parents)+1
+ best_threadid = -1
+ best_parentid = None
+ for i in range(0,len(all_parents)):
+ for j in range(0,len(self.parents)):
+ if self.parents[j] == all_parents[i][1]:
+ # This messageid found. Better than the last one?
+ if j < best_parent:
+ best_parent = j
+ best_parentid = all_parents[i][0]
+ best_threadid = all_parents[i][2]
+ if best_threadid == -1:
+ raise Exception("Message %s, resolve failed in a way it shouldn't :P" % self.msgid)
+ self.parentid = best_parentid
+ self.threadid = best_threadid
+ # Slice away all matches that are worse than the one we wanted
+ self.parents = self.parents[:best_parent]
- log.status("Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents)))
- else:
- # No parent exist. But don't create the threadid just yet, since
- # it's possible that we're somebody elses parent!
- self.parentid = None
- self.threadid = None
+ log.status("Message %s resolved to existing thread %s, waiting for %s better messages" % (self.msgid, self.threadid, len(self.parents)))
+ else:
+ # No parent exist. But don't create the threadid just yet, since
+ # it's possible that we're somebody elses parent!
+ self.parentid = None
+ self.threadid = None
- # Now see if we are somebody elses *parent*...
- curs.execute("SELECT message, priority, threadid FROM unresolved_messages INNER JOIN messages ON messages.id=unresolved_messages.message WHERE unresolved_messages.msgid=%(msgid)s ORDER BY threadid", {
- 'msgid': self.msgid,
- })
- childrows = curs.fetchall()
- if len(childrows):
- # We are some already existing message's parent (meaning the
- # messages arrived out of order)
- # In the best case, the threadid is the same for all threads.
- # But it might be different if this it the "glue message" that's
- # holding other threads together.
- if self.threadid:
- # Already have a threadid, means that we have a glue message
- print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid))
- else:
- print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid)
- # In this case, just pick the first thread from the list and merge into that
- # one.
- self.threadid = childrows[0][2]
+ # Now see if we are somebody elses *parent*...
+ curs.execute("SELECT message, priority, threadid FROM unresolved_messages INNER JOIN messages ON messages.id=unresolved_messages.message WHERE unresolved_messages.msgid=%(msgid)s ORDER BY threadid", {
+ 'msgid': self.msgid,
+ })
+ childrows = curs.fetchall()
+ if len(childrows):
+ # We are some already existing message's parent (meaning the
+ # messages arrived out of order)
+ # In the best case, the threadid is the same for all threads.
+ # But it might be different if this is the "glue message" that's
+ # holding other threads together.
+ if self.threadid:
+ # Already have a threadid, means that we have a glue message
+ print("Message %s resolved to existing thread %s, while being somebodys parent" % (self.msgid, self.threadid))
+ else:
+ print("Message %s did not resolve to existing thread, but is somebodys parent" % self.msgid)
+ # In this case, just pick the first thread from the list and merge into that
+ # one.
+ self.threadid = childrows[0][2]
- # Get a unique list (set) of all threads *except* the primary one,
- # because we'll be merging into that one.
- mergethreads = set([r[2] for r in childrows]).difference(set((self.threadid,)))
- if len(mergethreads):
- # We have one or more merge threads
- log.status("Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid))
- curs.execute("UPDATE messages SET threadid=%(threadid)s WHERE threadid=ANY(%(oldthreadids)s)", {
- 'threadid': self.threadid,
- 'oldthreadids': list(mergethreads),
- })
- # Insert any lists that were tagged on the merged threads
- curs.execute("INSERT INTO list_threads (threadid, listid) SELECT DISTINCT %(threadid)s,listid FROM list_threads lt2 WHERE lt2.threadid=ANY(%(oldthreadids)s) AND listid NOT IN (SELECT listid FROM list_threads lt3 WHERE lt3.threadid=%(threadid)s)", {
- 'threadid': self.threadid,
- 'oldthreadids': list(mergethreads),
- })
- # Remove all old leftovers
- curs.execute("DELETE FROM list_threads WHERE threadid=ANY(%(oldthreadids)s)", {
- 'oldthreadids': list(mergethreads),
- })
- # Purge varnish records for all the threads we just removed
- for t in mergethreads:
- self.purge_thread(t)
+ # Get a unique list (set) of all threads *except* the primary one,
+ # because we'll be merging into that one.
+ mergethreads = set([r[2] for r in childrows]).difference(set((self.threadid,)))
+ if len(mergethreads):
+ # We have one or more merge threads
+ log.status("Merging threads %s into thread %s" % (",".join(str(s) for s in mergethreads), self.threadid))
+ curs.execute("UPDATE messages SET threadid=%(threadid)s WHERE threadid=ANY(%(oldthreadids)s)", {
+ 'threadid': self.threadid,
+ 'oldthreadids': list(mergethreads),
+ })
+ # Insert any lists that were tagged on the merged threads
+ curs.execute("INSERT INTO list_threads (threadid, listid) SELECT DISTINCT %(threadid)s,listid FROM list_threads lt2 WHERE lt2.threadid=ANY(%(oldthreadids)s) AND listid NOT IN (SELECT listid FROM list_threads lt3 WHERE lt3.threadid=%(threadid)s)", {
+ 'threadid': self.threadid,
+ 'oldthreadids': list(mergethreads),
+ })
+ # Remove all old leftovers
+ curs.execute("DELETE FROM list_threads WHERE threadid=ANY(%(oldthreadids)s)", {
+ 'oldthreadids': list(mergethreads),
+ })
+ # Purge varnish records for all the threads we just removed
+ for t in mergethreads:
+ self.purge_thread(t)
- # Batch all the children for repointing. We can't do the actual
- # repointing until later, since we don't know our own id yet.
- self.children = [r[0] for r in childrows]
- log.status("Children set to %s with mergethreads being %s (from childrows %s and threadid %s)" % (
- self.children, mergethreads, childrows, self.threadid))
+ # Batch all the children for repointing. We can't do the actual
+ # repointing until later, since we don't know our own id yet.
+ self.children = [r[0] for r in childrows]
+ log.status("Children set to %s with mergethreads being %s (from childrows %s and threadid %s)" % (
+ self.children, mergethreads, childrows, self.threadid))
- # Finally, remove all the pending messages that had a higher
- # priority value (meaning less important) than us
- curs.executemany("DELETE FROM unresolved_messages WHERE message=%(msg)s AND priority >= %(prio)s", [{
- 'msg': msg,
- 'prio': prio,
- } for msg, prio, tid in childrows])
- else:
- self.children = []
+ # Finally, remove all the pending messages that had a higher
+ # priority value (meaning less important) than us
+ curs.executemany("DELETE FROM unresolved_messages WHERE message=%(msg)s AND priority >= %(prio)s", [{
+ 'msg': msg,
+ 'prio': prio,
+ } for msg, prio, tid in childrows])
+ else:
+ self.children = []
- if not self.threadid:
- # No parent and no child exists - create a new threadid, just for us!
- curs.execute("SELECT nextval('threadid_seq')")
- self.threadid = curs.fetchall()[0][0]
- log.status("Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid))
- else:
- # We have a threadid already, so we're not a new thread. Thus,
- # we need to purge the old thread
- self.purge_thread(self.threadid)
+ if not self.threadid:
+ # No parent and no child exists - create a new threadid, just for us!
+ curs.execute("SELECT nextval('threadid_seq')")
+ self.threadid = curs.fetchall()[0][0]
+ log.status("Message %s resolved to no parent (out of %s) and no child, new thread %s" % (self.msgid, len(self.parents), self.threadid))
+ else:
+ # We have a threadid already, so we're not a new thread. Thus,
+ # we need to purge the old thread
+ self.purge_thread(self.threadid)
- # Insert a thread tag if we're on a new list
- curs.execute("INSERT INTO list_threads (threadid, listid) SELECT %(threadid)s, %(listid)s WHERE NOT EXISTS (SELECT * FROM list_threads t2 WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) RETURNING threadid", {
- 'threadid': self.threadid,
- 'listid': listid,
- })
- if len(curs.fetchall()):
- log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
+ # Insert a thread tag if we're on a new list
+ curs.execute("INSERT INTO list_threads (threadid, listid) SELECT %(threadid)s, %(listid)s WHERE NOT EXISTS (SELECT * FROM list_threads t2 WHERE t2.threadid=%(threadid)s AND t2.listid=%(listid)s) RETURNING threadid", {
+ 'threadid': self.threadid,
+ 'listid': listid,
+ })
+ if len(curs.fetchall()):
+ log.status("Tagged thread %s with listid %s" % (self.threadid, listid))
- curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", {
- 'parentid': self.parentid,
- 'threadid': self.threadid,
- 'from': self._from,
- 'to': self.to or '',
- 'cc': self.cc or '',
- 'subject': self.subject or '',
- 'date': self.date,
- 'has_attachment': len(self.attachments) > 0,
- 'messageid': self.msgid,
- 'bodytxt': self.bodytxt,
- 'rawtxt': bytearray(self.rawtxt),
- })
- id = curs.fetchall()[0][0]
- log.status("Message %s, got id %s, set thread %s, parent %s" % (
- self.msgid, id, self.threadid, self.parentid))
- if len(self.attachments):
- # Insert attachments
- curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
- 'message': id,
- 'filename': a[0] or 'unknown_filename',
- 'contenttype': a[1],
- 'attachment': bytearray(a[2]),
- } for a in self.attachments])
+ curs.execute("INSERT INTO messages (parentid, threadid, _from, _to, cc, subject, date, has_attachment, messageid, bodytxt, rawtxt) VALUES (%(parentid)s, %(threadid)s, %(from)s, %(to)s, %(cc)s, %(subject)s, %(date)s, %(has_attachment)s, %(messageid)s, %(bodytxt)s, %(rawtxt)s) RETURNING id", {
+ 'parentid': self.parentid,
+ 'threadid': self.threadid,
+ 'from': self._from,
+ 'to': self.to or '',
+ 'cc': self.cc or '',
+ 'subject': self.subject or '',
+ 'date': self.date,
+ 'has_attachment': len(self.attachments) > 0,
+ 'messageid': self.msgid,
+ 'bodytxt': self.bodytxt,
+ 'rawtxt': bytearray(self.rawtxt),
+ })
+ id = curs.fetchall()[0][0]
+ log.status("Message %s, got id %s, set thread %s, parent %s" % (
+ self.msgid, id, self.threadid, self.parentid))
+ if len(self.attachments):
+ # Insert attachments
+ curs.executemany("INSERT INTO attachments (message, filename, contenttype, attachment) VALUES (%(message)s, %(filename)s, %(contenttype)s, %(attachment)s)",[ {
+ 'message': id,
+ 'filename': a[0] or 'unknown_filename',
+ 'contenttype': a[1],
+ 'attachment': bytearray(a[2]),
+ } for a in self.attachments])
- if len(self.children):
- log.status("Setting %s other messages to children of %s" % (len(self.children), self.msgid))
- curs.executemany("UPDATE messages SET parentid=%(parent)s WHERE id=%(id)s",
- [{'parent': id, 'id': c} for c in self.children])
- if len(self.parents):
- # There are remaining parents we'd rather have to get ourselves
- # properly threaded - so store them in the db.
- curs.executemany("INSERT INTO unresolved_messages (message, priority, msgid) VALUES (%(id)s, %(priority)s, %(msgid)s)",
- [{'id': id, 'priority': i, 'msgid': self.parents[i]} for i in range(0, len(self.parents))])
+ if len(self.children):
+ log.status("Setting %s other messages to children of %s" % (len(self.children), self.msgid))
+ curs.executemany("UPDATE messages SET parentid=%(parent)s WHERE id=%(id)s",
+ [{'parent': id, 'id': c} for c in self.children])
+ if len(self.parents):
+ # There are remaining parents we'd rather have to get ourselves
+ # properly threaded - so store them in the db.
+ curs.executemany("INSERT INTO unresolved_messages (message, priority, msgid) VALUES (%(id)s, %(priority)s, %(msgid)s)",
+ [{'id': id, 'priority': i, 'msgid': self.parents[i]} for i in range(0, len(self.parents))])
- opstatus.stored += 1
- return True
+ opstatus.stored += 1
+ return True
- def diff(self, conn, f, fromonlyf, oldid):
- curs = conn.cursor()
+ def diff(self, conn, f, fromonlyf, oldid):
+ curs = conn.cursor()
- # Fetch the old one so we have something to diff against
- curs.execute("SELECT id, _from, _to, cc, subject, date, has_attachment, bodytxt FROM messages WHERE messageid=%(msgid)s", {
- 'msgid': self.msgid,
- })
- try:
- id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
- except TypeError as e:
- f.write("---- %s ----\n" % self.msgid)
- f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
- f.write("\n-------------------------------\n\n")
- return
+ # Fetch the old one so we have something to diff against
+ curs.execute("SELECT id, _from, _to, cc, subject, date, has_attachment, bodytxt FROM messages WHERE messageid=%(msgid)s", {
+ 'msgid': self.msgid,
+ })
+ try:
+ id, _from, to, cc, subject, date, has_attachment, bodytxt = curs.fetchone()
+ except TypeError as e:
+ f.write("---- %s ----\n" % self.msgid)
+ f.write("Could not re-find in archives (old id was %s): %s\n" % (oldid, e))
+ f.write("\n-------------------------------\n\n")
+ return
- if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
- log.status("Message %s has header changes " % self.msgid)
- f.write("==== %s ====\n" % self.msgid)
- for fn in ['_from', 'to', 'cc', 'subject']:
- if getattr(self, fn) != eval(fn):
- s = "- {0}: {1}\n".format(fn, eval(fn))
- d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
- f.write(s)
- f.write(d)
- f.write("\n\n")
+ if (_from.rstrip(), to.rstrip(), cc.rstrip(), subject.rstrip()) != (self._from, self.to, self.cc, self.subject):
+ log.status("Message %s has header changes " % self.msgid)
+ f.write("==== %s ====\n" % self.msgid)
+ for fn in ['_from', 'to', 'cc', 'subject']:
+ if getattr(self, fn) != eval(fn):
+ s = "- {0}: {1}\n".format(fn, eval(fn))
+ d = "+ {0}: {1}\n".format(fn, getattr(self, fn))
+ f.write(s)
+ f.write(d)
+ f.write("\n\n")
- if bodytxt != self.bodytxt:
- log.status("Message %s has body changes " % self.msgid)
- tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
- self.bodytxt.splitlines(),
- fromfile='old',
- tofile='new',
- n=0,
- lineterm=''))
- if (len(tempdiff)-2) % 3 == 0:
- # 3 rows to a diff, two header rows.
- # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
- # which indicates the only change is in the From.
- ok = True
- tempdiff = tempdiff[2:]
- while tempdiff:
- a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
- if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
- ok=False
- break
- if ok:
- fromonlyf.write("%s\n" % self.msgid)
- return
+ if bodytxt != self.bodytxt:
+ log.status("Message %s has body changes " % self.msgid)
+ tempdiff = list(difflib.unified_diff(bodytxt.splitlines(),
+ self.bodytxt.splitlines(),
+ fromfile='old',
+ tofile='new',
+ n=0,
+ lineterm=''))
+ if (len(tempdiff)-2) % 3 == 0:
+ # 3 rows to a diff, two header rows.
+ # Then verify that each slice of 3 contains one @@ row (header), one -From and one +>From,
+ # which indicates the only change is in the From.
+ ok = True
+ tempdiff = tempdiff[2:]
+ while tempdiff:
+ a,b,c = (tempdiff.pop(0), tempdiff.pop(0), tempdiff.pop(0))
+ if not (a.startswith('@@ ') and b.startswith('-From ') and c.startswith('+>From ')):
+ ok=False
+ break
+ if ok:
+ fromonlyf.write("%s\n" % self.msgid)
+ return
- # Generate a nicer diff
- d = list(difflib.unified_diff(bodytxt.splitlines(),
- self.bodytxt.splitlines(),
- fromfile='old',
- tofile='new',
- n=0,
- lineterm=''))
- if len(d) > 0:
- f.write("---- %s ----\n" % self.msgid)
- f.write("\n".join(d))
- f.write("\n\n")
- else:
- log.status("Message %s unchanged." % self.msgid)
+ # Generate a nicer diff
+ d = list(difflib.unified_diff(bodytxt.splitlines(),
+ self.bodytxt.splitlines(),
+ fromfile='old',
+ tofile='new',
+ n=0,
+ lineterm=''))
+ if len(d) > 0:
+ f.write("---- %s ----\n" % self.msgid)
+ f.write("\n".join(d))
+ f.write("\n\n")
+ else:
+ log.status("Message %s unchanged." % self.msgid)
from lib.log import log
class VarnishPurger(object):
- def __init__(self, cfg):
- self.cfg = cfg
+ def __init__(self, cfg):
+ self.cfg = cfg
- def purge(self, purges):
- if not len(purges):
- return
+ def purge(self, purges):
+ if not len(purges):
+ return
- if not self.cfg.has_option('varnish', 'purgeurl'):
- return
+ if not self.cfg.has_option('varnish', 'purgeurl'):
+ return
- purgeurl = self.cfg.get('varnish', 'purgeurl')
- exprlist = []
- for p in purges:
- if isinstance(p, tuple):
- # Purging a list
- exprlist.append('obj.http.x-pglm ~ :%s/%s/%s:' % p)
- else:
- # Purging individual thread
- exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
- purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
- purgedict['n'] = len(exprlist)
- r = requests.post(purgeurl, data=purgedict, headers={
- 'Content-type': 'application/x-www-form-urlencoded',
- 'Host': 'www.postgresql.org',
- })
- if r.status_code != 200:
- log.error("Failed to send purge request!")
+ purgeurl = self.cfg.get('varnish', 'purgeurl')
+ exprlist = []
+ for p in purges:
+ if isinstance(p, tuple):
+ # Purging a list
+ exprlist.append('obj.http.x-pglm ~ :%s/%s/%s:' % p)
+ else:
+ # Purging individual thread
+ exprlist.append('obj.http.x-pgthread ~ :%s:' % p)
+ purgedict = dict(list(zip(['p%s' % n for n in range(0, len(exprlist))], exprlist)))
+ purgedict['n'] = len(exprlist)
+ r = requests.post(purgeurl, data=purgedict, headers={
+ 'Content-type': 'application/x-www-form-urlencoded',
+ 'Host': 'www.postgresql.org',
+ })
+ if r.status_code != 200:
+ log.error("Failed to send purge request!")
from lib.varnish import VarnishPurger
def log_failed_message(listid, srctype, src, msg, err):
- try:
- msgid = msg.msgid
- except:
- msgid = "<unknown>"
- log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid.encode('us-ascii', 'replace'), srctype, src, str(str(err), 'us-ascii', 'replace')))
-
- # We also put the data in the db. This happens in the main transaction
- # so if the whole script dies, it goes away...
- conn.cursor().execute("INSERT INTO loaderrors (listid, msgid, srctype, src, err) VALUES (%(listid)s, %(msgid)s, %(srctype)s, %(src)s, %(err)s)", {
- 'listid': listid,
- 'msgid': msgid,
- 'srctype': srctype,
- 'src': src,
- 'err': str(str(err), 'us-ascii', 'replace'),
- })
+ try:
+ msgid = msg.msgid
+ except:
+ msgid = "<unknown>"
+    log.error("Failed to load message (msgid %s) from %s, spec %s: %s" % (msgid, srctype, src, str(err)))
+
+ # We also put the data in the db. This happens in the main transaction
+ # so if the whole script dies, it goes away...
+ conn.cursor().execute("INSERT INTO loaderrors (listid, msgid, srctype, src, err) VALUES (%(listid)s, %(msgid)s, %(srctype)s, %(src)s, %(err)s)", {
+ 'listid': listid,
+ 'msgid': msgid,
+ 'srctype': srctype,
+ 'src': src,
+        'err': str(err),
+ })
if __name__ == "__main__":
- optparser = OptionParser()
- optparser.add_option('-l', '--list', dest='list', help='Name of list to load message for')
- optparser.add_option('-d', '--directory', dest='directory', help='Load all messages in directory')
- optparser.add_option('-m', '--mbox', dest='mbox', help='Load all messages in mbox')
- optparser.add_option('-i', '--interactive', dest='interactive', action='store_true', help='Prompt after each message')
- optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output')
- optparser.add_option('--force-date', dest='force_date', help='Override date (used for dates that can\'t be parsed)')
- optparser.add_option('--filter-msgid', dest='filter_msgid', help='Only process message with given msgid')
-
- (opt, args) = optparser.parse_args()
-
- if (len(args)):
- print("No bare arguments accepted")
- optparser.print_usage()
- sys.exit(1)
-
- if not opt.list:
- print("List must be specified")
- optparser.print_usage()
- sys.exit(1)
-
- if opt.directory and opt.mbox:
- print("Can't specify both directory and mbox!")
- optparser.print_usage()
- sys.exit(1)
-
- if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid:
- print("Can't use force_date with directory or mbox - only individual messages")
- optparser.print_usage()
- sys.exit(1)
-
- if opt.filter_msgid and not (opt.directory or opt.mbox):
- print("filter_msgid makes no sense without directory or mbox!")
- optparser.print_usage()
- sys.exit(1)
-
- log.set(opt.verbose)
-
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
-
- conn = psycopg2.connect(connstr)
- curs = conn.cursor()
-
- # Take an advisory lock to force serialization.
- # We could do this "properly" by reordering operations and using ON CONFLICT,
- # but concurrency is not that important and this is easier...
- try:
- curs.execute("SET statement_timeout='30s'")
- curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)")
- except Exception as e:
- print(("Failed to wait on advisory lock: %s" % e))
- sys.exit(1)
-
- # Get the listid we're working on
- curs.execute("SELECT listid FROM lists WHERE listname=%(list)s", {
- 'list': opt.list
- })
- r = curs.fetchall()
- if len(r) != 1:
- log.error("List %s not found" % opt.list)
- conn.close()
- sys.exit(1)
- listid = r[0][0]
-
- purges = set()
-
- if opt.directory:
- # Parse all files in directory
- for x in os.listdir(opt.directory):
- log.status("Parsing file %s" % x)
- with open(os.path.join(opt.directory, x)) as f:
- ap = ArchivesParserStorage()
- ap.parse(f)
- if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
- continue
- try:
- ap.analyze(date_override=opt.force_date)
- except IgnorableException as e:
- log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e)
- opstatus.failed += 1
- continue
- ap.store(conn, listid)
- purges.update(ap.purges)
- if opt.interactive:
- print("Interactive mode, committing transaction")
- conn.commit()
- print("Proceed to next message with Enter, or input a period (.) to stop processing")
- x = input()
- if x == '.':
- print("Ok, aborting!")
- break
- print("---------------------------------")
- elif opt.mbox:
- if not os.path.isfile(opt.mbox):
- print("File %s does not exist" % opt.mbox)
- sys.exit(1)
- mboxparser = MailboxBreakupParser(opt.mbox)
- while not mboxparser.EOF:
- ap = ArchivesParserStorage()
- msg = next(mboxparser)
- if not msg:
- break
- ap.parse(msg)
- if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
- continue
- try:
- ap.analyze(date_override=opt.force_date)
- except IgnorableException as e:
- log_failed_message(listid, "mbox", opt.mbox, ap, e)
- opstatus.failed += 1
- continue
- ap.store(conn, listid)
- purges.update(ap.purges)
- if mboxparser.returncode():
- log.error("Failed to parse mbox:")
- log.error(mboxparser.stderr_output())
- sys.exit(1)
- else:
- # Parse single message on stdin
- ap = ArchivesParserStorage()
- ap.parse(sys.stdin.buffer)
- try:
- ap.analyze(date_override=opt.force_date)
- except IgnorableException as e:
- log_failed_message(listid, "stdin","", ap, e)
- conn.close()
- sys.exit(1)
- ap.store(conn, listid)
- purges.update(ap.purges)
- if opstatus.stored:
- log.log("Stored message with message-id %s" % ap.msgid)
-
- conn.commit()
- conn.close()
- opstatus.print_status()
-
- VarnishPurger(cfg).purge(purges)
+ optparser = OptionParser()
+ optparser.add_option('-l', '--list', dest='list', help='Name of list to load message for')
+ optparser.add_option('-d', '--directory', dest='directory', help='Load all messages in directory')
+ optparser.add_option('-m', '--mbox', dest='mbox', help='Load all messages in mbox')
+ optparser.add_option('-i', '--interactive', dest='interactive', action='store_true', help='Prompt after each message')
+ optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output')
+ optparser.add_option('--force-date', dest='force_date', help='Override date (used for dates that can\'t be parsed)')
+ optparser.add_option('--filter-msgid', dest='filter_msgid', help='Only process message with given msgid')
+
+ (opt, args) = optparser.parse_args()
+
+ if (len(args)):
+ print("No bare arguments accepted")
+ optparser.print_usage()
+ sys.exit(1)
+
+ if not opt.list:
+ print("List must be specified")
+ optparser.print_usage()
+ sys.exit(1)
+
+ if opt.directory and opt.mbox:
+ print("Can't specify both directory and mbox!")
+ optparser.print_usage()
+ sys.exit(1)
+
+ if opt.force_date and (opt.directory or opt.mbox) and not opt.filter_msgid:
+ print("Can't use force_date with directory or mbox - only individual messages")
+ optparser.print_usage()
+ sys.exit(1)
+
+ if opt.filter_msgid and not (opt.directory or opt.mbox):
+ print("filter_msgid makes no sense without directory or mbox!")
+ optparser.print_usage()
+ sys.exit(1)
+
+ log.set(opt.verbose)
+
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
+
+ conn = psycopg2.connect(connstr)
+ curs = conn.cursor()
+
+ # Take an advisory lock to force serialization.
+ # We could do this "properly" by reordering operations and using ON CONFLICT,
+ # but concurrency is not that important and this is easier...
+ try:
+ curs.execute("SET statement_timeout='30s'")
+ curs.execute("SELECT pg_advisory_xact_lock(8059944559669076)")
+ except Exception as e:
+ print(("Failed to wait on advisory lock: %s" % e))
+ sys.exit(1)
+
+ # Get the listid we're working on
+ curs.execute("SELECT listid FROM lists WHERE listname=%(list)s", {
+ 'list': opt.list
+ })
+ r = curs.fetchall()
+ if len(r) != 1:
+ log.error("List %s not found" % opt.list)
+ conn.close()
+ sys.exit(1)
+ listid = r[0][0]
+
+ purges = set()
+
+ if opt.directory:
+ # Parse all files in directory
+ for x in os.listdir(opt.directory):
+ log.status("Parsing file %s" % x)
+ with open(os.path.join(opt.directory, x)) as f:
+ ap = ArchivesParserStorage()
+ ap.parse(f)
+ if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
+ continue
+ try:
+ ap.analyze(date_override=opt.force_date)
+ except IgnorableException as e:
+ log_failed_message(listid, "directory", os.path.join(opt.directory, x), ap, e)
+ opstatus.failed += 1
+ continue
+ ap.store(conn, listid)
+ purges.update(ap.purges)
+ if opt.interactive:
+ print("Interactive mode, committing transaction")
+ conn.commit()
+ print("Proceed to next message with Enter, or input a period (.) to stop processing")
+ x = input()
+ if x == '.':
+ print("Ok, aborting!")
+ break
+ print("---------------------------------")
+ elif opt.mbox:
+ if not os.path.isfile(opt.mbox):
+ print("File %s does not exist" % opt.mbox)
+ sys.exit(1)
+ mboxparser = MailboxBreakupParser(opt.mbox)
+ while not mboxparser.EOF:
+ ap = ArchivesParserStorage()
+ msg = next(mboxparser)
+ if not msg:
+ break
+ ap.parse(msg)
+ if opt.filter_msgid and not ap.is_msgid(opt.filter_msgid):
+ continue
+ try:
+ ap.analyze(date_override=opt.force_date)
+ except IgnorableException as e:
+ log_failed_message(listid, "mbox", opt.mbox, ap, e)
+ opstatus.failed += 1
+ continue
+ ap.store(conn, listid)
+ purges.update(ap.purges)
+ if mboxparser.returncode():
+ log.error("Failed to parse mbox:")
+ log.error(mboxparser.stderr_output())
+ sys.exit(1)
+ else:
+ # Parse single message on stdin
+ ap = ArchivesParserStorage()
+ ap.parse(sys.stdin.buffer)
+ try:
+ ap.analyze(date_override=opt.force_date)
+ except IgnorableException as e:
+ log_failed_message(listid, "stdin","", ap, e)
+ conn.close()
+ sys.exit(1)
+ ap.store(conn, listid)
+ purges.update(ap.purges)
+ if opstatus.stored:
+ log.log("Stored message with message-id %s" % ap.msgid)
+
+ conn.commit()
+ conn.close()
+ opstatus.print_status()
+
+ VarnishPurger(cfg).purge(purges)
import requests
if __name__=="__main__":
- parser = argparse.ArgumentParser(description="Synchronize lists from pglister")
- parser.add_argument('--dryrun', action='store_true', help="Don't commit changes to database")
+ parser = argparse.ArgumentParser(description="Synchronize lists from pglister")
+ parser.add_argument('--dryrun', action='store_true', help="Don't commit changes to database")
- args = parser.parse_args()
+ args = parser.parse_args()
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
- if cfg.has_option('pglister', 'subscribers') and cfg.getint('pglister', 'subscribers'):
- do_subscribers=1
- else:
- do_subscribers=0
+ if cfg.has_option('pglister', 'subscribers') and cfg.getint('pglister', 'subscribers'):
+ do_subscribers=1
+ else:
+ do_subscribers=0
- psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
- conn = psycopg2.connect(connstr)
- curs = conn.cursor()
+ psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+ conn = psycopg2.connect(connstr)
+ curs = conn.cursor()
- r = requests.get('{0}/api/archive/{1}/lists/?subscribers={2}'.format(
- cfg.get('pglister', 'root'),
- cfg.get('pglister', 'myname'),
- do_subscribers and 1 or 0,
- ), headers={
- 'X-Api-Key': cfg.get('pglister', 'apikey'),
- })
- obj = r.json()
+ r = requests.get('{0}/api/archive/{1}/lists/?subscribers={2}'.format(
+ cfg.get('pglister', 'root'),
+ cfg.get('pglister', 'myname'),
+ do_subscribers and 1 or 0,
+ ), headers={
+ 'X-Api-Key': cfg.get('pglister', 'apikey'),
+ })
+ obj = r.json()
- # For groups, just add them if they don't exist
- groups = {g['group']['id']:g['group']['groupname'] for g in obj}
+ # For groups, just add them if they don't exist
+ groups = {g['group']['id']:g['group']['groupname'] for g in obj}
- for id,name in list(groups.items()):
- curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", {
- 'group': name,
- })
- if not curs.fetchone()[0]:
- curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", {
- 'group': name,
- })
- print("Added group %s" % name)
+ for id,name in list(groups.items()):
+ curs.execute("SELECT EXISTS (SELECT 1 FROM listgroups WHERE groupname=%(group)s)", {
+ 'group': name,
+ })
+ if not curs.fetchone()[0]:
+ curs.execute("INSERT INTO listgroups (groupname, sortkey) VALUES (%(group)s, 100) RETURNING groupname", {
+ 'group': name,
+ })
+ print("Added group %s" % name)
- # Add any missing lists, and synchronize their contents.
- for l in obj:
- curs.execute("SELECT listid,listname FROM lists WHERE listname=%(name)s", {
- 'name': l['listname'],
- })
- if curs.rowcount == 0:
- curs.execute("INSERT INTO lists (listname, shortdesc, description, active, groupid) SELECT %(name)s, %(name)s, %(desc)s, 't', groupid FROM listgroups WHERE groupname=%(groupname)s RETURNING listid, listname", {
- 'name': l['listname'],
- 'desc': l['longdesc'],
- 'groupname': l['group']['groupname'],
- })
- listid, name = curs.fetchone()
- print("Added list %s" % name)
- else:
- listid, name = curs.fetchone()
- curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", {
- 'id': listid,
- 'name': l['listname'],
- 'desc': l['longdesc'],
- 'groupname': l['group']['groupname'],
- })
- for n, in curs.fetchall():
- print("Updated list %s " % n)
+ # Add any missing lists, and synchronize their contents.
+ for l in obj:
+ curs.execute("SELECT listid,listname FROM lists WHERE listname=%(name)s", {
+ 'name': l['listname'],
+ })
+ if curs.rowcount == 0:
+ curs.execute("INSERT INTO lists (listname, shortdesc, description, active, groupid) SELECT %(name)s, %(name)s, %(desc)s, 't', groupid FROM listgroups WHERE groupname=%(groupname)s RETURNING listid, listname", {
+ 'name': l['listname'],
+ 'desc': l['longdesc'],
+ 'groupname': l['group']['groupname'],
+ })
+ listid, name = curs.fetchone()
+ print("Added list %s" % name)
+ else:
+ listid, name = curs.fetchone()
+ curs.execute("UPDATE lists SET shortdesc=%(name)s, description=%(desc)s, groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s), active=true WHERE listid=%(id)s AND NOT (active AND shortdesc=%(name)s AND description=%(desc)s AND groupid=(SELECT groupid FROM listgroups WHERE groupname=%(groupname)s)) RETURNING listname", {
+ 'id': listid,
+ 'name': l['listname'],
+ 'desc': l['longdesc'],
+ 'groupname': l['group']['groupname'],
+ })
+ for n, in curs.fetchall():
+ print("Updated list %s " % n)
- if do_subscribers:
- # If we synchronize subscribers, we do so on all lists for now.
- curs.execute("WITH t(u) AS (SELECT UNNEST(%(usernames)s)), ins(un) AS (INSERT INTO listsubscribers (username, list_id) SELECT u, %(listid)s FROM t WHERE NOT EXISTS (SELECT 1 FROM listsubscribers WHERE username=u AND list_id=%(listid)s) RETURNING username), del(un) AS (DELETE FROM listsubscribers WHERE list_id=%(listid)s AND NOT EXISTS (SELECT 1 FROM t WHERE u=username) RETURNING username) SELECT 'ins',un FROM ins UNION ALL SELECT 'del',un FROM del ORDER BY 1,2", {
- 'usernames': l['subscribers'],
- 'listid': listid,
- })
- for what, who in curs.fetchall():
- if what == 'ins':
- print("Added subscriber %s to list %s" % (who, name))
- else:
- print("Removed subscriber %s from list %s" % (who, name))
+ if do_subscribers:
+ # If we synchronize subscribers, we do so on all lists for now.
+ curs.execute("WITH t(u) AS (SELECT UNNEST(%(usernames)s)), ins(un) AS (INSERT INTO listsubscribers (username, list_id) SELECT u, %(listid)s FROM t WHERE NOT EXISTS (SELECT 1 FROM listsubscribers WHERE username=u AND list_id=%(listid)s) RETURNING username), del(un) AS (DELETE FROM listsubscribers WHERE list_id=%(listid)s AND NOT EXISTS (SELECT 1 FROM t WHERE u=username) RETURNING username) SELECT 'ins',un FROM ins UNION ALL SELECT 'del',un FROM del ORDER BY 1,2", {
+ 'usernames': l['subscribers'],
+ 'listid': listid,
+ })
+ for what, who in curs.fetchall():
+ if what == 'ins':
+ print("Added subscriber %s to list %s" % (who, name))
+ else:
+ print("Removed subscriber %s from list %s" % (who, name))
- # We don't remove lists ever, because we probably want to keep archives around.
- # But for now, we alert on them.
- curs.execute("SELECT listname FROM lists WHERE active AND NOT listname=ANY(%(lists)s)", {
- 'lists': [l['listname'] for l in obj],
- })
- for n, in curs.fetchall():
- print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n)
+ # We don't remove lists ever, because we probably want to keep archives around.
+ # But for now, we alert on them.
+ curs.execute("SELECT listname FROM lists WHERE active AND NOT listname=ANY(%(lists)s)", {
+ 'lists': [l['listname'] for l in obj],
+ })
+ for n, in curs.fetchall():
+ print("List %s exists in archives, but not in upstream! Should it be marked inactive?" % n)
- if args.dryrun:
- print("Dry-run, rolling back")
- conn.rollback()
- else:
- conn.commit()
- conn.close()
+ if args.dryrun:
+ print("Dry-run, rolling back")
+ conn.rollback()
+ else:
+ conn.commit()
+ conn.close()
from lib.varnish import VarnishPurger
if __name__ == "__main__":
- optparser = OptionParser()
- optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to load')
-
- (opt, args) = optparser.parse_args()
-
- if (len(args)):
- print("No bare arguments accepted")
- optparser.print_help()
- sys.exit(1)
-
- if not opt.msgid:
- print("Message-id must be specified")
- optparser.print_help()
- sys.exit(1)
-
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
-
- conn = psycopg2.connect(connstr)
- curs = conn.cursor()
-
- curs.execute("SELECT id, threadid FROM messages WHERE messageid=%(msgid)s", {
- 'msgid': opt.msgid,
- })
- id, threadid = curs.fetchone()
-
- VarnishPurger(cfg).purge([int(threadid), ])
- conn.close()
+ optparser = OptionParser()
+ optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to load')
+
+ (opt, args) = optparser.parse_args()
+
+ if (len(args)):
+ print("No bare arguments accepted")
+ optparser.print_help()
+ sys.exit(1)
+
+ if not opt.msgid:
+ print("Message-id must be specified")
+ optparser.print_help()
+ sys.exit(1)
+
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
+
+ conn = psycopg2.connect(connstr)
+ curs = conn.cursor()
+
+ curs.execute("SELECT id, threadid FROM messages WHERE messageid=%(msgid)s", {
+ 'msgid': opt.msgid,
+ })
+ id, threadid = curs.fetchone()
+
+ VarnishPurger(cfg).purge([int(threadid), ])
+ conn.close()
from lib.varnish import VarnishPurger
def ResultIter(cursor):
- # Fetch lots of data but keep memory usage down a bit, by feeding it out of
- # a generator, and use fetchmany()
- while True:
- results = cursor.fetchmany(5000)
- if not results:
- break
- for r in results:
- yield r
+ # Fetch lots of data but keep memory usage down a bit, by feeding it out of
+ # a generator, and use fetchmany()
+ while True:
+ results = cursor.fetchmany(5000)
+ if not results:
+ break
+ for r in results:
+ yield r
if __name__ == "__main__":
- optparser = OptionParser()
- optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to load')
- optparser.add_option('--all', dest='all', action='store_true', help='Load *all* messages currently in the db')
- optparser.add_option('--sample', dest='sample', help='Load a sample of <n> messages')
- optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output')
- optparser.add_option('--force-date', dest='force_date', help='Override date (used for dates that can\'t be parsed)')
- optparser.add_option('--update', dest='update', action='store_true', help='Actually update, not just diff (default is diff)')
- optparser.add_option('--commit', dest='commit', action='store_true', help='Commit the transaction without asking')
-
- (opt, args) = optparser.parse_args()
-
- if (len(args)):
- print("No bare arguments accepted")
- optparser.print_usage()
- sys.exit(1)
-
- if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1:
- print("Must specify exactly one of --msgid, --all and --sample")
- sys.exit(1)
-
- if not opt.update and os.path.exists('reparse.diffs'):
- print("File reparse.diffs already exists. Remove or rename and try again.")
- sys.exit(1)
-
- log.set(opt.verbose)
-
- cfg = ConfigParser()
- cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
- try:
- connstr = cfg.get('db','connstr')
- except:
- connstr = 'need_connstr'
-
- conn = psycopg2.connect(connstr)
-
- # Get messages
- curs = conn.cursor('msglist')
- if opt.all:
- curs2 = conn.cursor()
- curs2.execute("SELECT count(*) FROM messages WHERE hiddenstatus IS NULL")
- totalcount, = curs2.fetchone()
- curs.execute("SELECT id, rawtxt FROM messages WHERE hiddenstatus IS NULL ORDER BY id")
- elif opt.sample:
- totalcount = int(opt.sample)
- curs.execute("SELECT id, rawtxt FROM messages WHERE hiddenstatus IS NULL ORDER BY id DESC LIMIT %(num)s", {
- 'num': int(opt.sample),
- })
- else:
- totalcount = 1
- curs.execute("SELECT id, rawtxt FROM messages WHERE messageid=%(msgid)s", {
- 'msgid': opt.msgid,
- })
-
- if not opt.update:
- f = codecs.open("reparse.diffs", "w", "utf-8")
- fromonlyf = open("reparse.fromonly","w")
-
- firststatus = datetime.now()
- laststatus = datetime.now()
- num = 0
- updated = 0
- for id, rawtxt in ResultIter(curs):
- num += 1
- ap = ArchivesParserStorage()
- ap.parse(BytesIO(rawtxt))
- try:
- ap.analyze(date_override=opt.force_date)
- except IgnorableException as e:
- if opt.update:
- print("Exception loading {0}: {1}".format(id, e))
- else:
- f.write("Exception loading %s: %s" % (id, e))
- continue
-
- if opt.update:
- if ap.store(conn, listid=-9, overwrite=True):
- updated += 1
- else:
- ap.diff(conn, f, fromonlyf, id)
- if datetime.now() - laststatus > timedelta(seconds=5):
- sys.stdout.write("%s messages parsed (%s%%, %s / second), %s updated\r" % (num,
- num*100/totalcount,
- num / ((datetime.now()-firststatus).seconds),
- updated))
- sys.stdout.flush()
- laststatus = datetime.now()
-
- print("")
-
- if opt.update:
- opstatus.print_status()
- if not opt.commit:
- while True:
- print("OK to commit transaction? ")
- a = input().lower().strip()
- if a == 'y' or a == 'yes':
- print("Ok, committing.")
- break
- elif a == 'n' or a == 'no':
- print("Aborting and rolling back")
- conn.rollback()
- sys.exit(1)
- conn.commit()
- VarnishPurger(cfg).purge(ap.purges)
- else:
- fromonlyf.close()
- f.close()
- if os.path.getsize('reparse.diffs') == 0:
- os.unlink('reparse.diffs')
- if os.path.getsize('reparse.fromonly') == 0:
- os.unlink('reparse.fromonly')
-
- # Just in case
- conn.rollback()
- conn.close()
+ optparser = OptionParser()
+ optparser.add_option('-m', '--msgid', dest='msgid', help='Messageid to load')
+ optparser.add_option('--all', dest='all', action='store_true', help='Load *all* messages currently in the db')
+ optparser.add_option('--sample', dest='sample', help='Load a sample of <n> messages')
+ optparser.add_option('-v', '--verbose', dest='verbose', action='store_true', help='Verbose output')
+ optparser.add_option('--force-date', dest='force_date', help='Override date (used for dates that can\'t be parsed)')
+ optparser.add_option('--update', dest='update', action='store_true', help='Actually update, not just diff (default is diff)')
+ optparser.add_option('--commit', dest='commit', action='store_true', help='Commit the transaction without asking')
+
+ (opt, args) = optparser.parse_args()
+
+ if (len(args)):
+ print("No bare arguments accepted")
+ optparser.print_usage()
+ sys.exit(1)
+
+ if sum([1 for x in [opt.all, opt.sample, opt.msgid] if x]) != 1:
+ print("Must specify exactly one of --msgid, --all and --sample")
+ sys.exit(1)
+
+ if not opt.update and os.path.exists('reparse.diffs'):
+ print("File reparse.diffs already exists. Remove or rename and try again.")
+ sys.exit(1)
+
+ log.set(opt.verbose)
+
+ cfg = ConfigParser()
+ cfg.read('%s/archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
+ try:
+ connstr = cfg.get('db','connstr')
+ except:
+ connstr = 'need_connstr'
+
+ conn = psycopg2.connect(connstr)
+
+ # Get messages
+ curs = conn.cursor('msglist')
+ if opt.all:
+ curs2 = conn.cursor()
+ curs2.execute("SELECT count(*) FROM messages WHERE hiddenstatus IS NULL")
+ totalcount, = curs2.fetchone()
+ curs.execute("SELECT id, rawtxt FROM messages WHERE hiddenstatus IS NULL ORDER BY id")
+ elif opt.sample:
+ totalcount = int(opt.sample)
+ curs.execute("SELECT id, rawtxt FROM messages WHERE hiddenstatus IS NULL ORDER BY id DESC LIMIT %(num)s", {
+ 'num': int(opt.sample),
+ })
+ else:
+ totalcount = 1
+ curs.execute("SELECT id, rawtxt FROM messages WHERE messageid=%(msgid)s", {
+ 'msgid': opt.msgid,
+ })
+
+ if not opt.update:
+ f = codecs.open("reparse.diffs", "w", "utf-8")
+ fromonlyf = open("reparse.fromonly","w")
+
+ firststatus = datetime.now()
+ laststatus = datetime.now()
+ num = 0
+ updated = 0
+ for id, rawtxt in ResultIter(curs):
+ num += 1
+ ap = ArchivesParserStorage()
+ ap.parse(BytesIO(rawtxt))
+ try:
+ ap.analyze(date_override=opt.force_date)
+ except IgnorableException as e:
+ if opt.update:
+ print("Exception loading {0}: {1}".format(id, e))
+ else:
+ f.write("Exception loading %s: %s" % (id, e))
+ continue
+
+ if opt.update:
+ if ap.store(conn, listid=-9, overwrite=True):
+ updated += 1
+ else:
+ ap.diff(conn, f, fromonlyf, id)
+ if datetime.now() - laststatus > timedelta(seconds=5):
+ sys.stdout.write("%s messages parsed (%s%%, %s / second), %s updated\r" % (num,
+ num*100/totalcount,
+ num / ((datetime.now()-firststatus).seconds),
+ updated))
+ sys.stdout.flush()
+ laststatus = datetime.now()
+
+ print("")
+
+ if opt.update:
+ opstatus.print_status()
+ if not opt.commit:
+ while True:
+ print("OK to commit transaction? ")
+ a = input().lower().strip()
+ if a == 'y' or a == 'yes':
+ print("Ok, committing.")
+ break
+ elif a == 'n' or a == 'no':
+ print("Aborting and rolling back")
+ conn.rollback()
+ sys.exit(1)
+ conn.commit()
+ VarnishPurger(cfg).purge(ap.purges)
+ else:
+ fromonlyf.close()
+ f.close()
+ if os.path.getsize('reparse.diffs') == 0:
+ os.unlink('reparse.diffs')
+ if os.path.getsize('reparse.fromonly') == 0:
+ os.unlink('reparse.fromonly')
+
+ # Just in case
+ conn.rollback()
+ conn.close()
from lib.storage import ArchivesParserStorage
if __name__ == "__main__":
    # Interactive maintenance tool: fetch one raw message from the archives
    # database, open it in vim for hand-editing, show a unified diff of the
    # change, and on confirmation write it back — keeping a copy of the
    # original row in messages_edited first.
    optparser = OptionParser()
    optparser.add_option('-m', dest='msgid', help='Messageid to edit')
    optparser.add_option('-i', dest='id', help='Message primary key id to edit')
    optparser.add_option('-c', dest='charset', help='Charset to edit as', default='utf8')
    optparser.add_option('--nodiff', dest='nodiff', action="store_true", help='Disable viewing of diff', default=False)
    (opt, args) = optparser.parse_args()

    if len(args):
        print("No bare arguments accepted")
        optparser.print_usage()
        sys.exit(1)

    cfg = ConfigParser()
    cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
    try:
        connstr = cfg.get('db', 'connstr')
    except Exception:
        # No usable config; this placeholder makes psycopg2 fail with a
        # recognizable error instead of an obscure one.
        connstr = 'need_connstr'

    conn = psycopg2.connect(connstr)
    curs = conn.cursor()

    # Exactly one of -m (messageid) and -i (primary key) must be given.
    if not (opt.msgid or opt.id):
        print("Need -m or -i!")
        sys.exit(1)
    if opt.msgid and opt.id:
        print("Can't specify both -m and -i!")
        sys.exit(1)

    if opt.msgid:
        curs.execute("SELECT id, rawtxt FROM messages WHERE messageid=%(msgid)s", {
            'msgid': opt.msgid,
        })
    else:
        curs.execute("SELECT id, rawtxt FROM messages WHERE id=%(id)s", {
            'id': opt.id,
        })

    id, rawtxt = curs.fetchone()
    # rawtxt comes from a bytea column, so keep it as bytes until it has to
    # be decoded for display (the sibling reparse script does the same).
    s = BytesIO(rawtxt)

    # Edit via a temp file; delete=False because vim runs after we close it,
    # so we must unlink it ourselves in the finally block below.
    f = tempfile.NamedTemporaryFile(delete=False)
    try:
        f.write(s.getvalue())
        f.close()
        os.system("vim %s" % f.name)
        f2 = open(f.name, "rb")
        s2 = f2.read()
        f2.close()

        if not opt.nodiff:
            print("\n".join(difflib.unified_diff(s.getvalue().decode(opt.charset).splitlines(),
                                                 s2.decode(opt.charset).splitlines(),
                                                 fromfile='old',
                                                 tofile='new',
                                                 lineterm='')))

        while True:
            a = input('Save this to db?').lower()
            if a == 'y' or a == 'yes':
                # Preserve the pre-edit row so the edit is recoverable.
                curs.execute("INSERT INTO messages_edited SELECT * FROM messages WHERE id=%(id)s", {
                    'id': id,
                })
                curs.execute("UPDATE messages SET rawtxt=%(raw)s WHERE id=%(id)s", {
                    'id': id,
                    'raw': bytearray(s2),
                })
                conn.commit()
                break
            elif a == 'n' or a == 'no':
                print("Ok, not saving")
                break
    finally:
        try:
            f.close()
        except Exception:
            pass
        os.unlink(f.name)
from lib.storage import ArchivesParserStorage
if __name__ == "__main__":
    # Batch-repair messages whose body had "From " at start-of-line mangled
    # into ">From " (classic mbox escaping). Message-ids are read one per
    # line from the file 'fromlist'. Each message is processed under a
    # savepoint so a rejected or already-fixed message can be rolled back
    # without losing the rest of the batch; everything kept is committed at
    # the end.
    cfg = ConfigParser()
    cfg.read('%s/../archives.ini' % os.path.realpath(os.path.dirname(sys.argv[0])))
    try:
        connstr = cfg.get('db', 'connstr')
    except Exception:
        connstr = 'need_connstr'

    conn = psycopg2.connect(connstr)
    curs = conn.cursor()

    with open('fromlist', 'r') as f:
        for l in f:
            curs.execute("SAVEPOINT msg")

            msgid = l.strip()
            curs.execute("SELECT id, rawtxt, bodytxt FROM messages WHERE messageid=%(msgid)s", {
                'msgid': msgid,
            })
            id, rawtxt, bodytxt = curs.fetchone()

            ap = ArchivesParserStorage()
            # rawtxt is a bytea column; work on it as bytes throughout
            # (matching the reparse script, which feeds BytesIO to parse()).
            s = BytesIO(rawtxt)

            # Parse the old message, so we can tell whether it still needs fixing.
            ap.parse(s)
            ap.analyze()

            # Double check...
            if bodytxt.decode('utf8') == ap.bodytxt:
                print("Message already fixed: %s" % msgid)
                curs.execute("ROLLBACK TO SAVEPOINT msg")
                continue

            # Now try to fix it...
            s.seek(0)

            # Bytes-level substitution: un-escape ">From " at line starts.
            fixed = re.sub(b'^>From ', b'From ', s.getvalue(), flags=re.MULTILINE)

            curs.execute("UPDATE messages SET rawtxt=%(raw)s WHERE messageid=%(msgid)s", {
                'msgid': msgid,
                'raw': bytearray(fixed),
            })

            # Ok, read it back and try again
            curs.execute("SELECT id, rawtxt, bodytxt FROM messages WHERE messageid=%(msgid)s", {
                'msgid': msgid,
            })
            id, rawtxt, bodytxt = curs.fetchone()

            ap = ArchivesParserStorage()

            # Re-parse the rewritten message to verify the fix took effect.
            ap.parse(BytesIO(rawtxt))
            ap.analyze()

            if ap.bodytxt != bodytxt.decode('utf8'):
                print("Failed to fix %s!" % msgid)

                # Generate diff to show what we changed
                print("CHANGED:")
                print("\n".join(difflib.unified_diff(s.getvalue().decode('utf8', errors='replace').splitlines(),
                                                     fixed.decode('utf8', errors='replace').splitlines(),
                                                     fromfile='old',
                                                     tofile='new',
                                                     n=2,
                                                     lineterm='')))
                print("----")
                # Generate a diff to show what's left
                print("REMAINING:")
                print("\n".join(difflib.unified_diff(bodytxt.decode('utf8').splitlines(),
                                                     ap.bodytxt.splitlines(),
                                                     fromfile='old',
                                                     tofile='new',
                                                     n=2,
                                                     lineterm='')))
                print("--------------")
                while True:
                    a = input('Save this change anyway?').lower()
                    if a == 'y' or a == 'yes':
                        print("Ok, saving!")
                        curs.execute("RELEASE SAVEPOINT msg")
                        break
                    elif a == 'n' or a == 'no':
                        print("Ok, rolling back!")
                        curs.execute("ROLLBACK TO SAVEPOINT msg")
                        break
                    elif a == 'yq':
                        print("Ok, committing and then exiting")
                        curs.execute("RELEASE SAVEPOINT msg")
                        conn.commit()
                        conn.close()
                        sys.exit(0)
            else:
                print("Fixed %s!" % msgid)
                curs.execute("RELEASE SAVEPOINT msg")
            s.close()

    print("Committing all that's there...")
    conn.commit()
    conn.close()