#! /usr/bin/env python # # Copyright (C) 2016 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # USA. """Modify selected message subjects in a pipermail article database Usage: %(PROGRAM)s [options] filename regexp Where: --author=regexp / -a regexp An optional second regexp which if provided will be matched against message authors (display names) which are to be changed to 'Redacted'. --verbose / -v Print some information about what is being done. --dry-run / -d Don't actually do anything. Just print what would be done. --help / -h Print this message and exit. filename is a full path to a periodic article database. regexp is a regular expression to match case sensitively against message subjects which are to be changed to 'Redacted'. Sometimes a message gets posted to a list and archived which has offensive content and/or an offensive Subject: header, and it is desired to edit out the offending text. This is an involved process as the text is in the cummulative listname.mbox/listname.mbox file, the periodic .txt file, the individual HTML files in the archive and possibly in the periodic Thread, Subject, Author and Date indices. In the latter case, it is not sufficient to edit the indices as the next post in the period will rebuild the indices using subject information from the listname/database/(period)-article database. Thus, at least if the period is current, one needs to modify the subject information in the -article database, and that will suffice to fix the indices upon the next post in the period. That's what this script is for. In case one wants to modify a copy, the script will accept any -article database by filename regardless of name. The script needs that name as its first argument and a regular expression which will be searched against all the subjects in the database file and those that match will be changed to 'Redacted'. The script must run from Mailman's bin/ directory. """ import re import sys import getopt import marshal import cPickle import paths from Mailman.i18n import _ PROGRAM = sys.argv[0] def usage(code, msg=''): if code: fd = sys.stderr else: fd = sys.stdout print >> fd, _(__doc__) if msg: print >> fd, msg sys.exit(code) def main(): verbose = dry = False try: opts, args = getopt.getopt(sys.argv[1:], 'a:vdh', ['author=', 'verbose', 'dry-run', 'help']) except getopt.error, msg: usage(1, msg) author = None for opt, arg in opts: if opt in ('-v', '--verbose'): verbose = True if opt in ('-d', '--dry-run'): dry = True if opt in ('-h', '--help'): usage(0) if opt in ('-a', '--author'): author = arg try: are = re.compile(author) except re.error, e: usage(1, 'Invalid author regexp: %s' % e) if len(args) <> 2: usage(1, _('Exactly two arguments required')) filename = args[0] regexp = args[1] try: f = open(filename, 'r+b') except IOError, e: usage(2, e) try: cre = re.compile(regexp) except re.error, e: usage(1, 'Invalid regexp: %s' % e) x = marshal.load(f) a = c = t = 0 for k, v in x.items(): t += 1 v = cPickle.loads(v) if cre.search(v.subject): if verbose or dry: print v.subject c += 1 if verbose or dry: print 'Changing to "Redacted".' v.subject = 'Redacted' v.decoded['stripped'] = u'Redacted' v.decoded['subject'] = u'Redacted' x[k] = cPickle.dumps(v) if author and are.search(v.author): if verbose or dry: print v.author a += 1 if verbose or dry: print 'Changing to "Redacted".' v.author = 'Redacted' v.decoded['author'] = u'Redacted' x[k] = cPickle.dumps(v) if not dry and a + c > 0: f.seek(0) marshal.dump(x, f) f.flush() f.truncate() print 'Changed subject in %d and author in %d of %d articles.' % (c, a, t) if __name__ == '__main__': main()