#! /usr/bin/env python
#
# Copyright (C) 2016 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Recode string-valued list attributes from one character set to another.

This is Python 2 code: it relies on the ``unicode`` builtin and on ``str``
being a byte string.
"""

import os
import argparse

# ``paths`` is imported before the Mailman package, as in the original
# script; presumably it adjusts sys.path so Mailman can be found -- confirm
# against the Mailman installation.
import paths
from Mailman import Utils
from Mailman.MailList import MailList

# Wrap help text to the terminal width (80 columns if COLUMNS is unset),
# leaving a two-column margin.
width = int(os.environ.get('COLUMNS', 80)) - 2


def parseargs():
    """Build the command-line parser and return the parsed arguments.

    The returned namespace has the attributes ``language`` (list of
    language codes or None), ``from_enc``, ``to_enc`` (str or None),
    ``verbose`` and ``doit`` (booleans).
    """
    # NOTE(review): the epilog's "See ." looks like it lost a URL at some
    # point; the reference cannot be reconstructed from this file.
    # The text inside the wrapped strings starts at column 0 on purpose;
    # Utils.wrap may treat lines with leading whitespace specially --
    # TODO confirm.
    parser = argparse.ArgumentParser(
        description=Utils.wrap("""Convert strings in list attributes from one
character set encoding to another""", column=width),
        epilog=Utils.wrap("""This script must be put in Mailman's bin/ directory.

Sometimes people wish to change the character set for a particular language
or languages in a Mailman installation from its original character set, e.g.,
iso-8859-1, to a new character set, e.g. utf-8. Aside from the obvious steps
necessary in doing this such as recoding the message catalog and templates
for the language, it is also necessary to recode all the string valued list
attributes for any existing list whose preferred_language has had its
character set changed. That's what this script is for.

The script will go through the configuration of a list and for all the
string values, if the string is a valid encoding in the old character set
and not in the new character set, it will recode the string from the old
character set encoding to the new.

Unfortunately, it appears that at least some recent versions of the Debian
(and hence Ubuntu) Mailman package have changed the encoding for several
languages to utf-8 causing various exceptions and shunted messages when list
attributes have strings encoded in the old character set. See . This script
can help with those issues.""", column=width),
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-l', '--language',
                        default=None, action='append',
                        help="""\
Process only lists whose preferred_language is this. May be repeated to do
more than one preferred_language. For example --language=de will do only
lists whose preferred language is German. Specifying this argument additional
times, e.g. '-l de -l nl', will do lists whose preferred language is any of
those given. The default is to do all lists.""")
    parser.add_argument('-f', '--from_enc',
                        default=None, type=str,
                        help="""\
Specify the old encoding for strings. The default is iso-8859-1.""")
    parser.add_argument('-t', '--to_enc',
                        default=None, type=str,
                        help="""\
Specify the new encoding for strings. The default is utf-8.""")
    parser.add_argument('-v', '--verbose',
                        dest='verbose', action='store_true',
                        help="""\
Print more progress messages.""")
    parser.add_argument('-d', '--doit',
                        dest='doit', action='store_true',
                        help="""\
As a precaution against accidentally recoding lists incorrectly, if this is
not specified the script won't actually change any lists, but it will print
'updated' messages for those lists it would have changed.""")
    return parser.parse_args()


def recode(mlist, f, t):
    """If the character set for a list's preferred_language has changed,
    attempt to recode old string values into the new character set.

    mlist is the list, f is the old charset and t is the new charset.

    Every attribute of mlist whose name does not start with an underscore
    is examined; attributes whose value was recoded are set back on mlist.
    Returns True if at least one attribute was changed, False otherwise.
    """
    changed = False
    for name in dir(mlist):
        if name.startswith('_'):
            continue
        new_value = doitem(getattr(mlist, name), f, t)
        # doitem signals "no change" with None; test for that explicitly.
        if new_value is not None:
            setattr(mlist, name, new_value)
            changed = True
    return changed


def doitem(v, f, t):
    """Recursively process lists, tuples and dictionary values and
    convert strings as needed.

    v is the value to examine, f is the old charset and t is the new one.
    Lists and dictionaries are updated in place; tuples are rebuilt.
    Dictionary keys are not converted, only values.  Return either the
    updated item or None if no change.
    """
    if isinstance(v, str):
        return convert(v, f, t)
    if isinstance(v, list):
        changed = False
        for i, item in enumerate(v):
            new_item = doitem(item, f, t)
            if new_item is not None:
                v[i] = new_item
                changed = True
        return v if changed else None
    if isinstance(v, tuple):
        changed = False
        items = []
        for item in v:
            new_item = doitem(item, f, t)
            if new_item is not None:
                changed = True
                items.append(new_item)
            else:
                items.append(item)
        return tuple(items) if changed else None
    if isinstance(v, dict):
        changed = False
        # Iterate over a snapshot since values are reassigned in the loop.
        for key, old_value in list(v.items()):
            new_value = doitem(old_value, f, t)
            if new_value is not None:
                v[key] = new_value
                changed = True
        return v if changed else None
    # Any other type is left alone.
    return None


def convert(s, f, t):
    """This does the actual character set conversion of the string s from
    charset f to charset t.

    The string is recoded only if it decodes cleanly as f and does not
    decode cleanly as t; in that case the re-encoded string is returned
    (characters that t cannot represent are replaced).  Otherwise None is
    returned to indicate no change.
    """
    try:
        u = unicode(s, f)
        is_f = True
    except ValueError:
        is_f = False
    try:
        unicode(s, t)
        is_t = True
    except ValueError:
        is_t = False
    if is_f and not is_t:
        return u.encode(t, 'replace')
    return None


def main():
    """Recode every selected list and report what was (or would be) changed.

    Lists are saved only when --doit was given.  Each list is unlocked
    again even if recoding or saving fails.
    """
    ns = parseargs()
    from_enc = ns.from_enc or 'iso-8859-1'
    to_enc = ns.to_enc or 'utf-8'
    for listname in Utils.list_names():
        mlist = MailList(listname, lock=False)
        if ns.language and mlist.preferred_language not in ns.language:
            if ns.verbose:
                print('%s: skipping, preferred_language is %s' % (
                    mlist.real_name, mlist.preferred_language))
            continue
        mlist.Lock()
        try:
            changed = recode(mlist, from_enc, to_enc)
            if changed:
                print('%s: updated' % mlist.real_name)
                if ns.doit:
                    mlist.Save()
            elif ns.verbose:
                print('%s: nothing changed' % mlist.real_name)
        finally:
            # Always release the lock, including when Save() raises.
            mlist.Unlock()


if __name__ == '__main__':
    main()