#! /usr/bin/env python
#
# Copyright (C) 2016 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

import os
import argparse

import paths

from Mailman import Utils
from Mailman.MailList import MailList

# Column at which the help text below is wrapped: two less than the value
# of the COLUMNS environment variable (80 when it is unset).  A COLUMNS
# value that is empty or not an integer must not abort the script at import
# time, so it falls back to the same default.
try:
    width = int(os.environ.get('COLUMNS', 80)) - 2
except ValueError:
    width = 78

def parseargs():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries these attributes:
      language -- list of language codes given with -l/--language, or None
      from_enc -- old encoding given with -f/--from_enc, or None
      to_enc   -- new encoding given with -t/--to_enc, or None
      verbose  -- True when -v/--verbose was given
      doit     -- True when -d/--doit was given

    The description and epilog are pre-wrapped with Utils.wrap() and shown
    through RawDescriptionHelpFormatter so argparse does not re-flow them.
    """
    summary = Utils.wrap("""Convert strings in list attributes from
one character set encoding to another""", column=width)
    background = Utils.wrap("""This script must be put in Mailman's bin/
directory.

Sometimes people wish to change the character set for a particular language
or languages in a Mailman installation from its original character set, e.g.,
iso-8859-1, to a new character set, e.g. utf-8. Aside from the obvious steps
necessary in doing this such as recoding the message catalog and templates
for the language, it is also necessary to recode all the string valued list
attributes for any existing list whose preferred_language has had its character
set changed. That's what this script is for.

The script will go through the configuration of a list and for all the
string values, if the string is a valid encoding in the old character set and
not in the new character set, it will recode the string from the old character
set encoding to the new.

Unfortunately, it appears that at least some recent versions of the Debian
(and hence Ubuntu) Mailman package have changed the encoding for several
languages to utf-8 causing various exceptions and shunted messages when list
attributes have strings encoded in the old character set. See
<https://bugs.launchpad.net/mailman/+bug/1462755>. This script can
help with those issues.""", column=width)

    cli = argparse.ArgumentParser(
        description=summary,
        epilog=background,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flags, keyword settings) pairs, registered in this order so the
    # options appear in the help output in the order listed here.
    option_specs = [
        (('-l', '--language'),
         dict(default=None, action='append', help="""\
Process only lists whose preferred_language is this. May be repeated to do
more than one preferred_language. For example --language=de will do only
lists whose preferred language is German. Specifying this argument additional
times, e.g. '-l de -l nl', will do lists whose preferred language is any of
those given. The default is to do all lists.""")),
        (('-f', '--from_enc'),
         dict(default=None, type=str, help="""\
Specify the old encoding for strings. The default is iso-8859-1.""")),
        (('-t', '--to_enc'),
         dict(default=None, type=str, help="""\
Specify the new encoding for strings. The default is utf-8.""")),
        (('-v', '--verbose'),
         dict(dest='verbose', action='store_true', help="""\
Print more progress messages.""")),
        (('-d', '--doit'),
         dict(dest='doit', action='store_true', help="""\
As a precaution against accidentally recoding lists incorrectly, if this
is not specified the script won't actually change any lists, but it will
print 'updated' messages for those lists it would have changed.""")),
    ]
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()


def recode(mlist, f, t):
    """Recode the public attributes of a list from charset f to charset t.

    Every name reported by dir(mlist) that does not start with an
    underscore is passed through doitem(); when doitem() returns an
    updated value it is stored back on mlist under the same name.

    mlist is the list, f is the old charset and t is the new charset.
    Returns True if at least one attribute was replaced, else False.
    """
    any_updated = False
    for attr_name in dir(mlist):
        if attr_name.startswith('_'):
            continue
        replacement = doitem(getattr(mlist, attr_name), f, t)
        if not replacement:
            # doitem() reports "no change" with a false value.
            continue
        setattr(mlist, attr_name, replacement)
        any_updated = True
    return any_updated

def doitem(v, f, t):
    """Recode the strings found in v, descending into containers.

    A str is handed to convert().  Lists and dictionaries are walked
    recursively and updated in place (only dictionary values are
    examined, not keys); a tuple is rebuilt as a new tuple.  Any other
    type is left alone.

    Returns the updated item, or None if nothing needed to change.
    """
    if isinstance(v, str):
        return convert(v, f, t)

    if isinstance(v, list):
        touched = False
        for index, element in enumerate(v):
            replacement = doitem(element, f, t)
            if replacement:
                v[index] = replacement
                touched = True
        return v if touched else None

    if isinstance(v, tuple):
        # Tuples are immutable, so collect the elements and build a new
        # tuple only if at least one of them was recoded.
        touched = False
        rebuilt = []
        for element in v:
            replacement = doitem(element, f, t)
            if replacement:
                touched = True
                rebuilt.append(replacement)
            else:
                rebuilt.append(element)
        return tuple(rebuilt) if touched else None

    if isinstance(v, dict):
        touched = False
        for key, old_value in v.items():
            replacement = doitem(old_value, f, t)
            if replacement:
                v[key] = replacement
                touched = True
        return v if touched else None

    return None
def convert(s, f, t):
    """Recode the string s from charset f to charset t when needed.

    s is recoded only if it decodes cleanly as f and does NOT decode
    cleanly as t.  In that case the decoded text is returned encoded in
    t, with characters t cannot represent replaced.  In every other case
    None is returned, meaning "leave the string as it is".
    """
    # Decode with the old charset first; remember the text for re-encoding.
    try:
        decoded = unicode(s, f)
    except ValueError:
        decoded = None

    # Then check whether s is already acceptable in the new charset.
    try:
        unicode(s, t)
    except ValueError:
        already_valid = False
    else:
        already_valid = True

    if decoded is None or already_valid:
        return None
    return decoded.encode(t, 'replace')

def main():
    """Recode the string attributes of every selected mailing list.

    The old and new encodings default to iso-8859-1 and utf-8 when they
    are not given on the command line.  Each list returned by
    Utils.list_names() is opened unlocked; lists whose preferred_language
    is not among the requested languages are skipped.  The remaining
    lists are locked, recoded in memory with recode(), reported, and
    saved only when --doit was given.

    The lock is released in a finally clause that contains nothing else,
    so it is dropped even when recode() or Save() raises, and a failure
    in recode() is not misreported as 'nothing changed'.
    """
    ns = parseargs()
    if not ns.from_enc:
        ns.from_enc = 'iso-8859-1'
    if not ns.to_enc:
        ns.to_enc = 'utf-8'
    for listname in Utils.list_names():
        mlist = MailList(listname, lock=False)
        if ns.language and mlist.preferred_language not in ns.language:
            if ns.verbose:
                print('%s: skiping, preferred_language is %s' % (
                    mlist.real_name, mlist.preferred_language))
            continue
        mlist.Lock()
        try:
            changed = recode(mlist, ns.from_enc, ns.to_enc)
            if changed:
                # Reported even without --doit so a dry run shows what
                # would be changed.
                print('%s: updated' % mlist.real_name)
                if ns.doit:
                    mlist.Save()
            elif ns.verbose:
                print('%s: nothing changed' % mlist.real_name)
        finally:
            # Only the unlock lives here: an exception above must never
            # leave the list locked.
            mlist.Unlock()

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()