#!/usr/bin/env python
#
# Copyright (C) 2006 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.
#
# 2006-08-28 mas    Initial Rev. Mark Sapiro <msapiro@value.net>

"""Retrieve list subscribers from lists.topica.com.

Usage: %(PROGRAM)s [options] listname owner-email password

Options:
   --output file
   -o file
       Write output to specified file (required).

   --verbose
   -v
       Print page number of each retrieved index page
       to monitor progress.

   --debug
   -d
       Print copious debugging output - not recommended.

   --help
   -h
       Print this help message and exit

   listname is the name of the mailing list.
   owner-email is the email address of the list owner.
   password is the list owner's Topica account password.

   Output is a header line followed by one line per subscriber.  Each
   subscriber line is a comma separated list of double quoted values for
      Email Address
      Real Name
      Moderation
         T = On
         F = Off
         D = Use List Setting
      Email Delivery Enabled (T|F)
      Digest Mode (T|F)
      Is Owner (T|F)
   Note that both 'Email Delivery Enabled' and 'Is Owner' have null values
   for the primary list owner.

   If Python 2.4's cookielib is available, we use it.  Otherwise we require
   ClientCookie  http://wwwsearch.sourceforge.net/ClientCookie/
"""

import re
import sys
import getopt
import urllib
import urllib2

# Regexps for finding things in the pages.  They are written as raw strings
# so the regex escapes (\? and \d) are not subject to Python's own string
# escape processing.
#
# SID finds a "?sid=NNN" query string; main() collects these from each
# subscribers index page.
SID = re.compile(r'\?sid=(?P<sid>\d+)')
# The rest are applied to a subscriber's edit page.  main() writes the
# captured values, in this order, under the output columns
# email, real name, moderated, enabled, digest and owner.
UID = re.compile(r'INPUT.*NAME="seui".*VALUE="(?P<uid>[^"]*)"')
RNM = re.compile(r'INPUT.*NAME="sdn".*VALUE="(?P<rnm>[^"]*)"')
# MOD is the value of the OPTION marked SELECTED.
MOD = re.compile(r'OPTION VALUE="(?P<mod>[^"]*)" SELECTED')
# ENA, DIG and ISO only match the INPUT that is marked CHECKED.
ENA = re.compile(r'INPUT.*NAME="sena" VALUE="(?P<ena>[^"]*)" CHECKED')
DIG = re.compile(r'INPUT.*NAME="sdig" VALUE="(?P<dig>[^"]*)" CHECKED')
ISO = re.compile(r'INPUT.*NAME="siso" VALUE="(?P<iso>[^"]*)" CHECKED')

# Set up `opener`, the callable main() uses as opener(url) or
# opener(url, data) to fetch pages, together with the `cookiejar` it is
# configured with.
# if we have Python 2.4's cookielib, use it
try:
    import cookielib
    policy = cookielib.DefaultCookiePolicy(rfc2965 = True)
    cookiejar = cookielib.CookieJar(policy)
    # Keep only the bound open() method so that `opener` is a plain callable
    # in both branches.
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)).open
except ImportError:
    # No cookielib available; fall back to the ClientCookie package.
    import ClientCookie
    # if this is a new ClientCookie, we need to turn on RFC2965 cookies
    cookiejar = ClientCookie.CookieJar()
    try:
        cookiejar.set_policy(ClientCookie.DefaultCookiePolicy(rfc2965 = True))
        # install an opener that uses this policy
        opener = ClientCookie.build_opener(
                ClientCookie.HTTPCookieProcessor(cookiejar))
        ClientCookie.install_opener(opener)
    except AttributeError:
        # must be an old ClientCookie, which already accepts RFC2965 cookies
        pass
    # `opener` is rebound here for both old and new ClientCookie; the opener
    # object built above is presumably reached through install_opener().
    opener = ClientCookie.urlopen

# Script name as invoked; usage() interpolates it into the module docstring
# through the %(PROGRAM)s placeholder.
PROGRAM = sys.argv[0]

# Define True and False when the running Python does not provide them.
try:
    True, False
except NameError:
    True = 1
    False = 0

def usage(code, msg=''):
    """Print the help text and an optional message, then exit.

    The help text is the module docstring interpolated with the module
    globals.  Output goes to stderr when `code` is non-zero and to stdout
    otherwise.

    code -- exit status passed to sys.exit().
    msg  -- extra text printed after the help text; skipped when empty.
    """
    # Start from stdout (plain help request) and switch to stderr for errors.
    fd = sys.stdout
    if code:
        fd = sys.stderr
    help_text = __doc__ % globals()
    print >> fd, help_text
    if msg:
        print >> fd, msg
    sys.exit(code)

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ho:dv",
                ["help", "output=", "debug", "verbose"])
    except:
        usage(2)

    fp = None
    verbose = False
    debug = False
    for o,a in opts:
        if o in ("-v", "--verbose"):
            verbose = True
        if o in ("-d", "debug"):
            debug = True
        if o in ("-h", "--help"):
            usage(0)
        if o in ("-o", "--output"):
            fp = open(a, "wt")
    if not fp:
        usage(2, 'Output file required.')
    if len(args) != 3:
        usage(2)

    login_page = 'http://lists.topica.com/login.html'
    # get the login page and it's cookie
    page = opener(login_page)
    page.close()

    login_url = 'http://lists.topica.com/perl/login.pl'
    p = {'location': '',
         'al': '',
         'email': args[1],
         'password': args[2]
         }
    # login
    page = opener(login_url, urllib.urlencode(p))
    lines = page.read()
    page.close()
    if lines.find('Invalid username and/or password.') >= 0:
        usage(1, 'Invalid username and/or password.')
    if debug:
        print login_url, urllib.urlencode(p)
        print lines

    # logged in, now the main loop
    start = 1
    print >> fp, '"email","real name","moderated","enabled","digest","owner"'
    while True:
        index_url = 'http://lists.topica.com/lists/%s/prefs/subscribers.html' % args[0]
        p = {'curPage': '%d' % start
             }
        try:
            page = opener(index_url, urllib.urlencode(p))
            lines = page.read()
            page.close()
        except urllib2.HTTPError:
            usage(1, """Topica server error. Possibly a bad listname.
If not, retry may succeed""")
        if lines.find('Sorry, we experienced an error.') >= 0:
            usage(1, """Topica error. Possibly a bad listname.
If not, retry may succeed""")
        if lines.find('ACCESS DENIED') >= 0 or \
          lines.find('You are not subscribed to this list.') >= 0:
            usage(1, "Topica says you don't have access to the '%s' list."
                      % args[0])
        if debug:
            print index_url, urllib.urlencode(p)
            print lines

        if verbose:
            print '%d' % start

        sids = []
        m = True
        while m:
            m = SID.search(lines)
            if m:
                sids.append(m.group('sid'))
                lines = SID.sub('', lines, 1)
        if not sids:
            break
        for sid in sids:
            subscriber_url = 'http://lists.topica.com/lists/%s/prefs/edit_subscriber.html' \
                      % args[0]
            p = {'sid': sid
                 }
            page = opener(subscriber_url, urllib.urlencode(p))
            lines = page.read()
            page.close()
            if debug:
                print subscriber_url, urllib.urlencode(p)
                print lines
            uid = rnm = mod = ena = dig = iso = ''
            m = re.search(UID, lines)
            if m:
                uid = m.group('uid')
            m = re.search(RNM, lines)
            if m:
                rnm = m.group('rnm')
            m = re.search(MOD, lines)
            if m:
                mod = m.group('mod')
            m = re.search(ENA, lines)
            if m:
                ena = m.group('ena')
            m = re.search(DIG, lines)
            if m:
                dig = m.group('dig')
            m = re.search(ISO, lines)
            if m:
                iso = m.group('iso')
            print >> fp, '"%s","%s","%s","%s","%s","%s"' % \
              (uid, rnm, mod, ena, dig, iso)

        start += 1

    fp.close()

if __name__ == '__main__':
    main()