#!/usr/bin/env python # # Copyright (C) 2006 by the Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, # USA. # # 2006-08-28 mas Initial Rev. Mark Sapiro """Retrieve list subscribers from lists.topica.com. Usage: %(PROGRAM)s [options] listname owner-email password Options: --output file -o file Write output to specified file (required). --verbose -v Print page number of each retrieved index page to monitor progress. --debug -d Print copious debugging output - not recommended. --help -h Print this help message and exit listname is the name of the mailing list. owner-email is the email address of the list owner. password is the list owner's Topica account password. Output is one line per subscriber containing Email Address Real Name Moderation T = On F = Off D = Use List Setting Email Delivery Enabled (T|F) Digest Mode (T|F) Is Owner (T|F) Note that both 'Email Delivery Enabled' and 'Is Owner' have null values for the primary list owner. If Python 2.4's cookielib is available, we use it. Otherwise we require ClientCookie http://wwwsearch.sourceforge.net/ClientCookie/ """ import re import sys import getopt import urllib import urllib2 # Regexps for finding things in the pages SID = re.compile('\?sid=(?P\d+)') UID = re.compile('INPUT.*NAME="seui".*VALUE="(?P[^"]*)"') RNM = re.compile('INPUT.*NAME="sdn".*VALUE="(?P[^"]*)"') MOD = re.compile('OPTION VALUE="(?P[^"]*)" SELECTED') ENA = re.compile('INPUT.*NAME="sena" VALUE="(?P[^"]*)" CHECKED') DIG = re.compile('INPUT.*NAME="sdig" VALUE="(?P[^"]*)" CHECKED') ISO = re.compile('INPUT.*NAME="siso" VALUE="(?P[^"]*)" CHECKED') # if we have Python 2.4's cookielib, use it try: import cookielib policy = cookielib.DefaultCookiePolicy(rfc2965 = True) cookiejar = cookielib.CookieJar(policy) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar)).open except ImportError: import ClientCookie # if this is a new ClientCookie, we need to turn on RFC2965 cookies cookiejar = ClientCookie.CookieJar() try: cookiejar.set_policy(ClientCookie.DefaultCookiePolicy(rfc2965 = True)) # install an opener that uses this policy opener = ClientCookie.build_opener( ClientCookie.HTTPCookieProcessor(cookiejar)) ClientCookie.install_opener(opener) except AttributeError: # must be an old ClientCookie, which already accepts RFC2965 cookies pass opener = ClientCookie.urlopen PROGRAM = sys.argv[0] try: True, False except NameError: True = 1 False = 0 def usage(code, msg=''): if code: fd = sys.stderr else: fd = sys.stdout print >> fd, __doc__ % globals() if msg: print >> fd, msg sys.exit(code) def main(): try: opts, args = getopt.getopt(sys.argv[1:], "ho:dv", ["help", "output=", "debug", "verbose"]) except: usage(2) fp = None verbose = False debug = False for o,a in opts: if o in ("-v", "--verbose"): verbose = True if o in ("-d", "debug"): debug = True if o in ("-h", "--help"): usage(0) if o in ("-o", "--output"): fp = open(a, "wt") if not fp: usage(2, 'Output file required.') if len(args) != 3: usage(2) login_page = 'http://lists.topica.com/login.html' # get the login page and it's cookie page = opener(login_page) page.close() login_url = 'http://lists.topica.com/perl/login.pl' p = {'location': '', 'al': '', 'email': args[1], 'password': args[2] } # login page = opener(login_url, urllib.urlencode(p)) lines = page.read() page.close() if lines.find('Invalid username and/or password.') >= 0: usage(1, 'Invalid username and/or password.') if debug: print login_url, urllib.urlencode(p) print lines # logged in, now the main loop start = 1 print >> fp, '"email","real name","moderated","enabled","digest","owner"' while True: index_url = 'http://lists.topica.com/lists/%s/prefs/subscribers.html' % args[0] p = {'curPage': '%d' % start } try: page = opener(index_url, urllib.urlencode(p)) lines = page.read() page.close() except urllib2.HTTPError: usage(1, """Topica server error. Possibly a bad listname. If not, retry may succeed""") if lines.find('Sorry, we experienced an error.') >= 0: usage(1, """Topica error. Possibly a bad listname. If not, retry may succeed""") if lines.find('ACCESS DENIED') >= 0 or \ lines.find('You are not subscribed to this list.') >= 0: usage(1, "Topica says you don't have access to the '%s' list." % args[0]) if debug: print index_url, urllib.urlencode(p) print lines if verbose: print '%d' % start sids = [] m = True while m: m = SID.search(lines) if m: sids.append(m.group('sid')) lines = SID.sub('', lines, 1) if not sids: break for sid in sids: subscriber_url = 'http://lists.topica.com/lists/%s/prefs/edit_subscriber.html' \ % args[0] p = {'sid': sid } page = opener(subscriber_url, urllib.urlencode(p)) lines = page.read() page.close() if debug: print subscriber_url, urllib.urlencode(p) print lines uid = rnm = mod = ena = dig = iso = '' m = re.search(UID, lines) if m: uid = m.group('uid') m = re.search(RNM, lines) if m: rnm = m.group('rnm') m = re.search(MOD, lines) if m: mod = m.group('mod') m = re.search(ENA, lines) if m: ena = m.group('ena') m = re.search(DIG, lines) if m: dig = m.group('dig') m = re.search(ISO, lines) if m: iso = m.group('iso') print >> fp, '"%s","%s","%s","%s","%s","%s"' % \ (uid, rnm, mod, ena, dig, iso) start += 1 fp.close() if __name__ == '__main__': main()