#!/usr/bin/python -tt
#
# (C) 2007 Paul W. Frields.
# This file is licensed under the GNU General Public License (GPL) v2.

"""Take a list of pages from a Moin wiki and convert them to DocBook.

The page names are read one per line from an input file.  Each page is
fetched from ``<url base><page>?action=RenderAsDocbook`` and the response
is written, unmodified, to a file in the output directory whose name is
the page name with every "/" removed.
"""

import os
import sys
import urllib
import re
from contextlib import closing
from time import sleep
from optparse import OptionParser

# urlopen lives in urllib on Python 2 and in urllib.request on Python 3;
# try the newer location first so the script runs on either interpreter.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen


def _build_parser():
    """Return the OptionParser describing the command-line interface."""
    parser = OptionParser()
    parser.add_option("-d", "--output-directory", dest="outdir",
                      default=os.getcwd(),
                      help="Output files to directory DIR", metavar="DIR")
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true",
                      help="Use verbose logging", default=False)
    parser.add_option("-i", "--input-file", dest="infname", default="",
                      help="Take list of pages from file FILE",
                      metavar="FILE")
    parser.add_option("-u", "--url", dest="urlbase",
                      default="http://fedoraproject.org/wiki/Docs/Beats/",
                      help="Use URL as base for pages to fetch",
                      metavar="URL")
    # type="float" is required: without it a value given on the command
    # line stays a string and sleep() rejects it with a TypeError.
    parser.add_option("-p", "--pause", dest="pausetime", type="float",
                      default=0.5,
                      help="Wait SEC seconds between fetches",
                      metavar="SEC")
    parser.add_option("-s", "--slashes", dest="ignoreSlashes",
                      action="store_false", default=True,
                      help="Don't ignore page names with embedded slashes")
    return parser


def _read_page_list(path):
    """Return the non-blank page names listed one per line in *path*.

    Raises IOError (OSError on Python 3) if the file cannot be opened.
    """
    with open(path, "r") as infile:
        names = [line.rstrip("\r\n") for line in infile]
    # A blank line would otherwise turn into a fetch of the bare base URL
    # and an attempt to open the output directory itself for writing.
    return [name for name in names if name.strip()]


def _fetch_page(url):
    """Return the raw body served at *url*, closing the connection after."""
    with closing(urlopen(url)) as response:
        return response.read()


def main(argv=None):
    """Fetch every listed wiki page as DocBook into the output directory.

    argv is the list of command-line arguments without the program name;
    None means use sys.argv[1:].

    Exits with status -2 when the input file cannot be opened and -3 when
    the output directory is not writable.  Invalid option values are
    reported by optparse, which exits with status 2.
    """
    (opts, args) = _build_parser().parse_args(argv)

    try:
        pageList = _read_page_list(opts.infname)
    except IOError:
        print("Can't open input file %s" % opts.infname)
        sys.exit(-2)

    if not os.access(opts.outdir, os.W_OK):
        print("Can't write to output directory %s" % opts.outdir)
        sys.exit(-3)

    if opts.verbose:
        print("Read %d pages" % len(pageList))

    for page in pageList:
        if opts.ignoreSlashes and "/" in page:
            continue
        # Pause before every fetch so the wiki server is not hammered.
        sleep(opts.pausetime)
        if opts.verbose:
            print("Retrieving %s" % page)
        data = _fetch_page(opts.urlbase + page + "?action=RenderAsDocbook")
        # Slashes are dropped so sub-pages land flat in the output
        # directory; binary mode stores the fetched bytes untouched.
        outPath = os.path.join(opts.outdir, page.replace("/", ""))
        with open(outPath, "wb") as outXml:
            outXml.write(data)

    if opts.verbose:
        print("Finished")


if __name__ == "__main__":
    main()