| 1 |
""" TurboGears offline docs generator |
|---|
| 2 |
|
|---|
| 3 |
TODO: |
|---|
| 4 |
|
|---|
| 5 |
* rewrite the links inside each doc so they point to the saved files |
|---|
| 6 |
* make the tarball |
|---|
| 7 |
* make the PDF |
|---|
| 8 |
|
|---|
| 9 |
Fixed: |
|---|
| 10 |
* create folders automatically |
|---|
| 11 |
* get css and pic |
|---|
| 12 |
|
|---|
| 13 |
""" |
|---|
| 14 |
import urllib2 |
|---|
| 15 |
import re |
|---|
| 16 |
import os |
|---|
| 17 |
|
|---|
| 18 |
# --- remote locations ---------------------------------------------------
# Base URL of the site, and the index page that get_doclist downloads.
acquire_site = "http://docs.turbogears.org/"
doclist = "TitleIndex?action=titleindex"

# --- patterns -----------------------------------------------------------
# Version prefix of the wanted pages and the name used to skip comment pages.
acquire_version = "2.0"
comment = "PageCommentData"
# verp is "<2.0/"; proc_doc replaces every occurrence with verr ("<").
verp = "<%s/" % acquire_version
verr = "<"

# --- settings for retrive_docs / proc_doc -------------------------------
# Suffix appended to the URL for the "rst" format.
raw = "?action=raw"
# Extension appended to the saved file name.
ext = ".rst"

# Links whose download failed; retrive_docs appends to this list.
brokenlink = []
|---|
| 34 |
|
|---|
| 35 |
def get_doclist(acquire_version):
    """
    list the index entries that belong to one documentation version

    Downloads acquire_site + doclist and keeps every line that starts
    with the version string, leaving out lines that name a comment page.

    @param acquire_version: version prefix of the wanted pages, e.g. "2.0"
    @type acquire_version: string
    @return: the matching index lines, unmodified (callers strip them)
    @rtype: list of strings
    """
    docs = urllib2.urlopen(acquire_site + doclist)
    try:
        lines = docs.readlines()
    finally:
        # release the connection even when reading fails
        docs.close()

    # Plain string tests instead of re.match: the version is a literal, not
    # a regex ("." would match any character), and re.match only looks at
    # the start of the line, so it could never reject a comment page on a
    # line that already starts with the version.
    return [link for link in lines
            if link.startswith(acquire_version) and comment not in link]
|---|
| 42 |
|
|---|
| 43 |
def retrive_docs(link, format):
    """
    get the doc with proper format

    @param link: the link, relative to acquire_site
    @type link: string
    @param format: "html" (plain page URL) or "rst" (URL with the raw
                   suffix appended)
    @type format: string
    @return: the opened doc, or None when the download failed (the link
             is then appended to brokenlink)
    @rtype: file-like object or None
    @raise ValueError: if format is neither "html" nor "rst"
    """
    link = link.strip()

    if format == "html":
        url = acquire_site + link
    elif format == "rst":
        url = acquire_site + link + raw
    else:
        # previously this fell through to "return doc" with doc unbound
        raise ValueError("unknown format: %r" % (format,))

    print("download " + url)
    try:
        return urllib2.urlopen(url)
    except Exception:
        # deliberate best effort: remember the failure and carry on, but
        # let KeyboardInterrupt / SystemExit through (a bare except did not)
        brokenlink.append(link)
        return None
|---|
| 73 |
|
|---|
| 74 |
def save_doc(filepath, doc):
    """
    write doc to filepath, creating the parent folders when needed

    @param filepath: destination path
    @type filepath: string
    @param doc: content to write
    @type doc: string
    @return: None; when the folder cannot be created the error is printed
             and nothing is written
    """
    # os.path.dirname also copes with a bare file name (no "/"), where the
    # old rfind-based split left the folder name unbound
    package = os.path.dirname(filepath)

    # create directory; makedirs so that nested paths work as well
    if package and not os.path.isdir(package):
        try:
            os.makedirs(package)
        except OSError as e:
            print(e)
            return

    # save file to destination; the with-block closes the file right away
    with open(filepath, 'w') as out:
        out.write(doc)
    print("saved to " + filepath)
|---|
| 96 |
|
|---|
| 97 |
def proc_doc(link, doc, targets):
    """
    process doc, and save the doc to proper destination

    @param link: link the doc was downloaded from
    @type link: string
    @param doc: opened doc, as returned by retrive_docs; it is closed here
                once it has been read
    @type doc: file-like object
    @param targets: all links being processed (currently unused; kept for
                    the planned link rewriting)
    @type targets: list of strings
    """
    link = link.strip()
    # the bare version link is stored as the index of its folder
    if link == acquire_version:
        link = acquire_version + "/index"

    # file path; str.replace returns a new string, so its result must be
    # kept for the backslashes to really be turned into "/"
    filepath = (os.getcwd() + "/" + link + ext).replace('\\', '/')

    # silly proc
    try:
        content = doc.read()
    finally:
        doc.close()

    # NOTE(review): "extra" is not defined in the part of the file visible
    # here -- confirm where it comes from, otherwise this raises NameError
    for doclink in extra:
        content = content.replace("/" + doclink, doclink)

    # replace '<2.0/' to '<'
    content = content.replace(verp, verr)

    # TODO: replace links to proper link (append ext to every link listed
    # in targets, except the bare version link)

    save_doc(filepath, content)
|---|
| 132 |
|
|---|
| 133 |
def process_docs(targets):
    """
    download every link in targets as rst and pass it on to proc_doc

    @param targets: links to download
    @type targets: list of strings
    """
    # get css and pic

    # retrive docs
    for target in targets:
        fetched = retrive_docs(target, "rst")
        if fetched is None:
            # nothing was returned for this link, so there is nothing to save
            continue
        proc_doc(target, fetched, targets)

    print("done, brokenlink=%s" % brokenlink)

    # make the tarball
|---|
| 145 |
|
|---|
| 146 |
|
|---|
| 147 |
if __name__ == '__main__':
    # pass the module-level acquire_version instead of repeating "2.0", so
    # the index filter and proc_doc always work on the same version
    targets = get_doclist(acquire_version=acquire_version)
    process_docs(targets)
|---|