Ticket #34: get_tgdoc.py

File get_tgdoc.py, 3.4 kB (added by fredlin, 7 months ago)

new tgdoc.py to work with sphinx

Line 
1 """ TurboGears offline docs generator
2
3 TODO:
4  
5   * rewrite links between pages so they point to the saved local files
6   * make the tarball
7   * make the PDF
8  
9 Fixed:
10   * create folders automatically
11   * get css and pic
12
13 """
14 import urllib2
15 import re
16 import os
17
# links
# Base URL every download address is built from.
acquire_site = "http://docs.turbogears.org/"
# Query, relative to acquire_site, whose response get_doclist reads line by
# line as the list of page names.
doclist = "TitleIndex?action=titleindex"

# pattern
# Page-name prefix selecting which documentation version is downloaded.
acquire_version = "2.0"
# Marker identifying comment pages, which get_doclist leaves out.
comment = "PageCommentData"
#"<2.0/"
# proc_doc replaces every occurrence of verp with verr in a downloaded page.
verp = "<" + acquire_version + "/"
verr = "<"

# retrive_docs
# Query suffix retrive_docs appends when the "rst" format is requested.
raw = "?action=raw"
# Extension proc_doc gives to every saved document.
ext = ".rst"

# Links proc_doc rewrites from "/<link>" to "<link>" in each page.  The name
# was referenced by proc_doc but never defined; it defaults to empty so the
# script runs.  NOTE(review): fill in the intended links, if any.
extra = []

# Links whose download failed; retrive_docs appends to this list.
brokenlink = []
34
def get_doclist(acquire_version):
    """
    fetch the title index and pick the pages of one documentation version

    @param acquire_version: page-name prefix to keep, e.g. "2.0"
    @type acquire_version: string
    @return: stripped page names that start with acquire_version and do
             not contain the comment marker
    @rtype: list of strings
    """
    docs = urllib2.urlopen(acquire_site + doclist)
    try:
        lines = docs.readlines()
    finally:
        # release the connection even when reading fails
        docs.close()

    targets = []
    for line in lines:
        link = line.strip()
        # Plain prefix test: used as a regex, the "." in "2.0" would match
        # any character.
        if not link.startswith(acquire_version):
            continue
        # The marker is searched anywhere in the name; an anchored match
        # could never hit a link that already starts with the version.
        if comment in link:
            continue
        targets.append(link)
    return targets
42        
def retrive_docs(link, format):
    """
    get the doc with proper format

    A failed download is recorded in brokenlink instead of being raised.

    @param link: the link, relative to acquire_site
    @type link: string
    @param format: "html" for the rendered page, "rst" for the page
                   requested with the raw suffix
    @type format: string
    @return: the opened response, or None when the download failed
    @rtype: file-like object or None
    @raise ValueError: if format is neither "html" nor "rst"
    """
    link = link.strip()

    if format == "html":
        url = acquire_site + link
    elif format == "rst":
        url = acquire_site + link + raw
    else:
        # Previously an unknown format fell through to an unbound local.
        raise ValueError("unsupported format %r, expected 'html' or 'rst'"
                         % (format,))

    print("download " + url)
    try:
        return urllib2.urlopen(url)
    except Exception:
        # Best effort: remember the failure and carry on.  Catching
        # Exception rather than everything lets Ctrl-C and SystemExit
        # still stop the run.
        brokenlink.append(link)
        return None
73
def save_doc(filepath, doc):
    """
    write doc to filepath, creating the parent directories when needed

    If the directory cannot be created the error is printed and nothing
    is written.

    @param filepath: destination file; may have no directory part
    @type filepath: string
    @param doc: the content to write
    @type doc: string
    """
    # A bare file name has an empty directory part and needs no directory.
    package = os.path.dirname(filepath)

    # create directory (including any missing intermediate ones)
    if package and not os.path.isdir(package):
        try:
            os.makedirs(package)
        except OSError as e:
            # Only give up if the directory is really missing; it may have
            # been created between the check and the call.
            if not os.path.isdir(package):
                print(e)
                return

    # save file to destination; report only once the write succeeded
    with open(filepath, 'w') as out:
        out.write(doc)
    print("saved to " + filepath)
96
def proc_doc(link, doc, targets):
    """
    process doc, and save the doc to proper destination

    The page is saved under the current working directory as
    <link><ext>; the version root page itself is saved as
    <acquire_version>/index<ext>.

    @param link: page name the doc was downloaded from
    @type link: string
    @param doc: opened response to read the page from; it is closed here
    @type doc: file-like object
    @param targets: all page names; unused until link rewriting is done
    @type targets: list of strings
    """
    #from docutils.core import publish_parts
    #outbin = publish_parts(doc.read(),writer_name="html")["html_body"]

    #prepare_dest()
    link = link.strip()
    if link == acquire_version:
        link = acquire_version + "/index"

    # file path; str.replace returns a new string, so keep the result
    filepath = (os.getcwd() + "/" + link + ext).replace('\\', '/')

    # silly proc
    try:
        text = doc.read()
    finally:
        doc.close()

    # "extra" is optional module-level configuration; when it is not
    # defined there is nothing to rewrite (it used to raise NameError).
    for doclink in globals().get("extra", ()):
        text = text.replace("/" + doclink, doclink)

    #replace '<2.0/' to '<'
    text = text.replace(verp, verr)

    # TODO: replace links to proper link, along the lines of
    #   for urllink in targets:
    #       if urllink != acquire_version:
    #           text = text.replace(urllink, urllink + ext)

    save_doc(filepath, text)
132
def process_docs(targets):
    """
    download every target as rst and save it, then report broken links

    @param targets: page names to download
    @type targets: list of strings
    """
    # TODO: get css and pic

    # retrive docs
    for link in targets:
        doc = retrive_docs(link, "rst")
        # None means the download failed; retrive_docs already recorded it
        if doc is not None:
            proc_doc(link, doc, targets)

    print("done, brokenlink=%s" % brokenlink)

    # TODO: make the tarball
145
146
if __name__ == '__main__':
    # Use the module-level version rather than a second hard-coded "2.0",
    # so the page filter and proc_doc always agree on the version.
    targets = get_doclist(acquire_version)
    process_docs(targets)