Ticket #34: tggen.py

File tggen.py, 1.7 kB (added by fredlin, 2 years ago)

A rough script to download all 1.0 docs from the docwiki. Before running the script, you need to create a folder named "1.0" with four subfolders in it: "GettingStarted", "RoughDocs", "SQLAlchemy", and "Wiki20". Each downloaded page is saved in HTML format. Feel free to enhance the code.

Line 
1 """ TurboGears offline docs generator
2
3 TODO:
4   * create folders automatically
5   * replace links to proper link
6   * get css and pic
7
8 """
9 import urllib2
10 import re
11 import os
12
# Wiki base URL, and the page (relative to it) that get_doclist() reads
# line by line as the list of page titles.
acquire_site = "http://docs.turbogears.org/"
doclist = "TitleIndex?action=titleindex"

# Title filters used by get_doclist(): the docs version to keep and the
# marker identifying comment pages to leave out.
acquire_version = "1.0"
comment = "PageCommentData"

# Stylesheet paths. Not referenced by any function in this script yet;
# presumably reserved for the "get css and pic" TODO -- confirm before use.
extra = [
    "wiki/modern/css/common.css",
    "wiki/modern/css/screen.css",
    "wiki/modern/css/print.css",
    "wiki/modern/css/projection.css",
]
25
def get_doclist():
    """Return the title-index lines naming pages of the wanted docs version.

    Fetches ``acquire_site + doclist`` and keeps every line that starts
    with ``acquire_version`` and does not contain the ``comment`` marker.
    Lines are returned exactly as read (trailing newline included);
    download_docs() strips them before use.

    Any error raised by ``urllib2.urlopen`` or by reading the response
    propagates to the caller.
    """
    index = urllib2.urlopen(acquire_site + doclist)
    try:
        lines = index.readlines()
    finally:
        # Release the connection even if reading fails.
        index.close()

    # Plain string tests instead of re.match():
    #  * as a regex, the "." in "1.0" matched any character;
    #  * re.match() is anchored at the start of the line, so the comment
    #    marker could never match a line that already starts with the
    #    version -- the exclusion never fired.
    return [line for line in lines
            if line.startswith(acquire_version) and comment not in line]
33
34    
def download_docs(link):
    """Download one wiki page and save it as an HTML file.

    ``link`` is a page name relative to ``acquire_site``; surrounding
    whitespace (such as the newline left by readlines()) is stripped.
    The page is fetched from ``acquire_site + link`` and written to
    ``<current working directory>/<link>.html``. The version front page
    (``link == acquire_version``) is stored as ``<version>/index.html``.

    The destination folder must already exist: open() does not create
    missing directories. Errors from ``urllib2.urlopen`` or from file
    I/O propagate to the caller. Returns None.
    """
    ext = ".html"

    link = link.strip()
    url = acquire_site + link

    # Report the URL that is actually fetched (the old message appended
    # "?action=raw", which was never requested). The single-argument
    # print(...) form prints the same text on Python 2 and 3.
    print("download " + url)
    doc = urllib2.urlopen(url)
    try:
        content = doc.read()
    finally:
        doc.close()

    # The bare version page becomes the index of the version folder.
    if link == acquire_version:
        link = acquire_version + "/index"

    # str.replace() returns a new string; the result has to be kept for
    # the backslash normalisation to have any effect.
    filepath = (os.getcwd() + "/" + link + ext).replace('\\', '/')
    print(filepath)

    # Binary mode stores the downloaded bytes untouched (text mode may
    # rewrite line endings on Windows); close the file deterministically.
    out = open(filepath, 'wb')
    try:
        out.write(content)
    finally:
        out.close()
63
def process_docs(targets):
    """Download every page named in ``targets``.

    ``targets`` is an iterable of page names, e.g. the list returned by
    get_doclist(). Each one is passed to download_docs() in order; an
    error raised there stops the loop and propagates. Returns None.
    """
    for link in targets:
        download_docs(link)
69
# Script entry point: list the wanted pages, then download each of them.
if __name__ == '__main__':
    process_docs(get_doclist())
73