| 51 | | def monkey_before_main(self): |
|---|
| 52 | | ''' |
|---|
| 53 | | this function is used to monkey patch the decoding filter from cherrypy |
|---|
| 54 | | ''' |
|---|
| 55 | | conf = cherrypy.config.get |
|---|
| 56 | | |
|---|
| 57 | | if not conf('decoding_filter.on', False): |
|---|
| 58 | | return |
|---|
| 59 | | |
|---|
| 60 | | if getattr(cherrypy.request, "_decoding_attempted", False): |
|---|
| 61 | | return |
|---|
| 62 | | cherrypy.request._decoding_attempted = True |
|---|
| 63 | | |
|---|
| 64 | | enc = conf('decoding_filter.encoding', None) |
|---|
| 65 | | if not enc: |
|---|
| 66 | | ct = cherrypy.request.headers.elements("Content-Type") |
|---|
| 67 | | if ct: |
|---|
| 68 | | ct = ct[0] |
|---|
| 69 | | enc = ct.params.get("charset", None) |
|---|
| 70 | | if (not enc) and ct.value.lower().startswith("text/"): |
|---|
| 71 | | # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 |
|---|
| 72 | | # When no explicit charset parameter is provided by the |
|---|
| 73 | | # sender, media subtypes of the "text" type are defined |
|---|
| 74 | | # to have a default charset value of "ISO-8859-1" when |
|---|
| 75 | | # received via HTTP. |
|---|
| 76 | | enc = "ISO-8859-1" |
|---|
| 77 | | |
|---|
| | 51 | class MonkeyDecodingFilter(BaseFilter): |
|---|
| | 52 | def before_main(self): |
|---|
| | 53 | ''' |
|---|
| | 54 | this function is used to monkey patch the decoding filter from cherrypy |
|---|
| | 55 | ''' |
|---|
| | 56 | conf = cherrypy.config.get |
|---|
| | 57 | |
|---|
| | 58 | if not conf('decoding_filter.on', False): |
|---|
| | 59 | return |
|---|
| | 60 | |
|---|
| | 61 | if getattr(cherrypy.request, "_decoding_attempted", False): |
|---|
| | 62 | return |
|---|
| | 63 | cherrypy.request._decoding_attempted = True |
|---|
| | 64 | |
|---|
| | 65 | enc = conf('decoding_filter.encoding', None) |
|---|
| 79 | | enc = conf('decoding_filter.default_encoding', "utf-8") |
|---|
| 80 | | |
|---|
| 81 | | try: |
|---|
| 82 | | self.decode(enc) |
|---|
| 83 | | except UnicodeDecodeError: |
|---|
| 84 | | # IE and Firefox don't supply a charset when submitting form |
|---|
| 85 | | # params with a CT of application/x-www-form-urlencoded. |
|---|
| 86 | | # So after all our guessing, it could *still* be wrong. |
|---|
| 87 | | # Start over with ISO-8859-1, since that seems to be preferred. |
|---|
| 88 | | self.decode("ISO-8859-1") |
|---|
| | 67 | ct = cherrypy.request.headers.elements("Content-Type") |
|---|
| | 68 | if ct: |
|---|
| | 69 | ct = ct[0] |
|---|
| | 70 | enc = ct.params.get("charset", None) |
|---|
| | 71 | if (not enc) and ct.value.lower().startswith("text/"): |
|---|
| | 72 | # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 |
|---|
| | 73 | # When no explicit charset parameter is provided by the |
|---|
| | 74 | # sender, media subtypes of the "text" type are defined |
|---|
| | 75 | # to have a default charset value of "ISO-8859-1" when |
|---|
| | 76 | # received via HTTP. |
|---|
| | 77 | enc = "ISO-8859-1" |
|---|
| | 78 | |
|---|
| | 79 | if not enc: |
|---|
| | 80 | enc = conf('decoding_filter.default_encoding', "utf-8") |
|---|
| | 81 | |
|---|
| | 82 | try: |
|---|
| | 83 | self.decode(enc) |
|---|
| | 84 | except UnicodeDecodeError: |
|---|
| | 85 | # IE and Firefox don't supply a charset when submitting form |
|---|
| | 86 | # params with a CT of application/x-www-form-urlencoded. |
|---|
| | 87 | # So after all our guessing, it could *still* be wrong. |
|---|
| | 88 | # Start over with ISO-8859-1, since that seems to be preferred. |
|---|
| | 89 | self.decode("ISO-8859-1") |
|---|
| | 90 | |
|---|
def decode(self, from_enc):
    '''
    Decode every byte string found in cherrypy.request.params.

    Dicts and lists are walked recursively and rebuilt as new containers;
    str values are decoded with from_enc; any other value is passed
    through unchanged.

    from_enc -- name of the encoding the raw parameter values are in.

    Raises UnicodeDecodeError when a value cannot be decoded with
    from_enc. In that case cherrypy.request.params has not been touched,
    so the caller can retry with another encoding.
    '''
    def decode_from(value):
        if isinstance(value, dict):
            # Build a new dict instead of assigning into `value`: the
            # outermost dict is the live request.params, and it must stay
            # untouched if a later value fails to decode.
            return dict((key, decode_from(item))
                        for key, item in value.items())

        if isinstance(value, list):
            return [decode_from(item) for item in value]

        if isinstance(value, str):
            return value.decode(from_enc)

        return value

    decoded_params = decode_from(cherrypy.request.params)
    # this is done in two steps to make sure the exception in
    # before_main can retry a decode with another encoding if needed
    # DON'T merge those two lines
    cherrypy.request.params = decoded_params
|---|
| 102 | | decodingfilter.DecodingFilter.before_main = monkey_before_main |
|---|
# TODO: is there a better way to inject this ? Maybe earlier than start_extension
# Swap the stock DecodingFilter hook for our replacement, keeping its
# position in the CP2 'before_main' filter chain.
df = MonkeyDecodingFilter()
before_main_hooks = cherrypy.filters._filterhooks['before_main']
for index, hook in enumerate(before_main_hooks):
    # im_class is the class the registered hook method belongs to
    if hook.im_class == cherrypy.filters.decodingfilter.DecodingFilter:
        # replacing the item in place keeps the hook order intact
        before_main_hooks[index] = df.before_main
|---|