3131import sys
3232import tempfile
3333import shutil
34+ import codecs
3435
3536if sys .version_info .major == 2 :
3637 # Pythontidy is only supported on Python2
4647
4748DEFAULT_CONFIG_PATHS = ['~/.codevalidatorrc' , '/etc/codevalidatorrc' ]
4849
50+ # The first rule name which matches a registered encoding is used
51+ # both as a check that the file can be read with that encoding
52+ # and as an encoding filter for those rules which support fixing.
53+
4954DEFAULT_RULES = [
5055 'utf8' ,
5156 'nobom' ,
7782 '*.php' : DEFAULT_RULES + ['phpcs' ],
7883 '*.phtml' : DEFAULT_RULES ,
7984 '*.pp' : DEFAULT_RULES + ['puppet' ],
80- '*.properties' : DEFAULT_RULES + ['ascii' ],
85+ '*.properties' : ['ascii' ] + DEFAULT_RULES ,
8186 '*.py' : DEFAULT_RULES + ['pyflakes' , 'pythontidy' ],
8287 '*.rst' : DEFAULT_RULES ,
8388 '*.rb' : DEFAULT_RULES + ['ruby' , 'rubocop' ],
113118
114119STDIN_CONTENTS = None
115120
121+ ENCODING_BY_FILE = dict ()
116122
117123class BaseException (Exception ):
118124
@@ -164,6 +170,35 @@ def wrap(f):
164170
165171 return wrap
166172
def needs_unicode(fix_function):
    """
    Decorator for a _fix_... function: wrap both of its file arguments with
    ``codecs.EncodedFile(f, encoding)`` before calling it.  Externally the
    function is still handed a pair of byte-files.

    The wrapped function is called as
    ``wrapped(src, dst, encoding_or_options)`` where the third argument is
    either the name of the encoding (a string) or an options dict holding
    that name under the key ``'encoding'``.  When a dict is given it is
    forwarded to the fix function as a third argument; otherwise the fix
    function is called with ``(src, dst)`` only.

    The returned function has an attribute `needs_encoding` which tells
    the calling function that it needs an encoding argument (the name of
    the encoding to use).

    NOTE(review): per the codecs documentation, ``EncodedFile`` with a
    single encoding decodes and re-encodes with that same codec, so
    ``read()`` on the wrapped stream still returns byte strings and
    ``write()`` still expects them -- confirm whether real unicode streams
    (``codecs.getreader`` / ``codecs.getwriter``) were intended.
    """
    import functools

    # `basestring` only exists on Python 2; fall back to `str` on Python 3
    # so that the encoding-name check does not raise NameError there.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    @functools.wraps(fix_function)
    def wrapped_fix(src, dst, encoding_or_options):
        if isinstance(encoding_or_options, string_types):
            encoding = encoding_or_options
            options = None
        else:
            encoding = encoding_or_options['encoding']
            options = encoding_or_options
        # decode + encode
        src = codecs.EncodedFile(src, encoding)
        dst = codecs.EncodedFile(dst, encoding)
        if options:
            return fix_function(src, dst, options)
        return fix_function(src, dst)

    wrapped_fix.needs_encoding = True
    return wrapped_fix
201+
167202
168203def is_python3 (fd ):
169204 '''check first line of file object whether it contains "python3" (shebang)'''
@@ -183,39 +218,33 @@ def _validate_notabs(fd):
183218 return b'\t ' not in fd .read ()
184219
185220
@needs_unicode
def _fix_notabs(src, dst):
    """Write the content of src to dst with every tab replaced by four spaces."""
    four_spaces = b' ' * 4
    dst.write(src.read().replace(b'\t', four_spaces))
190226
191227
@message('contains carriage return (CR)')
def _validate_nocr(fd):
    """Return True when the content read from fd contains no carriage return."""
    content = fd.read()
    has_cr = b'\r' in content
    return not has_cr
195231
196232
@needs_unicode
def _fix_nocr(src, dst):
    """Write the content of src to dst with every carriage return removed.

    Byte literals are used, as in _validate_nocr and _fix_notabs.  On
    Python 2 they are identical to plain string literals.  On Python 3 the
    streams set up by needs_unicode (codecs.EncodedFile) return byte
    strings from read(), so a str pattern would raise TypeError in
    bytes.replace().
    """
    original = src.read()
    fixed = original.replace(b'\r', b'')
    dst.write(fixed)
210238
211239
212- @message ('is not ASCII encoded' )
213- def _validate_ascii (fd ):
214- try :
215- fd .read ().decode ('ascii' )
216- except UnicodeDecodeError :
217- return False
218- return True
def encoding_validator(encoding):
    """Build a validation function that checks decodability with `encoding`.

    The returned function reads everything from the given file object and
    tries to decode it.  It returns True on success, or a message string
    ("is not <ENCODING>-encoded") when a UnicodeDecodeError is raised.
    """
    def validate_encoding(fd):
        try:
            raw = fd.read()
            raw.decode(encoding)
        except UnicodeDecodeError:
            # the message is built only on failure, from the upper-cased name
            return "is not %s-encoded" % encoding.upper()
        else:
            return True

    return validate_encoding
219248
220249
221250@message ('has UTF-8 byte order mark (BOM)' )
@@ -245,6 +274,7 @@ def _validate_notrailingws(fd):
245274 return True
246275
247276
277+ @needs_unicode
248278def _fix_notrailingws (src , dst ):
249279 for line in src :
250280 dst .write (line .rstrip ())
@@ -773,15 +803,29 @@ def notify(*args):
773803 print (* args )
774804
775805
def get_encoding_rule(rules):
    """Return the first entry of `rules` that names a registered codec.

    Each rule name is passed to codecs.lookup(); names for which it raises
    LookupError are passed over.  Returns None when no rule matches.
    """
    for candidate in rules:
        try:
            codecs.lookup(candidate)
        except LookupError:
            continue
        else:
            return candidate
    return None
814+
776815def validate_file_with_rules (fname , rules ):
816+ encoding = get_encoding_rule (rules )
817+ ENCODING_BY_FILE [fname ] = encoding
777818 with open_file_for_read (fname ) as fd :
778819 for rule in rules :
779820 logging .debug ('Validating %s with %s..' , fname , rule )
780821 fd .seek (0 )
781822 func = globals ().get ('_validate_' + rule )
782823 if not func :
783- notify (rule , 'does not exist' )
784- continue
824+ if rule == encoding :
825+ func = encoding_validator (encoding )
826+ else :
827+ notify (rule , 'does not exist' )
828+ continue
785829 options = CONFIG .get ('options' , {}).get (rule )
786830 try :
787831 if options :
@@ -837,6 +881,7 @@ def fix_file(fname, rules):
837881 if CONFIG .get ('create_backup' , True ):
838882 dirname , basename = os .path .split (fname )
839883 shutil .copy2 (fname , os .path .join (dirname , CONFIG ['backup_filename' ].format (original = basename ))) # creates a backup
884+ encoding = ENCODING_BY_FILE [fname ]
840885 with open_file_for_read (fname ) as fd :
841886 dst = fd
842887 for rule in rules :
@@ -849,7 +894,10 @@ def fix_file(fname, rules):
849894 src .seek (0 )
850895 try :
851896 if options :
897+ options ['encoding' ] = encoding
852898 func (src , dst , options )
899+ elif func .needs_encoding :
900+ func (src , dst , encoding )
853901 else :
854902 func (src , dst )
855903 was_fixed &= True
@@ -863,7 +911,7 @@ def fix_file(fname, rules):
863911 # b) some fix functions destroyed the code
864912 if was_fixed and len (fixed ) > 0 :
865913 with open_file_for_write (fname ) as fd :
866- fd .write (fixed . encode () )
914+ fd .write (fixed )
867915 return True
868916 else :
869917 notify ('{0}: ERROR fixing file. File remained unchanged' .format (fname ))
0 commit comments