forked from dsopscak/utf8
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnew_hook.py
More file actions
117 lines (95 loc) · 3.13 KB
/
new_hook.py
File metadata and controls
117 lines (95 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
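"""
Report which SQL script items in a Subversion repository look UTF-8 encoded.

Usage: new_hook.py TRANSACTION REPOSITORY ITEM [ITEM ...]

Each ITEM whose name ends in .sql or .osql (case-insensitive) and whose size
is below 524288 bytes is fetched with svnlook and checked for UTF-8 byte
sequences.  The items checked and the ones that look like UTF-8 are printed
to standard output.
"""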
import sys, os, traceback
import re
import getopt
import subprocess
def run(command):
    """Run *command* and return everything it wrote to standard output.

    Progress messages are printed to standard output before and after the
    child process runs.

    Args:
        command: Argument list handed directly to ``subprocess.Popen``.

    Returns:
        The child's captured standard output, or ``""`` if starting or
        reading the process raised an exception (the exception is printed,
        not re-raised).  The child's exit status is not checked and its
        standard error output is discarded.
    """
    try:
        print("*** opening subproces with command:")
        print(command)
        p = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        print("***")
        print("Reading output...")
        # communicate() drains stdout and stderr together and waits for the
        # child to exit.  Reading only stdout can block forever once the
        # child has filled the unread stderr pipe.
        output, _ = p.communicate()
        print("... done")
    except Exception as e:
        # Best effort: report the failure and hand back an empty result.
        print(">>> Caught exception: ")
        print(e)
        print("<<<")
        output = ""
    return output
# Try to determine whether the given data is encoded as UTF-8.  A True result
# may be a false positive; a False result means the data either contains no
# multi-byte sequence at all or contains bytes that cannot be UTF-8.
#
def guess_utf8(s):
    """Guess whether *s* holds UTF-8 encoded data.

    Args:
        s: The data to inspect.  A byte string or bytearray is examined
            as-is; a text string is mapped to bytes via ISO-8859-1 (one byte
            per character) first.

    Returns:
        True if the data starts with the UTF-8 byte order mark, or if it
        consists only of ASCII bytes and well-formed two-, three- or
        four-byte sequences with at least one multi-byte sequence present.
        False otherwise, including for empty or pure-ASCII data.

    Only the lead-byte ranges and continuation-byte ranges are checked;
    overlong forms and surrogate code points are not rejected, which is one
    source of possible false positives.
    """
    if isinstance(s, (bytes, bytearray)):
        # Already raw bytes: no text encoding step is needed (or allowed).
        ba = bytearray(s)
    else:
        ba = bytearray(s, "iso-8859-1")
    size = len(ba)

    # Believe the windowsish BOM if it's there.  Would be more robust to
    # skip over it and analyze the rest of the file.
    if size > 2 and ba[0] == 0xef and ba[1] == 0xbb and ba[2] == 0xbf:
        return True

    rval = False
    i = 0
    while i < size:
        lead = ba[i]
        if lead < 0x80:  # ascii stands alone
            i += 1
            continue

        if 0xc2 <= lead <= 0xdf:  # precedes one continuation
            trailing = 1
        elif 0xe0 <= lead <= 0xef:  # precedes two continuations
            trailing = 2
        elif 0xf0 <= lead <= 0xf4:  # precedes three continuations
            trailing = 3
        else:
            # Stray continuation byte or a byte that never occurs in UTF-8.
            return False

        # The whole sequence must be present ...
        if i + trailing >= size:
            return False
        # ... and every trailing byte must be a continuation byte.
        for offset in range(1, trailing + 1):
            if not 0x80 <= ba[i + offset] <= 0xbf:
                return False

        rval = True
        i += trailing + 1
    return rval
def get_utf8_items(transaction, repository, items):
    """Return the SQL script items whose contents look like UTF-8.

    Args:
        transaction: Identifier passed to svnlook after its "-r" option.
        repository: Repository path passed to svnlook.
        items: Paths inside the repository to consider.

    Returns:
        The subset of *items*, in their original order, whose name ends in
        ".osql" or ".sql" (case-insensitive), whose reported size is below
        524288 bytes, and whose contents ``guess_utf8`` accepts.
    """
    svnlook = "C:\\Program Files (x86)\\VisualSVN Server\\bin\\svnlook.exe"
    # NOTE(review): "-r" is svnlook's revision selector even though the value
    # is named `transaction`; "-t" would select an uncommitted transaction.
    # Confirm which one is intended.
    target = ["-r", transaction, repository]

    matches = []
    for item in items:
        if not item.lower().endswith(('.osql', '.sql')):
            continue
        size = run([svnlook, "filesize"] + target + [item])
        # Larger files are never fetched or inspected.
        if int(size) >= 524288:
            continue
        contents = run([svnlook, "cat"] + target + [item])
        if guess_utf8(contents):
            matches.append(item)
    return matches
def main():
    """Check the items named on the command line and print a report.

    The command line is read as TRANSACTION REPOSITORY ITEM...; fewer than
    two arguments raises IndexError.  Every item is listed, followed by the
    ones ``get_utf8_items`` returned.  Nothing is returned.
    """
    transaction = sys.argv[1]
    repository = sys.argv[2]
    items = sys.argv[3:]
    bad_items = get_utf8_items(transaction, repository, items)

    # Assemble the whole report first, then emit it one line at a time.
    report = ["Checking..."]
    report.extend(items)
    report.append("*************")
    report.append("Of these...")
    report.extend(bad_items)
    report.append("... are bad")
    for line in report:
        print(line)
if __name__ == '__main__':
    # Script entry point: run main() and turn its return value into the
    # process exit status (None means success, anything else is int()-ed).
    try:
        exit_status = main()
        sys.exit(0 if exit_status is None else int(exit_status))
    except SystemExit:
        # Let the sys.exit() above pass through untouched.
        raise
    except Exception as e:
        for line in ('ERROR, UNEXPECTED EXCEPTION', str(e)):
            sys.stderr.write(line + '\n')
        traceback.print_exc()
        # os._exit ends the process immediately, without the interpreter's
        # normal shutdown handling.
        os._exit(1)