-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgetarkpatch
More file actions
executable file
·250 lines (206 loc) · 5.6 KB
/
getarkpatch
File metadata and controls
executable file
·250 lines (206 loc) · 5.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/bin/bash
#
# getarkpatch
#
# Global state shared by the functions and the main flow below.
#
declare -i optcount=0		# how many leading arguments to shift past
declare -i number=1		# patches in the set; assume single patch
declare homelink		# URL given on the command line
declare homebase		# homelink without its last path component
declare patchfile="/dev/shm/getarkpatchfile"	# scratch download file
declare directory="$PWD"	# output directory for renamed patches
declare patlist			# lines of the home page that name patches
declare line
declare hrefstr='href="'	# marker that precedes a link target
declare href
declare -a hrefary		# URLs of the patches to download
declare -i hrefstrsiz=${#hrefstr}
declare -i chindex
declare -i count
declare -i index

# Help text printed by usage().
#
# ${0##*/} is the script name without its directory; unlike an unquoted
# $(basename $0) it also works when the script path contains whitespace.
# The text is printed with backslash escapes enabled, so the trailing
# "\0" line is expanded at print time.
# NOTE(review): "\0" presumably was meant to give a trailing blank
# line - confirm.
#
declare usagestr
usagestr=$(
cat <<EOF
${0##*/} [options] link
link - the link to the patch or to the prologue for a patch set.
Options
-n number - number of patches (default is 1)
-d directory - optional output directory, default is PWD: $PWD
-h - this help text
\0
EOF
)
# usage() - print the help text and terminate the script
#
# $1 - exit status to terminate with
#
usage() {
	local rc=$1

	# %b expands backslash escapes in the text, as echo -e does.
	printf '%b\n' "$usagestr"
	exit $rc
}
# strindex string substring
#
# Print the zero-based index of the first occurrence of substring in
# string, or -1 if substring does not occur in string.
#
# The substring is matched literally: it is quoted inside the pattern
# so that characters such as '*', '?' and '[' are not treated as glob
# operators.
#
function strindex {
	# Everything in front of the first occurrence of the substring;
	# the whole string when the substring does not occur.
	local prefix=${1%%"$2"*}

	if [[ "$prefix" == "$1" ]]; then
		echo -1
	else
		echo "${#prefix}"
	fi
}
# renapatch() - rename a patchfile taken from a web archive
#
# $1 - the path to the patchfile downloaded from the archive
# $2 - the directory into which the renamed file is to be written
# $3 - name of the variable that receives the path of the new file
#      (set to the empty string when no Subject line was found)
#
# The new file is named after the first line containing "Subject: ",
# with punctuation removed and separators turned into hyphens. It
# starts with the last lines containing "To:" and "From:" seen up to
# and including the Subject line, then the Subject line itself, then
# every following line unchanged.
#
# Returns 0 if a Subject line was found, 1 otherwise.
#
renapatch(){
	local patfil="$1"
	local dir="$2"
	local outvar="$3"
	local found=false
	local line
	local lastto=
	local lastfrom=
	local temp
	local newname=

	# "|| [[ -n ... ]]" keeps a final line that lacks a newline.
	while IFS= read -r line || [[ -n "$line" ]]; do
		# Once the Subject has been seen, copy the rest verbatim.
		if $found; then
			printf '%s\n' "$line" >> "$newname"
			continue
		fi

		[[ "$line" == *"To:"* ]] && lastto=$line
		[[ "$line" == *"From:"* ]] && lastfrom=$line
		[[ "$line" == *"Subject: "* ]] || continue

		found=true

		# Remove everything up to and including "Subject: "
		#
		temp=${line#*"Subject: "}

		# Remove quotes, backticks and backslashes
		#
		temp=${temp//"'"/}
		temp=${temp//'`'/}
		temp=${temp//'\'/}

		# Remove brackets, parentheses and other punctuation,
		# replace spaces, slashes, colons and semicolons with
		# hyphens, then drop any remaining whitespace.
		#
		temp=$(printf '%s\n' "$temp" \
			| sed -e 's/[][{}<>()|,"!@#$%^&*]//g' \
				-e 's#[/:; ]#-#g' \
			| tr -d '[:space:]')

		# Create the new name
		#
		newname="$dir/$temp.patch"
		echo "*********************"
		printf '%s\n' "$newname"
		echo "*********************"

		# Quoted, so header whitespace is kept and nothing globs.
		printf '%s\n' "$lastto" "$lastfrom" "$line" > "$newname"
	done < "$patfil"

	# Hand the name back without eval: the name is derived from
	# downloaded text and must never be re-parsed as shell code.
	[[ -n "$outvar" ]] && printf -v "$outvar" '%s' "$newname"

	if $found; then
		return 0
	fi
	return 1
}
# getapatch() - download one archive page and save it as a patch file
#
# $1 - URL of the archive page that holds the patch
# $2 - path of the scratch file that receives the downloaded text
#
# The renamed patch is written into $directory, a global defined at
# the top of this script.
#
# Returns 1 if no patch file could be produced from the page,
# otherwise the status of the final sed.
#
getapatch() {
	local patlin="$1"
	local patfil="$2"
	local newpat=

	echo "link: $patlin"
	echo "file: $patfil"

	# Download the page. Strip the HTML comment markers first to
	# expose the commented header lines for parsing, then strip all
	# HTML tags.
	# NOTE(review): -k turns off TLS certificate verification.
	#
	curl -k "$patlin" \
		| sed -r -e 's/(<!--X-)|(-->)//g' \
			-e 's/<[!a-zA-Z\/][^>]*>//g' > "$patfil"

	renapatch "$patfil" "$directory" newpat
	if [[ -z "$newpat" || ! -f "$newpat" ]]; then
		echo "no patch could be extracted from $patlin" >&2
		return 1
	fi

	# Convert the HTML entities in the file back to ASCII characters.
	# "&amp;" is decoded last so that text such as "&amp;lt;" ends up
	# as the literal "&lt;" instead of being decoded twice.
	# NOTE(review): the entity list is reconstructed from the intent
	# of this step - confirm it against what the archive emits.
	#
	sed -i \
		-e 's/&lt;/</g' \
		-e 's/&gt;/>/g' \
		-e 's/&quot;/"/g' \
		-e "s/&#39;/'/g" \
		-e 's/&ldquo;/"/g' \
		-e 's/&rdquo;/"/g' \
		-e 's/&#45;/-/g' \
		-e 's/&ndash;/-/g' \
		-e 's/&nbsp;/ /g' \
		-e 's/&amp;/\&/g' \
		"$newpat"
}
# parse_options() - parse the command-line options of this script
#
# Arguments: the argument list of the script ("$@")
#
# Globals written:
#   number    - from -n; must be a positive integer
#   directory - from -d; must be an existing directory
#   optcount  - how many leading arguments the options consumed, taken
#               from OPTIND so that "-n 3" and "-n3" are both counted
#               correctly
#
# Exits with 0 after printing the help text for -h, with 1 on an
# invalid option argument and with 127 on an unrecognized option or a
# missing option argument.
#
parse_options() {
	local OPTION OPTARG
	local OPTIND=1

	# The leading ':' selects silent error reporting, which puts the
	# offending option letter into OPTARG.
	while getopts ':n:d:h' OPTION; do
		case "$OPTION" in
		n )	[[ "$OPTARG" =~ ^[1-9][0-9]*$ ]] || {
				echo "$OPTARG is not a valid number of patches"
				exit 1
			}
			number=$OPTARG
			;;
		d )	directory=$OPTARG
			[ -d "$directory" ] || {
				echo "$directory is not a valid directory"
				exit 1
			}
			;;
		h )	usage 0
			;;
		: )	echo "option -$OPTARG requires an argument"
			usage 127
			;;
		* )	echo "unrecognized option -$OPTARG"
			usage 127
			;;
		esac
	done

	optcount=$((OPTIND - 1))
}

parse_options "$@"
shift "$optcount"

# Exactly one argument, the link, must remain.
[ $# -eq 1 ] || usage 1
# Home link must be the prologue of a patch set or a single patch.
#
# Single patch is assumed, unless user submitted -n number.
#
# If the number of patches to be extracted is greater than 1, then that
# number is used to pick, from the home link page, the lines that name
# the patches in the set and the replies to them.
#
# Those lines are parsed to remove the html tags, leaving only the
# relative links.
#
# To create the complete URL of each patch, the home link is stripped of
# the last field after the last '/' and the relative link of the
# corresponding patch is appended.
#
# Each patch extracted will have its html tags stripped.
#
homelink="$1"
echo "homelink: $homelink"

# If there is only one patch to process, then do that and exit with
# good status
#
if [ "$number" -eq 1 ]; then
	getapatch "$homelink" "$patchfile"
	exit 0
fi

# If we get this far, we are processing a patch set.
# Get the list of patches and replies in the set: every line of the
# home page that contains a '[' and the text "<number>]". The number is
# handed to awk as a variable instead of being spliced into the program.
# NOTE(review): -k turns off TLS certificate verification.
#
patlist=$(curl -k "$homelink" \
	| awk -v n="$number" 'index($0, "[") && index($0, n "]")')

# Extract the base link, everything but the html filename at the end
# of the link.
#
homebase="${homelink%/*}/"
echo "homebase: $homebase"

# Loop through the list of links discovered on the homepage and
# create the urls of the links to the individual patches in the
# patchset.
#
index=0
while IFS= read -r line; do
	# Find the href in the line. If there is no href, then
	# skip the line.
	#
	[[ "$line" == *"$hrefstr"* ]] || continue

	# Keep only what follows the first href="
	#
	line=${line#*"$hrefstr"}

	# Create the URL of the patch: the link target runs up to the
	# closing double quote.
	#
	href=${line%%'"'*}
	href=$homebase$href
	echo "href: $href"

	# Get the remainder of the line after the href
	#
	line=${line#*>}

	# Not gonna process replies as patches
	#
	[[ "$line" == "Re:"* ]] && continue

	hrefary[index]=$href
	index=$((index + 1))
done <<< "$patlist"

# At this point, the hrefary array only contains urls of patches in
# this patch set.
#
count=$index
if [ "$count" -eq 0 ]; then
	echo "no patch links found at $homelink" >&2
	exit 1
fi

# Loop through the hrefary array and get the patches.
#
for ((index = 0; index < count; ++index)); do
	getapatch "${hrefary[index]}" "$patchfile"
done

# patrmhead is not defined in this file.
# NOTE(review): presumably a companion command found on PATH - confirm.
#
patrmhead "$directory"
exit 0