-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrename_paper.sh
More file actions
executable file
·171 lines (137 loc) · 4.21 KB
/
rename_paper.sh
File metadata and controls
executable file
·171 lines (137 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/bin/bash
#
# Time-stamp: <Friday 2025-07-25 21:12:02 Jess Moore>
#
# Renames academic paper using bibfile to YYYY_author.pdf
#
# Usage: bash rename_paper.sh "file" "bibfile"
# Print the help text (synopsis, description of the naming scheme, and the
# two positional arguments) on stdout, then terminate the script.
# The exit status is always 1, i.e. showing usage is treated as an error.
usage() {
  local script_name
  script_name=$(basename "$0")

  # One argument per output line; empty strings produce the blank lines.
  printf '%s\n' \
    "Usage: ${script_name} 'file' 'bibfile'" \
    "" \
    "Description: This script renames a research paper " \
    " to a standardised filename of " \
    " YYYY_Author1.pdf (1 author)" \
    " YYYY_Author1_Author2.pdf (2 authors)" \
    " YYYY_Author1_etal.pdf (3 or more authors)" \
    " using the bibfile data. " \
    "" \
    "Arguments:" \
    " file: Academic paper filename." \
    " bibfile: Bib filename" \
    ""

  exit 1 # Exit with a non-zero status to indicate an error
}
# Argument handling.
#
# Usage is shown when no arguments are given or when any argument is exactly
# a help flag. The comparison is an exact match on each argument: a substring
# match over all arguments would also fire on ordinary filenames that merely
# contain "-h" or "help" (e.g. "graph-heuristics.pdf").
if [[ $# -eq 0 ]]; then
  usage
fi
for arg in "$@"; do
  case "$arg" in
    -h | -help | --help | help)
      usage
      ;;
  esac
done
unset arg

# Both positional arguments are required.
if [[ $# -lt 2 ]]; then
  echo "Error: Missing bibfile argument." >&2
  usage
fi

PAPERNAME=$1
BIB=$2

# Check paper exists
if [[ ! -f "$PAPERNAME" ]]; then
  echo "Error: Paper ${PAPERNAME} does not exist. Check filename is correct." >&2
  exit 1
fi

# Check bib file exists
if [[ ! -f "$BIB" ]]; then
  echo "Error: Bib file ${BIB} does not exist. Check filename downloaded and correct." >&2
  exit 1
fi
# Check bib file format and convert if necessary.
#
# Sets BIBXML to the path of the xml file that is queried afterwards:
#   - .xml input is used as-is;
#   - .ris / .nbib / .bib input is converted with the matching <ext>2xml
#     tool into "<basename of BIB>.xml" in the current directory;
#   - anything else aborts the script with status 1.
EXT="${BIB##*.}"
BASENAME=$(basename -- "$BIB")
case "${EXT}" in
  xml)
    BIBXML=$BIB
    ;;
  ris | nbib | bib)
    echo "Converting ${BIB} to .xml format."
    BIBXML="${BASENAME}.xml"
    # EXT is restricted to ris/nbib/bib by the case pattern above, so this
    # resolves to ris2xml, nbib2xml or bib2xml respectively.
    if ! "${EXT}2xml" "$BIB" > "$BIBXML" || [[ ! -s "$BIBXML" ]]; then
      echo "Error: Failed to convert ${BIB} to .xml format." >&2
      # Do not leave a partial/empty conversion result behind.
      rm -f -- "$BIBXML"
      exit 1
    fi
    ;;
  *)
    echo "Error ${BIB} is not in a supported file format." >&2
    # Must be lowercase: "Exit" is not a shell command, so the script would
    # carry on with BIBXML unset.
    exit 1
    ;;
esac
# Extract the authors' family names from the xml (xml sel -v prints one
# matched value per line).
AUTHORS=$(xml sel -N x="http://www.loc.gov/mods/v3" -t -v "//x:mods[@ID]/x:name[@type='personal']/x:namePart[@type='family']" "$BIBXML")

# Prompt for author/s string if not in bib file
if [[ -z $AUTHORS ]]; then
  # Keep asking until something non-empty is entered. If stdin is closed
  # (read fails with nothing captured) give up instead of looping forever.
  while [[ -z $AUTHORS ]]; do
    if ! read -r -p "Provide Author full names separated by '\n': " AUTHORS \
      && [[ -z $AUTHORS ]]; then
      echo "" >&2
      echo "Error: No authors provided." >&2
      exit 1
    fi
  done
  # read -r keeps a typed "\n" as two literal characters; turn each one into
  # a real newline so that the per-line author handling below sees one
  # author per line, as the prompt promises.
  newline=$'\n'
  AUTHORS=${AUTHORS//\\n/$newline}
  unset newline
  # Drop whitespace left around each name by input such as "A \n B".
  AUTHORS=$(printf '%s\n' "$AUTHORS" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
  echo "User provided authors: ${AUTHORS}"
else
  echo "Authors: ${AUTHORS}"
fi

# First author with spaces replaced by underscores, and the number of authors
# (= number of lines in AUTHORS).
AUTHOR1=$(printf '%s\n' "$AUTHORS" | head -n 1 | sed 's/ /_/g')
N_AUTHORS=$(printf '%s\n' "$AUTHORS" | wc -l)
# Look up the publication date in the xml.
# The candidate XPath expressions are tried in order and the first one that
# yields a non-empty value wins: <part>/<date> first, then the alternate
# <originInfo>/<dateIssued>.
DATE_RANGE=""
for date_xpath in \
  "//x:mods[@ID]/x:part/x:date" \
  "//x:mods[@ID]/x:originInfo/x:dateIssued"; do
  DATE_RANGE=$(xml sel -N x="http://www.loc.gov/mods/v3" -t -v "$date_xpath" "${BIBXML}")
  if [[ -n $DATE_RANGE ]]; then
    break
  fi
done
unset date_xpath

# Without a date there is no year for the new filename: give up.
[[ -n $DATE_RANGE ]] || {
  echo "Error: Date was not parsed from ${BIB}."
  exit 1
}
# Derive the publication year from a date string and store it in the global
# YEAR. Progress lines are echoed to stdout.
#
# The string length selects the extraction strategy:
#   > 20 chars  verbose range, e.g. "yyyy-dd mmm yyyy through dd mmm yyyy":
#               everything before the first '-'
#   12-20 chars range of the form "XXXXXXXX-yyyy...": first four characters
#               after the first '-'
#               (NOTE(review): the original comment gave this form as
#               "XXXXXXXX-yy/mm/dd"; confirm the real input format)
#   5-11 chars  single date:
#                 year first  "yyyy-mm-dd", "yyyy/mm/dd", "yyyymmdd"
#                 year last   anything else, e.g. "dd/mm/yyyy" (chars 7-10)
#   <= 4 chars  the value is taken to be the year itself
#
# Arguments: $1 - date string
derive_year() {
  local date_str=$1
  local len=${#date_str}
  # Kept in variables so the regexes are not subject to quoting surprises
  # on the right-hand side of =~.
  local compact_re='^[0-9]{8}$'
  local slash_re='^[0-9]{4}/'

  if ((len > 20)); then
    # Verbose date range provided
    echo "${date_str}: ${len} (verbose)"
    YEAR=$(echo "$date_str" | cut -d '-' -f 1)
  elif ((len > 11)); then
    # Date range provided
    echo "${date_str}: ${len}"
    YEAR=$(echo "$date_str" | cut -d '-' -f 2 | cut -c1-4)
  elif ((len > 4)); then
    # Single date provided. Besides "yyyy-...", the all-digit yyyymmdd and
    # the yyyy/mm/dd forms also start with the year; treating them as
    # "year last" would return day/month digits instead of the year.
    if [[ "${date_str:4:1}" == "-" || $date_str =~ $slash_re || $date_str =~ $compact_re ]]; then
      echo "Formatted with year first"
      echo "${date_str}: ${len}"
      YEAR=${date_str:0:4}
    else
      echo "Formatted with year last"
      echo "${date_str}: ${len}"
      YEAR=${date_str:6:4}
    fi
  else
    # Year provided only
    YEAR=$date_str
  fi
}

derive_year "$DATE_RANGE"
# Build the author part of the new paper name.
#
# Arguments:
#   $1 - number of authors
#   $2 - first author, already underscore-separated
#   $3 - all authors, one per line
# Outputs (stdout):
#   1 author   -> Author1
#   2 authors  -> Author1_Author2
#   otherwise  -> Author1_etal
build_author_str() {
  local n_authors=$1
  local first_author=$2
  local authors=$3
  local second_author

  if [[ $n_authors -eq 1 ]]; then
    # 1 author
    printf '%s\n' "$first_author"
  elif [[ $n_authors -eq 2 ]]; then
    # 2 authors: take ONLY the second line. Taking the first two lines and
    # joining them would repeat the first author and leave a trailing "_".
    second_author=$(printf '%s\n' "$authors" | sed -n '2p')
    printf '%s\n' "${first_author}_${second_author// /_}"
  else
    # More than 2 authors
    printf '%s\n' "${first_author}_etal"
  fi
}

AUTHOR_STR=$(build_author_str "$N_AUTHORS" "$AUTHOR1" "$AUTHORS")
# Rename the paper to YEAR_AUTHORSTR.pdf (created in the current directory)
# and remove the bib files afterwards.
NEWPAPERNAME="${YEAR}_${AUTHOR_STR}.pdf"
echo "Old paper name: ${PAPERNAME}"
echo "New paper name: ${NEWPAPERNAME}"
ls -l -- "${PAPERNAME}"

# Rename paper. Success is only reported once mv has actually succeeded, and
# an existing file with the target name is never silently overwritten.
if [[ "$PAPERNAME" -ef "$NEWPAPERNAME" ]]; then
  echo "Paper is already named ${NEWPAPERNAME}; nothing to rename."
elif [[ -e "$NEWPAPERNAME" ]]; then
  echo "Error: ${NEWPAPERNAME} already exists; not overwriting it." >&2
  exit 1
elif ! mv -- "${PAPERNAME}" "${NEWPAPERNAME}"; then
  echo "Error: Failed to rename ${PAPERNAME} to ${NEWPAPERNAME}." >&2
  exit 1
else
  echo "Successfully renamed paper"
fi
ls -l -- "${NEWPAPERNAME}"

# Clean up
rm -- "$BIBXML"
# For .xml input BIBXML and BIB are the same file, which is already gone.
if [[ "$BIB" != "$BIBXML" ]]; then
  rm -- "$BIB"
fi
echo "Done."