Skip to content

Commit e4fdaed

Browse files
committed
* add sanity check for unknown arguments
* add unlicense to .ipynb
1 parent 3149b22 commit e4fdaed

2 files changed

Lines changed: 88 additions & 57 deletions

File tree

ParsingMetadataMD2JSON.ipynb

Lines changed: 79 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,47 @@
11
{
22
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "8bbf8f85-3a07-4b4b-82fe-75f51e54caa1",
6+
"metadata": {},
7+
"source": [
8+
"This is free and unencumbered software released into the public domain.\n",
9+
"\n",
10+
"Anyone is free to copy, modify, publish, use, compile, sell, or\n",
11+
"distribute this software, either in source code form or as a compiled\n",
12+
"binary, for any purpose, commercial or non-commercial, and by any\n",
13+
"means.\n",
14+
"\n",
15+
"In jurisdictions that recognize copyright laws, the author or authors\n",
16+
"of this software dedicate any and all copyright interest in the\n",
17+
"software to the public domain. We make this dedication for the benefit\n",
18+
"of the public at large and to the detriment of our heirs and\n",
19+
"successors. We intend this dedication to be an overt act of\n",
20+
"relinquishment in perpetuity of all present and future rights to this\n",
21+
"software under copyright law.\n",
22+
"\n",
23+
"THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n",
24+
"EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n",
25+
"MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\n",
26+
"IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR\n",
27+
"OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,\n",
28+
"ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\n",
29+
"OTHER DEALINGS IN THE SOFTWARE.\n",
30+
"\n",
31+
"For more information, please refer to <https://unlicense.org>"
32+
]
33+
},
334
{
435
"cell_type": "code",
5-
"execution_count": 1,
36+
"execution_count": null,
637
"id": "8737b6b4-a572-46b2-bd22-b70cfccb501d",
738
"metadata": {},
839
"outputs": [],
940
"source": [
10-
"from pathlib import Path\n",
11-
"import re\n",
12-
"import json\n",
13-
"import sys"
41+
"from pathlib import Path # Importing Path class from pathlib for file path manipulations\n",
42+
"import re # Importing re for regular expression operations\n",
43+
"import json # Importing json for handling JSON data\n",
44+
"import sys # Importing sys for command line argument handling"
1445
]
1546
},
1647
{
@@ -19,43 +50,47 @@
1950
"id": "8373a7f1-49fe-4279-8a35-fadb413acde0",
2051
"metadata": {},
2152
"outputs": [],
22-
"source": []
53+
"source": [
54+
"def show_help():\n",
55+
" \"\"\"Display the help message for the script.\"\"\"\n",
56+
" print(\"Usage:\")\n",
57+
" print(\"./ParsingMetadataMD2JSON [options] <input file>...\")\n",
58+
" print(\"Options:\")\n",
59+
" print(\"--help Show this screen.\")\n",
60+
" print()"
61+
]
2362
},
2463
{
2564
"cell_type": "code",
26-
"execution_count": 2,
65+
"execution_count": null,
2766
"id": "ee24ff0d-07b7-4b21-8f38-211783deb980",
2867
"metadata": {},
2968
"outputs": [],
3069
"source": [
31-
"\"\"\"\n",
32-
"Usage:\n",
33-
" ./ParsingMetadataMD2JSON [options] <input file>...\n",
34-
"Options:\n",
35-
" --help Show this screen.\n",
36-
"\"\"\"\n",
37-
"\n",
3870
"def main() -> int:\n",
39-
" # Reads in a MarkDown M_datafile.md parsing every section\n",
40-
" # into a separate JSON M_datafile.json file\n",
71+
" \"\"\"\n",
72+
" Reads in a MarkDown M_datafile.md parsing every section\n",
73+
" into a separate JSON M_datafile.json file\n",
74+
" \"\"\"\n",
4175
"\n",
4276
" # Get command line arguments for file\n",
4377
" args = sys.argv[1:]\n",
4478
" \n",
79+
" # Check for help option\n",
80+
" if '--help' in args:\n",
81+
" show_help() # Call the help function\n",
82+
" return 0 # Exit after showing help\n",
83+
" \n",
4584
" # Get the file to convert\n",
4685
" try:\n",
4786
" md_path = Path(args[0])\n",
4887
" if not md_path.exists():\n",
4988
" raise FileNotFoundError(f\"{md_path} does not exist.\")\n",
5089
" except IndexError:\n",
51-
" print(\"Usage:\")\n",
52-
" print(\"./ParsingMetadataMD2JSON [options] <input file>...\")\n",
53-
" print(\"Options:\")\n",
54-
" print(\"--help Show this screen.\")\n",
55-
" print()\n",
56-
" #raise IndexError(f\"Expected file name as first argument\")\n",
90+
" # Handle the case where no file is provided\n",
5791
" print(f\"Expected file name as first argument...EXITING...\")\n",
58-
" sys.exit(1) # something went wrong\n",
92+
" show_help() # Show help message\n",
93+
" sys.exit(1) # Exit the program with an error code as something went wrong\n",
5994
" \n",
6095
" with open(str(md_path), \"r\", encoding=\"utf-8\") as f:\n",
6196
" contents = f.read()#.replace(\"\\r\", \"\") # Remove Windows style line breaks\n",
@@ -68,14 +103,15 @@
68103
" with open(Path(args[0][:-len(\".md\")] + \".json\"), 'w', encoding='utf-8') as f:\n",
69104
" json.dump(dictionaryJSON, f, ensure_ascii=False, indent=4)\n",
70105
" \n",
106+
" # Print success message indicating the output file\n",
71107
" print(\"SUCCESS: \" + str(Path(args[0]))+\" parsed to \" + str(Path(args[0][:-len(\".md\")] + \".json\")) )\n",
72108
"\n",
73109
" return 0 # success\n"
74110
]
75111
},
76112
{
77113
"cell_type": "code",
78-
"execution_count": 3,
114+
"execution_count": null,
79115
"id": "650d3f63-758a-49b6-9663-338dcc47f653",
80116
"metadata": {},
81117
"outputs": [],
@@ -96,7 +132,7 @@
96132
},
97133
{
98134
"cell_type": "code",
99-
"execution_count": 4,
135+
"execution_count": null,
100136
"id": "f9f69886-9c77-4017-90bb-6f9e9ee82d42",
101137
"metadata": {},
102138
"outputs": [],
@@ -106,22 +142,22 @@
106142
" # change, amend, or replace to your needs\n",
107143
"\n",
108144
" # Initialize variables to store the JSON data\n",
109-
" M_title = {}\n",
110-
" M_creators = [] # can be a list\n",
111-
" M_publisher = {}\n",
112-
" M_contributors = [] # can be a list\n",
113-
" M_description = {}\n",
114-
" M_subjects = [] # a list of keywords\n",
115-
" M_date = {}\n",
116-
" M_language = [] # a list of languages\n",
117-
" M_formats = [] # a list of file formats\n",
118-
" M_type = {}\n",
119-
" M_coverage = {}\n",
120-
" M_identifier = {}\n",
121-
" M_methods = []\n",
122-
" M_sources = []\n",
123-
" M_relations = []\n",
124-
" M_rights = []\n",
145+
" M_title = {} # the title of entry\n",
146+
" M_creators = [] # can be a list of creators\n",
147+
" M_publisher = {} # the publisher\n",
148+
" M_contributors = [] # can be a list of contributors\n",
149+
" M_description = {} # the description of entry\n",
150+
" M_subjects = [] # a list of keywords\n",
151+
" M_date = {} # the last-changed date\n",
152+
" M_language = [] # a list of languages\n",
153+
" M_formats = [] # a list of file formats\n",
154+
" M_type = {} # the type of entry\n",
155+
" M_coverage = {} # the period of time\n",
156+
" M_identifier = {} # unique identifier (PID)\n",
157+
" M_methods = [] # a list of methods for creating the entry\n",
158+
" M_sources = [] # a list of sources of the entry\n",
159+
" M_relations = [] # a list of relations about the entry\n",
160+
"    M_rights = []            # a reference to licence statements\n",
125161
" \n",
126162
" \n",
127163
" for index, entry in enumerate(contentdictionary):\n",
@@ -270,18 +306,10 @@
270306
},
271307
{
272308
"cell_type": "code",
273-
"execution_count": 5,
309+
"execution_count": null,
274310
"id": "32674c9d-4060-4fcb-bfe0-97b922d64227",
275311
"metadata": {},
276-
"outputs": [
277-
{
278-
"name": "stdout",
279-
"output_type": "stream",
280-
"text": [
281-
"SUCCESS: M_Dataset_README_Example.md parsed to M_Dataset_README_Example.json\n"
282-
]
283-
}
284-
],
312+
"outputs": [],
285313
"source": [
286314
"%%python3 ParsingMetadataMD2JSON.py ./M_Dataset_README_Example.md\n",
287315
"# cell magic to fake command line execution\n",

ParsingMetadataMD2JSON.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@
2828
For more information, please refer to <https://unlicense.org>
2929
"""
3030

31-
from pathlib import Path
32-
import re
33-
import json
34-
import sys
35-
3631
from pathlib import Path # Importing Path class from pathlib for file path manipulations
3732
import re # Importing re for regular expression operations
3833
import json # Importing json for handling JSON data
@@ -42,7 +37,7 @@
4237
def show_help():
4338
"""Display the help message for the script."""
4439
print("Usage:")
45-
print("./ParsingMetadataMD2JSON [options] <input file>...")
40+
print("./ParsingMetadataMD2JSON [options] <input file.md>...")
4641
print("Options:")
4742
print("--help Show this screen.")
4843
print()
@@ -61,6 +56,13 @@ def main() -> int:
6156
show_help() # Call the help function
6257
return 0 # Exit after showing help
6358

59+
# Check for unknown arguments
60+
for arg in args:
61+
if not arg.startswith('--help') and not arg.endswith('.md'):
62+
print(f"Error: Unknown argument '{arg}'.")
63+
show_help()
64+
sys.exit(1)
65+
6466
# Get the file to convert
6567
try:
6668
md_path = Path(args[0])
@@ -72,6 +74,7 @@ def main() -> int:
7274
show_help() # Show help message
7375
sys.exit(1) # Exit the program with an error code as something went wrong
7476

77+
# read the file to convert and parse
7578
with open(str(md_path), "r", encoding="utf-8") as f:
7679
contents = f.read()#.replace("\r", "") # Remove Windows style line breaks
7780

0 commit comments

Comments
 (0)