-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathword_to_csv.py
More file actions
41 lines (34 loc) · 1.49 KB
/
word_to_csv.py
File metadata and controls
41 lines (34 loc) · 1.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import csv
import logging
from docx import Document
# Module-level logger named after this module; the visible code attaches no
# handlers and sets no level, so output depends on the caller's logging setup.
logger = logging.getLogger(__name__)
def read_word_file(input_docx_file):
    """Return the text of every non-blank paragraph in a Word document.

    Args:
        input_docx_file: The value handed to ``Document`` to open the file
            (the sibling caller in this module passes a file path).

    Returns:
        A list of paragraph strings, in the order ``Document(...).paragraphs``
        yields them. Paragraphs whose text is empty or whitespace-only are
        omitted; the strings that are kept are returned unstripped.
    """
    paragraphs = Document(input_docx_file).paragraphs
    # The blank check uses the stripped text, but the original text is kept.
    return [paragraph.text for paragraph in paragraphs if paragraph.text.strip()]
def parse_data_to_csv(input_docx_file):
    """Write the comma-separated paragraphs of a Word document to a CSV file.

    Each non-blank paragraph returned by ``read_word_file`` is split on commas
    and every piece is stripped of surrounding whitespace. A paragraph is
    written as a CSV row only when it yields exactly as many fields as the
    fixed header; any other paragraph is skipped with a warning.

    Args:
        input_docx_file: Path of the Word document. The output path is this
            path with its extension replaced by ``.csv``.

    Returns:
        The path of the CSV file that was written.

    Raises:
        Exception: Any error raised while reading the document or writing the
            CSV is logged at error level and then re-raised unchanged.
    """
    try:
        paragraphs = read_word_file(input_docx_file)

        base_path, _ = os.path.splitext(input_docx_file)
        output_csv_file = base_path + '.csv'

        # newline='' lets the csv module control line endings itself.
        with open(output_csv_file, mode='w', newline='', encoding='utf-8') as csv_handle:
            header = ["Potential Surplus", "Est. Resale Value", "Opening Bid", "Date Sold", "Case #",
                      "Parcel ID", "Type of Foreclosure", "First Name", "Last Name", "Mailing Address",
                      "Mailing City", "Mailing State", "Mailing Zip Code", "Property Address",
                      "Property City", "Property State", "Property Zip Code", "County"]
            column_count = len(header)

            csv_out = csv.writer(csv_handle)
            csv_out.writerow(header)

            for line in paragraphs:
                fields = list(map(str.strip, line.split(',')))
                # Only rows with exactly one value per header column are kept.
                if len(fields) != column_count:
                    logger.warning(f"Skipping invalid row: {line}")
                    continue
                csv_out.writerow(fields)

        logger.info(f"CSV file '{output_csv_file}' has been created successfully.")
        return output_csv_file
    except Exception as e:
        logger.error(f"An error occurred while parsing the document: {str(e)}")
        raise