forked from Necoro/cddl
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.py
More file actions
125 lines (99 loc) · 3.58 KB
/
scraper.py
File metadata and controls
125 lines (99 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
from time import sleep
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# bank specific files
import configs.comdirect as comdirect
# ------------------------------------------------------------------------------
# Log in and go to the postbox
def _wait_until_ready(probe, driver) -> None:
    """Poll ``probe(driver)`` once per second until it stops raising.

    Used to block while the user confirms the photoTAN / 2FA challenge.
    Progress is printed after every failed attempt.

    Args:
        probe: Callable taking the driver; it signals "not ready yet" by
            raising an exception.
        driver: The browser driver handed through to ``probe``.
    """
    waited = 0
    while True:
        try:
            probe(driver)
            return
        except Exception:
            # Deliberately not a bare ``except``: Ctrl-C (KeyboardInterrupt)
            # and SystemExit must still be able to abort the wait.
            waited += 1
            print('Wait for postbox {:3d} sec'.format(waited))
            sleep(1)


def login_with_browser(bank) -> "webdriver.Chrome":
    """Open Chrome, log in to the bank and navigate to its postbox.

    Args:
        bank: Bank-specific module/object. This function reads
            ``bank.login_url``, ``bank.post_box_url`` and
            ``bank.config`` (``downloads``, ``login``, ``password``,
            ``archive``) and calls its ``find_*``, ``accept_cookie`` and
            ``navigate_to_archive`` helpers.

    Returns:
        The Chrome driver, positioned on the postbox (or on the archive
        when ``bank.config.archive`` is truthy).

    Raises:
        RuntimeError: If the login field, the PIN field or the login button
            could not be located on the login page.
    """
    # Prepare and open the chrome driver; PDFs are downloaded straight into
    # the configured directory instead of being opened in the viewer.
    chrome_options = Options()
    chrome_options.add_experimental_option('prefs', {
        "download.default_directory": bank.config.downloads,
        "download.prompt_for_download": False,
        "download.directory_upgrade": True,
        "plugins.always_open_pdf_externally": True,
    })
    # NOTE(review): newer Selenium 4 releases expect the driver path via a
    # Service object rather than as a positional argument -- confirm against
    # the installed Selenium version.
    driver = webdriver.Chrome('/usr/bin/chromedriver', options=chrome_options)
    driver.get(bank.login_url)

    # Brief pause before looking up the form elements (presumably to let the
    # page finish rendering).
    sleep(1)
    login_field = bank.find_login_element(driver)
    pin_field = bank.find_password_element(driver)
    login_button = bank.find_login_button_element(driver)

    # Click away the cookie button
    bank.accept_cookie(driver)

    # Check that all elements exist; report every missing one, then fail with
    # a clear error instead of an AttributeError on None further down.
    elements = (
        ("login_field", login_field),
        ("pin_field", pin_field),
        ("login_button", login_button),
    )
    missing = [name for name, element in elements if element is None]
    for name in missing:
        print("{} not found".format(name))
    if missing:
        raise RuntimeError(
            "login page elements not found: " + ", ".join(missing)
        )

    login_field.send_keys(bank.config.login)
    sleep(0.3)
    pin_field.send_keys(bank.config.password)
    sleep(0.3)
    login_button.click()
    sleep(1)

    # Wait until photoTAN is used and accepted
    _wait_until_ready(bank.find_2fa_ready_element1, driver)

    # Go to the postbox and wait until it is ready as well
    driver.get(bank.post_box_url)
    _wait_until_ready(bank.find_2fa_ready_element2, driver)

    # Optionally switch to the archive
    if bank.config.archive:
        bank.navigate_to_archive(driver)

    return driver
def scrap_pdfs(bank, driver: "webdriver.Chrome"):
    """Collect new documents and remember the newest one as the new "head".

    The head file (``bank.config.file_head``) stores the newest document seen
    so far; its first line is the document id. That id is passed to
    ``bank.read_pages`` and, if a new head document is returned, the head
    file is rewritten (the previous one is kept as ``<head file>.bak``).

    Args:
        bank: Bank-specific module/object providing ``config.file_head`` and
            ``read_pages(driver, last_known_id)``.
        driver: Browser driver handed through to ``bank.read_pages``.
    """
    head_path = bank.config.file_head

    # Read the id (first line) of the last known document; a missing head
    # file simply means nothing has been seen yet.
    try:
        with open(head_path, 'r') as f:
            last_known_id = f.readline().rstrip('\r\n')
    except FileNotFoundError:
        last_known_id = ''
    print("last known pdf: '{:s}'".format(last_known_id))

    # Collect all files until the last head file; result is [id, name]
    new_head_doc = bank.read_pages(driver, last_known_id)
    if new_head_doc is None:
        return

    # Keep the old head as a backup. os.replace overwrites an existing .bak
    # on every platform; on the very first run there is no head file yet, so
    # there is nothing to back up.
    try:
        os.replace(head_path, head_path + '.bak')
    except FileNotFoundError:
        pass

    # Write the new head to the file
    try:
        with open(head_path, 'w') as f:
            f.write('\n'.join(new_head_doc))
        print("updated new head file to: {:s} - {:s}".format(new_head_doc[0], new_head_doc[1]))
    except FileNotFoundError:
        print('should never happen')
def logout_and_close(bank, driver: "webdriver.Chrome"):
    """Log out of the bank and close the browser window, if configured.

    Nothing happens unless ``bank.config.close`` is truthy.

    Args:
        bank: Bank-specific module/object providing ``config.close`` and
            ``logout(driver)``.
        driver: The browser driver to log out with and to close.
    """
    if not bank.config.close:
        return
    bank.logout(driver)
    sleep(1)
    # Close the driver that was passed in, not a module-level global.
    driver.close()
# ------------------------------------------------------------------------------
# Login and download comdirect
# Script entry: runs when the module is executed or imported.
# Open the browser and log in to comdirect, ...
drv = login_with_browser(comdirect)
# ... process the postbox documents, ...
scrap_pdfs(comdirect, drv)
# ... then hand the driver over for logout/close handling.
logout_and_close(comdirect, drv)