-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbot.py
More file actions
104 lines (82 loc) · 2.85 KB
/
bot.py
File metadata and controls
104 lines (82 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import calendar
import os
import random
import urllib.request

import PyPDF2
import tweepy
from pdf2image import convert_from_path
from PIL import Image, ImageChops
# Module-level state shared between scrape() and the posting code at the
# bottom of the file.
media_ids = list()  # media ids collected from api.media_upload()
art_url = str()     # article page URL, assigned by scrape()
date = str()        # "month/day/year" label, assigned by scrape()

# The same four OAuth 1.0a values are handed to both tweepy entry points, so
# they are written once here and unpacked into each constructor.
_credentials = dict(
    consumer_key="",
    consumer_secret="",
    access_token="",
    access_token_secret="",
)

# `client` is used for create_tweet(); `api` is used for media_upload().
client = tweepy.Client(**_credentials)
auth = tweepy.OAuth1UserHandler(**_credentials)
api = tweepy.API(auth)
def trim(im):
    """Crop away the uniform border surrounding the content of *im*.

    The colour of the top-left pixel is taken as the background colour. The
    image is compared against a solid canvas of that colour and cropped to
    the bounding box of everything that differs noticeably from it.

    Args:
        im: A PIL image.

    Returns:
        The cropped image, or *im* itself when no region differs enough from
        the background (e.g. a blank page), so callers always receive an
        image they can save.
    """
    background = Image.new(im.mode, im.size, im.getpixel((0, 0)))
    diff = ImageChops.difference(im, background)
    # add() computes (diff + diff) / 2.0 - 100, i.e. it subtracts 100 from
    # every difference value so faint noise clamps to zero and is ignored
    # by getbbox().
    diff = ImageChops.add(diff, diff, 2.0, -100)
    bbox = diff.getbbox()
    if bbox:
        return im.crop(bbox)
    # Nothing stands out from the background: hand the image back untouched
    # instead of implicitly returning None.
    return im
def crop():
    """Crop every PNG in the working directory and replace it with the result.

    For each ``*.png`` file the top 150 pixel rows are cut off, the remaining
    uniform border is removed with ``trim()``, the result is written to
    ``cropped<N>.png`` and the source file is deleted. ``N`` counts up from 0
    so that every page gets its own output file.
    """
    # Sort by (length, name) so "out2.png" comes before "out10.png"; a plain
    # lexicographic sort or the raw os.listdir() order would scramble pages.
    png_files = sorted(
        (name for name in os.listdir('.') if name.endswith('.png')),
        key=lambda name: (len(name), name),
    )
    for n, file in enumerate(png_files):
        # The context manager closes the source file before it is removed.
        with Image.open(file) as page:
            w, h = page.size
            cropped = page.crop((0, 150, w, h))
            new_im = trim(cropped)
            if new_im is None:
                # trim() found nothing to trim; keep the header-cropped page.
                new_im = cropped
            new_im.save("cropped{}.png".format(n))
        os.remove(file)
def convert():
    """Render each page of ``pdf.pdf`` to ``out<N>.png`` (N starts at 0).

    ``convert_from_path`` is called with 100 as its second argument (the
    rendering DPI).
    """
    rendered_pages = convert_from_path('pdf.pdf', 100)
    for index, image in enumerate(rendered_pages):
        image.save(f'out{index}.png', 'PNG')
def scrape():
    """Download a randomly chosen article PDF and render it to PNG pages.

    Picks a random date and article number, builds the download URL and the
    article page URL, deletes every ``*.png`` and ``*.pdf`` file left in the
    working directory, and downloads the document to ``pdf.pdf``. If the
    download is not a readable PDF, a new random pick is tried; this repeats
    until a valid PDF is obtained, after which ``convert()`` is called.

    Side effects:
        Sets the module globals ``art_url`` (article page URL) and ``date``
        ("month/day/year") for the pick that succeeded, prints the date and
        download URL of every attempt, and rewrites ``pdf.pdf``.
    """
    global art_url, date
    # Retry in a loop rather than by recursion so a long run of misses
    # cannot exhaust the interpreter's recursion limit.
    while True:
        article = random.randrange(1, 100)
        # NOTE(review): randrange excludes its upper bound, so 1979 is never
        # chosen; left unchanged because the archive's last year is not
        # visible from this file.
        year = random.randrange(1897, 1979)
        month = random.randrange(1, 13)  # 1..12 inclusive
        # Choose among the days that actually exist in that month.
        day = random.randint(1, calendar.monthrange(year, month)[1])

        url = "https://www.nli.org.il/en/newspapers/?a=is&oid=frw{}{:02d}{:02d}-01.2.{}&type=nlilogicalsectionpdf&e=-------en-20--1--img-txIN%7ctxTI--------------1".format(year, month, day, article)
        art_url = "https://www.nli.org.il/en/newspapers/frw/{}/{:02d}/{:02d}/01/article/{}".format(year, month, day, article)
        date = "{}/{}/{}".format(month, day, year)
        print(date)

        # Clear the output of any previous run or failed attempt.
        for file in os.listdir('.'):
            if file.endswith(('.png', '.pdf')):
                os.remove(file)

        print(url)
        urllib.request.urlretrieve(url, "pdf.pdf")
        try:
            # Parsing the file is only a validity check; the reader object
            # is discarded and the handle is closed right away.
            with open("pdf.pdf", "rb") as pdf_file:
                PyPDF2.PdfFileReader(pdf_file)
        except PyPDF2.utils.PdfReadError:
            continue  # not a PDF -- try another random pick
        break

    convert()
# Script body: fetch a random article, crop its page images, upload them and
# post a tweet containing the date and the article link.
scrape()
crop()

# A single tweet accepts at most four media ids.
MAX_TWEET_IMAGES = 4

# Sort by (length, name) so numbered files keep page order
# (e.g. "cropped2.png" before "cropped10.png").
png_files = sorted(
    (name for name in os.listdir('.') if name.endswith('.png')),
    key=lambda name: (len(name), name),
)
for file in png_files[:MAX_TWEET_IMAGES]:
    res = api.media_upload(file)
    media_ids.append(res.media_id)

status = date + "\n" + art_url
# Pass None instead of an empty list so a text-only tweet is still sent when
# no image was produced.
post_result = client.create_tweet(text=status, media_ids=media_ids or None)
print(date)