-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathebooks.py
More file actions
111 lines (85 loc) · 3.61 KB
/
ebooks.py
File metadata and controls
111 lines (85 loc) · 3.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
from __future__ import print_function
import sys
import os
import tweepy
import HTMLParser
from pprint import pprint as print
html_parser = HTMLParser.HTMLParser()
def source_status_id(tweet):
    """Return the id of the status that ``tweet`` is a retweet of.

    ``tweet`` must expose a ``retweeted_status`` attribute whose ``id`` is
    the identifier of the original status.
    """
    original_status = tweet.retweeted_status
    return original_status.id
def connect():
    """Create a tweepy API client authenticated from environment variables.

    Reads TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN
    and TWITTER_ACCESS_TOKEN_SECRET from ``os.environ``; a missing variable
    raises ``KeyError`` before any tweepy object is built.

    Returns:
        A ``tweepy.API`` instance wrapping the configured OAuth handler.
    """
    # Look all four credentials up first, in this order, so a missing one
    # fails before the handler is constructed.
    key, secret, token, token_secret = [
        os.environ[name]
        for name in (
            'TWITTER_CONSUMER_KEY',
            'TWITTER_CONSUMER_SECRET',
            'TWITTER_ACCESS_TOKEN',
            'TWITTER_ACCESS_TOKEN_SECRET',
        )
    ]

    handler = tweepy.OAuthHandler(key, secret)
    handler.set_access_token(token, token_secret)
    return tweepy.API(handler)
def retweeted(tweet):
    """Tell whether ``tweet`` is a retweet.

    Returns ``False`` when the text does not start with ``'RT'``. Otherwise
    returns the tweet's ``retweeted_status`` attribute, or ``None`` when the
    attribute is absent, so the result is meant to be used for its
    truthiness.
    """
    if not tweet.text.startswith('RT'):
        return False
    return getattr(tweet, 'retweeted_status', None)
if __name__ == "__main__":
    # Mirror the recent tweets of the source account (TWITTER_SOURCE_ACCOUNT)
    # onto the reposting account. Pass --dry-run on the command line to print
    # the planned actions instead of posting them.
    screen_name = os.environ['TWITTER_SOURCE_ACCOUNT']
    api = connect()

    # get most recent tweets of source account
    source_tweets = api.user_timeline(
        screen_name,
        count=200,
        trim_user=True,
        exclude_replies=False,
        include_rts=True,
    )

    # get most recent tweets of reposting account
    ebooks_tweets = api.user_timeline(
        screen_name="tef_ebooks",
        count=200,
        trim_user=True,
        exclude_replies=False,
        include_rts=True,
    )

    # Split the reposting account's timeline into its retweets and its own
    # tweets (dropping anything that starts with '@').
    ebooks_retweets = [t for t in ebooks_tweets if retweeted(t)]
    ebooks_tweets = [t for t in ebooks_tweets if not (retweeted(t) or t.text.startswith('@'))]

    # get maximum date, id of ebooks tweets.
    # max() raises ValueError on an empty sequence, so when the reposting
    # account has no tweets of its own the cut-offs are left as None and the
    # id/date checks in old_tweet() are skipped.
    if ebooks_tweets:
        max_ebooks_id = max(tweet.id for tweet in ebooks_tweets)
        max_ebooks_timestamp = max(tweet.created_at for tweet in ebooks_tweets)
    else:
        max_ebooks_id = None
        max_ebooks_timestamp = None

    ebooks_retweet_ids = set(source_status_id(tweet) for tweet in ebooks_retweets)
    ebooks_tweet_ids = dict((tweet.text, tweet.id) for tweet in ebooks_tweets)

    # build a map of old tweet ids to new tweet ids
    new_tweet_ids = {}
    for tweet in source_tweets:
        if not retweeted(tweet):
            media = tweet._json['entities'].get('media')
            if media:
                # Replace each shortened media link ('url') in the text with
                # the entity's 'media_url_https'.
                for m in media:
                    source_url = m['media_url_https']
                    compressed_url = m['url']
                    tweet.text = tweet.text.replace(compressed_url, source_url)
            if tweet.text in ebooks_tweet_ids:
                new_tweet_ids[tweet.id] = ebooks_tweet_ids[tweet.text]

    def old_tweet(tweet):
        """Return a truthy value when ``tweet`` should not be reposted.

        A retweet is old when its original status is already among the
        reposting account's retweets, or when its text mentions the source
        account. Any other tweet is old when it starts with '@', '.' or
        'RT', is older (by id or creation time) than the newest tweet on the
        reposting account, or has text that was already posted there.
        """
        if retweeted(tweet):
            return source_status_id(tweet) in ebooks_retweet_ids or ('@'+screen_name in tweet.text)
        return (
            tweet.text.strip().startswith(('@', '.', 'RT')) or
            (max_ebooks_id is not None and tweet.id < max_ebooks_id) or
            (max_ebooks_timestamp is not None and tweet.created_at < max_ebooks_timestamp) or
            tweet.text in ebooks_tweet_ids
        )

    # Oldest first, so tweets are posted in their original order and a reply
    # can find the new id of the tweet it answers in new_tweet_ids.
    recent_source_tweets = sorted(
        [tweet for tweet in source_tweets if not old_tweet(tweet)],
        key=lambda t: t.created_at,
    )

    if '--dry-run' in sys.argv:
        # NOTE: ``print`` is pprint.pprint here (see the file's imports).
        for tweet in recent_source_tweets:
            if retweeted(tweet):
                print("retweet {!r}, #{!r}".format(tweet.text, source_status_id(tweet)))
            else:
                print("tweet:{!r} irt:{!r}".format(tweet.text, new_tweet_ids.get(tweet.in_reply_to_status_id)))
    else:
        for tweet in recent_source_tweets:
            if retweeted(tweet):
                try:
                    status = api.retweet(id=source_status_id(tweet))
                except Exception as exc:
                    # Retweeting stays best-effort, but the failure is
                    # reported and the tweet is skipped: there is no new
                    # status to record, and falling through would reuse a
                    # stale (or unbound) ``status`` below.
                    sys.stderr.write(
                        "retweet for source tweet #{!r} failed: {!r}\n".format(tweet.id, exc)
                    )
                    continue
            else:
                # Decode HTML entities in the text before posting it.
                text = html_parser.unescape(tweet.text)
                status = api.update_status(
                    status=text,
                    in_reply_to_status_id=new_tweet_ids.get(tweet.in_reply_to_status_id),
                )
            # Remember where this source tweet landed so later replies to it
            # can be threaded under the reposted copy.
            new_tweet_ids[tweet.id] = status.id