Skip to content
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,8 @@ nosetests.xml
.mr.developer.cfg
.project
.pydevproject

# drandreaskrueger:
env
twitterapi-oauth.txt
photos/
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ For help:

Authentication
--------------
See TwitterAPI documentation:

* ['create your app'](https://apps.twitter.com)
* [file format](http://pythonhosted.org/TwitterAPI/twitteroauth.html)
* [explanations](https://developer.twitter.com/en/docs/basics/getting-started)

Geocoder
--------
Expand All @@ -44,3 +48,26 @@ Dependencies
* TwitterAPI
* pygeocoder
* Fridge

Install
-------

python3 -m venv env/py3
source env/py3/bin/activate
pip3 install --upgrade setuptools wheel
pip3 install fridge pygeocoder TwitterAPI
python3 setup.py build
python3 setup.py install

or, for python2.7:

deactivate
virtualenv env/py2
python --version
source env/py2/bin/activate
pip install --upgrade setuptools wheel
pip install fridge pygeocoder TwitterAPI
python setup.py build
python setup.py install


57 changes: 41 additions & 16 deletions TwitterGeoPics/SearchOldTweets.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,40 @@
import sys
from TwitterAPI import TwitterAPI, TwitterOAuth, TwitterRestPager
import urllib

import datetime

GEO = Geocoder()

def parse_date(status):
    """
    Parse a tweet's 'created_at' timestamp into a naive UTC datetime.

    Twitter dates look like: Sun Nov 05 17:14:42 +0000 2017
    Instead of hard-coding '+0000' in the strptime format (which raised
    ValueError for any other offset), the offset token is parsed and
    subtracted, so every zone normalizes to UTC.  Tweets stamped +0000
    parse to exactly the same value as before.

    TODO: ending downloads as soon as a cutoff datetime is reached?
    """
    fields = status['created_at'].split()
    offset = fields[4]  # e.g. '+0000' or '-0530'
    naive = datetime.datetime.strptime(
        ' '.join(fields[:4] + fields[5:]), '%a %b %d %H:%M:%S %Y')
    # local = UTC + offset  =>  UTC = local - offset
    sign = -1 if offset.startswith('-') else 1
    delta = datetime.timedelta(hours=int(offset[1:3]), minutes=int(offset[3:5]))
    return naive - sign * delta

def unique_name(status):
    """
    Build a unique image base name: '<timestamp>_<screen_name>'.

    The timestamp (YYYYMMDD-HHMMSS, from the tweet's created_at) comes
    first so that directory listings sort chronologically.  Uniqueness
    assumes one photo tweet per user per second — TODO confirm that is
    acceptable for high-volume accounts.
    """
    screen_name = status['user']['screen_name']
    when = parse_date(status).strftime('%Y%m%d-%H%M%S')
    return when + "_" + screen_name

def download_photo(status, photo_dir):
    """
    Download every photo attached to a tweet into 'photo_dir'.

    Files are named by unique_name() plus the url's extension, e.g.
    '20171105-171442_jane.jpg'.

    NOTE(review): the block previously interleaved two naming schemes
    (screen_name-based and timestamp-based) and fetched every photo
    twice; only the timestamp-based scheme is kept here.
    """
    if 'media' in status['entities']:
        for media in status['entities'].get('media'):
            if media['type'] == 'photo':
                photo_url = media['media_url_https']
                # timestamp-first base name + the url's file extension
                file_name = unique_name(status) + '.' + photo_url.split('.')[-1]
                urllib.urlretrieve(photo_url, os.path.join(photo_dir, file_name))
                print("IMAGE: %s" % file_name)


def lookup_geocode(status):
Expand All @@ -37,16 +57,20 @@ def lookup_geocode(status):
print('GEOCODER QUOTA EXCEEDED: %s' % GEO.count_request)


def process_tweet(status, photo_dir, stalk, no_images_of_retweets):
    """
    Print one tweet and optionally download its photos / geocode it.

    status                 -- tweet dict from the Twitter API
    photo_dir              -- directory for photos; None/'' skips download
    stalk                  -- when true, look up the tweet's location
    no_images_of_retweets  -- when true, skip photos of retweets

    NOTE(review): the previous revision left both the old 3-argument def
    and this 4-argument def in the file; only the current one is kept.
    """
    print('\nUSER: %s\nTWEET: %s' % (status['user']['screen_name'], status['text']))
    print('DATE: %s' % status['created_at'])

    try:
        # dict.has_key() was removed in Python 3; 'in' works in both 2 and 3.
        is_retweet = 'retweeted_status' in status
        if photo_dir and not (no_images_of_retweets and is_retweet):
            download_photo(status, photo_dir)
        if stalk:
            lookup_geocode(status)
    except Exception as e:
        # Deliberate best-effort: a failed photo/geocode must not stop the search.
        print("ALERT exception ignored: %s %s" % (type(e), e))


def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, no_images_of_retweets, count):
"""Get tweets containing any words in 'word_list'."""
words = ' OR '.join(word_list)
params = {'q':words, 'count':count}
Expand All @@ -57,7 +81,7 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
for item in pager.get_iterator():
if 'text' in item:
if not no_retweets or not item.has_key('retweeted_status'):
process_tweet(item, photo_dir, stalk)
process_tweet(item, photo_dir, stalk, no_images_of_retweets)
elif 'message' in item:
if item['code'] == 131:
continue # ignore internal server error
Expand All @@ -79,7 +103,8 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
parser.add_argument('-count', type=int, default=15, help='download batch size')
parser.add_argument('-location', type=str, help='limit tweets to a place')
parser.add_argument('-oauth', metavar='FILENAME', type=str, help='read OAuth credentials from file')
parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets')
parser.add_argument('-no_retweets', action='store_true', help='exclude re-tweets completely')
parser.add_argument('-no_images_of_retweets', action='store_true', help='exclude re-tweet images')
parser.add_argument('-photo_dir', metavar='DIRECTORYNAME', type=str, help='download photos to this directory')
parser.add_argument('-stalk', action='store_true', help='print tweet location')
parser.add_argument('-words', metavar='W', type=str, nargs='+', help='word(s) to search')
Expand All @@ -98,10 +123,10 @@ def search_tweets(api, word_list, photo_dir, region, stalk, no_retweets, count):
print('Google found region at %f,%f with a radius of %s km' % (lat, lng, radius))
else:
region = None
search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.count)
search_tweets(api, args.words, args.photo_dir, region, args.stalk, args.no_retweets, args.no_images_of_retweets, args.count)
except KeyboardInterrupt:
print('\nTerminated by user\n')
except Exception as e:
print('*** STOPPED %s\n' % e)
print('*** STOPPED %s %s\n' % (type(e), e))

GEO.print_stats()