-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcloudcrate.py
More file actions
340 lines (275 loc) · 13.6 KB
/
cloudcrate.py
File metadata and controls
340 lines (275 loc) · 13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/usr/bin/python
# https://docs.python.org/2/library/os.html#os.listdir
# Getting Started with AWS - https://aws.amazon.com/articles/3998
# Introduction to boto - http://boto.cloudhackers.com/en/latest/
# How to Install boto - http://stackoverflow.com/questions/2481287/how-do-i-install-boto
# https://ariejan.net/2010/12/24/public-readable-amazon-s3-bucket-policy/
# http://aws.amazon.com/code/Amazon-S3/1713
# Usage banner. It is printed unconditionally on every run, before the task
# argument is read, so invoking the script with no argument lists the commands.
print """
____ _ _ ____ _
/ ___| | ___ _ _ __| | / ___|_ __ __ _| |_ ___
| | | |/ _ \| | | |/ _` | | | | '__/ _` | __/ _
| |___| | (_) | |_| | (_| | | |___| | | (_| | || __/
\____|_|\___/ \__,_|\__,_| \____|_| \__,_|\__\___|
Usage : python cloudcrate.py
============================
Available tasks:
setup ................ If you are using cloudcrate for the first time , Run 'cloudcrate setup' to install all dependencies.
command: python cloudcrate.py setup
sync ................ Run Sync from the cloudcrate folder to sync to the cloud
command: python cloudcrate.py sync
download ................ Run Download and all files are automatically downloaded to folder s3_downloads on the Desktop
command: python cloudcrate.py download
"""
import sys
import os
from datetime import datetime
from collections import defaultdict
try:
task = str(sys.argv[1])
#args = str(sys.argv[2])
except IndexError:
print "Enter one of the above command line arguments "
sys.exit(1)
#==========================================================================================================================
# This is the first option in the "Available tasks" list. If the host does not have boto installed, this part of the
# code goes ahead and installs it. This is the expected scenario: most hosts will not have boto.
# I initially had a web link and planned to download boto from there, but:
# Hiccup #1: I discovered that wget is not installed by default on OS X.
# Hiccup #2: I ran into firewall issues when trying to download from behind the firewall.
# The solution was to bundle the required library (i.e. the boto files) with cloudcrate and install it locally.
#==========================================================================================================================
if task == 'setup' :
try:
import boto
print "=================================================================================="
print "All required libraries have already been installed , proceed to sync"
print "Please note, your current folder is your 'cloudcrate' copy files in this and sync "
print "Example sync command : python cloudcrate.py sync"
print "For list of all commands,type : python cloudcrate.py "
print "=================================================================================="
except ImportError,e:
print "============================================================="
raw_input("Missing Libraries - Press Hit Enter to Install them")
print "============================================================="
print "Installing boto - python interface to Amazon S3"
print "============================================================="
os.system("tar -zxvf boto.0.tar.gz")
os.chdir("boto-2.34.0")
os.system("sudo python setup.py install ")
print "=================================================================================="
print "All required libraries have been installed, proceed to sync "
print "Please note, your current folder is your 'cloudcrate' copy files in this and sync "
print "Example sync command : python cloudcrate.py sync"
print "For list of all commands : python cloudcrate.py "
print "=================================================================================="
if task == 'sync' :
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import json
import time
from time import mktime
from datetime import datetime
from subprocess import call
import subprocess
print "Establish connection to AWS S3"
conn = S3Connection('AKIAJHAZH5AVPWXOI4ZA', 'PG8BmISNsLWFN/8dZ8jBckmqU/Jq8nFJFVEORswL')
bucket = conn.create_bucket('cloudcrate.hari')
print "======================================"
print "====== Syncing Current Directory ====="
print "======================================"
path = os.path.dirname(os.path.realpath('cloudcrate.py')) + '/'
list_of_files = []
creation_time_dict = {}
for (path,dirs,l_of_f) in os.walk(path):
for name in l_of_f:
full_name = (os.path.join(path,name))
#print "file fullname = ", full_name
list_of_files.append(full_name)
for name in dirs:
full_name = (os.path.join(path,name))
print "dir fullname =" ,full_name
#list_of_files.extend[full_name]
list_of_files.append(full_name)
print "==========================================="
print "====== The creation time of the files ====="
print "==========================================="
for files in list_of_files:
command_to_run = str("mdls -name kMDItemFSCreationDate") + " " + files
process = subprocess.Popen(command_to_run.split(), stdout = subprocess.PIPE)
output = process.communicate()[0]
head, tail = os.path.split(files)
creation_time_dict[tail] = str(output)[24:28]
print creation_time_dict
json.dump(creation_time_dict, open("creation_time.txt",'w'))
#print "======================================"
#print "===== SOME LIST ======================"
#print "======================================"
#print list_of_files
print "======================================"
print "===== LIST OF FILES IN DIRECTORY======"
print "======================================"
print "the list object returned above is " , type(list_of_files)
try:
print "in try block - this would handle a resyn operation"
print os.path.exists("last_modified.txt")
last_modified_dict = json.load(open("last_modified.txt"))
for files in list_of_files:
#if not files.startswith('.'):
#print 'Working on file ' ,files
if (files not in last_modified_dict):
last_modified_dict[files] = os.path.getmtime(files)
#print "Missing file Added to dictionary is ", files
print "uploading ..from try block if" , files
k = Key(bucket)
head, tail = os.path.split(files)
k.key = tail
#print "The key is " , k.key
k.set_contents_from_filename(files)
json.dump(last_modified_dict, open("last_modified.txt",'w'))
elif (files in last_modified_dict) & (os.path.getmtime(files) > last_modified_dict[files]) :
last_modified_dict[files] = os.path.getmtime(files)
print "uploading ..from try block elif" , files
k = Key(bucket)
head, tail = os.path.split(files)
k.key = tail
print "The key is " , k.key
k.set_contents_from_filename(files)
json.dump(last_modified_dict, open("last_modified.txt",'w'))
else :
print "skipping file from try block else ",files
bucket.set_acl('public-read')
except IOError as e :
print e
print "In IO exception block - There was no last_modified.txt file"
last_modified_dict = defaultdict()
for files in list_of_files:
print "files = " , files
#print "list of files = " , list_of_files
last_modified_dict[files]= os.path.getmtime(files)
print last_modified_dict
json.dump(last_modified_dict, open("last_modified.txt",'w'))
for files in list_of_files:
#if not files.startswith('.'):
print 'uploading file from IOError Exception' ,files
k = Key(bucket)
#k.key = files
head, tail = os.path.split(files)
k.key = tail
print "The key is " , k.key
k.set_contents_from_filename(files)
bucket.set_acl('public-read')
print "======================================================================="
print "visit http://cloudcrate.hari.s3.amazonaws.com/list.html to take a look at the bucket & uploaded files"
print "======================================================================="
if task == 'download' :
import json
from boto.s3.connection import S3Connection
from boto.s3.key import Key
download_last_modified_dict = {}
path = os.path.dirname(os.path.realpath('cloudcrate.py')) + '/'
print "Establishing connection to AWS"
conn = S3Connection('AKIAJHAZH5AVPWXOI4ZA', 'PG8BmISNsLWFN/8dZ8jBckmqU/Jq8nFJFVEORswL')
print "Connected,Getting bucket"
bucket = conn.get_bucket('cloudcrate.hari')
#os.mkdir('~/Desktop/downloaded/')
file_types_list =[]
creation_time_dict = {}
print " ==== Loading the json from the disk to memory =="
creation_time_dict = json.load(open("creation_time.txt"))
print "==========creation time dict looks as below ====="
for k,v in creation_time_dict.items():
print k,v
print "================================================="
if not os.path.exists(os.path.expanduser('~/Desktop/s3_downloads')):
print "===================================================="
print "Looks like you havent downloaded the files even once"
print "===================================================="
os.mkdir(os.path.expanduser('~/Desktop/s3_downloads/'))
print "Created a folder of name s3_downloads on your Desktop"
print "===================================================="
print "======= Creating the following local folders ======="
set_directories = set(creation_time_dict.values())
print set_directories
print "===================================================="
for items in set_directories:
os.mkdir(os.path.expanduser('~/Desktop/s3_downloads/'+items))
print "Created local folder of name ", items
# for key in bucket.list():
# download_last_modified_dict[key.name]= key.last_modified
# #print key.name
# #fileName, fileExtension = os.path.splitext(key.name)
# #print "file extension ==", fileExtension ,"file Name==" , fileName
# #print key.last_modified
# #print fileExtension[1:]
# try :
# if not os.path.exists(fileExtension[1:]):
# #print "inside loop that makes Directory based on fileEXT"
# #os.makedirs(fileExtension[1:])
# downloaded_file = key.get_contents_to_filename(key.name)
# print "Downloaded file from the if code block" , key.name
# except OSError as e:
# continue
# #os.chdir(fileExtension[1:])
# downloaded_file = key.get_contents_to_filename(key.name)
# print "Downloaded file from the if code block" , key.name
for key in bucket.list():
print "inside the download loop "
#print creation_time_dict[key.name]
download_last_modified_dict[key.name]= key.last_modified
print "Added a key to the last modified dictionary ==",key.last_modified
#key.name = path + key.name
print "Debug Message :", creation_time_dict[key.name]
if creation_time_dict[key.name] in set_directories:
os.chdir("/Users/Hari/Desktop/s3_downloads/" + creation_time_dict[key.name])
downloaded_file = key.get_contents_to_filename(key.name)
#print key.last_modified
print "Downloaded file from fresh download code block " , key.name
#print download_last_modified_dict
json.dump(download_last_modified_dict, open("download_last_modified.txt",'w'))
print set(creation_time_dict.values())
print "End of Download"
else:
print "============================================================================================="
print "the s3_downloads folder already exists , will now selectively download files into this folder"
print "============================================================================================="
os.chdir(os.path.expanduser('~/Desktop/s3_downloads'))
download_last_modified_dict = json.load(open("download_last_modified.txt"))
print "loaded json from file into memory and it looks like below", download_last_modified_dict
set_directories = set(creation_time_dict.values())
# for key in download_last_modified_dict:
# print download_last_modified_dict[key]
# print type(bucket.list())
# for key in bucket.list():
# #print type(key)
# print key.name, key.last_modified , download_last_modified_dict[key.name]
#try:
for key in bucket.list():
try:
if key.last_modified > download_last_modified_dict[key.name]:
#print "from S3 " ,key.last_modified ,"from file", download_last_modified_dict[key.name]
download_last_modified_dict[key.name]= key.last_modified
if creation_time_dict[key.name] in set_directories:
os.chdir("/Users/Hari/Desktop/s3_downloads/" + creation_time_dict[key.name])
downloaded_file = key.get_contents_to_filename(key.name)
print "Downloading file based on timestamp comparison",key.name
json.dump(download_last_modified_dict, open("download_last_modified.txt",'w'))
#downloaded_file = key.get_contents_to_filename(key.name)
else:
#print "Skipping download of file",key.name , "last updated time that I just read from S3",key.last_modified
print "Skipping download of file",key.name
json.dump(download_last_modified_dict, open("download_last_modified.txt",'w'))
except KeyError as e :
#print e
#print "KeyError" , key.name
download_last_modified_dict[key.name]=key.last_modified
#downloaded_file = key.get_contents_to_filename(key.name)
if creation_time_dict[key.name] in set_directories:
os.chdir("/Users/Hari/Desktop/s3_downloads/" + creation_time_dict[key.name])
downloaded_file = key.get_contents_to_filename(key.name)
print "Downloading file based on timestamp comparison",key.name
#print "Downloaded in the KeyError code block",key.name
#fileName, fileExtension = os.path.splitext(downloaded_file)
json.dump(download_last_modified_dict, open("download_last_modified.txt",'w'))
print set(creation_time_dict.values())
print "End of Download"