Skip to content

Commit 148be62

Browse files
committed
EDPOPS-321 include block count in md5
1 parent 036a049 commit 148be62

3 files changed

Lines changed: 11 additions & 5 deletions

File tree

solvebio/cli/data.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -295,7 +295,7 @@ def _object_exists(remote_parent, local_path, _client):
295295
return False
296296
else:
297297
# Check if the md5sum matches
298-
local_md5 = md5sum(local_path)[0]
298+
local_md5 = md5sum(local_path)
299299
remote_md5 = obj.get("md5")
300300
if remote_md5 and remote_md5 == local_md5:
301301
return True
@@ -831,7 +831,7 @@ def _download_recursive(
831831
# Skip over files that match remote md5 checksum
832832
if os.path.exists(local_path):
833833
remote_md5 = remote_file.get("md5")
834-
if remote_md5 and remote_md5 == md5sum(local_path)[0]:
834+
if remote_md5 and remote_md5 == md5sum(local_path):
835835
print("Skipping {} already in sync".format(local_path))
836836
continue
837837

solvebio/resource/object.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -600,7 +600,7 @@ def _upload_single_file(cls, obj, local_path, **kwargs):
600600
size = os.path.getsize(local_path)
601601

602602
# Get MD5 for single part upload
603-
local_md5, _ = md5sum(local_path, multipart_threshold=None)
603+
local_md5 = md5sum(local_path, multipart_threshold=None)
604604

605605
upload_url = obj.upload_url
606606

@@ -934,10 +934,16 @@ def _upload_single_part(
934934
if not chunk_data:
935935
break
936936

937+
def md5_base64(data):
938+
import hashlib
939+
md5 = hashlib.md5(data).digest()
940+
return base64.b64encode(md5).decode("utf-8")
941+
937942
# Upload without requests-level retry (let our custom retry handle it)
938943
session = requests.Session()
939944

940-
headers = {"Content-Length": str(len(chunk_data))}
945+
headers = {"Content-Length": str(len(chunk_data)),
946+
"ContentMD5": md5_base64(chunk_data)}
941947

942948
# Calculate timeout based on part size
943949
part_size_mb = len(chunk_data) / (1024 * 1024)

solvebio/utils/md5sum.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,4 +36,4 @@ def _read_chunks(f, chunk_size):
3636
for block in _read_chunks(f, multipart_chunksize):
3737
md5.update(block)
3838

39-
return md5.hexdigest(), block_count
39+
return f"{md5.hexdigest()}-{block_count}" if block_count else md5.hexdigest()

0 commit comments

Comments
 (0)