diff --git a/addons/base/views.py b/addons/base/views.py index c1bb834b7b0..eb63448a331 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -66,6 +66,8 @@ from website.ember_osf_web.decorators import ember_flag_is_active from website.project.utils import serialize_node from website.util import rubeus, timestamp, inspect_info # noqa +import re +import json as _json from osf.features import ( SLOAN_COI_DISPLAY, @@ -433,6 +435,9 @@ def get_auth(auth, **kwargs): credentials = node.serialize_waterbutler_credentials(provider_name) waterbutler_settings = node.serialize_waterbutler_settings(provider_name) + if is_node_process and provider_settings: + waterbutler_settings['max_file_size'] = getattr(provider_settings.config, 'max_file_size', None) + if not is_node_process: # for only location_id value storage = ExportDataLocation.objects.get(pk=location_id) @@ -582,6 +587,33 @@ def create_waterbutler_log(payload, **kwargs): params=payload ) if payload.get('email') is True or payload.get('errors'): + # Parse structured error info from WaterButler exception repr strings + # e.g. "" + error_info = None + if payload.get('errors'): + for err_str in payload['errors']: + # Try to extract JSON payload embedded in the repr string + json_match = re.search(r'\((?:\d+),\s*(\{.*\})\)', err_str, re.DOTALL) + if json_match: + try: + err_data = _json.loads(json_match.group(1)) + message_key = err_data.get('message_key', '') + if message_key == 'quota_exceeded' or 'quota_exceeded' in err_str: + error_info = {'type': 'quota_exceeded'} + break + elif err_data.get('oversized_files'): + error_info = { + 'type': 'oversized', + 'oversized_files': err_data['oversized_files'], + 'max_size': err_data.get('max_size'), + } + break + except Exception as e: + logger.warning( + f'Failed to parse WaterButler error representation string: {err_str}. ' + f'Error: {e}' + ) + mails.send_mail( user.username, mails.FILE_OPERATION_FAILED if payload.get('errors') @@ -592,7 +624,8 @@ def create_waterbutler_log(payload, **kwargs): source_path=payload['source']['materialized'], source_addon=payload['source']['addon'], destination_addon=payload['destination']['addon'], - osf_support_email=settings.OSF_SUPPORT_EMAIL + osf_support_email=settings.OSF_SUPPORT_EMAIL, + error_info=error_info, ) if payload.get('errors'): # Action failed but our function succeeded diff --git a/addons/osfstorage/decorators.py b/addons/osfstorage/decorators.py index decf4489ee5..b946976f5a8 100644 --- a/addons/osfstorage/decorators.py +++ b/addons/osfstorage/decorators.py @@ -105,7 +105,8 @@ def wrapped(payload, *args, **kwargs): 'source': source, 'destination': dest_parent, 'name': payload['destination']['name'], - 'is_check_permission': is_check_permission + 'is_check_permission': is_check_permission, + 'replaced_size': int(payload.get('replaced_size', 0)), }) except KeyError: raise HTTPError(http_status.HTTP_400_BAD_REQUEST) diff --git a/addons/osfstorage/tests/test_views_copy_move_quota.py b/addons/osfstorage/tests/test_views_copy_move_quota.py new file mode 100644 index 00000000000..9e1c7a36e2d --- /dev/null +++ b/addons/osfstorage/tests/test_views_copy_move_quota.py @@ -0,0 +1,1123 @@ +# encoding: utf-8 +""" +Tests for new quota and FileInfo logic added to addons/osfstorage/views.py. + +Covers: + - _get_all_descendant_file_ids + - osfstorage_copy_hook (file + folder, FileInfo UPSERT, quota delta) + - osfstorage_move_hook (file + folder, intra-target replaced_size, inter-target quota) + - osfstorage_create_child (FileInfo UPSERT + quota delta on upload) + - osfstorage_delete (folder quota subtraction before deletion) + - TestUploadIntegration (full upload flow: create_child -> create_waterbutler_log) +""" +from __future__ import unicode_literals + +import time as time_module +import mock +import pytest +from nose.tools import assert_equal, assert_true, assert_false + +from framework.auth import signing +from website.util import api_url_for + +from addons.osfstorage.tests.utils import StorageTestCase, make_payload +from addons.osfstorage.tests import factories +from addons.osfstorage.tests.utils import recursively_create_file + +from osf.models import FileInfo, BaseFileNode +from osf_tests.factories import ProjectFactory + + +# --------------------------------------------------------------------------- +# Helper +# --------------------------------------------------------------------------- + +def _sign(payload): + """Return a signed copy of *payload* using the default signer.""" + return signing.sign_data(signing.default_signer, payload) + + +# --------------------------------------------------------------------------- +# Test helper: _get_all_descendant_file_ids +# --------------------------------------------------------------------------- + +@pytest.mark.django_db +class TestGetAllDescendantFileIds(StorageTestCase): + """Unit-tests for the private helper _get_all_descendant_file_ids.""" + + def _call(self, folder_node): + from addons.osfstorage.views import _get_all_descendant_file_ids + return _get_all_descendant_file_ids(folder_node) + + def test_empty_folder_returns_empty_list(self): + """An empty root folder (no children) must return [].""" + root = self.node_settings.get_root() + result = self._call(root) + assert_equal(result, []) + + def test_flat_folder_with_files(self): + """Files directly under a folder must all be returned.""" + root = self.node_settings.get_root() + f1 = root.append_file('alpha.txt') + f2 = root.append_file('beta.txt') + result = self._call(root) + assert set(result) == {f1.id, f2.id} + + def test_nested_folders_return_all_files(self): + """ + Structure: + root/ + sub1/ + deep.txt + file.txt + Both files must be returned, folder IDs must NOT be included. + """ + root = self.node_settings.get_root() + sub1 = root.append_folder('sub1') + deep = sub1.append_file('deep.txt') + flat = root.append_file('file.txt') + result = self._call(root) + assert set(result) == {deep.id, flat.id} + + def test_trashed_files_excluded(self): + """TrashedFile/TrashedFolder must not appear in the result.""" + root = self.node_settings.get_root() + live = root.append_file('live.txt') + dead = root.append_file('dead.txt') + # Soft-delete the file + dead.delete(user=self.user) + result = self._call(root) + assert live.id in result + assert dead.id not in result + + def test_only_osfstorage_provider(self): + """Files from other providers must be excluded (provider filter).""" + root = self.node_settings.get_root() + # append_file creates osfstorage files by default – just verify the + # provider guard does not block those + f = root.append_file('normal.txt') + result = self._call(root) + assert f.id in result + + +# --------------------------------------------------------------------------- +# Helpers shared by copy/move hook tests +# --------------------------------------------------------------------------- + +class HookTestBase(StorageTestCase): + """Base class providing signed hook helpers used by copy/move tests.""" + + def _post_hook(self, view_name, guid, payload): + return self.app.post_json( + api_url_for(view_name, guid=guid, **_sign(payload)), + {}, + expect_errors=True, + ) + + def _send_hook(self, view_name, guid_target, payload, target_node=None): + """Send a signed POST to *view_name* and return the response.""" + target_node = target_node or self.node + return self.app.post_json( + api_url_for(view_name, guid=guid_target), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + +# --------------------------------------------------------------------------- +# osfstorage_copy_hook – file copy +# --------------------------------------------------------------------------- +@pytest.mark.enable_implicit_clean +@pytest.mark.django_db +class TestCopyHookFileQuota(StorageTestCase): + """ + Tests for the FILE branch of osfstorage_copy_hook. + + New logic (views.py lines 159-208): + • Creates / updates a FileInfo record for the cloned file. + • Computes delta = new_size - replaced_size and calls update_quota. + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + def _copy_file(self, source_file, dest_folder, dest_target=None, replaced_size=0): + dest_target = dest_target or self.node + payload = { + 'source': source_file._id, + 'target': self.root_node._id, + 'user': self.user._id, + 'destination': { + 'parent': dest_folder._id, + 'target': dest_target._id, + 'name': source_file.name, + }, + } + if replaced_size: + payload['replaced_size'] = str(replaced_size) + # signed payload must go in the request BODY, not URL params + return self.app.post_json( + api_url_for('osfstorage_copy_hook', guid=self.node._id), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_file_creates_fileinfo(self, mock_st, mock_uq): + """After a file copy, a FileInfo record must exist for the clone.""" + version = factories.FileVersionFactory(size=500) + src = self.root_node.append_file('src.txt') + src.add_version(version) + src.save() + + dest_folder = self.root_node.append_folder('dest') + resp = self._copy_file(src, dest_folder) + assert resp.status_code == 201, f'copy hook returned {resp.status_code}: {resp.json}' + + # Exactly one FileInfo must exist for the cloned file (not the source) + cloned = dest_folder.find_child_by_name('src.txt') + assert cloned is not None + fi = FileInfo.objects.filter(file=cloned).first() + assert fi is not None + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_file_quota_delta_positive(self, mock_st, mock_uq): + """ + When a file is copied WITHOUT a replaced file (replaced_size=0), + the full new_size is added to the quota. + """ + version = factories.FileVersionFactory(size=300) + src = self.root_node.append_file('data.bin') + src.add_version(version) + src.save() + + dest_folder = self.root_node.append_folder('out') + resp = self._copy_file(src, dest_folder, replaced_size=0) + assert resp.status_code == 201, f'copy hook returned {resp.status_code}: {resp.json}' + + # update_quota must have been called with add=True and the correct size + assert mock_uq.called + call_kwargs = mock_uq.call_args + # delta > 0 → add=True + assert call_kwargs[1].get('add', call_kwargs[0][-1]) is True + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_file_no_quota_update_when_delta_zero(self, mock_st, mock_uq): + """ + When new_size == replaced_size the delta is 0, so update_quota must + NOT be called (views.py line 203: if delta != 0). + """ + version = factories.FileVersionFactory(size=100) + src = self.root_node.append_file('equal.txt') + src.add_version(version) + src.save() + + dest_folder = self.root_node.append_folder('same') + self._copy_file(src, dest_folder, replaced_size=100) + + assert not mock_uq.called + + +# --------------------------------------------------------------------------- +# osfstorage_copy_hook – folder copy +# --------------------------------------------------------------------------- + +@pytest.mark.enable_implicit_clean +@pytest.mark.django_db +class TestCopyHookFolderQuota(StorageTestCase): + """ + Tests for the FOLDER branch of osfstorage_copy_hook. + + New logic (views.py lines 165-198): + • Builds a source_size_map from FileInfo records (or falls back to + versions) for every descendant. + • Uses bulk_create (with IntegrityError fallback) to create FileInfo + records for the cloned folder's descendants. + • Forces replaced_size = 0 because osfstorage_delete has already + subtracted it. + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + def _copy_folder(self, source_folder, dest_folder, dest_target=None): + dest_target = dest_target or self.node + payload = { + 'source': source_folder._id, + 'target': self.root_node._id, + 'user': self.user._id, + 'destination': { + 'parent': dest_folder._id, + 'target': dest_target._id, + 'name': source_folder.name, + }, + } + # signed payload must go in the request BODY, not URL params + return self.app.post_json( + api_url_for('osfstorage_copy_hook', guid=self.node._id), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_folder_creates_fileinfo_for_descendants(self, mock_st, mock_uq): + """ + After copying a folder, every descendant FILE of the clone must have + a corresponding FileInfo record. + """ + # Build source folder with two files + src_folder = self.root_node.append_folder('src_folder') + v1 = factories.FileVersionFactory(size=200) + f1 = src_folder.append_file('one.txt') + f1.add_version(v1); f1.save() + + v2 = factories.FileVersionFactory(size=400) + f2 = src_folder.append_file('two.txt') + f2.add_version(v2); f2.save() + + # Pre-create FileInfo so source_size_map is populated + FileInfo.objects.update_or_create(file=f1, defaults={'file_size': 200}) + FileInfo.objects.update_or_create(file=f2, defaults={'file_size': 400}) + + dest_folder = self.root_node.append_folder('dest_folder') + resp = self._copy_folder(src_folder, dest_folder) + assert resp.status_code == 201, f'copy hook returned {resp.status_code}: {resp.json}' + + cloned = dest_folder.find_child_by_name('src_folder') + assert cloned is not None + + from addons.osfstorage.views import _get_all_descendant_file_ids + cloned_ids = _get_all_descendant_file_ids(cloned) + assert len(cloned_ids) == 2 + for fid in cloned_ids: + assert FileInfo.objects.filter(file_id=fid).exists(), \ + f'FileInfo missing for cloned file id={fid}' + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_folder_fallback_when_fileinfo_missing(self, mock_st, mock_uq): + """ + If the source file has no FileInfo record, the code falls back to + the latest FileVersion.size. The clone must still get a FileInfo + record (possibly size=0 if no version exists either). + """ + src_folder = self.root_node.append_folder('no_fi_folder') + v = factories.FileVersionFactory(size=111) + f = src_folder.append_file('nofi.txt') + f.add_version(v); f.save() + # Intentionally do NOT create a FileInfo for f + + dest_folder = self.root_node.append_folder('dest_nofi') + self._copy_folder(src_folder, dest_folder) + + cloned = dest_folder.find_child_by_name('no_fi_folder') + from addons.osfstorage.views import _get_all_descendant_file_ids + cloned_ids = _get_all_descendant_file_ids(cloned) + # The clone must have a FileInfo record + for fid in cloned_ids: + assert FileInfo.objects.filter(file_id=fid).exists() + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + @mock.patch('addons.osfstorage.views._get_all_descendant_file_ids') + def test_copy_folder_none_copied_from_id_sets_size_zero(self, mock_descendants, mock_st, mock_uq): + """ + When a cloned file has copied_from_id=None (not in source_size_map), + the else branch (views.py: 'src_id is None and not in map') sets size=0 + and still creates a FileInfo record for that file. + """ + src_folder = self.root_node.append_folder('src_none_id') + v = factories.FileVersionFactory(size=100) + src_file = src_folder.append_file('src_none.txt') + src_file.add_version(v) + src_file.save() + FileInfo.objects.update_or_create(file=src_file, defaults={'file_size': 100}) + + dest_folder = self.root_node.append_folder('dest_none_id') + # A fresh file with no copied_from → copied_from_id will be None in the DB + orphan_file = dest_folder.append_file('orphan.txt') + orphan_file.save() + + # First call → source descendants; second call → cloned descendants (orphan) + mock_descendants.side_effect = [ + [src_file.id], # _get_all_descendant_file_ids(source) + [orphan_file.id], # _get_all_descendant_file_ids(cloned) + ] + + resp = self._copy_folder(src_folder, dest_folder) + assert resp.status_code == 201 + + # The else branch must have produced size=0 → FileInfo.file_size == 0 + fi = FileInfo.objects.filter(file=orphan_file).first() + assert fi is not None, 'FileInfo must be created even when copied_from_id is None' + assert fi.file_size == 0 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_copy_folder_bulk_create_integrity_error_fallback(self, mock_st, mock_uq): + """ + When FileInfo.objects.bulk_create raises IntegrityError the except-block + must call get_or_create for every FileInfo in file_infos. + """ + from django.db import IntegrityError as DjangoIntegrityError + + src_folder = self.root_node.append_folder('src_ie_folder') + v = factories.FileVersionFactory(size=75) + src_file = src_folder.append_file('ie_file.txt') + src_file.add_version(v) + src_file.save() + FileInfo.objects.update_or_create(file=src_file, defaults={'file_size': 75}) + + dest_folder = self.root_node.append_folder('dest_ie_folder') + + original_get_or_create = FileInfo.objects.get_or_create + with mock.patch.object( + FileInfo.objects, 'bulk_create', + side_effect=DjangoIntegrityError('duplicate key') + ): + with mock.patch.object( + FileInfo.objects, 'get_or_create', + wraps=original_get_or_create + ) as spy_goc: + resp = self._copy_folder(src_folder, dest_folder) + + assert resp.status_code == 201 + assert spy_goc.called, ( + 'get_or_create must be called as the IntegrityError fallback' + ) + # One get_or_create call per cloned descendant file + from addons.osfstorage.views import _get_all_descendant_file_ids + cloned = dest_folder.find_child_by_name('src_ie_folder') + cloned_ids = _get_all_descendant_file_ids(cloned) + assert spy_goc.call_count == len(cloned_ids) + + +# --------------------------------------------------------------------------- +# osfstorage_move_hook – quota logic +# --------------------------------------------------------------------------- + +@pytest.mark.django_db +class TestMoveHookQuota(StorageTestCase): + """ + Tests for the quota-related additions in osfstorage_move_hook. + + New logic (views.py lines 231-258): + FILE move: + • FileInfo UPSERT (file=source, file_size=latest_version.size). + FOLDER move: + • Aggregates FileInfo for all descendants. + • replaced_size = 0 (osfstorage_delete already subtracted). + QUOTA UPDATE: + • inter-target: subtract from source, add delta to dest. + • intra-target with replaced_size > 0: subtract replaced_size only. + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + def _move(self, source, dest_folder, dest_target=None, replaced_size=None, + is_check_permission=None): + dest_target = dest_target or self.node + payload = { + 'source': source._id, + 'target': self.root_node._id, + 'user': self.user._id, + 'destination': { + 'parent': dest_folder._id, + 'target': dest_target._id, + 'name': source.name, + }, + } + if replaced_size is not None: + payload['replaced_size'] = str(replaced_size) + if is_check_permission is not None: + payload['is_check_permission'] = is_check_permission + # signed payload must go in the request BODY, not URL params + return self.app.post_json( + api_url_for('osfstorage_move_hook', guid=self.node._id), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + # --- FILE MOVE --- + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_file_upserts_fileinfo(self, mock_st, mock_uq): + """ + Moving a FILE must create/update a FileInfo record for that file + reflecting the latest version size. + """ + version = factories.FileVersionFactory(size=250) + src = self.root_node.append_file('move_me.txt') + src.add_version(version); src.save() + + dest_folder = self.root_node.append_folder('landing') + self._move(src, dest_folder) + + src.reload() + fi = FileInfo.objects.filter(file=src).first() + assert fi is not None + assert fi.file_size == 250 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_file_inter_target_subtracts_source_quota(self, mock_st, mock_uq): + """ + Moving a FILE to a *different* target node must call update_quota + on the source target with add=False. + """ + other_project = ProjectFactory(creator=self.user) + other_root = other_project.get_addon('osfstorage').get_root() + dest_folder = other_root.append_folder('remote') + + version = factories.FileVersionFactory(size=150) + src = self.root_node.append_file('cross.txt') + src.add_version(version); src.save() + + self._move(src, dest_folder, dest_target=other_project) + + # At least one call to update_quota with add=False (source deduction) + assert mock_uq.called + subtract_calls = [c for c in mock_uq.call_args_list + if c[1].get('add', c[0][-1]) is False] + assert len(subtract_calls) >= 1 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_file_intra_target_replaced_size_subtracted(self, mock_st, mock_uq): + """ + Moving a FILE within the SAME target node and providing replaced_size > 0 + must subtract replaced_size from the target (views.py line 255-257). + """ + version = factories.FileVersionFactory(size=100) + src = self.root_node.append_file('replace_target.txt') + src.add_version(version); src.save() + + dest_folder = self.root_node.append_folder('same_target_dest') + self._move(src, dest_folder, replaced_size=80) + + # update_quota must have been called (for the replace deduction) + assert mock_uq.called + subtract_calls = [c for c in mock_uq.call_args_list + if c[1].get('add', c[0][-1]) is False] + assert len(subtract_calls) >= 1 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_file_intra_target_no_replace_no_quota_update(self, mock_st, mock_uq): + """ + Moving a FILE within the SAME target and replaced_size=0 must NOT + trigger any update_quota call. + """ + version = factories.FileVersionFactory(size=100) + src = self.root_node.append_file('same_no_replace.txt') + src.add_version(version); src.save() + + dest_folder = self.root_node.append_folder('inner') + self._move(src, dest_folder, replaced_size=0) + + assert not mock_uq.called + + # --- FOLDER MOVE --- + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_folder_aggregates_fileinfo(self, mock_st, mock_uq): + """ + Moving a FOLDER must aggregate FileInfo sizes for all descendants + (new_size) and pass that to update_quota on the source (add=False). + """ + other_project = ProjectFactory(creator=self.user) + other_root = other_project.get_addon('osfstorage').get_root() + dest_folder = other_root.append_folder('folder_dest') + + src_folder = self.root_node.append_folder('move_folder') + v1 = factories.FileVersionFactory(size=100) + f1 = src_folder.append_file('a.txt'); f1.add_version(v1); f1.save() + v2 = factories.FileVersionFactory(size=200) + f2 = src_folder.append_file('b.txt'); f2.add_version(v2); f2.save() + + FileInfo.objects.update_or_create(file=f1, defaults={'file_size': 100}) + FileInfo.objects.update_or_create(file=f2, defaults={'file_size': 200}) + + self._move(src_folder, dest_folder, dest_target=other_project) + + # update_quota must have been called for source deduction (300 total) + assert mock_uq.called + subtract_calls = [c for c in mock_uq.call_args_list + if c[1].get('add', c[0][-1]) is False] + assert len(subtract_calls) >= 1 + # The size passed must match the aggregate (300) + sizes = [c[0][1] for c in subtract_calls] + assert 300 in sizes + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_move_folder_replaced_size_zeroed(self, mock_st, mock_uq): + """ + When moving a folder, replaced_size must be set to 0 before computing + delta (osfstorage_delete already subtracted it). Therefore, if the + caller passes a non-zero replaced_size for a folder move the effective + delta equals new_size, NOT new_size - replaced_size. + """ + other_project = ProjectFactory(creator=self.user) + other_root = other_project.get_addon('osfstorage').get_root() + dest_folder = other_root.append_folder('folder_dest2') + + src_folder = self.root_node.append_folder('move_folder2') + v = factories.FileVersionFactory(size=300) + f = src_folder.append_file('c.txt'); f.add_version(v); f.save() + FileInfo.objects.update_or_create(file=f, defaults={'file_size': 300}) + + # Pass a non-zero replaced_size – it must be ignored for folders + self._move(src_folder, dest_folder, dest_target=other_project, replaced_size=999) + + assert mock_uq.called + # The delta added to dest must be 300 (not 300-999=-699) + add_calls = [c for c in mock_uq.call_args_list + if c[1].get('add', c[0][-1]) is True] + sizes_added = [c[0][1] for c in add_calls] + assert 300 in sizes_added + + +# --------------------------------------------------------------------------- +# osfstorage_create_child – FileInfo UPSERT + quota delta +# --------------------------------------------------------------------------- + +@pytest.mark.django_db +class TestCreateChildQuota(StorageTestCase): + """ + Tests for the FileInfo / quota logic added to osfstorage_create_child. + + New logic (views.py lines 489-504): + • Computes new_size from the new FileVersion. + • Reads old_size from existing FileInfo (or 0 if none). + • UPSERT FileInfo with new_size. + • Calls update_quota(target, abs(delta), storage_type, add=(delta > 0)) + only when delta != 0. + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + def _upload(self, parent, name, size=123, unique_hash=None, target=None): + """ + Build and POST an upload payload. + + *unique_hash*: when provided, overrides ``metadata['name']`` so that + each call produces a distinct location-hash and is NOT treated as a + duplicate by ``FileVersion.is_duplicate``. Pass a different string on + each call to simulate a genuine new version. + """ + target = target or self.project + payload = make_payload(user=self.user, name=name) + payload['metadata']['size'] = size + if unique_hash is not None: + # Changing metadata['name'] changes the location['object'] value + # which changes the location_hash → not a duplicate + payload['metadata']['name'] = unique_hash + return self.app.post_json( + api_url_for( + 'osfstorage_create_child', + fid=parent._id, + guid=target._id, + ), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_upload_new_file_creates_fileinfo(self, mock_st, mock_uq): + """First upload of a file must create a FileInfo record with the correct size.""" + res = self._upload(self.root_node, 'brand_new.txt', size=500) + assert res.status_code in (200, 201) + record = self.root_node.find_child_by_name('brand_new.txt') + fi = FileInfo.objects.filter(file=record).first() + assert fi is not None + assert fi.file_size == 500 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_upload_new_file_calls_update_quota(self, mock_st, mock_uq): + """First upload (old_size=0) must call update_quota with add=True.""" + res = self._upload(self.root_node, 'new_quota.txt', size=300) + assert res.status_code in (200, 201) + assert mock_uq.called + call_kwargs = mock_uq.call_args + assert call_kwargs[1].get('add', call_kwargs[0][-1]) is True + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_upload_update_file_updates_fileinfo(self, mock_st, mock_uq): + """ + Re-uploading (new version) of the same file must update the FileInfo + to the new size. + + We pass unique_hash on each call so that the two uploads produce + different location hashes and are NOT treated as duplicates. Without + this, create_version returns the existing version (size=100) even when + we request size=200. + """ + # First upload + self._upload(self.root_node, 'versioned.txt', size=100, unique_hash='hash_v1') + record = self.root_node.find_child_by_name('versioned.txt') + + # Second upload – new hash forces a real new version with size=200 + mock_uq.reset_mock() + self._upload(self.root_node, 'versioned.txt', size=200, unique_hash='hash_v2') + record.reload() + fi = FileInfo.objects.filter(file=record).first() + assert fi is not None + assert fi.file_size == 200 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_upload_update_file_quota_delta(self, mock_st, mock_uq): + """ + When a file is updated with a genuinely new version, delta = new_size - old_size. + update_quota must be called with abs(delta) and add=True when size increases. + """ + # First upload sets old_size = 100 + self._upload(self.root_node, 'delta_test.txt', size=100, unique_hash='hash_a') + mock_uq.reset_mock() + + # Second upload: unique hash → real new version, new_size=250, delta=150 + self._upload(self.root_node, 'delta_test.txt', size=250, unique_hash='hash_b') + + assert mock_uq.called + call_kwargs = mock_uq.call_args + size_arg = call_kwargs[0][1] + add_arg = call_kwargs[1].get('add', call_kwargs[0][-1]) + assert size_arg == 150 + assert add_arg is True + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_upload_same_size_no_quota_update(self, mock_st, mock_uq): + """ + If the new version has the same size as the old version, delta=0 and + update_quota must NOT be called a second time. + """ + self._upload(self.root_node, 'same_size.txt', size=100) + mock_uq.reset_mock() + + # Upload again with the same size + self._upload(self.root_node, 'same_size.txt', size=100) + + # delta == 0 → update_quota should NOT be called + assert not mock_uq.called + + +# --------------------------------------------------------------------------- +# osfstorage_delete – folder quota subtraction +# --------------------------------------------------------------------------- + +@pytest.mark.django_db +class TestDeleteHookFolderQuota(StorageTestCase): + """ + Tests for the folder-quota logic added to osfstorage_delete. + + New logic (views.py lines 533-542): + • Collects _get_all_descendant_file_ids for the folder. + • Sums FileInfo.file_size for those files. + • Zeroes the FileInfo records BEFORE deletion (race-condition guard). + • Calls update_quota(target, total_size, storage_type, add=False). + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + def _delete(self, file_node): + payload = {'user': self.user._id} + return self.app.delete( + '{url}?payload={payload}&signature={signature}'.format( + url=api_url_for( + 'osfstorage_delete', + guid=self.node._id, + fid=file_node._id, + ), + **signing.sign_data(signing.default_signer, payload) + ), + expect_errors=True, + ) + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_delete_folder_subtracts_quota(self, mock_st, mock_uq): + """ + Deleting a folder with 2 files (total=300 bytes) must call + update_quota with size=300 and add=False. + """ + folder = self.root_node.append_folder('big_folder') + v1 = factories.FileVersionFactory(size=100) + f1 = folder.append_file('x.txt'); f1.add_version(v1); f1.save() + v2 = factories.FileVersionFactory(size=200) + f2 = folder.append_file('y.txt'); f2.add_version(v2); f2.save() + + FileInfo.objects.update_or_create(file=f1, defaults={'file_size': 100}) + FileInfo.objects.update_or_create(file=f2, defaults={'file_size': 200}) + + resp = self._delete(folder) + assert resp.status_code == 200 + + assert mock_uq.called + call_args = mock_uq.call_args + size_arg = call_args[0][1] + add_arg = call_args[1].get('add', call_args[0][-1]) + assert size_arg == 300 + assert add_arg is False + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_delete_folder_zeroes_fileinfo_before_deletion(self, mock_st, mock_uq): + """ + The FileInfo records must be zeroed BEFORE the hard deletion of the + folder to prevent race conditions (views.py line 541). + """ + folder = self.root_node.append_folder('zeroed_folder') + v = factories.FileVersionFactory(size=500) + f = folder.append_file('z.txt'); f.add_version(v); f.save() + FileInfo.objects.update_or_create(file=f, defaults={'file_size': 500}) + + # We need to capture the FileInfo state at the moment update_quota is + # called – at that point file_size must already be 0. + recorded_file_size = [] + + def side_effect(target, size, storage_type, add): + fi = FileInfo.objects.filter(file=f).first() + if fi: + recorded_file_size.append(fi.file_size) + + mock_uq.side_effect = side_effect + + self._delete(folder) + + assert len(recorded_file_size) == 1 + assert recorded_file_size[0] == 0, ( + 'FileInfo.file_size must be 0 when update_quota is called, ' + f'but was {recorded_file_size[0]}' + ) + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_delete_empty_folder_no_quota_update(self, mock_st, mock_uq): + """ + Deleting an empty folder (total_size=0) must NOT call update_quota + (views.py line 538: if total_size > 0). + """ + folder = self.root_node.append_folder('empty_folder') + resp = self._delete(folder) + assert resp.status_code == 200 + assert not mock_uq.called + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_delete_folder_nested_files_quota(self, mock_st, mock_uq): + """ + Nested files (folder-in-folder) must ALL be counted when computing + the quota deduction. + """ + outer = self.root_node.append_folder('outer') + inner = outer.append_folder('inner') + + v1 = factories.FileVersionFactory(size=50) + f1 = inner.append_file('deep1.txt'); f1.add_version(v1); f1.save() + v2 = factories.FileVersionFactory(size=150) + f2 = inner.append_file('deep2.txt'); f2.add_version(v2); f2.save() + + FileInfo.objects.update_or_create(file=f1, defaults={'file_size': 50}) + FileInfo.objects.update_or_create(file=f2, defaults={'file_size': 150}) + + resp = self._delete(outer) + assert resp.status_code == 200 + + assert mock_uq.called + size_arg = mock_uq.call_args[0][1] + assert size_arg == 200 # 50 + 150 + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + def test_delete_file_does_not_trigger_folder_quota_path(self, mock_st, mock_uq): + """ + Deleting a single FILE must NOT go through the folder-quota path. + The new code block is guarded by `not file_node.is_file`. + """ + v = factories.FileVersionFactory(size=777) + f = self.root_node.append_file('single.txt') + f.add_version(v); f.save() + FileInfo.objects.update_or_create(file=f, defaults={'file_size': 777}) + + resp = self._delete(f) + assert resp.status_code == 200 + + # update_quota must NOT have been called by the folder-quota path. + # (The existing file-quota path in views.py does NOT call update_quota + # on delete; that is handled by the WaterButler hook elsewhere.) + assert not mock_uq.called + + +# --------------------------------------------------------------------------- +# Integration: full upload flow – osfstorage_create_child + create_waterbutler_log +# --------------------------------------------------------------------------- + +@pytest.mark.django_db +class TestUploadIntegration(StorageTestCase): + """ + Integration tests simulating the complete OSF upload flow. + + Step 1 – osfstorage_create_child: + WaterButler calls this endpoint after writing the file to storage. + Creates a FileInfo record and calls update_quota ONCE. + + Step 2 – create_waterbutler_log: + WaterButler calls this endpoint to record the log. + Fires file_signals.file_updated → update_used_quota. + The skip logic (provider == 'osfstorage') prevents update_quota + from being called a SECOND time (Critical 1 fix). + """ + + def setUp(self): + super().setUp() + self.root_node = self.node_settings.get_root() + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _upload(self, parent, name, size=123, unique_hash=None): + """POST to osfstorage_create_child (WaterButler upload callback).""" + payload = make_payload(user=self.user, name=name) + payload['metadata']['size'] = size + if unique_hash is not None: + # Different hash → genuinely new FileVersion (not treated as duplicate) + payload['metadata']['name'] = unique_hash + return self.app.post_json( + api_url_for( + 'osfstorage_create_child', + fid=parent._id, + guid=self.project._id, + ), + signing.sign_data(signing.default_signer, payload), + expect_errors=True, + ) + + def _wb_log(self, file_id, name, size, action='create'): + """PUT to create_waterbutler_log (WaterButler log callback).""" + options = { + 'auth': {'id': self.user._id}, + 'action': action, + 'provider': 'osfstorage', + 'metadata': { + 'provider': 'osfstorage', + 'name': name, + 'materialized': '/' + file_id, + 'path': '/' + file_id, + 'kind': 'file', + 'size': size, + 'created_utc': '', + 'modified_utc': '', + 'extra': {'version': '1'}, + }, + 'time': time_module.time() + 1000, + } + message, signature = signing.default_signer.sign_payload(options) + return self.app.put_json( + self.project.api_url_for('create_waterbutler_log'), + {'payload': message, 'signature': signature}, + headers={'Content-Type': 'application/json'}, + expect_errors=True, + ) + + # ------------------------------------------------------------------ + # Upload new file + # ------------------------------------------------------------------ + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + @mock.patch('addons.base.views.BaseFileNode') + @mock.patch('addons.base.views.timestamp') + def test_new_upload_wb_log_returns_200_and_no_duplicate( + self, mock_ts, mock_bfn, mock_st, mock_uq): + """ + Scenario #1: Upload a brand-new file. + + Expected: + - osfstorage_create_child: returns 200/201, creates FileInfo, calls update_quota once. + - create_waterbutler_log: returns 200 (not 500), creates a NodeLog entry. + - After both steps: still exactly 1 FileInfo record; update_quota NOT called again via signal. + """ + name = 'new_upload.txt' + size = 1024 + + # Step 1: WaterButler upload callback -> osfstorage_create_child + upload_resp = self._upload(self.root_node, name, size=size) + assert upload_resp.status_code in (200, 201), ( + f'osfstorage_create_child returned {upload_resp.status_code}' + ) + + # Scenario #4: exactly 1 FileInfo record created + record = self.root_node.find_child_by_name(name) + assert record is not None + assert FileInfo.objects.filter(file=record).count() == 1, ( + 'FileInfo must have exactly 1 record after upload.' + ) + + # Scenario #3: update_quota called exactly once from create_child + assert mock_uq.call_count == 1, ( + f'update_quota should be called once from osfstorage_create_child, ' + f'but was called {mock_uq.call_count} time(s).' + ) + mock_uq.reset_mock() + + # Step 2: WaterButler log callback -> create_waterbutler_log + nlogs = self.project.logs.count() + log_resp = self._wb_log(record._id, name, size) + + # Scenario #1: /create_waterbutler_log must return 200 + assert log_resp.status_code == 200, ( + f'create_waterbutler_log returned {log_resp.status_code} ' + '(expected 200). This was Critical 1: FILE_ADDED signal caused ' + 'duplicate FileInfo creation -> IntegrityError -> 500.' + ) + + # NodeLog must be created + self.project.reload() + assert self.project.logs.count() == nlogs + 1, ( + 'A NodeLog entry must be created by create_waterbutler_log.' + ) + + # Scenario #4: FileInfo still exactly 1 record (signal was skipped) + assert FileInfo.objects.filter(file=record).count() == 1, ( + 'FileInfo must remain exactly 1 record after create_waterbutler_log. ' + 'The skip logic in update_used_quota must prevent duplication.' + ) + + # Scenario #3: update_quota must NOT be called again via signal + assert mock_uq.call_count == 0, ( + f'update_quota was called {mock_uq.call_count} extra time(s) via ' + 'the FILE_ADDED signal. The skip logic should have prevented this.' + ) + + # ------------------------------------------------------------------ + # Overwrite: upload a new version of an existing file + # ------------------------------------------------------------------ + + @mock.patch('addons.osfstorage.views.update_quota') + @mock.patch('addons.osfstorage.views.get_project_storage_type', return_value=1) + @mock.patch('addons.base.views.BaseFileNode') + @mock.patch('addons.base.views.timestamp') + def test_overwrite_upload_wb_log_returns_200_and_no_duplicate( + self, mock_ts, mock_bfn, mock_st, mock_uq): + """ + Scenario #2: Upload a new version of an existing file (same filename). + + Expected: + - create_waterbutler_log returns 200. + - A NodeLog entry is created. + - FileInfo is UPDATED (UPSERT), not duplicated -> still exactly 1 record. + - update_quota called once from create_child for the size delta. + """ + name = 'overwrite.txt' + size_v1, size_v2 = 500, 800 + + # First upload + self._upload(self.root_node, name, size=size_v1, unique_hash='hash_v1') + record = self.root_node.find_child_by_name(name) + assert FileInfo.objects.filter(file=record).count() == 1 + mock_uq.reset_mock() + + # Second upload (same filename, different hash -> genuinely new FileVersion) + upload_resp = self._upload( + self.root_node, name, size=size_v2, unique_hash='hash_v2' + ) + assert upload_resp.status_code in (200, 201) + record.reload() + + # FileInfo must be UPDATED, not duplicated + fi = FileInfo.objects.filter(file=record).first() + assert fi is not None + assert fi.file_size == size_v2, ( + f'FileInfo.file_size should be {size_v2} after overwrite, got {fi.file_size}.' + ) + assert FileInfo.objects.filter(file=record).count() == 1 + + # update_quota called once for the new version (delta = size_v2 - size_v1) + assert mock_uq.call_count == 1 + mock_uq.reset_mock() + + # WaterButler log callback (action='update' for an overwrite) + nlogs = self.project.logs.count() + log_resp = self._wb_log(record._id, name, size_v2, action='update') + + # Scenario #2: /create_waterbutler_log must return 200 + assert log_resp.status_code == 200, ( + f'create_waterbutler_log returned {log_resp.status_code} ' + 'for overwrite (expected 200).' + ) + + self.project.reload() + assert self.project.logs.count() == nlogs + 1 + + # Still exactly 1 FileInfo record + assert FileInfo.objects.filter(file=record).count() == 1, ( + 'FileInfo must remain exactly 1 record after overwrite + ' + 'create_waterbutler_log.' + ) + + # update_quota must NOT be called again via signal (osfstorage skip) + assert mock_uq.call_count == 0, ( + f'update_quota was called {mock_uq.call_count} extra time(s) via ' + 'signal after overwrite. Skip logic should prevent this.' + ) + + @mock.patch('addons.base.views.BaseFileNode') + @mock.patch('addons.base.views.timestamp') + def test_upload_actual_quota_increment_no_double_count(self, mock_ts, mock_bfn): + """ + Do not mock update_quota — verify UserQuota.used in the database: + - After osfstorage_create_child: used increases exactly by file_size. + - After create_waterbutler_log: used does not increase further (no double-count). + """ + from osf.models import UserQuota + + # Ensure UserQuota initially exists with used=0 + uq, _ = UserQuota.objects.get_or_create( + user=self.user, + storage_type=UserQuota.NII_STORAGE, + defaults={'max_quota': 200, 'used': 0}, + ) + initial_used = UserQuota.objects.get( + user=self.user, storage_type=UserQuota.NII_STORAGE + ).used + + # Step 1: osfstorage_create_child + self._upload(self.root_node, 'real.txt', size=500) + record = self.root_node.find_child_by_name('real.txt') + assert record is not None + + after_create_child = UserQuota.objects.get( + user=self.user, storage_type=UserQuota.NII_STORAGE + ).used + assert after_create_child - initial_used == 500, \ + f'Initial increment incorrect: expected +500, got +{after_create_child - initial_used}' + + # Step 2: create_waterbutler_log + log_resp = self._wb_log(record._id, 'real.txt', 500) + assert log_resp.status_code == 200 + + final = UserQuota.objects.get( + user=self.user, storage_type=UserQuota.NII_STORAGE + ).used + assert final == after_create_child, \ + f'No double-counting after create_waterbutler_log: expected {after_create_child}, got {final}' diff --git a/addons/osfstorage/views.py b/addons/osfstorage/views.py index 1e413f84df2..ef37de8d090 100644 --- a/addons/osfstorage/views.py +++ b/addons/osfstorage/views.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals -from django.db.models import IntegerField +from django.db.models import IntegerField, Sum from django.db.models.functions import Cast from rest_framework import status as http_status import logging @@ -19,7 +19,7 @@ from api.caching.tasks import update_storage_usage from osf.exceptions import InvalidTagError, TagNotFoundError -from osf.models import FileVersion, OSFUser, ExportDataRestore, ExportData +from osf.models import FileVersion, OSFUser, ExportDataRestore, ExportData, FileInfo, BaseFileNode from osf.utils.permissions import WRITE from osf.utils.requests import check_select_for_update from website.project.decorators import ( @@ -28,6 +28,7 @@ from website.project.model import has_anonymous_link from website.files import exceptions +from website.util.quota import get_project_storage_type, update_quota from addons.osfstorage import utils from addons.osfstorage import decorators from addons.osfstorage.models import OsfStorageFolder @@ -124,17 +125,94 @@ def osfstorage_get_revisions(file_node, payload, target, **kwargs): ] } +def _get_all_descendant_file_ids(folder_node): + TRASHED_TYPES = ('osf.trashedfilenode', 'osf.trashedfile', 'osf.trashedfolder') + + file_ids = [] + folder_ids_to_process = [folder_node.id] + + while folder_ids_to_process: + children = BaseFileNode.objects.filter( + parent_id__in=folder_ids_to_process, + provider='osfstorage', + ).exclude( + type__in=TRASHED_TYPES + ).values('id', 'type') + + next_folder_ids = [] + for child in children: + if child['type'] == 'osf.osfstoragefile': + file_ids.append(child['id']) + elif child['type'] == 'osf.osfstoragefolder': + next_folder_ids.append(child['id']) + + folder_ids_to_process = next_folder_ids + + return file_ids @decorators.waterbutler_opt_hook def osfstorage_copy_hook(source, destination, name=None, **kwargs): - ret = source.copy_under(destination, name=name).serialize(), http_status.HTTP_201_CREATED + replaced_size = int(kwargs.get('replaced_size', 0)) + cloned = source.copy_under(destination, name=name) + + # Only handle if source is a node and destination is a node + if getattr(source.target, 'type', '') == 'osf.node' and getattr(destination.target, 'type', '') == 'osf.node': + if source.is_file: + latest_version = cloned.versions.order_by('-created').first() + new_size = latest_version.size if latest_version and latest_version.size else 0 + FileInfo.objects.update_or_create(file=cloned, defaults={'file_size': new_size}) + else: + source_file_ids = _get_all_descendant_file_ids(source) + source_size_map = { + fi.file_id: fi.file_size + for fi in FileInfo.objects.filter(file_id__in=source_file_ids) + } + + cloned_file_ids = _get_all_descendant_file_ids(cloned) + cloned_files = list(BaseFileNode.objects.filter( + id__in=cloned_file_ids + ).values('id', 'copied_from_id')) + + file_infos = [] + new_size = 0 + for cf in cloned_files: + src_id = cf['copied_from_id'] + if src_id in source_size_map: + size = source_size_map[src_id] + elif src_id is not None: + # FileInfo not exist + node = BaseFileNode.objects.filter(id=src_id).first() + size = 0 + if node: + v = node.versions.order_by('-created').first() + size = (v.size or 0) if v else 0 + else: # src_id is None and not in map + size = 0 + + file_infos.append(FileInfo(file_id=cf['id'], file_size=size)) + new_size += size + + try: + FileInfo.objects.bulk_create(file_infos) + except IntegrityError: + for fi in file_infos: + FileInfo.objects.get_or_create(file_id=fi.file_id, defaults={'file_size': fi.file_size}) + replaced_size = 0 # osfstorage_delete already subtracted this + + # UserQuota: delta = new_size - replaced_size + delta = new_size - replaced_size + if delta != 0: + storage_type = get_project_storage_type(destination.target) + update_quota(destination.target, abs(delta), storage_type, add=(delta > 0)) + update_storage_usage(destination.target) - return ret + return cloned.serialize(), http_status.HTTP_201_CREATED @decorators.waterbutler_opt_hook def osfstorage_move_hook(source, destination, name=None, **kwargs): source_target = source.target is_check_permission = kwargs.get('is_check_permission') + replaced_size = int(kwargs.get('replaced_size', 0)) try: ret = source.move_under(destination, name=name, is_check_permission=is_check_permission).serialize(), http_status.HTTP_200_OK except exceptions.FileNodeCheckedOutError: @@ -150,6 +228,32 @@ def osfstorage_move_hook(source, destination, name=None, **kwargs): 'message_long': 'Cannot move file as it is the primary file of preprint.' }) + # Only handle if source is a node and destination is a node + if getattr(source_target, 'type', '') == 'osf.node' and getattr(destination.target, 'type', '') == 'osf.node': + if source.is_file: + latest_version = source.versions.order_by('-created').first() + new_size = latest_version.size if latest_version and latest_version.size else 0 + # FileInfo: UPSERT + FileInfo.objects.update_or_create(file=source, defaults={'file_size': new_size}) + else: + source_file_ids = _get_all_descendant_file_ids(source) # recursive + new_size = FileInfo.objects.filter( + file_id__in=source_file_ids + ).aggregate(total=Sum('file_size'))['total'] or 0 + replaced_size = 0 # osfstorage_delete already subtracted this + + # UserQuota + if source_target != destination.target: + src_storage = get_project_storage_type(source_target) + dest_storage = get_project_storage_type(destination.target) + update_quota(source_target, new_size, src_storage, add=False) + delta = new_size - replaced_size + if delta != 0: + update_quota(destination.target, abs(delta), dest_storage, add=(delta > 0)) + elif replaced_size > 0: + storage_type = get_project_storage_type(destination.target) + update_quota(destination.target, replaced_size, storage_type, add=False) + # once the move is complete recalculate storage for both targets if it's a inter-target move. if is_check_permission and source_target != destination.target: update_storage_usage(destination.target) @@ -379,6 +483,24 @@ def osfstorage_create_child(file_node, payload, **kwargs): if not current_version or not current_version.is_duplicate(new_version): update_storage_usage(file_node.target) + # Only handle if target is a node + if getattr(file_node.target, 'type', '') == 'osf.node': + new_size = new_version.size if new_version.size and new_version.size > 0 else 0 + if new_size >= 0: + old_info = FileInfo.objects.filter(file=file_node).first() + old_size = old_info.file_size if old_info else 0 + + # FileInfo: UPSERT + FileInfo.objects.update_or_create( + file=file_node, defaults={'file_size': new_size} + ) + + # UserQuota: delta = new - old + delta = new_size - old_size + if delta != 0: + storage_type = get_project_storage_type(file_node.target) + update_quota(file_node.target, abs(delta), storage_type, add=(delta > 0)) + version_id = new_version._id archive_exists = new_version.archive is not None else: @@ -406,6 +528,17 @@ def osfstorage_delete(file_node, payload, target, **kwargs): if file_node == OsfStorageFolder.objects.get_root(target=target): raise HTTPError(http_status.HTTP_400_BAD_REQUEST) + if getattr(file_node.target, 'type', '') == 'osf.node' and not file_node.is_file: + # Since we are deleting a folder, we need to calculate the total size of all descendant files to update quota + all_file_ids = _get_all_descendant_file_ids(file_node) + total_size = FileInfo.objects.filter(file_id__in=all_file_ids) \ + .aggregate(total=Sum('file_size'))['total'] or 0 + if total_size > 0: + storage_type = get_project_storage_type(target) + # Subtract quota BEFORE deletion; also zero FileInfo to avoid race conditions with new file creation during deletion + FileInfo.objects.filter(file_id__in=all_file_ids).update(file_size=0) + update_quota(target, total_size, storage_type, add=False) + try: file_node.delete(user=user) diff --git a/scripts/migration/migrate_fix_fileinfo_quota.py b/scripts/migration/migrate_fix_fileinfo_quota.py new file mode 100644 index 00000000000..e2fd73ba38a --- /dev/null +++ b/scripts/migration/migrate_fix_fileinfo_quota.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +migrate_fix_fileinfo_quota.py + +Fix missing or zero-size FileInfo records and recalculate UserQuota. + +Background +---------- +During Move/Copy operations, if a system error occurs after the file has been +written but before the FileInfo hook completes, the resulting OsfStorageFile may +have no FileInfo row (or a row with file_size=0). This causes UserQuota.used to +be under-counted for the affected project creator. + +This script: +1. Finds all non-deleted OsfStorageFile nodes that are missing a FileInfo record + OR have a FileInfo record with file_size=0. +2. Populates file_size from the latest FileVersion.size (which is authoritative). +3. Collects all unique project creators affected by the above. +4. Calls update_user_used_quota(user) for each affected user to re-sync + UserQuota.used from the actual FileInfo totals. + Storage type is determined per-project (NII_STORAGE or CUSTOM_STORAGE). +5. Exports a CSV report of all processed records, including skipped files. + +Database Transactions: + All database updates (FileInfo insertions/updates and UserQuota recalculations) + are wrapped in a single database transaction (transaction.atomic). If the script + crashes or encounters an unhandled exception during the fix process, all changes + are rolled back to ensure database consistency. + +Usage: + # Fix and export results to CSV + python -m scripts.migration.migrate_fix_fileinfo_quota + python -m scripts.migration.migrate_fix_fileinfo_quota --output result.csv + + # Preview changes and export to CSV without writing to DB (dry run) + python -m scripts.migration.migrate_fix_fileinfo_quota --dry + python -m scripts.migration.migrate_fix_fileinfo_quota --dry --output preview.csv +""" + +import sys +import os +import csv +import logging +import argparse +from datetime import datetime + +import django + +# Setup Django before importing models +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'api.base.settings') +django.setup() + +from django.db import transaction +from django.db.models import Q + +from addons.osfstorage.models import OsfStorageFile +from osf.models import FileInfo, OSFUser, UserQuota, ProjectStorageType +from website.util.quota import update_user_used_quota, get_project_storage_type +from scripts import utils as script_utils +from django.contrib.contenttypes.models import ContentType +from osf.models import Guid + +logger = logging.getLogger(__name__) + +RESULT_FIELDNAMES = [ + # osf_file_id : _id (hex string) of the OsfStorageFile / BaseFileNode record + # fileinfo_id : integer PK of the FileInfo record inserted/updated (empty in dry_run) + # fileinfo__id : _id (hex string) of the FileInfo record inserted/updated (empty in dry_run) + # fileinfo_file_id: integer PK of BaseFileNode — the value stored as file_id (FK) in the FileInfo table + # fileinfo_created: True = new FileInfo row INSERT-ed, False = existing row UPDATE-d (empty in dry_run) + # action : would_update | created | updated | updated_no_quota | skipped + # quota_recalculated: True if quota was recalculated for this file's creator + 'osf_file_id', 'file_name', + 'fileinfo_id', 'fileinfo__id', 'fileinfo_file_id', + 'file_size_bytes', 'project_guid', 'creator_id', 'storage_type', + 'fileinfo_created', 'action', 'quota_recalculated', 'dry_run', +] + + +# --- Scanning Operations --- + +def get_broken_file_nodes(): + """Return queryset of OsfStorageFile nodes that: + - are not deleted (deleted_on is None) + - either have no FileInfo, or have FileInfo with file_size == 0 + + Optimized using a LEFT OUTER JOIN query to avoid subquery bottlenecks with large datasets. + """ + return ( + OsfStorageFile.objects + .filter(deleted_on=None, deleted_by_id=None) + .filter(Q(fileinfo__isnull=True) | Q(fileinfo__file_size=0)) + # NOTE: Do NOT use prefetch_related('target') here. + # BaseFileNode uses TypedModel + GenericForeignKey; Django's GFK prefetch cache + # is incompatible with TypedModel proxy resolution and returns None even when + # target_content_type_id is valid. Use direct lookup instead (see resolve_target). + ) + + +def _bulk_get_latest_version_sizes(osf_file_ids): + """Fetch the latest FileVersion size for each OsfStorageFile in bulk. + Returns a dict mapping osf_file.id -> size (int or None). + """ + if not osf_file_ids: + return {} + + # Access the M2M through-model (BaseFileVersionsThrough) without importing it directly. + # Fields: basefilenode_id (FK → BaseFileNode), fileversion_id (FK → FileVersion) + ThroughModel = OsfStorageFile.versions.through + + # Single JOIN query: fetches all through-entries + related FileVersion data at once. + entries = ( + ThroughModel.objects + .filter(basefilenode_id__in=osf_file_ids) + .select_related('fileversion') + ) + + # Group by file and pick the version with the highest numeric identifier. + # result maps: file_node_id → (version_number_int, size) + best = {} # file_node_id -> (ver_num, size) + for entry in entries: + fv = entry.fileversion + file_node_id = entry.basefilenode_id + + # identifier is a text field ('1', '2', ...) — parse to int for correct ordering + try: + ver_num = int(fv.identifier) + except (TypeError, ValueError): + ver_num = -1 # non-numeric identifiers rank last + + if file_node_id not in best or ver_num > best[file_node_id][0]: + # Resolve size: prefer fv.size, fall back to metadata dict + size = fv.size + if size is None and isinstance(fv.metadata, dict): + try: + size = int(fv.metadata.get('size')) + except (TypeError, ValueError): + size = None + best[file_node_id] = (ver_num, size) + + return {file_id: info[1] for file_id, info in best.items()} + + +def scan_broken_files(): + """Scan for OsfStorageFile nodes with missing or zero-size FileInfo. + + Returns a list of dicts with scan results (no DB writes). + Skipped files are also included with action='skipped' for full audit visibility. + """ + broken_files = list(get_broken_file_nodes()) + total = len(broken_files) + logger.info('Scanning OsfStorageFile records...') + logger.info('Found {} file node(s) with missing or zero-size FileInfo.'.format(total)) + + # Bulk-fetch latest version sizes to avoid N+1 queries + file_ids = [f.id for f in broken_files] + size_map = _bulk_get_latest_version_sizes(file_ids) + + rows = [] + for idx, osf_file in enumerate(broken_files, 1): + size = size_map.get(osf_file.id) + + if size is None or size <= 0: + logger.warning( + '[{}/{}] Skipping {} (_id={}) — could not determine size from FileVersion.'.format( + idx, total, osf_file.name, osf_file._id + ) + ) + # Include skipped files in CSV for full audit trail + rows.append({ + 'osf_file_id': osf_file._id, + 'fileinfo_file_id': osf_file.id, + 'file_name': osf_file.name, + 'file_size_bytes': size if size is not None else '', + 'project_guid': '', + 'creator_id': '', + 'storage_type': None, + 'action': 'skipped', + 'quota_recalculated': False, + 'osf_file': osf_file, + }) + continue + + project_guid = '' + creator_id = '' + storage_type_value = None + try: + # Direct lookup via ContentType to avoid GFK+TypedModel prefetch incompatibility. + # osf_file.target (GFK accessor) returns None when used after prefetch_related + # because TypedModel proxy resolution breaks the GFK prefetch cache. + target = None + if osf_file.target_content_type_id and osf_file.target_object_id: + ct = ContentType.objects.get_for_id(osf_file.target_content_type_id) + model_class = ct.model_class() + if model_class: + target = model_class.objects.filter(id=osf_file.target_object_id).first() + + if target is not None: + first_guid = target.guids.values_list('_id', flat=True).first() + project_guid = first_guid or '' + creator = getattr(target, 'creator', None) + if creator: + creator_id = creator._id + # Determine storage type for this project + storage_type_value = get_project_storage_type(target) + else: + # Target object not found — try Guid table as last resort + if osf_file.target_content_type_id and osf_file.target_object_id: + ct = ContentType.objects.get_for_id(osf_file.target_content_type_id) + guid_obj = Guid.objects.filter( + content_type=ct, + object_id=osf_file.target_object_id + ).first() + if guid_obj: + project_guid = guid_obj._id + logger.warning( + 'File {} target not found (target_content_type_id={}, target_object_id={}). ' + 'Resolved project_guid={}'.format( + osf_file._id, osf_file.target_content_type_id, osf_file.target_object_id, project_guid + ) + ) + except Exception as e: + logger.warning('Could not resolve creator/guid for file {}: {}'.format(osf_file._id, e)) + + rows.append({ + 'osf_file_id': osf_file._id, # hex _id string of OsfStorageFile + 'fileinfo_file_id': osf_file.id, # integer PK stored as file_id FK in FileInfo table + 'file_name': osf_file.name, + 'file_size_bytes': size, + 'project_guid': project_guid, + 'creator_id': creator_id, + 'storage_type': storage_type_value, + 'osf_file': osf_file, + }) + + eligible = sum(1 for r in rows if r.get('action') != 'skipped') + logger.info('Scan complete. {} record(s) eligible for fix, {} skipped.'.format( + eligible, total - eligible + )) + return rows + + +def print_results(rows): + """Display scanned records in a formatted summary table.""" + eligible = [r for r in rows if r.get('action') != 'skipped'] + skipped = [r for r in rows if r.get('action') == 'skipped'] + + if not eligible and not skipped: + print(' No broken FileInfo records found.') + return + + if eligible: + print('\n' + '-' * 160) + print(' {:<26} {:<16} {:<22} {:<18} {:<16} {:<14} {:<18}'.format( + 'OSF_FILE_ID (_id)', 'FILEINFO_FILE_ID', 'FILE_NAME', 'PROJECT_GUID', 'CREATOR_ID', 'STORAGE_TYPE', 'SIZE (bytes)' + )) + print('-' * 160) + for r in eligible: + print(' {:<26} {:<16} {:<22} {:<18} {:<16} {:<14} {:<18}'.format( + r['osf_file_id'], + r['fileinfo_file_id'], + (r['file_name'] or '')[:20], + r['project_guid'], + r['creator_id'], + str(r.get('storage_type') if r.get('storage_type') is not None else ''), + r['file_size_bytes'], + )) + print('-' * 160) + print(' Total: {} record(s) would be updated.\n'.format(len(eligible))) + + if skipped: + print(' Skipped {} record(s) (no valid FileVersion size):'.format(len(skipped))) + for r in skipped: + print(' - {} ({})'.format(r['osf_file_id'], r['file_name'])) + print() + + +# --- Updating Operations --- + +def apply_fixes(rows, dry_run=False): + """Apply FileInfo fixes and recalculate UserQuota for affected users. + + This function wraps all database modifications in a single database transaction + when dry_run is False. If any unhandled exception occurs, the transaction is + rolled back, ensuring database consistency. + + If dry_run is True, no database writes are performed. + Returns the list of result rows with 'action', 'quota_recalculated', and 'dry_run' fields added. + """ + if dry_run: + logger.info('[DRY RUN] Simulating FileInfo updates (no DB writes)...') + return _apply_fixes_internal(rows, dry_run=True) + else: + logger.info('Applying FileInfo updates (wrapped in database transaction)...') + with transaction.atomic(): + return _apply_fixes_internal(rows, dry_run=False) + + +def _apply_fixes_internal(rows, dry_run=False): + """Internal helper to apply fixes, called inside/outside transaction.""" + result_rows = [] + affected_user_storage = set() # set of (creator_id, storage_type) + fixed = 0 + skipped = 0 + + for row in rows: + if row.get('action') == 'skipped': + result_rows.append({ + 'osf_file_id': row['osf_file_id'], + 'fileinfo_file_id': row['fileinfo_file_id'], + 'file_name': row['file_name'], + 'file_size_bytes': row['file_size_bytes'], + 'project_guid': row['project_guid'], + 'creator_id': row['creator_id'], + 'storage_type': row['storage_type'], + 'fileinfo_id': '', + 'fileinfo__id': '', + 'fileinfo_created': '', + 'action': 'skipped', + 'quota_recalculated': False, + 'dry_run': dry_run, + }) + skipped += 1 + continue + + # Optimize: reuse osf_file object from scan, avoiding re-querying + osf_file = row.get('osf_file') + if osf_file is None: + osf_file = OsfStorageFile.objects.filter(_id=row['osf_file_id']).first() + + if osf_file is None: + logger.warning('File {} not found, skipping.'.format(row['osf_file_id'])) + result_rows.append({ + 'osf_file_id': row['osf_file_id'], + 'fileinfo_file_id': row['fileinfo_file_id'], + 'file_name': row['file_name'], + 'file_size_bytes': row['file_size_bytes'], + 'project_guid': row['project_guid'], + 'creator_id': row['creator_id'], + 'storage_type': row['storage_type'], + 'fileinfo_id': '', + 'fileinfo__id': '', + 'fileinfo_created': '', + 'action': 'skipped', + 'quota_recalculated': False, + 'dry_run': dry_run, + }) + skipped += 1 + continue + + fileinfo_id = '' + fileinfo__id = '' + fileinfo_created = '' + if not dry_run: + fileinfo_obj, created = FileInfo.objects.update_or_create( + file=osf_file, + defaults={'file_size': row['file_size_bytes']}, + ) + fileinfo_id = fileinfo_obj.id # integer PK of FileInfo + fileinfo__id = fileinfo_obj._id # hex _id string of FileInfo + fileinfo_created = created # True = INSERT, False = UPDATE + + # Determine whether quota will actually be recalculated + creator_id = row.get('creator_id', '') + storage_type = row.get('storage_type', None) + quota_will_recalculate = bool(creator_id and storage_type is not None) + + if quota_will_recalculate: + # Track (creator_id, storage_type) pair + affected_user_storage.add((creator_id, storage_type)) + + # Use distinct action when creator_id is missing + if dry_run: + action = 'would_update' + elif not quota_will_recalculate: + # FileInfo was fixed but quota cannot be recalculated (no creator resolved) + action = 'updated_no_quota' + else: + action = 'created' if fileinfo_created else 'updated' + + logger.info('{} {} (osf_file_id={}, fileinfo_id={}, fileinfo__id={}, fileinfo_file_id={}, created={}) file_size={} bytes storage_type={}'.format( + '[DRY RUN]' if dry_run else 'Fixed:', + row['file_name'], row['osf_file_id'], + fileinfo_id, fileinfo__id, row['fileinfo_file_id'], + fileinfo_created, row['file_size_bytes'], storage_type, + )) + + if action == 'updated_no_quota': + logger.warning( + 'File {} FileInfo fixed but creator_id is empty — UserQuota NOT recalculated. ' + 'Manual review required. project_guid={}'.format( + row['osf_file_id'], row.get('project_guid', '') + ) + ) + + result_rows.append({ + 'osf_file_id': row['osf_file_id'], + 'fileinfo_file_id': row['fileinfo_file_id'], + 'file_name': row['file_name'], + 'file_size_bytes': row['file_size_bytes'], + 'project_guid': row['project_guid'], + 'creator_id': row['creator_id'], + 'storage_type': row['storage_type'], + 'fileinfo_id': fileinfo_id, + 'fileinfo__id': fileinfo__id, + 'fileinfo_created': fileinfo_created, + 'action': action, + 'quota_recalculated': False, # will be updated below after quota recalc + 'dry_run': dry_run, + }) + fixed += 1 + + logger.info('FileInfo backfill complete. fixed={}, skipped={} (dry={})'.format( + fixed, skipped, dry_run + )) + + # --- Recalculate UserQuota.used for all affected (user, storage_type) pairs --- + logger.info('Recalculating UserQuota.used for {} unique (user, storage_type) pair(s).'.format( + len(affected_user_storage) + )) + + quota_updated = 0 + quota_errors = 0 + # Track which creator_ids had successful quota recalculation + recalculated_user_storage = set() + + for user_id, storage_type in sorted(affected_user_storage): + try: + user = OSFUser.load(user_id) + if user is None: + logger.warning('User {} not found, skipping quota recalc.'.format(user_id)) + continue + + if not dry_run: + # Update quota for the actual storage_type of the project + update_user_used_quota(user, storage_type=storage_type) + + logger.info('{} Recalculated quota for user {} (storage_type={})'.format( + '[DRY RUN]' if dry_run else '', user_id, storage_type + )) + quota_updated += 1 + recalculated_user_storage.add((user_id, storage_type)) + except Exception as e: + logger.error('Failed to recalculate quota for user {} (storage_type={}): {}'.format( + user_id, storage_type, e + )) + quota_errors += 1 + + logger.info('Quota recalculation complete. updated={}, errors={} (dry={})'.format( + quota_updated, quota_errors, dry_run + )) + + # Update quota_recalculated flag in result rows + for row in result_rows: + creator_id = row.get('creator_id', '') + storage_type = row.get('storage_type', None) + if dry_run and creator_id and storage_type is not None: + # In dry-run mode, mark as "would recalculate" + row['quota_recalculated'] = 'would_recalculate' + elif (creator_id, storage_type) in recalculated_user_storage: + row['quota_recalculated'] = True + + return result_rows + + +# --- CSV Export --- + +def save_result_csv(result_rows, output_path): + """Save full results to CSV file.""" + with open(output_path, 'w', newline='', encoding='utf-8-sig') as f: + writer = csv.DictWriter(f, fieldnames=RESULT_FIELDNAMES) + writer.writeheader() + writer.writerows(result_rows) + total = len(result_rows) + skipped = sum(1 for r in result_rows if r.get('action') == 'skipped') + no_quota = sum(1 for r in result_rows if r.get('action') == 'updated_no_quota') + print('\n Exported {} result(s) to: {}'.format(total, output_path)) + if skipped: + print(' - {} skipped (no valid FileVersion size)'.format(skipped)) + if no_quota: + print(' - {} updated_no_quota (FileInfo fixed but quota NOT recalculated — manual review needed)'.format(no_quota)) + + +# --- Main --- + +def main(): + ts = datetime.now().strftime('%Y%m%d_%H%M%S') + default_output = f'migrate_fix_fileinfo_quota_{ts}.csv' + + parser = argparse.ArgumentParser( + description='Fix missing/zero-size FileInfo records and recalculate UserQuota.', + ) + parser.add_argument( + '--output', '-o', + default=default_output, + help=f'Output CSV file path (default: {default_output})', + ) + parser.add_argument( + '--dry', action='store_true', default=False, + help='Run in dry-run mode (do not commit updates to the database).', + ) + + args = parser.parse_args() + + if not args.dry: + script_utils.add_file_logger(logger, __file__) + + # Configure console logging format + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s %(levelname)s %(message)s', + ) + + # 1. Scan for broken records + rows = scan_broken_files() + + if not rows: + print('No broken FileInfo records found.') + return + + # 2. Print summary table + print_results(rows) + + # 3. Apply fixes (or simulate in dry-run mode) + result_rows = apply_fixes(rows, dry_run=args.dry) + + # 4. Export results to CSV + save_result_csv(result_rows, args.output) + + +if __name__ == '__main__': + main() diff --git a/tests/test_addons.py b/tests/test_addons.py index e6f18031b56..a3a46180e93 100644 --- a/tests/test_addons.py +++ b/tests/test_addons.py @@ -25,7 +25,7 @@ DraftRegistrationFactory, ExportDataLocationFactory, ) -from website import settings +from website import settings, mails from api.base import settings as api_settings from addons.base import views from addons.github.exceptions import ApiError @@ -113,7 +113,9 @@ def test_auth_download(self): data = jwt.decode(jwe.decrypt(res.json['payload'].encode('utf-8'), self.JWE_KEY), settings.WATERBUTLER_JWT_SECRET, algorithm=settings.WATERBUTLER_JWT_ALGORITHM)['data'] assert_equal(data['auth'], views.make_auth(self.user)) assert_equal(data['credentials'], self.node_addon.serialize_waterbutler_credentials()) - assert_equal(data['settings'], self.node_addon.serialize_waterbutler_settings()) + expected_settings = self.node_addon.serialize_waterbutler_settings().copy() + expected_settings['max_file_size'] = 100 + assert_equal(data['settings'], expected_settings) expected_url = furl.furl(self.node.api_url_for('create_waterbutler_log', _absolute=True, _internal=True)) observed_url = furl.furl(data['callback_url']) observed_url.port = expected_url.port @@ -153,7 +155,9 @@ def test_auth_bad_cookie(self): data = jwt.decode(jwe.decrypt(res.json['payload'].encode('utf-8'), self.JWE_KEY), settings.WATERBUTLER_JWT_SECRET, algorithm=settings.WATERBUTLER_JWT_ALGORITHM)['data'] assert_equal(data['auth'], views.make_auth(self.user)) assert_equal(data['credentials'], self.node_addon.serialize_waterbutler_credentials()) - assert_equal(data['settings'], self.node_addon.serialize_waterbutler_settings()) + expected_settings = self.node_addon.serialize_waterbutler_settings().copy() + expected_settings['max_file_size'] = 100 + assert_equal(data['settings'], expected_settings) expected_url = furl.furl(self.node.api_url_for('create_waterbutler_log', _absolute=True, _internal=True)) observed_url = furl.furl(data['callback_url']) observed_url.port = expected_url.port @@ -1019,6 +1023,99 @@ def test_add_folder_osfstorage_log(self, mock_basefilenode): assert_equal(self.node.logs.count(), nlogs + 1) assert('urls' not in self.node.logs.filter(action='osf_storage_file_added')[0].params) + @mock.patch('website.mails.send_mail') + def test_create_waterbutler_log_quota_exceeded_error(self, mock_send_mail): + url = self.node.api_url_for('create_waterbutler_log') + payload = self.build_payload( + action='move', + metadata={'path': 'pizza'}, + source={ + 'provider': 'github', + 'materialized': '/src.txt', + 'path': '/src.txt', + 'name': 'src.txt', + 'nid': self.node._id, + }, + destination={ + 'provider': 'github', + 'materialized': '/dest.txt', + 'path': '/dest.txt', + 'name': 'dest.txt', + 'nid': self.node._id, + }, + errors=[''] + ) + res = self.app.put_json(url, payload, headers={'Content-Type': 'application/json'}) + assert_equal(res.status_code, 200) + assert_true(mock_send_mail.called) + call_args = mock_send_mail.call_args[1] + assert_equal(call_args['error_info'], {'type': 'quota_exceeded'}) + assert_equal(mock_send_mail.call_args[0][1], mails.FILE_OPERATION_FAILED) + + @mock.patch('website.mails.send_mail') + def test_create_waterbutler_log_oversized_error(self, mock_send_mail): + url = self.node.api_url_for('create_waterbutler_log') + payload = self.build_payload( + action='move', + metadata={'path': 'pizza'}, + source={ + 'provider': 'github', + 'materialized': '/src.txt', + 'path': '/src.txt', + 'name': 'src.txt', + 'nid': self.node._id, + }, + destination={ + 'provider': 'github', + 'materialized': '/dest.txt', + 'path': '/dest.txt', + 'name': 'dest.txt', + 'nid': self.node._id, + }, + errors=[''] + ) + res = self.app.put_json(url, payload, headers={'Content-Type': 'application/json'}) + assert_equal(res.status_code, 200) + assert_true(mock_send_mail.called) + call_args = mock_send_mail.call_args[1] + assert_equal(call_args['error_info'], { + 'type': 'oversized', + 'oversized_files': [{'name': 'large.zip', 'size': 2000000000}], + 'max_size': 1000000000 + }) + assert_equal(mock_send_mail.call_args[0][1], mails.FILE_OPERATION_FAILED) + + @mock.patch('addons.base.views.logger.warning') + @mock.patch('website.mails.send_mail') + def test_create_waterbutler_log_unparseable_error(self, mock_send_mail, mock_logger_warning): + url = self.node.api_url_for('create_waterbutler_log') + payload = self.build_payload( + action='move', + metadata={'path': 'pizza'}, + source={ + 'provider': 'github', + 'materialized': '/src.txt', + 'path': '/src.txt', + 'name': 'src.txt', + 'nid': self.node._id, + }, + destination={ + 'provider': 'github', + 'materialized': '/dest.txt', + 'path': '/dest.txt', + 'name': 'dest.txt', + 'nid': self.node._id, + }, + errors=[''] + ) + res = self.app.put_json(url, payload, headers={'Content-Type': 'application/json'}) + assert_equal(res.status_code, 200) + assert_true(mock_send_mail.called) + assert_true(mock_logger_warning.called) + call_args = mock_send_mail.call_args[1] + assert_equal(call_args['error_info'], None) + assert_equal(mock_send_mail.call_args[0][1], mails.FILE_OPERATION_FAILED) + class TestAddonLogsDifferentProvider(OsfTestCase): def setUp(self): diff --git a/tests/test_quota.py b/tests/test_quota.py index 2fc183213d0..a6d73106cf7 100644 --- a/tests/test_quota.py +++ b/tests/test_quota.py @@ -268,7 +268,7 @@ def setUp(self): ) self.file.save() - def test_add_file_info(self): + def test_file_info_skipped_for_osfstorage_signal(self): file_info_query = FileInfo.objects.filter(file=self.file) assert_false(file_info_query.exists()) @@ -294,9 +294,8 @@ def test_add_file_info(self): ) file_info_list = FileInfo.objects.filter(file=self.file).all() - assert_equal(file_info_list.count(), 1) - file_info = file_info_list.first() - assert_equal(file_info.file_size, 1000) + # FileInfo was not created + assert_equal(file_info_list.count(), 0) def test_update_file_info(self): file_info = FileInfo(file=self.file, file_size=1000) @@ -386,7 +385,7 @@ def setUp(self): target_content_type_id=2 ) - def test_add_first_file(self): + def test_first_file_userquota_not_created_for_osfstorage_signal(self): assert_false(UserQuota.objects.filter(user=self.project_creator).exists()) quota.update_used_quota( @@ -414,9 +413,7 @@ def test_add_first_file(self): storage_type=UserQuota.NII_STORAGE, user=self.project_creator ).all() - assert_equal(len(user_quota), 1) - user_quota = user_quota[0] - assert_equal(user_quota.used, 1000) + assert_equal(len(user_quota), 0) def test_add_first_file_custom_storage(self): assert_false(UserQuota.objects.filter(user=self.project_creator).exists()) @@ -453,11 +450,9 @@ def test_add_first_file_custom_storage(self): storage_type=UserQuota.CUSTOM_STORAGE, user=self.project_creator ).all() - assert_equal(len(user_quota), 1) - user_quota = user_quota[0] - assert_equal(user_quota.used, 1200) + assert_equal(len(user_quota), 0) - def test_add_file(self): + def test_userquota_unchanged_for_osfstorage_signal(self): UserQuota.objects.create( user=self.project_creator, storage_type=UserQuota.NII_STORAGE, @@ -492,7 +487,7 @@ def test_add_file(self): ).all() assert_equal(len(user_quota), 1) user_quota = user_quota[0] - assert_equal(user_quota.used, 6500) + assert_equal(user_quota.used, 5500) def test_add_file_custom_storage(self): UserQuota.objects.create( @@ -536,7 +531,7 @@ def test_add_file_custom_storage(self): ).all() assert_equal(len(user_quota), 1) user_quota = user_quota[0] - assert_equal(user_quota.used, 6700) + assert_equal(user_quota.used, 5500) def test_add_file_negative_size(self): quota.update_used_quota( @@ -1358,7 +1353,7 @@ def test_delete_file_with_Amazon_S3_Compatible_Storage_for_Institution(self): if check_select_for_update(): mock_file_info.objects.filter.return_value.select_for_update.return_value.get.return_value = FileInfo( file=self.base_file_node, file_size=1000) - mock_user_quota.objects.filter.return_value.select_for_update.return_value.first.return_value = UserQuota( + mock_user_quota.objects.filter.return_value.select_for_update.return_value.get.return_value = UserQuota( user=self.project_creator, storage_type=UserQuota.CUSTOM_STORAGE, max_quota=api_settings.DEFAULT_MAX_QUOTA, @@ -1366,12 +1361,14 @@ def test_delete_file_with_Amazon_S3_Compatible_Storage_for_Institution(self): ) else: mock_file_info.objects.get.return_value = FileInfo(file=self.base_file_node, file_size=1000) - mock_user_quota.objects.filter.return_value.first.return_value = UserQuota( + _real_user_quota = UserQuota( user=self.project_creator, storage_type=UserQuota.CUSTOM_STORAGE, max_quota=api_settings.DEFAULT_MAX_QUOTA, used=5500 ) + mock_user_quota.objects.get.return_value = _real_user_quota + mock_user_quota.objects.filter.return_value.first.return_value = _real_user_quota with mock.patch('website.util.quota.BaseFileNode', mock_base_file_node): with mock.patch('website.util.quota.FileInfo', mock_file_info): with mock.patch('website.util.quota.UserQuota', mock_user_quota): @@ -1414,7 +1411,7 @@ def test_delete_folder_with_Amazon_S3_Compatible_Storage_for_Institution(self): if check_select_for_update(): mock_file_info.objects.filter.return_value.select_for_update.return_value.get.return_value = FileInfo( file=self.base_file_node, file_size=1500) - mock_user_quota.objects.filter.return_value.select_for_update.return_value.first.return_value = UserQuota( + mock_user_quota.objects.filter.return_value.select_for_update.return_value.get.return_value = UserQuota( user=self.project_creator, storage_type=UserQuota.CUSTOM_STORAGE, max_quota=api_settings.DEFAULT_MAX_QUOTA, @@ -1422,12 +1419,14 @@ def test_delete_folder_with_Amazon_S3_Compatible_Storage_for_Institution(self): ) else: mock_file_info.objects.get.return_value = FileInfo(file=self.base_file_node, file_size=1500) - mock_user_quota.objects.filter.return_value.first.return_value = UserQuota( + _real_user_quota = UserQuota( user=self.project_creator, storage_type=UserQuota.CUSTOM_STORAGE, max_quota=api_settings.DEFAULT_MAX_QUOTA, used=5500 ) + mock_user_quota.objects.get.return_value = _real_user_quota + mock_user_quota.objects.filter.return_value.first.return_value = _real_user_quota with mock.patch('website.util.quota.BaseFileNode', mock_base_file_node): with mock.patch('website.util.quota.FileInfo', mock_file_info): with mock.patch('website.util.quota.UserQuota', mock_user_quota): diff --git a/tests/test_quota_move_copy.py b/tests/test_quota_move_copy.py new file mode 100644 index 00000000000..114113ebbf3 --- /dev/null +++ b/tests/test_quota_move_copy.py @@ -0,0 +1,248 @@ +import pytest +from unittest import mock +from website.util import quota +from django.db import IntegrityError + +@pytest.fixture +def mock_user(): + user = mock.Mock() + user.id = 1 + user._id = 'abc123' + user.userquota_set.get.side_effect = quota.UserQuota.DoesNotExist + return user + +@pytest.fixture +def mock_node(): + node = mock.Mock(spec=quota.AbstractNode) + node.id = 42 + node.creator = mock.Mock() + node.type = 'osf.node' + return node + +def test_get_file_size_file(): + children = {'children': [{'kind': 'file', 'size': 100}, {'kind': 'file', 'size': 200}]} + assert quota.get_file_size(children) == 300 + +def test_get_file_size_nested(): + children = { + 'children': [ + {'kind': 'file', 'size': 100}, + {'kind': 'folder', 'children': [{'kind': 'file', 'size': 50}]} + ] + } + assert quota.get_file_size(children) == 150 + +@mock.patch('website.util.quota.ProjectStorageType.objects.get') +@mock.patch('website.util.quota.AbstractNode.objects.get') +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_handle_move_copy_source_extend_dest_osfstorage(mock_update_quota, mock_node_get, mock_pst_get, mock_node): + """source=s3compatinstitutions, dest=osfstorage → update_quota không được gọi vì source không phải osfstorage""" + mock_pst_get.return_value = mock.Mock(storage_type=1) + mock_node_get.return_value = mock_node + + payload = { + 'destination': {'provider': 'osfstorage', 'size': 123, 'children': None}, + 'source': {'nid': 'nid1', 'provider': 's3compatinstitutions'} + } + quota._handle_move_copy(quota.FileLog.FILE_MOVED, mock_node, mock.Mock(), payload) + mock_update_quota.assert_not_called() + +@mock.patch('website.util.quota.ProjectStorageType.objects.get') +@mock.patch('website.util.quota.AbstractNode.objects.get') +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_handle_move_copy_source_osfstorage(mock_update_quota, mock_node_get, mock_pst_get, mock_user): + mock_node = mock.create_autospec(quota.AbstractNode, instance=True) + mock_node.type = 'osf.node' + mock_pst_get.return_value = mock.Mock(storage_type=1) + mock_node_get.return_value = mock_node + + payload = { + 'destination': {'provider': 's3compatinstitutions', 'size': 123}, # <-- thêm size + 'source': {'provider': 'osfstorage', 'size': 123, 'nid': 'nid1'} + } + quota._handle_move_copy(quota.FileLog.FILE_MOVED, mock_node, mock_user, payload) + mock_update_quota.assert_called_once() + +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_handle_move_copy_no_osfstorage(mock_update_quota, mock_node, mock_user): + payload = { + 'destination': {'provider': 's3compatinstitutions'}, + 'source': {'provider': 's3compatinstitutions'} + } + quota._handle_move_copy(quota.FileLog.FILE_MOVED, mock_node, mock_user, payload) + mock_update_quota.assert_not_called() + +@mock.patch('website.util.quota.ProjectStorageType.objects.get') +@mock.patch('website.util.quota.AbstractNode.objects.get') +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_file_moved_file(mock_update_quota, mock_node_get, mock_pst_get, mock_node): + mock_pst_get.return_value = mock.Mock(storage_type=1) + mock_node_get.return_value = mock_node + payload = { + 'destination': {'size': 123, 'children': None}, + 'source': {'nid': 'nid1', 'provider': 'osfstorage'} + } + quota.file_moved(mock_node, payload) + mock_update_quota.assert_called_once() + +@mock.patch('website.util.quota.ProjectStorageType.objects.get') +@mock.patch('website.util.quota.AbstractNode.objects.get') +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_file_moved_folder(mock_update_quota, mock_node_get, mock_pst_get, mock_node): + mock_pst_get.return_value = mock.Mock(storage_type=1) + mock_node_get.return_value = mock_node + payload = { + 'destination': {'children': [{'kind': 'file', 'size': 100}]}, + 'source': {'nid': 'nid1', 'provider': 'osfstorage'} + } + quota.file_moved(mock_node, payload) + mock_update_quota.assert_called_once() + +@mock.patch('website.util.quota.ProjectStorageType.objects.get', side_effect=Exception) +@mock.patch('website.util.quota.AbstractNode.objects.get') +@mock.patch('website.util.quota.update_quota') +@pytest.mark.django_db +def test_file_moved_exception(mock_update_quota, mock_node_get, mock_pst_get, mock_node): + mock_node_get.return_value = mock_node + payload = { + 'destination': {'size': 123, 'children': None}, + 'source': {'nid': 'nid1', 'provider': 'osfstorage'} + } + with pytest.raises(Exception): + quota.file_moved(mock_node, payload) + mock_update_quota.assert_not_called() + +@mock.patch('website.util.quota.UserQuota.objects.get') +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_add(mock_filter, mock_get, mock_node): + uq = mock.Mock() + uq.used = 0 + mock_get.return_value = uq + mock_filter.return_value.select_for_update.return_value.get.return_value = uq + quota.update_quota(mock_node, 100, 1, add=True) + assert uq.used == 100 + +@mock.patch('website.util.quota.UserQuota.objects.get') +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_subtract(mock_filter, mock_get, mock_node): + uq = mock.Mock() + uq.used = 200 + mock_get.return_value = uq + mock_filter.return_value.select_for_update.return_value.get.return_value = uq + quota.update_quota(mock_node, 50, 1, add=False) + assert uq.used == 150 + +@mock.patch('website.util.quota.check_select_for_update', return_value=False) +@mock.patch('website.util.quota.transaction.atomic') +@mock.patch('website.util.quota.UserQuota.objects.create') +@mock.patch('website.util.quota.UserQuota.objects.filter') +@mock.patch('website.util.quota.UserQuota.objects.get', side_effect=quota.UserQuota.DoesNotExist) +@pytest.mark.django_db +def test_update_quota_userquota_not_exist(mock_get, mock_filter, mock_create, mock_atomic, mock_node): + mock_filter.return_value.select_for_update.return_value.get.side_effect = quota.UserQuota.DoesNotExist + quota.update_quota(mock_node, 50, 1, add=True) + mock_get.assert_called_once() + +@mock.patch('website.util.quota.UserQuota.objects.get', side_effect=Exception) +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_other_exception(mock_filter, mock_get, mock_node): + mock_filter.return_value.select_for_update.return_value.get.side_effect = Exception + with pytest.raises(Exception): + quota.update_quota(mock_node, 50, 1, add=True) + mock_get.assert_not_called() + +@mock.patch('website.util.quota._handle_move_copy') +def test_update_used_quota_move_event(mock_handle_move_copy, mock_node, mock_user): + payload = { + 'destination': {'provider': 's3compatinstitutions'}, + 'source': {'provider': 'osfstorage'} + } + quota.update_used_quota(None, mock_node, mock_user, quota.FileLog.FILE_MOVED, payload) + mock_handle_move_copy.assert_called_once() + +@mock.patch('website.util.quota._handle_move_copy') +def test_update_used_quota_dest_osfstorage_returns_early(mock_handle_move_copy, mock_node, mock_user): + payload = { + 'destination': {'provider': 'osfstorage'} + } + result = quota.update_used_quota(None, mock_node, mock_user, quota.FileLog.FILE_MOVED, payload) + assert result is None + mock_handle_move_copy.assert_not_called() + +@mock.patch('website.util.quota.check_select_for_update', return_value=True) +@mock.patch('website.util.quota.transaction.atomic') +@mock.patch('website.util.quota.UserQuota.objects.create', side_effect=IntegrityError) +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_integrityerror_select_for_update_add( + mock_filter, mock_create, mock_atomic, mock_check, mock_node +): + uq = mock.Mock() + uq.used = 10 + mock_filter.return_value.select_for_update.return_value.get.side_effect = [ + quota.UserQuota.DoesNotExist, + uq + ] + quota.update_quota(mock_node, 5, 1, add=True) + assert mock_filter.return_value.select_for_update.return_value.get.call_count == 2 + assert uq.used == 15 + uq.save.assert_called_once() + +@mock.patch('website.util.quota.check_select_for_update', return_value=True) +@mock.patch('website.util.quota.transaction.atomic') +@mock.patch('website.util.quota.UserQuota.objects.create', side_effect=IntegrityError) +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_integrityerror_select_for_update_subtract( + mock_filter, mock_create, mock_atomic, mock_check, mock_node +): + uq = mock.Mock() + uq.used = 10 + mock_filter.return_value.select_for_update.return_value.get.side_effect = [ + quota.UserQuota.DoesNotExist, + uq + ] + quota.update_quota(mock_node, 3, 1, add=False) + assert mock_filter.return_value.select_for_update.return_value.get.call_count == 1 + assert uq.used == 10 + uq.save.assert_not_called() + +@mock.patch('website.util.quota.check_select_for_update', return_value=False) +@mock.patch('website.util.quota.transaction.atomic') +@mock.patch('website.util.quota.UserQuota.objects.create', side_effect=IntegrityError) +@mock.patch('website.util.quota.UserQuota.objects.get') +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_integrityerror_get_add( + mock_filter, mock_get, mock_create, mock_atomic, mock_check, mock_node +): + uq = mock.Mock() + uq.used = 20 + mock_get.side_effect = [quota.UserQuota.DoesNotExist, uq] + quota.update_quota(mock_node, 8, 1, add=True) + assert uq.used == 28 + uq.save.assert_called_once() + +@mock.patch('website.util.quota.check_select_for_update', return_value=False) +@mock.patch('website.util.quota.transaction.atomic') +@mock.patch('website.util.quota.UserQuota.objects.create', side_effect=IntegrityError) +@mock.patch('website.util.quota.UserQuota.objects.get') +@mock.patch('website.util.quota.UserQuota.objects.filter') +@pytest.mark.django_db +def test_update_quota_integrityerror_get_subtract( + mock_filter, mock_get, mock_create, mock_atomic, mock_check, mock_node +): + uq = mock.Mock() + uq.used = 30 + mock_get.side_effect = [quota.UserQuota.DoesNotExist, uq] + quota.update_quota(mock_node, 12, 1, add=False) + assert uq.used == 30 + uq.save.assert_not_called() diff --git a/website/static/js/fangorn.js b/website/static/js/fangorn.js index 37f5a8c65ca..be7042faf14 100644 --- a/website/static/js/fangorn.js +++ b/website/static/js/fangorn.js @@ -89,6 +89,10 @@ var OPERATIONS = { } }; +var MESSAGE_MAP = { + 'quota_exceeded': gettext('You do not have enough available quota.'), +}; + var isInUploadFolderProcess = false; var isOngoingUploadFolder = false; @@ -583,6 +587,25 @@ function checkConflictsRename(tb, item, name, cb) { cb('replace'); } +/** + * Recursively finds all individual files that exceed maxSizeBytes. + * Returns an array of offending item objects. + */ +function _findOversizedFiles(item, maxSizeBytes) { + var oversized = []; + if (item.kind === 'file') { + var size = item.data.size || 0; + if (size > 0 && size > maxSizeBytes) { + oversized.push(item); + } + } else if (item.kind === 'folder' && item.children && item.children.length > 0) { + for (var i = 0; i < item.children.length; i++) { + oversized = oversized.concat(_findOversizedFiles(item.children[i], maxSizeBytes)); + } + } + return oversized; +} + function doItemOp(operation, to, from, rename, conflict) { var tb = this; // dismiss old modal immediately to prevent button mashing @@ -626,6 +649,75 @@ function doItemOp(operation, to, from, rename, conflict) { return; } + if (operation !== OPERATIONS.RENAME) { + var destMaxSize = to.data && to.data.accept && to.data.accept.maxSize; + if (destMaxSize) { + var maxSizeBytes = destMaxSize * 1000000; + var oversizedFiles = _findOversizedFiles(from, maxSizeBytes); + if (oversizedFiles.length > 0) { + var maxSizeDisplay = $osf.humanFileSize(maxSizeBytes, true); + oversizedFiles.forEach(function(oversized) { + var displaySize = $osf.humanFileSize(oversized.data.size, true); + $osf.growl(sprintf( + gettext('File「%1$s」is too large (%2$s). Max file size is %3$s.'), + oversized.data.name, displaySize, maxSizeDisplay + )); + }); + from.inProgress = false; + return; + } + } + + var destProvider = to.data.provider; + if (destProvider === 'osfstorage') { + // Sum size of all files in the item being moved/copied + var totalMoveSize = 0; + if (from.kind === 'file') { + totalMoveSize = from.data.size || 0; + } else { + getAllChildren(from).forEach(function(child) { + if (child.kind === 'file') { + totalMoveSize += child.data.size || 0; + } + }); + } + + if (totalMoveSize > 0) { + var quotaResp = $.ajax({ + async: false, + method: 'GET', + url: to.data.nodeApiUrl + 'get_creator_quota/' + }); + if (quotaResp.responseJSON) { + var quotaData = quotaResp.responseJSON; + var quotaMsgText = ''; + + if (quotaData.used + totalMoveSize > quotaData.max) { + if (from.kind === 'file') { + quotaMsgText = sprintf(gettext('Not enough quota to move/copy the file.')); + } else { + var folderSize = $osf.humanFileSize(totalMoveSize, true); + quotaMsgText = sprintf(gettext('Not enough quota to move/copy. The total size of the folder %1$s.'), folderSize); + } + from.notify.update(quotaMsgText, 'warning', undefined, 3000); + from.inProgress = false; + return; + } + if (quotaData.used + totalMoveSize > quotaData.max * window.contextVars.threshold) { + $osf.growl( + gettext('Quota usage alert'), + sprintf( + gettext('You have used more than %1$s%% of your quota.'), + (window.contextVars.threshold * 100) + ), + 'warning' + ); + } + } + } + } + } + var origFrom = Object.assign({}, from); if (operation === OPERATIONS.COPY) { from = tb.createItem($.extend(true, {status: operation.status}, from.data), to.id); @@ -731,6 +823,41 @@ function doItemOp(operation, to, from, rename, conflict) { from.data.status = undefined; } + if (xhr.status === 413 && xhr.responseJSON && xhr.responseJSON.oversized_files) { + var serverMaxSize = xhr.responseJSON.max_size; + var destMaxSize = to.data && to.data.accept && to.data.accept.maxSize; + var maxSizeDisplay = serverMaxSize ? + $osf.humanFileSize(serverMaxSize, true) : + (destMaxSize ? $osf.humanFileSize(destMaxSize * 1000000, true) : null); + xhr.responseJSON.oversized_files.forEach(function(fileObj) { + var displaySize = $osf.humanFileSize(fileObj.size, true); + var msg = maxSizeDisplay ? + sprintf(gettext('File「%1$s」is too large (%2$s). Max file size is %3$s.'), fileObj.name, displaySize, maxSizeDisplay) : + sprintf(gettext('File「%1$s」is too large (%2$s).'), fileObj.name, displaySize); + $osf.growl(msg); + }); + + if (notRenameOp) { + addFileStatus(tb, from, false, '', '', conflict); + } + orderFolder.call(tb, from.parent()); + tb.pendingReadyFiles = (tb.pendingReadyFiles || []).filter(function (file) { return file.data.id !== from.data.id; }); + from.inProgress = false; + return; + } + if (xhr.status === 406 && xhr.responseJSON && xhr.responseJSON.message) { + var key = xhr.responseJSON.message_key; + var msg = (key && MESSAGE_MAP[key]) ? MESSAGE_MAP[key] : xhr.responseJSON.message; + $osf.growl(msg); + if (notRenameOp) { + addFileStatus(tb, from, false, '', '', conflict); + } + orderFolder.call(tb, from.parent()); + tb.pendingReadyFiles = (tb.pendingReadyFiles || []).filter(function (file) { return file.data.id !== from.data.id; }); + from.inProgress = false; + return; + } + var message; if (xhr.status !== 500 && xhr.responseJSON && (xhr.responseJSON.message || xhr.responseJSON.message_long)) { @@ -3365,6 +3492,7 @@ function _dropLogic(event, items, folder) { if (toMove.conflicts.length > 0) { tb.syncFileMoveCache[folder.data.provider].conflicts = tb.syncFileMoveCache[folder.data.provider].conflicts || []; + tb.syncFileMoveCache[folder.data.provider].ready = tb.syncFileMoveCache[folder.data.provider].ready || []; toMove.conflicts.forEach(function(item) { tb.syncFileMoveCache[folder.data.provider].conflicts.push({'item' : item, 'folder' : folder}); }); diff --git a/website/templates/emails/file_operation_failed.html.mako b/website/templates/emails/file_operation_failed.html.mako index 2c72fed9b16..16adf99fc6e 100644 --- a/website/templates/emails/file_operation_failed.html.mako +++ b/website/templates/emails/file_operation_failed.html.mako @@ -59,8 +59,30 @@ - An error has occurred, and the ${'folder' if source_path.endswith('/') else 'file'} from ${source_node.title} on the GakuNin RDM was not successfully ${'moved' if action == 'move' else 'copied'}. - Please log in and try this action again. If the problem persists, please email ${osf_support_email}. + <% + ei = context.get('error_info') or {} + err_type = ei.get('type', '') + %> + % if err_type == 'quota_exceeded': + An error has occurred, and the ${'folder' if source_path.endswith('/') else 'file'} from ${source_node.title} on the GakuNin RDM was not successfully ${'moved' if action == 'move' else 'copied'} because you do not have enough available storage quota. + Please free up storage space or contact your administrator to increase your quota, then try again. If the problem persists, please email ${osf_support_email}. + % elif err_type == 'oversized': + <% + oversized_files = ei.get('oversized_files', []) + max_size_bytes = ei.get('max_size') or 0 + max_size_mb = max_size_bytes // (1024 * 1024) if max_size_bytes else 0 + %> + An error has occurred, and the ${'folder' if source_path.endswith('/') else 'file'} from ${source_node.title} on the GakuNin RDM was not successfully ${'moved' if action == 'move' else 'copied'} because the following file(s) exceed the maximum allowed file size% if max_size_mb: of ${max_size_mb} MB% endif: +
    + % for f in oversized_files: +
  • ${f.get('name', 'Unknown file')} (${'{:.1f}'.format(f.get('size', 0) / (1024 * 1024))} MB)
  • + % endfor +
+ Please reduce the file size and try again. If the problem persists, please email ${osf_support_email}. + % else: + An error has occurred, and the ${'folder' if source_path.endswith('/') else 'file'} from ${source_node.title} on the GakuNin RDM was not successfully ${'moved' if action == 'move' else 'copied'}. + Please log in and try this action again. If the problem persists, please email ${osf_support_email}. + % endif diff --git a/website/translations/en/LC_MESSAGES/js_messages.po b/website/translations/en/LC_MESSAGES/js_messages.po index 674d3c0fa1d..07195312429 100644 --- a/website/translations/en/LC_MESSAGES/js_messages.po +++ b/website/translations/en/LC_MESSAGES/js_messages.po @@ -9288,3 +9288,15 @@ msgstr "" msgid "The Integrated Admin added ${contributors} as contributor(s) to ${node}" msgstr "" + +msgid "File「%1$s」is too large (%2$s). Max file size is %3$s." +msgstr "" + +msgid "You do not have enough available quota." +msgstr "" + +msgid "Not enough quota to move/copy the file." +msgstr "" + +msgid "Not enough quota to move/copy. The total size of the folder %1$s." +msgstr "" diff --git a/website/translations/ja/LC_MESSAGES/js_messages.po b/website/translations/ja/LC_MESSAGES/js_messages.po index 81dd0b3e5e0..a4e897f9f26 100644 --- a/website/translations/ja/LC_MESSAGES/js_messages.po +++ b/website/translations/ja/LC_MESSAGES/js_messages.po @@ -10841,3 +10841,15 @@ msgstr "" "
  • このアドオンにより、GakuNin RDMプロジェクトは外部サービスに接続されます。このサービスを利用すると、外部サービスの利用規約に拘束されます。GakuNin RDMはサービスおよびその利用について責任を負いません。
  • \n" "
  • このアドオンを利用すると外部サービスにファイルを保存できます。このアドオンに追加したファイルはGakuNin RDM内には保存されません。
  • \n" "\n" + +msgid "File「%1$s」is too large (%2$s). Max file size is %3$s." +msgstr "ファイル「%1$s」は大きすぎます (%2$s)。最大ファイルサイズは %3$s です。" + +msgid "You do not have enough available quota." +msgstr "利用可能なクォータが不足しています。" + +msgid "Not enough quota to move/copy the file." +msgstr "ファイルを移動/コピーするのに十分なクォータがありません。" + +msgid "Not enough quota to move/copy. The total size of the folder %1$s." +msgstr "移動/コピーするための空き容量が足りません。フォルダのサイズは%1$sです。" diff --git a/website/translations/js_messages.pot b/website/translations/js_messages.pot index 7086b672e57..ecaba5d4cda 100644 --- a/website/translations/js_messages.pot +++ b/website/translations/js_messages.pot @@ -9241,3 +9241,15 @@ msgstr "" msgid "The Integrated Admin added ${contributors} as contributor(s) to ${node}" msgstr "" + +msgid "File「%1$s」 is too large (%2$s). Max file size is %3$s." +msgstr "" + +msgid "You do not have enough available quota." +msgstr "" + +msgid "Not enough quota to move/copy the file." +msgstr "" + +msgid "Not enough quota to move/copy. The total size of the folder %1$s." +msgstr "" diff --git a/website/util/quota.py b/website/util/quota.py index 1990073a444..31688511674 100644 --- a/website/util/quota.py +++ b/website/util/quota.py @@ -142,6 +142,17 @@ def get_project_storage_type(node): @file_signals.file_updated.connect def update_used_quota(self, target, user, event_type, payload): + # FILE_MOVED and FILE_COPIED carry 'source'/'destination', not 'metadata'. + # Handle them first before the metadata_provider guard below. + if event_type in (FileLog.FILE_MOVED, FileLog.FILE_COPIED): + dest_provider = payload.get('destination', {}).get('provider') + if dest_provider == 'osfstorage': + return # Hooks already handled (create_child), SKIP double counting + src_provider = payload.get('source', {}).get('provider') + if src_provider == 'osfstorage' or src_provider in PROVIDERS: + _handle_move_copy(event_type, target, user, payload) + return + data = dict(payload.get('metadata')) if payload.get('metadata') else None metadata_provider = data.get('provider') if payload.get('metadata') else None if metadata_provider == 'osfstorage' or metadata_provider in PROVIDERS: @@ -180,6 +191,9 @@ def update_used_quota(self, target, user, event_type, payload): storage_type = get_project_storage_type(target) if event_type == FileLog.FILE_ADDED: + metadata_provider = (payload.get('metadata') or {}).get('provider') + if metadata_provider == 'osfstorage': + return file_added(target, payload, file_node, storage_type) elif event_type == FileLog.FILE_REMOVED: if metadata_provider in PROVIDERS: @@ -223,76 +237,31 @@ def file_added(target, payload, file_node, storage_type): file_size = int(payload['metadata']['size']) if file_size < 0: return - try: - if check_select_for_update(): - user_quota = UserQuota.objects.filter( - user=target.creator, - storage_type=storage_type - ).select_for_update().get() - else: - user_quota = UserQuota.objects.get( - user=target.creator, - storage_type=storage_type - ) - user_quota.used += file_size - user_quota.save() - except UserQuota.DoesNotExist: + update_quota(target, file_size, storage_type, add=True) + FileInfo.objects.create(file=file_node, file_size=file_size) + +def node_removed(target, user, payload, file_node, storage_type): + if 'osf.trashed' not in file_node.type: + logging.error('FileNode is not trashed, cannot update used quota!') + return + + total_removed = 0 + for removed_file in get_node_file_list(file_node): try: - with transaction.atomic(): - UserQuota.objects.create( - user=target.creator, - storage_type=storage_type, - max_quota=api_settings.DEFAULT_MAX_QUOTA, - used=file_size - ) - except IntegrityError: if check_select_for_update(): - user_quota = UserQuota.objects.filter( - user=target.creator, - storage_type=storage_type - ).select_for_update().get() + file_info = FileInfo.objects.filter(file=removed_file).select_for_update().get() else: - user_quota = UserQuota.objects.get( - user=target.creator, - storage_type=storage_type - ) - user_quota.used += file_size - user_quota.save() + file_info = FileInfo.objects.get(file=removed_file) + except FileInfo.DoesNotExist: + logging.error('FileInfo not found, cannot update used quota!') + continue - FileInfo.objects.create(file=file_node, file_size=file_size) + total_removed += file_info.file_size + file_info.file_size = 0 + file_info.save() -def node_removed(target, user, payload, file_node, storage_type): - if check_select_for_update(): - user_quota = UserQuota.objects.filter( - user=target.creator, - storage_type=storage_type - ).select_for_update().first() - else: - user_quota = UserQuota.objects.filter( - user=target.creator, - storage_type=storage_type - ).first() - if user_quota is not None: - if 'osf.trashed' not in file_node.type: - logging.error('FileNode is not trashed, cannot update used quota!') - return - - for removed_file in get_node_file_list(file_node): - try: - if check_select_for_update(): - file_info = FileInfo.objects.filter(file=removed_file).select_for_update().get() - else: - file_info = FileInfo.objects.get(file=removed_file) - except FileInfo.DoesNotExist: - logging.error('FileInfo not found, cannot update used quota!') - continue - - user_quota.used -= file_info.file_size - if user_quota.used < 0: - user_quota.used = 0 - file_info.file_size = 0 - file_info.save() - user_quota.save() + if total_removed > 0: + update_quota(target, total_removed, storage_type, add=False) def file_modified(target, user, payload, file_node, storage_type): file_size = int(payload['metadata']['size']) @@ -390,3 +359,116 @@ def get_node_file_list(file_node): file_list.append(child_file_node) return file_list + +def _handle_move_copy(event_type, target, user, payload): + """Resolve the destination file_node and dispatch to file_moved or file_copied. + + For FILE_MOVED / FILE_COPIED events the relevant file information lives in + ``payload['destination']``, not in ``payload['metadata']``. Only extended-storage + providers (PROVIDERS) destinations are processed here. + + Scenarios handled: + - osfstorage → extended storage (quota decreases for source node) + - same provider moves / copies (if it involves extended storage) + + (Note: Moves TO osfstorage are handled entirely by OSF hooks, bypassing this flow). + + :param str event_type: FileLog.FILE_MOVED or FileLog.FILE_COPIED + :param AbstractNode target: destination project node + :param OSFUser user: acting user + :param dict payload: full webhook payload + """ + + if event_type == FileLog.FILE_MOVED: + file_moved(target, payload) + +def file_moved(target, payload): + """Update per-user-per-storage used quota when moving file + + :param Object target: Id of project + :param dict payload: The payload of the move operation + """ + if isinstance(target, AbstractNode): + file_size = -1 + dest_size = payload['destination'].get('size', None) + children = payload['destination'].get('children', None) + # Move file + if dest_size is not None: + file_size = int(dest_size) + # Move folder + elif children is not None: + file_size = get_file_size(payload['destination']) + if file_size < 0: + return + + source_node_id = payload['source']['nid'] + source_node = AbstractNode.objects.get(guids___id=source_node_id) + # Move from bulk-mount + if payload['source']['provider'] == 'osfstorage' \ + and source_node.type != 'osf.quickfilesnode': + source_storage_type = get_project_storage_type(source_node) + update_quota(source_node, file_size, source_storage_type, add=False) + +def update_quota(target, file_size, storage_type, add=True): + """Add or subtract file_size from UserQuota.used for target.creator. + + This is the single source of truth for UserQuota updates. + Both ``file_added`` and ``node_removed`` delegate to this function. + + :param target: AbstractNode whose creator's quota is updated + :param int file_size: size in bytes to add or subtract + :param int storage_type: UserQuota.NII_STORAGE or CUSTOM_STORAGE + :param bool add: True to add (upload), False to subtract (delete) + """ + try: + if check_select_for_update(): + user_quota = UserQuota.objects.filter( + user=target.creator, + storage_type=storage_type + ).select_for_update().get() + else: + user_quota = UserQuota.objects.get( + user=target.creator, + storage_type=storage_type + ) + if add: + user_quota.used += file_size + else: + user_quota.used -= file_size + if user_quota.used < 0: + user_quota.used = 0 + user_quota.save() + except UserQuota.DoesNotExist: + if not add: + return + try: + with transaction.atomic(): + UserQuota.objects.create( + user=target.creator, + storage_type=storage_type, + max_quota=api_settings.DEFAULT_MAX_QUOTA, + used=file_size + ) + except IntegrityError: + if check_select_for_update(): + user_quota = UserQuota.objects.filter( + user=target.creator, + storage_type=storage_type + ).select_for_update().get() + else: + user_quota = UserQuota.objects.get( + user=target.creator, + storage_type=storage_type + ) + user_quota.used += file_size + user_quota.save() + +def get_file_size(children): + children = children.get('children', []) + size = 0 + for child in children: + if child['kind'] == 'file': + size += int(child['size']) + else: + size += get_file_size(child) + return size