diff --git a/compatibility_lib/compatibility_lib/compatibility_checker.py b/compatibility_lib/compatibility_lib/compatibility_checker.py index a228b95e..dd828a76 100644 --- a/compatibility_lib/compatibility_lib/compatibility_checker.py +++ b/compatibility_lib/compatibility_lib/compatibility_checker.py @@ -51,45 +51,85 @@ def check(self, packages, python_version): result = requests.get(SERVER_URL, params=data) content = result.content.decode('utf-8') - return json.loads(content) + return json.loads(content), python_version def filter_packages(self, packages, python_version): - return [pkg for pkg in packages if pkg not in - configs.PKG_PY_VERSION_NOT_SUPPORTED[int(python_version)]] - - @retrying.retry(wait_exponential_multiplier=5000, - wait_exponential_max=20000) + """Filter out the packages not supported by the given py version.""" + filtered_packages = [] + for pkg in packages: + if 'github.com' in pkg: + pkg_name = configs.WHITELIST_URLS[pkg] + else: + pkg_name = pkg + if pkg_name not in configs.PKG_PY_VERSION_NOT_SUPPORTED[ + int(python_version)]: + filtered_packages.append(pkg) + return filtered_packages + + @retrying.retry(wait_random_min=1000, + wait_random_max=2000) def retrying_check(self, args): """Retrying logic for sending requests to checker server.""" packages = args[0] python_version = args[1] return self.check(packages, python_version) - def get_self_compatibility(self, python_version, packages=None): - """Get the self compatibility data for each package.""" + def collect_check_packages( + self, python_version=None, packages=None, pkg_sets=None): + # Generating single packages if packages is None: packages = configs.PKG_LIST - # Remove the package not supported in the python_version - packages = self.filter_packages(packages, python_version) - with concurrent.futures.ThreadPoolExecutor( - max_workers=self.max_workers) as p: - pkg_set_results = p.map( - self.retrying_check, - (([pkg], python_version) for pkg in packages)) - for result in zip(pkg_set_results): - yield result + check_singles = [] + if python_version is None: + for py_ver in ['2', '3']: + # Remove the package not supported in the python_version + packages = self.filter_packages(packages, py_ver) + for pkg in packages: + check_singles.append(([pkg], py_ver)) + else: + packages = self.filter_packages(packages, python_version) + check_singles = [([pkg], python_version) for pkg in packages] - def get_pairwise_compatibility(self, python_version, pkg_sets=None): - """Get pairwise compatibility data for each pair of packages.""" + # Generating pairs if pkg_sets is None: - packages = self.filter_packages(configs.PKG_LIST, python_version) - pkg_sets = itertools.combinations(packages, 2) + pkg_sets = list(itertools.combinations(configs.PKG_LIST, 2)) + + check_pairs = [] + if python_version is None: + for py_ver in ['2', '3']: + filtered_pkgs = [] + for pkgs in pkg_sets: + if list(pkgs) != self.filter_packages(pkgs, + py_ver): + continue + filtered_pkgs.append(pkgs) + for pkg_set in filtered_pkgs: + check_pairs.append((list(pkg_set), py_ver)) + else: + filtered_pkgs = [] + for pkgs in pkg_sets: + if list(pkgs) != self.filter_packages(pkgs, + python_version): + continue + filtered_pkgs.append(pkgs) + check_pairs = [(list(pkg_set), python_version) + for pkg_set in pkg_sets] + + res = tuple(check_singles) + tuple(check_pairs) + return res + + def get_compatibility( + self, python_version=None, packages=None, pkg_sets=None): + """Get the compatibility data for each package and package pairs.""" + check_packages = self.collect_check_packages( + python_version, packages, pkg_sets) + with concurrent.futures.ThreadPoolExecutor( max_workers=self.max_workers) as p: pkg_set_results = p.map( self.retrying_check, - ((list(pkg_set), python_version) for pkg_set in pkg_sets)) + tuple(check_packages)) for result in zip(pkg_set_results): yield result diff --git a/compatibility_lib/compatibility_lib/compatibility_store.py b/compatibility_lib/compatibility_lib/compatibility_store.py index 4dac76cc..f0a0cd57 100644 --- a/compatibility_lib/compatibility_lib/compatibility_store.py +++ b/compatibility_lib/compatibility_lib/compatibility_store.py @@ -24,6 +24,7 @@ from google.cloud import bigquery from google.cloud.bigquery import table +from compatibility_lib import configs from compatibility_lib import package _DATASET_NAME = 'compatibility_checker' @@ -438,8 +439,8 @@ def save_compatibility_statuses( install_name_to_compatibility_result[install_name]) new_version_string = self._get_package_version(cs) - old_version = version.StrictVersion(old_version_string) - new_version = version.StrictVersion(new_version_string) + old_version = version.LooseVersion(old_version_string) + new_version = version.LooseVersion(new_version_string) if new_version > old_version: install_name_to_compatibility_result[install_name] = cs @@ -478,6 +479,8 @@ def _get_package_version(self, result: CompatibilityResult) -> str: raise ValueError('multiple packages found in CompatibilityResult') install_name = result.packages[0].install_name + if 'github.com' in install_name: + install_name = configs.WHITELIST_URLS[install_name] install_name_sanitized = install_name.split('[')[0] for pkg, version_info in result.dependency_info.items(): diff --git a/compatibility_lib/compatibility_lib/configs.py b/compatibility_lib/compatibility_lib/configs.py index d44ff340..bb13cff1 100644 --- a/compatibility_lib/compatibility_lib/configs.py +++ b/compatibility_lib/compatibility_lib/configs.py @@ -171,18 +171,16 @@ def _format_url(repo_name, setuppy_path=''): _format_url('google/apitools'): 'google-apitools', _format_url('GoogleCloudPlatform/gsutil'): 'gsutil', _format_url('census-instrumentation/opencensus-python'): 'opencensus', - _format_url('protocolbuffers/protobuf', 'python'): 'protobuf', _format_url('google/protorpc'): 'protorpc', _format_url('tensorflow/tensorflow', 'tensorflow/tools/pip_package'): 'tensorflow', - _format_url('tensorflow/tensorflow', - 'tensorflow/contrib/tpu/profiler/pip_package'): 'tensorflow', _format_url('GoogleCloudPlatform/cloud-opensource-python', 'compatibility_lib'): 'compatibility-lib', # TODO: The following projects do not use setup.py # googleapis-common-protos # grpc-google-iam-v1 # grpcio + # protobuf # tensorboard - not sure what the build process is # _format_url('tensorflow/tensorboard', 'tensorboard/pip_package'): # 'tensorboard', diff --git a/compatibility_lib/compatibility_lib/get_compatibility_data.py b/compatibility_lib/compatibility_lib/get_compatibility_data.py index 88deaae0..cec16589 100644 --- a/compatibility_lib/compatibility_lib/get_compatibility_data.py +++ b/compatibility_lib/compatibility_lib/get_compatibility_data.py @@ -14,31 +14,34 @@ """Get self and pairwise compatibility data and write to bigquery.""" +import argparse import datetime +import itertools from compatibility_lib import compatibility_checker from compatibility_lib import compatibility_store from compatibility_lib import configs from compatibility_lib import package -checker = compatibility_checker.CompatibilityChecker() +checker = compatibility_checker.CompatibilityChecker(max_workers=800) store = compatibility_store.CompatibilityStore() PY2 = '2' PY3 = '3' -def _result_dict_to_compatibility_result(results, python_version): +def _result_dict_to_compatibility_result(results): res_list = [] for item in results: res_dict = item[0] - check_result = res_dict.get('result') + result_content, python_version = res_dict + check_result = result_content.get('result') packages_list = [package.Package(pkg) - for pkg in res_dict.get('packages')] - details = res_dict.get('description') + for pkg in result_content.get('packages')] + details = result_content.get('description') timestamp = datetime.datetime.now().isoformat() - dependency_info = res_dict.get('dependency_info') + dependency_info = result_content.get('dependency_info') compatibility_result = compatibility_store.CompatibilityResult( packages=packages_list, @@ -53,27 +56,56 @@ def _result_dict_to_compatibility_result(results, python_version): return res_list -def write_to_status_table(): +def get_package_pairs(check_pypi=False, check_github=False): + """Get package pairs for pypi and github head.""" + self_packages = [] + pair_packages = [] + if check_pypi: + # Get pypi packages for single checks + self_packages.extend(configs.PKG_LIST) + # Get pypi packages for pairwise checks + pypi_pairs = list(itertools.combinations(configs.PKG_LIST, 2)) + pair_packages.extend(pypi_pairs) + if check_github: + # Get github head packages for single checks + self_packages.extend(list(configs.WHITELIST_URLS.keys())) + # Get github head packages for pairwise checks + for gh_url in configs.WHITELIST_URLS: + pairs = [] + gh_name = configs.WHITELIST_URLS[gh_url] + for pypi_pkg in configs.PKG_LIST: + if pypi_pkg != gh_name: + pairs.append((gh_url, pypi_pkg)) + pair_packages.extend(pairs) + + return self_packages, pair_packages + + +def write_to_status_table(check_pypi=False, check_github=False): """Get the compatibility status for PyPI versions.""" # Write self compatibility status to BigQuery - self_res_list = [] - packages = configs.PKG_LIST - for py_version in [PY2, PY3]: - results = checker.get_self_compatibility( - python_version=py_version, - packages=packages) - res_list = _result_dict_to_compatibility_result(results, py_version) - self_res_list.extend(res_list) + self_packages, pair_packages = get_package_pairs(check_pypi, check_github) + results = checker.get_compatibility( + packages=self_packages, pkg_sets=pair_packages) + res_list = _result_dict_to_compatibility_result(results) - store.save_compatibility_statuses(self_res_list) - - # Write pairwise compatibility status to BigQuery - for py_version in [PY2, PY3]: - # For PyPI released versions - results = checker.get_pairwise_compatibility(py_version) - res_list = _result_dict_to_compatibility_result(results, py_version) - store.save_compatibility_statuses(res_list) + store.save_compatibility_statuses(res_list) if __name__ == '__main__': - write_to_status_table() + parser = argparse.ArgumentParser(description='Determine what to check.') + parser.add_argument( + '--pypi', + type=bool, + default=False, + help='Check PyPI released packages or not.') + parser.add_argument( + '--github', + type=bool, + default=False, + help='Check GitHub head packages or not.') + args = parser.parse_args() + + check_pypi = args.pypi + check_github = args.github + write_to_status_table(check_pypi, check_github) diff --git a/compatibility_lib/compatibility_lib/test_compatibility_checker.py b/compatibility_lib/compatibility_lib/test_compatibility_checker.py index 64a42a7e..0dcdc240 100644 --- a/compatibility_lib/compatibility_lib/test_compatibility_checker.py +++ b/compatibility_lib/compatibility_lib/test_compatibility_checker.py @@ -54,50 +54,15 @@ def _mock_retrying_check(self, *args): python_version = args[0][1] return (packages, python_version, 'SUCCESS') - def test_get_self_compatibility(self): + def test_get_compatibility(self): checker = compatibility_checker.CompatibilityChecker() - pkg_list = ['pkg1', 'pkg2'] - pkg_py_version_not_supported = { - 2: ['tensorflow', ], - 3: ['apache-beam[gcp]', 'gsutil', ], - } - python_version = 3 - - mock_config = mock.Mock() - mock_config.PKG_LIST = pkg_list - mock_config.PKG_PY_VERSION_NOT_SUPPORTED = pkg_py_version_not_supported - patch_config = mock.patch( - 'compatibility_lib.compatibility_checker.configs', mock_config) - - patch_executor = mock.patch( - 'compatibility_lib.compatibility_checker.concurrent.futures.ThreadPoolExecutor', - FakeExecutor) - patch_retrying_check = mock.patch.object( - compatibility_checker.CompatibilityChecker, - 'retrying_check', - self._mock_retrying_check) - - res = [] - with patch_config, patch_executor, patch_retrying_check: - result = checker.get_self_compatibility(python_version) - - for item in result: - res.append(item) - - self.assertEqual(res, - [((['pkg1'], 3, 'SUCCESS'),), - ((['pkg2'], 3, 'SUCCESS'),)]) - - def test_get_pairwise_compatibility(self): pkg_list = ['pkg1', 'pkg2', 'pkg3'] pkg_py_version_not_supported = { 2: ['tensorflow', ], 3: ['apache-beam[gcp]', 'gsutil', ], } - python_version = 3 - mock_config = mock.Mock() mock_config.PKG_LIST = pkg_list mock_config.PKG_PY_VERSION_NOT_SUPPORTED = pkg_py_version_not_supported @@ -114,16 +79,26 @@ def test_get_pairwise_compatibility(self): res = [] with patch_config, patch_executor, patch_retrying_check: - checker = compatibility_checker.CompatibilityChecker() - result = checker.get_pairwise_compatibility(python_version) + result = checker.get_compatibility() for item in result: res.append(item) - self.assertEqual(res, - [((['pkg1', 'pkg2'], 3, 'SUCCESS'),), - ((['pkg1', 'pkg3'], 3, 'SUCCESS'),), - ((['pkg2', 'pkg3'], 3, 'SUCCESS'),)]) + expected = sorted([ + ((['pkg1'], '2', 'SUCCESS'),), + ((['pkg2'], '2', 'SUCCESS'),), + ((['pkg3'], '2', 'SUCCESS'),), + ((['pkg1'], '3', 'SUCCESS'),), + ((['pkg2'], '3', 'SUCCESS'),), + ((['pkg3'], '3', 'SUCCESS'),), + ((['pkg1', 'pkg2'], '2', 'SUCCESS'),), + ((['pkg1', 'pkg3'], '2', 'SUCCESS'),), + ((['pkg2', 'pkg3'], '2', 'SUCCESS'),), + ((['pkg1', 'pkg2'], '3', 'SUCCESS'),), + ((['pkg1', 'pkg3'], '3', 'SUCCESS'),), + ((['pkg2', 'pkg3'], '3', 'SUCCESS'),)]) + + self.assertEqual(sorted(res), expected) class FakeExecutor(object): diff --git a/compatibility_lib/compatibility_lib/test_get_compatibility_data.py b/compatibility_lib/compatibility_lib/test_get_compatibility_data.py index ce5647c3..f615ca92 100644 --- a/compatibility_lib/compatibility_lib/test_get_compatibility_data.py +++ b/compatibility_lib/compatibility_lib/test_get_compatibility_data.py @@ -38,14 +38,15 @@ class TestGetCompatibilityData(unittest.TestCase): } } results = ( - [ + (( { 'result': 'SUCCESS', 'packages': ['google-api-core'], 'description': None, 'dependency_info': dependency_info, - } - ], + }, + '3', + ),), ) packages = [package.Package('google-api-core')] @@ -53,8 +54,7 @@ class TestGetCompatibilityData(unittest.TestCase): def setUp(self): self.mock_checker = mock.Mock() - self.mock_checker.get_self_compatibility.return_value = self.results - self.mock_checker.get_pairwise_compatibility.return_value = self.results + self.mock_checker.get_compatibility.return_value = self.results self.fake_store = fake_compatibility_store.CompatibilityStore() @@ -72,14 +72,81 @@ def mock_init(): 'compatibility_lib.get_compatibility_data.store', self.fake_store) + def test_get_package_pairs_pypi(self): + mock_config = mock.Mock() + PKG_LIST = ['package1', 'package2', 'package3'] + mock_config.PKG_LIST = PKG_LIST + WHITELIST_URLS = { + 'github.com/pkg1.git': 'package1', + 'github.com/pkg2.git': 'package2', + 'github.com/pkg3.git': 'package3' + } + mock_config.WHITELIST_URLS = WHITELIST_URLS + patch_config = mock.patch( + 'compatibility_lib.get_compatibility_data.configs', + mock_config) + + with patch_config, self.patch_constructor, self.patch_checker, self.patch_store: + from compatibility_lib import get_compatibility_data + + self_packages, pair_packages = get_compatibility_data.get_package_pairs( + check_pypi=True, check_github=False) + + expected_self_packages = sorted(['package1', 'package2', 'package3']) + self.assertEqual(sorted(self_packages), expected_self_packages) + + expected_pair_packages = sorted( + [('package1', 'package2'), + ('package1', 'package3'), + ('package2', 'package3')]) + self.assertEqual( + sorted(pair_packages), + expected_pair_packages) + + def test_get_package_pairs_github(self): + mock_config = mock.Mock() + PKG_LIST = ['package1', 'package2', 'package3'] + mock_config.PKG_LIST = PKG_LIST + WHITELIST_URLS = { + 'github.com/pkg1.git': 'package1', + 'github.com/pkg2.git': 'package2', + 'github.com/pkg3.git': 'package3' + } + mock_config.WHITELIST_URLS = WHITELIST_URLS + patch_config = mock.patch( + 'compatibility_lib.get_compatibility_data.configs', + mock_config) + + with patch_config, self.patch_constructor, self.patch_checker, self.patch_store: + from compatibility_lib import get_compatibility_data + + self_packages, pair_packages = get_compatibility_data.get_package_pairs( + check_pypi=False, check_github=True) + + expected_self_packages = sorted( + ['github.com/pkg1.git', + 'github.com/pkg2.git', + 'github.com/pkg3.git']) + self.assertEqual( + sorted(self_packages), expected_self_packages) + + expected_pair_packages = sorted( + [('github.com/pkg1.git', 'package2'), + ('github.com/pkg1.git', 'package3'), + ('github.com/pkg2.git', 'package1'), + ('github.com/pkg2.git', 'package3'), + ('github.com/pkg3.git', 'package1'), + ('github.com/pkg3.git', 'package2')]) + self.assertEqual( + sorted(pair_packages), expected_pair_packages) + def test__result_dict_to_compatibility_result(self): with self.patch_constructor, self.patch_checker, self.patch_store: from compatibility_lib import compatibility_store from compatibility_lib import get_compatibility_data - python_version = 3 res_list = get_compatibility_data._result_dict_to_compatibility_result( - self.results, python_version) + self.results) self.assertTrue(isinstance( res_list[0], compatibility_store.CompatibilityResult)) @@ -94,12 +161,11 @@ def test_write_to_status_table(self): get_compatibility_data.write_to_status_table() - self.assertTrue(self.mock_checker.get_self_compatibility.called) - self.assertTrue(self.mock_checker.get_pairwise_compatibility.called) + self.assertTrue(self.mock_checker.get_compatibility.called) saved_results = self.fake_store._packages_to_compatibility_result.get( frozenset({self.packages[0]})) self.assertIsNotNone(saved_results) - self.assertEqual(len(saved_results), 4) + self.assertEqual(len(saved_results), 1) saved_item = saved_results[0] self.assertEqual(saved_item.packages, self.packages) self.assertEqual(saved_item.dependency_info, self.dependency_info)