diff --git a/requirements.txt b/requirements.txt index d179db0..59215f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,12 +5,12 @@ google-auth-httplib2>=0.0.3 google_api_python_client>=1.6.7 google_auth_oauthlib>=0.2.0 google-cloud-bigquery>=1.24.0 -googleads>=42.0.0 +googleads>=49.0.0 isort==4.3.9 lxml>=4.6.5 mysql-connector-python==9.1.0 numpy>=1.16.2 -pandas>=0.24.0 +pandas>=2.0.0 pyarrow>=0.11.1 qds_sdk>=1.10.1 requests>=2.20 diff --git a/sroka/api/athena/athena_api.py b/sroka/api/athena/athena_api.py index 20ab687..17824b5 100644 --- a/sroka/api/athena/athena_api.py +++ b/sroka/api/athena/athena_api.py @@ -5,8 +5,9 @@ from botocore.exceptions import ClientError, EndpointConnectionError import sroka.config.config as config -from sroka.api.athena.athena_api_helpers import poll_status, download_file, return_on_exception, \ - input_check +from sroka.api.athena.athena_api_helpers import (download_file, input_check, + poll_status, + return_on_exception) def query_athena(query, filename=None): diff --git a/sroka/api/athena/athena_api_helpers.py b/sroka/api/athena/athena_api_helpers.py index 138506a..d9cd454 100644 --- a/sroka/api/athena/athena_api_helpers.py +++ b/sroka/api/athena/athena_api_helpers.py @@ -5,7 +5,7 @@ def input_check(input_to_check, expected_types): for expected_type in expected_types: - if type(input_to_check) == expected_type: + if isinstance(input_to_check, expected_type): if expected_type == str and len(input_to_check) == 0: print('Function input must be a nonempty string.') return False diff --git a/sroka/api/google_ad_manager/gam_api.py b/sroka/api/google_ad_manager/gam_api.py index 886cb46..65af0e1 100644 --- a/sroka/api/google_ad_manager/gam_api.py +++ b/sroka/api/google_ad_manager/gam_api.py @@ -22,44 +22,44 @@ def dict_type_checker(dict_argument, argument_name, mandatory=True): if mandatory: - if type(dict_argument) != dict: + if not isinstance(dict_argument, dict): print(f"""{argument_name} needs to be a dict""") return "Incorrect type" else: - if dict_argument and type(dict_argument) != dict: + if dict_argument and not isinstance(dict_argument, dict): print(f"""{argument_name} needs to be a dict""") return "Incorrect type" def int_type_checker(int_argument, argument_name, mandatory=True): if mandatory: - if type(int_argument) != int: + if not isinstance(int_argument, int): print(f"""{argument_name} needs to be an integer""") return "Incorrect type" else: - if int_argument and type(int_argument) != int: + if int_argument and not isinstance(int_argument, int): print(f"""{argument_name} needs to be an integer""") return "Incorrect type" def list_type_checker(list_argument, argument_name, mandatory=True): if mandatory: - if type(list_argument) != list: + if not isinstance(list_argument, list): print(f"""{argument_name} needs to be a list""") return "Incorrect type" else: - if list_argument and type(list_argument) != list: + if list_argument and not isinstance(list_argument, list): print(f"""{argument_name} needs to be a list""") return "Incorrect type" def str_type_checker(str_argument, argument_name, mandatory=True): if mandatory: - if type(str_argument) != str: + if not isinstance(str_argument, str): print(f"""{argument_name} needs to be a string""") return "Incorrect type" else: - if str_argument and type(str_argument) != str: + if str_argument and not isinstance(str_argument, str): print(f"""{argument_name} needs to be a string""") return "Incorrect type" @@ -194,7 +194,7 @@ def get_users_from_admanager(query, dimensions, network_code=None): print('Failed to generate user list. Incorrect dimension: {}'.format(e)) return - user_df = user_df.append(dimensions_df, sort=False) + user_df = pd.concat([user_df, dimensions_df], sort=False) statement.offset += statement.limit else: break @@ -250,7 +250,7 @@ def get_companies_from_admanager(query, dimensions, network_code=None): print('Failed to generate company list. Incorrect dimension: {}'.format(e)) return - company_df = company_df.append(dimensions_df, sort=False) + company_df = pd.concat([company_df, dimensions_df], sort=False) statement.offset += statement.limit else: break @@ -404,7 +404,6 @@ def get_service_data_from_admanager( print("No more items found.") break - print( f"Successfully fetched a total of {len(all_items)} '{service}' items.\n" ) diff --git a/sroka/api/google_bigquery/bigquery_api.py b/sroka/api/google_bigquery/bigquery_api.py index 6b0e635..f2d6668 100644 --- a/sroka/api/google_bigquery/bigquery_api.py +++ b/sroka/api/google_bigquery/bigquery_api.py @@ -1,8 +1,8 @@ import pandas as pd +from google.api_core.exceptions import BadRequest, Forbidden, NotFound from google.cloud import bigquery -import sroka.config.config as config -from google.api_core.exceptions import Forbidden, NotFound, BadRequest +import sroka.config.config as config KEY_FILE = config.get_file_path('google_bigquery') @@ -10,11 +10,11 @@ def query_bigquery(input_query, filename=None): if filename: - if type(filename) != str: + if not isinstance(filename, str): print('filename needs to be a string') return None - if type(input_query) != str: + if not isinstance(input_query, str): print('input_query needs to be a string') return None @@ -26,7 +26,7 @@ def query_bigquery(input_query, filename=None): return None else: - if type(input_query) != str: + if not isinstance(input_query, str): print('input_query needs to be a string') return pd.DataFrame([]) @@ -55,11 +55,11 @@ def query_bigquery(input_query, filename=None): def done_bigquery(job_id, filename=None): if filename: - if type(filename) != str: + if not isinstance(filename, str): print('filename needs to be a string') return None - if type(job_id) != str: + if not isinstance(job_id, str): print('input_query needs to be a string') return None @@ -71,7 +71,7 @@ def done_bigquery(job_id, filename=None): return None else: - if type(job_id) != str: + if not isinstance(job_id, str): print('input_query needs to be a string') return pd.DataFrame([]) diff --git a/sroka/api/google_drive/google_drive_api.py b/sroka/api/google_drive/google_drive_api.py index 4394d1d..cc34da3 100644 --- a/sroka/api/google_drive/google_drive_api.py +++ b/sroka/api/google_drive/google_drive_api.py @@ -2,9 +2,10 @@ import numpy as np import pandas as pd - from googleapiclient.errors import HttpError -from sroka.api.google_drive.google_drive_helpers import is_valid_email, service_builder + +from sroka.api.google_drive.google_drive_helpers import (is_valid_email, + service_builder) def google_drive_sheets_read(sheetname_id: str, sheet_range: str, first_row_columns=False): @@ -231,7 +232,7 @@ def google_drive_sheets_delete_tab(spreadsheet_id: str, tab_name: str): Args: spreadsheet_id (str): The ID of the spreadsheet. - tab_name (str): The name of the tab to delete. + tab_name (str): The name of the tab to delete. Returns: str: The ID of the spreadsheet. """ @@ -274,10 +275,10 @@ def google_drive_sheets_delete_tab(spreadsheet_id: str, tab_name: str): def google_drive_get_file_parents(file_id: str): """ Retrieves the parent folder IDs for a specified file on Google Drive. - + Args: file_id (str): The ID of the file whose parent folders are to be retrieved. - + Returns: list: A list of string IDs for the parent folder(s) of the file. Returns an empty list on failure. """ @@ -330,7 +331,7 @@ def google_drive_transfer_ownership(file_id: str, new_owner_email: str): # Create the permission, triggering the ownership transfer # pylint: disable=E1101 - permission = service.permissions().create( + service.permissions().create( fileId=file_id, body=permission_body, transferOwnership=True, @@ -373,7 +374,7 @@ def google_drive_change_file_permission(file_id: str, user_email: str, role: str except ValueError as er: print(f"An incorrect role has been used in the function - {er}") return False - + try: if is_valid_email(user_email) is False: raise ValueError(f'The {user_email} is incorrect.') @@ -390,10 +391,10 @@ def google_drive_change_file_permission(file_id: str, user_email: str, role: str 'role': role.lower(), 'emailAddress': user_email.lower() } - + # Insert the new permission # pylint: disable=E1101 - permission = service.permissions().create( + service.permissions().create( fileId=file_id, body=permission_body, fields='id' diff --git a/sroka/api/google_drive/google_drive_helpers.py b/sroka/api/google_drive/google_drive_helpers.py index 5be1720..d43c18e 100644 --- a/sroka/api/google_drive/google_drive_helpers.py +++ b/sroka/api/google_drive/google_drive_helpers.py @@ -1,6 +1,8 @@ import os import re + from googleapiclient.discovery import build + import sroka.config.config as config @@ -18,6 +20,7 @@ def is_valid_email(email_string: str): return True return False + def service_builder(service_type: int, version: str): """ Creates a service with proper credentials for a selected build from the service_type variable: 'sheets' or 'drive' @@ -38,15 +41,15 @@ def service_builder(service_type: int, version: str): try: valid_service_type = [1, 2] if service_type not in valid_service_type: - raise ValueError('Available services: [1] sheets, [2] drive') + raise ValueError('Available services: [1] sheets, [2] drive') except ValueError as e: print(f"An incorrect role has been used in the function - {e}") return False - + try: valid_version = ['v2', 'v3', 'v4'] if version.lower() not in valid_version: - raise ValueError('Available versions: v2, v3, v4') + raise ValueError('Available versions: v2, v3, v4') except ValueError as er: print(f"An incorrect version has been used in the function - {er}") return False diff --git a/sroka/api/moat/moat_api_helpers.py b/sroka/api/moat/moat_api_helpers.py index b297449..6354d20 100644 --- a/sroka/api/moat/moat_api_helpers.py +++ b/sroka/api/moat/moat_api_helpers.py @@ -14,7 +14,7 @@ def validate_input_dict(moat_dict): print('{} is not defined'.format(column)) return False - if type(moat_dict['columns']) != list or moat_dict['columns'] == []: + if not isinstance(moat_dict['columns'], list) or moat_dict['columns'] == []: print('columns are not defined correctly') return False diff --git a/sroka/api/mysql/mysql.py b/sroka/api/mysql/mysql.py index b9f6360..7355ca7 100644 --- a/sroka/api/mysql/mysql.py +++ b/sroka/api/mysql/mysql.py @@ -1,11 +1,15 @@ import os -import mysql.connector -import pandas as pd from configparser import NoSectionError from pathlib import Path -from mysql.connector.errors import DatabaseError, OperationalError, InternalError + +import mysql.connector +import pandas as pd +from mysql.connector.errors import (DatabaseError, InternalError, + OperationalError) from retrying import retry -from sroka.api.mysql.mysql_helpers import validate_options, get_options_from_config + +from sroka.api.mysql.mysql_helpers import (get_options_from_config, + validate_options) @retry(stop_max_attempt_number=1, diff --git a/sroka/api/mysql/mysql_helpers.py b/sroka/api/mysql/mysql_helpers.py index 9ead623..d189665 100644 --- a/sroka/api/mysql/mysql_helpers.py +++ b/sroka/api/mysql/mysql_helpers.py @@ -1,6 +1,7 @@ -import sroka.config.config as config from configparser import NoOptionError +import sroka.config.config as config + def get_options_from_config(): # Set the options in a dictionary, in order to pass only the diff --git a/sroka/api/neo4j/neo4j_api.py b/sroka/api/neo4j/neo4j_api.py index f3c8bff..5e39be9 100644 --- a/sroka/api/neo4j/neo4j_api.py +++ b/sroka/api/neo4j/neo4j_api.py @@ -1,13 +1,12 @@ import pandas as pd -from py2neo import Graph -from py2neo import ClientError +from py2neo import ClientError, Graph import sroka.config.config as config def neo4j_query_data(cypher, parameters=None, **kwparameters): - if type(cypher) != str: + if not isinstance(cypher, str): print('Cypher query needs to be a string') return pd.DataFrame([]) @@ -15,7 +14,7 @@ def neo4j_query_data(cypher, parameters=None, **kwparameters): print('Cypher query cannot be empty') return pd.DataFrame([]) - if parameters and type(parameters) != dict: + if parameters and not isinstance(parameters, dict): print('Parameters need to be a dictionary') return pd.DataFrame([]) diff --git a/sroka/api/qubole/qubole_api.py b/sroka/api/qubole/qubole_api.py index e6028f7..76db115 100644 --- a/sroka/api/qubole/qubole_api.py +++ b/sroka/api/qubole/qubole_api.py @@ -4,12 +4,8 @@ import pandas as pd from qds_sdk.commands import Command, HiveCommand, PrestoCommand -from qds_sdk.exception import ( - ForbiddenAccess, - ResourceInvalid, - ResourceNotFound, - UnauthorizedAccess, -) +from qds_sdk.exception import (ForbiddenAccess, ResourceInvalid, + ResourceNotFound, UnauthorizedAccess) from qds_sdk.qubole import Qubole import sroka.config.config as config diff --git a/sroka/api/s3_connection/s3_connection_api.py b/sroka/api/s3_connection/s3_connection_api.py index 9e9ec81..13d70d7 100644 --- a/sroka/api/s3_connection/s3_connection_api.py +++ b/sroka/api/s3_connection/s3_connection_api.py @@ -3,8 +3,8 @@ from io import BytesIO, StringIO import boto3 -import pandas as pd import numpy as np +import pandas as pd import pyarrow.parquet as pq from botocore.exceptions import ClientError, ParamValidationError from pandas.errors import EmptyDataError @@ -89,7 +89,7 @@ def s3_download_data(s3_filename, prefix=False, output_file=None, sep=',', skip_ key_prefix = match.group(2) - if type(sep) == str and len(sep) == 1: + if isinstance(sep, str) and len(sep) == 1: data = _download_data(key_prefix, s3, bucket_name, prefix, sep, skip_empty_files, first_row_columns) @@ -111,15 +111,15 @@ def s3_upload_data(data, bucket, path, sep=','): aws_secret_access_key=access_key ) - if type(sep) == str and len(sep) == 1: + if isinstance(sep, str) and len(sep) == 1: csv_buffer = StringIO() - if type(data) == pd.core.frame.DataFrame or type(data) == np.ndarray: + if isinstance(data, pd.core.frame.DataFrame) or isinstance(data, np.ndarray): - if type(data) == pd.core.frame.DataFrame: + if isinstance(data, pd.core.frame.DataFrame): data.to_csv(csv_buffer, sep=sep) - elif type(data) == np.ndarray: + elif isinstance(data, np.ndarray): np.savetxt(csv_buffer, data, delimiter=sep, fmt='%s') s3 = session.resource('s3')