From 130fae80f7f9736848979d947fbac18d1ccb89ad Mon Sep 17 00:00:00 2001 From: Nevsksar <63605974+Nevsksar@users.noreply.github.com> Date: Wed, 27 Sep 2023 13:07:43 -0300 Subject: [PATCH 1/3] Feature to add a filtering options on the API call --- tap_clickup/streams.py | 64 ++++++++++++++++++++++++++++++++++++++---- tap_clickup/tap.py | 10 +++++++ 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/tap_clickup/streams.py b/tap_clickup/streams.py index d4abd88..ad94b3e 100644 --- a/tap_clickup/streams.py +++ b/tap_clickup/streams.py @@ -4,6 +4,50 @@ import requests from singer_sdk.helpers.jsonpath import extract_jsonpath from tap_clickup.client import ClickUpStream +import yaml #added to read information specifying spaces and workspaces + +# Load the YAML config file from meltano +with open("meltano.yml", "r") as yaml_file: + cu_config = yaml.safe_load(yaml_file) + +#funcions to read the extra configuration data from the YAML file +#read and split lists to create list +def extract_and_convert_list(config, key): + value = config.get(key, "") + if value: + split_values = value.split(',') + return [int(item) for item in split_values] + return [] + +#specify which setting from which tap to read the config.info from ISSUE HERE the yaml read is hardcoded so breaks when i have multiple "tap-clickup" one for each env that i want to switch between +def find_tap_clickup_config(plugins): + for plugin in plugins: + if plugin.get("name") == "tap-clickup": + return plugin.get("config", {}) + return {} + +# Find the tap-clickup configuration +tap_clickup_config = find_tap_clickup_config(cu_config["plugins"]["extractors"]) + +# Extract and convert workspace ID +cu_workspace = tap_clickup_config.get("workspace_id") + +# Extract and convert spaces and lists into lists +spaces_id_list = extract_and_convert_list(tap_clickup_config, "spaces_id") +lists_id_list = extract_and_convert_list(tap_clickup_config, "list_ids") + +workteamid = cu_workspace # store workspace id 
E.g:30979640 +space_ids = spaces_id_list # store list of spaces to be fetched E.g: [90100432266,90100432289,90100437857] +list_ids = lists_id_list #store lists of lists to be fetched E.g: [900101856869,901002404954] + +# Check if there's only one value in the list, this is necessary because of a bug on click up's API. +if len(list_ids) == 1: + # Duplicate the value to ensure at least two parameters due to a bug on clickups API (the values returned by the API are NOT duplicated) + list_ids.append(list_ids[0]) +if len(space_ids) == 1: + # Duplicate the value to ensure at least two parameters due to a bug on clickups API (the values returned by the API are NOT duplicated) + space_ids.append(space_ids[0]) + SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") @@ -16,7 +60,9 @@ class TeamsStream(ClickUpStream): primary_keys = ["id"] replication_key = None schema_filepath = SCHEMAS_DIR / "team.json" - records_jsonpath = "$.teams[*]" + # Necessary because if you have access to multiple workteams, the responses are replicated N times where N = # of worspaces you have access to + records_jsonpath = f"$.teams[?(@.id == {workteamid})]" + def get_child_context(self, record: dict, context: Optional[dict]) -> dict: """Return a context dictionary for child streams.""" @@ -25,6 +71,7 @@ def get_child_context(self, record: dict, context: Optional[dict]) -> dict: } + class TimeEntries(ClickUpStream): """Time Entries""" @@ -220,7 +267,6 @@ class TasksStream(ClickUpStream): name = "task" # Date_updated_gt is greater than or equal to not just greater than - path = "/team/{team_id}/task" primary_keys = ["id"] replication_key = "date_updated" is_sorted = True @@ -228,12 +274,19 @@ class TasksStream(ClickUpStream): schema_filepath = SCHEMAS_DIR / "task.json" records_jsonpath = "$.tasks[*]" parent_stream_type = TeamsStream - # Need this stub as a hack on _sync to force it to use Partitions # Since this is a child stream we want each team_id to create a request for # archived:true and 
archived:false. And we want state to track properly partitions = [] - + basepath = "/team/{team_id}/task" + space_ids_param = "&space_ids=" + "&space_ids=".join(map(str, space_ids)) if space_ids else "" #Dinamically create the spaces path to be passed on API call + list_ids_param = "?list_ids=" + "&list_ids=".join(map(str, list_ids)) if list_ids else "" #Dinamically create the spaces path to be passed on API call + # Necessary because if you pass list and space, lists MUST go first in the parameter string + if list_ids_param: + path = f"{basepath}{list_ids_param}{space_ids_param}" + else: + path = f"{basepath}?{space_ids_param}" + @property def base_partition(self): return [{"archived": "true"}, {"archived": "false"}] @@ -262,7 +315,6 @@ def get_next_page_token( for _ in extract_jsonpath(self.records_jsonpath, input=response.json()): recordcount = recordcount + 1 - # I wonder if a better approach is to just check for 0 records and stop # For now I'll follow the docs verbatium # From the api docs, https://clickup.com/api. 
@@ -273,4 +325,4 @@ def get_next_page_token( else: newtoken = None - return newtoken + return newtoken \ No newline at end of file diff --git a/tap_clickup/tap.py b/tap_clickup/tap.py index 1c371b3..7f89a99 100644 --- a/tap_clickup/tap.py +++ b/tap_clickup/tap.py @@ -47,6 +47,15 @@ class TapClickUp(Tap): th.Property( "api_token", th.StringType, required=True, description="Example: 'pk_12345" ), + th.Property( + "workspace_id", th.IntegerType, required=False, description="Example: '20214542" # fetches the data for workspace_id + ), + th.Property( + "spaces_id", th.StringType, required=False, description="Example: '[45215477,4547547]" # fetches the data for workspace_id + ), + th.Property( + "list_ids", th.StringType, required=False, description="Example: '[454455478,784552187]" # fetches the data for workspace_id + ), # Removing "official" start_date support re https://github.com/AutoIDM/tap-clickup/issues/118 # th.Property( # "start_date", @@ -64,3 +73,4 @@ class TapClickUp(Tap): def discover_streams(self) -> List[Stream]: """Return a list of discovered streams.""" return [stream_class(tap=self) for stream_class in STREAM_TYPES] + From 7caa0c02e233f85b4773becd55f51882d04f1b5c Mon Sep 17 00:00:00 2001 From: Hiaggo Bezerra Date: Fri, 13 Oct 2023 19:22:19 -0300 Subject: [PATCH 2/3] Addresses the requests to merge the PR #145 --- tap_clickup/streams.py | 32 +++++++++++++++++++++++++++++++- tap_clickup/tap.py | 6 +++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tap_clickup/streams.py b/tap_clickup/streams.py index ad94b3e..3be48a4 100644 --- a/tap_clickup/streams.py +++ b/tap_clickup/streams.py @@ -1,6 +1,6 @@ """Stream type classes for tap-clickup.""" from pathlib import Path -from typing import Optional, Any, Dict +from typing import Optional, Any, Dict, Iterable import requests from singer_sdk.helpers.jsonpath import extract_jsonpath from tap_clickup.client import ClickUpStream @@ -70,6 +70,15 @@ def get_child_context(self, record: dict, context: 
Optional[dict]) -> dict: "team_id": record["id"], } + def get_records(self, context: Optional[dict]) -> Iterable[dict]: + """Return a generator of row-type dictionary objects.""" + # If workspace_ids is empty, null, or nonexistent, default to using the API to + # determine all workspaces/teams. + if "workspace_ids" in self.config and self.config.get("workspace_ids"): + return [{"id": id} for id in self.config.get("workspace_ids")] + else: + return super().get_records(context=context) + class TimeEntries(ClickUpStream): @@ -302,6 +311,27 @@ def get_url_params( params["order_by"] = "updated" params["reverse"] = "true" params["date_updated_gt"] = self.get_starting_replication_key_value(context) + + # If list_ids is empty, null, or nonexistent, no list_ids filter is sent to the API. + if "list_ids" in self.config and self.config.get("list_ids"): + params["list_ids"] = [item for item in self.config.get("list_ids")] + + # If space_ids is empty, null, or nonexistent, no space_ids filter is sent to the API. + if "space_ids" in self.config and self.config.get("space_ids"): + params["space_ids"] = [item for item in self.config.get("space_ids")] + + if "list_ids" in params and len(params["list_ids"]) == 1: + # To work around the ClickUp API bug that returns an error message stating + # "List ids must be an array" (ECODE: OAUTH_042), we should duplicate the list_id + # when there is only one, as the API requires an array format for list IDs. + params["list_ids"].append(params["list_ids"][0]) + + if "space_ids" in params and len(params["space_ids"]) == 1: + # To work around the ClickUp API bug that returns an error message stating + # "Space ids must be an array" (ECODE: OAUTH_042), we should duplicate the space_id + # when there is only one, as the API requires an array format for space IDs. 
+ params["space_ids"].append(params["space_ids"][0]) + return params def get_next_page_token( diff --git a/tap_clickup/tap.py b/tap_clickup/tap.py index 7f89a99..fb0a9a8 100644 --- a/tap_clickup/tap.py +++ b/tap_clickup/tap.py @@ -48,13 +48,13 @@ class TapClickUp(Tap): "api_token", th.StringType, required=True, description="Example: 'pk_12345" ), th.Property( - "workspace_id", th.IntegerType, required=False, description="Example: '20214542" # fetches the data for workspace_id + "workspace_ids", th.ArrayType(th.IntegerType), required=False, description="Example: '[20214542]'" # limits extraction to these workspace (team) IDs ), th.Property( - "spaces_id", th.StringType, required=False, description="Example: '[45215477,4547547]" # fetches the data for workspace_id + "space_ids", th.ArrayType(th.IntegerType), required=False, description="Example: '[45215477, 4547547]'" # filters tasks to these space IDs ), th.Property( - "list_ids", th.StringType, required=False, description="Example: '[454455478,784552187]" # fetches the data for workspace_id + "list_ids", th.ArrayType(th.IntegerType), required=False, description="Example: '[454455478, 784552187]'" # filters tasks to these list IDs ), # Removing "official" start_date support re https://github.com/AutoIDM/tap-clickup/issues/118 # th.Property( From 8af093cbbe7a73e6e07c44921c25cc94288021b1 Mon Sep 17 00:00:00 2001 From: Hiaggo Bezerra Date: Fri, 13 Oct 2023 19:29:20 -0300 Subject: [PATCH 3/3] Fixes discrepancies between branches that were not present in the previous commit. 
--- tap_clickup/streams.py | 64 ++++-------------------------------------- tap_clickup/tap.py | 1 - 2 files changed, 6 insertions(+), 59 deletions(-) diff --git a/tap_clickup/streams.py b/tap_clickup/streams.py index 3be48a4..7577a5f 100644 --- a/tap_clickup/streams.py +++ b/tap_clickup/streams.py @@ -4,50 +4,6 @@ import requests from singer_sdk.helpers.jsonpath import extract_jsonpath from tap_clickup.client import ClickUpStream -import yaml #added to read information specifying spaces and workspaces - -# Load the YAML config file from meltano -with open("meltano.yml", "r") as yaml_file: - cu_config = yaml.safe_load(yaml_file) - -#funcions to read the extra configuration data from the YAML file -#read and split lists to create list -def extract_and_convert_list(config, key): - value = config.get(key, "") - if value: - split_values = value.split(',') - return [int(item) for item in split_values] - return [] - -#specify which setting from which tap to read the config.info from ISSUE HERE the yaml read is hardcoded so breaks when i have multiple "tap-clickup" one for each env that i want to switch between -def find_tap_clickup_config(plugins): - for plugin in plugins: - if plugin.get("name") == "tap-clickup": - return plugin.get("config", {}) - return {} - -# Find the tap-clickup configuration -tap_clickup_config = find_tap_clickup_config(cu_config["plugins"]["extractors"]) - -# Extract and convert workspace ID -cu_workspace = tap_clickup_config.get("workspace_id") - -# Extract and convert spaces and lists into lists -spaces_id_list = extract_and_convert_list(tap_clickup_config, "spaces_id") -lists_id_list = extract_and_convert_list(tap_clickup_config, "list_ids") - -workteamid = cu_workspace # store workspace id E.g:30979640 -space_ids = spaces_id_list # store list of spaces to be fetched E.g: [90100432266,90100432289,90100437857] -list_ids = lists_id_list #store lists of lists to be fetched E.g: [900101856869,901002404954] - -# Check if there's only one value in 
the list, this is necessary because of a bug on click up's API. -if len(list_ids) == 1: - # Duplicate the value to ensure at least two parameters due to a bug on clickups API (the values returned by the API are NOT duplicated) - list_ids.append(list_ids[0]) -if len(space_ids) == 1: - # Duplicate the value to ensure at least two parameters due to a bug on clickups API (the values returned by the API are NOT duplicated) - space_ids.append(space_ids[0]) - SCHEMAS_DIR = Path(__file__).parent / Path("./schemas") @@ -60,9 +16,7 @@ class TeamsStream(ClickUpStream): primary_keys = ["id"] replication_key = None schema_filepath = SCHEMAS_DIR / "team.json" - # Necessary because if you have access to multiple workteams, the responses are replicated N times where N = # of worspaces you have access to - records_jsonpath = f"$.teams[?(@.id == {workteamid})]" - + records_jsonpath = "$.teams[*]" def get_child_context(self, record: dict, context: Optional[dict]) -> dict: """Return a context dictionary for child streams.""" @@ -80,7 +34,6 @@ def get_records(self, context: Optional[dict]) -> Iterable[dict]: return super().get_records(context=context) - class TimeEntries(ClickUpStream): """Time Entries""" @@ -276,6 +229,7 @@ class TasksStream(ClickUpStream): name = "task" # Date_updated_gt is greater than or equal to not just greater than + path = "/team/{team_id}/task" primary_keys = ["id"] replication_key = "date_updated" is_sorted = True @@ -283,19 +237,12 @@ class TasksStream(ClickUpStream): schema_filepath = SCHEMAS_DIR / "task.json" records_jsonpath = "$.tasks[*]" parent_stream_type = TeamsStream + # Need this stub as a hack on _sync to force it to use Partitions # Since this is a child stream we want each team_id to create a request for # archived:true and archived:false. 
And we want state to track properly partitions = [] - basepath = "/team/{team_id}/task" - space_ids_param = "&space_ids=" + "&space_ids=".join(map(str, space_ids)) if space_ids else "" #Dinamically create the spaces path to be passed on API call - list_ids_param = "?list_ids=" + "&list_ids=".join(map(str, list_ids)) if list_ids else "" #Dinamically create the spaces path to be passed on API call - # Necessary because if you pass list and space, lists MUST go first in the parameter string - if list_ids_param: - path = f"{basepath}{list_ids_param}{space_ids_param}" - else: - path = f"{basepath}?{space_ids_param}" - + @property def base_partition(self): return [{"archived": "true"}, {"archived": "false"}] @@ -345,6 +292,7 @@ def get_next_page_token( for _ in extract_jsonpath(self.records_jsonpath, input=response.json()): recordcount = recordcount + 1 + # I wonder if a better approach is to just check for 0 records and stop # For now I'll follow the docs verbatium # From the api docs, https://clickup.com/api. @@ -355,4 +303,4 @@ def get_next_page_token( else: newtoken = None - return newtoken \ No newline at end of file + return newtoken diff --git a/tap_clickup/tap.py b/tap_clickup/tap.py index fb0a9a8..9fd49a2 100644 --- a/tap_clickup/tap.py +++ b/tap_clickup/tap.py @@ -73,4 +73,3 @@ class TapClickUp(Tap): def discover_streams(self) -> List[Stream]: """Return a list of discovered streams.""" return [stream_class(tap=self) for stream_class in STREAM_TYPES] -