# pcd_create_records_batch.py
import time
import uuid
import json
import pandas as pd
from utils.session_manager import SessionManager
from utils.dynamics_config import get_config
# Imports records using the Web API batch operation, which is faster for big imports:
# https://learn.microsoft.com/en-us/power-apps/developer/data-platform/webapi/execute-batch-operations-using-web-api
# Known issue: the last record in the batch will fail to import. I am still determining the cause of this.
# When importing a lookup column:
#   - use the logical name of the RELATIONSHIP plus "@odata.bind" as the column name
#   - wrap each GUID in the entity set name of the table it references, e.g. accounts(GUID)
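# For example, a row of the CSV used here would serialize to a request body like
# the following (names and GUID are made-up placeholders):
# {"firstname": "Ada", "lastname": "Lovelace",
#  "parentcustomerid_account@odata.bind": "accounts(00000000-0000-0000-0000-000000000001)"}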

# Parameters
PathToDotEnv = "env.example"
EntityBeingAddedTo = "contacts"
PathToCSVOfRecords = "data/pcd_create_records.csv"
BatchSize = 500  # Dataverse allows up to 1000 requests per batch

# Pandas dtypes for the imported columns, declared up front to avoid import issues
dtypes = {
"parentcustomerid_account@odata.bind": "object",
"lastname": "object",
"firstname": "object"
}
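# Explicit dtypes prevent pandas type inference from mangling values; e.g. an
# all-digit text column would otherwise be read as int64 (dropping any leading
# zeros) and serialize to JSON without quotes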

# Get an authenticated requests Session
config = get_config(PathToDotEnv)
manager = SessionManager(config)
session = manager.get_authenticated_session()

# Ask Dataverse to keep processing the remaining requests in a batch when one fails
session.headers.update({"Prefer": "odata.continue-on-error"})

# The batch endpoint, and the relative URI each sub-request POSTs to
batch_uri = f'{config.api_base_url}$batch'
request_uri = f'/api/data/v9.2/{EntityBeingAddedTo}'
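# e.g. if api_base_url were https://yourorg.api.crm.dynamics.com/api/data/v9.2/
# (hypothetical org), batch_uri becomes .../api/data/v9.2/$batch and every
# sub-request inside the batch POSTs to /api/data/v9.2/contacts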

base_preamble = """
Content-Type: application/http
Content-Transfer-Encoding: binary

POST """ + request_uri + """ HTTP/1.1
Content-Type: application/json; type=entry

"""

# Read the CSV into a dataframe
df = pd.read_csv(PathToCSVOfRecords, dtype=dtypes)
first = 0
last = min(BatchSize, len(df.index)) - 1

# Copy of the input with per-record result columns appended
resultdf = df.copy()
resultdf['codes'] = ""
resultdf['messages'] = ""
resultdf['batch'] = ""

timeStart = time.perf_counter()
while first < len(df.index):
    boundary = f"batch_{uuid.uuid4()}"
    session.headers.update({"Content-Type": f'multipart/mixed; boundary="{boundary}"'})
    delimiter = f"--{boundary}".encode()
    preamble = delimiter + base_preamble.encode()
    requestdf = df.loc[first:last]

    # Serialize each record; the to_json/loads round trip turns NaN into null
    body = b""
    records = json.loads(requestdf.to_json(orient="records"))
    for record in records:
        body = body + preamble + json.dumps(record).encode()
    # Close the batch: the multipart spec (RFC 2046) requires a CRLF before the closing delimiter
    body = body + b"\r\n" + delimiter + b"--"

    r = session.post(batch_uri, data=body)
    # The response echoes its own delimiter, "--batchresponse_<guid>", which is
    # always the first 52 characters (16 for "--batchresponse_" + 36 for the GUID)
    response = r.content.decode('utf-8')
    resp_delimiter = response[0:52]
    responses = response.split(resp_delimiter)
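    # A successful part, once the fixed prefix below is stripped, begins with the
    # status line of the wrapped response, e.g. "204 No Content\r\nOData-Version: 4.0\r\n..."
    # (headers illustrative)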
    codes = []
    messages = []
    # Skip responses[0] (empty preamble) and responses[-1] (the trailing "--")
    for i in range(1, len(responses) - 1):
        part = responses[i].removeprefix("\r\nContent-Type: application/http\r\nContent-Transfer-Encoding: binary\r\n\r\nHTTP/1.1 ")
        code, message = part.split('\r\n', 1)
        codes.append(code)
        messages.append(message)

    # Record the outcome of this batch and write it out as a checkpoint
    resultdf.loc[first:last, 'codes'] = codes
    resultdf.loc[first:last, 'messages'] = messages
    resultdf.loc[first:last, 'batch'] = boundary
    resultdf.loc[first:last].to_csv(f"output/{boundary}.csv")
    successes = sum(1 for c in codes if c == "204 No Content")
    sent = len(requestdf.index)
    print(f"Records {first} : {last} sent for import. {sent - successes} failures.")
    first = last + 1
    last = min(last + BatchSize, len(df.index) - 1)

print(f'IMPORTING TOOK: {round(time.perf_counter() - timeStart)} SECONDS')
resultdf.to_csv("output/imported.csv")