-- Copyright 2022 Google LLC
--
-- Licensed under the Apache License, Version 2.0 (the "License");
-- you may not use this file except in compliance with the License.
-- You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

-- Validation query for one rule binding over the `contact_details` table.
-- It evaluates two rules (NO_DUPLICATES_IN_COLUMN_GROUPS and NOT_NULL_SIMPLE),
-- unions their results, and decorates every result row with table metadata
-- and a `failed_records_query` string holding a standalone query text.
-- Angle-bracket tokens such as <rule_binding_id> are placeholders; presumably
-- they are substituted before the query is run -- TODO confirm.

WITH
-- One-row anchor carrying the rule binding id. Every rule branch LEFT JOINs
-- from it, so a branch yields at least one row even when its right side is empty.
zero_record AS (
    SELECT
        '<rule_binding_id>' AS rule_binding_id,
),
-- Rows under validation: contact_details restricted to contact_type 'email',
-- tagged with the rule binding id used as the join key below.
data AS (
    SELECT
        *,
        '<rule_binding_id>' AS rule_binding_id,
    FROM
        `<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details` d
    WHERE
        contact_type IN ('email')
),
-- Last-modified timestamp for each table in the dataset, keyed by the
-- fully qualified `project.dataset.table` name.
last_mod AS (
    SELECT
        project_id || '.' || dataset_id || '.' || table_id AS table_id,
        TIMESTAMP_MILLIS(last_modified_time) AS last_modified
    FROM `<your-gcp-project-id>.<your_bigquery_dataset_id>.__TABLES__`
),
-- Per-rule results. Both UNION ALL branches must emit the same column list
-- in the same order.
validation_results AS (
    -- Rule NO_DUPLICATES_IN_COLUMN_GROUPS: a set-level check, so the
    -- column-level fields are NULL and only the complex_rule_* fields are filled.
    SELECT
        CURRENT_TIMESTAMP() AS execution_ts,
        '<rule_binding_id>' AS rule_binding_id,
        'NO_DUPLICATES_IN_COLUMN_GROUPS' AS rule_id,
        '<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details' AS table_id,
        CAST(NULL AS STRING) AS column_id,
        NULL AS column_value,
        CAST(NULL AS STRING) AS dimension,
        CAST(NULL AS BOOLEAN) AS simple_rule_row_is_valid,
        TRUE AS skip_null_count,
        custom_sql_statement_validation_errors.complex_rule_validation_errors_count AS complex_rule_validation_errors_count,
        -- The count is NULL when the LEFT JOIN finds no offending rows, which
        -- maps to a NULL flag; a non-zero count maps to FALSE.
        CASE
            WHEN custom_sql_statement_validation_errors.complex_rule_validation_errors_count IS NULL THEN CAST(NULL AS BOOLEAN)
            WHEN custom_sql_statement_validation_errors.complex_rule_validation_errors_count = 0 THEN TRUE
            ELSE FALSE
        END AS complex_rule_validation_success_flag,
        -- Raw triple-quoted string: the text below is data, not part of this
        -- statement, and is kept verbatim.
        r"""
WITH
zero_record AS (
SELECT
'<rule_binding_id>' AS rule_binding_id,
),
data AS (
SELECT
*,
'<rule_binding_id>' AS rule_binding_id,
FROM
`<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details` d
WHERE
contact_type IN ('email')
),
last_mod AS (
SELECT
project_id || '.' || dataset_id || '.' || table_id AS table_id,
TIMESTAMP_MILLIS(last_modified_time) AS last_modified
FROM `<your-gcp-project-id>.<your_bigquery_dataset_id>.__TABLES__`
),
validation_results AS (SELECT
'<rule_binding_id>' AS rule_binding_id,
'NO_DUPLICATES_IN_COLUMN_GROUPS' AS rule_id,
'<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details' AS table_id,
CAST(NULL AS STRING) AS column_id,
NULL AS column_value,
custom_sql_statement_validation_errors,
CAST(NULL AS STRING) AS dimension,
CAST(NULL AS BOOLEAN) AS simple_rule_row_is_valid,
TRUE AS skip_null_count,
custom_sql_statement_validation_errors.complex_rule_validation_errors_count AS complex_rule_validation_errors_count,
CASE
WHEN custom_sql_statement_validation_errors.complex_rule_validation_errors_count IS NULL THEN CAST(NULL AS BOOLEAN)
WHEN custom_sql_statement_validation_errors.complex_rule_validation_errors_count = 0 THEN TRUE
ELSE FALSE
END AS complex_rule_validation_success_flag,
FROM
zero_record
LEFT JOIN
(
SELECT
*,
'<rule_binding_id>' AS _rule_binding_id,
COUNT(*) OVER() AS complex_rule_validation_errors_count,
FROM (
select a.*
from data a
inner join (
select
contact_type,value
from data
group by contact_type,value
having count(*) > 1
) duplicates
using (contact_type,value)
) custom_sql
) custom_sql_statement_validation_errors
ON
zero_record.rule_binding_id = custom_sql_statement_validation_errors._rule_binding_id
),
all_validation_results AS (
SELECT
'{{ invocation_id }}' as _dq_validation_invocation_id,
r.rule_binding_id AS _dq_validation_rule_binding_id,
r.rule_id AS _dq_validation_rule_id,
r.column_id AS _dq_validation_column_id,
r.column_value AS _dq_validation_column_value,
CAST(r.dimension AS STRING) AS _dq_validation_dimension,
r.simple_rule_row_is_valid AS _dq_validation_simple_rule_row_is_valid,
r.complex_rule_validation_errors_count AS _dq_validation_complex_rule_validation_errors_count,
r.complex_rule_validation_success_flag AS _dq_validation_complex_rule_validation_success_flag,
r.custom_sql_statement_validation_errors,
FROM
validation_results r
)
SELECT
*
FROM
all_validation_results
WHERE
_dq_validation_simple_rule_row_is_valid is False
OR
_dq_validation_complex_rule_validation_success_flag is False
ORDER BY _dq_validation_rule_id"""
        AS failed_records_query,
    FROM
        zero_record
    LEFT JOIN
        (
            -- Every row of `data` whose (contact_type, value) pair occurs more
            -- than once; COUNT(*) OVER() repeats the total number of such rows
            -- on each of them.
            SELECT
                *,
                '<rule_binding_id>' AS _rule_binding_id,
                COUNT(*) OVER() AS complex_rule_validation_errors_count,
            FROM (
                select a.*
                from data a
                inner join (
                    select
                        contact_type,value
                    from data
                    group by contact_type,value
                    having count(*) > 1
                ) duplicates
                using (contact_type,value)
            ) custom_sql
        ) custom_sql_statement_validation_errors
    ON
        zero_record.rule_binding_id = custom_sql_statement_validation_errors._rule_binding_id
    UNION ALL
    -- Rule NOT_NULL_SIMPLE on column `value`: a row-level check, so the
    -- complex_rule_* fields are NULL and each joined data row is judged alone.
    SELECT
        CURRENT_TIMESTAMP() AS execution_ts,
        '<rule_binding_id>' AS rule_binding_id,
        'NOT_NULL_SIMPLE' AS rule_id,
        '<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details' AS table_id,
        'value' AS column_id,
        data.value AS column_value,
        CAST(NULL AS STRING) AS dimension,
        CASE
            WHEN value IS NOT NULL THEN TRUE
            ELSE
                FALSE
        END AS simple_rule_row_is_valid,
        TRUE AS skip_null_count,
        CAST(NULL AS INT64) AS complex_rule_validation_errors_count,
        CAST(NULL AS BOOLEAN) AS complex_rule_validation_success_flag,
        -- Raw triple-quoted string: the text below is data, not part of this
        -- statement, and is kept verbatim.
        r"""
WITH
zero_record AS (
SELECT
'<rule_binding_id>' AS rule_binding_id,
),
data AS (
SELECT
*,
'<rule_binding_id>' AS rule_binding_id,
FROM
`<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details` d
WHERE
contact_type IN ('email')
),
last_mod AS (
SELECT
project_id || '.' || dataset_id || '.' || table_id AS table_id,
TIMESTAMP_MILLIS(last_modified_time) AS last_modified
FROM `<your-gcp-project-id>.<your_bigquery_dataset_id>.__TABLES__`
),
validation_results AS (SELECT
'<rule_binding_id>' AS rule_binding_id,
'NOT_NULL_SIMPLE' AS rule_id,
'<your-gcp-project-id>.<your_bigquery_dataset_id>.contact_details' AS table_id,
'value' AS column_id,
data.value AS column_value,
data.row_id AS row_id,
data.contact_type AS contact_type,
data.value AS value,
CAST(NULL AS STRING) AS dimension,
CASE
WHEN value IS NOT NULL THEN TRUE
ELSE
FALSE
END AS simple_rule_row_is_valid,
TRUE AS skip_null_count,
CAST(NULL AS INT64) AS complex_rule_validation_errors_count,
CAST(NULL AS BOOLEAN) AS complex_rule_validation_success_flag,
FROM
zero_record
LEFT JOIN
data
ON
zero_record.rule_binding_id = data.rule_binding_id
),
all_validation_results AS (
SELECT
'{{ invocation_id }}' as _dq_validation_invocation_id,
r.rule_binding_id AS _dq_validation_rule_binding_id,
r.rule_id AS _dq_validation_rule_id,
r.column_id AS _dq_validation_column_id,
r.column_value AS _dq_validation_column_value,
CAST(r.dimension AS STRING) AS _dq_validation_dimension,
r.simple_rule_row_is_valid AS _dq_validation_simple_rule_row_is_valid,
r.complex_rule_validation_errors_count AS _dq_validation_complex_rule_validation_errors_count,
r.complex_rule_validation_success_flag AS _dq_validation_complex_rule_validation_success_flag,
r.row_id AS row_id,
r.contact_type AS contact_type,
r.value AS value,
FROM
validation_results r
)
SELECT
*
FROM
all_validation_results
WHERE
_dq_validation_simple_rule_row_is_valid is False
OR
_dq_validation_complex_rule_validation_success_flag is False
ORDER BY _dq_validation_rule_id"""
        AS failed_records_query,
    FROM
        zero_record
    LEFT JOIN
        data
    ON
        zero_record.rule_binding_id = data.rule_binding_id
),
-- Final shape: each rule result joined to its table's last-modified time and
-- extended with run-level constants.
all_validation_results AS (
    SELECT
        r.execution_ts AS execution_ts,
        r.rule_binding_id AS rule_binding_id,
        r.rule_id AS rule_id,
        r.table_id AS table_id,
        r.column_id AS column_id,
        r.column_value AS column_value,
        CAST(r.dimension AS STRING) AS dimension,
        r.skip_null_count AS skip_null_count,
        r.simple_rule_row_is_valid AS simple_rule_row_is_valid,
        r.complex_rule_validation_errors_count AS complex_rule_validation_errors_count,
        r.complex_rule_validation_success_flag AS complex_rule_validation_success_flag,
        -- Uncorrelated scalar subquery: total number of rows in `data`.
        (SELECT COUNT(*) FROM data) AS rows_validated,
        last_mod.last_modified,
        '{"brand": "one"}' AS metadata_json_string,
        -- NOTE(review): restored as the empty string; the scraped text showed a
        -- single space between the quotes, matching the spacing artifact seen
        -- in the other literals -- confirm against the original file.
        '' AS configs_hashsum,
        '<your_dataplex_lake_id>' AS dataplex_lake,
        '<your_dataplex_zone_id>' AS dataplex_zone,
        '<your_dataplex_asset_id>' AS dataplex_asset_id,
        CONCAT(r.rule_binding_id, '_', r.rule_id, '_', r.execution_ts, '_', True) AS dq_run_id,
        TRUE AS progress_watermark,
        failed_records_query AS failed_records_query,
    FROM
        validation_results r
    -- Inner join: a result row is dropped if its table_id has no entry in __TABLES__.
    JOIN last_mod USING(table_id)
)
SELECT
    *
FROM
    all_validation_results