From c32d6616bf65464ed8b7ac4c56990507ab13c206 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 00:04:21 +0000 Subject: [PATCH 01/22] AsciiDoc proof of concept --- asterixdb/asterix-doc/pom.xml | 7 ++ .../src/site/asciidoc/releasenote.adoc | 74 +++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml index ea23f5e1464..2cde42eca81 100644 --- a/asterixdb/asterix-doc/pom.xml +++ b/asterixdb/asterix-doc/pom.xml @@ -86,6 +86,13 @@ org.apache.maven.plugins maven-site-plugin + + + org.asciidoctor + asciidoctor-maven-plugin + 1.5.8 + + false diff --git a/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc b/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc new file mode 100644 index 00000000000..26f33abf0f2 --- /dev/null +++ b/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc @@ -0,0 +1,74 @@ +// Copyright 2016-2018 Couchbase, Inc. += Release Note +:description: A description about the release. +:pp: {plus}{plus} + +// Copyright 2016-2018 Couchbase, Inc. + +[#Whats_new] +== What's new? + +The following list covers new features in Couchbase Analytics Developer Preview 4. + +|=== +| Issue | Description | Detail + +| https://issues.couchbase.com/browse/MB-24190[MB-24190] +| Support for DISTINCT aggregates +| SQL{pp} now has support for DISTINCT in aggregate functions. + +| https://issues.couchbase.com/browse/MB-25932[MB-25932] +| Improved integration with Role-Based Access Control (RBAC) +| Analytics Service now has its own set of roles in Couchbase Server RBAC. + +| https://issues.couchbase.com/browse/MB-26002[MB-26002] +| Configure IO devices for Analytics Service +| Multiple IO Devices can now be configured for Analytics Service during Couchbase node initialization. 
+ +| https://issues.couchbase.com/browse/MB-26124[MB-26124] +| Add Analytics statistics to Couchbase web console +| Analytics statistics can now be viewed in Couchbase web console. + +| https://issues.couchbase.com/browse/MB-26125[MB-26125] +| Add number of processed objects metric +| Analytics Query Service API now returns the number of processed objects during query execution. +|=== + +// Copyright 2016-2018 Couchbase, Inc. + +[#Limitations] +== Limitations + +The following list covers some of the limitations with Couchbase Analytics Developer Preview 4. +Please read this carefully before reporting any new issues. + +|=== +| Issue | Description + +| https://issues.couchbase.com/browse/MB-25552[MB-25552] +| Failover is not supported with any node running Analytics Service. + +| https://issues.couchbase.com/browse/MB-25425[MB-25425] +| Analytics Cluster Controller node must be the last Analytics Service node removed from the cluster. + +| https://issues.couchbase.com/browse/MB-21155[MB-21155] +| The SELECT DISTINCT operation does not check object deep equality. + +| https://issues.couchbase.com/browse/MB-21163[MB-21163] +| The MINUS, INTERSECT, UNION without ALL operations are not supported. + +| https://issues.couchbase.com/browse/MB-21182[MB-21182] +| The OFFSET without a LIMIT statement is not supported. + +| https://issues.couchbase.com/browse/MB-21184[MB-21184] +| If you use add a GROUP BY expression in the SQL format, the expression for the GROUP BY key in the SELECT clause must be identical to the GROUP BY key expression. + +| https://issues.couchbase.com/browse/MB-21270[MB-21270] +| The PREPARE statement is not supported. + +| https://issues.couchbase.com/browse/MB-21294[MB-21294] +| Data Manipulation Language (DML) operations such as INSERT, DELETE, UPSERT, UPDATE do not apply to shadow datasets (which mirror data in Couchbase Server) and are therefore not supported. 
+ +| https://issues.couchbase.com/browse/MB-26478[MB-26478] +| On Windows, a system limitation with the Analytics statistics may cause "system load" to have a negative value. In addition, on Windows and macOS the statistics for "bytes read/sec" and "bytes written/sec" are not reported and always show zero. +|=== From c90f5bdea41a816acc86ebeee13ecb350c7cb94f Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 16:09:52 +0000 Subject: [PATCH 02/22] Remove initial test file Change-Id: I280cbda09f59ea3f236e74dd0af70a6b9eca236f --- .../src/site/asciidoc/releasenote.adoc | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc diff --git a/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc b/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc deleted file mode 100644 index 26f33abf0f2..00000000000 --- a/asterixdb/asterix-doc/src/site/asciidoc/releasenote.adoc +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright 2016-2018 Couchbase, Inc. -= Release Note -:description: A description about the release. -:pp: {plus}{plus} - -// Copyright 2016-2018 Couchbase, Inc. - -[#Whats_new] -== What's new? - -The following list covers new features in Couchbase Analytics Developer Preview 4. - -|=== -| Issue | Description | Detail - -| https://issues.couchbase.com/browse/MB-24190[MB-24190] -| Support for DISTINCT aggregates -| SQL{pp} now has support for DISTINCT in aggregate functions. - -| https://issues.couchbase.com/browse/MB-25932[MB-25932] -| Improved integration with Role-Based Access Control (RBAC) -| Analytics Service now has its own set of roles in Couchbase Server RBAC. - -| https://issues.couchbase.com/browse/MB-26002[MB-26002] -| Configure IO devices for Analytics Service -| Multiple IO Devices can now be configured for Analytics Service during Couchbase node initialization. 
- -| https://issues.couchbase.com/browse/MB-26124[MB-26124] -| Add Analytics statistics to Couchbase web console -| Analytics statistics can now be viewed in Couchbase web console. - -| https://issues.couchbase.com/browse/MB-26125[MB-26125] -| Add number of processed objects metric -| Analytics Query Service API now returns the number of processed objects during query execution. -|=== - -// Copyright 2016-2018 Couchbase, Inc. - -[#Limitations] -== Limitations - -The following list covers some of the limitations with Couchbase Analytics Developer Preview 4. -Please read this carefully before reporting any new issues. - -|=== -| Issue | Description - -| https://issues.couchbase.com/browse/MB-25552[MB-25552] -| Failover is not supported with any node running Analytics Service. - -| https://issues.couchbase.com/browse/MB-25425[MB-25425] -| Analytics Cluster Controller node must be the last Analytics Service node removed from the cluster. - -| https://issues.couchbase.com/browse/MB-21155[MB-21155] -| The SELECT DISTINCT operation does not check object deep equality. - -| https://issues.couchbase.com/browse/MB-21163[MB-21163] -| The MINUS, INTERSECT, UNION without ALL operations are not supported. - -| https://issues.couchbase.com/browse/MB-21182[MB-21182] -| The OFFSET without a LIMIT statement is not supported. - -| https://issues.couchbase.com/browse/MB-21184[MB-21184] -| If you use add a GROUP BY expression in the SQL format, the expression for the GROUP BY key in the SELECT clause must be identical to the GROUP BY key expression. - -| https://issues.couchbase.com/browse/MB-21270[MB-21270] -| The PREPARE statement is not supported. - -| https://issues.couchbase.com/browse/MB-21294[MB-21294] -| Data Manipulation Language (DML) operations such as INSERT, DELETE, UPSERT, UPDATE do not apply to shadow datasets (which mirror data in Couchbase Server) and are therefore not supported. 
- -| https://issues.couchbase.com/browse/MB-26478[MB-26478] -| On Windows, a system limitation with the Analytics statistics may cause "system load" to have a negative value. In addition, on Windows and macOS the statistics for "bytes read/sec" and "bytes written/sec" are not reported and always show zero. -|=== From a5b2b7344e8730cdef8ed466f23bea3e211ade7e Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 16:11:11 +0000 Subject: [PATCH 03/22] Create AsciiDoc partials Change-Id: I16c66a5f507d7cd2f9894d235c38a88a7f1f4ab8 --- .../builtins/partials/0_toc_common.adoc | 3 + .../builtins/partials/10_comparison.adoc | 71 + .../modules/builtins/partials/11_type.adoc | 657 +++++ .../modules/builtins/partials/12_misc.adoc | 235 ++ .../builtins/partials/13_conditional.adoc | 403 +++ .../modules/builtins/partials/14_window.adoc | 1238 ++++++++ .../modules/builtins/partials/15_bitwise.adoc | 665 +++++ .../builtins/partials/1_numeric_common.adoc | 782 +++++ .../builtins/partials/1_numeric_delta.adoc | 47 + .../builtins/partials/2_string_common.adoc | 758 +++++ .../builtins/partials/2_string_delta.adoc | 192 ++ .../modules/builtins/partials/3_binary.adoc | 147 + .../modules/builtins/partials/4_spatial.adoc | 384 +++ .../builtins/partials/5_similarity.adoc | 217 ++ .../builtins/partials/6_tokenizing.adoc | 32 + .../modules/builtins/partials/7_allens.adoc | 306 ++ .../modules/builtins/partials/7_temporal.adoc | 993 +++++++ .../modules/builtins/partials/8_record.adoc | 707 +++++ .../builtins/partials/9_aggregate_aql.adoc | 372 +++ .../builtins/partials/9_aggregate_sql.adoc | 834 ++++++ .../modules/sqlpp/partials/1_intro.adoc | 32 + .../shared/modules/sqlpp/partials/2_expr.adoc | 677 +++++ .../modules/sqlpp/partials/2_expr_title.adoc | 3 + .../sqlpp/partials/3_declare_dataverse.adoc | 21 + .../sqlpp/partials/3_declare_function.adoc | 34 + .../modules/sqlpp/partials/3_query.adoc | 2615 +++++++++++++++++ .../modules/sqlpp/partials/3_query_title.adoc | 10 + 
.../modules/sqlpp/partials/4_error.adoc | 131 + .../modules/sqlpp/partials/4_error_title.adoc | 3 + .../sqlpp/partials/5_ddl_dataset_index.adoc | 421 +++ .../modules/sqlpp/partials/5_ddl_dml.adoc | 118 + .../partials/5_ddl_function_removal.adoc | 135 + .../modules/sqlpp/partials/5_ddl_head.adoc | 22 + .../sqlpp/partials/appendix_1_keywords.adoc | 23 + .../sqlpp/partials/appendix_1_title.adoc | 3 + .../sqlpp/partials/appendix_2_index_only.adoc | 29 + .../partials/appendix_2_parallel_sort.adoc | 31 + .../sqlpp/partials/appendix_2_parameters.adoc | 98 + .../sqlpp/partials/appendix_2_title.adoc | 3 + .../sqlpp/partials/appendix_3_resolution.adoc | 349 +++ .../sqlpp/partials/appendix_3_title.adoc | 3 + 41 files changed, 13804 insertions(+) create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/0_toc_common.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc create mode 
100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query_title.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error_title.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dataset_index.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dml.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_function_removal.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_head.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_keywords.adoc create mode 100644 
asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_title.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_index_only.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parallel_sort.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parameters.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_title.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_title.adoc diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/0_toc_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/0_toc_common.adoc new file mode 100644 index 00000000000..88b1a982518 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/0_toc_common.adoc @@ -0,0 +1,3 @@ +The system provides various classes of functions to support operations +on numeric, string, spatial, and temporal data. This document explains +how to use these functions. diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc new file mode 100644 index 00000000000..663a0d3fdfb --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc @@ -0,0 +1,71 @@ +[[comparison-functions]] +Comparison Functions +~~~~~~~~~~~~~~~~~~~~ + +[[greatest]] +greatest +^^^^^^^^ + +* Syntax: ++ +--------------------------------------------- +greatest(numeric_value1, numeric_value2, ...) +--------------------------------------------- +* Computes the greatest value among arguments. 
+* Arguments: +** `numeric_value1`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** `numeric_value2`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** .... +* Return Value: +** the greatest value among the arguments. The return type is decided by +the item type with the highest order in the numeric type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +items. +** `null` if any argument is a `missing` value or `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +-------------------------------------------------------------------------------- +{ "v1": greatest(1, 2, 3), "v2": greatest(float("0.5"), double("-0.5"), 5000) }; +-------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------- +{ "v1": 3, "v2": 5000.0 } +------------------------- + +[[least]] +least +^^^^^ + +* Syntax: ++ +------------------------------------------ +least(numeric_value1, numeric_value2, ...) +------------------------------------------ +* Computes the least value among arguments. +* Arguments: +** `numeric_value1`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** `numeric_value2`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** .... +* Return Value: +** the least value among the arguments. The return type is decided by +the item type with the highest order in the numeric type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +items. +** `null` if any argument is a `missing` value or `null` value, +** any other non-numeric input value will cause a type error.
+* Example: ++ +-------------------------------------------------------------------------- +{ "v1": least(1, 2, 3), "v2": least(float("0.5"), double("-0.5"), 5000) }; +-------------------------------------------------------------------------- +* The expected result is: ++ +----------------------- +{ "v1": 1, "v2": -0.5 } +----------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc new file mode 100644 index 00000000000..d7126486016 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc @@ -0,0 +1,657 @@ +[[type-functions]] +Type Functions +~~~~~~~~~~~~~~ + +[[is_array]] +is_array +^^^^^^^^ + +* Syntax: ++ +-------------- +is_array(expr) +-------------- +* Checks whether the given expression is evaluated to be an `array` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is an `array` value or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value. +* Example: ++ +------------------------- +{ + "a": is_array(true), + "b": is_array(false), + "c": isarray(null), + "d": isarray(missing), + "e": isarray("d"), + "f": isarray(4.0), + "g": isarray(5), + "h": isarray(["1", 2]), + "i": isarray({"a":1}) +}; +------------------------- +* The expected result is: ++ +------------------------------------------------------------------------------------------------ +{ "a": false, "b": false, "c": null, "e": false, "f": false, "g": false, "h": true, "i": false } +------------------------------------------------------------------------------------------------ + +The function has an alias `isarray`. 
+ +[[is_atomic-is_atom]] +is_atomic (is_atom) +^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +is_atomic(expr) +--------------- +* Checks whether the given expression is evaluated to be a value of a +link:../datamodel.html#PrimitiveTypes[primitive] type. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is a primitive type or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value. +* Example: ++ +------------------------- +{ + "a": is_atomic(true), + "b": is_atomic(false), + "c": isatomic(null), + "d": isatomic(missing), + "e": isatomic("d"), + "f": isatom(4.0), + "g": isatom(5), + "h": isatom(["1", 2]), + "i": isatom({"a":1}) +}; +------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------- +{ "a": true, "b": true, "c": null, "e": true, "f": true, "g": true, "h": false, "i": false } +-------------------------------------------------------------------------------------------- + +The function has three aliases: `isatomic`, `is_atom`, and `isatom`. + +[[is_boolean-is_bool]] +is_boolean (is_bool) +^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------- +is_boolean(expr) +---------------- +* Checks whether the given expression is evaluated to be a `boolean` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is a `boolean` value or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value. 
+* Example: ++ +--------------------------- +{ + "a": isboolean(true), + "b": isboolean(false), + "c": is_boolean(null), + "d": is_boolean(missing), + "e": isbool("d"), + "f": isbool(4.0), + "g": isbool(5), + "h": isbool(["1", 2]), + "i": isbool({"a":1}) +}; +--------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------------------- +{ "a": true, "b": true, "c": null, "e": false, "f": false, "g": false, "h": false, "i": false } +----------------------------------------------------------------------------------------------- + +The function has three aliases: `isboolean`, `is_bool`, and `isbool`. + +[[is_number-is_num]] +is_number (is_num) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +is_number(expr) +--------------- +* Checks whether the given expression is evaluated to be a numeric +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is a +`smallint`/`tinyint`/`integer`/`bigint`/`float`/`double` value or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value. +* Example: ++ +------------------------- +{ + "a": is_number(true), + "b": is_number(false), + "c": isnumber(null), + "d": isnumber(missing), + "e": isnumber("d"), + "f": isnum(4.0), + "g": isnum(5), + "h": isnum(["1", 2]), + "i": isnum({"a":1}) +}; +------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------------------- +{ "a": false, "b": false, "c": null, "e": false, "f": true, "g": true, "h": false, "i": false } +----------------------------------------------------------------------------------------------- + +The function has three aliases: `isnumber`, `is_num`, and `isnum`. 
+ +[[is_object-is_obj]] +is_object (is_obj) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +is_object(expr) +--------------- +* Checks whether the given expression is evaluated to be an `object` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is an `object` value or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value. +* Example: ++ +------------------------- +{ + "a": is_object(true), + "b": is_object(false), + "c": isobject(null), + "d": isobject(missing), + "e": isobj("d"), + "f": isobj(4.0), + "g": isobj(5), + "h": isobj(["1", 2]), + "i": isobj({"a":1}) +}; +------------------------- +* The expected result is: ++ +\{ "a": false, "b": false, "c": null, "e": false, "f": false, "g": +false, "h": false, "i": true } + +The function has three aliases: `isobject`, `is_obj`, and `isobj`. + +[[is_string-is_str]] +is_string (is_str) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +is_string(expr) +--------------- +* Checks whether the given expression is evaluated to be a `string` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the argument is a `string` value or not, +** a `missing` if the argument is a `missing` value, +** a `null` if the argument is a `null` value.
+* Example: ++ +----------------------- +{ + "a": is_string(true), + "b": isstring(false), + "c": isstring(null), + "d": isstr(missing), + "e": isstr("d"), + "f": isstr(4.0), + "g": isstr(5), + "h": isstr(["1", 2]), + "i": isstr({"a":1}) +}; +----------------------- +* The expected result is: ++ +------------------------------------------------------------------------------------------------ +{ "a": false, "b": false, "c": null, "e": true, "f": false, "g": false, "h": false, "i": false } +------------------------------------------------------------------------------------------------ + +The function has three aliases: `isstring`, `is_str`, and `isstr`. + +[[is_null]] +is_null +^^^^^^^ + +* Syntax: ++ +------------- +is_null(expr) +------------- +* Checks whether the given expression is evaluated to be a `null` value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the variable is a `null` or not, +** a `missing` if the input is `missing`. +* Example: ++ +------------------------------------------------------------------ +{ "v1": is_null(null), "v2": is_null(1), "v3": is_null(missing) }; +------------------------------------------------------------------ +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +The function has an alias `isnull`. + +[[is_missing]] +is_missing +^^^^^^^^^^ + +* Syntax: ++ +---------------- +is_missing(expr) +---------------- +* Checks whether the given expression is evaluated to be a `missing` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the variable is a `missing` or not. 
+* Example: ++ +--------------------------------------------------------------------------- +{ "v1": is_missing(null), "v2": is_missing(1), "v3": is_missing(missing) }; +--------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------- +{ "v1": false, "v2": false, "v3": true } +---------------------------------------- + +The function has an alias `ismissing`. + +[[is_unknown]] +is_unknown +^^^^^^^^^^ + +* Syntax: ++ +---------------- +is_unknown(expr) +---------------- +* Checks whether the given variable is a `null` value or a `missing` +value. +* Arguments: +** `expr` : an expression (any type is allowed). +* Return Value: +** a `boolean` on whether the variable is a `null`/`missing` value +(`true`) or not (`false`). +* Example: ++ +--------------------------------------------------------------------------- +{ "v1": is_unknown(null), "v2": is_unknown(1), "v3": is_unknown(missing) }; +--------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------- +{ "v1": true, "v2": false, "v3": true } +--------------------------------------- + +The function has an alias `isunknown`.
+ +[[to_array]] +to_array +^^^^^^^^ + +* Syntax: ++ +-------------- +to_array(expr) +-------------- +* Converts input value to an `array` value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `array` type then it is returned as is +** if the argument is of `multiset` type then it is returned as an +`array` with elements in an undefined order +** otherwise an `array` containing the input expression as its single +item is returned +* Example: ++ +------------------------------ +{ + "v1": to_array("asterix"), + "v2": to_array(["asterix"]) +}; +------------------------------ +* The expected result is: ++ +---------------------------------------- +{ "v1": ["asterix"], "v2": ["asterix"] } +---------------------------------------- + +The function has an alias `toarray`. + +[[to_atomic-to_atom]] +to_atomic (to_atom) +^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +to_atomic(expr) +--------------- +* Converts input value to a +link:../datamodel.html#PrimitiveTypes[primitive] value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of primitive type then it is returned as is +** if the argument is of `array` or `multiset` type and has only one +element then the result of invoking to_atomic() on that element is +returned +** if the argument is of `object` type and has only one field then the +result of invoking to_atomic() on the value of that field is returned +** otherwise `null` is returned +* Example: ++ +---------------------------------------- +{ + "v1": to_atomic("asterix"), + "v2": to_atomic(["asterix"]), + "v3": to_atomic([0, 1]), + "v4": to_atomic({"value": "asterix"}), + "v5": to_atomic({"x": 1, "y": 2}) +}; +---------------------------------------- +* The expected
result is: ++ +----------------------------------------------------------------------------- +{ "v1": "asterix", "v2": "asterix", "v3": null, "v4": "asterix", "v5": null } +----------------------------------------------------------------------------- + +The function has three aliases: `toatomic`, `to_atom`, and `toatom`. + +[[to_boolean-to_bool]] +to_boolean (to_bool) +^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------- +to_boolean(expr) +---------------- +* Converts input value to a `boolean` value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `boolean` type then it is returned as is +** if the argument is of numeric type then `false` is returned if it is +`0` or `NaN`, otherwise `true` +** if the argument is of `string` type then `false` is returned if it's +empty, otherwise `true` +** if the argument is of `array` or `multiset` type then `false` is +returned if its size is `0`, otherwise `true` +** if the argument is of `object` type then `false` is returned if it +has no fields, otherwise `true` +** type error is raised for all other input types +* Example: ++ +----------------------------- +{ + "v1": to_boolean(0), + "v2": to_boolean(1), + "v3": to_boolean(""), + "v4": to_boolean("asterix") +}; +----------------------------- +* The expected result is: ++ +---------------------------------------------------- +{ "v1": false, "v2": true, "v3": false, "v4": true } +---------------------------------------------------- + +The function has three aliases: `toboolean`, `to_bool`, and `tobool`.
+ +[[to_bigint]] +to_bigint +^^^^^^^^^ + +* Syntax: ++ +--------------- +to_bigint(expr) +--------------- +* Converts input value to an integer value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `boolean` type then `1` is returned if it is +`true`, `0` if it is `false` +** if the argument is of numeric integer type then it is returned as the +same value of `bigint` type +** if the argument is of numeric `float`/`double` type then it is +converted to `bigint` type +** if the argument is of `string` type and can be parsed as integer then +that integer value is returned, otherwise `null` is returned +** if the argument is of `array`/`multiset`/`object` type then `null` is +returned +** type error is raised for all other input types +* Example: ++ +------------------------------------ +{ + "v1": to_bigint(false), + "v2": to_bigint(true), + "v3": to_bigint(10), + "v4": to_bigint(float("1e100")), + "v5": to_bigint(double("1e1000")), + "v6": to_bigint("20") +}; +------------------------------------ +* The expected result is: ++ +---------------------------------------------------------------------------------------------- +{ "v1": 0, "v2": 1, "v3": 10, "v4": 9223372036854775807, "v5": 9223372036854775807, "v6": 20 } +---------------------------------------------------------------------------------------------- + +The function has an alias `tobigint`. 
+ +[[to_double]] +to_double +^^^^^^^^^ + +* Syntax: ++ +--------------- +to_double(expr) +--------------- +* Converts input value to a `double` value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `boolean` type then `1.0` is returned if it is +`true`, `0.0` if it is `false` +** if the argument is of numeric type then it is returned as the value +of `double` type +** if the argument is of `string` type and can be parsed as `double` +then that `double` value is returned, otherwise `null` is returned +** if the argument is of `array`/`multiset`/`object` type then `null` is +returned +** type error is raised for all other input types +* Example: ++ +------------------------- +{ + "v1": to_double(false), + "v2": to_double(true), + "v3": to_double(10), + "v4": to_double(11.5), + "v5": to_double("12.5") +}; +------------------------- +* The expected result is: ++ +------------------------------------------------------------ +{ "v1": 0.0, "v2": 1.0, "v3": 10.0, "v4": 11.5, "v5": 12.5 } +------------------------------------------------------------ + +The function has an alias `todouble`. 
+ +[[to_number-to_num]] +to_number (to_num) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +to_number(expr) +--------------- +* Converts input value to a numeric value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of numeric type then it is returned as is +** if the argument is of `boolean` type then `1` is returned if it is +`true`, `0` if it is `false` +** if the argument is of `string` type and can be parsed as `bigint` +then that `bigint` value is returned, otherwise if it can be parsed as +`double` then that `double` value is returned, otherwise `null` is +returned +** if the argument is of `array`/`multiset`/`object` type then `null` is +returned +** type error is raised for all other input types +* Example: ++ +------------------------- +{ + "v1": to_number(false), + "v2": to_number(true), + "v3": to_number(10), + "v4": to_number(11.5), + "v5": to_number("12.5") +}; +------------------------- +* The expected result is: ++ +------------------------------------------------------ +{ "v1": 0, "v2": 1, "v3": 10, "v4": 11.5, "v5": 12.5 } +------------------------------------------------------ + +The function has three aliases: `tonumber`, `to_num`, and `tonum`. 
+ +[[to_object-to_obj]] +to_object (to_obj) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +to_object(expr) +--------------- +* Converts input value to an `object` value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `object` type then it is returned as is +** otherwise an empty `object` is returned +* Example: ++ +---------------------------------------- +{ + "v1": to_object({"value": "asterix"}), + "v2": to_object("asterix") +}; +---------------------------------------- +* The expected result is: ++ +---------------------------------------- +{ "v1": {"value": "asterix"}, "v2": {} } +---------------------------------------- + +The function has three aliases: `toobject`, `to_obj`, and `toobj`. + +[[to_string-to_str]] +to_string (to_str) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +to_string(expr) +--------------- +* Converts input value to a string value +* Arguments: +** `expr` : an expression +* Return Value: +** if the argument is `missing` then `missing` is returned +** if the argument is `null` then `null` is returned +** if the argument is of `boolean` type then `"true"` is returned if it +is `true`, `"false"` if it is `false` +** if the argument is of numeric type then its string representation is +returned +** if the argument is of `string` type then it is returned as is +** if the argument is of `array`/`multiset`/`object` type then `null` is +returned +** type error is raised for all other input types +* Example: ++ +---------------------------- +{ + "v1": to_string(false), + "v2": to_string(true), + "v3": to_string(10), + "v4": to_string(11.5), + "v5": to_string("asterix") +}; +---------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------- +{ "v1": "false", "v2": "true", "v3": "10", "v4": "11.5", "v5": "asterix" } 
+-------------------------------------------------------------------------- + +The function has three aliases: `tostring`, `to_str`, and `tostr`. diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc new file mode 100644 index 00000000000..4b5141ddb9c --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc @@ -0,0 +1,235 @@ +[[miscellaneous-functions]] +Miscellaneous Functions +~~~~~~~~~~~~~~~~~~~~~~~ + +[[uuid]] +uuid +^^^^ + +* Syntax: ++ +------ +uuid() +------ +* Generates a `uuid`. +* Arguments: +** none +* Return Value: +** a generated, random `uuid`. + +[[len]] +len +^^^ + +* Syntax: ++ +`len(array)` +* Returns the length of the array `array`. +* Arguments: +** `array` : an `array`, `multiset`, `null`, or `missing`, represents +the collection that needs to be checked. +* Return Value: +** an `integer` that represents the length of input array or the size of +the input multiset, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value. +* Example: ++ +----------------------- +len(["Hello", "World"]) +----------------------- +* The expected result is: ++ +---- +2 +---- + +[[not]] +not +^^^ + +* Syntax: ++ +--------- +not(expr) +--------- +* Inverts a `boolean` value +* Arguments: +** `expr` : an expression +* Return Value: +** a `boolean`, the inverse of `expr`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** other non-boolean argument value will cause a type error.
+* Example: ++ +----------------------------------------------------------------------------------- +{ "v1": `not`(true), "v2": `not`(false), "v3": `not`(null), "v4": `not`(missing) }; +----------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------- +{ "v1": false, "v2": true, "v3": null } +--------------------------------------- + +[[random]] +random +^^^^^^ + +* Syntax: ++ +---------------------- +random( [seed_value] ) +---------------------- +* Returns a random number, accepting an optional seed value +* Arguments: +** `seed_value`: an optional +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value +representing the seed number. +* Return Value: +** A random number of type `double` between 0 and 1, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value or a non-numeric value. +* Example: ++ +----------------------------------------------------------------- +{ + "v1": random(), + "v2": random(unix_time_from_datetime_in_ms(current_datetime())) +}; +----------------------------------------------------------------- + +[[range]] +range +^^^^^ + +* Syntax: ++ +--------------------------------------------- +range(start_numeric_value, end_numeric_value) +--------------------------------------------- +* Generates a series of `bigint` values starting from the +`start_numeric_value` until the `end_numeric_value`. +* Arguments: +** `start_numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the start value. +** `end_numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the max final value. +* Return Value: +** an array that starts with the integer value of `start_numeric_value` +and ends with the integer value of `end_numeric_value`, where the value +of each entry in the array is the integer successor of the value in the +preceding entry.
+* Example: ++ +------------ +range(0, 3); +------------ +* The expected result is: ++ +-------------- +[ 0, 1, 2, 3 ] +-------------- + +[[switch_case]] +switch_case +^^^^^^^^^^^ + +* Syntax: ++ +--------------------------- +switch_case( + condition, + case1, case1_result, + case2, case2_result, + ..., + default, default_result +) +--------------------------- +* Switches amongst a sequence of cases and returns the result of the +first matching case. If no match is found, the result of the default +case is returned. +* Arguments: +** `condition`: a variable (any type is allowed). +** `caseI/default`: a variable (any type is allowed). +** `caseI/default_result`: a variable (any type is allowed). +* Return Value: +** `caseI_result` if `condition` matches `caseI`, otherwise +`default_result`. +* Example 1: ++ +------------ +switch_case( + "a", + "a", 0, + "x", 1, + "y", 2, + "z", 3 +); +------------ +* The expected result is: ++ +---- +0 +---- +* Example 2: ++ +------------ +switch_case( + "a", + "x", 1, + "y", 2, + "z", 3 +); +------------ +* The expected result is: ++ +---- +3 +---- + +[[deep_equal]] +deep_equal +^^^^^^^^^^ + +* Syntax: ++ +------------------------ +deep_equal(expr1, expr2) +------------------------ +* Assesses the equality between two expressions of any type (e.g., objects, +arrays, or multisets). Two objects are deeply equal iff both their types +and values are equal. +* Arguments: +** `expr1` : an expression, +** `expr2` : an expression. +* Return Value: +** `true` or `false` depending on the data equality, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value.
+* Example: ++ +----------------------------------------------------------------- +deep_equal( + { + "id":1, + "project":"AsterixDB", + "address":{"city":"Irvine", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"] + }, + { + "id":1, + "project":"AsterixDB", + "address":{"city":"San Diego", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"] + } +); +----------------------------------------------------------------- +* The expected result is: ++ +----- +false +----- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc new file mode 100644 index 00000000000..b0c9166eccc --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc @@ -0,0 +1,403 @@ +[[conditional-functions]] +Conditional Functions +~~~~~~~~~~~~~~~~~~~~~ + +[[if_null-ifnull]] +if_null (ifnull) +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------- +if_null(expression1, expression2, ... expressionN) +-------------------------------------------------- +* Finds first argument which value is not `null` and returns that value +* Arguments: +** `expressionI` : an expression (any type is allowed). +* Return Value: +** a `null` if all arguments evaluate to `null` or no arguments +specified +** a value of the first non-`null` argument otherwise +* Example: ++ +------------------------------------- +{ + "a": if_null(), + "b": if_null(null), + "c": if_null(null, "asterixdb"), + "d": is_missing(if_null(missing)) +}; +------------------------------------- +* The expected result is: ++ +----------------------------------------------------- +{ "a": null, "b": null, "c": "asterixdb", "d": true } +----------------------------------------------------- + +The function has an alias `ifnull`. 
+ +[[if_missing-ifmissing]] +if_missing (ifmissing) +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------------- +if_missing(expression1, expression2, ... expressionN) +----------------------------------------------------- +* Finds first argument which value is not `missing` and returns that +value +* Arguments: +** `expressionI` : an expression (any type is allowed). +* Return Value: +** a `null` if all arguments evaluate to `missing` or no arguments +specified +** a value of the first non-`missing` argument otherwise +* Example: ++ +------------------------------------------ +{ + "a": if_missing(), + "b": if_missing(missing), + "c": if_missing(missing, "asterixdb"), + "d": if_missing(null, "asterixdb") +}; +------------------------------------------ +* The expected result is: ++ +----------------------------------------------------- +{ "a": null, "b": null, "c": "asterixdb", "d": null } +----------------------------------------------------- + +The function has an alias `ifmissing`. + +[[if_missing_or_null-ifmissingornull-coalesce]] +if_missing_or_null (ifmissingornull, coalesce) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------- +if_missing_or_null(expression1, expression2, ... expressionN) +------------------------------------------------------------- +* Finds first argument which value is not `null` or `missing` and +returns that value +* Arguments: +** `expressionI` : an expression (any type is allowed). 
+* Return Value: +** a `null` if all arguments evaluate to either `null` or `missing`, or +no arguments specified +** a value of the first non-`null`, non-`missing` argument otherwise +* Example: ++ +------------------------------------------------------- +{ + "a": if_missing_or_null(), + "b": if_missing_or_null(null, missing), + "c": if_missing_or_null(null, missing, "asterixdb") +}; +------------------------------------------------------- +* The expected result is: ++ +------------------------------------------ +{ "a": null, "b": null, "c": "asterixdb" } +------------------------------------------ + +The function has two aliases: `ifmissingornull` and `coalesce`. + +[[if_inf-ifinf]] +if_inf (ifinf) +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------- +if_inf(expression1, expression2, ... expressionN) +------------------------------------------------- +* Finds first argument which is a non-infinite (`INF` or `-INF`) number +* Arguments: +** `expressionI` : an expression (any type is allowed).
+* Return Value: +** a `missing` if `missing` argument was encountered before the first +non-infinite number argument +** a `null` if `null` argument or any other non-number argument was +encountered before the first non-infinite number argument +** the first non-infinite number argument otherwise +* Example: ++ +------------------------------------------------------------ +{ + "a": is_null(if_inf(null)), + "b": is_missing(if_inf(missing)), + "c": is_null(if_inf(double("INF"))), + "d": if_inf(1, null, missing), + "e": is_null(if_inf(null, missing, 1)), + "f": is_missing(if_inf(missing, null, 1)), + "g": if_inf(float("INF"), 1), + "h": to_string(if_inf(float("INF"), double("NaN"), 1)) +}; +------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------------------------------------- +{ "a": true, "b": true, "c": true, "d": 1, "e": true, "f": true, "g": 1, "h": "NaN" } +------------------------------------------------------------------------------------- + +The function has an alias `ifinf`. + +[[if_nan-ifnan]] +if_nan (ifnan) +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------- +if_nan(expression1, expression2, ... expressionN) +------------------------------------------------- +* Finds first argument which is a non-`NaN` number +* Arguments: +** `expressionI` : an expression (any type is allowed).
+* Return Value: +** a `missing` if `missing` argument was encountered before the first +non-`NaN` number argument +** a `null` if `null` argument or any other non-number argument was +encountered before the first non-`NaN` number argument +** the first non-`NaN` number argument otherwise +* Example: ++ +------------------------------------------------------------ +{ + "a": is_null(if_nan(null)), + "b": is_missing(if_nan(missing)), + "c": is_null(if_nan(double("NaN"))), + "d": if_nan(1, null, missing), + "e": is_null(if_nan(null, missing, 1)), + "f": is_missing(if_nan(missing, null, 1)), + "g": if_nan(float("NaN"), 1), + "h": to_string(if_nan(float("NaN"), double("INF"), 1)) +}; +------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------------------------------------- +{ "a": true, "b": true, "c": true, "d": 1, "e": true, "f": true, "g": 1, "h": "INF" } +------------------------------------------------------------------------------------- + +The function has an alias `ifnan`. + +[[if_nan_or_inf-ifnanorinf]] +if_nan_or_inf (ifnanorinf) +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------------- +if_nan_or_inf(expression1, expression2, ... expressionN) +-------------------------------------------------------- +* Finds first argument which is a non-infinite (`INF` or `-INF`) and +non-`NaN` number +* Arguments: +** `expressionI` : an expression (any type is allowed).
+* Return Value: +** a `missing` if `missing` argument was encountered before the first +non-infinite and non-`NaN` number argument +** a `null` if `null` argument or any other non-number argument was +encountered before the first non-infinite and non-`NaN` number argument +** the first non-infinite and non-`NaN` number argument otherwise +* Example: ++ +-------------------------------------------------------------- +{ + "a": is_null(if_nan_or_inf(null)), + "b": is_missing(if_nan_or_inf(missing)), + "c": is_null(if_nan_or_inf(double("NaN"), double("INF"))), + "d": if_nan_or_inf(1, null, missing), + "e": is_null(if_nan_or_inf(null, missing, 1)), + "f": is_missing(if_nan_or_inf(missing, null, 1)), + "g": if_nan_or_inf(float("NaN"), float("INF"), 1) +}; +-------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------------------- +{ "a": true, "b": true, "c": true, "d": 1, "e": true, "f": true, "g": 1 } +------------------------------------------------------------------------- + +The function has an alias `ifnanorinf`. + +[[null_if-nullif]] +null_if (nullif) +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------- +null_if(expression1, expression2) +--------------------------------- +* Compares two arguments and returns `null` if they are equal, otherwise +returns the first argument. +* Arguments: +** `expressionI` : an expression (any type is allowed).
+* Return Value: +** `missing` if any argument is a `missing` value, +** `null` if +*** any argument is a `null` value but no argument is a `missing` value, +or +*** `argument1` = `argument2` +** a value of the first argument otherwise +* Example: ++ +------------------------------------------- +{ + "a": null_if("asterixdb", "asterixdb"), + "b": null_if(1, 2) +}; +------------------------------------------- +* The expected result is: ++ +--------------------- +{ "a": null, "b": 1 } +--------------------- + +The function has an alias `nullif`. + +[[missing_if-missingif]] +missing_if (missingif) +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------ +missing_if(expression1, expression2) +------------------------------------ +* Compares two arguments and returns `missing` if they are equal, +otherwise returns the first argument. +* Arguments: +** `expressionI` : an expression (any type is allowed). +* Return Value: +** `missing` if +*** any argument is a `missing` value, or +*** no argument is a `null` value and `argument1` = `argument2` +** `null` if any argument is a `null` value but no argument is a +`missing` value +** a value of the first argument otherwise +* Example: ++ +--------------------------------------------- +{ + "a": missing_if("asterixdb", "asterixdb"), + "b": missing_if(1, 2) +}; +--------------------------------------------- +* The expected result is: ++ +---------- +{ "b": 1 } +---------- + +The function has an alias `missingif`. + +[[nan_if-nanif]] +nan_if (nanif) +^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------- +nan_if(expression1, expression2) +-------------------------------- +* Compares two arguments and returns `NaN` value if they are equal, +otherwise returns the first argument. +* Arguments: +** `expressionI` : an expression (any type is allowed).
+* Return Value: +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value +** `NaN` value of type `double` if `argument1` = `argument2` +** a value of the first argument otherwise +* Example: ++ +----------------------------------------------------- +{ + "a": to_string(nan_if("asterixdb", "asterixdb")), + "b": nan_if(1, 2) +}; +----------------------------------------------------- +* The expected result is: ++ +---------------------- +{ "a": "NaN", "b": 1 } +---------------------- + +The function has an alias `nanif`. + +[[posinf_if-posinfif]] +posinf_if (posinfif) +^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------- +posinf_if(expression1, expression2) +----------------------------------- +* Compares two arguments and returns `+INF` value if they are equal, +otherwise returns the first argument. +* Arguments: +** `expressionI` : an expression (any type is allowed). +* Return Value: +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value +** `+INF` value of type `double` if `argument1` = `argument2` +** a value of the first argument otherwise +* Example: ++ +-------------------------------------------------------- +{ + "a": to_string(posinf_if("asterixdb", "asterixdb")), + "b": posinf_if(1, 2) +}; +-------------------------------------------------------- +* The expected result is: ++ +----------------------- +{ "a": "+INF", "b": 1 } +----------------------- + +The function has an alias `posinfif`. + +[[neginf_if-neginfif]] +neginf_if (neginfif) +^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------- +neginf_if(expression1, expression2) +----------------------------------- +* Compares two arguments and returns `-INF` value if they are equal, +otherwise returns the first argument. +* Arguments: +** `expressionI` : an expression (any type is allowed). 
+* Return Value: +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value +** `-INF` value of type `double` if `argument1` = `argument2` +** a value of the first argument otherwise +* Example: ++ +-------------------------------------------------------- +{ + "a": to_string(neginf_if("asterixdb", "asterixdb")), + "b": neginf_if(1, 2) +}; +-------------------------------------------------------- +* The expected result is: ++ +----------------------- +{ "a": "-INF", "b": 1 } +----------------------- + +The function has an alias `neginfif`. diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc new file mode 100644 index 00000000000..8e3fa8f660e --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc @@ -0,0 +1,1238 @@ +[[window-functions]] +Window Functions +~~~~~~~~~~~~~~~~ + +Window functions are used to compute an aggregate or cumulative value, +based on a portion of the tuples selected by a query. For each input +tuple, a movable window of tuples is defined. The window determines the +tuples to be used by the window function. + +The tuples are not grouped into a single output tuple — each tuple +remains separate in the query output. + +All window functions must be used with an OVER clause. Refer to +link:manual.html#Over_clauses[OVER Clauses] for details. + +Window functions cannot appear in the FROM clause or LIMIT +clause. + +The examples in this section use the `GleambookMessages` dataset, +described in the section on link:manual.html#SELECT_statements[SELECT +Statements].
+ +[[cume_dist]] +cume_dist +^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------------ +CUME_DIST() OVER ([window-partition-clause] [window-order-clause]) +------------------------------------------------------------------ +* Returns the percentile rank of the current tuple as part of the +cumulative distribution – that is, the number of tuples ranked lower +than or equal to the current tuple, including the current tuple, divided +by the total number of tuples in the window partition. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted, the function returns the same result +(1.0) for each tuple. +* Arguments: +** None. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** A number greater than 0 and less than or equal to 1. The higher the +value, the higher the ranking. +* Example: ++ +For each author, find the cumulative distribution of all messages in +order of message ID. 
++ +-------------------------------------------------- +SELECT m.messageId, m.authorId, CUME_DIST() OVER ( + PARTITION BY m.authorId + ORDER BY m.messageId +) AS `rank` +FROM GleambookMessages AS m; +-------------------------------------------------- +* The expected result is: ++ +-------------------- +[ + { + "rank": 0.2, + "messageId": 2, + "authorId": 1 + }, + { + "rank": 0.4, + "messageId": 4, + "authorId": 1 + }, + { + "rank": 0.6, + "messageId": 8, + "authorId": 1 + }, + { + "rank": 0.8, + "messageId": 10, + "authorId": 1 + }, + { + "rank": 1, + "messageId": 11, + "authorId": 1 + }, + { + "rank": 0.5, + "messageId": 3, + "authorId": 2 + }, + { + "rank": 1, + "messageId": 6, + "authorId": 2 + } +] +-------------------- + +[[dense_rank]] +dense_rank +^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------------- +DENSE_RANK() OVER ([window-partition-clause] [window-order-clause]) +------------------------------------------------------------------- +* Returns the dense rank of the current tuple – that is, the number of +distinct tuples preceding this tuple in the current window partition, +plus one. ++ +The tuples are ordered by the window order clause. If any tuples are +tied, they will have the same rank. If the window order clause is +omitted, the function returns the same result (1) for each tuple. ++ +For this function, when any tuples have the same rank, the rank of the +next tuple will be consecutive, so there will not be a gap in the +sequence of returned values. For example, if there are three tuples +ranked 2, the next dense rank is 3. +* Arguments: +** None. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** An integer, greater than or equal to 1. +* Example: ++ +For each author, find the dense rank of all messages in order of +location. 
++ +----------------------------------------------------------------- +SELECT m.authorId, m.messageId, m.senderLocation[1] as longitude, +DENSE_RANK() OVER ( + PARTITION BY m.authorId + ORDER BY m.senderLocation[1] +) AS `rank` +FROM GleambookMessages AS m; +----------------------------------------------------------------- +* The expected result is: ++ +---------------------- +[ + { + "rank": 1, + "authorId": 1, + "messageId": 10, + "longitude": 70.01 + }, + { + "rank": 2, + "authorId": 1, + "messageId": 11, + "longitude": 77.49 + }, + { + "rank": 3, + "authorId": 1, + "messageId": 2, + "longitude": 80.87 + }, + { + "rank": 3, + "authorId": 1, + "messageId": 8, + "longitude": 80.87 + }, + { + "rank": 4, + "authorId": 1, + "messageId": 4, + "longitude": 97.04 + }, + { + "rank": 1, + "authorId": 2, + "messageId": 6, + "longitude": 75.56 + }, + { + "rank": 2, + "authorId": 2, + "messageId": 3, + "longitude": 81.01 + } +] +---------------------- + +[[first_value]] +first_value +^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------ +FIRST_VALUE(expr) [nulls-treatment] OVER (window-definition) +------------------------------------------------------------ +* Returns the requested value from the first tuple in the current window +frame, where the window frame is specified by the window definition. +* Arguments: +** `expr`: The value that you want to return from the first tuple in the +window frame. [link:#fn_1[1]] +* Modifiers: +** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +Determines how NULL or MISSING values are treated when finding the first +value in the window frame. +*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are ignored when finding the first tuple. In this +case, the function returns the first non-NULL, non-MISSING value. +*** `RESPECT NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are included when finding the first tuple. 
++ +If this modifier is omitted, the default is `RESPECT NULLS`. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +* Return Value: +** The specified value from the first tuple. The order of the tuples is +determined by the window order clause. +** NULL, if the frame was empty or if all values were NULL or MISSING +and the `IGNORE NULLS` modifier was specified. +** In the following cases, this function may return unpredictable +results. +*** If the window order clause is omitted. +*** If the window frame is defined by `ROWS`, and there are tied tuples +in the window frame. +** To make the function return deterministic results, add a window order +clause, or add further ordering terms to the window order clause so that +no tuples are tied. +** If the window frame is defined by `RANGE` or `GROUPS`, and there are +tied tuples in the window frame, the function returns the first value of +the input expression. +* Example: ++ +For each author, show the length of each message, including the length +of the shortest message from that author. 
++ +------------------------------------- +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +FIRST_VALUE(LENGTH(m.message)) OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) +) AS shortest_message +FROM GleambookMessages AS m; +------------------------------------- +* The expected result is: ++ +--------------------------- +[ + { + "message_length": 31, + "shortest_message": 31, + "authorId": 1, + "messageId": 8 + }, + { + "message_length": 39, + "shortest_message": 31, + "authorId": 1, + "messageId": 11 + }, + { + "message_length": 44, + "shortest_message": 31, + "authorId": 1, + "messageId": 4 + }, + { + "message_length": 45, + "shortest_message": 31, + "authorId": 1, + "messageId": 2 + }, + { + "message_length": 51, + "shortest_message": 31, + "authorId": 1, + "messageId": 10 + }, + { + "message_length": 35, + "shortest_message": 35, + "authorId": 2, + "messageId": 3 + }, + { + "message_length": 44, + "shortest_message": 35, + "authorId": 2, + "messageId": 6 + } +] +--------------------------- + +[[lag]] +lag +^^^ + +* Syntax: ++ +------------------------------------------------------------------------------------------------------- +LAG(expr[, offset[, default]]) [nulls-treatment] OVER ([window-partition-clause] [window-order-clause]) +------------------------------------------------------------------------------------------------------- +* Returns the value from a tuple at a given offset prior to the current +tuple position. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted, the return values may be unpredictable. +* Arguments: +** `expr`: The value that you want to return from the offset tuple. +[link:#fn_1[1]] +** `offset`: (Optional) A positive integer. If omitted, the default is +1. +** `default`: (Optional) The value to return when the offset goes out of +partition scope. If omitted, the default is NULL. 
+* Modifiers: +** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +Determines how NULL or MISSING values are treated when finding the +offset tuple in the window partition. +*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are ignored when finding the offset tuple. +*** `RESPECT NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are included when finding the offset tuple. ++ +If this modifier is omitted, the default is `RESPECT NULLS`. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** The specified value from the offset tuple. +** If the offset tuple is out of partition scope, it returns the default +value, or NULL if no default is specified. +* Example: ++ +For each author, show the length of each message, including the length +of the next-shortest message. ++ +------------------------------------------------------ +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +LAG(LENGTH(m.message), 1, "No shorter message") OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) +) AS next_shortest_message +FROM GleambookMessages AS m; +------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------- +[ + { + "message_length": 31, + "authorId": 1, + "messageId": 8, + "next_shortest_message": "No shorter message" + }, + { + "message_length": 39, + "authorId": 1, + "messageId": 11, + "next_shortest_message": 31 + }, + { + "message_length": 44, + "authorId": 1, + "messageId": 4, + "next_shortest_message": 39 + }, + { + "message_length": 45, + "authorId": 1, + "messageId": 2, + "next_shortest_message": 44 + }, + { + "message_length": 51, + "authorId": 1, + "messageId": 10, + "next_shortest_message": 45 + }, + { + "message_length": 35, + "authorId": 2, + 
"messageId": 3, + "next_shortest_message": "No shorter message" + }, + { + "message_length": 44, + "authorId": 2, + "messageId": 6, + "next_shortest_message": 35 + } +] +------------------------------------------------- + +[[last_value]] +last_value +^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------------------- +LAST_VALUE(expr) [nulls-treatment] OVER (window-definition) +----------------------------------------------------------- +* Returns the requested value from the last tuple in the current window +frame, where the window frame is specified by the window definition. +* Arguments: +** `expr`: The value that you want to return from the last tuple in the +window frame. [link:#fn_1[1]] +* Modifiers: +** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +Determines how NULL or MISSING values are treated when finding the last +tuple in the window frame. +*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are ignored when finding the last tuple. In this +case, the function returns the last non-NULL, non-MISSING value. +*** `RESPECT NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are included when finding the last tuple. ++ +If this modifier is omitted, the default is `RESPECT NULLS`. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +* Return Value: +** The specified value from the last tuple. The order of the tuples is +determined by the window order clause. +** NULL, if the frame was empty or if all values were NULL or MISSING +and the `IGNORE NULLS` modifier was specified. +** In the following cases, this function may return unpredictable +results. +*** If the window order clause is omitted. +*** If the window frame clause is omitted. 
+*** If the window frame is defined by `ROWS`, and there are tied tuples +in the window frame. +** To make the function return deterministic results, add a window order +clause, or add further ordering terms to the window order clause so that +no tuples are tied. +** If the window frame is defined by `RANGE` or `GROUPS`, and there are +tied tuples in the window frame, the function returns the last value of +the input expression. +* Example: ++ +For each author, show the length of each message, including the length +of the longest message from that author. ++ +--------------------------------------------------------------- +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +LAST_VALUE(LENGTH(m.message)) OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -- ➊ +) AS longest_message +FROM GleambookMessages AS m; +--------------------------------------------------------------- +* The expected result is: ++ +-------------------------- +[ + { + "message_length": 31, + "longest_message": 51, + "authorId": 1, + "messageId": 8 + }, + { + "message_length": 39, + "longest_message": 51, + "authorId": 1, + "messageId": 11 + }, + { + "message_length": 44, + "longest_message": 51, + "authorId": 1, + "messageId": 4 + }, + { + "message_length": 45, + "longest_message": 51, + "authorId": 1, + "messageId": 2 + }, + { + "message_length": 51, + "longest_message": 51, + "authorId": 1, + "messageId": 10 + }, + { + "message_length": 35, + "longest_message": 44, + "authorId": 2, + "messageId": 3 + }, + { + "message_length": 44, + "longest_message": 44, + "authorId": 2, + "messageId": 6 + } +] +-------------------------- ++ +➀ This clause specifies that the window frame should extend to the end +of the window partition. Without this clause, the end point of the +window frame would always be the current tuple. This would mean that the +longest message would always be the same as the current message. 
+ +[[lead]] +lead +^^^^ + +* Syntax: ++ +-------------------------------------------------------------------------------------------------------- +LEAD(expr[, offset[, default]]) [nulls-treatment] OVER ([window-partition-clause] [window-order-clause]) +-------------------------------------------------------------------------------------------------------- +* Returns the value from a tuple at a given offset ahead of the current +tuple position. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted, the return values may be unpredictable. +* Arguments: +** `expr`: The value that you want to return from the offset tuple. +[link:#fn_1[1]] +** `offset`: (Optional) A positive integer. If omitted, the default is +1. +** `default`: (Optional) The value to return when the offset goes out of +window partition scope. If omitted, the default is NULL. +* Modifiers: +** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +Determines how NULL or MISSING values are treated when finding the +offset tuple in the window partition. +*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are ignored when finding the offset tuple. +*** `RESPECT NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are included when finding the offset tuple. ++ +If this modifier is omitted, the default is `RESPECT NULLS`. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** The specified value from the offset tuple. +** If the offset tuple is out of partition scope, it returns the default +value, or NULL if no default is specified. +* Example: ++ +For each author, show the length of each message, including the length +of the next-longest message. 
++ +------------------------------------------------------ +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +LEAD(LENGTH(m.message), 1, "No longer message") OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) +) AS next_longest_message +FROM GleambookMessages AS m; +------------------------------------------------------ +* The expected result is: ++ +----------------------------------------------- +[ + { + "message_length": 31, + "authorId": 1, + "messageId": 8, + "next_longest_message": 39 + }, + { + "message_length": 39, + "authorId": 1, + "messageId": 11, + "next_longest_message": 44 + }, + { + "message_length": 44, + "authorId": 1, + "messageId": 4, + "next_longest_message": 45 + }, + { + "message_length": 45, + "authorId": 1, + "messageId": 2, + "next_longest_message": 51 + }, + { + "message_length": 51, + "authorId": 1, + "messageId": 10, + "next_longest_message": "No longer message" + }, + { + "message_length": 35, + "authorId": 2, + "messageId": 3, + "next_longest_message": 44 + }, + { + "message_length": 44, + "authorId": 2, + "messageId": 6, + "next_longest_message": "No longer message" + } +] +----------------------------------------------- + +[[nth_value]] +nth_value +^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------------------------------------- +NTH_VALUE(expr, offset) [nthval-from] [nulls-treatment] OVER (window-definition) +-------------------------------------------------------------------------------- +* Returns the requested value from a tuple in the current window frame, +where the window frame is specified by the window definition. +* Arguments: +** `expr`: The value that you want to return from the offset tuple in +the window frame. [link:#fn_1[1]] +** `offset`: The number of the offset tuple within the window frame, +counting from 1. +* Modifiers: +** link:manual.html#Nth_val_from[Nth Val From]: (Optional) Determines +where the function starts counting the offset. 
+*** `FROM FIRST`: Counting starts at the first tuple in the window +frame. In this case, an offset of 1 is the first tuple in the window +frame, 2 is the second tuple, and so on. +*** `FROM LAST`: Counting starts at the last tuple in the window frame. +In this case, an offset of 1 is the last tuple in the window frame, 2 is +the second-to-last tuple, and so on. ++ +The order of the tuples is determined by the window order clause. If +this modifier is omitted, the default is `FROM FIRST`. +** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +Determines how NULL or MISSING values are treated when finding the +offset tuple in the window frame. +*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are ignored when finding the offset tuple. +*** `RESPECT NULLS`: If the values for any tuples evaluate to NULL or +MISSING, those tuples are included when finding the offset tuple. ++ +If this modifier is omitted, the default is `RESPECT NULLS`. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +* Return Value: +** The specified value from the offset tuple. +** In the following cases, this function may return unpredictable +results. +*** If the window order clause is omitted. +*** If the window frame is defined by `ROWS`, and there are tied tuples +in the window frame. +** To make the function return deterministic results, add a window order +clause, or add further ordering terms to the window order clause so that +no tuples are tied. +** If the window frame is defined by `RANGE` or `GROUPS`, and there are +tied tuples in the window frame, the function returns the first value of +the input expression when counting `FROM FIRST`, or the last value of +the input expression when counting `FROM LAST`. 
+* Example 1: ++ +For each author, show the length of each message, including the length +of the second shortest message from that author. ++ +--------------------------------------------------------------- +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +NTH_VALUE(LENGTH(m.message), 2) FROM FIRST OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -- ➊ +) AS shortest_message_but_1 +FROM GleambookMessages AS m; +--------------------------------------------------------------- +* The expected result is: ++ +--------------------------------- +[ + { + "message_length": 31, + "shortest_message_but_1": 39, + "authorId": 1, + "messageId": 8 + }, + { + "message_length": 39, + "shortest_message_but_1": 39, + "authorId": 1, + "messageId": 11 // ➋ + }, + { + "message_length": 44, + "shortest_message_but_1": 39, + "authorId": 1, + "messageId": 4 + }, + { + "message_length": 45, + "shortest_message_but_1": 39, + "authorId": 1, + "messageId": 2 + }, + { + "message_length": 51, + "shortest_message_but_1": 39, + "authorId": 1, + "messageId": 10 + }, + { + "message_length": 35, + "shortest_message_but_1": 44, + "authorId": 2, + "messageId": 3 + }, + { + "message_length": 44, + "shortest_message_but_1": 44, + "authorId": 2, + "messageId": 6 // ➋ + } +] +--------------------------------- ++ +➀ This clause specifies that the window frame should extend to the end +of the window partition. Without this clause, the end point of the +window frame would always be the current tuple. This would mean that for +the shortest message, the function would be unable to find the second +shortest message. ++ +➁ The second shortest message from this author. +* Example 2: ++ +For each author, show the length of each message, including the length +of the second longest message from that author. 
++ +--------------------------------------------------------------- +SELECT m.authorId, m.messageId, +LENGTH(m.message) AS message_length, +NTH_VALUE(LENGTH(m.message), 2) FROM LAST OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -- ➊ +) AS longest_message_but_1 +FROM GleambookMessages AS m; +--------------------------------------------------------------- +* The expected result is: ++ +-------------------------------- +[ + { + "message_length": 31, + "longest_message_but_1": 45, + "authorId": 1, + "messageId": 8 + }, + { + "message_length": 39, + "longest_message_but_1": 45, + "authorId": 1, + "messageId": 11 + }, + { + "message_length": 44, + "longest_message_but_1": 45, + "authorId": 1, + "messageId": 4 + }, + { + "message_length": 45, + "longest_message_but_1": 45, + "authorId": 1, + "messageId": 2 // ➋ + }, + { + "message_length": 51, + "longest_message_but_1": 45, + "authorId": 1, + "messageId": 10 + }, + { + "message_length": 35, + "longest_message_but_1": 35, + "authorId": 2, + "messageId": 3 // ➋ + }, + { + "message_length": 44, + "longest_message_but_1": 35, + "authorId": 2, + "messageId": 6 + } +] +-------------------------------- ++ +➀ This clause specifies that the window frame should extend to the end +of the window partition. Without this clause, the end point of the +window frame would always be the current tuple. This would mean the +function would be unable to find the second longest message for shorter +messages. ++ +➁ The second longest message from this author. 
+ +[[ntile]] +ntile +^^^^^ + +* Syntax: ++ +----------------------------------------------------------------------- +NTILE(num_tiles) OVER ([window-partition-clause] [window-order-clause]) +----------------------------------------------------------------------- +* Divides the window partition into the specified number of tiles, and +allocates each tuple in the window partition to a tile, so that as far +as possible each tile has an equal number of tuples. When the set of +tuples is not equally divisible by the number of tiles, the function +puts more tuples into the lower-numbered tiles. For each tuple, the +function returns the number of the tile into which that tuple was +placed. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted then the tuples are processed in an +undefined order. +* Arguments: +** `num_tiles`: The number of tiles into which you want to divide the +window partition. This argument can be an expression and must evaluate +to a number. If the number is not an integer, it will be truncated. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** An integer greater than or equal to 1 and less than or equal to the +number of tiles. +* Example: ++ +Allocate each message to one of three tiles by length and message ID. 
++ +-------------------------------------------------- +SELECT m.messageId, LENGTH(m.message) AS `length`, +NTILE(3) OVER ( + ORDER BY LENGTH(m.message), m.messageId +) AS `ntile` +FROM GleambookMessages AS m; +-------------------------------------------------- +* The expected result is: ++ +------------------- +[ + { + "length": 31, + "ntile": 1, + "messageId": 8 + }, + { + "length": 35, + "ntile": 1, + "messageId": 3 + }, + { + "length": 39, + "ntile": 1, + "messageId": 11 + }, + { + "length": 44, + "ntile": 2, + "messageId": 4 + }, + { + "length": 44, + "ntile": 2, + "messageId": 6 + }, + { + "length": 45, + "ntile": 3, + "messageId": 2 + }, + { + "length": 51, + "ntile": 3, + "messageId": 10 + } +] +------------------- + +[[percent_rank]] +percent_rank +^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------------------------------- +PERCENT_RANK() OVER ([window-partition-clause] [window-order-clause]) +--------------------------------------------------------------------- +* Returns the percentile rank of the current tuple – that is, the rank +of the tuple minus one, divided by the total number of tuples in the +window partition minus one. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted, the function returns the same result (0) +for each tuple. +* Arguments: +** None. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** A number between 0 and 1. The higher the value, the higher the +ranking. +* Example: ++ +For each author, find the percentile rank of all messages in order of +message ID. 
++ +----------------------------------------------------- +SELECT m.messageId, m.authorId, PERCENT_RANK() OVER ( + PARTITION BY m.authorId + ORDER BY m.messageId +) AS `rank` +FROM GleambookMessages AS m; +----------------------------------------------------- +* The expected result is: ++ +-------------------- +[ + { + "rank": 0, + "messageId": 2, + "authorId": 1 + }, + { + "rank": 0.25, + "messageId": 4, + "authorId": 1 + }, + { + "rank": 0.5, + "messageId": 8, + "authorId": 1 + }, + { + "rank": 0.75, + "messageId": 10, + "authorId": 1 + }, + { + "rank": 1, + "messageId": 11, + "authorId": 1 + }, + { + "rank": 0, + "messageId": 3, + "authorId": 2 + }, + { + "rank": 1, + "messageId": 6, + "authorId": 2 + } +] +-------------------- + +[[rank]] +rank +^^^^ + +* Syntax: ++ +------------------------------------------------------------- +RANK() OVER ([window-partition-clause] [window-order-clause]) +------------------------------------------------------------- +* Returns the rank of the current tuple – that is, the number of +distinct tuples preceding this tuple in the current window partition, +plus one. ++ +The tuples are ordered by the window order clause. If any tuples are +tied, they will have the same rank. If the window order clause is +omitted, the function returns the same result (1) for each tuple. ++ +When any tuples have the same rank, the rank of the next tuple will +include all preceding tuples, so there may be a gap in the sequence of +returned values. For example, if there are three tuples ranked 2, the +next rank is 5. ++ +To avoid gaps in the returned values, use the DENSE_RANK() function +instead. +* Arguments: +** None. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** An integer, greater than or equal to 1. +* Example: ++ +For each author, find the rank of all messages in order of location. 
++ +----------------------------------------------------------------- +SELECT m.authorId, m.messageId, m.senderLocation[1] as longitude, +RANK() OVER ( + PARTITION BY m.authorId + ORDER BY m.senderLocation[1] +) AS `rank` +FROM GleambookMessages AS m; +----------------------------------------------------------------- +* The expected result is: ++ +---------------------- +[ + { + "rank": 1, + "authorId": 1, + "messageId": 10, + "longitude": 70.01 + }, + { + "rank": 2, + "authorId": 1, + "messageId": 11, + "longitude": 77.49 + }, + { + "rank": 3, + "authorId": 1, + "messageId": 2, + "longitude": 80.87 + }, + { + "rank": 3, + "authorId": 1, + "messageId": 8, + "longitude": 80.87 + }, + { + "rank": 5, + "authorId": 1, + "messageId": 4, + "longitude": 97.04 + }, + { + "rank": 1, + "authorId": 2, + "messageId": 6, + "longitude": 75.56 + }, + { + "rank": 2, + "authorId": 2, + "messageId": 3, + "longitude": 81.01 + } +] +---------------------- + +[[ratio_to_report]] +ratio_to_report +^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------- +RATIO_TO_REPORT(expr) OVER (window-definition) +---------------------------------------------- +* Returns the fractional ratio of the specified value for each tuple to +the sum of values for all tuples in the window frame. +* Arguments: +** `expr`: The value for which you want to calculate the fractional +ratio. [link:#fn_1[1]] +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +* Return Value: +** A number between 0 and 1, representing the fractional ratio of the +value for the current tuple to the sum of values for all tuples in the +current window frame. The sum of returned values for all tuples in the +current window frame is 1. 
+** If the input expression does not evaluate to a number, or the sum of +values for all tuples is zero, it returns NULL. +* Example: ++ +For each author, calculate the length of each message as a fraction of +the total length of all messages. ++ +----------------------------------------- +SELECT m.messageId, m.authorId, +RATIO_TO_REPORT(LENGTH(m.message)) OVER ( + PARTITION BY m.authorId +) AS length_ratio +FROM GleambookMessages AS m; +----------------------------------------- +* The expected result is: ++ +---------------------------------------- +[ + { + "length_ratio": 0.21428571428571427, + "messageId": 2, + "authorId": 1 + }, + { + "length_ratio": 0.20952380952380953, + "messageId": 4, + "authorId": 1 + }, + { + "length_ratio": 0.14761904761904762, + "messageId": 8, + "authorId": 1 + }, + { + "length_ratio": 0.24285714285714285, + "messageId": 10, + "authorId": 1 + }, + { + "length_ratio": 0.18571428571428572, + "messageId": 11, + "authorId": 1 + }, + { + "length_ratio": 0.4430379746835443, + "messageId": 3, + "authorId": 2 + }, + { + "length_ratio": 0.5569620253164557, + "messageId": 6, + "authorId": 2 + } +] +---------------------------------------- + +[[row_number]] +row_number +^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------------- +ROW_NUMBER() OVER ([window-partition-clause] [window-order-clause]) +------------------------------------------------------------------- +* Returns a unique row number for every tuple in every window partition. +In each window partition, the row numbering starts at 1. ++ +The window order clause determines the sort order of the tuples. If the +window order clause is omitted, the return values may be unpredictable. +* Arguments: +** None. +* Clauses: +** (Optional) link:manual.html#Window_partition_clause[Window Partition +Clause]. +** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +* Return Value: +** An integer, greater than or equal to 1. 
+* Example: ++ +For each author, number all messages in order of length. ++ +------------------------------- +SELECT m.messageId, m.authorId, +ROW_NUMBER() OVER ( + PARTITION BY m.authorId + ORDER BY LENGTH(m.message) +) AS `row` +FROM GleambookMessages AS m; +------------------------------- +* The expected result is: ++ +-------------------- +[ + { + "row": 1, + "messageId": 8, + "authorId": 1 + }, + { + "row": 2, + "messageId": 11, + "authorId": 1 + }, + { + "row": 3, + "messageId": 4, + "authorId": 1 + }, + { + "row": 4, + "messageId": 2, + "authorId": 1 + }, + { + "row": 5, + "messageId": 10, + "authorId": 1 + }, + { + "row": 1, + "messageId": 3, + "authorId": 2 + }, + { + "row": 2, + "messageId": 6, + "authorId": 2 + } +] +-------------------- + +''''' + +\1. If the query contains the GROUP BY clause or any +link:#AggregateFunctions[aggregate functions], this expression must only +depend on GROUP BY expressions or aggregate functions. diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc new file mode 100644 index 00000000000..d5faf446d63 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc @@ -0,0 +1,665 @@ +[[bitwise-functions]] +Bitwise Functions +~~~~~~~~~~~~~~~~~ + +All Bit/Binary functions can only operate on 64-bit signed integers. + +*Note:* All non-integer numbers and other data types result in null. + +*Note:* The query language uses two’s complement representation. + +When looking at the value in binary form, bit 1 is the Least Significant +Bit (LSB) and bit 32 is the Most Significant Bit (MSB). + +(MSB) Bit 32 → `0000 0000 0000 0000 0000 0000 0000 0000` ← Bit 1 (LSB) + +[[bitand]] +bitand +^^^^^^ + +* Syntax: ++ +------------------------------------------------ +BITAND(int_value1, int_value2, ... 
, int_valueN) +------------------------------------------------ +* Returns the result of a bitwise AND operation performed on all input +integer values. ++ +The bitwise AND operation compares each bit of `int_value1` to the +corresponding bit of every other `int_value`. If all bits are 1, then +the corresponding result bit is set to 1; otherwise it is set to 0 +(zero). +* Arguments: +** `int_valueI`: Integers, or any valid expressions which evaluate to +integers, that are used to compare. +* Return Value: +** An integer, representing the bitwise AND between all of the input +integers. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Compare 3 (0011 in binary) and 6 (0110 in binary). ++ +-------------------------- +{ "BitAND": BITAND(3,6) }; +-------------------------- +* The expected result is: ++ +--------------- +{ "BitAND": 2 } +--------------- ++ +This results in 2 (0010 in binary) because only bit 2 is set in both 3 +(00*1*1) and 6 (01*1*0). +* Example 2: ++ +Compare 4.5 and 3 (0011 in binary). ++ +---------------------------- +{ "BitAND": BITAND(4.5,3) }; +---------------------------- +* The expected result is: ++ +------------------ +{ "BitAND": null } +------------------ ++ +The result is null because 4.5 is not an integer. +* Example 3: ++ +Compare 4.0 (0100 in binary) and 3 (0011 in binary). ++ +---------------------------- +{ "BitAND": BITAND(4.0,3) }; +---------------------------- +* The expected result is: ++ +--------------- +{ "BitAND": 0 } +--------------- ++ +This results in 0 (zero) because 4.0 (0100) and 3 (0011) do not share +any bits that are both 1. +* Example 4: ++ +Compare 3 (0011 in binary) and 6 (0110 in binary) and 15 (1111 in +binary). 
++ +----------------------------- +{ "BitAND": BITAND(3,6,15) }; +----------------------------- +* The expected result is: ++ +--------------- +{ "BitAND": 2 } +--------------- ++ +This results in 2 (0010 in binary) because only the 2nd bit from the +right is 1 in all three numbers. + +[[bitclear]] +bitclear +^^^^^^^^ + +* Syntax: ++ +------------------------------ +BITCLEAR(int_value, positions) +------------------------------ +* Returns the result after clearing the specified bit, or array of bits +in `int_value` using the given `positions`. ++ +*Note:* Specifying a negative or zero bit position makes the function +return a null. +* Arguments: +** `int_value`: An integer, or any valid expression which evaluates to +an integer, that contains the target bit or bits to clear. +** `positions`: An integer or an array of integers specifying the +position or positions to be cleared. +* Return Value: +** An integer, representing the result after clearing the bit or bits +specified. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Clear bit 1 from 6 (0110 in binary). ++ +------------------------------ +{ "BitCLEAR": BITCLEAR(6,1) }; +------------------------------ +* The expected result is: ++ +----------------- +{ "BitCLEAR": 6 } +----------------- ++ +This results in 6 (011*0* in binary) because bit 1 was already zero. +* Example 2: ++ +Clear bits 1 and 2 from 6 (01*10* in binary). ++ +---------------------------------- +{ "BitCLEAR": BITCLEAR(6,[1,2]) }; +---------------------------------- +* The expected result is: ++ +----------------- +{ "BitCLEAR": 4 } +----------------- ++ +This results in 4 (01*0*0 in binary) because bit 2 changed to zero. +* Example 3: ++ +Clear bits 1, 2, 4, and 5 from 31 (0*11*1*11* in binary). 
++ +--------------------------------------- +{ "BitCLEAR": BITCLEAR(31,[1,2,4,5]) }; +--------------------------------------- +* The expected result is: ++ +----------------- +{ "BitCLEAR": 4 } +----------------- ++ +This results in 4 (0*00*1*00*) because bits 1, 2, 4, and 5 changed to +zero. + +[[bitnot]] +bitnot +^^^^^^ + +* Syntax: ++ +----------------- +BITNOT(int_value) +----------------- +* Returns the results of a bitwise logical NOT operation performed on an +integer value. ++ +The bitwise logical NOT operation reverses the bits in the value. For +each value bit that is 1, the corresponding result bit will be set to 0 +(zero); and for each value bit that is 0 (zero), the corresponding +result bit will be set to 1. ++ +*Note:* All bits of the integer will be altered by this operation. +* Arguments: +** `int_value`: An integer, or any valid expression which evaluates to +an integer, that contains the target bits to reverse. +* Return Value: +** An integer, representing the result after performing the logical NOT +operation. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Perform the NOT operation on 3 (0000 0000 0000 0000 0000 0000 0000 0011 +in binary). ++ +------------------------ +{ "BitNOT": BITNOT(3) }; +------------------------ +* The expected result is: ++ +---------------- +{ "BitNOT": -4 } +---------------- ++ +This results in -4 (*1111 1111 1111 1111 1111 1111 1111 1100* in binary) +because all bits changed. + +[[bitor]] +bitor +^^^^^ + +* Syntax: ++ +----------------------------------------------- +BITOR(int_value1, int_value2, ... , int_valueN) +----------------------------------------------- +* Returns the result of a bitwise inclusive OR operation performed on +all input integer values. ++ +The bitwise inclusive OR operation compares each bit of `int_value1` to +the corresponding bit of every other `int_value`. 
If any bit is 1, the +corresponding result bit is set to 1; otherwise, it is set to 0 (zero). +* Arguments: +** `int_valueI`: Integers, or any valid expressions which evaluate to +integers, that are used to compare. +* Return Value: +** An integer, representing the bitwise OR between all of the input +integers. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Perform OR on 3 (0011 in binary) and 6 (0110 in binary). ++ +------------------------ +{ "BitOR": BITOR(3,6) }; +------------------------ +* The expected result is: ++ +-------------- +{ "BitOR": 7 } +-------------- ++ +This results in 7 (0*111* in binary) because at least 1 bit of each +(00*11* and 0*11*0) is 1 in bits 1, 2, and 3. +* Example 2: ++ +Perform OR on 3 (0011 in binary) and -4 (1111 1111 1111 ... 1111 1100 in +binary). ++ +------------------------- +{ "BitOR": BITOR(3,-4) }; +------------------------- +* The expected result is: ++ +--------------- +{ "BitOR": -1 } +--------------- ++ +This results in -1 (*1111 1111 1111 ... 1111 1111* in binary) because +the two 1 bits in 3 fill in the two 0 bits in -4 to turn on all the +bits. +* Example 3: ++ +Perform OR on 3 (0011 in binary) and 6 (0110 in binary) and 15 (1111 in +binary). ++ +--------------------------- +{ "BitOR": BITOR(3,6,15) }; +--------------------------- +* The expected result is: ++ +--------------- +{ "BitOR": 15 } +--------------- ++ +This results in 15 (1111 in binary) because there is at least one 1 in +each of the four rightmost bits. + +[[bitset]] +bitset +^^^^^^ + +* Syntax: ++ +---------------------------- +BITSET(int_value, positions) +---------------------------- +* Returns the result after setting the specified bit `position`, or +array of bit positions, to 1 in the given `int_value`. ++ +*Note:* Specifying a negative or zero position makes the function return +a null. 
+* Arguments: +** `int_value`: An integer, or any valid expression which evaluates to +an integer, that contains the target bit or bits to set. +** `positions`: An integer or an array of integers specifying the +position or positions to be set. +* Return Value: +** An integer, representing the result after setting the bit or bits +specified. If the bit is already set, then it stays set. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Set bit 1 in the value 6 (011*0* in binary). ++ +-------------------------- +{ "BitSET": BITSET(6,1) }; +-------------------------- +* The expected result is: ++ +--------------- +{ "BitSET": 7 } +--------------- ++ +This results in 7 (011*1* in binary) because bit 1 changed to 1. +* Example 2: ++ +Set bits 1 and 2 in the value 6 (01*10* in binary). ++ +------------------------------ +{ "BitSET": BITSET(6,[1,2]) }; +------------------------------ +* The expected result is: ++ +--------------- +{ "BitSET": 7 } +--------------- ++ +This also results in 7 (01*11* in binary) because bit 1 changed while +bit 2 remained the same. +* Example 3: ++ +Set bits 1 and 4 in the value 6 (*0*11*0* in binary). ++ +------------------------------ +{ "BitSET": BITSET(6,[1,4]) }; +------------------------------ +* The expected result is: ++ +---------------- +{ "BitSET": 15 } +---------------- ++ +This results in 15 (*1*11*1* in binary) because bit 1 and 4 changed to +ones. + +[[bitshift]] +bitshift +^^^^^^^^ + +* Syntax: ++ +------------------------------------------- +BITSHIFT(int_value, shift_amount[, rotate]) +------------------------------------------- +* Returns the result of a bit shift operation performed on the integer +value `int_value`. The `shift_amount` supports left and right shifts. +These are logical shifts. The third parameter `rotate` supports circular +shift. This is similar to the BitROTATE function in Oracle. 
+* Arguments: +** `int_value`: An integer, or any valid expression which evaluates to +an integer, that contains the target bit or bits to shift. +** `shift_amount`: An integer, or any valid expression which evaluates +to an integer, that contains the number of bits to shift. +*** A positive (+) number means this is a LEFT shift. +*** A negative (-) number means this is a RIGHT shift. +** `rotate`: (Optional) A boolean, or any valid expression which +evaluates to a boolean, where: +*** FALSE means this is a LOGICAL shift, where bits shifted off the end +of a value are considered lost. +*** TRUE means this is a CIRCULAR shift (shift-and-rotate operation), +where bits shifted off the end of a value are rotated back onto the +value at the _other_ end. In other words, the bits rotate in what might +be thought of as a circular pattern; therefore, these bits are not lost. ++ +If omitted, the default is FALSE. ++ +For comparison, see the below table. ++ +[cols=",,,",options="header",] +|======================================================================= +|Input |Shift |Result of Logical Shift (Rotate FALSE) |Result of +Circular Shift (Rotate TRUE) +|6 (0000 0110) |4 |96 (0110 0000) |96 (0110 0000) + +|6 (0000 0110) |3 |48 (0011 0000) |48 (0011 0000) + +|6 (0000 0110) |2 |24 (0001 1000) |24 (0001 1000) + +|6 (0000 0110) |1 |12 (0000 1100) |12 (0000 1100) + +|*6 (0000 0110)* |*0* |*6 (0000 0110)* |*6 (0000 0110)* + +|6 (0000 0110) |-1 |3 (0000 0011) |3 (0000 0011) + +|6 (0000 0110) |-2 |1 (0000 0001) |-9223372036854775807 (1000 0000 ... +0000 0001) + +|6 (0000 0110) |-3 |0 (0000 0000) |-4611686018427387904 (1100 0000 ... +0000 0000) + +|6 (0000 0110) |-4 |0 (0000 0000) |6917529027641081856 (0110 0000 ... +0000 0000) +|======================================================================= +* Return Value: +** An integer, representing the result of either a logical or circular +shift of the given integer. 
+* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Logical left shift of the number 6 (0110 in binary) by one bit. ++ +------------------------------------ +{ "BitSHIFT": BITSHIFT(6,1,FALSE) }; +------------------------------------ +* The expected result is: ++ +------------------ +{ "BitSHIFT": 12 } +------------------ ++ +This results in 12 (1100 in binary) because the 1-bits moved from +positions 2 and 3 to positions 3 and 4. +* Example 2: ++ +Logical right shift of the number 6 (0110 in binary) by two bits. ++ +------------------------------- +{ "BitSHIFT": BITSHIFT(6,-2) }; +------------------------------- +* The expected result is: ++ +----------------- +{ "BitSHIFT": 1 } +----------------- ++ +This results in 1 (0001 in binary) because the 1-bit in position 3 moved +to position 1 and the 1-bit in position 2 was dropped. +* Example 2b: ++ +Circular right shift of the number 6 (0110 in binary) by two bits. ++ +------------------------------------ +{ "BitSHIFT": BITSHIFT(6,-2,TRUE) }; +------------------------------------ +* The expected result is: ++ +------------------------------------ +{ "BitSHIFT": -9223372036854775807 } +------------------------------------ ++ +This results in -9223372036854775807 (1000 0000 ... 0000 0001 in binary) +because the 1-bit in position 3 moved to position 1, while the 1-bit in +position 2 wrapped right, around to the Most Significant Bit position, +and changed the integer’s sign to negative. +* Example 3: ++ +Circular left shift of the number 524288 (1000 0000 0000 0000 0000 in +binary) by 45 bits. ++ +----------------------------------------- +{ "BitSHIFT": BITSHIFT(524288,45,TRUE) }; +----------------------------------------- +* The expected result is: ++ +----------------- +{ "BitSHIFT": 1 } +----------------- ++ +This results in 1 because the 1-bit wrapped left, around to the Least +Significant Bit position. 
+ +[[bittest]] +bittest +^^^^^^^ + +* Syntax: ++ +----------------------------------------- +BITTEST(int_value, positions [, all_set]) +----------------------------------------- +* Returns TRUE if the specified bit, or bits, is a 1; otherwise, returns +FALSE if the specified bit, or bits, is a 0 (zero). ++ +*Note:* Specifying a negative or zero bit position will result in null +being returned. +* Arguments: +** `int_value`: An integer, or any valid expression which evaluates to +an integer, that contains the target bit or bits to test. +** `positions`: An integer or an array of integers specifying the +position or positions to be tested. +** `all_set`: (Optional) A boolean, or any valid expression which +evaluates to a boolean. +*** When `all_set` is FALSE, then it returns TRUE even if one bit in one +of the positions is set. +*** When `all_set` is TRUE, then it returns TRUE only if all input +positions are set. ++ +If omitted, the default is FALSE. +* Return Value: +** A boolean, that follows the below table: ++ +[cols=",,",options="header",] +|=========================================== +|`int_value` |`all_set` |Return Value +|_all_ specified bits are TRUE |FALSE |TRUE +|_all_ specified bits are TRUE |TRUE |TRUE +|_some_ specified bits are TRUE |FALSE |TRUE +|_some_ specified bits are TRUE |TRUE |FALSE +|=========================================== +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +In the number 6 (0110 in binary), is bit 1 set? ++ +------------------------------ +{ "IsBitSET": ISBITSET(6,1) }; +------------------------------ +* The expected result is: ++ +--------------------- +{ "IsBitSET": false } +--------------------- ++ +This returns FALSE because bit 1 of 6 (011*0* in binary) is not set to +1. +* Example 2: ++ +In the number 1, is either bit 1 or bit 2 set? 
++ +-------------------------------------- +{ "BitTEST": BITTEST(1,[1,2],FALSE) }; +-------------------------------------- +* The expected result is: ++ +------------------- +{ "BitTEST": true } +------------------- ++ +This returns TRUE because bit 1 of the number 1 (000*1* in binary) is +set to 1. +* Example 3: ++ +In the number 6 (0110 in binary), are both bits 2 and 3 set? ++ +--------------------------------------- +{ "IsBitSET": ISBITSET(6,[2,3],TRUE) }; +--------------------------------------- +* The expected result is: ++ +-------------------- +{ "IsBitSET": true } +-------------------- ++ +This returns TRUE because both bits 2 and 3 in the number 6 (0*11*0 in +binary) are set to 1. +* Example 4: ++ +In the number 6 (0110 in binary), are the bits in positions 1 and 3 +both set? ++ +------------------------------------- +{ "BitTEST": BITTEST(6,[1,3],TRUE) }; +------------------------------------- +* The expected result is: ++ +-------------------- +{ "BitTEST": false } +-------------------- ++ +This returns FALSE because bit 1 in the number 6 (011*0* in binary) is +set to 0 (zero). + +The function has an alias `isbitset`. + +[[bitxor]] +bitxor +^^^^^^ + +* Syntax: ++ +------------------------------------------------ +BITXOR(int_value1, int_value2, ... , int_valueN) +------------------------------------------------ +* Returns the result of a bitwise Exclusive OR operation performed on +two or more integer values. ++ +The bitwise Exclusive OR operation compares each bit of `int_value1` to +the corresponding bit of `int_value2`. ++ +If there are more than two input values, the first two are compared; +then their result is compared to the next input value; and so on. 
++ +When the compared bits do not match, the result bit is 1; otherwise, the +compared bits do match, and the result bit is 0 (zero), as summarized: ++ +[cols=",,",options="header",] +|============================ +|Bit 1 |Bit 2 |XOR Result Bit +|0 |0 |0 +|0 |1 |1 +|1 |0 |1 +|1 |1 |0 +|============================ +* Arguments: +** `int_valueI`: Integers, or any valid expressions which evaluate to +integers, that are used to compare. +* Return Value: +** An integer, representing the bitwise XOR between the input integers. +* Limitations: +** Input values must be integers (such as 1 or 1.0) and cannot contain +decimals (such as 1.2). +* Example 1: ++ +Perform the XOR operation on 3 (0011 in binary) and 6 (0110 in binary). ++ +-------------------------- +{ "BitXOR": BITXOR(3,6) }; +-------------------------- +* The expected result is: ++ +--------------- +{ "BitXOR": 5 } +--------------- ++ +This returns 5 (0101 in binary) because the 1st bit pair and 3rd bit +pair are different (resulting in 1) while the 2nd bit pair and 4th bit +pair are the same (resulting in 0): ++ +-------- +0011 (3) +0110 (6) +==== +0101 (5) +-------- +* Example 2: ++ +Perform the XOR operation on 3 (0011 in binary) and 6 (0110 in binary) +and 15 (1111 in binary). ++ +----------------------------- +{ "BitXOR": BITXOR(3,6,15) }; +----------------------------- +* The expected result is: ++ +---------------- +{ "BitXOR": 10 } +---------------- ++ +This returns 10 (1010 in binary) because 3 XOR 6 equals 5 (0101 in +binary), and then 5 XOR 15 equals 10 (1010 in binary). 
diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc new file mode 100644 index 00000000000..ae9453fa406 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc @@ -0,0 +1,782 @@ +[[numeric-functions]] +Numeric Functions +~~~~~~~~~~~~~~~~~ + +[[abs]] +abs +^^^ + +* Syntax: ++ +------------------ +abs(numeric_value) +------------------ +* Computes the absolute value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** The absolute value of the argument with the same type as the input +argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +----------------------------------------------------------------------------------------------------------------------------- +{ "v1": abs(2013), "v2": abs(-4036), "v3": abs(0), "v4": abs(float("-2013.5")), "v5": abs(double("-2013.593823748327284")) }; +----------------------------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------- +{ "v1": 2013, "v2": 4036, "v3": 0, "v4": 2013.5, "v5": 2013.5938237483274 } +--------------------------------------------------------------------------- + +[[acos]] +acos +^^^^ + +* Syntax: ++ +------------------- +acos(numeric_value) +------------------- +* Computes the arc cosine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+* Return Value: +** the `double` arc cosine in radians for the argument, if the argument +is in the range of -1 (inclusive) to 1 (inclusive), +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error, +** "NaN" for other legitimate numeric values. +* Example: ++ +------------------------------------------------------------------------------------------------------ +{ "v1": acos(1), "v2": acos(2), "v3": acos(0), "v4": acos(float("0.5")), "v5": acos(double("-0.5")) }; +------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------- +{ "v1": 0.0, "v2": "NaN", "v3": 1.5707963267948966, "v4": 1.0471975511965979, "v5": 2.0943951023931957 } +-------------------------------------------------------------------------------------------------------- + +[[asin]] +asin +^^^^ + +* Syntax: ++ +------------------- +asin(numeric_value) +------------------- +* Computes the arc sine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` arc sin in radians for the argument, if the argument is +in the range of -1 (inclusive) to 1 (inclusive), +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error, +** "NaN" for other legitimate numeric values. 
+* Example: ++ +------------------------------------------------------------------------------------------------------ +{ "v1": asin(1), "v2": asin(2), "v3": asin(0), "v4": asin(float("0.5")), "v5": asin(double("-0.5")) }; +------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------- +{ "v1": 1.5707963267948966, "v2": "NaN", "v3": 0.0, "v4": 0.5235987755982989, "v5": -0.5235987755982989 } +--------------------------------------------------------------------------------------------------------- + +[[atan]] +atan +^^^^ + +* Syntax: ++ +------------------- +atan(numeric_value) +------------------- +* Computes the arc tangent value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` arc tangent in radians for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------------------------ +{ "v1": atan(1), "v2": atan(2), "v3": atan(0), "v4": atan(float("0.5")), "v5": atan(double("1000")) }; +------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------------------- +{ "v1": 0.7853981633974483, "v2": 1.1071487177940904, "v3": 0.0, "v4": 0.4636476090008061, "v5": 1.5697963271282298 } +--------------------------------------------------------------------------------------------------------------------- + +[[atan2]] +atan2 +^^^^^ + +* Syntax: ++ +------------------------------------- +atan2(numeric_value1, numeric_value2) +------------------------------------- +* Computes the arc tangent value of numeric_value1/numeric_value2. +* Arguments: +** `numeric_value1`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** `numeric_value2`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` arc tangent in radians for `numeric_value1` and +`numeric_value2`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------ +{ "v1": atan2(1, 2), "v2": atan2(0, 4), "v3": atan2(float("0.5"), double("-0.5")) }; +------------------------------------------------------------------------------------ +* The expected result is: ++ +---------------------------------------------------------------- +{ "v1": 0.4636476090008061, "v2": 0.0, "v3": 2.356194490192345 } +---------------------------------------------------------------- + +[[ceil]] +ceil +^^^^ + +* Syntax: ++ +------------------- +ceil(numeric_value) +------------------- +* Computes the smallest (closest to negative infinity) number with no +fractional part that is not less than the value of the argument. If the +argument is already equal to a mathematical integer, then the result is +the same as the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** The ceiling value for the given number in the same type as the input +argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +--------------------------------------------- +{ + "v1": ceil(2013), + "v2": ceil(-4036), + "v3": ceil(0.3), + "v4": ceil(float("-2013.2")), + "v5": ceil(double("-2013.893823748327284")) +}; +--------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------- +{ "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2013.0 } +-------------------------------------------------------------------- + +[[cos]] +cos +^^^ + +* Syntax: ++ +------------------ +cos(numeric_value) +------------------ +* Computes the cosine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+* Return Value: +** the `double` cosine value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": cos(1), "v2": cos(2), "v3": cos(0), "v4": cos(float("0.5")), "v5": cos(double("1000")) }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------------------- +{ "v1": 0.5403023058681398, "v2": -0.4161468365471424, "v3": 1.0, "v4": 0.8775825618903728, "v5": 0.562379076290703 } +--------------------------------------------------------------------------------------------------------------------- + +[[cosh]] +cosh +^^^^ + +* Syntax: ++ +------------------- +cosh(numeric_value) +------------------- +* Computes the hyperbolic cosine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` hyperbolic cosine value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +--------------------------------------------------------------------------------------------------- +{ "v1": cosh(1), "v2": cosh(2), "v3": cosh(0), "v4": cosh(float("0.5")), "v5": cosh(double("8")) }; +--------------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------------------- +{ "v1": 1.5430806348152437, "v2": 3.7621956910836314, "v3": 1.0, "v4": 1.1276259652063807, "v5": 1490.479161252178 } +-------------------------------------------------------------------------------------------------------------------- + +[[degrees]] +degrees +^^^^^^^ + +* Syntax: ++ +---------------------- +degrees(numeric_value) +---------------------- +* Converts radians to degrees. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** The degrees value for the given radians value. The returned value has +type `double`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +------------------------ +{ "v1": degrees(pi()) }; +------------------------ +* The expected result is: ++ +--------------- +{ "v1": 180.0 } +--------------- + +[[e]] +e +^ + +* Syntax: ++ +---- +e() +---- +* Return Value: +** e (base of the natural logarithm) +* Example: ++ +-------------- +{ "v1": e() }; +-------------- +* The expected result is: ++ +--------------------------- +{ "v1": 2.718281828459045 } +--------------------------- + +[[exp]] +exp +^^^ + +* Syntax: ++ +------------------ +exp(numeric_value) +------------------ +* Computes e^numeric_value^. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+* Return Value: +** e^numeric_value^, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": exp(1), "v2": exp(2), "v3": exp(0), "v4": exp(float("0.5")), "v5": exp(double("1000")) }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------------------------------------------------------------------- +{ "v1": 2.718281828459045, "v2": 7.38905609893065, "v3": 1.0, "v4": 1.6487212707001282, "v5": "Infinity" } +---------------------------------------------------------------------------------------------------------- + +[[floor]] +floor +^^^^^ + +* Syntax: ++ +-------------------- +floor(numeric_value) +-------------------- +* Computes the largest (closest to positive infinity) number with no +fractional part that is not greater than the value. If the argument is +already equal to a mathematical integer, then the result is the same as +the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** The floor value for the given number in the same type as the input +argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +---------------------------------------------- +{ + "v1": floor(2013), + "v2": floor(-4036), + "v3": floor(0.8), + "v4": floor(float("-2013.2")), + "v5": floor(double("-2013.893823748327284")) +}; +---------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------- +{ "v1": 2013, "v2": -4036, "v3": 0.0, "v4": -2014.0, "v5": -2014.0 } +-------------------------------------------------------------------- + +[[ln]] +ln +^^ + +* Syntax: ++ +----------------- +ln(numeric_value) +----------------- +* Computes log~e~(numeric_value). +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** log~e~(numeric_value), +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +-------------------------------------------------------------------------------------------- +{ "v1": ln(1), "v2": ln(2), "v3": ln(0), "v4": ln(float("0.5")), "v5": ln(double("1000")) }; +-------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------------- +{ "v1": 0.0, "v2": 0.6931471805599453, "v3": "-Infinity", "v4": -0.6931471805599453, "v5": 6.907755278982137 } +-------------------------------------------------------------------------------------------------------------- + +[[log]] +log +^^^ + +* Syntax: ++ +------------------ +log(numeric_value) +------------------ +* Computes log~10~(numeric_value). +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+* Return Value: +** log~10~(numeric_value), +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": log(1), "v2": log(2), "v3": log(0), "v4": log(float("0.5")), "v5": log(double("1000")) }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------------------------------------------ +{ "v1": 0.0, "v2": 0.3010299956639812, "v3": "-Infinity", "v4": -0.3010299956639812, "v5": 3.0 } +------------------------------------------------------------------------------------------------ + +[[pi]] +pi +^^ + +* Syntax: ++ +---- +pi() +---- +* Return Value: +** Pi +* Example: ++ +--------------- +{ "v1": pi() }; +--------------- +* The expected result is: ++ +--------------------------- +{ "v1": 3.141592653589793 } +--------------------------- + +[[power]] +power +^^^^^ + +* Syntax: ++ +------------------------------------- +power(numeric_value1, numeric_value2) +------------------------------------- +* Computes numeric_value1^numeric_value2^. +* Arguments: +** `numeric_value1`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** `numeric_value2`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** numeric_value1^numeric_value2^, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------ +{ "v1": power(1, 2), "v3": power(0, 4), "v4": power(float("0.5"), double("-0.5")) }; +------------------------------------------------------------------------------------ +* The expected result is: ++ +---------------------------------------------- +{ "v1": 1, "v3": 0, "v4": 1.4142135623730951 } +---------------------------------------------- + +[[radians]] +radians +^^^^^^^ + +* Syntax: ++ +---------------------- +radians(numeric_value) +---------------------- +* Converts degrees to radians +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** The radians value for the given degrees value. The returned value has +type `double`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +----------------------- +{ "v1": radians(180) }; +----------------------- +* The expected result is: ++ +--------------------------- +{ "v1": 3.141592653589793 } +--------------------------- + +[[round]] +round +^^^^^ + +* Syntax: ++ +----------------------------------- +round(numeric_value[, round_digit]) +----------------------------------- +* Rounds the value to the given number of integer digits to the right of +the decimal point, or to the left of the decimal point if the number of +digits is negative. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value that +represents the numeric value to be rounded. +** `round_digit`: (Optional) a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value that +specifies the digit to round to. This argument may be positive or +negative; positive indicating that rounding needs to be to the right of +the decimal point, and negative indicating that rounding needs to be to +the left of the decimal point. 
Values such as 1.0 and 2.0 are +acceptable, but values such as 1.3 and 1.5 result in a `null`. If +omitted, the default is 0. +* Return Value: +** The rounded value for the given number. The returned value has the +following type: +*** `bigint` if the input value has type `tinyint`, `smallint`, +`integer` or `bigint`, +*** `float` if the input value has type `float`, +*** `double` if the input value has type `double`; +** `missing` if the input value is a `missing` value, +** `null` if the input value is a `null` value, +** any other non-numeric input value will return a `null` value. +* Example: ++ +---------------------------------------------- +{ + "v1": round(2013), + "v2": round(-4036), + "v3": round(0.8), + "v4": round(float("-2013.256")), + "v5": round(double("-2013.893823748327284")), + "v6": round(123456, -1), + "v7": round(456.456, 2), + "v8": round(456.456, -1), + "v9": round(-456.456, -2) +}; +---------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------------------------------------------- +{ "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0, "v6": 123460, "v7": 456.46, "v8": 460, "v9": -500 } +----------------------------------------------------------------------------------------------------------------------- + +[[sign]] +sign +^^^^ + +* Syntax: ++ +------------------- +sign(numeric_value) +------------------- +* Computes the sign of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the sign (a `tinyint`) of the argument, -1 for negative values, 0 for +0, and 1 for positive values, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------------------------- +{ "v1": sign(1), "v2": sign(2), "v3": sign(0), "v4": sign(float("0.5")), "v5": sign(double("-1000")) }; +------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------ +{ "v1": 1, "v2": 1, "v3": 0, "v4": 1, "v5": -1 } +------------------------------------------------ + +[[sin]] +sin +^^^ + +* Syntax: ++ +------------------ +sin(numeric_value) +------------------ +* Computes the sine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` sine value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": sin(1), "v2": sin(2), "v3": sin(0), "v4": sin(float("0.5")), "v5": sin(double("1000")) }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------------------- +{ "v1": 0.8414709848078965, "v2": 0.9092974268256817, "v3": 0.0, "v4": 0.479425538604203, "v5": 0.8268795405320025 } +-------------------------------------------------------------------------------------------------------------------- + +[[sinh]] +sinh +^^^^ + +* Syntax: ++ +------------------- +sinh(numeric_value) +------------------- +* Computes the hyperbolic sine value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+* Return Value: +** the `double` hyperbolic sine value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. +* Example: ++ +--------------------------------------------------------------------------------------------------- +{ "v1": sinh(1), "v2": sinh(2), "v3": sinh(0), "v4": sinh(float("0.5")), "v5": sinh(double("8")) }; +--------------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------------------- +{ "v1": 1.1752011936438014, "v2": 3.626860407847019, "v3": 0.0, "v4": 0.5210953054937474, "v5": 1490.4788257895502 } +-------------------------------------------------------------------------------------------------------------------- + +[[sqrt]] +sqrt +^^^^ + +* Syntax: ++ +------------------- +sqrt(numeric_value) +------------------- +* Computes the square root of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` square root value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------------------------ +{ "v1": sqrt(1), "v2": sqrt(2), "v3": sqrt(0), "v4": sqrt(float("0.5")), "v5": sqrt(double("1000")) }; +------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------------------------------------------------------ +{ "v1": 1.0, "v2": 1.4142135623730951, "v3": 0.0, "v4": 0.7071067811865476, "v5": 31.622776601683793 } +------------------------------------------------------------------------------------------------------ + +[[tan]] +tan +^^^ + +* Syntax: ++ +------------------ +tan(numeric_value) +------------------ +* Computes the tangent value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` tangent value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": tan(1), "v2": tan(2), "v3": tan(0), "v4": tan(float("0.5")), "v5": tan(double("1000")) }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------------------- +{ "v1": 1.5574077246549023, "v2": -2.185039863261519, "v3": 0.0, "v4": 0.5463024898437905, "v5": 1.4703241557027185 } +--------------------------------------------------------------------------------------------------------------------- + +[[tanh]] +tanh +^^^^ + +* Syntax: ++ +------------------- +tanh(numeric_value) +------------------- +* Computes the hyperbolic tangent value of the argument. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. +* Return Value: +** the `double` hyperbolic tangent value for the argument, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +--------------------------------------------------------------------------------------------------- +{ "v1": tanh(1), "v2": tanh(2), "v3": tanh(0), "v4": tanh(float("0.5")), "v5": tanh(double("8")) }; +--------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------------------------------------------------------------- +{ "v1": 0.7615941559557649, "v2": 0.964027580075817, "v3": 0.0, "v4": 0.4621171572600098, "v5": 0.999999774929676 } +------------------------------------------------------------------------------------------------------------------- + +[[trunc]] +trunc +^^^^^ + +* Syntax: ++ +----------------------------------- +trunc(numeric_value, number_digits) +----------------------------------- +* Truncates the number to the given number of integer digits to the +right of the decimal point (left if digits is negative). Digits is 0 if +not given. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value, +** `number_digits`: a `tinyint`/`smallint`/`integer`/`bigint` value. +* Return Value: +** the truncated value for the argument, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is +`missing`, +** a type error will be raised if: +*** the first argument is any other non-numeric value, +*** the second argument is any other non-tinyint, non-smallint, +non-integer, and non-bigint value. 
+* Example: ++ +--------------------------------------------------------------------------------------------------------------------------------------- +{ "v1": trunc(1, 1), "v2": trunc(2, -2), "v3": trunc(0.122, 2), "v4": trunc(float("11.52"), -1), "v5": trunc(double("1000.5252"), 3) }; +--------------------------------------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------ +{ "v1": 1, "v2": 2, "v3": 0.12, "v4": 10.0, "v5": 1000.525 } +------------------------------------------------------------ diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc new file mode 100644 index 00000000000..01ea20333d1 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc @@ -0,0 +1,47 @@ +[[round_half_to_even]] +round_half_to_even +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------- +round_half_to_even(numeric_value, [precision]) +---------------------------------------------- +* Computes the closest numeric value to `numeric_value` that is a +multiple of ten to the power of minus `precision`. `precision` is +optional and by default value `0` is used. +* Arguments: +** `numeric_value`: a +`tinyint`/`smallint`/`integer`/`bigint`/`float`/`double` value. 
+** `precision`: an optional `tinyint`/`smallint`/`integer`/`bigint` +field representing the number of digits in the fraction of the +result +* Return Value: +** The rounded value for the given number in the same type as the input +argument, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-numeric value, +*** or, the second argument is any other non-tinyint, non-smallint, +non-integer, or non-bigint value. +* Example: ++ +--------------------------------------------------------------- +{ + "v1": round_half_to_even(2013), + "v2": round_half_to_even(-4036), + "v3": round_half_to_even(0.8), + "v4": round_half_to_even(float("-2013.256")), + "v5": round_half_to_even(double("-2013.893823748327284")), + "v6": round_half_to_even(double("-2013.893823748327284"), 2), + "v7": round_half_to_even(2013, 4), + "v8": round_half_to_even(float("-2013.256"), 5) +}; +--------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------------------------------------- +{ "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0, "v6": -2013.89, "v7": 2013, "v8": -2013.256 } +----------------------------------------------------------------------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc new file mode 100644 index 00000000000..a98a9863b91 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc @@ -0,0 +1,758 @@ +[[string-functions]] +String Functions +~~~~~~~~~~~~~~~~ + +[[concat]] +concat +^^^^^^ + +* Syntax: ++ +----------------------------- +concat(string1, string2, ...) 
+----------------------------- +* Returns a concatenated string from arguments. +* Arguments: +** `string1`: a string value, +** `string2`: a string value, +** .... +* Return Value: +** a concatenated string from arguments, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: ++ +------------------------------------------ +concat("test ", "driven ", "development"); +------------------------------------------ +* The expected result is: ++ +------------------------- +"test driven development" +------------------------- + +[[contains]] +contains +^^^^^^^^ + +* Syntax: ++ +-------------------------------------- +contains(string, substring_to_contain) +-------------------------------------- +* Checks whether the string `string` contains the string +`substring_to_contain` +* Arguments: +** `string` : a `string` that might contain the given substring, +** `substring_to_contain` : a target `string` that might be contained. +* Return Value: +** a `boolean` value, `true` if `string` contains +`substring_to_contain`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error, +** `false` otherwise. +* Note: an +link:similarity.html#UsingIndexesToSupportSimilarityQueries[n_gram +index] can be utilized for this function. 
+* Example: ++ +------------------------------------------------------------------------------ +{ "v1": contains("I like x-phone", "phone"), "v2": contains("one", "phone") }; +------------------------------------------------------------------------------ +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +[[ends_with]] +ends_with +^^^^^^^^^ + +* Syntax: ++ +---------------------------------------- +ends_with(string, substring_to_end_with) +---------------------------------------- +* Checks whether the string `string` ends with the string +`substring_to_end_with`. +* Arguments: +** `string` : a `string` that might end with the given string, +** `substring_to_end_with` : a `string` that might be contained as the +ending substring. +* Return Value: +** a `boolean` value, `true` if `string` ends with +`substring_to_end_with`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error, +** `false` otherwise. +* Example: ++ +-------------------------------------------------------------------------- +{ + "v1": ends_with(" love product-b its shortcut_menu is awesome:)", ":)"), + "v2": ends_with(" awesome:)", ":-)") +}; +-------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +[[initcap-or-title]] +initcap (or title) +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------- +initcap(string) +--------------- +* Converts a given string `string` so that the first letter of each word +is uppercase and every other letter is lowercase. The function has an +alias called "title". +* Arguments: +** `string` : a `string` to be converted. 
+* Return Value: +** a `string` as the title form of the given `string`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. +* Example: ++ +--------------------------------------------------------------------------- +{ "v1": initcap("ASTERIXDB is here!"), "v2": title("ASTERIXDB is here!") }; +--------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------------------- +{ "v1": "Asterixdb Is Here!", "v2": "Asterixdb Is Here!" } +---------------------------------------------------------- + +[[length]] +length +^^^^^^ + +* Syntax: ++ +-------------- +length(string) +-------------- +* Returns the length of the string `string`. +* Arguments: +** `string` : a `string` or `null` that represents the string to be +checked. +* Return Value: +** a `bigint` that represents the length of `string`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. +* Example: ++ +---------------------- +length("test string"); +---------------------- +* The expected result is: ++ +-- +11 +-- + +[[lower]] +lower +^^^^^ + +* Syntax: ++ +------------- +lower(string) +------------- +* Converts a given string `string` to its lowercase form. +* Arguments: +** `string` : a `string` to be converted. +* Return Value: +** a `string` as the lowercase form of the given `string`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. 
+* Example: ++ +------------------- +lower("ASTERIXDB"); +------------------- +* The expected result is: ++ +----------- +"asterixdb" +----------- + +[[ltrim]] +ltrim +^^^^^ + +* Syntax: ++ +----------------------- +ltrim(string[, chars]); +----------------------- +* Returns a new string with all leading characters that appear in +`chars` removed. By default, white space is the character to trim. +* Arguments: +** `string` : a `string` to be trimmed, +** `chars` : a `string` that contains characters that are used to trim. +* Return Value: +** a trimmed, new `string`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: ++ +-------------------------------- +ltrim("me like x-phone", "eml"); +-------------------------------- +* The expected result is: ++ +--------------- +" like x-phone" +--------------- + +[[position]] +position +^^^^^^^^ + +* Syntax: ++ +-------------------------------- +position(string, string_pattern) +-------------------------------- +* Returns the first position of `string_pattern` within `string`. The +function returns the 0-based position. Another version of the function +returns the 1-based position. Below are the aliases for each version: +* Aliases: +** 0-based: `position`, `pos`, `position0`, `pos0`. +** 1-based: `position1`, `pos1`. +* Arguments: +** `string` : a `string` that might contain the pattern. +** `string_pattern` : a pattern `string` to be matched. +* Return Value: +** the first position that `string_pattern` appears within `string` +(starting at 0), or -1 if it does not appear, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will return a `null`. 
+* Example: ++ +---------------------------------------- +{ + "v1": position("ppphonepp", "phone"), + "v2": position("hone", "phone"), + "v3": position1("ppphonepp", "phone"), + "v4": position1("hone", "phone") +}; +---------------------------------------- +* The expected result is: ++ +--------------------------------------- +{ "v1": 2, "v2": -1, "v3": 3, "v4": -1 } +--------------------------------------- + +[[regexp_contains]] +regexp_contains +^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------- +regexp_contains(string, string_pattern[, string_flags]) +------------------------------------------------------- +* Checks whether the string `string` contains the regular expression +pattern `string_pattern` (a Java regular expression pattern). +* Aliases: +** `regexp_contains`, `regex_contains`, `contains_regexp`, +`contains_regex`. +* Arguments: +** `string` : a `string` that might contain the pattern. +** `string_pattern` : a pattern `string` to be matched. +** `string_flag` : (Optional) a `string` with flags to be used during +regular expression matching. +*** The following modes are enabled with these flags: dotall (s), +multiline (m), case_insensitive (i), and comments and whitespace (x). +* Return Value: +** a `boolean`, returns `true` if `string` contains the pattern +`string_pattern`, `false` otherwise. +** `missing` if any argument is a `missing` value. +** `null` if any argument is a `null` value but no argument is a +`missing` value. +** any other non-string input value will return a `null`. 
+* Example: ++ +---------------------------------------------- +{ + "v1": regexp_contains("pphonepp", "p*hone"), + "v2": regexp_contains("hone", "p+hone") +}; +---------------------------------------------- +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +[[regexp_like]] +regexp_like +^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------------- +regexp_like(string, string_pattern[, string_flags]) +--------------------------------------------------- +* Checks whether the string `string` exactly matches the regular +expression pattern `string_pattern` (a Java regular expression pattern). +* Aliases: +** `regexp_like`, `regex_like`. +* Arguments: +** `string` : a `string` that might contain the pattern. +** `string_pattern` : a pattern `string` that might be contained. +** `string_flag` : (Optional) a `string` with flags to be used during +regular expression matching. +*** The following modes are enabled with these flags: dotall (s), +multiline (m), case_insensitive (i), and comments and whitespace (x). +* Return Value: +** a `boolean` value, `true` if `string` matches the pattern +`string_pattern`, `false` otherwise. +** `missing` if any argument is a `missing` value. +** `null` if any argument is a `null` value but no argument is a +`missing` value. +** any other non-string input value will return a `null`. 
+* Example: ++ +--------------------------------------------------------------------------------- +{ + "v1": regexp_like(" can't stand acast the network is horrible:(", ".*acast.*"), + "v2": regexp_like("acast", ".*acst.*") +}; +--------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +[[regexp_position]] +regexp_position +^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------- +regexp_position(string, string_pattern[, string_flags]) +------------------------------------------------------- +* Returns the first position of the regular expression `string_pattern` (a +Java regular expression pattern) within `string`. The function returns +the 0-based position. Another version of the function returns the +1-based position. Below are the aliases for each version: +* Aliases: +** 0-Based: `regexp_position`, `regexp_pos`, `regexp_position0`, +`regexp_pos0`, `regex_position`, `regex_pos`, `regex_position0`, +`regex_pos0`. +** 1-Based: `regexp_position1`, `regexp_pos1`, `regex_position1`, +`regex_pos1`. +* Arguments: +** `string` : a `string` that might contain the pattern. +** `string_pattern` : a pattern `string` to be matched. +** `string_flag` : (Optional) a `string` with flags to be used during +regular expression matching. +*** The following modes are enabled with these flags: dotall (s), +multiline (m), case_insensitive (i), and comments and whitespace (x). +* Return Value: +** the first position that the regular expression `string_pattern` +appears in `string` (starting at 0), or -1 if it does not appear. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will return a `null`. 
+* Example: ++ +----------------------------------------------- +{ + "v1": regexp_position("pphonepp", "p*hone"), + "v2": regexp_position("hone", "p+hone"), + "v3": regexp_position1("pphonepp", "p*hone"), + "v4": regexp_position1("hone", "p+hone") +}; +----------------------------------------------- +* The expected result is: ++ +---------------------------------------- +{ "v1": 0, "v2": -1, "v3": 1, "v4": -1 } +---------------------------------------- + +[[regexp_replace]] +regexp_replace +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------------------------- +regexp_replace(string, string_pattern, string_replacement[, string_flags]) +regexp_replace(string, string_pattern, string_replacement[, replacement_limit]) +------------------------------------------------------------------------------- +* Checks whether the string `string` matches the given regular +expression pattern `string_pattern` (a Java regular expression pattern), +and replaces the matched pattern `string_pattern` with the new pattern +`string_replacement`. +* Aliases: +** `regexp_replace`, `regex_replace`. +* Arguments: +** `string` : a `string` that might contain the pattern. +** `string_pattern` : a pattern `string` to be matched. +** `string_replacement` : a pattern `string` to be used as the +replacement. +** `string_flag` : (Optional) a `string` with flags to be used during +replace. +*** The following modes are enabled with these flags: dotall (s), +multiline (m), case_insensitive (i), and comments and whitespace (x). +** `replacement_limit`: (Optional) an `integer` specifying the maximum +number of replacements to make (if negative then all occurrences will be +replaced) +* Return Value: +** Returns a `string` that is obtained after the replacements. +** `missing` if any argument is a `missing` value. +** `null` if any argument is a `null` value but no argument is a +`missing` value. +** any other non-string input value will return a `null`. 
+* Example: ++ +---------------------------------------------------------------------------------------------------- +regexp_replace(" like x-phone the voicemail_service is awesome", " like x-phone", "like product-a"); +---------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------- +"like product-a the voicemail_service is awesome" +------------------------------------------------- + +[[repeat]] +repeat +^^^^^^ + +* Syntax: ++ +----------------- +repeat(string, n) +----------------- +* Returns a string formed by repeating the input `string` `n` times. +* Arguments: +** `string` : a `string` to be repeated, +** `n` : an `tinyint`/`smallint`/`integer`/`bigint` value - how many +times the string should be repeated. +* Return Value: +** a string that repeats the input `string` `n` times, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-string value, +*** or, the second argument is not a `tinyint`, `smallint`, `integer`, +or `bigint`. +* Example: ++ +------------------ +repeat("test", 3); +------------------ +* The expected result is: ++ +-------------- +"testtesttest" +-------------- + +[[replace]] +replace +^^^^^^^ + +* Syntax: ++ +----------------------------------------------------------- +replace(string, search_string, replacement_string[, limit]) +----------------------------------------------------------- +* Finds occurrences of the given substring `search_string` in the input +string `string` and replaces them with the new substring +`replacement_string`. 
+* Arguments: +** `string` : an input `string`, +** `search_string` : a `string` substring to be searched for, +** `replacement_string` : a `string` to be used as the replacement, +** `limit` : (Optional) an `integer` - maximum number of occurrences to +be replaced. If not specified or negative then all occurrences will be +replaced +* Return Value: +** Returns a `string` that is obtained after the replacements, +** `missing` if any argument is a `missing` value, +** any other non-string input value or non-integer `limit` will cause a +type error, +** `null` if any argument is a `null` value but no argument is a +`missing` value. +* Example: ++ +----------------------------------------------------------------------------------------------------- +{ + "v1": replace(" like x-phone the voicemail_service is awesome", " like x-phone", "like product-a"), + "v2": replace("x-phone and x-phone", "x-phone", "product-a", 1) +}; +----------------------------------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------------------- +{ + "v1": "like product-a the voicemail_service is awesome", + "v2": "product-a and x-phone" +} +---------------------------------------------------------- + +[[reverse]] +reverse +^^^^^^^ + +* Syntax: ++ +--------------- +reverse(string) +--------------- +* Returns a string formed by reversing characters in the input `string`. 
+* Arguments: +** `string` : a `string` to be reversed +* Return Value: +** a string containing characters from the input `string` in the +reverse order, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-string value +* Example: ++ +----------------- +reverse("hello"); +----------------- +* The expected result is: ++ +------- +"olleh" +------- + +[[rtrim]] +rtrim +^^^^^ + +* Syntax: ++ +----------------------- +rtrim(string[, chars]); +----------------------- +* Returns a new string with all trailing characters that appear in +`chars` removed. By default, white space is the character to trim. +* Arguments: +** `string` : a `string` to be trimmed, +** `chars` : a `string` that contains characters that are used to trim. +* Return Value: +** a trimmed, new `string`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: ++ +------------------------------------------- +{ + "v1": rtrim("i like x-phone", "x-phone"), + "v2": rtrim("i like x-phone", "onexph") +}; +------------------------------------------- +* The expected result is: ++ +------------------------------------ +{ "v1": "i like ", "v2": "i like " } +------------------------------------ + +[[split]] +split +^^^^^ + +* Syntax: ++ +------------------ +split(string, sep) +------------------ +* Splits the input `string` into an array of substrings separated by the +string `sep`. +* Arguments: +** `string` : a `string` to be split. +* Return Value: +** an array of substrings by splitting the input `string` by `sep`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. 
+* Example: ++ +-------------------------------------- +split("test driven development", " "); +-------------------------------------- +* The expected result is: ++ +----------------------------------- +[ "test", "driven", "development" ] +----------------------------------- + +[[starts_with]] +starts_with +^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------- +starts_with(string, substring_to_start_with) +-------------------------------------------- +* Checks whether the string `string` starts with the string +`substring_to_start_with`. +* Arguments: +** `string` : a `string` that might start with the given string. +** `substring_to_start_with` : a `string` that might be contained as the +starting substring. +* Return Value: +** a `boolean`, returns `true` if `string` starts with the string +`substring_to_start_with`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error, +** `false` otherwise. +* Example: ++ +--------------------------------------------------------- +{ + "v1" : starts_with(" like the plan, amazing", " like"), + "v2" : starts_with("I like the plan, amazing", " like") +}; +--------------------------------------------------------- +* The expected result is: ++ +--------------------------- +{ "v1": true, "v2": false } +--------------------------- + +[[substr]] +substr +^^^^^^ + +* Syntax: ++ +-------------------------------- +substr(string, offset[, length]) +-------------------------------- +* Returns the substring from the given string `string` based on the +given start offset `offset` with the optional `length`. The function +uses the 0-based position. Another version of the function uses the +1-based position. Below are the aliases for each version: +* Aliases: +** 0-Based: `substring`, `substr`, `substring0`, `substr0`. +** 1-Based: `substring1`, `substr1`. 
+* Arguments: +** `string` : a `string` to be extracted. +** `offset` : a `tinyint`/`smallint`/`integer`/`bigint` value as the +starting offset of the substring in `string` (starting at 0). If +negative then counted from the end of the string. +** `length` : (Optional) a `tinyint`/`smallint`/`integer`/`bigint` +value as the length of the substring. +* Return Value: +** a `string` that represents the substring, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, or if the substring could not be obtained because the +starting offset is not within string bounds or `length` is negative. +** a `null` will be returned if: +*** the first argument is any other non-string value. +*** the second argument is not a `tinyint`, `smallint`, `integer`, or +`bigint`. +*** the third argument is not a `tinyint`, `smallint`, `integer`, or +`bigint` if the argument is present. +* Example: ++ +-------------------------------------------------------------------------- +{ "v1": substr("test string", 6, 3), "v2": substr1("test string", 6, 3) }; +-------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------- +{ "v1": "tri", "v2": "str" } +---------------------------- + +The function has an alias `substring`. + +[[trim]] +trim +^^^^ + +* Syntax: ++ +---------------------- +trim(string[, chars]); +---------------------- +* Returns a new string with all leading and trailing characters that appear in +`chars` removed. By default, white space is the character to trim. +* Arguments: +** `string` : a `string` to be trimmed, +** `chars` : a `string` that contains characters that are used to trim. +* Return Value: +** a trimmed, new `string`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. 
+* Example: ++ +--------------------------------- +trim("i like x-phone", "xphoen"); +--------------------------------- +* The expected result is: ++ +-------- +" like " +-------- + +[[upper]] +upper +^^^^^ + +* Syntax: ++ +------------- +upper(string) +------------- +* Converts a given string `string` to its uppercase form. +* Arguments: +** `string` : a `string` to be converted. +* Return Value: +** a `string` as the uppercase form of the given `string`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. +* Example: ++ +-------------- +upper("hello") +-------------- +* The expected result is: ++ +------- +"HELLO" +------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc new file mode 100644 index 00000000000..6dc453c3b78 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc @@ -0,0 +1,192 @@ +[[string_concat]] +string_concat +^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------- +string_concat(array) +-------------------- +* Concatenates an array of strings `array` into a single string. +* Arguments: +** `array` : an `array` or `multiset` of `string`s (could be `null` or +`missing`) to be concatenated. +* Return Value: +** the concatenated `string` value, +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** `missing` if any element in the input array is `missing`, +** `null` if any element in the input array is `null` but no element in +the input array is `missing`, +** any other non-array input value or non-string element in the input +array will cause a type error. 
+* Example: ++ +------------------------------------------ +string_concat(["ASTERIX", " ", "ROCKS!"]); +------------------------------------------ +* The expected result is: ++ +---------------- +"ASTERIX ROCKS!" +---------------- + +[[string_join]] +string_join +^^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +string_join(array, string) +-------------------------- +* Joins an array or multiset of strings `array` with the given separator +`string` into a single string. +* Arguments: +** `array` : an `array` or `multiset` of strings (could be `null`) to be +joined. +** `string` : a `string` to serve as the separator. +* Return Value: +** the joined `string`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** `missing` if the first argument array contains a `missing`, +** `null` if the first argument array contains a `null` but does not +contain a `missing`, +** a type error will be raised if: +*** the first argument is any other non-array value, or contains any +other non-string value, +*** or, the second argument is any other non-string value. +* Example: ++ +------------------------------------------ +string_join(["ASTERIX", "ROCKS~"], "!! "); +------------------------------------------ +* The expected result is: ++ +------------------ +"ASTERIX!! ROCKS~" +------------------ + +[[string_to_codepoint]] +string_to_codepoint +^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------- +string_to_codepoint(string) +--------------------------- +* Converts the string `string` to its code_based representation. +* Arguments: +** `string` : a `string` that will be converted. +* Return Value: +** an `array` of the code points for the string `string`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. 
+* Example: ++ +-------------------------------------- +string_to_codepoint("Hello ASTERIX!"); +-------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------- +[ 72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33 ] +-------------------------------------------------------------- + +[[codepoint_to_string]] +codepoint_to_string +^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +codepoint_to_string(array) +-------------------------- +* Converts the ordered code_based representation `array` to the +corresponding string. +* Arguments: +** `array` : an `array` of integer code_points. +* Return Value: +** a `string` representation of `array`. +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** `missing` if any element in the input array is `missing`, +** `null` if any element in the input array is `null` but no element in +the input array is `missing`, +** any other non-array input value or non-integer element in the input +array will cause a type error. +* Example: ++ +---------------------------------------------------------------------------------- +codepoint_to_string([72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33]); +---------------------------------------------------------------------------------- +* The expected result is: ++ +---------------- +"Hello ASTERIX!" +---------------- + +[[substring_before]] +substring_before +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------- +substring_before(string, string_pattern) +---------------------------------------- +* Returns the substring from the given string `string` before the given +pattern `string_pattern`. +* Arguments: +** `string` : a `string` to be extracted. +** `string_pattern` : a `string` pattern to be searched. 
+* Return Value: +** a `string` that represents the substring, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: + +--------------------------------------------- +substring_before(" like x-phone", "x-phone"); +--------------------------------------------- +* The expected result is: + +-------- +" like " +-------- + +[[substring_after]] +substring_after +^^^^^^^^^^^^^^^ + +* Syntax: + +substring_after(string, string_pattern); +* Returns the substring from the given string `string` after the given +pattern `string_pattern`. +* Arguments: +** `string` : a `string` to be extracted. +** `string_pattern` : a `string` pattern to be searched. +* Return Value: +** a `string` that represents the substring, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: + +---------------------------------------- +substring_after(" like x-phone", "xph"); +---------------------------------------- +* The expected result is: + +----- +"one" +----- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc new file mode 100644 index 00000000000..22410f7e9d6 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc @@ -0,0 +1,147 @@ +[[binary-functions]] +Binary Functions +~~~~~~~~~~~~~~~~ + +[[parse_binary]] +parse_binary +^^^^^^^^^^^^ + +* Syntax: + +parse_binary(string, encoding) +* Creates a `binary` from a string encoded in `encoding` format. +* Arguments: +** `string` : an encoded `string`, +** `encoding` : a string notation that specifies the encoding type of the +given `string`. Currently we support the `hex` and `base64` formats. 
+* Return Value: +** a `binary` that is decoded from the given `string`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: + +[ parse_binary("ABCDEF0123456789","hex"), +parse_binary("abcdef0123456789","HEX"), +parse_binary('QXN0ZXJpeAE=',"base64") ]; +* The expected result is: + +[ hex("ABCDEF0123456789"), hex("ABCDEF0123456789"), +hex("4173746572697801") ] + +[[print_binary]] +print_binary +^^^^^^^^^^^^ + +* Syntax: + +print_binary(binary, encoding) +* Prints a `binary` to the required encoding `string` format. +* Arguments: +** `binary` : the `binary` data to be printed. +** `encoding` : a string notation that specifies the expected encoding type. +Currently we support the `hex` and `base64` formats. +* Return Value: +** a `string` that represents the encoded format of a `binary`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Example: + +------------------------------------------------------------------------------------------------ +[ print_binary(hex("ABCDEF0123456789"), "base64"), print_binary(base64("q83vASNFZ4k="), "hex") ] +------------------------------------------------------------------------------------------------ +* The expected result is: + +-------------------------------------- +[ "q83vASNFZ4k=", "ABCDEF0123456789" ] +-------------------------------------- + +[[binary_length]] +binary_length +^^^^^^^^^^^^^ + +* Syntax: + +binary_length(binary) +* Returns the number of bytes storing the binary data. +* Arguments: +** `binary` : a `binary` value to be checked. 
+* Return Value: +** a `bigint` that represents the number of bytes, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-binary input value will cause a type error. +* Example: + +-------------------------- +binary_length(hex("00AA")) +-------------------------- +* The expected result is: + +2 + +[[sub_binary]] +sub_binary +^^^^^^^^^^ + +* Syntax: + +sub_binary(binary, offset[, length]) +* Returns the sub binary from the given `binary` based on the given +start offset with the optional `length`. +* Arguments: +** `binary` : a `binary` to be extracted, +** `offset` : a `tinyint`, `smallint`, `integer`, or `bigint` value as +the starting offset of the sub binary in `binary` (starting at 0), +** `length` : (Optional) a `tinyint`, `smallint`, `integer`, or `bigint` +value as the length of the sub binary. +* Return Value: +** a `binary` that represents the sub binary, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-binary value, +*** or, the second argument is any other non-integer value, +*** or, the third argument is any other non-integer value, if it is +present. +* Example: + +------------------------------- +sub_binary(hex("AABBCCDD"), 4); +------------------------------- +* The expected result is: + +--------- +hex("DD") +--------- + +[[binary_concat]] +binary_concat +^^^^^^^^^^^^^ + +* Syntax: + +binary_concat(array) +* Concatenates a binary `array` or `multiset` into a single binary. +* Arguments: +** `array` : an `array` or `multiset` of binaries (could be `null` or +`missing`) to be concatenated. 
+* Return Value: +** the concatenated `binary` value, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** `missing` if any element in the input array is `missing`, +** `null` if any element in the input array is `null` but no element in +the input array is `missing`, +** any other non-array input value or non-binary element in the input +array will cause a type error. +* Example: + +binary_concat([hex("42"), hex(""), hex('42')]); +* The expected result is: + +hex("4242") diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc new file mode 100644 index 00000000000..96f6a300648 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc @@ -0,0 +1,384 @@ +[[spatial-functions]] +Spatial Functions +~~~~~~~~~~~~~~~~~ + +[[create_point]] +create_point +^^^^^^^^^^^^ + +* Syntax: + +------------------ +create_point(x, y) +------------------ +* Creates the primitive type `point` using an `x` and `y` value. +* Arguments: +** `x` : a `double` that represents the x-coordinate, +** `y` : a `double` that represents the y-coordinate. +* Return Value: +** a `point` representing the ordered pair (`x`, `y`), +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-double input value will cause a type error. +* Example: + +------------------------------------- +{ "point": create_point(30.0,70.0) }; +------------------------------------- +* The expected result is: + +------------------------------- +{ "point": point("30.0,70.0") } +------------------------------- + +[[create_line]] +create_line +^^^^^^^^^^^ + +* Syntax: + +--------------------------- +create_line(point1, point2) +--------------------------- +* Creates the primitive type `line` using `point1` and `point2`. 
+* Arguments: +** `point1` : a `point` that represents the start point of the line. +** `point2` : a `point` that represents the end point of the line. +* Return Value: +** a spatial `line` created using the points provided in `point1` and +`point2`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-point input value will cause a type error. +* Example: + +-------------------------------------------------------------------------- +{ "line": create_line(create_point(30.0,70.0), create_point(50.0,90.0)) }; +-------------------------------------------------------------------------- +* The expected result is: + +--------------------------------------- +{ "line": line("30.0,70.0 50.0,90.0") } +--------------------------------------- + +[[create_rectangle]] +create_rectangle +^^^^^^^^^^^^^^^^ + +* Syntax: + +-------------------------------- +create_rectangle(point1, point2) +-------------------------------- +* Creates the primitive type `rectangle` using `point1` and `point2`. +* Arguments: +** `point1` : a `point` that represents the lower-left point of the +rectangle. +** `point2` : a `point` that represents the upper-right point of the +rectangle. +* Return Value: +** a spatial `rectangle` created using the points provided in `point1` +and `point2`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-point input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------ +{ "rectangle": create_rectangle(create_point(30.0,70.0), create_point(50.0,90.0)) }; +------------------------------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------- +{ "rectangle": rectangle("30.0,70.0 50.0,90.0") } +------------------------------------------------- + +[[create_circle]] +create_circle +^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------- +create_circle(point, radius) +---------------------------- +* Creates the primitive type `circle` using `point` and `radius`. +* Arguments: +** `point` : a `point` that represents the center of the circle. +** `radius` : a `double` that represents the radius of the circle. +* Return Value: +** a spatial `circle` created using the center point and the radius +provided in `point` and `radius`. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-point value, +*** or, the second argument is any other non-double value. +* Example: ++ +--------------------------------------------------------- +{ "circle": create_circle(create_point(30.0,70.0), 5.0) } +--------------------------------------------------------- +* The expected result is: ++ +------------------------------------- +{ "circle": circle("30.0,70.0 5.0") } +------------------------------------- + +[[create_polygon]] +create_polygon +^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------- +create_polygon(array) +--------------------- +* Creates the primitive type `polygon` using the double values provided +in the argument `array`. Each two consecutive double values represent a +point starting from the first double value in the array. 
Note that at +least six double values should be specified, meaning a total of three +points. +* Arguments: +** `array` : an array of doubles representing the points of the polygon. +* Return Value: +** a `polygon`, represents a spatial simple polygon created using the +points provided in `array`. +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** `missing` if any element in the input array is `missing`, +** `null` if any element in the input array is `null` but no element in +the input array is `missing`, +** any other non-array input value or non-double element in the input +array will cause a type error. +* Example: ++ +----------------------------------------------------------------- +{ "polygon": create_polygon([1.0,1.0,2.0,2.0,3.0,3.0,4.0,4.0]) }; +----------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------- +{ "polygon": polygon("1.0,1.0 2.0,2.0 3.0,3.0 4.0,4.0") } +--------------------------------------------------------- + +[[get_xget_y]] +get_x/get_y +^^^^^^^^^^^ + +* Syntax: ++ +---------------------------- +get_x(point) or get_y(point) +---------------------------- +* Returns the x or y coordinates of a point `point`. +* Arguments: +** `point` : a `point`. +* Return Value: +** a `double` representing the x or y coordinates of the point `point`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-point input value will cause a type error. 
+* Example: ++ +----------------------------------------------------------------------------------------------- +{ "x_coordinate": get_x(create_point(2.3,5.0)), "y_coordinate": get_y(create_point(2.3,5.0)) }; +----------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------- +{ "x_coordinate": 2.3, "y_coordinate": 5.0 } +-------------------------------------------- + +[[get_points]] +get_points +^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +get_points(spatial_object) +-------------------------- +* Returns an ordered array of the points forming the spatial object +`spatial_object`. +* Arguments: +** `spatial_object` : a `point`, `line`, `rectangle`, `circle`, or +`polygon`. +* Return Value: +** an `array` of the points forming the spatial object `spatial_object`, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-spatial-object input value will cause a type error. +* Example: ++ +------------------------------------------------------------- +get_points(create_polygon([1.0,1.0,2.0,2.0,3.0,3.0,4.0,4.0])) +------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------- +[ point("1.0,1.0"), point("2.0,2.0"), point("3.0,3.0"), point("4.0,4.0") ] +-------------------------------------------------------------------------- + +[[get_centerget_radius]] +get_center/get_radius +^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------------------- +get_center(circle_expression) or get_radius(circle_expression) +-------------------------------------------------------------- +* Returns the center and the radius of a circle `circle_expression`, +respectively. +* Arguments: +** `circle_expression` : a `circle`. 
+* Return Value: +** a `point` or `double`, represent the center or radius of the circle +`circle_expression`. +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-circle input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------- +{ + "circle_radius": get_radius(create_circle(create_point(6.0,3.0), 1.0)), + "circle_center": get_center(create_circle(create_point(6.0,3.0), 1.0)) +}; +------------------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------- +{ "circle_radius": 1.0, "circle_center": point("6.0,3.0") } +----------------------------------------------------------- + +[[spatial_distance]] +spatial_distance +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------- +spatial_distance(point1, point2) +-------------------------------- +* Returns the Euclidean distance between `point1` and `point2`. +* Arguments: +** `point1` : a `point`. +** `point2` : a `point`. +* Return Value: +** a `double` as the Euclidean distance between `point1` and `point2`. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-point input value will cause a type error. +* Example: ++ +---------------------------------------------------------------- +spatial_distance(point("47.44,80.65"), create_point(30.0,70.0)); +---------------------------------------------------------------- +* The expected result is: ++ +------------------ +20.434678857275934 +------------------ + +[[spatial_area]] +spatial_area +^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------- +spatial_area(spatial_2d_expression) +----------------------------------- +* Returns the spatial area of `spatial_2d_expression`. 
+* Arguments: +** `spatial_2d_expression` : a `rectangle`, `circle`, or `polygon`. +* Return Value: +** a `double` representing the area of `spatial_2d_expression`. +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-2d-spatial-object will cause a type error. +* Example: + +-------------------------------------------------------- +spatial_area(create_circle(create_point(0.0,0.0), 5.0)); +-------------------------------------------------------- +* The expected result is: + +----------- +78.53981625 +----------- + +[[spatial_intersect]] +spatial_intersect +^^^^^^^^^^^^^^^^^ + +* Syntax: + +--------------------------------------------------- +spatial_intersect(spatial_object1, spatial_object2) +--------------------------------------------------- +* Checks whether `spatial_object1` and `spatial_object2` spatially intersect each other. +* Arguments: +** `spatial_object1` : a `point`, `line`, `rectangle`, `circle`, or +`polygon`. +** `spatial_object2` : a `point`, `line`, `rectangle`, `circle`, or +`polygon`. +* Return Value: +** a `boolean` representing whether `spatial_object1` and +`spatial_object2` spatially overlap with each other, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-spatial-object input value will cause a type error. 
+* Example: ++ +------------------------------------------------------------------------------------------------------------ +spatial_intersect(point("39.28,70.48"), create_rectangle(create_point(30.0,70.0), create_point(40.0,80.0))); +------------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +---- +true +---- + +[[spatial_cell]] +spatial_cell +^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------ +spatial_cell(point1, point2, x_increment, y_increment) +------------------------------------------------------ +* Returns the grid cell that `point1` belongs to. +* Arguments: +** `point1` : a `point` representing the point of interest that its grid +cell will be returned. +** `point2` : a `point` representing the origin of the grid. +** `x_increment` : a `double`, represents X increments. +** `y_increment` : a `double`, represents Y increments. +* Return Value: +** a `rectangle` representing the grid cell that `point1` belongs to, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first or second argument is any other non-point value, +*** or, the second or third argument is any other non-double value. 
+* Example: ++ +---------------------------------------------------------------------- +spatial_cell(point("39.28,70.48"), create_point(20.0,50.0), 5.5, 6.0); +---------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------- +rectangle("36.5,68.0 42.0,74.0"); +--------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc new file mode 100644 index 00000000000..dd83c9bd482 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc @@ -0,0 +1,217 @@ +[[similarity-functions]] +Similarity Functions +~~~~~~~~~~~~~~~~~~~~ + +AsterixDB supports queries with different similarity functions, +including http://en.wikipedia.org/wiki/Levenshtein_distance[edit +distance] and https://en.wikipedia.org/wiki/Jaccard_index[Jaccard]. + +[[edit_distance]] +edit_distance +^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------- +edit_distance(expression1, expression2) +--------------------------------------- +* Returns the edit distance of `expression1` and `expression2`. +* Arguments: +** `expression1` : a `string` or a homogeneous `array` of a comparable +item type. +** `expression2` : The same type as `expression1`. +* Return Value: +** an `bigint` that represents the edit distance between `expression1` +and `expression2`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-string input value will cause a type error. +* Note: an +link:similarity.html#UsingIndexesToSupportSimilarityQueries[n_gram +index] can be utilized for this function. 
+* Example: ++ +-------------------------------------------------- +edit_distance("SuzannaTillson", "Suzanna Tilson"); +-------------------------------------------------- +* The expected result is: ++ +- +2 +- + +[[edit_distance_check]] +edit_distance_check +^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------------- +edit_distance_check(expression1, expression2, threshold) +-------------------------------------------------------- +* Checks whether the edit distance of `expression1` and `expression2` is +within a given threshold. +* Arguments: +** `expression1` : a `string` or a homogeneous `array` of a comparable +item type. +** `expression2` : The same type as `expression1`. +** `threshold` : a `bigint` that represents the distance threshold. +* Return Value: +** an `array` with two items: +*** The first item contains a `boolean` value representing whether the +edit distance of `expression1` and `expression2` is within the given +threshold. +*** The second item contains an `integer` that represents the edit +distance of `expression1` and `expression2` if the first item is true. +*** If the first item is false, then the second item is set to +2147483647. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first or second argument is any other non-string value, +*** or, the third argument is any other non-bigint value. +* Note: an +link:similarity.html#UsingIndexesToSupportSimilarityQueries[n_gram +index] can be utilized for this function. 
+* Example: ++ +-------------------------------------- +edit_distance_check("happy","hapr",2); +-------------------------------------- +* The expected result is: ++ +----------- +[ true, 2 ] +----------- + +[[edit_distance_contains]] +edit_distance_contains +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------------------- +edit_distance_contains(expression1, expression2, threshold) +----------------------------------------------------------- +* Checks whether `expression1` contains `expression2` with an +http://en.wikipedia.org/wiki/Levenshtein_distance[edit distance] within +a given threshold. +* Arguments: +** `expression1` : a `string` or a homogeneous `array` of a comparable +item type. +** `expression2` : The same type as `expression1`. +** `threshold` : a `bigint` that represents the distance threshold. +* Return Value: +** an `array` with two items: +*** The first item contains a `boolean` value representing whether +`expression1` can contain `expression2`. +*** The second item contains an `integer` that represents the required +edit distance for `expression1` to contain `expression2` if the first +item is true. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first or second argument is any other non-string value, +*** or, the third argument is any other non-bigint value. +* Note: an +link:similarity.html#UsingIndexesToSupportSimilarityQueries[n_gram +index] can be utilized for this function. 
+* Example: ++ +----------------------------------------- +edit_distance_contains("happy","hapr",2); +----------------------------------------- +* The expected result is: ++ +----------- +[ true, 1 ] +----------- + +[[similarity_jaccard]] +similarity_jaccard +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------- +similarity_jaccard(array1, array2) +---------------------------------- +* Returns the http://en.wikipedia.org/wiki/Jaccard_index[Jaccard +similarity] of `array1` and `array2`. +* Arguments: +** `array1` : an `array` or `multiset`. +** `array2` : an `array` or `multiset`. +* Return Value: +** a `float` that represents the Jaccard similarity of `array1` and +`array2`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** `missing` if any element in any input array is `missing`, +** `null` if any element in any input array is `null` but no element in +the input array is `missing`, +** any other non-array input value or non-integer element in any input +array will cause a type error. +* Note: a +link:similarity.html#UsingIndexesToSupportSimilarityQueries[keyword +index] can be utilized for this function. +* Example: ++ +------------------------------------------ +similarity_jaccard([1,5,8,9], [1,5,9,10]); +------------------------------------------ +* The expected result is: ++ +--- +0.6 +--- + +[[similarity_jaccard_check]] +similarity_jaccard_check +^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------------- +similarity_jaccard_check(array1, array2, threshold) +--------------------------------------------------- +* Checks whether `array1` and `array2` have a +http://en.wikipedia.org/wiki/Jaccard_index[Jaccard similarity] greater +than or equal to threshold. Again, the “check” version of Jaccard is +faster than the "non_check" version. +* Arguments: +** `array1` : an `array` or `multiset`. 
+** `array2` : an `array` or `multiset`. +** `threshold` : a `double` that represents the similarity threshold. +* Return Value: +** an `array` with two items: +*** The first item contains a `boolean` value representing whether +`array1` and `array2` are similar. +*** The second item contains a `float` that represents the Jaccard +similarity of `array1` and `array2` if it is greater than or equal to +the threshold, or 0 otherwise. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** `missing` if any element in any input array is `missing`, +** `null` if any element in any input array is `null` but no element in +the input array is `missing`, +** a type error will be raised if: +*** the first or second argument is any other non-array value, +*** or, the third argument is any other non-double value. +* Note: a +link:similarity.html#UsingIndexesToSupportSimilarityQueries[keyword +index] can be utilized for this function. +* Example: + +----------------------------------------------------- +similarity_jaccard_check([1,5,8,9], [1,5,9,10], 0.6); +----------------------------------------------------- +* The expected result is: + +-------------- +[ false, 0.0 ] +-------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc new file mode 100644 index 00000000000..391e7f02c3e --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc @@ -0,0 +1,32 @@ +[[tokenizing-functions]] +Tokenizing Functions +~~~~~~~~~~~~~~~~~~~~ + +[[word_tokens]] +word_tokens +^^^^^^^^^^^ + +* Syntax: + +------------------- +word_tokens(string) +------------------- +* Returns an array of word tokens of `string` using non-alphanumeric +characters as delimiters. +* Arguments: +** `string` : a `string` that will be tokenized. 
+* Return Value: +** an `array` of `string` word tokens, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-string input value will cause a type error. +* Example: ++ +------------------------------------------ +word_tokens("I like the phone, awesome!"); +------------------------------------------ +* The expected result is: ++ +------------------------------------------ +[ "i", "like", "the", "phone", "awesome" ] +------------------------------------------ diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc new file mode 100644 index 00000000000..ccc611d4253 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc @@ -0,0 +1,306 @@ +[[interval_before-interval_after]] +interval_before, interval_after +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------- +interval_before(interval1, interval2) +interval_after(interval1, interval2) +------------------------------------- +* These two functions check whether an interval happens before/after +another interval. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_before(interval1, interval2)` is true if and only if +`interval1.end < interval2.start`, and +`interval_after(interval1, interval2)` is true if and only if +`interval1.start > interval2.end`. +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +--------------------------------------------------------------------------------------- +{ + "interval_before": interval_before(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2005-05-01"), date("2012-09-09"))), + "interval_after": interval_after(interval(date("2005-05-01"), date("2012-09-09")), + interval(date("2000-01-01"), date("2005-01-01"))) +}; +--------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------- +{ "interval_before": true, "interval_after": true } +--------------------------------------------------- + +[[interval_covers-interval_covered_by]] +interval_covers, interval_covered_by +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------- +interval_covers(interval1, interval2) +interval_covered_by(interval1, interval2) +----------------------------------------- +* These two functions check whether one interval covers the other +interval. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_covers(interval1, interval2)` is true if and only if ++ +interval1.start <= interval2.start AND interval1.end >= interval2.end ++ +`interval_covered_by(interval1, interval2)` is true if and only if ++ +interval2.start <= interval1.start AND interval2.end >= interval1.end +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +---------------------------------------------------------------------------------------------- +{ + "interval_covers": interval_covers(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2000-03-01"), date("2004-09-09"))), + "interval_covered_by": interval_covered_by(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2004-09-10"), date("2012-08-01"))) +}; +---------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------- +{ "interval_covers": true, "interval_covered_by": true } +-------------------------------------------------------- + +[[interval_overlaps-interval_overlapped_by]] +interval_overlaps, interval_overlapped_by +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------- +interval_overlaps(interval1, interval2) +interval_overlapped_by(interval1, interval2) +-------------------------------------------- +* These functions check whether two intervals overlap with each other. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_overlaps(interval1, interval2)` is true if and only if ++ +interval1.start < interval2.start AND interval2.end > interval1.end AND +interval1.end > interval2.start ++ +`interval_overlapped_by(interval1, interval2)` is true if and only if ++ +----------------------------------- +interval2.start < interval1.start +AND interval1.end > interval2.end +AND interval2.end > interval1.start +----------------------------------- +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. ++ +Note that `interval_overlaps` and `interval_overlapped_by` are following +the Allen's relations on the definition of overlap. 
+* Examples: ++ +-------------------------------------------------------------------------------------------- +{ + "overlaps": interval_overlaps(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2004-05-01"), date("2012-09-09"))), + "overlapped_by": interval_overlapped_by(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2004-05-01"), date("2012-09-09"))) +}; +-------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------- +{ "overlaps": true, "overlapped_by": true } +------------------------------------------- + +[[interval_overlapping]] +interval_overlapping +^^^^^^^^^^^^^^^^^^^^ + +Note that `interval_overlapping` is not an Allen's Relation, but +syntactic sugar we added for the case that the intersection of two +intervals is not empty. Basically this function returns true if any of +these functions return true: `interval_overlaps`, +`interval_overlapped_by`, `interval_covers`, or `interval_covered_by`. + +* Syntax: ++ +------------------------------------------ +interval_overlapping(interval1, interval2) +------------------------------------------ +* This function checks whether two intervals share any points with each +other. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_overlapping(interval1, interval2)` is true if ++ +interval1.start < interval2.end AND interval1.end > interval2.start +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +----------------------------------------------------------------------------------------- +{ + "overlapping1": interval_overlapping(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2004-05-01"), date("2012-09-09"))), + "overlapping2": interval_overlapping(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2004-09-10"), date("2006-12-31"))) +}; +----------------------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------- +{ "overlapping1": true, "overlapping2": true } +---------------------------------------------- + +[[interval_meets-interval_met_by]] +interval_meets, interval_met_by +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------- +interval_meets(interval1, interval2) +interval_met_by(interval1, interval2) +------------------------------------- +* These two functions check whether an interval meets with another +interval. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_meets(interval1, interval2)` is true if and only if +`interval1.end = interval2.start`, and +`interval_met_by(interval1, interval2)` is true if and only if +`interval1.start = interval2.end`. If any of the two inputs is `null`, +`null` is returned. +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +---------------------------------------------------------------------------- +{ + "meets": interval_meets(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2005-01-01"), date("2012-09-09"))), + "metby": interval_met_by(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2004-09-10"), date("2006-08-01"))) +}; +---------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------- +{ "meets": true, "metby": true } +-------------------------------- + +[[interval_starts-interval_started_by]] +interval_starts, interval_started_by +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------- +interval_starts(interval1, interval2) +interval_started_by(interval1, interval2) +----------------------------------------- +* These two functions check whether one interval starts with the other +interval. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_starts(interval1, interval2)` returns true if and only if ++ +interval1.start = interval2.start AND interval1.end <= interval2.end ++ +`interval_started_by(interval1, interval2)` returns true if and only if ++ +---------------------------------- +interval1.start = interval2.start +AND interval2.end <= interval1.end +---------------------------------- +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +---------------------------------------------------------------------------------------------- +{ + "interval_starts": interval_starts(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("2000-01-01"), date("2012-09-09"))), + "interval_started_by": interval_started_by(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2006-08-01"), date("2006-08-02"))) +}; +---------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------- +{ "interval_starts": true, "interval_started_by": true } +-------------------------------------------------------- + +[[interval_ends-interval_ended_by]] +interval_ends, interval_ended_by +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------- +interval_ends(interval1, interval2) +interval_ended_by(interval1, interval2) +--------------------------------------- +* These two functions check whether one interval ends with the other +interval. +* Arguments: +** `interval1`, `interval2`: two intervals to be compared +* Return Value: +** a `boolean` value. Specifically, +`interval_ends(interval1, interval2)` returns true if and only if ++ +interval1.end = interval2.end AND interval1.start >= interval2.start ++ +`interval_ended_by(interval1, interval2)` returns true if and only if ++ +interval2.end = interval1.end AND interval2.start >= interval1.start +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. 
+* Examples: ++ +------------------------------------------------------------------------------------------ +{ + "interval_ends": interval_ends(interval(date("2000-01-01"), date("2005-01-01")), + interval(date("1998-01-01"), date("2005-01-01"))), + "interval_ended_by": interval_ended_by(interval(date("2006-08-01"), date("2007-03-01")), + interval(date("2006-09-10"), date("2007-03-01"))) +}; +------------------------------------------------------------------------------------------ +* The expected result is: ++ +---------------------------------------------------- +{ "interval_ends": true, "interval_ended_by": true } +---------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc new file mode 100644 index 00000000000..c2276773474 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc @@ -0,0 +1,993 @@ +[[temporal-functions]] +Temporal Functions +~~~~~~~~~~~~~~~~~~ + +[[get_yearget_monthget_dayget_hourget_minuteget_secondget_millisecond]] +get_year/get_month/get_day/get_hour/get_minute/get_second/get_millisecond +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------------------------------------------------- +get_year/get_month/get_day/get_hour/get_minute/get_second/get_millisecond(temporal_value) +----------------------------------------------------------------------------------------- +* Accessors for accessing fields in a temporal value +* Arguments: +** `temporal_value` : a temporal value represented as one of the +following types: `date`, `datetime`, `time`, and `duration`. 
+* Return Value: +** a `bigint` value representing the field to be extracted, +** `missing` if the argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-temporal input value will cause a type error. +* Example: ++ +--------------------------------------------------------------- +{ + "year": get_year(date("2010-10-30")), + "month": get_month(datetime("1987-11-19T23:49:23.938")), + "day": get_day(date("2010-10-30")), + "hour": get_hour(time("12:23:34.930+07:00")), + "min": get_minute(duration("P3Y73M632DT49H743M3948.94S")), + "second": get_second(datetime("1987-11-19T23:49:23.938")), + "ms": get_millisecond(duration("P3Y73M632DT49H743M3948.94S")) +}; +--------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------- +{ "year": 2010, "month": 11, "day": 30, "hour": 5, "min": 28, "second": 23, "ms": 94 } +-------------------------------------------------------------------------------------- + +[[adjust_datetime_for_timezone]] +adjust_datetime_for_timezone +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------- +adjust_datetime_for_timezone(datetime, string) +---------------------------------------------- +* Adjusts the given datetime `datetime` by applying the timezone +information `string`. +* Arguments: +** `datetime` : a `datetime` value to be adjusted. +** `string` : a `string` representing the timezone information. +* Return Value: +** a `string` value representing the new datetime after being adjusted +by the timezone information, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-datetime value, +*** or, the second argument is any other non-string value. 
+* Example: ++ +------------------------------------------------------------------------ +adjust_datetime_for_timezone(datetime("2008-04-26T10:10:00"), "+08:00"); +------------------------------------------------------------------------ +* The expected result is: ++ +------------------------------- +"2008-04-26T18:10:00.000+08:00" +------------------------------- + +[[adjust_time_for_timezone]] +adjust_time_for_timezone +^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------- +adjust_time_for_timezone(time, string) +-------------------------------------- +* Adjusts the given time `time` by applying the timezone information +`string`. +* Arguments: +** `time` : a `time` value to be adjusted. +** `string` : a `string` representing the timezone information. +* Return Value: +** a `string` value representing the new time after being adjusted by +the timezone information, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-time value, +*** or, the second argument is any other non-string value. +* Example: ++ +-------------------------------------------------------------------------------------------- +adjust_time_for_timezone(get_time_from_datetime(datetime("2008-04-26T10:10:00")), "+08:00"); +-------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------- +"18:10:00.000+08:00" +-------------------- + +[[calendar_duration_from_datetime]] +calendar_duration_from_datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------------------- +calendar_duration_from_datetime(datetime, duration_value) +--------------------------------------------------------- +* Gets a user-friendly representation of the duration `duration_value` +based on the given datetime `datetime`. 
+* Arguments: +** `datetime` : a `datetime` value to be used as the reference time +point. +** `duration_value` : a `duration` value to be converted. +* Return Value: +** a `duration` value with the duration as `duration_value` but with a +user-friendly representation, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-datetime value, +*** or, the second argument is any other non-duration input value. +* Example: ++ +----------------------------------------------------------------------- +calendar_duration_from_datetime( + datetime("2016-03-26T10:10:00"), + datetime("2016-03-26T10:10:00") - datetime("2011-01-01T00:00:00") +); +----------------------------------------------------------------------- +* The expected result is: ++ +--------------------------- +duration("P5Y2M24DT10H10M") +--------------------------- + +[[get_year_month_durationget_day_time_duration]] +get_year_month_duration/get_day_time_duration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------- +get_year_month_duration/get_day_time_duration(duration_value) +------------------------------------------------------------- +* Extracts the correct `duration` subtype from `duration_value`. +* Arguments: +** `duration_value` : a `duration` value to be converted. +* Return Value: +** a `year_month_duration` value or a `day_time_duration` value, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-duration input value will cause a type error. 
+* Example: ++ +------------------------------------------------- +get_year_month_duration(duration("P12M50DT10H")); +------------------------------------------------- +* The expected result is: ++ +-------------------------- +year_month_duration("P1Y") +-------------------------- + +[[months_from_year_month_durationms_from_day_time_duration]] +months_from_year_month_duration/ms_from_day_time_duration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------------------- +months_from_year_month_duration/ms_from_day_time_duration(duration_value) +------------------------------------------------------------------------- +* Extracts the number of months or the number of milliseconds from the +`duration` subtype. +* Arguments: +** `duration_value` : a `duration` of the correct subtype. +* Return Value: +** a `bigint` representing the number of months/milliseconds, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-duration input value will cause a type error. 
+* Example: ++ +---------------------------------------------------------------------------------------------- +{ + "months": months_from_year_month_duration(get_year_month_duration(duration("P5Y7MT50M"))), + "milliseconds": ms_from_day_time_duration(get_day_time_duration(duration("P5Y7MT50M"))) +}; +---------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------- +{"months": 67, "milliseconds": 3000000} +--------------------------------------- + +[[duration_from_monthsduration_from_ms]] +duration_from_months/duration_from_ms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------------- +duration_from_months/duration_from_ms(number_value) +--------------------------------------------------- +* Creates a `duration` from `number_value`. +* Arguments: +** `number_value` : a `bigint` representing the number of +months/milliseconds +* Return Value: +** a `duration` containing `number_value` value for months/milliseconds, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-duration input value will cause a type error. +* Example: ++ +------------------------ +duration_from_months(8); +------------------------ +* The expected result is: ++ +--------------- +duration("P8M") +--------------- + +[[duration_from_interval]] +duration_from_interval +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------- +duration_from_interval(interval_value) +-------------------------------------- +* Creates a `duration` from `interval_value`. +* Arguments: +** `interval_value` : an `interval` value +* Return Value: +** a `duration` representing the time in the `interval_value` +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-duration input value will cause a type error. 
+* Example: ++ +--------------------------------------------------------------------------------------------------------------------- +{ + "dr1" : duration_from_interval(interval(date("2010-10-30"), date("2010-12-21"))), + "dr2" : duration_from_interval(interval(datetime("2012-06-26T01:01:01.111"), datetime("2012-07-27T02:02:02.222"))), + "dr3" : duration_from_interval(interval(time("12:32:38"), time("20:29:20"))), + "dr4" : duration_from_interval(null) +}; +--------------------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------- +{ + "dr1": day_time_duration("P52D"), + "dr2": day_time_duration("P31DT1H1M1.111S"), + "dr3": day_time_duration("PT7H56M42S"), + "dr4": null +} +---------------------------------------------- + +[[current_date]] +current_date +^^^^^^^^^^^^ + +* Syntax: ++ +-------------- +current_date() +-------------- +* Gets the current date. +* Arguments: None +* Return Value: +** a `date` value of the date when the function is called. + +[[current_time]] +current_time +^^^^^^^^^^^^ + +* Syntax: ++ +-------------- +current_time() +-------------- +* Get the current time +* Arguments: None +* Return Value: +** a `time` value of the time when the function is called. + +[[current_datetime]] +current_datetime +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------ +current_datetime() +------------------ +* Get the current datetime +* Arguments: None +* Return Value: +** a `datetime` value of the datetime when the function is called. + +[[get_date_from_datetime]] +get_date_from_datetime +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------- +get_date_from_datetime(datetime) +-------------------------------- +* Gets the date value from the given datetime value `datetime`. +* Arguments: +** `datetime`: a `datetime` value to be extracted from. 
+* Return Value: +** a `date` value from the datetime, +** any other non-datetime input value will cause a type error. + +[[get_time_from_datetime]] +get_time_from_datetime +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------- +get_time_from_datetime(datetime) +-------------------------------- +* Get the time value from the given datetime value `datetime` +* Arguments: +** `datetime`: a `datetime` value to be extracted from. +* Return Value: +** a `time` value from the datetime, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-datetime input value will cause a type error. +* Example: ++ +-------------------------------------------------------- +get_time_from_datetime(datetime("2016-03-26T10:10:00")); +-------------------------------------------------------- +* The expected result is: ++ +--------------------- +time("10:10:00.000Z") +--------------------- + +[[day_of_week]] +day_of_week +^^^^^^^^^^^ + +* Syntax: ++ +----------------- +day_of_week(date) +----------------- +* Finds the day of the week for a given date (1-7) +* Arguments: +** `date`: a `date` value (Can also be a `datetime`) +* Return Value: +** a `tinyint` representing the day of the week (1-7), +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-date input value will cause a type error. +* Example: ++ +-------------------------------------------------- +day_of_week(datetime("2012-12-30T12:12:12.039Z")); +-------------------------------------------------- +* The expected result is: ++ +---- +7 +---- + +[[date_from_unix_time_in_days]] +date_from_unix_time_in_days +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------ +date_from_unix_time_in_days(numeric_value) +------------------------------------------ +* Gets a date representing the time after `numeric_value` days since +1970-01-01. 
+* Arguments: +** `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the number of days. +* Return Value: +** a `date` value as the time after `numeric_value` days since +1970-01-01, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. + +[[datetime_from_unix_time_in_ms]] +datetime_from_unix_time_in_ms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------- +datetime_from_unix_time_in_ms(numeric_value) +-------------------------------------------- +* Gets a datetime representing the time after `numeric_value` +milliseconds since 1970-01-01T00:00:00Z. +* Arguments: +** `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the number of milliseconds. +* Return Value: +** a `datetime` value as the time after `numeric_value` milliseconds +since 1970-01-01T00:00:00Z, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. + +[[datetime_from_unix_time_in_secs]] +datetime_from_unix_time_in_secs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------- +datetime_from_unix_time_in_secs(numeric_value) +---------------------------------------------- +* Gets a datetime representing the time after `numeric_value` seconds +since 1970-01-01T00:00:00Z. +* Arguments: +** `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the number of seconds. +* Return Value: +** a `datetime` value as the time after `numeric_value` seconds since +1970-01-01T00:00:00Z, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+ +[[datetime_from_date_time]] +datetime_from_date_time +^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: + +datetime_from_date_time(date,time) + +* Gets a datetime representing the combination of `date` and `time` +* Arguments: +** `date`: a `date` value +** `time`: a `time` value +* Return Value: +** a `datetime` value by combining `date` and `time`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-date value, +*** or, the second argument is any other non-time value. + +[[time_from_unix_time_in_ms]] +time_from_unix_time_in_ms +^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------- +time_from_unix_time_in_ms(numeric_value) +---------------------------------------- +* Gets a time representing the time after `numeric_value` milliseconds +since 00:00:00.000Z. +* Arguments: +** `numeric_value`: a `tinyint`/`smallint`/`integer`/`bigint` value +representing the number of milliseconds. +* Return Value: +** a `time` value as the time after `numeric_value` milliseconds since +00:00:00.000Z, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-numeric input value will cause a type error. 
+* Example: ++ +----------------------------------------------------------- +{ + "date": date_from_unix_time_in_days(15800), + "datetime": datetime_from_unix_time_in_ms(1365139700000), + "time": time_from_unix_time_in_ms(3748) +}; +----------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------------- +{ "date": date("2013-04-05"), "datetime": datetime("2013-04-05T05:28:20.000Z"), "time": time("00:00:03.748Z") } +--------------------------------------------------------------------------------------------------------------- + +[[unix_time_from_date_in_days]] +unix_time_from_date_in_days +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------- +unix_time_from_date_in_days(date_value) +--------------------------------------- +* Gets an integer value representing the number of days since 1970-01-01 +for `date_value`. +* Arguments: +** `date_value`: a `date` value. +* Return Value: +** a `bigint` value representing the number of days, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-date input value will cause a type error. + +[[unix_time_from_datetime_in_ms]] +unix_time_from_datetime_in_ms +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------- +unix_time_from_datetime_in_ms(datetime_value) +--------------------------------------------- +* Gets an integer value representing the time in milliseconds since +1970-01-01T00:00:00Z for `datetime_value`. +* Arguments: +** `datetime_value` : a `datetime` value. +* Return Value: +** a `bigint` value representing the number of milliseconds, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-datetime input value will cause a type error. 
+ +[[unix_time_from_datetime_in_secs]] +unix_time_from_datetime_in_secs +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------- +unix_time_from_datetime_in_secs(datetime_value) +----------------------------------------------- +* Gets an integer value representing the time in seconds since +1970-01-01T00:00:00Z for `datetime_value`. +* Arguments: +** `datetime_value` : a `datetime` value. +* Return Value: +** a `bigint` value representing the number of seconds, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-datetime input value will cause a type error. + +[[unix_time_from_time_in_ms]] +unix_time_from_time_in_ms +^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------- +unix_time_from_time_in_ms(time_value) +------------------------------------- +* Gets an integer value representing the time in milliseconds since +00:00:00.000Z for `time_value`. +* Arguments: +** `time_value` : a `time` value. +* Return Value: +** a `bigint` value representing the number of milliseconds, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-time input value will cause a type error. 
+* Example: ++ +----------------------------------------------------------- +{ + "date": date_from_unix_time_in_days(15800), + "datetime": datetime_from_unix_time_in_ms(1365139700000), + "time": time_from_unix_time_in_ms(3748) +} +----------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------------- +{ "date": date("2013-04-05"), "datetime": datetime("2013-04-05T05:28:20.000Z"), "time": time("00:00:03.748Z") } +--------------------------------------------------------------------------------------------------------------- + +[[parse_dateparse_timeparse_datetime]] +parse_date/parse_time/parse_datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------------------------- +parse_date/parse_time/parse_datetime(date,formatting_expression) +---------------------------------------------------------------- +* Creates a `date/time/datetime` value by treating `date` with +formatting `formatting_expression` +* Arguments: +** `date`: a `string` value representing the `date/time/datetime`. +** `formatting_expression` a `string` value providing the formatting for +`date_expression`. Characters used to create date expression: +** `h` hours +** `m` minutes +** `s` seconds +** `n` milliseconds +** `a` am/pm +** `z` timezone +** `Y` year +** `M` month +** `D` day +** `W` weekday +** `_`, `'`, `/`, `.`, `,`, `T` separators for both time and date +* Return Value: +** a `date/time/datetime` value corresponding to `date`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-string value, +*** the second argument is any other non-string value. 
+* Example: ++ +-------------------------- +parse_time("30:30","m:s"); +-------------------------- +* The expected result is: ++ +--------------------- +time("00:30:30.000Z") +--------------------- + +[[print_dateprint_timeprint_datetime]] +print_date/print_time/print_datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------------------------- +print_date/print_time/print_datetime(date,formatting_expression) +---------------------------------------------------------------- +* Creates a `string` representing a `date/time/datetime` value of the +`date` using the formatting `formatting_expression` +* Arguments: +** `date`: a `date/time/datetime` value. +** `formatting_expression` a `string` value providing the formatting for +`date_expression`. Characters used to create date expression: +** `h` hours +** `m` minutes +** `s` seconds +** `n` milliseconds +** `a` am/pm +** `z` timezone +** `Y` year +** `M` month +** `D` day +** `W` weekday +** `_`, `'`, `/`, `.`, `,`, `T` separators for both time and date +* Return Value: +** a `string` value corresponding to `date`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-date value, +*** the second argument is any other non-string value. +* Example: ++ +---------------------------------------- +print_time(time("00:30:30.000Z"),"m:s"); +---------------------------------------- +* The expected result is: ++ +------- +"30:30" +------- + +[[get_interval_start-get_interval_end]] +get_interval_start, get_interval_end +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------------- +get_interval_start/get_interval_end(interval) +--------------------------------------------- +* Gets the start/end of the given interval. +* Arguments: +** `interval`: the interval to be accessed. 
+* Return Value: +** a `time`, `date`, or `datetime` (depending on the time instances of +the interval) representing the starting or ending time, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-interval value will cause a type error. +* Example: ++ +----------------------------------------------------------------------------- +{ + "start": get_interval_start(interval_start_from_date("1984-01-01", "P1Y")), + "end": get_interval_end(interval_start_from_date("1984-01-01", "P1Y")) +}; +----------------------------------------------------------------------------- +* The expected result is: ++ +---------------------------------------------------------- +{ "start": date("1984-01-01"), "end": date("1985-01-01") } +---------------------------------------------------------- + +[[get_interval_start_dateget_interval_start_datetimeget_interval_start_time-get_interval_end_dateget_interval_end_datetimeget_interval_end_time]] +get_interval_start_date/get_interval_start_datetime/get_interval_start_time, +get_interval_end_date/get_interval_end_datetime/get_interval_end_time +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------------------------------------------------------------------------------------------------------------------- +get_interval_start_date/get_interval_start_datetime/get_interval_start_time/get_interval_end_date/get_interval_end_datetime/get_interval_end_time(interval) +----------------------------------------------------------------------------------------------------------------------------------------------------------- +* Gets the start/end of the given interval for the specific +date/datetime/time type. +* Arguments: +** `interval`: the interval to be accessed. 
+* Return Value: +** a `time`, `date`, or `datetime` (depending on the function) +representing the starting or ending time, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-interval value will cause a type error. +* Example: ++ +---------------------------------------------------------------------------------------------------------- +{ + "start1": get_interval_start_date(interval_start_from_date("1984-01-01", "P1Y")), + "end1": get_interval_end_date(interval_start_from_date("1984-01-01", "P1Y")), + "start2": get_interval_start_datetime(interval_start_from_datetime("1984-01-01T08:30:00.000", "P1Y1H")), + "end2": get_interval_end_datetime(interval_start_from_datetime("1984-01-01T08:30:00.000", "P1Y1H")), + "start3": get_interval_start_time(interval_start_from_time("08:30:00.000", "P1H")), + "end3": get_interval_end_time(interval_start_from_time("08:30:00.000", "P1H")) +}; +---------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------- +{ + "start1": date("1984-01-01"), + "end1": date("1985-01-01"), + "start2": datetime("1984-01-01T08:30:00.000Z"), + "end2": datetime("1985-01-01T09:30:00.000Z"), + "start3": time("08:30:00.000Z"), + "end3": time("09:30:00.000Z") +} +------------------------------------------------- + +[[get_overlapping_interval]] +get_overlapping_interval +^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------------------- +get_overlapping_interval(interval1, interval2) +---------------------------------------------- +* Gets the interval that is overlapping the two given intervals, +`interval1` and `interval2`. +* Arguments: +** `interval1`: an `interval` value +** `interval2`: an `interval` value +* Return Value: +** an `interval` that is overlapping `interval1` and `interval2`. If +`interval1` and `interval2` do not overlap, `null` is returned.
Note each +interval must be of the same type. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-interval input value will cause a type error. +* Example: ++ +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +{ "overlap1": get_overlapping_interval(interval(time("11:23:39"), time("18:27:19")), interval(time("12:23:39"), time("23:18:00"))), + "overlap2": get_overlapping_interval(interval(time("12:23:39"), time("18:27:19")), interval(time("07:19:39"), time("09:18:00"))), + "overlap3": get_overlapping_interval(interval(date("1980-11-30"), date("1999-09-09")), interval(date("2013-01-01"), date("2014-01-01"))), + "overlap4": get_overlapping_interval(interval(date("1980-11-30"), date("2099-09-09")), interval(date("2013-01-01"), date("2014-01-01"))), + "overlap5": get_overlapping_interval(interval(datetime("1844-03-03T11:19:39"), datetime("2000-10-30T18:27:19")), interval(datetime("1989-03-04T12:23:39"), datetime("2009-10-10T23:18:00"))), + "overlap6": get_overlapping_interval(interval(datetime("1989-03-04T12:23:39"), datetime("2000-10-30T18:27:19")), interval(datetime("1844-03-03T11:19:39"), datetime("1888-10-10T23:18:00"))) +}; +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------------------------------------- +{ "overlap1": interval(time("12:23:39.000Z"), time("18:27:19.000Z")), + "overlap2": null, + "overlap3": null, + "overlap4": interval(date("2013-01-01"), date("2014-01-01")), + "overlap5": interval(datetime("1989-03-04T12:23:39.000Z"), datetime("2000-10-30T18:27:19.000Z")), + "overlap6": 
null +} +--------------------------------------------------------------------------------------------------- + +[[interval_bin]] +interval_bin +^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------------------- +interval_bin(time_to_bin, time_bin_anchor, duration_bin_size) +------------------------------------------------------------- +* Returns the `interval` value representing the bin containing the +`time_to_bin` value. +* Arguments: +** `time_to_bin`: a date/time/datetime value representing the time to be +binned. +** `time_bin_anchor`: a date/time/datetime value representing an anchor +of a bin starts. The type of this argument should be the same as the +first `time_to_bin` argument. +** `duration_bin_size`: the duration value representing the size of the +bin, in the type of year_month_duration or day_time_duration. The type +of this duration should be compatible with the type of `time_to_bin`, so +that the arithmetic operation between `time_to_bin` and +`duration_bin_size` is well-defined. Currently AsterixDB supports the +following arithmetic operations: +*** datetime +|- year_month_duration +*** datetime +|- day_time_duration +*** date +|- year_month_duration +*** date +|- day_time_duration +*** time +|- day_time_duration +* Return Value: +** an `interval` value representing the bin containing the `time_to_bin` +value. Note that the internal type of this interval value should be the +same as the `time_to_bin` type, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument or the second argument is any other +non-date/non-time/non-datetime value, +*** or, the third argument is any other +non-year_month_duration/non-day_time_duration value. 
+* Example: ++ +------------------------------------------------------------------------------------------------------------------------------ +{ + "bin1": interval_bin(date("2010-10-30"), date("1990-01-01"), year_month_duration("P1Y")), + "bin2": interval_bin(datetime("1987-11-19T23:49:23.938"), datetime("1990-01-01T00:00:00.000Z"), year_month_duration("P6M")), + "bin3": interval_bin(time("12:23:34.930+07:00"), time("00:00:00"), day_time_duration("PT1M")), + "bin4": interval_bin(datetime("1987-11-19T23:49:23.938"), datetime("2013-01-01T00:00:00.000"), day_time_duration("PT24H")) +}; +------------------------------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +----------------------------------------------------------------------------------------------- +{ + "bin1": interval(date("2010-01-01"),date("2011-01-01")), + "bin2": interval(datetime("1987-07-01T00:00:00.000Z"), datetime("1988-01-01T00:00:00.000Z")), + "bin3": interval(time("05:23:00.000Z"), time("05:24:00.000Z")), + "bin4": interval(datetime("1987-11-19T00:00:00.000Z"), datetime("1987-11-20T00:00:00.000Z")) +} +----------------------------------------------------------------------------------------------- + +[[interval_start_from_datetimedatetime]] +interval_start_from_date/time/datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------------------------- +interval_start_from_date/time/datetime(date/time/datetime, duration) +-------------------------------------------------------------------- +* Construct an `interval` value by the given starting +`date`/`time`/`datetime` and the `duration` that the interval lasts. +* Arguments: +** `date/time/datetime`: a `string` representing a `date`, `time` or +`datetime`, or a `date`/`time`/`datetime` value, representing the +starting time point. 
+** `duration`: a `string` or `duration` value representing the duration +of the interval. Note that duration cannot be a negative value. +* Return Value: +** an `interval` value representing the interval starting from the given +time point with the length of duration, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument or the second argument is any other +non-date/non-time/non-datetime value, +*** or, the second argument is any other non-duration value. +* Example: ++ +------------------------------------------------------------------------------------------ +{ + "interval1": interval_start_from_date("1984-01-01", "P1Y"), + "interval2": interval_start_from_time(time("02:23:28.394"), "PT3H24M"), + "interval3": interval_start_from_datetime("1999-09-09T09:09:09.999", duration("P2M30D")) +}; +------------------------------------------------------------------------------------------ +* The expected result is: ++ +--------------------------------------------------------------------------------------------------- +{ + "interval1": interval(date("1984-01-01"), date("1985-01-01")), + "interval2": interval(time("02:23:28.394Z"), time("05:47:28.394Z")), + "interval3": interval(datetime("1999-09-09T09:09:09.999Z"), datetime("1999-12-09T09:09:09.999Z")) +} +--------------------------------------------------------------------------------------------------- + +[[overlap_bins]] +overlap_bins +^^^^^^^^^^^^ + +* Return Value: +** an `interval` value representing the bin containing the `time_to_bin` +value. Note that the internal type of this interval value should be the +same as the `time_to_bin` type. 
+* Syntax: ++ +---------------------------------------------------------- +overlap_bins(interval, time_bin_anchor, duration_bin_size) +---------------------------------------------------------- +* Returns an ordered list of `interval` values representing each bin +that is overlapping the `interval`. +* Arguments: +** `interval`: an `interval` value +** `time_bin_anchor`: a date/time/datetime value representing an anchor +of a bin starts. The type of this argument should be the same as the +first `time_to_bin` argument. +** `duration_bin_size`: the duration value representing the size of the +bin, in the type of year_month_duration or day_time_duration. The type +of this duration should be compatible with the type of `time_to_bin`, so +that the arithmetic operation between `time_to_bin` and +`duration_bin_size` is well-defined. Currently AsterixDB supports the +following arithmetic operations: +*** datetime +|- year_month_duration +*** datetime +|- day_time_duration +*** date +|- year_month_duration +*** date +|- day_time_duration +*** time +|- day_time_duration +* Return Value: +** an ordered list of `interval` values representing each bin that is +overlapping the `interval`. Note that the internal type as `time_to_bin` +and `duration_bin_size`. +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-interval value, +*** or, the second argument is any other non-date/non-time/non-datetime +value, +*** or, the third argument is any other +non-year_month_duration/non-day_time_duration value. 
+* Example: ++ +------------------------------------------------------------------------------------------------------------------------------ +{ + "timebins": overlap_bins(interval(time("17:23:37"), time("18:30:21")), time("00:00:00"), day_time_duration("PT30M")), + "datebins": overlap_bins(interval(date("1984-03-17"), date("2013-08-22")), date("1990-01-01"), year_month_duration("P10Y")), + "datetimebins": overlap_bins(interval(datetime("1800-01-01T23:59:48.938"), datetime("2015-07-26T13:28:30.218")), + datetime("1900-01-01T00:00:00.000"), year_month_duration("P100Y")) +}; +------------------------------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +--------------------------------------------------------------------------------------------------------- +{ + "timebins": [ + interval(time("17:00:00.000Z"), time("17:30:00.000Z")), + interval(time("17:30:00.000Z"), time("18:00:00.000Z")), + interval(time("18:00:00.000Z"), time("18:30:00.000Z")), + interval(time("18:30:00.000Z"), time("19:00:00.000Z")) + ], + "datebins": [ + interval(date("1980-01-01"), date("1990-01-01")), + interval(date("1990-01-01"), date("2000-01-01")), + interval(date("2000-01-01"), date("2010-01-01")), + interval(date("2010-01-01"), date("2020-01-01")) + ], + "datetimebins": [ + interval(datetime("1800-01-01T00:00:00.000Z"), datetime("1900-01-01T00:00:00.000Z")), + interval(datetime("1900-01-01T00:00:00.000Z"), datetime("2000-01-01T00:00:00.000Z")), + interval(datetime("2000-01-01T00:00:00.000Z"), datetime("2100-01-01T00:00:00.000Z")) + ] +}; +--------------------------------------------------------------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc new file mode 100644 index 00000000000..3c66c12a71a --- /dev/null +++ 
b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc @@ -0,0 +1,707 @@ +[[object-functions]] +Object Functions +~~~~~~~~~~~~~~~~ + +[[get_object_fields]] +get_object_fields +^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------- +get_object_fields(input_object) +------------------------------- +* Access the object field names, type and open status for a given +object. +* Arguments: +** `input_object` : a object value. +* Return Value: +** an array of `object` values that include the field_name `string`, +field_type `string`, is_open `boolean` (used for debug purposes only: +`true` if field is open and `false` otherwise), and optional nested +`orderedList` for the values of a nested object, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value, +** any other non-object input value will cause a type error. +* Example: ++ +------------------------------------------------------------------------- +get_object_fields( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"}, + "related": ["Hivestrix", "Preglix", "Apache VXQuery"] + } + ); +------------------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------- +[ + { "field-name": "id", "field-type": "INT64", "is-open": false }, + { "field-name": "project", "field-type": "STRING", "is-open": false }, + { "field-name": "address", "field-type": "RECORD", "is-open": false, + "nested": [ + { "field-name": "city", "field-type": "STRING", "is-open": false }, + { "field-name": "state", "field-type": "STRING", "is-open": false } + ] + }, + { "field-name": + "related", + "field-type": "ORDEREDLIST", + "is-open": false, + "list": [ + { "field-type": "STRING" }, + { "field-type": "STRING" }, + { "field-type": "STRING" } + ] + } +] 
+----------------------------------------------------------------------------------- + +[[get_object_field_value]] +get_object_field_value +^^^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------ +get_object_field_value(input_object, string) +------------------------------------------------ +* Access the field name given in `string` from the object +`input_object`. +* Arguments: +** `input_object` : an `object` value. +** `string` : a `string` representing the top level field name. +* Return Value: +** an `any` value saved in the designated field of the object, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-object value, +*** or, the second argument is any other non-string value. +* Example: ++ +------------------------------------------------------------------------------ +get_object_field_value({ + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"}, + "related": ["Hivestrix", "Preglix", "Apache VXQuery"] + }, + "project" + ); +------------------------------------------------------------------------------ +* The expected result is: ++ +----------- +"AsterixDB" +----------- + +[[object_remove_fields]] +object_remove_fields +^^^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------------------------- +object_remove_fields(input_object, field_names) +----------------------------------------------- +* Remove indicated fields from an object given a list of field names. +* Arguments: +** `input_object`: an object value. +** `field_names`: an array of strings and/or array of array of strings. 
+* Return Value: +** a new object value without the fields listed in the second argument, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-object value, +*** or, the second argument is any other non-array value or recursively +contains non-string items. +* Example: ++ +----------------------------------------------------------------------------- +object_remove_fields( + { + "id":1, + "project":"AsterixDB", + "address":{"city":"Irvine", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"] + }, + [["address", "city"], "related"] + ); +----------------------------------------------------------------------------- +* The expected result is: ++ +----------------------------- +{ + "id":1, + "project":"AsterixDB", + "address":{ "state": "CA" } +} +----------------------------- + +[[object_add_fields]] +object_add_fields +^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------- +object_add_fields(input_object, fields) +--------------------------------------- +* Add fields to a object given a list of field names. +* Arguments: +** `input_object` : a object value. +** `fields`: an array of field descriptor objects where each object has +field_name and field_value. +* Return Value: +** a new object value with the new fields included, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** a type error will be raised if: +*** the first argument is any other non-object value, +*** the second argument is any other non-array value, or contains +non-object items. 
+* Example: ++ +------------------------------------------------------------------------------------------------- +object_add_fields( + { + "id":1, + "project":"AsterixDB", + "address":{"city":"Irvine", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"] + }, + [{"field-name":"employment_location", "field-value":create_point(30.0,70.0)}] + ); +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------- +{ + "id":1, + "project":"AsterixDB", + "address":{"city":"Irvine", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"], + "employment_location": point("30.0,70.0") + } +------------------------------------------------------- + +[[object_merge]] +object_merge +^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------ +object_merge(object1, object2) +------------------------------ +* Merge two different objects into a new object. +* Arguments: +** `object1` : an object value. +** `object2` : an object value. +* Return Value: +** a new object value with fields from both input objects. If a field’s +names in both objects are the same, an exception is issued, +** `missing` if any argument is a `missing` value, +** `null` if any argument is a `null` value but no argument is a +`missing` value, +** any other non-object input value will cause a type error. 
+* Example: ++ +-------------------------------------------------------------------- +object_merge( + { + "id":1, + "project":"AsterixDB", + "address":{"city":"Irvine", "state":"CA"}, + "related":["Hivestrix", "Preglix", "Apache VXQuery"] + }, + { + "user_id": 22, + "employer": "UC Irvine", + "employment_type": "visitor" + } + ); +-------------------------------------------------------------------- +* The expected result is: ++ +------------------------------- +{ + "employment_type": "visitor", + "address": { + "city": "Irvine", + "state": "CA" + }, + "related": [ + "Hivestrix", + "Preglix", + "Apache VXQuery" + ], + "user_id": 22, + "project": "AsterixDB", + "employer": "UC Irvine", + "id": 1 +} +------------------------------- + +[[object_length]] +object_length +^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------- +object_length(input_object) +--------------------------- +* Returns number of top-level fields in the given object +* Arguments: +** `input_object` : an object value. +* Return Value: +** an integer that represents the number of top-level fields in the +given object, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value or any other non-object +value +* Example: ++ +-------------------------------------------------------------- +object_length( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"}, + } + ); +-------------------------------------------------------------- +* The expected result is: ++ +- +3 +- + +[[object_names]] +object_names +^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +object_names(input_object) +-------------------------- +* Returns names of top-level fields in the given object +* Arguments: +** `input_object` : an object value. 
+* Return Value: +** an array with top-level field names of the given object, +** `missing` if the argument is a `missing` value, +** `null` if the argument is a `null` value or any other non-object +value +* Example: ++ +-------------------------------------------------------------- +object_names( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"}, + } + ); +-------------------------------------------------------------- +* The expected result is: ++ +------------------------------ +[ "id", "project", "address" ] +------------------------------ + +[[object_remove]] +object_remove +^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------------- +object_remove(input_object, field_name) +--------------------------------------- +* Returns a new object that has the same fields as the input object +except the field to be removed +* Arguments: +** `input_object` : an object value. +** `field_name` : a string field name. +* Return Value: +** A new object that has the same fields as `input_object` except the +field `field_name`, +** `missing` if the argument `input_object` or `field_name` is missing, +** `null` if the argument `input_object` is `null` or any other +non-object value, or the argument `field_name` is `null` or any other +non-string value. 
+* Example: ++ +------------------------------------------------------------- +object_remove( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + , "address" + ); +------------------------------------------------------------- +* The expected result is: ++ +------------------------- +{ + "id": 1, + "project": "AsterixDB", +} +------------------------- + +[[object_rename]] +object_rename +^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------- +object_rename(input_object, old_field, new_field) +------------------------------------------------- +* Returns a new object that has the same fields as `input_object` with +field `old_field` replaced by `new_field` +* Arguments: +** `input_object` : an object value. +** `old_field` : a string representing the old (original) field name +inside the object `input_object`. +** `new_field` : a string representing the new field name to replace +`old_field` inside the object `input_object`. +* Return Value: +** A new object that has the same fields as `input_object` with field +`old_field` replaced by `new_field`, +** `missing` if any argument is a `missing` value, +** `null` if any argument is `null` or `input_object` is non-object +value, or `old_field` is non-string value, or `new_field` is any +non-string value. 
+* Example: ++ +------------------------------------------------------------- +object_rename( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + , "address" + , "location" + ); +------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------- +{ + "id": 1, + "project": "AsterixDB", + "location": {"city": "Irvine", "state": "CA"} +} +----------------------------------------------- + +[[object_unwrap]] +object_unwrap +^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------- +object_unwrap(input_object) +--------------------------- +* Returns the value of the single name-value pair that appears in +`input_object`. +* Arguments: +** `input_object` : an object value that consists of exactly one +name-value pair. +* Return Value: +** The value of the single name-value pair that appears in +`input_object`, +** `missing` if `input_object` is `missing`, +** `null` if `input_object` is null, or an empty object, or there is +more than one name-value pair in `input_object`, or any non-object +value. +* Example: ++ +---------------------- +object_unwrap( + { + "id": 1 + } + ); +---------------------- +* The expected result is: ++ +--- +{ + 1 +} +--- + +[[object_replace]] +object_replace +^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------------------------- +object_replace(input_object, old_value, new_value) +-------------------------------------------------- +* Returns a new object that has the same fields as `input_object` with +all occurrences of value `old_value` replaced by `new_value` +* Arguments: +** `input_object` : an object value. +** `old_value` : a primitive type value to be replaced by `new_value`. +** `new_value` : a value to replace `old_value`. 
+* Return Value: +** A new object that has the same fields as `input_object` with all +occurrences of value `old_value` replaced by `new_value`, +** `missing` if any argument is a `missing` value, +** `null` if `input_object` or `old_value` is null, +** a type error will be raised if: +*** `old_value` is not a primitive type value. +* Example: ++ +------------------------------------------------------------- +object_replace( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + , "AsterixDB" + , "Apache AsterixDB" + ); +------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------- +{ + "id": 1, + "project": "Apache AsterixDB", + "address": {"city": "Irvine", "state": "CA"} +} +----------------------------------------------- + +[[object_add]] +object_add +^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------- +object_add(input_object, field_name, field_value) +------------------------------------------------- +* Returns a new object that has the same fields as `input_object` as +well as the new field `field_name`. +* Arguments: +** `input_object` : an object value. +** `field_name` : a string representing a field name to be added. +** `field_value` : a value to be assigned to the new field `field_name`. +* Return Value: +** A new object that has the same fields as `input_object` as well as +the new field `field_name`, +** `missing` if `input_object` or `field_name` is `missing`, +** `null` if `input_object` or `field_name` is `null`, or `input_object` +is not an object, or `field_name` is not a string, +** `input_object` if `field_name` already exists in `input_object` or +`field_value` is missing. 
+* Example: ++ +------------------------------------------------------------- +object_add( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + , "company" + , "Apache" + ); +------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------ +{ + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"}, + "company": "Apache" +} +------------------------------------------------ + +[[object_put]] +object_put +^^^^^^^^^^ + +* Syntax: ++ +------------------------------------------------- +object_put(input_object, field_name, field_value) +------------------------------------------------- +* Adds, modifies, or removes a field of an object. +* Arguments: +** `input_object` : an object value. +** `field_name` : a string representing a field name to be added. +** `field_value` : a value to be assigned to the new field `field_name`. +* Return Value: +** a new object that has the same fields as `input_object` as well as +the new field `field_name`, or with updated `field_name` value to +`field_value` if `field_name` already exists in `input_object`, or with +`field_name` removed if `field_name` already exists in `input_object` and +`field_value` is `missing`, +** `missing` if `input_object` or `field_name` is `missing`, +** `null` if `input_object` or `field_name` is `null`, or `input_object` +is not an object, or `field_name` is not a string.
+* Example: ++ +------------------------------------------------------------- +object_put( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + , "project" + , "Apache AsterixDB" + ); +------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------- +{ + "id": 1, + "project": "Apache AsterixDB", + "address": {"city": "Irvine", "state": "CA"} +} +----------------------------------------------- + +[[object_values]] +object_values +^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------- +object_values(input_object) +--------------------------- +* Returns an array of the values of the fields in `input_object`. +* Arguments: +** `input_object` : an object value. +* Return Value: +** An array of the values of the fields in `input_object`, +** `missing` if `input_object` is `missing`, +** `null` if `input_object` is null or any non-object value. +* Example: ++ +------------------------------------------------------------- +object_values( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + ); +------------------------------------------------------------- +* The expected result is: ++ +----------------------------------- +[ + 1, + "AsterixDB", + {"city": "Irvine", "state": "CA"} +] +----------------------------------- + +[[object_pairs]] +object_pairs +^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +object_pairs(input_object) +-------------------------- +* Returns an array of objects describing fields of `input_object`. For +each field of the `input_object` the returned array contains an object +with two fields `name` and `value` which are set to the `input_object`'s +field name and value. +* Arguments: +** `input_object` : an object value.
+* Return Value: +** An array of the `name`/`value` pairs of the fields in `input_object`, +** `missing` if `input_object` is `missing`, +** `null` if `input_object` is null or any non-object value. +* Example: ++ +------------------------------------------------------------ +object_pairs( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + ); +------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------------------- +[ + { "name": "id", "value": 1 }, + { "name": "project", "value": "AsterixDB" }, + { "name": "address", "value": {"city": "Irvine", "state": "CA"} } +] +------------------------------------------------------------------- + +[[pairs]] +pairs +^^^^^ + +* Syntax: ++ +------------------- +pairs(input_object) +------------------- +* Returns an array of arrays describing fields of `input_object`, +including nested fields. For each field of the `input_object` the +returned array contains an array with two elements. The first element is +the name and the second one is the value of the `input_object`'s field. +The input object is introspected recursively, so all fields of its +nested objects are returned. Nested objects contained in arrays and +multisets are also processed by this function. +* Arguments: +** `input_object` : an object value (or an array or a multiset) +* Return Value: +** An array of arrays with name, value pairs of the fields in +`input_object`, including nested fields. Each inner array has exactly +two items: name and value of the `input_object`'s field. +** `missing` if `input_object` is `missing`, +** `null` if `input_object` is null or a value of a primitive data type. 
+* Example: ++ +----------------------------------------------------- +pairs( + { + "id": 1, + "project": "AsterixDB", + "address": {"city": "Irvine", "state": "CA"} + } + ); +----------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------- +[ + [ "id", 1 ], + [ "project", "AsterixDB" ], + [ "address", { "city": "Irvine", "state": "CA" } ], + [ "city", "Irvine" ], + [ "state", "CA" ] +] +----------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc new file mode 100644 index 00000000000..53b23c68b63 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc @@ -0,0 +1,372 @@ +[[aggregate-functions-array-functions]] +Aggregate Functions (Array Functions) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section contains detailed descriptions of each AQL aggregate +function (i.e., array function). + +[[sql-count]] +sql-count +^^^^^^^^^ + +* Syntax: ++ +--------------------- +sql-count(collection) +--------------------- +* Gets the number of non-null and non-missing items in the given +collection. +* Arguments: +** `collection` could be: +*** an `array` or `multiset` to be counted, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `bigint` value representing the number of non-null and non-missing +items in the given collection, +** `null` is returned if the input is `null` or `missing`, +** any other non-array and non-multiset input value will cause an error. 
+* Example: ++ +-------------------------------------------------------- +sql-count( ['hello', 'world', 1, 2, 3, null, missing] ); +-------------------------------------------------------- +* The expected result is: ++ +---- +5 +---- + +[[sql-avg]] +sql-avg +^^^^^^^ + +* Syntax: ++ +----------------------- +sql-avg(num_collection) +----------------------- +* Gets the average value of the non-null and non-missing numeric items +in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the average of the non-null and +non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------------ +sql-avg( [1.2, 2.3, 3.4, 0, null] ); +------------------------------------ +* The expected result is: ++ +----- +1.725 +----- + +[[sql-sum]] +sql-sum +^^^^^^^ + +* Syntax: ++ +----------------------- +sql-sum(num_collection) +----------------------- +* Gets the sum of non-null and non-missing items in the given +collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the sum of the non-null and non-missing numbers in the given +collection. The returning type is decided by the item type with the +highest order in the numeric type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among items.
+** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +--------------------------------------------- +sql-sum( [1.2, 2.3, 3.4, 0, null, missing] ); +--------------------------------------------- +* The expected result is: ++ +---- +6.9 +---- + +[[sql-sql_min]] +sql-min +^^^^^^^ + +* Syntax: ++ +----------------------- +sql-min(num_collection) +----------------------- +* Gets the min value of non-null and non-missing comparable items in the +given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the min value of non-null and non-missing values in the given +collection. The returning type is decided by the item type with the +highest order in the type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +--------------------------------------------- +sql-min( [1.2, 2.3, 3.4, 0, null, missing] ); +--------------------------------------------- +* The expected result is: ++ +---- +0.0 +---- + +[[sql-max]] +sql-max +^^^^^^^ + +* Syntax: ++ +----------------------- +sql-max(num_collection) +----------------------- +* Gets the max value of the non-null and non-missing comparable items in +the given collection.
+* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the max value of non-null and non-missing numbers in the given +collection. The returning type is decided by the item type with the +highest order in the type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +--------------------------------------------- +sql-max( [1.2, 2.3, 3.4, 0, null, missing] ); +--------------------------------------------- +* The expected result is: ++ +---- +3.4 +---- + +[[count]] +count +^^^^^ + +* Syntax: ++ +----------------- +count(collection) +----------------- +* Gets the number of items in the given collection. +* Arguments: +** `collection` could be: +*** an `array` or `multiset` containing the items to be counted, +*** or a `null` value, +*** or a `missing` value. +* Return Value: +** a `bigint` value representing the number of items in the given +collection, +** `null` is returned if the input is `null` or `missing`. +* Example: ++ +------------------------------- +count( [1, 2, null, missing] ); +------------------------------- +* The expected result is: ++ +---- +4 +---- + +[[avg]] +avg +^^^ + +* Syntax: ++ +------------------- +avg(num_collection) +------------------- +* Gets the average value of the numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value.
+* Return Value: +** a `double` value representing the average of the numbers in the given +collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------- +avg( [100, 200, 300] ); +----------------------- +* The expected result is: ++ +--------- +[ 200.0 ] +--------- + +[[sum]] +sum +^^^ + +* Syntax: ++ +------------------- +sum(num_collection) +------------------- +* Gets the sum of the items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the sum of the numbers in the given collection. The returning type is +decided by the item type with the highest order in the numeric type +promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------- +sum( [100, 200, 300] ); +----------------------- +* The expected result is: ++ +---- +600 +---- + +[[sql-min]] +min +^^^ + +* Syntax: ++ +------------------- +min(num_collection) +------------------- +* Gets the min value of comparable items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the min value of the given collection. The returning type is decided +by the item type with the highest order in the type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +numeric items.
+** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +---------------------- +min( [10.2, 100, 5] ); +---------------------- +* The expected result is: ++ +---- +5.0 +---- + +[[sql-max-1]] +max +^^^ + +* Syntax: ++ +------------------- +max(num_collection) +------------------- +* Gets the max value of numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** The max value of the given collection. The returning type is decided +by the item type with the highest order in the type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error.
+* Example: ++ +---------------------- +max( [10.2, 100, 5] ); +---------------------- +* The expected result is: ++ +----- +100.0 +----- diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc new file mode 100644 index 00000000000..740825f55e9 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc @@ -0,0 +1,834 @@ +[[aggregate-functions-array-functions]] +Aggregate Functions (Array Functions) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This section contains detailed descriptions of the built-in aggregate +functions in the query language. + +The query language also supports standard SQL aggregate functions (e.g., +`MIN`, `MAX`, `SUM`, `COUNT`, and `AVG`). Note that these are not real +functions in the query language, but just syntactic sugar for the +corresponding built-in aggregate functions (e.g., `ARRAY_MIN`, +`ARRAY_MAX`, `ARRAY_SUM`, `ARRAY_COUNT`, and `ARRAY_AVG`). Refer to +link:manual.html#SQL-92_aggregation_functions[SQL-92 Aggregation +Functions] for details. + +The `DISTINCT` keyword may be used with built-in aggregate functions and +standard SQL aggregate functions. It may also be used with aggregate +functions used as window functions. It determines whether the function +aggregates all values in the group, or distinct values only. Refer to +link:manual.html#Aggregation_functions[Aggregation Functions] for +details. + +Aggregate functions may be used as window functions when they are used +with an OVER clause. Refer to link:manual.html#Over_clauses[OVER +Clauses] for details. + +[[array_count]] +array_count +^^^^^^^^^^^ + +* Syntax: ++ +----------------------- +array_count(collection) +----------------------- +* Gets the number of non-null and non-missing items in the given +collection.
+* Arguments: +** `collection` could be: +*** an `array` or `multiset` to be counted, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `bigint` value representing the number of non-null and non-missing +items in the given collection, +** `null` is returned if the input is `null` or `missing`, +** any other non-array and non-multiset input value will cause an error. +* Example: ++ +---------------------------------------------------------- +array_count( ['hello', 'world', 1, 2, 3, null, missing] ); +---------------------------------------------------------- +* The expected result is: ++ +---- +5 +---- + +[[array_avg]] +array_avg +^^^^^^^^^ + +* Syntax: ++ +------------------------- +array_avg(num_collection) +------------------------- +* Gets the average value of the non-null and non-missing numeric items +in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the average of the non-null and +non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +-------------------------------------- +array_avg( [1.2, 2.3, 3.4, 0, null] ); +-------------------------------------- +* The expected result is: ++ +----- +1.725 +----- + +[[array_sum]] +array_sum +^^^^^^^^^ + +* Syntax: ++ +------------------------- +array_sum(num_collection) +------------------------- +* Gets the sum of non-null and non-missing items in the given +collection.
+* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the sum of the non-null and non-missing numbers in the given +collection. The returning type is decided by the item type with the +highest order in the numeric type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------------------------------- +array_sum( [1.2, 2.3, 3.4, 0, null, missing] ); +----------------------------------------------- +* The expected result is: ++ +---- +6.9 +---- + +[[array_min]] +array_min +^^^^^^^^^ + +* Syntax: ++ +------------------------- +array_min(num_collection) +------------------------- +* Gets the min value of non-null and non-missing comparable items in the +given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the min value of non-null and non-missing values in the given +collection. The returning type is decided by the item type with the +highest order in the type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error.
+* Example: ++ +----------------------------------------------- +array_min( [1.2, 2.3, 3.4, 0, null, missing] ); +----------------------------------------------- +* The expected result is: ++ +---- +0.0 +---- + +[[array_max]] +array_max +^^^^^^^^^ + +* Syntax: ++ +------------------------- +array_max(num_collection) +------------------------- +* Gets the max value of the non-null and non-missing comparable items in +the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the max value of non-null and non-missing numbers in the given +collection. The returning type is decided by the item type with the +highest order in the type promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +----------------------------------------------- +array_max( [1.2, 2.3, 3.4, 0, null, missing] ); +----------------------------------------------- +* The expected result is: ++ +---- +3.4 +---- + +[[array_stddev_samp]] +array_stddev_samp +^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------- +array_stddev_samp(num_collection) +--------------------------------- +* Gets the sample standard deviation value of the non-null and +non-missing numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value.
+* Return Value: +** a `double` value representing the sample standard deviation of the +non-null and non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +---------------------------------------------- +array_stddev_samp( [1.2, 2.3, 3.4, 0, null] ); +---------------------------------------------- +* The expected result is: ++ +------------------ +1.4591664287073858 +------------------ + +[[array_stddev_pop]] +array_stddev_pop +^^^^^^^^^^^^^^^^ + +* Syntax: ++ +-------------------------------- +array_stddev_pop(num_collection) +-------------------------------- +* Gets the population standard deviation value of the non-null and +non-missing numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the population standard deviation of +the non-null and non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. 
+* Example: ++ +--------------------------------------------- +array_stddev_pop( [1.2, 2.3, 3.4, 0, null] ); +--------------------------------------------- +* The expected result is: ++ +------------------ +1.2636751956100112 +------------------ + +[[array_var_samp]] +array_var_samp +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------ +array_var_samp(num_collection) +------------------------------ +* Gets the sample variance value of the non-null and non-missing numeric +items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the sample variance of the non-null and +non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------------------- +array_var_samp( [1.2, 2.3, 3.4, 0, null] ); +------------------------------------------- +* The expected result is: ++ +------------------ +2.1291666666666664 +------------------ + +[[array_var_pop]] +array_var_pop +^^^^^^^^^^^^^ + +* Syntax: ++ +----------------------------- +array_var_pop(num_collection) +----------------------------- +* Gets the population variance value of the non-null and non-missing +numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. 
+* Return Value: +** a `double` value representing the population variance of the non-null +and non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------------------ +array_var_pop( [1.2, 2.3, 3.4, 0, null] ); +------------------------------------------ +* The expected result is: ++ +------------------ +1.5968749999999998 +------------------ + +[[array_skewness]] +array_skewness +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------ +array_skewness(num_collection) +------------------------------ +* Gets the skewness value of the non-null and non-missing numeric items +in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the skewness of the non-null and +non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. 
+* Example: ++ +------------------------------------------- +array_skewness( [1.2, 2.3, 3.4, 0, null] ); +------------------------------------------- +* The expected result is: ++ +-------------------- +-0.04808451539164242 +-------------------- + +[[array_kurtosis]] +array_kurtosis +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------ +array_kurtosis(num_collection) +------------------------------ +* Gets the kurtosis value from the normal distribution of the non-null +and non-missing numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the kurtosis from a normal distribution +of the non-null and non-missing numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if the given collection does not contain any +non-null and non-missing items, +** any other non-array and non-multiset input value will cause a type +error, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------------------- +array_kurtosis( [1.2, 2.3, 3.4, 0, null] ); +------------------------------------------- +* The expected result is: ++ +------------------ +-1.342049701096427 +------------------ + +[[strict_count]] +strict_count +^^^^^^^^^^^^ + +* Syntax: ++ +------------------------ +strict_count(collection) +------------------------ +* Gets the number of items in the given collection. +* Arguments: +** `collection` could be: +*** an `array` or `multiset` containing the items to be counted, +*** or a `null` value, +*** or a `missing` value. +* Return Value: +** a `bigint` value representing the number of items in the given +collection, +** `null` is returned if the input is `null` or `missing`. 
+* Example: ++ +-------------------------------------- +strict_count( [1, 2, null, missing] ); +-------------------------------------- +* The expected result is: ++ +---- +4 +---- + +[[strict_avg]] +strict_avg +^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +strict_avg(num_collection) +-------------------------- +* Gets the average value of the numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the average of the numbers in the given +collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------ +strict_avg( [100, 200, 300] ); +------------------------------ +* The expected result is: ++ +----- +200.0 +----- + +[[strict_sum]] +strict_sum +^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +strict_sum(num_collection) +-------------------------- +* Gets the sum of the items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the sum of the numbers in the given collection. The returning type is +decided by the item type with the highest order in the numeric type +promotion order (`tinyint`-> +`smallint`->`integer`->`bigint`->`float`->`double`) among items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error.
+* Example: ++ +------------------------------ +strict_sum( [100, 200, 300] ); +------------------------------ +* The expected result is: ++ +---- +600 +---- + +[[strict_min]] +strict_min +^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +strict_min(num_collection) +-------------------------- +* Gets the min value of comparable items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** the min value of the given collection. The returning type is decided +by the item type with the highest order in the type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +numeric items. +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +----------------------------- +strict_min( [10.2, 100, 5] ); +----------------------------- +* The expected result is: ++ +---- +5.0 +---- + +[[strict_max]] +strict_max +^^^^^^^^^^ + +* Syntax: ++ +-------------------------- +strict_max(num_collection) +-------------------------- +* Gets the max value of numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset`, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** The max value of the given collection. The returning type is decided +by the item type with the highest order in the type promotion order +(`tinyint`-> `smallint`->`integer`->`bigint`->`float`->`double`) among +numeric items. 
+** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** multiple incomparable items in the input array or multiset will cause +a type error, +** any other non-array and non-multiset input value will cause a type +error. +* Example: ++ +----------------------------- +strict_max( [10.2, 100, 5] ); +----------------------------- +* The expected result is: ++ +----- +100.0 +----- + +[[strict_stddev_samp]] +strict_stddev_samp +^^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +---------------------------------- +strict_stddev_samp(num_collection) +---------------------------------- +* Gets the sample standard deviation value of the numeric items in the +given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the sample standard deviation of the +numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +-------------------------------------- +strict_stddev_samp( [100, 200, 300] ); +-------------------------------------- +* The expected result is: ++ +----- +100.0 +----- + +[[strict_stddev_pop]] +strict_stddev_pop +^^^^^^^^^^^^^^^^^ + +* Syntax: ++ +--------------------------------- +strict_stddev_pop(num_collection) +--------------------------------- +* Gets the population standard deviation value of the numeric items in +the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. 
+* Return Value: +** a `double` value representing the population standard deviation of +the numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +------------------------------------- +strict_stddev_pop( [100, 200, 300] ); +------------------------------------- +* The expected result is: ++ +----------------- +81.64965809277261 +----------------- + +[[strict_var_samp]] +strict_var_samp +^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------- +strict_var_samp(num_collection) +------------------------------- +* Gets the sample variance value of the numeric items in the given +collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the sample variance of the numbers in +the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------------------- +strict_var_samp( [100, 200, 300] ); +----------------------------------- +* The expected result is: ++ +------- +10000.0 +------- + +[[strict_var_pop]] +strict_var_pop +^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------ +strict_var_pop(num_collection) +------------------------------ +* Gets the population variance value of the numeric items in the given +collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. 
+* Return Value: +** a `double` value representing the population variance of the numbers +in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +---------------------------------- +strict_var_pop( [100, 200, 300] ); +---------------------------------- +* The expected result is: ++ +----------------- +6666.666666666667 +----------------- + +[[strict_skewness]] +strict_skewness +^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------- +strict_skewness(num_collection) +------------------------------- +* Gets the skewness value of the numeric items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. +* Return Value: +** a `double` value representing the skewness of the numbers in the +given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------------------- +strict_skewness( [100, 200, 300] ); +----------------------------------- +* The expected result is: ++ +---- +0.0 +---- + +[[strict_kurtosis]] +strict_kurtosis +^^^^^^^^^^^^^^^ + +* Syntax: ++ +------------------------------- +strict_kurtosis(num_collection) +------------------------------- +* Gets the kurtosis value from the normal distribution of the numeric +items in the given collection. +* Arguments: +** `num_collection` could be: +*** an `array` or `multiset` containing numeric values, `null`s or +`missing`s, +*** or, a `null` value, +*** or, a `missing` value. 
+* Return Value: +** a `double` value representing the kurtosis from a normal distribution +of the numbers in the given collection, +** `null` is returned if the input is `null` or `missing`, +** `null` is returned if there is a `null` or `missing` in the input +collection, +** any other non-numeric value in the input collection will cause a type +error. +* Example: ++ +----------------------------------- +strict_kurtosis( [100, 200, 300] ); +----------------------------------- +* The expected result is: ++ +---- +-1.5 +---- diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc new file mode 100644 index 00000000000..29b6328aa36 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc @@ -0,0 +1,32 @@ +[[introduction]] +1. Introduction +--------------- + +This document is intended as a reference guide to the full syntax and +semantics of AsterixDB's query language, a SQL-based language for +working with semistructured data. The language is a derivative of SQL++, +a declarative query language for JSON data which is largely backwards +compatible with SQL. SQL++ originated from research in the FORWARD +project at UC San Diego, and it has much in common with SQL; some +differences exist due to the different data models that the two +languages were designed to serve. SQL was designed for interacting with +the flat, schema-ified world of relational databases, while SQL++ +generalizes SQL to also handle nested data formats (like JSON) and the +schema-optional (or even schema-less) data models of modern NoSQL and +BigData systems. + +In the context of Apache AsterixDB, the query language is intended for +working with the Asterix Data Model (link:../datamodel.html[ADM]), a +data model based on a superset of JSON with an enriched and flexible +type system. 
New AsterixDB users are encouraged to read and work through +the (much friendlier) guide "link:primer-sqlpp.html[AsterixDB 101: An +ADM and SQL++ Primer]" before attempting to make use of this document. +In addition, readers are advised to read through the +link:../datamodel.html[Asterix Data Model (ADM) reference guide] first +as well, as an understanding of the data model is a prerequisite to +understanding the query language. + +In what follows, we detail the features of the query language in a +grammar-guided manner. We list and briefly explain each of the +productions in the query grammar, offering examples (and results) for +clarity. diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc new file mode 100644 index 00000000000..07a1fa1817e --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc @@ -0,0 +1,677 @@ +The query language is a highly composable expression language. Each +expression in the query language returns zero or more data model +instances. There are three major kinds of expressions. At the topmost +level, an expression can be an OperatorExpression (similar to a +mathematical expression) or a QuantifiedExpression (which yields a +boolean value). Each will be detailed as we explore the full grammar of +the language. + +-------------------------------------------------------- +Expression ::= OperatorExpression | QuantifiedExpression +-------------------------------------------------------- + +Note that in the following text, words enclosed in angle brackets denote +keywords that are not case-sensitive. + +[[operator-expressions]] +Operator Expressions +~~~~~~~~~~~~~~~~~~~~ + +Operators perform a specific operation on the input values or +expressions. 
The syntax of an operator expression is as follows: + +------------------------------------------------------------------------------------------------- +OperatorExpression ::= PathExpression + | Operator OperatorExpression + | OperatorExpression Operator (OperatorExpression)? + | OperatorExpression <BETWEEN> OperatorExpression <AND> OperatorExpression +------------------------------------------------------------------------------------------------- + +The language provides a full set of operators that you can use within +its statements. Here are the categories of operators: + +* link:#arithmetic-operators[Arithmetic Operators], to perform basic +mathematical operations; +* link:#collection-operators[Collection Operators], to evaluate +expressions on collections or objects; +* link:#comparison-operators[Comparison Operators], to compare two +expressions; +* link:#logical-operators[Logical Operators], to combine operators using +Boolean logic. + +The following table summarizes the precedence order (from higher to +lower) of the major unary and binary operators: + +[cols=",",options="header",] +|======================================================================= +|Operator |Operation +|EXISTS, NOT EXISTS |Collection emptiness testing + +|^ |Exponentiation + +|*, /, DIV, MOD (%) |Multiplication, division, modulo +|+, - |Addition, subtraction +|\|\| |String concatenation +|IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, IS UNKNOWN, IS NOT UNKNOWN, IS VALUED, IS NOT VALUED |Unknown value comparison +|BETWEEN, NOT BETWEEN |Range comparison (inclusive on both sides) +|=, !=, <>, <, >, <=, >=, LIKE, NOT LIKE, IN, NOT IN |Comparison +|NOT |Logical negation |AND |Conjunction |OR |Disjunction +|======================================================================= + +In general, if any operand evaluates to a `MISSING` value, the enclosing +operator will return `MISSING`; if none of the operands evaluates to a +`MISSING` value but there is an operand that 
evaluates to a `NULL` value, the +enclosing operator will return `NULL`. However, there are a few +exceptions listed in link:#comparison-operators[comparison operators] +and link:#logical-operators[logical operators]. + +[[arithmetic-operators]] +Arithmetic Operators +^^^^^^^^^^^^^^^^^^^^ + +Arithmetic operators are used to exponentiate, add, subtract, multiply, +and divide numeric values, or concatenate string values. + +[cols=",,",options="header",] +|======================================================================= +|Operator |Purpose |Example +|+, - |As unary operators, they denote a positive or negative expression +|SELECT VALUE -1; + +|+, - |As binary operators, they add or subtract |SELECT VALUE 1 + 2; + +|* |Multiply |SELECT VALUE 4 * 2; + +|/ |Divide (returns a value of type `double` if both operands are +integers) |SELECT VALUE 5 / 2; + +|DIV |Divide (returns an integer value if both operands are integers) +|SELECT VALUE 5 DIV 2; + +|MOD (%) |Modulo |SELECT VALUE 5 % 2; + +|^ |Exponentiation |SELECT VALUE 2^3; + +|\|\| |String concatenation |SELECT VALUE "ab"\|\|"c"\|\|"d"; +|======================================================================= + +[[collection-operators]] +Collection Operators +^^^^^^^^^^^^^^^^^^^^ + +Collection operators are used for membership tests (IN, NOT IN) or empty +collection tests (EXISTS, NOT EXISTS). 
+ +[cols=",,",options="header",] +|======================================================================= +|Operator |Purpose |Example +|IN |Membership test |SELECT * FROM ChirpMessages cm WHERE cm.user.lang +IN ["en", "de"]; + +|NOT IN |Non-membership test |SELECT * FROM ChirpMessages cm WHERE +cm.user.lang NOT IN ["en"]; + +|EXISTS |Check whether a collection is not empty |SELECT * FROM +ChirpMessages cm WHERE EXISTS cm.referredTopics; + +|NOT EXISTS |Check whether a collection is empty |SELECT * FROM +ChirpMessages cm WHERE NOT EXISTS cm.referredTopics; +|======================================================================= + +[[comparison-operators]] +Comparison Operators +^^^^^^^^^^^^^^^^^^^^ + +Comparison operators are used to compare values. The comparison +operators fall into one of two sub-categories: missing value comparisons +and regular value comparisons. The query language (and JSON) has two +ways of representing missing information in a object - the presence of +the field with a NULL for its value (as in SQL), and the absence of the +field (which JSON permits). For example, the first of the following +objects represents Jack, whose friend is Jill. In the other examples, +Jake is friendless a la SQL, with a friend field that is NULL, while Joe +is friendless in a more natural (for JSON) way, i.e., by not having a +friend field. + +[[examples]] +Examples + +\{"name": "Jack", "friend": "Jill"} + +\{"name": "Jake", "friend": NULL} + +\{"name": "Joe"} + +The following table enumerates all of the query language's comparison +operators. 
+ +[cols=",,",options="header",] +|======================================================================= +|Operator |Purpose |Example +|IS NULL |Test if a value is NULL |SELECT * FROM ChirpMessages cm WHERE +cm.user.name IS NULL; + +|IS NOT NULL |Test if a value is not NULL |SELECT * FROM ChirpMessages +cm WHERE cm.user.name IS NOT NULL; + +|IS MISSING |Test if a value is MISSING |SELECT * FROM ChirpMessages cm +WHERE cm.user.name IS MISSING; + +|IS NOT MISSING |Test if a value is not MISSING |SELECT * FROM +ChirpMessages cm WHERE cm.user.name IS NOT MISSING; + +|IS UNKNOWN |Test if a value is NULL or MISSING |SELECT * FROM +ChirpMessages cm WHERE cm.user.name IS UNKNOWN; + +|IS NOT UNKNOWN |Test if a value is neither NULL nor MISSING |SELECT * +FROM ChirpMessages cm WHERE cm.user.name IS NOT UNKNOWN; + +|IS KNOWN (IS VALUED) |Test if a value is neither NULL nor MISSING +|SELECT * FROM ChirpMessages cm WHERE cm.user.name IS KNOWN; + +|IS NOT KNOWN (IS NOT VALUED) |Test if a value is NULL or MISSING +|SELECT * FROM ChirpMessages cm WHERE cm.user.name IS NOT KNOWN; + +|BETWEEN |Test if a value is between a start value and a end value. The +comparison is inclusive to both start and end values. |SELECT * FROM +ChirpMessages cm WHERE cm.chirpId BETWEEN 10 AND 20; + +|= |Equality test |SELECT * FROM ChirpMessages cm WHERE cm.chirpId=10; + +|!= |Inequality test |SELECT * FROM ChirpMessages cm WHERE +cm.chirpId!=10; + +|<> |Inequality test |SELECT * FROM ChirpMessages cm WHERE +cm.chirpId<>10; + +|< |Less than |SELECT * FROM ChirpMessages cm WHERE cm.chirpId<10; + +|> |Greater than |SELECT * FROM ChirpMessages cm WHERE cm.chirpId>10; + +|<= |Less than or equal to |SELECT * FROM ChirpMessages cm WHERE +cm.chirpId<=10; + +|>= |Greater than or equal to |SELECT * FROM ChirpMessages cm WHERE +cm.chirpId>=10; + +|LIKE |Test if the left side matches a pattern defined on the right +side; in the pattern, "%" matches any string while "_" matches any +character. 
|SELECT * FROM ChirpMessages cm WHERE cm.user.name LIKE +"%Giesen%"; + +|NOT LIKE |Test if the left side does not match a pattern defined on the +right side; in the pattern, "%" matches any string while "_" matches any +character. |SELECT * FROM ChirpMessages cm WHERE cm.user.name NOT LIKE +"%Giesen%"; +|======================================================================= + +The following table summarizes how the missing value comparison +operators work. + +[cols=",,,",options="header",] +|=================================================== +|Operator |Non-NULL/Non-MISSING value |NULL |MISSING +|IS NULL |FALSE |TRUE |MISSING +|IS NOT NULL |TRUE |FALSE |MISSING +|IS MISSING |FALSE |FALSE |TRUE +|IS NOT MISSING |TRUE |TRUE |FALSE +|IS UNKNOWN |FALSE |TRUE |TRUE +|IS NOT UNKNOWN |TRUE |FALSE |FALSE +|IS KNOWN (IS VALUED) |TRUE |FALSE |FALSE +|IS NOT KNOWN (IS NOT VALUED) |FALSE |TRUE |TRUE +|=================================================== + +[[logical-operators]] +Logical Operators +^^^^^^^^^^^^^^^^^ + +Logical operators perform logical `NOT`, `AND`, and `OR` operations over +Boolean values (`TRUE` and `FALSE`) plus `NULL` and `MISSING`. + +[cols=",,",options="header",] +|======================================================================= +|Operator |Purpose |Example +|NOT |Returns true if the following condition is false, otherwise +returns false |SELECT VALUE NOT TRUE; + +|AND |Returns true if both branches are true, otherwise returns false +|SELECT VALUE TRUE AND FALSE; + +|OR |Returns true if one branch is true, otherwise returns false |SELECT +VALUE FALSE OR FALSE; +|======================================================================= + +The following table is the truth table for `AND` and `OR`. 
+ +[cols=",,,",options="header",] +|================================== +|A |B |A AND B |A OR B +|TRUE |TRUE |TRUE |TRUE +|TRUE |FALSE |FALSE |TRUE +|TRUE |NULL |NULL |TRUE +|TRUE |MISSING |MISSING |TRUE +|FALSE |FALSE |FALSE |FALSE +|FALSE |NULL |FALSE |NULL +|FALSE |MISSING |FALSE |MISSING +|NULL |NULL |NULL |NULL +|NULL |MISSING |MISSING |NULL +|MISSING |MISSING |MISSING |MISSING +|================================== + +The following table demonstrates the results of `NOT` on all possible +inputs. + +[cols=",",options="header",] +|================ +|A |NOT A +|TRUE |FALSE +|FALSE |TRUE +|NULL |NULL +|MISSING |MISSING +|================ + +[[quantified-expressions]] +Quantified Expressions +~~~~~~~~~~~~~~~~~~~~~~ + +---------------------------------------------------------------------------------------------------------------- +QuantifiedExpression ::= ( (<ANY>|<SOME>) | <EVERY> ) Variable <IN> Expression ( "," Variable "in" Expression )* + <SATISFIES> Expression (<END>)? +---------------------------------------------------------------------------------------------------------------- + +Quantified expressions are used for expressing existential or universal +predicates involving the elements of a collection. + +The following pair of examples illustrate the use of a quantified +expression to test that every (or some) element in the set [1, 2, 3] of +integers is less than three. The first example yields `FALSE` and the second +example yields `TRUE`. + +It is useful to note that if the set were instead the empty set, the +first expression would yield `TRUE` ("every" value in an empty set +satisfies the condition) while the second expression would yield `FALSE` +(since there isn't "some" value, as there are no values in the set, that +satisfies the condition). + +A quantified expression will return a `NULL` (or `MISSING`) if the first +expression in it evaluates to `NULL` (or `MISSING`). A type error will +be raised if the first expression in a quantified expression does not +return a collection. 
+ +[[examples-1]] +Examples + +-------------------------------------- +EVERY x IN [ 1, 2, 3 ] SATISFIES x < 3 +SOME x IN [ 1, 2, 3 ] SATISFIES x < 3 +-------------------------------------- + +[[path-expressions]] +Path Expressions +~~~~~~~~~~~~~~~~ + +-------------------------------------------------------------- +PathExpression ::= PrimaryExpression ( Field | Index )* +Field ::= "." Identifier +Index ::= "[" Expression (":" ( Expression )? )? "]" +-------------------------------------------------------------- + +Components of complex types in the data model are accessed via path +expressions. Path access can be applied to the result of a query +expression that yields an instance of a complex type, for example, an +object or an array instance. + +For objects, path access is based on field names, and it accesses the +field whose name was specified. For arrays, path access is based on +(zero-based) array-style indexing. Array indexes can be used to retrieve +either a single element from an array, or a whole subset of an array. +Accessing a single element is achieved by providing a single index +argument (zero-based element position), while obtaining a subset of an +array is achieved by providing the `start` and `end` (zero-based) index +positions; the returned subset is from position `start` to position +`end - 1`; the `end` position argument is optional. Multisets have +similar behavior to arrays, except for retrieving arbitrary items as the +order of items is not fixed in multisets. + +Attempts to access non-existent fields or out-of-bound array elements +produce the special value `MISSING`. Type errors will be raised for +inappropriate use of a path expression, such as applying a field +accessor to a numeric value. + +The following examples illustrate field access for an object, +index-based element access or subset retrieval of an array, and also a +composition thereof. 
+ +[[examples-2]] +Examples + +-------------------------------------------------------- +({"name": "MyABCs", "array": [ "a", "b", "c"]}).array + +(["a", "b", "c"])[2] + +({"name": "MyABCs", "array": [ "a", "b", "c"]}).array[2] + +(["a", "b", "c"])[0:2] + +(["a", "b", "c"])[0:] +-------------------------------------------------------- + +[[primary-expressions]] +Primary Expressions +~~~~~~~~~~~~~~~~~~~ + +--------------------------------------- +PrimaryExpression ::= Literal + | VariableReference + | ParameterReference + | ParenthesizedExpression + | FunctionCallExpression + | CaseExpression + | Constructor +--------------------------------------- + +The most basic building block for any expression in the query language +is PrimaryExpression. This can be a simple literal (constant) value, a +reference to a query variable that is in scope, a parenthesized +expression, a function call, or a newly constructed instance of the data +model (such as a newly constructed object, array, or multiset of data +model instances). + +[[literals]] +Literals +~~~~~~~~ + +----------------------------------------------------------- +Literal ::= StringLiteral + | IntegerLiteral + | FloatLiteral + | DoubleLiteral + | <NULL> + | <MISSING> + | <TRUE> + | <FALSE> +StringLiteral ::= "\"" ( + <EscapeQuot> + | <EscapeBslash> + | <EscapeSlash> + | <EscapeBspace> + | <EscapeFormf> + | <EscapeNl> + | <EscapeCr> + | <EscapeTab> + | ~["\"","\\"])* + "\"" + | "\'"( + <EscapeApos> + | <EscapeBslash> + | <EscapeSlash> + | <EscapeBspace> + | <EscapeFormf> + | <EscapeNl> + | <EscapeCr> + | <EscapeTab> + | ~["\'","\\"])* + "\'" +<EscapeApos> ::= "\\\'" +<EscapeQuot> ::= "\\\"" +<EscapeBslash> ::= "\\\\" +<EscapeSlash> ::= "\\/" +<EscapeBspace> ::= "\\b" +<EscapeFormf> ::= "\\f" +<EscapeNl> ::= "\\n" +<EscapeCr> ::= "\\r" +<EscapeTab> ::= "\\t" + +IntegerLiteral ::= <DIGITS> +<DIGITS> ::= ["0" - "9"]+ +FloatLiteral ::= <DIGITS> ( "f" | "F" ) + | <DIGITS> ( "." <DIGITS> ( "f" | "F" ) )? + | "." <DIGITS> ( "f" | "F" ) +DoubleLiteral ::= <DIGITS> "." <DIGITS> + | "." <DIGITS> +----------------------------------------------------------- + +Literals (constants) in a query can be strings, integers, floating point +values, double values, boolean constants, or special constant values +like `NULL` and `MISSING`. The `NULL` value is like a `NULL` in SQL; it +is used to represent an unknown field value. 
The special value `MISSING` +is only meaningful in the context of field accesses; it occurs when the +accessed field simply does not exist at all in an object being accessed. + +The following are some simple examples of literals. + +[[examples-3]] +Examples + +------------- +'a string' +"test string" +42 +------------- + +Different from standard SQL, double quotes play the same role as single +quotes and may be used for string literals in queries as well. + +[[variable-references]] +Variable References +^^^^^^^^^^^^^^^^^^^ + +---------------------------------------------------------------------------- +VariableReference ::= <IDENTIFIER> | <DelimitedIdentifier> +<IDENTIFIER> ::= (<LETTER> | "_") (<LETTER> | <DIGIT> | "_" | "$")* +<LETTER> ::= ["A" - "Z", "a" - "z"] +DelimitedIdentifier ::= "`" (<EscapeQuot> + | <EscapeBslash> + | <EscapeSlash> + | <EscapeBspace> + | <EscapeFormf> + | <EscapeNl> + | <EscapeCr> + | <EscapeTab> + | ~["`","\\"])* + "`" +---------------------------------------------------------------------------- + +A variable in a query can be bound to any legal data model value. A +variable reference refers to the value to which an in-scope variable is +bound. (E.g., a variable binding may originate from one of the `FROM`, +`WITH` or `LET` clauses of a `SELECT` statement or from an input +parameter in the context of a function body.) Backticks, for example, +`id`, are used for delimited identifiers. Delimiting is needed when a +variable's desired name clashes with a keyword or includes characters +not allowed in regular identifiers. More information on exactly how +variable references are resolved can be found in the appendix section on +Variable Resolution. + +[[examples-4]] +Examples + +------------- +tweet +id +`SELECT` +`my-function` +------------- + +[[parameter-references]] +Parameter References +^^^^^^^^^^^^^^^^^^^^ + +------------------------------------------------------------------------------------------ +ParameterReference ::= NamedParameterReference | PositionalParameterReference +NamedParameterReference ::= "$" (<IDENTIFIER> | <DelimitedIdentifier>) +PositionalParameterReference ::= ("$" <DIGITS>) | "?" 
+------------------------------------------------------------------------------------------ + +A statement parameter is an external variable whose value is provided +through the link:../api.html#queryservice[statement execution API]. An +error will be raised if the parameter is not bound at the query +execution time. Positional parameter numbering starts at 1. "?" +parameters are interpreted as $1, .. $N in the order in +which they appear in the statement. + +[[examples-5]] +Examples + +---- +$id +$1 +? +---- + +[[parenthesized-expressions]] +Parenthesized Expressions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +--------------------------------------------------------- +ParenthesizedExpression ::= "(" Expression ")" | Subquery +--------------------------------------------------------- + +An expression can be parenthesized to control the precedence order or +otherwise clarify a query. For composability, a subquery is also a +parenthesized expression. + +The following expression evaluates to the value 2. + +[[example]] +Example + +--------- +( 1 + 1 ) +--------- + +[[function-call-expressions]] +Function Call Expressions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +------------------------------------------------------------------------------------------------------------ +FunctionCallExpression ::= ( FunctionName "(" ( Expression ( "," Expression )* )? ")" ) | WindowFunctionCall +------------------------------------------------------------------------------------------------------------ + +Functions are included in the query language, like most languages, as a +way to package useful functionality or to componentize complicated or +reusable computations. A function call is a legal query expression that +represents the value resulting from the evaluation of its body +expression with the given parameter bindings; the parameter value +bindings can themselves be any expressions in the query language. 
+ +Note that Window functions, and aggregate functions used as window +functions, have a more complex syntax. Window function calls are +described in the section on link:#Over_clauses[OVER Clauses]. + +The following example is a (built-in) function call expression whose +value is 8. + +[[example-1]] +Example + +------------------ +length('a string') +------------------ + +[[case-expressions]] +Case Expressions +~~~~~~~~~~~~~~~~ + +---------------------------------------------------------------------------------------------------------------- +CaseExpression ::= SimpleCaseExpression | SearchedCaseExpression +SimpleCaseExpression ::= <CASE> Expression ( <WHEN> Expression <THEN> Expression )+ ( <ELSE> Expression )? <END> +SearchedCaseExpression ::= <CASE> ( <WHEN> Expression <THEN> Expression )+ ( <ELSE> Expression )? <END> +---------------------------------------------------------------------------------------------------------------- + +In a simple `CASE` expression, the query evaluator searches for the +first `WHEN` ... `THEN` pair in which the `WHEN` expression is equal to +the expression following `CASE` and returns the expression following +`THEN`. If none of the `WHEN` ... `THEN` pairs meet this condition, and +an `ELSE` branch exists, it returns the `ELSE` expression. Otherwise, +`NULL` is returned. + +In a searched `CASE` expression, the query evaluator searches from left to +right until it finds a `WHEN` expression that is evaluated to `TRUE`, +and then returns its corresponding `THEN` expression. If no condition is +found to be `TRUE`, and an `ELSE` branch exists, it returns the `ELSE` +expression. Otherwise, it returns `NULL`. + +The following example illustrates the form of a case expression. 
+ +[[example-2]] +Example + +----------------------------------------------- +CASE (2 < 3) WHEN true THEN "yes" ELSE "no" END +----------------------------------------------- + +[[constructors]] +Constructors +^^^^^^^^^^^^ + +--------------------------------------------------------------------------------------- +Constructor ::= ArrayConstructor | MultisetConstructor | ObjectConstructor +ArrayConstructor ::= "[" ( Expression ( "," Expression )* )? "]" +MultisetConstructor ::= "{{" ( Expression ( "," Expression )* )? "}}" +ObjectConstructor ::= "{" ( FieldBinding ( "," FieldBinding )* )? "}" +FieldBinding ::= Expression ( ":" Expression )? +--------------------------------------------------------------------------------------- + +A major feature of the query language is its ability to construct new +data model instances. This is accomplished using its constructors for +each of the model's complex object structures, namely arrays, multisets, +and objects. Arrays are like JSON arrays, while multisets have bag +semantics. Objects are built from fields that are field-name/field-value +pairs, again like JSON. + +The following examples illustrate how to construct a new array with 4 +items and a new object with 2 fields respectively. Array elements can be +homogeneous (as in the first example), which is the common case, or they +may be heterogeneous (as in the second example). The data values and +field name values used to construct arrays, multisets, and objects in +constructors are all simply query expressions. Thus, the collection +elements, field names, and field values used in constructors can be +simple literals or they can come from query variable references or even +arbitrarily complex query expressions (subqueries). Type errors will be +raised if the field names in an object are not strings, and duplicate +field errors will be raised if they are not distinct. 
+ +[[examples-6]] +Examples + +---------------------------------------------------------------------------- +[ 'a', 'b', 'c', 'c' ] + +[ 42, "forty-two!", { "rank" : "Captain", "name": "America" }, 3.14159 ] + +{ + 'project name': 'Hyracks', + 'project members': [ 'vinayakb', 'dtabass', 'chenli', 'tsotras', 'tillw' ] +} +---------------------------------------------------------------------------- + +If only one expression is specified instead of the +field-name/field-value pair in an object constructor then this +expression provides the field value. The field name is +then automatically generated based on the kind of the value expression: + +* If it is a variable reference expression then the generated field name is +the name of that variable. +* If it is a field access expression then the generated field name is the +last identifier in that expression. +* For all other cases, a compilation error will be raised. + +[[example-3]] +Example + +------------------------------------------- +SELECT VALUE { user.alias, user.userSince } +FROM GleambookUsers user +WHERE user.id = 1; +------------------------------------------- + +This query outputs: + +-------------------------------------- +[ { + "alias": "Margarita", + "userSince": "2012-08-20T10:10:00" +} ] +-------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc new file mode 100644 index 00000000000..2bdb60b2190 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc @@ -0,0 +1,3 @@ +[[expressions]] +2. 
Expressions +-------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc new file mode 100644 index 00000000000..87ccfb713b0 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc @@ -0,0 +1,21 @@ +[[declarations]] +Declarations +~~~~~~~~~~~~ + +---------------------------------------- +DatabaseDeclaration ::= "USE" Identifier +---------------------------------------- + +At the uppermost level, the world of data is organized into data +namespaces called *dataverses*. To set the default dataverse for +statements, the USE statement is provided. + +As an example, the following statement sets the default dataverse to be +"TinySocial". + +[[example]] +Example + +--------------- +USE TinySocial; +--------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc new file mode 100644 index 00000000000..2d622d28213 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc @@ -0,0 +1,34 @@ +When writing a complex query, it can sometimes be helpful to define one +or more auxiliary functions that each address a sub-piece of the +overall query. The declare function statement supports the creation of +such helper functions. In general, the function body (expression) can be +any legal query expression. + +----------------------------------------------------------------------------------------- +FunctionDeclaration ::= "DECLARE" "FUNCTION" Identifier ParameterList "{" Expression "}" +ParameterList ::= "(" ( <VARIABLE> ( "," <VARIABLE> )* )? ")" +----------------------------------------------------------------------------------------- + +The following is a simple example of a temporary function definition and +its use. 
+ +[[example]] +Example + +--------------------------------------------------------- +DECLARE FUNCTION friendInfo(userId) { + (SELECT u.id, u.name, len(u.friendIds) AS friendCount + FROM GleambookUsers u + WHERE u.id = userId)[0] + }; + +SELECT VALUE friendInfo(2); +--------------------------------------------------------- + +For our sample data set, this returns: + +---------------------------------------------------- +[ + { "id": 2, "name": "IsbelDull", "friendCount": 2 } +] +---------------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc new file mode 100644 index 00000000000..6f164f4d527 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc @@ -0,0 +1,2615 @@ +[[select-statements]] +SELECT Statements +~~~~~~~~~~~~~~~~~ + +The following shows the (rich) grammar for the `SELECT` statement in the +query language. + +--------------------------------------------------------------------------------------- +SelectStatement ::= ( WithClause )? + SelectSetOperation (OrderbyClause )? ( LimitClause )? +SelectSetOperation ::= SelectBlock ( ( SelectBlock | Subquery ) )* +Subquery ::= "(" SelectStatement ")" + +SelectBlock ::= SelectClause + ( FromClause ( LetClause )?)? + ( WhereClause )? + ( GroupbyClause ( LetClause )? ( HavingClause )? )? + | + FromClause ( LetClause )? + ( WhereClause )? + ( GroupbyClause ( LetClause )? ( HavingClause )? )? + SelectClause + +SelectClause ::= ( | )? ( SelectRegular | SelectValue ) - SelectRegular ::= Projection ( "," Projection )* - SelectValue ::= ( | | ) Expression - Projection ::= ( Expression ( )? Identifier | "*" | Identifier "." "*" ) - - FromClause ::= FromTerm ( "," FromTerm )* - FromTerm ::= Expression (( )? Variable)? - ( ( JoinType )? ( JoinClause | UnnestClause ) )* - - JoinClause ::= Expression (( )? Variable)? 
Expression - UnnestClause ::= ( ) Expression - ( )? Variable ( Variable )? - JoinType ::= ( | ( )? ) - - WithClause ::= WithElement ( "," WithElement )* - LetClause ::= ( | ) LetElement ( "," LetElement )* - LetElement ::= Variable "=" Expression - WithElement ::= Variable Expression - - WhereClause ::= Expression - - GroupbyClause ::= Expression ( ( ()? Variable )? - ( "," Expression ( ()? Variable )? )* ) - ( Variable - ("(" VariableReference Identifier - ("," VariableReference Identifier )* ")")? - )? - HavingClause ::= Expression - - OrderbyClause ::= Expression ( | )? - ( "," Expression ( | )? )* - LimitClause ::= Expression ( Expression )? - -In this section, we will make use of two stored collections of objects (datasets), `GleambookUsers` and `GleambookMessages`, in a series of running examples to explain `SELECT` queries. The contents of the example collections are as follows: - -`GleambookUsers` collection (or, dataset): - - [ { - "id":1, - "alias":"Margarita", - "name":"MargaritaStoddard", - "nickname":"Mags", - "userSince":"2012-08-20T10:10:00", - "friendIds":[2,3,6,10], - "employment":[{ - "organizationName":"Codetechno", - "start-date":"2006-08-06" - }, - { - "organizationName":"geomedia", - "start-date":"2010-06-17", - "end-date":"2010-01-26" - }], - "gender":"F" - }, - { - "id":2, - "alias":"Isbel", - "name":"IsbelDull", - "nickname":"Izzy", - "userSince":"2011-01-22T10:10:00", - "friendIds":[1,4], - "employment":[{ - "organizationName":"Hexviafind", - "startDate":"2010-04-27" - }] - }, - { - "id":3, - "alias":"Emory", - "name":"EmoryUnk", - "userSince":"2012-07-10T10:10:00", - "friendIds":[1,5,8,9], - "employment":[{ - "organizationName":"geomedia", - "startDate":"2010-06-17", - "endDate":"2010-01-26" - }] - } ] - -`GleambookMessages` collection (or, dataset): - - [ { - "messageId":2, - "authorId":1, - "inResponseTo":4, - "senderLocation":[41.66,80.87], - "message":" dislike x-phone its touch-screen is horrible" - }, - { - "messageId":3, - 
"authorId":2, - "inResponseTo":4, - "senderLocation":[48.09,81.01], - "message":" like product-y the plan is amazing" - }, - { - "messageId":4, - "authorId":1, - "inResponseTo":2, - "senderLocation":[37.73,97.04], - "message":" can't stand acast the network is horrible:(" - }, - { - "messageId":6, - "authorId":2, - "inResponseTo":1, - "senderLocation":[31.5,75.56], - "message":" like product-z its platform is mind-blowing" - } - { - "messageId":8, - "authorId":1, - "inResponseTo":11, - "senderLocation":[40.33,80.87], - "message":" like ccast the 3G is awesome:)" - }, - { - "messageId":10, - "authorId":1, - "inResponseTo":12, - "senderLocation":[42.5,70.01], - "message":" can't stand product-w the touch-screen is terrible" - }, - { - "messageId":11, - "authorId":1, - "inResponseTo":1, - "senderLocation":[38.97,77.49], - "message":" can't stand acast its plan is terrible" - } ] - -## SELECT Clause -The `SELECT` clause always returns a collection value as its result (even if the result is empty or a singleton). - -### Select Element/Value/Raw -The `SELECT VALUE` clause returns an array or multiset that contains the results of evaluating the `VALUE` -expression, with one evaluation being performed per "binding tuple" (i.e., per `FROM` clause item) satisfying -the statement's selection criteria. -For historical reasons the query language also allows the keywords `ELEMENT` or `RAW` to be used in place of `VALUE` -(not recommended). - -If there is no FROM clause, the expression after `VALUE` is evaluated once with no binding tuples -(except those inherited from an outer environment). - -##### Example - - SELECT VALUE 1; - -This query returns: - - [ - 1 - ] - -The following example shows a query that selects one user from the GleambookUsers collection. 
- -##### Example - - SELECT VALUE user - FROM GleambookUsers user - WHERE user.id = 1; - -This query returns: - - [{ - "userSince": "2012-08-20T10:10:00.000Z", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "gender": "F", - "name": "MargaritaStoddard", - "nickname": "Mags", - "alias": "Margarita", - "id": 1, - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "end-date": "2010-01-26", - "organizationName": "geomedia", - "start-date": "2010-06-17" - } - ] - } ] - -### SQL-style SELECT -The traditional SQL-style `SELECT` syntax is also supported in the query language. -This syntax can also be reformulated in a `SELECT VALUE` based manner. -(E.g., `SELECT expA AS fldA, expB AS fldB` is syntactic sugar for `SELECT VALUE { 'fldA': expA, 'fldB': expB }`.) -Unlike in SQL, the result of a query does not preserve the order of expressions in the `SELECT` clause. - -##### Example - SELECT user.alias user_alias, user.name user_name - FROM GleambookUsers user - WHERE user.id = 1; - -Returns: - - [ { - "user_name": "MargaritaStoddard", - "user_alias": "Margarita" - } ] - -### SELECT * -`SELECT *` returns an object with a nested field for each input tuple. -Each field has as its field name the name of a binding variable generated by either the `FROM` clause or `GROUP BY` -clause in the current enclosing `SELECT` statement, and its field value is the value of that binding variable. - -Note that the result of `SELECT *` is different from the result of query that selects all the fields of an object. 
- -##### Example - - SELECT * - FROM GleambookUsers user; - -Since `user` is the only binding variable generated in the `FROM` clause, this query returns: - - [ { - "user": { - "userSince": "2012-08-20T10:10:00.000Z", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "gender": "F", - "name": "MargaritaStoddard", - "nickname": "Mags", - "alias": "Margarita", - "id": 1, - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "end-date": "2010-01-26", - "organizationName": "geomedia", - "start-date": "2010-06-17" - } - ] - } - }, { - "user": { - "userSince": "2011-01-22T10:10:00.000Z", - "friendIds": [ - 1, - 4 - ], - "name": "IsbelDull", - "nickname": "Izzy", - "alias": "Isbel", - "id": 2, - "employment": [ - { - "organizationName": "Hexviafind", - "startDate": "2010-04-27" - } - ] - } - }, { - "user": { - "userSince": "2012-07-10T10:10:00.000Z", - "friendIds": [ - 1, - 5, - 8, - 9 - ], - "name": "EmoryUnk", - "alias": "Emory", - "id": 3, - "employment": [ - { - "organizationName": "geomedia", - "endDate": "2010-01-26", - "startDate": "2010-06-17" - } - ] - } - } ] - - -##### Example - - SELECT * - FROM GleambookUsers u, GleambookMessages m - WHERE m.authorId = u.id and u.id = 2; - -This query does an inner join that we will discuss in [multiple from terms](#Multiple_from_terms). 
-Since both `u` and `m` are binding variables generated in the `FROM` clause, this query returns: - - [ { - "u": { - "userSince": "2011-01-22T10:10:00", - "friendIds": [ - 1, - 4 - ], - "name": "IsbelDull", - "nickname": "Izzy", - "alias": "Isbel", - "id": 2, - "employment": [ - { - "organizationName": "Hexviafind", - "startDate": "2010-04-27" - } - ] - }, - "m": { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - } - }, { - "u": { - "userSince": "2011-01-22T10:10:00", - "friendIds": [ - 1, - 4 - ], - "name": "IsbelDull", - "nickname": "Izzy", - "alias": "Isbel", - "id": 2, - "employment": [ - { - "organizationName": "Hexviafind", - "startDate": "2010-04-27" - } - ] - }, - "m": { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is amazing" - } - } ] - -### SELECT _variable_.* - -Whereas `SELECT *` returns all the fields bound to all the variables which are currently defined, -the notation `SELECT c.*` returns all the fields of the object bound to variable `c`. -The variable `c` must be bound to an object for this to work. - -##### Example - - SELECT user.* - FROM GleambookUsers user; - -Compare this query with the first example given under [SELECT *](#Select_star). 
-This query returns all users from the `GleambookUsers` dataset, -but the `user` variable name is omitted from the results: - - [ - { - "id": 1, - "alias": "Margarita", - "name": "MargaritaStoddard", - "nickname": "Mags", - "userSince": "2012-08-20T10:10:00", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "organizationName": "geomedia", - "start-date": "2010-06-17", - "end-date": "2010-01-26" - } - ], - "gender": "F" - }, - { - "id": 2, - "alias": "Isbel", - "name": "IsbelDull", - "nickname": "Izzy", - "userSince": "2011-01-22T10:10:00", - "friendIds": [ - 1, - 4 - ], - "employment": [ - { - "organizationName": "Hexviafind", - "startDate": "2010-04-27" - } - ] - }, - { - "id": 3, - "alias": "Emory", - "name": "EmoryUnk", - "userSince": "2012-07-10T10:10:00", - "friendIds": [ - 1, - 5, - 8, - 9 - ], - "employment": [ - { - "organizationName": "geomedia", - "startDate": "2010-06-17", - "endDate": "2010-01-26" - } - ] - } - ] - -### SELECT DISTINCT -The `DISTINCT` keyword is used to eliminate duplicate items in results. The following example shows how it works. - -##### Example - - SELECT DISTINCT * FROM [1, 2, 2, 3] AS foo; - -This query returns: - - [ { - "foo": 1 - }, { - "foo": 2 - }, { - "foo": 3 - } ] - -##### Example - - SELECT DISTINCT VALUE foo FROM [1, 2, 2, 3] AS foo; - -This version of the query returns: - - [ 1 - , 2 - , 3 - ] - -### Unnamed Projections -Similar to standard SQL, the query language supports unnamed projections (a.k.a, unnamed `SELECT` clause items), for which names are generated. -Name generation has three cases: - - * If a projection expression is a variable reference expression, its generated name is the name of the variable. - * If a projection expression is a field access expression, its generated name is the last identifier in the expression. - * For all other cases, the query processor will generate a unique name. 
- -##### Example - - SELECT substr(user.name, 10), user.alias - FROM GleambookUsers user - WHERE user.id = 1; - -This query outputs: - - [ { - "alias": "Margarita", - "$1": "Stoddard" - } ] - -In the result, `$1` is the generated name for `substr(user.name, 1)`, while `alias` is the generated name for `user.alias`. - -### Abbreviated Field Access Expressions -As in standard SQL, field access expressions can be abbreviated (not recommended!) when there is no ambiguity. In the next example, the variable `user` is the only possible variable reference for fields `id`, `name` and `alias` and thus could be omitted in the query. More information on abbbreviated field access can be found in the appendix section on Variable Resolution. - -##### Example - - SELECT substr(name, 10) AS lname, alias - FROM GleambookUsers user - WHERE id = 1; - -Outputs: - - [ { - "lname": "Stoddard", - "alias": "Margarita" - } ] - -## UNNEST Clause -For each of its input tuples, the `UNNEST` clause flattens a collection-valued expression into individual items, producing multiple tuples, each of which is one of the expression's original input tuples augmented with a flattened item from its collection. - -### Inner UNNEST -The following example is a query that retrieves the names of the organizations that a selected user has worked for. It uses the `UNNEST` clause to unnest the nested collection `employment` in the user's object. - -##### Example - - SELECT u.id AS userId, e.organizationName AS orgName - FROM GleambookUsers u - UNNEST u.employment e - WHERE u.id = 1; - -This query returns: - - [ { - "orgName": "Codetechno", - "userId": 1 - }, { - "orgName": "geomedia", - "userId": 1 - } ] - -Note that `UNNEST` has SQL's inner join semantics --- that is, if a user has no employment history, no tuple corresponding to that user will be emitted in the result. - -### Left Outer UNNEST -As an alternative, the `LEFT OUTER UNNEST` clause offers SQL's left outer join semantics. 
For example, no collection-valued field named `hobbies` exists in the object for the user whose id is 1, but the following query's result still includes user 1. - -##### Example - - SELECT u.id AS userId, h.hobbyName AS hobby - FROM GleambookUsers u - LEFT OUTER UNNEST u.hobbies h - WHERE u.id = 1; - -Returns: - - [ { - "userId": 1 - } ] - -Note that if `u.hobbies` is an empty collection or leads to a `MISSING` (as above) or `NULL` value for a given input tuple, there is no corresponding binding value for variable `h` for an input tuple. A `MISSING` value will be generated for `h` so that the input tuple can still be propagated. - -### Expressing Joins Using UNNEST -The `UNNEST` clause is similar to SQL's `JOIN` clause except that it allows its right argument to be correlated to its left argument, as in the examples above --- i.e., think "correlated cross-product". -The next example shows this via a query that joins two data sets, GleambookUsers and GleambookMessages, returning user/message pairs. The results contain one object per pair, with result objects containing the user's name and an entire message. The query can be thought of as saying "for each Gleambook user, unnest the `GleambookMessages` collection and filter the output with the condition `message.authorId = user.id`". 
- -##### Example - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u - UNNEST GleambookMessages m - WHERE m.authorId = u.id; - -This returns: - - [ { - "uname": "MargaritaStoddard", - "message": " can't stand acast its plan is terrible" - }, { - "uname": "MargaritaStoddard", - "message": " dislike x-phone its touch-screen is horrible" - }, { - "uname": "MargaritaStoddard", - "message": " can't stand acast the network is horrible:(" - }, { - "uname": "MargaritaStoddard", - "message": " like ccast the 3G is awesome:)" - }, { - "uname": "MargaritaStoddard", - "message": " can't stand product-w the touch-screen is terrible" - }, { - "uname": "IsbelDull", - "message": " like product-z its platform is mind-blowing" - }, { - "uname": "IsbelDull", - "message": " like product-y the plan is amazing" - } ] - -Similarly, the above query can also be expressed as the `UNNEST`ing of a correlated subquery: - -##### Example - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u - UNNEST ( - SELECT VALUE msg - FROM GleambookMessages msg - WHERE msg.authorId = u.id - ) AS m; - -## FROM clauses -A `FROM` clause is used for enumerating (i.e., conceptually iterating over) the contents of collections, as in SQL. - -### Binding expressions -In addition to stored collections, a `FROM` clause can iterate over any intermediate collection returned by a valid query expression. -In the tuple stream generated by a `FROM` clause, the ordering of the input tuples are not guaranteed to be preserved. - -##### Example - - SELECT VALUE foo - FROM [1, 2, 2, 3] AS foo - WHERE foo > 2; - -Returns: - - [ - 3 - ] - -### Multiple FROM Terms -The query language permits correlations among `FROM` terms. Specifically, a `FROM` binding expression can refer to variables defined to its left in the given `FROM` clause. 
Thus, the first unnesting example above could also be expressed as follows: - -##### Example - - SELECT u.id AS userId, e.organizationName AS orgName - FROM GleambookUsers u, u.employment e - WHERE u.id = 1; - - -### Expressing Joins Using FROM Terms -Similarly, the join intentions of the other `UNNEST`-based join examples above could be expressed as: - -##### Example - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u, GleambookMessages m - WHERE m.authorId = u.id; - -##### Example - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u, - ( - SELECT VALUE msg - FROM GleambookMessages msg - WHERE msg.authorId = u.id - ) AS m; - -Note that the first alternative is one of the SQL-92 approaches to expressing a join. - -### Implicit Binding Variables - -Similar to standard SQL, the query language supports implicit `FROM` binding variables (i.e., aliases), for which a binding variable is generated. -Variable generation falls into three cases: - - * If the binding expression is a variable reference expression, the generated variable's name will be the name of the referenced variable itself. - * If the binding expression is a field access expression (or a fully qualified name for a dataset), the generated - variable's name will be the last identifier (or the dataset name) in the expression. - * For all other cases, a compilation error will be raised. - -The next two examples show queries that do not provide binding variables in their `FROM` clauses. 
- -##### Example - - SELECT GleambookUsers.name, GleambookMessages.message - FROM GleambookUsers, GleambookMessages - WHERE GleambookMessages.authorId = GleambookUsers.id; - -Returns: - - [ { - "name": "MargaritaStoddard", - "message": " like ccast the 3G is awesome:)" - }, { - "name": "MargaritaStoddard", - "message": " can't stand product-w the touch-screen is terrible" - }, { - "name": "MargaritaStoddard", - "message": " can't stand acast its plan is terrible" - }, { - "name": "MargaritaStoddard", - "message": " dislike x-phone its touch-screen is horrible" - }, { - "name": "MargaritaStoddard", - "message": " can't stand acast the network is horrible:(" - }, { - "name": "IsbelDull", - "message": " like product-y the plan is amazing" - }, { - "name": "IsbelDull", - "message": " like product-z its platform is mind-blowing" - } ] - -##### Example - - SELECT GleambookUsers.name, GleambookMessages.message - FROM GleambookUsers, - ( - SELECT VALUE GleambookMessages - FROM GleambookMessages - WHERE GleambookMessages.authorId = GleambookUsers.id - ); - -Returns: - - Error: "Syntax error: Need an alias for the enclosed expression:\n(select element GleambookMessages\n from GleambookMessages as GleambookMessages\n where (GleambookMessages.authorId = GleambookUsers.id)\n )", - "query_from_user": "use TinySocial;\n\nSELECT GleambookUsers.name, GleambookMessages.message\n FROM GleambookUsers,\n (\n SELECT VALUE GleambookMessages\n FROM GleambookMessages\n WHERE GleambookMessages.authorId = GleambookUsers.id\n );" - -More information on implicit binding variables can be found in the appendix section on Variable Resolution. - -## JOIN Clauses -The join clause in the query language supports both inner joins and left outer joins from standard SQL. 
- -### Inner joins -Using a `JOIN` clause, the inner join intent from the preceding examples can also be expressed as follows: - -##### Example - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; - -### Left Outer Joins -The query language supports SQL's notion of left outer join. The following query is an example: - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u LEFT OUTER JOIN GleambookMessages m ON m.authorId = u.id; - -Returns: - - [ { - "uname": "MargaritaStoddard", - "message": " like ccast the 3G is awesome:)" - }, { - "uname": "MargaritaStoddard", - "message": " can't stand product-w the touch-screen is terrible" - }, { - "uname": "MargaritaStoddard", - "message": " can't stand acast its plan is terrible" - }, { - "uname": "MargaritaStoddard", - "message": " dislike x-phone its touch-screen is horrible" - }, { - "uname": "MargaritaStoddard", - "message": " can't stand acast the network is horrible:(" - }, { - "uname": "IsbelDull", - "message": " like product-y the plan is amazing" - }, { - "uname": "IsbelDull", - "message": " like product-z its platform is mind-blowing" - }, { - "uname": "EmoryUnk" - } ] - -For non-matching left-side tuples, the query language produces `MISSING` values for the right-side binding variables; that is why the last object in the above result doesn't have a `message` field. Note that this is slightly different from standard SQL, which instead would fill in `NULL` values for the right-side fields. The reason for this difference is that, for non-matches in its join results, the query language views fields from the right-side as being "not there" (a.k.a. `MISSING`) instead of as being "there but unknown" (i.e., `NULL`). 
- -The left-outer join query can also be expressed using `LEFT OUTER UNNEST`: - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u - LEFT OUTER UNNEST ( - SELECT VALUE message - FROM GleambookMessages message - WHERE message.authorId = u.id - ) m; - -In general, SQL-style join queries can also be expressed by `UNNEST` clauses and left outer join queries can be expressed by `LEFT OUTER UNNESTs`. - -### Variable scope in JOIN clauses - -Variables defined by `JOIN` subclauses are not visible to other subclauses in the same `FROM` clause. -This also applies to the `FROM` variable that starts the `JOIN` subclause. - -##### Example - - SELECT * FROM GleambookUsers u - JOIN (SELECT VALUE m - FROM GleambookMessages m - WHERE m.authorId = u.id) m - ON u.id = m.authorId; - -The variable `u` defined by the `FROM` clause is not visible inside the `JOIN` subclause, -so this query returns no results. - -## GROUP BY Clauses -The `GROUP BY` clause generalizes standard SQL's grouping and aggregation semantics, but it also retains backward compatibility with the standard (relational) SQL `GROUP BY` and aggregation features. - -### Group variables -In a `GROUP BY` clause, in addition to the binding variable(s) defined for the grouping key(s), the query language allows a user to define a *group variable* by using the clause's `GROUP AS` extension to denote the resulting group. -After grouping, then, the query's in-scope variables include the grouping key's binding variables as well as this group variable which will be bound to one collection value for each group. This per-group collection (i.e., multiset) value will be a set of nested objects in which each field of the object is the result of a renamed variable defined in parentheses following the group variable's name. The `GROUP AS` syntax is as follows: - - Variable ("(" VariableReference Identifier ("," VariableReference Identifier )* ")")? 
- -##### Example - - SELECT * - FROM GleambookMessages message - GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg); - -This first example query returns: - - [ { - "msgs": [ - { - "msg": { - "senderLocation": [ - 38.97, - 77.49 - ], - "inResponseTo": 1, - "messageId": 11, - "authorId": 1, - "message": " can't stand acast its plan is terrible" - } - }, - { - "msg": { - "senderLocation": [ - 41.66, - 80.87 - ], - "inResponseTo": 4, - "messageId": 2, - "authorId": 1, - "message": " dislike x-phone its touch-screen is horrible" - } - }, - { - "msg": { - "senderLocation": [ - 37.73, - 97.04 - ], - "inResponseTo": 2, - "messageId": 4, - "authorId": 1, - "message": " can't stand acast the network is horrible:(" - } - }, - { - "msg": { - "senderLocation": [ - 40.33, - 80.87 - ], - "inResponseTo": 11, - "messageId": 8, - "authorId": 1, - "message": " like ccast the 3G is awesome:)" - } - }, - { - "msg": { - "senderLocation": [ - 42.5, - 70.01 - ], - "inResponseTo": 12, - "messageId": 10, - "authorId": 1, - "message": " can't stand product-w the touch-screen is terrible" - } - } - ], - "uid": 1 - }, { - "msgs": [ - { - "msg": { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - } - }, - { - "msg": { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is amazing" - } - } - ], - "uid": 2 - } ] - -As we can see from the above query result, each group in the example query's output has an associated group -variable value called `msgs` that appears in the `SELECT *`'s result. -This variable contains a collection of objects associated with the group; each of the group's `message` values -appears in the `msg` field of the objects in the `msgs` collection. 
- -The group variable in the query language makes more complex, composable, nested subqueries over a group possible, which is -important given the language's more complex data model (relative to SQL). -As a simple example of this, as we really just want the messages associated with each user, we might wish to avoid -the "extra wrapping" of each message as the `msg` field of an object. -(That wrapping is useful in more complex cases, but is essentially just in the way here.) -We can use a subquery in the `SELECT` clause to tunnel through the extra nesting and produce the desired result. - -##### Example - - SELECT uid, (SELECT VALUE g.msg FROM g) AS msgs - FROM GleambookMessages gbm - GROUP BY gbm.authorId AS uid - GROUP AS g(gbm as msg); - -This variant of the example query returns: - - [ { - "msgs": [ - { - "senderLocation": [ - 38.97, - 77.49 - ], - "inResponseTo": 1, - "messageId": 11, - "authorId": 1, - "message": " can't stand acast its plan is terrible" - }, - { - "senderLocation": [ - 41.66, - 80.87 - ], - "inResponseTo": 4, - "messageId": 2, - "authorId": 1, - "message": " dislike x-phone its touch-screen is horrible" - }, - { - "senderLocation": [ - 37.73, - 97.04 - ], - "inResponseTo": 2, - "messageId": 4, - "authorId": 1, - "message": " can't stand acast the network is horrible:(" - }, - { - "senderLocation": [ - 40.33, - 80.87 - ], - "inResponseTo": 11, - "messageId": 8, - "authorId": 1, - "message": " like ccast the 3G is awesome:)" - }, - { - "senderLocation": [ - 42.5, - 70.01 - ], - "inResponseTo": 12, - "messageId": 10, - "authorId": 1, - "message": " can't stand product-w the touch-screen is terrible" - } - ], - "uid": 1 - }, { - "msgs": [ - { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - }, - { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is 
amazing" - } - ], - "uid": 2 - } ] - -The next example shows a more interesting case involving the use of a subquery in the `SELECT` list. -Here the subquery further processes the groups. -There is no renaming in the declaration of the group variable `g` such that -`g` only has one field `gbm` which comes from the `FROM` clause. - -##### Example - - SELECT uid, - (SELECT VALUE g.gbm - FROM g - WHERE g.gbm.message LIKE '% like%' - ORDER BY g.gbm.messageId - LIMIT 2) AS msgs - FROM GleambookMessages gbm - GROUP BY gbm.authorId AS uid - GROUP AS g; - -This example query returns: - - [ { - "msgs": [ - { - "senderLocation": [ - 40.33, - 80.87 - ], - "inResponseTo": 11, - "messageId": 8, - "authorId": 1, - "message": " like ccast the 3G is awesome:)" - } - ], - "uid": 1 - }, { - "msgs": [ - { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is amazing" - }, - { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - } - ], - "uid": 2 - } ] - -### Implicit Grouping Key Variables -In the query language syntax, providing named binding variables for `GROUP BY` key expressions is optional. -If a grouping key is missing a user-provided binding variable, the underlying compiler will generate one. -Automatic grouping key variable naming falls into three cases, much like the treatment of unnamed projections: - - * If the grouping key expression is a variable reference expression, the generated variable gets the same name as the referred variable; - * If the grouping key expression is a field access expression, the generated variable gets the same name as the last identifier in the expression; - * For all other cases, the compiler generates a unique variable (but the user query is unable to refer to this generated variable). 
- -The next example illustrates a query that doesn't provide binding variables for its grouping key expressions. - -##### Example - - SELECT authorId, - (SELECT VALUE g.gbm - FROM g - WHERE g.gbm.message LIKE '% like%' - ORDER BY g.gbm.messageId - LIMIT 2) AS msgs - FROM GleambookMessages gbm - GROUP BY gbm.authorId - GROUP AS g; - -This query returns: - - [ { - "msgs": [ - { - "senderLocation": [ - 40.33, - 80.87 - ], - "inResponseTo": 11, - "messageId": 8, - "authorId": 1, - "message": " like ccast the 3G is awesome:)" - } - ], - "authorId": 1 - }, { - "msgs": [ - { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is amazing" - }, - { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - } - ], - "authorId": 2 - } ] - -Based on the three variable generation rules, the generated variable for the grouping key expression `gbm.authorId` -is `authorId` (which is how it is referred to in the example's `SELECT` clause). - -### Implicit Group Variables -The group variable itself is also optional in the `GROUP BY` syntax. -If a user's query does not declare the name and structure of the group variable using `GROUP AS`, -the query compiler will generate a unique group variable whose fields include all of the binding -variables defined in the `FROM` clause of the current enclosing `SELECT` statement. -In this case the user's query will not be able to refer to the generated group variable, -but is able to call SQL-92 aggregation functions as in SQL-92. - - -### Aggregation Functions -In the traditional SQL, which doesn't support nested data, grouping always also involves the use of aggregation -to compute properties of the groups (for example, the average number of messages per user rather than the actual set -of messages per user).
-Each aggregation function in the query language takes a collection (for example, the group of messages) as its input and produces -a scalar value as its output. -These aggregation functions, being truly functional in nature (unlike in SQL), can be used anywhere in a -query where an expression is allowed. -The following table catalogs the built-in aggregation functions of the query language and also indicates how each one handles -`NULL`/`MISSING` values in the input collection or a completely empty input collection: - -| Function | NULL | MISSING | Empty Collection | -|----------------|--------------|--------------|------------------| -| STRICT_COUNT | counted | counted | 0 | -| STRICT_SUM | returns NULL | returns NULL | returns NULL | -| STRICT_MAX | returns NULL | returns NULL | returns NULL | -| STRICT_MIN | returns NULL | returns NULL | returns NULL | -| STRICT_AVG | returns NULL | returns NULL | returns NULL | -| STRICT_STDDEV_SAMP | returns NULL | returns NULL | returns NULL | -| STRICT_STDDEV_POP | returns NULL | returns NULL | returns NULL | -| STRICT_VAR_SAMP | returns NULL | returns NULL | returns NULL | -| STRICT_VAR_POP | returns NULL | returns NULL | returns NULL | -| STRICT_SKEWNESS | returns NULL | returns NULL | returns NULL | -| STRICT_KURTOSIS | returns NULL | returns NULL | returns NULL | -| ARRAY_COUNT | not counted | not counted | 0 | -| ARRAY_SUM | ignores NULL | ignores NULL | returns NULL | -| ARRAY_MAX | ignores NULL | ignores NULL | returns NULL | -| ARRAY_MIN | ignores NULL | ignores NULL | returns NULL | -| ARRAY_AVG | ignores NULL | ignores NULL | returns NULL | -| ARRAY_STDDEV_SAMP | ignores NULL | ignores NULL | returns NULL | -| ARRAY_STDDEV_POP | ignores NULL | ignores NULL | returns NULL | -| ARRAY_VAR_SAMP | ignores NULL | ignores NULL | returns NULL | -| ARRAY_VAR_POP | ignores NULL | ignores NULL | returns NULL | -| ARRAY_SKEWNESS | ignores NULL | ignores NULL | returns NULL | -| ARRAY_KURTOSIS | ignores NULL | ignores NULL | 
returns NULL | - -Notice that the query language offers two versions for each of the aggregate functions listed above. -For each function, the STRICT version handles `UNKNOWN` values in a semantically strict fashion, -where unknown values in the input result in unknown values in the output; and the ARRAY version -handles them in the ad hoc "just ignore the unknown values" fashion that the SQL standard chose to adopt. - -##### Example - - ARRAY_AVG( - ( - SELECT VALUE ARRAY_COUNT(friendIds) FROM GleambookUsers - ) - ); - -This example returns: - - 3.3333333333333335 - -##### Example - - SELECT uid AS uid, ARRAY_COUNT(grp) AS msgCnt - FROM GleambookMessages message - GROUP BY message.authorId AS uid - GROUP AS grp(message AS msg); - -This query returns: - - [ { - "uid": 1, - "msgCnt": 5 - }, { - "uid": 2, - "msgCnt": 2 - } ] - -Notice how the query forms groups where each group involves a message author and their messages. -(SQL cannot do this because the grouped intermediate result is non-1NF in nature.) -The query then uses the collection aggregate function ARRAY_COUNT to get the cardinality of each -group of messages. - -Each aggregation function in the query language supports the DISTINCT modifier that removes duplicate values from -the input collection. - -##### Example - - ARRAY_SUM(DISTINCT [1, 1, 2, 2, 3]) - -This query returns: - - 6 - -### SQL-92 Aggregation Functions -For compatibility with the traditional SQL aggregation functions, the query language also offers SQL-92's -aggregation function symbols (`COUNT`, `SUM`, `MAX`, `MIN`, `AVG`, `ARRAY_AGG`, `STDDEV_SAMP`, `STDDEV_POP`, `VAR_SAMP`, -`VAR_POP`) as supported syntactic sugar. -The query compiler rewrites queries that utilize these function symbols into queries that only -use the collection aggregate functions of the query language. 
The following example uses the SQL-92 syntax approach -to compute a result that is identical to that of the more explicit example above: - -##### Example - - SELECT uid, COUNT(*) AS msgCnt - FROM GleambookMessages msg - GROUP BY msg.authorId AS uid; - -It is important to realize that `COUNT` is actually **not** a built-in aggregation function. -Rather, the `COUNT` query above is using a special "sugared" function symbol that the query compiler -will rewrite as follows: - - SELECT uid AS uid, ARRAY_COUNT( (SELECT VALUE 1 FROM `$1` as g) ) AS msgCnt - FROM GleambookMessages msg - GROUP BY msg.authorId AS uid - GROUP AS `$1`(msg AS msg); - - -The same sort of rewritings apply to the function symbols `SUM`, `MAX`, `MIN`, `AVG`, `ARRAY_AGG`,`STDDEV_SAMP`, -`STDDEV_POP`, `VAR_SAMP`, and `VAR_POP`. -In contrast to the collection aggregate functions of the query language, these special SQL-92 function symbols -can only be used in the same way they are in standard SQL (i.e., with the same restrictions). - -The DISTINCT modifier is also supported for these aggregate functions. - -The following table shows the SQL-92 functions supported by the query language, their aliases where available, -and their corresponding built-in functions. - -| SQL-92 Function | Aliases | Corresponding Built-in Function | -|-----------------|-------------------------|---------------------------------| -| COUNT | | ARRAY_COUNT | -| SUM | | ARRAY_SUM | -| MAX | | ARRAY_MAX | -| MIN | | ARRAY_MIN | -| AVG | | ARRAY_AVG | -| ARRAY_AGG | | (none) | -| STDDEV_SAMP | STDDEV | ARRAY_STDDEV_SAMP | -| STDDEV_POP | | ARRAY_STDDEV_POP | -| VAR_SAMP | VARIANCE, VARIANCE_SAMP | ARRAY_VAR_SAMP | -| VAR_POP | VARIANCE_POP | ARRAY_VAR_POP | - -Note that the `ARRAY_AGG` function symbol is rewritten simply to return the result of the generated subquery, -without applying any built-in function. 
- -### SQL-92 Compliant GROUP BY Aggregations -The query language provides full support for SQL-92 `GROUP BY` aggregation queries. -The following query is such an example: - -##### Example - - SELECT msg.authorId, COUNT(*) - FROM GleambookMessages msg - GROUP BY msg.authorId; - -This query outputs: - - [ { - "authorId": 1, - "$1": 5 - }, { - "authorId": 2, - "$1": 2 - } ] - -In principle, a `msg` reference in the query's `SELECT` clause would be "sugarized" as a collection -(as described in [Implicit Group Variables](#Implicit_group_variables)). -However, since the SELECT expression `msg.authorId` is syntactically identical to a GROUP BY key expression, -it will be internally replaced by the generated group key variable. -The following is the equivalent rewritten query that will be generated by the compiler for the query above: - - SELECT authorId AS authorId, ARRAY_COUNT( (SELECT g.msg FROM `$1` AS g) ) - FROM GleambookMessages msg - GROUP BY msg.authorId AS authorId - GROUP AS `$1`(msg AS msg); - -### Column Aliases -The query language also allows column aliases to be used as `ORDER BY` keys. - -##### Example - - SELECT msg.authorId AS aid, COUNT(*) - FROM GleambookMessages msg - GROUP BY msg.authorId - ORDER BY aid; - -This query returns: - - [ { - "$1": 5, - "aid": 1 - }, { - "$1": 2, - "aid": 2 - } ] - -## WHERE Clauses and HAVING Clauses -Both `WHERE` clauses and `HAVING` clauses are used to filter input data based on a condition expression. -Only tuples for which the condition expression evaluates to `TRUE` are propagated. -Note that if the condition expression evaluates to `NULL` or `MISSING` the input tuple will be discarded. - -## ORDER BY Clauses -The `ORDER BY` clause is used to globally sort data in either ascending order (i.e., `ASC`) or descending order (i.e., `DESC`). -During ordering, `MISSING` and `NULL` are treated as being smaller than any other value if they are encountered -in the ordering key(s).
`MISSING` is treated as smaller than `NULL` if both occur in the data being sorted. -The ordering of values of a given type is consistent with its type's <= ordering; the ordering of values across types is implementation-defined but stable. -The following example returns all `GleambookUsers` in descending order by their number of friends. - -##### Example - - SELECT VALUE user - FROM GleambookUsers AS user - ORDER BY ARRAY_COUNT(user.friendIds) DESC; - -This query returns: - - [ { - "userSince": "2012-08-20T10:10:00.000Z", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "gender": "F", - "name": "MargaritaStoddard", - "nickname": "Mags", - "alias": "Margarita", - "id": 1, - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "end-date": "2010-01-26", - "organizationName": "geomedia", - "start-date": "2010-06-17" - } - ] - }, { - "userSince": "2012-07-10T10:10:00.000Z", - "friendIds": [ - 1, - 5, - 8, - 9 - ], - "name": "EmoryUnk", - "alias": "Emory", - "id": 3, - "employment": [ - { - "organizationName": "geomedia", - "endDate": "2010-01-26", - "startDate": "2010-06-17" - } - ] - }, { - "userSince": "2011-01-22T10:10:00.000Z", - "friendIds": [ - 1, - 4 - ], - "name": "IsbelDull", - "nickname": "Izzy", - "alias": "Isbel", - "id": 2, - "employment": [ - { - "organizationName": "Hexviafind", - "startDate": "2010-04-27" - } - ] - } ] - -## LIMIT Clauses -The `LIMIT` clause is used to limit the result set to a specified constant size. -The use of the `LIMIT` clause is illustrated in the next example. 
- -##### Example - - SELECT VALUE user - FROM GleambookUsers AS user - ORDER BY len(user.friendIds) DESC - LIMIT 1; - -This query returns: - - [ { - "userSince": "2012-08-20T10:10:00.000Z", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "gender": "F", - "name": "MargaritaStoddard", - "nickname": "Mags", - "alias": "Margarita", - "id": 1, - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "end-date": "2010-01-26", - "organizationName": "geomedia", - "start-date": "2010-06-17" - } - ] - } ] - -## WITH Clauses -As in standard SQL, `WITH` clauses are available to improve the modularity of a query. -The next query shows an example. - -##### Example - - WITH avgFriendCount AS ( - SELECT VALUE AVG(ARRAY_COUNT(user.friendIds)) - FROM GleambookUsers AS user - )[0] - SELECT VALUE user - FROM GleambookUsers user - WHERE ARRAY_COUNT(user.friendIds) > avgFriendCount; - -This query returns: - - [ { - "userSince": "2012-08-20T10:10:00.000Z", - "friendIds": [ - 2, - 3, - 6, - 10 - ], - "gender": "F", - "name": "MargaritaStoddard", - "nickname": "Mags", - "alias": "Margarita", - "id": 1, - "employment": [ - { - "organizationName": "Codetechno", - "start-date": "2006-08-06" - }, - { - "end-date": "2010-01-26", - "organizationName": "geomedia", - "start-date": "2010-06-17" - } - ] - }, { - "userSince": "2012-07-10T10:10:00.000Z", - "friendIds": [ - 1, - 5, - 8, - 9 - ], - "name": "EmoryUnk", - "alias": "Emory", - "id": 3, - "employment": [ - { - "organizationName": "geomedia", - "endDate": "2010-01-26", - "startDate": "2010-06-17" - } - ] - } ] - -The query is equivalent to the following, more complex, inlined form of the query: - - SELECT * - FROM GleambookUsers user - WHERE ARRAY_COUNT(user.friendIds) > - ( SELECT VALUE AVG(ARRAY_COUNT(user.friendIds)) - FROM GleambookUsers AS user - ) [0]; - -WITH can be particularly useful when a value needs to be used several times in a query. 
- -Before proceeding further, notice that both the WITH query and its equivalent inlined variant -include the syntax "[0]" -- this is due to a noteworthy difference between the query language and SQL-92. -In SQL-92, whenever a scalar value is expected and it is being produced by a query expression, -the SQL-92 query processor will evaluate the expression, check that there is only one row and column -in the result at runtime, and then coerce the one-row/one-column tabular result into a scalar value. -A JSON query language, being designed to deal with nested data and schema-less data, should not do this. -Collection-valued data is perfectly legal in most contexts, and its data is schema-less, -so the query processor rarely knows exactly what to expect where and such automatic conversion would often -not be desirable. Thus, in the queries above, the use of "[0]" extracts the first (i.e., 0th) element of -an array-valued query expression's result; this is needed above, even though the result is an array of one -element, to extract the only element in the singleton array and obtain the desired scalar for the comparison. - -## LET Clauses -Similar to `WITH` clauses, `LET` clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The next query shows an example. - -##### Example - - SELECT u.name AS uname, messages AS messages - FROM GleambookUsers u - LET messages = (SELECT VALUE m - FROM GleambookMessages m - WHERE m.authorId = u.id) - WHERE EXISTS messages; - -This query lists `GleambookUsers` that have posted `GleambookMessages` and shows all authored messages for each listed user. 
It returns: - - [ { - "uname": "MargaritaStoddard", - "messages": [ - { - "senderLocation": [ - 38.97, - 77.49 - ], - "inResponseTo": 1, - "messageId": 11, - "authorId": 1, - "message": " can't stand acast its plan is terrible" - }, - { - "senderLocation": [ - 41.66, - 80.87 - ], - "inResponseTo": 4, - "messageId": 2, - "authorId": 1, - "message": " dislike x-phone its touch-screen is horrible" - }, - { - "senderLocation": [ - 37.73, - 97.04 - ], - "inResponseTo": 2, - "messageId": 4, - "authorId": 1, - "message": " can't stand acast the network is horrible:(" - }, - { - "senderLocation": [ - 40.33, - 80.87 - ], - "inResponseTo": 11, - "messageId": 8, - "authorId": 1, - "message": " like ccast the 3G is awesome:)" - }, - { - "senderLocation": [ - 42.5, - 70.01 - ], - "inResponseTo": 12, - "messageId": 10, - "authorId": 1, - "message": " can't stand product-w the touch-screen is terrible" - } - ] - }, { - "uname": "IsbelDull", - "messages": [ - { - "senderLocation": [ - 31.5, - 75.56 - ], - "inResponseTo": 1, - "messageId": 6, - "authorId": 2, - "message": " like product-z its platform is mind-blowing" - }, - { - "senderLocation": [ - 48.09, - 81.01 - ], - "inResponseTo": 4, - "messageId": 3, - "authorId": 2, - "message": " like product-y the plan is amazing" - } - ] - } ] - -This query is equivalent to the following query that does not use the `LET` clause: - - SELECT u.name AS uname, ( SELECT VALUE m - FROM GleambookMessages m - WHERE m.authorId = u.id - ) AS messages - FROM GleambookUsers u - WHERE EXISTS ( SELECT VALUE m - FROM GleambookMessages m - WHERE m.authorId = u.id - ); - -## UNION ALL -UNION ALL can be used to combine two input arrays or multisets into one. As in SQL, there is no ordering guarantee -on the contents of the output stream. -However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular, -it allows heterogeneity on the input and output streams. 
-A type error will be raised if one of the inputs is not a collection. -The following odd but legal query is an example: - -##### Example - - SELECT u.name AS uname - FROM GleambookUsers u - WHERE u.id = 2 - UNION ALL - SELECT VALUE m.message - FROM GleambookMessages m - WHERE authorId=2; - -This query returns: - - [ - " like product-z its platform is mind-blowing" - , { - "uname": "IsbelDull" - }, " like product-y the plan is amazing" - ] - -## OVER Clauses ## - -All window functions must have an OVER clause to define the window partitions, -the order of tuples within those partitions, and the extent of the window frame. -Some window functions take additional window options, which are specified by -modifiers before the OVER clause. - -The query language has a dedicated set of window functions. -Aggregate functions can also be used as window functions, when they are used -with an OVER clause. - -### Window Function Call ### - - WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")" - (WindowFunctionOptions)? <OVER> (Variable <AS>)? "(" WindowDefinition ")" - -#### Window Function Type #### - - WindowFunctionType ::= AggregateFunction | WindowFunction - -Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section -for a list of aggregate functions. - -Refer to the [Window Functions](builtins.html#WindowFunctions) section for a -list of window functions. - -#### Window Function Arguments #### - - WindowFunctionArguments ::= ( (<DISTINCT>)? Expression | - (Expression ("," Expression ("," Expression)? )? )? ) - -Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section or -the [Window Functions](builtins.html#WindowFunctions) section for details of -the arguments for individual functions. - -### Window Function Options ### - - WindowFunctionOptions ::= (NthValFrom)? (NullsTreatment)? - -Window function options cannot be used with [aggregate -functions](builtins.html#AggregateFunctions).
- -Window function options can only be used with some [window -functions](builtins.html#WindowFunctions), as described below. - -#### Nth Val From #### - - NthValFrom ::= <FROM> ( <FIRST> | <LAST> ) - -The **nth val from** modifier determines whether the computation begins at the -first or last tuple in the window. - -This modifier can only be used with the `nth_value()` function. - -This modifier is optional. -If omitted, the default setting is `FROM FIRST`. - -#### Nulls Treatment #### - - NullsTreatment ::= ( <RESPECT> | <IGNORE> ) <NULLS> - -The **nulls treatment** modifier determines whether NULL values are included in -the computation, or ignored. -MISSING values are treated the same way as NULL values. - -This modifier can only be used with the `first_value()`, `last_value()`, -`nth_value()`, `lag()`, and `lead()` functions. - -This modifier is optional. -If omitted, the default setting is `RESPECT NULLS`. - -### Window Frame Variable ### - -The AS keyword enables you to specify an alias for the window frame contents. -It introduces a variable which will be bound to the contents of the frame. -When using a built-in [aggregate function](builtins.html#AggregateFunctions) as -a window function, the function’s argument must be a subquery which refers to -this alias, for example: - - SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field)) - OVER alias AS (PARTITION BY … ORDER BY …) - FROM source AS src - -The alias is not necessary when using a [window function](builtins.html#WindowFunctions), -or when using a standard SQL aggregate function with the OVER clause. - -#### Standard SQL Aggregate Functions with the OVER Clause #### - -A standard SQL aggregate function with an OVER clause is rewritten by the -query compiler using a built-in aggregate function over a frame variable.
-For example, the following query with the `sum()` function: - - SELECT SUM(field) OVER (PARTITION BY … ORDER BY …) - FROM source AS src - -Is rewritten as the following query using the `array_sum()` function: - - SELECT ARRAY_SUM( (SELECT VALUE alias.src.field FROM alias) ) - OVER alias AS (PARTITION BY … ORDER BY …) - FROM source AS src - -This is similar to the way that standard SQL aggregate functions are rewritten -as built-in aggregate functions in the presence of the GROUP BY clause. - -### Window Definition ### - - WindowDefinition ::= (WindowPartitionClause)? (WindowOrderClause - (WindowFrameClause (WindowFrameExclusion)? )? )? - -The **window definition** specifies the partitioning, ordering, and framing for -window functions. - -#### Window Partition Clause #### - - WindowPartitionClause ::= <PARTITION> <BY> Expression ("," Expression)* - -The **window partition clause** divides the tuples into logical partitions -using one or more expressions. - -This clause may be used with any [window function](builtins.html#WindowFunctions), -or any [aggregate function](builtins.html#AggregateFunctions) used as a window -function. - -This clause is optional. -If omitted, all tuples are united in a single partition. - -#### Window Order Clause #### - - WindowOrderClause ::= <ORDER> <BY> OrderingTerm ("," OrderingTerm)* - -The **window order clause** determines how tuples are ordered within each -partition. -The window function works on tuples in the order specified by this clause. - -This clause may be used with any [window function](builtins.html#WindowFunctions), -or any [aggregate function](builtins.html#AggregateFunctions) used as a window -function. - -This clause is optional. -If omitted, all tuples are considered peers, i.e. their order is tied. -When tuples in the window partition are tied, each window function behaves -differently. - -* The `row_number()` function returns a distinct number for each tuple. - If tuples are tied, the results may be unpredictable.
- -* The `rank()`, `dense_rank()`, `percent_rank()`, and `cume_dist()` functions - return the same result for each tuple. - -* For other functions, if the [window frame](#Window_frame_clause) is - defined by `ROWS`, the results may be unpredictable. - If the window frame is defined by `RANGE` or `GROUPS`, the results are the same - for each tuple. - -This clause may have multiple [ordering terms](#Ordering_term). -To reduce the number of ties, add additional [ordering terms](#Ordering_term). - -##### Note ##### - -This clause does not guarantee the overall order of the query results. -To guarantee the order of the final results, use the query ORDER BY clause. - -#### Ordering Term #### - - OrderingTerm ::= Expression ( <ASC> | <DESC> )? - -The **ordering term** specifies an ordering expression and collation. - -This clause has the same syntax and semantics as the ordering term for queries. -Refer to the [ORDER BY Clauses](#Order_By_clauses) section for details. - -#### Window Frame Clause #### - - WindowFrameClause ::= ( <ROWS> | <RANGE> | <GROUPS> ) WindowFrameExtent - -The **window frame clause** defines the window frame. - -This clause can be used with all -[aggregate functions](builtins.html#AggregateFunctions) -and some [window functions](builtins.html#WindowFunctions) — -refer to the descriptions of individual functions for more details. - -This clause is allowed only when the [window order -clause](#Window_order_clause) is present. - -This clause is optional. - -* If this clause is omitted and there is no [window order - clause](#Window_order_clause), the window frame is the entire partition. - -* If this clause is omitted but there is a [window order - clause](#Window_order_clause), the window frame becomes all tuples - in the partition preceding the current tuple and its peers — the - same as `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`. - -The window frame can be defined in the following ways: - -* `ROWS`: Counts the exact number of tuples within the frame.
- If window ordering doesn’t result in unique ordering, the function may - produce unpredictable results. - You can add a unique expression or more window ordering expressions to - produce unique ordering. - -* `RANGE`: Looks for a value offset within the frame. - The function produces deterministic results. - -* `GROUPS`: Counts all groups of tied rows within the frame. - The function produces deterministic results. - -##### Note ##### - -If this clause uses `RANGE` with either `Expression PRECEDING` or -`Expression FOLLOWING`, the [window order clause](#Window_order_clause) must -have only a single ordering term. - -The ordering term expression must evaluate to a number. - - -If these conditions are not met, the window frame will be empty, -which means the window function will return its default -value: in most cases this is NULL, except for `strict_count()` or -`array_count()`, whose default value is 0. - -This restriction does not apply when the window frame uses `ROWS` or -`GROUPS`. - -##### Tip ##### - -The `RANGE` window frame is commonly used to define window frames based -on date or time. - -If you want to use `RANGE` with either `Expression PRECEDING` or `Expression -FOLLOWING`, and you want to use an ordering expression based on date or time, -the expression in `Expression PRECEDING` or `Expression FOLLOWING` must use a -data type that can be added to the ordering expression. - -#### Window Frame Extent #### - - WindowFrameExtent ::= ( ( <UNBOUNDED> | Expression ) <PRECEDING> | <CURRENT> <ROW> ) | - <BETWEEN> - ( <UNBOUNDED> <PRECEDING> | <CURRENT> <ROW> | Expression ( <PRECEDING> | <FOLLOWING> ) ) - <AND> - ( <UNBOUNDED> <FOLLOWING> | <CURRENT> <ROW> | Expression ( <PRECEDING> | <FOLLOWING> ) ) - -The **window frame extent clause** specifies the start point and end point of -the window frame. -The expression before `AND` is the start point and the expression after `AND` -is the end point. -If `BETWEEN` is omitted, you can only specify the start point; the end point -becomes `CURRENT ROW`. - -The window frame end point can’t be before the start point. -If this clause violates this restriction explicitly, an error will result.
-If it violates this restriction implicitly, the window frame will be empty, -which means the window function will return its default value: -in most cases this is NULL, except for `strict_count()` or -`array_count()`, whose default value is 0. - -Window frame extents that result in an explicit violation are: - -* `BETWEEN CURRENT ROW AND Expression PRECEDING` - -* `BETWEEN Expression FOLLOWING AND Expression PRECEDING` - -* `BETWEEN Expression FOLLOWING AND CURRENT ROW` - -Window frame extents that result in an implicit violation are: - -* `BETWEEN UNBOUNDED PRECEDING AND Expression PRECEDING` — if `Expression` is - too high, some tuples may generate an empty window frame. - -* `BETWEEN Expression PRECEDING AND Expression PRECEDING` — if the second - `Expression` is greater than or equal to the first `Expression`, - all result sets will generate an empty window frame. - -* `BETWEEN Expression FOLLOWING AND Expression FOLLOWING` — if the first - `Expression` is greater than or equal to the second `Expression`, all result - sets will generate an empty window frame. - -* `BETWEEN Expression FOLLOWING AND UNBOUNDED FOLLOWING` — if `Expression` is - too high, some tuples may generate an empty window frame. - -* If the [window frame exclusion clause](#Window_frame_exclusion) is present, - any window frame specification may result in an empty window frame. - -The `Expression` must be a positive constant or an expression that evaluates as -a positive number. -For `ROWS` or `GROUPS`, the `Expression` must be an integer. - -#### Window Frame Exclusion #### - - WindowFrameExclusion ::= <EXCLUDE> ( <CURRENT> <ROW> | <GROUP> | <TIES> | - <NO> <OTHERS> ) - -The **window frame exclusion clause** enables you to exclude specified -tuples from the window frame. - -This clause can be used with all -[aggregate functions](builtins.html#AggregateFunctions) -and some [window functions](builtins.html#WindowFunctions) — -refer to the descriptions of individual functions for more details.
- -This clause is allowed only when the [window frame -clause](#Window_frame_clause) is present. - -This clause is optional. -If this clause is omitted, the default is no exclusion — -the same as `EXCLUDE NO OTHERS`. - -* `EXCLUDE CURRENT ROW`: If the current tuple is still part of the window - frame, it is removed from the window frame. - -* `EXCLUDE GROUP`: The current tuple and any peers of the current tuple are - removed from the window frame. - -* `EXCLUDE TIES`: Any peers of the current tuple, but not the current tuple - itself, are removed from the window frame. - -* `EXCLUDE NO OTHERS`: No additional tuples are removed from the window frame. - -If the current tuple is already removed from the window frame, then it remains -removed from the window frame. - -## Subqueries -In the query language, an arbitrary subquery can appear anywhere that an expression can appear. -Unlike SQL-92, as was just alluded to, the subqueries in a SELECT list or a boolean predicate need -not return singleton, single-column relations. -Instead, they may return arbitrary collections. -For example, the following query is a variant of the prior group-by query examples; -it retrieves an array of up to two "dislike" messages per user. - -##### Example - - SELECT uid, - (SELECT VALUE m.msg - FROM msgs m - WHERE m.msg.message LIKE '%dislike%' - ORDER BY m.msg.messageId - LIMIT 2) AS msgs - FROM GleambookMessages message - GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg); - -For our sample data set, this query returns: - - [ { - "msgs": [ - { - "senderLocation": [ - 41.66, - 80.87 - ], - "inResponseTo": 4, - "messageId": 2, - "authorId": 1, - "message": " dislike x-phone its touch-screen is horrible" - } - ], - "uid": 1 - }, { - "msgs": [ - - ], - "uid": 2 - } ] - -Note that a subquery, like a top-level `SELECT` statement, always returns a collection -- regardless of where -within a query the subquery occurs -- and again, its result is never automatically cast into a scalar.
- -## Differences from SQL-92 -The query language offers the following additional features beyond SQL-92: - - * Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query. - * Schema-free: The query language does not assume the existence of a static schema for any data that it processes. - * Correlated FROM terms: A right-side FROM term expression can refer to variables defined by FROM terms on its left. - * Powerful GROUP BY: In addition to a set of aggregate functions as in standard SQL, the groups created by the `GROUP BY` clause are directly usable in nested queries and/or to obtain nested results. - * Generalized SELECT clause: A SELECT clause can return any type of collection, while in SQL-92, a `SELECT` clause has to return a (homogeneous) collection of objects. - - -The following matrix is a quick "SQL-92 compatibility cheat sheet" for the query language. - -| Feature | The query language | SQL-92 | Why different? | -|----------|--------|-------|------------------| -| SELECT * | Returns nested objects | Returns flattened concatenated objects | Nested collections are 1st class citizens | -| SELECT list | order not preserved | order preserved | Fields in a JSON object are not ordered | -| Subquery | Returns a collection | The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function | Nested collections are 1st class citizens | -| LEFT OUTER JOIN | Fills in `MISSING`(s) for non-matches | Fills in `NULL`(s) for non-matches | "Absence" is more appropriate than "unknown" here | -| UNION ALL | Allows heterogeneous inputs and output | Input streams must be UNION-compatible and output field names are drawn from the first input stream | Heterogeneity and nested collections are common | -| IN constant_expr | The constant expression has to be an array or multiset, i.e., [..,..,...]
| The constant collection can be represented as comma-separated items in a paren pair | Nested collections are 1st class citizens | -| String literal | Double quotes or single quotes | Single quotes only | Double quoted strings are pervasive | -| Delimited identifiers | Backticks | Double quotes | Double quoted strings are pervasive | - -The following SQL-92 features are not implemented yet. However, the query language does not conflict with these features: - - * CROSS JOIN, NATURAL JOIN, UNION JOIN - * RIGHT and FULL OUTER JOIN - * INTERSECT, EXCEPT, UNION with set semantics - * CAST expression - * COALESCE expression - * ALL and SOME predicates for linking to subqueries - * UNIQUE predicate (tests a collection for duplicates) - * MATCH predicate (tests for referential integrity) - * Row and Table constructors - * Preserved order for expressions in a SELECT list - - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md deleted file mode 100644 index bc362607f62..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md +++ /dev/null @@ -1,25 +0,0 @@ - - -# 3. Queries - -A query can be any legal expression or `SELECT` statement. A query always ends with a semicolon. - - Query ::= (Expression | SelectStatement) ";" - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error.md deleted file mode 100644 index 18fce14723a..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error.md +++ /dev/null @@ -1,113 +0,0 @@ - - -A query can potentially result in one of the following errors: - - * syntax error, - * identifier resolution error, - * type error, - * resource error. - -If the query processor runs into any error, it will -terminate the ongoing processing of the query and -immediately return an error message to the client. 
- -## Syntax Errors -A valid query must satisfy the grammar rules of the query language. -Otherwise, a syntax error will be raised. - -##### Example - - SELECT * - GleambookUsers user - -Since the query misses a `FROM` keyword before the dataset `GleambookUsers`, -we will get a syntax error as follows: - - Syntax error: In line 2 >>GleambookUsers user;<< Encountered \"GleambookUsers\" at column 1. - -##### Example - - SELECT * - FROM GleambookUsers user - WHERE type="advertiser"; - -Since "type" is a reserved keyword in the query parser, -we will get a syntax error as follows: - - Error: Syntax error: In line 3 >>WHERE type="advertiser";<< Encountered 'type' "type" at column 7. - ==> WHERE type="advertiser"; - - -## Identifier Resolution Errors -Referring to an undefined identifier can cause an error if the identifier -cannot be successfully resolved as a valid field access. - -##### Example - - SELECT * - FROM GleambookUser user; - -If we have a typo as above in "GleambookUsers" that misses the dataset name's ending "s", -we will get an identifier resolution error as follows: - - Error: Cannot find dataset GleambookUser in dataverse Default nor an alias with name GleambookUser! - -##### Example - - SELECT name, message - FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; - -If the compiler cannot figure out how to resolve an unqualified field name, which will occur if there is more than one variable in scope (e.g., `GleambookUsers u` and `GleambookMessages m` as above), -we will get an identifier resolution error as follows: - - Error: Cannot resolve ambiguous alias reference for undefined identifier name - - -## Type Errors - -The query compiler does type checks based on its available type information. -In addition, the query runtime also reports type errors if a data model instance -it processes does not satisfy the type requirement. 
- -##### Example - - abs("123"); - -Since function `abs` can only process numeric input values, -we will get a type error as follows: - - Error: Type mismatch: function abs expects its 1st input parameter to be of type tinyint, smallint, integer, bigint, float or double, but the actual input type is string - - -## Resource Errors -A query can potentially exhaust system resources, such -as the number of open files and disk space. -For instance, the following two resource errors could potentially -be seen when running the system: - - Error: no space left on device - Error: too many open files - -The "no space left on device" issue usually can be fixed by -cleaning up disk space and reserving more disk space for the system. -The "too many open files" issue usually can be fixed by a system -administrator, following the instructions -[here](https://easyengine.io/tutorials/linux/increase-open-files-limit/). - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error_title.md deleted file mode 100644 index 6279d5ecdc5..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_error_title.md +++ /dev/null @@ -1,20 +0,0 @@ - - -# 4. Errors diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dataset_index.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dataset_index.md deleted file mode 100644 index 9e10aedba55..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dataset_index.md +++ /dev/null @@ -1,296 +0,0 @@ - - -## Lifecycle Management Statements - - CreateStatement ::= "CREATE" ( DatabaseSpecification - | TypeSpecification - | DatasetSpecification - | IndexSpecification - | SynonymSpecification - | FunctionSpecification ) - - QualifiedName ::= Identifier ( "." Identifier )? - DoubleQualifiedName ::= Identifier "." Identifier ( "." Identifier )?
- -The CREATE statement is used for creating dataverses as well as other persistent artifacts in a dataverse. -It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined query functions. - -### Dataverses - - DatabaseSpecification ::= "DATAVERSE" Identifier IfNotExists - -The CREATE DATAVERSE statement is used to create new dataverses. -To ease the authoring of reusable query scripts, an optional IF NOT EXISTS clause is included to allow -creation to be requested either unconditionally or only if the dataverse does not already exist. -If this clause is absent, an error is returned if a dataverse with the indicated name already exists. - -The following example creates a new dataverse named TinySocial if one does not already exist. - -##### Example - - CREATE DATAVERSE TinySocial IF NOT EXISTS; - -### Types - - TypeSpecification ::= "TYPE" FunctionOrTypeName IfNotExists "AS" ObjectTypeDef - FunctionOrTypeName ::= QualifiedName - IfNotExists ::= ( )? - TypeExpr ::= ObjectTypeDef | TypeReference | ArrayTypeDef | MultisetTypeDef - ObjectTypeDef ::= ( | )? "{" ( ObjectField ( "," ObjectField )* )? "}" - ObjectField ::= Identifier ":" ( TypeExpr ) ( "?" )? - NestedField ::= Identifier ( "." Identifier )* - IndexField ::= NestedField ( ":" TypeReference )? - TypeReference ::= Identifier - ArrayTypeDef ::= "[" ( TypeExpr ) "]" - MultisetTypeDef ::= "{{" ( TypeExpr ) "}}" - -The CREATE TYPE statement is used to create a new named datatype. -This type can then be used to create stored collections or utilized when defining one or more other datatypes. -Much more information about the data model is available in the [data model reference guide](../datamodel.html). -A new type can be an object type, a renaming of another type, an array type, or a multiset type. -An object type can be defined as being either open or closed. -Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement.
-Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified. - -The following example creates a new object type called GleambookUser type. -Since it is defined as (defaulting to) being an open type, -instances will be permitted to contain more than what is specified in the type definition. -The first four fields are essentially traditional typed name/value pairs (much like SQL fields). -The friendIds field is a multiset of integers. -The employment field is an array of instances of another named object type, EmploymentType. - -##### Example - - CREATE TYPE GleambookUserType AS { - id: int, - alias: string, - name: string, - userSince: datetime, - friendIds: {{ int }}, - employment: [ EmploymentType ] - }; - -The next example creates a new object type, closed this time, called MyUserTupleType. -Instances of this closed type will not be permitted to have extra fields, -although the alias field is marked as optional and may thus be NULL or MISSING in legal instances of the type. -Note that the type of the id field in the example is UUID. -This field type can be used if you want to have this field be an autogenerated-PK field. -(Refer to the Datasets section later for more details on such fields.) - -##### Example - - CREATE TYPE MyUserTupleType AS CLOSED { - id: uuid, - alias: string?, - name: string - }; - -### Datasets - - DatasetSpecification ::= ( )? QualifiedName "(" QualifiedName ")" IfNotExists - PrimaryKey ( Identifier )? ( Properties )? - ( "USING" "COMPACTION" "POLICY" CompactionPolicy ( Configuration )? )? - ( Identifier )? - | - QualifiedName "(" QualifiedName ")" IfNotExists AdapterName - Configuration ( Properties )? - ( CompactionPolicy ( Configuration )? )? - AdapterName ::= Identifier - Configuration ::= "(" ( KeyValuePair ( "," KeyValuePair )* )? ")" - KeyValuePair ::= "(" StringLiteral "=" StringLiteral ")" - Properties ::= ( "(" Property ( "," Property )* ")" )? 
- Property ::= Identifier "=" ( StringLiteral | IntegerLiteral ) - FunctionSignature ::= FunctionOrTypeName "@" IntegerLiteral - PrimaryKey ::= NestedField ( "," NestedField )* ( )? - CompactionPolicy ::= Identifier - -The CREATE DATASET statement is used to create a new dataset. -Datasets are named, multisets of object type instances; -they are where data lives persistently and are the usual targets for queries. -Datasets are typed, and the system ensures that their contents conform to their type definitions. -An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. -It is required to have a specified unique primary key field which uniquely identifies the contained objects. -(The primary key is also used in secondary indexes to identify the indexed primary data objects.) - -Internal datasets contain several advanced options that can be specified when appropriate. -One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting "AUTOGENERATED" after the "PRIMARY KEY" identifier. -In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object's primary key field value will be auto-generated by the system. - -Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the -underlying LSM storage components to be merged. -(The system supports Log-Structured Merge tree based physical storage for Internal datasets.) -Currently the system supports four different component merging policies that can be chosen per dataset: -no-merge, constant, prefix, and correlated-prefix. -The no-merge policy simply never merges disk components. -The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. 
-The prefix policy relies on both component sizes and the number of components to decide which components to merge. -It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the component's sizes exceeds M or the number of components in the sequence exceeds another threshold C. -If such a sequence exists, the components in the sequence are merged together to form a single component. -Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. -When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. -The system's default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix. - -Another advanced option shown in the syntax above, related to performance and mentioned above, is that a **filter** can optionally be created on a field to further optimize range queries with predicates on the filter's field. -Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. -(Refer to [Filter-Based LSM Index Acceleration](../filters.html) for more information about filters.) - -An External dataset, in contrast to an Internal dataset, has data stored outside of the system's control. -Files living in HDFS or in the local filesystem(s) of a cluster's nodes are currently supported. 
-External dataset support allows queries to treat foreign data as though it were stored in the system, -making it possible to query "legacy" file data (for example, Hive data) without having to physically import it. -When defining an External dataset, an appropriate adapter type must be selected for the desired external data. -(See the [Guide to External Data](../externaldata.html) for more information on the available adapters.) - -The following example creates an Internal dataset for storing GleambookUserType objects. -It specifies that their id field is their primary key. - -#### Example - - CREATE INTERNAL DATASET GleambookUsers(GleambookUserType) PRIMARY KEY id; - -The next example creates another Internal dataset (the default kind when no dataset kind is specified) for storing MyUserTupleType objects. -It specifies that the id field should be used as the primary key for the dataset. -It also specifies that the id field is an auto-generated field, -meaning that a randomly generated UUID value should be assigned to each incoming object by the system. -(A user should therefore not attempt to provide a value for this field.) -Note that the id field's declared type must be UUID in this case. - -#### Example - - CREATE DATASET MyUsers(MyUserTupleType) PRIMARY KEY id AUTOGENERATED; - -The next example creates an External dataset for querying LineItemType objects. -The choice of the `hdfs` adapter means that this dataset's data actually resides in HDFS. -The example CREATE statement also provides parameters used by the hdfs adapter: -the URL and path needed to locate the data in HDFS and a description of the data format.
- -#### Example - - CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs ( - ("hdfs"="hdfs://HOST:PORT"), - ("path"="HDFS_PATH"), - ("input-format"="text-input-format"), - ("format"="delimited-text"), - ("delimiter"="|")); - - -### Indices - - IndexSpecification ::= ( Identifier IfNotExists QualifiedName - "(" ( IndexField ) ( "," IndexField )* ")" ( IndexType)? ()?) - | - Identifier? IfNotExists QualifiedName ( )? - IndexType ::= | | | "(" IntegerLiteral ")" - -The CREATE INDEX statement creates a secondary index on one or more fields of a specified dataset. -Supported index types include `BTREE` for totally ordered datatypes, `RTREE` for spatial data, -and `KEYWORD` and `NGRAM` for textual (string) data. -An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier. - -An indexed field is not required to be part of the datatype associated with a dataset if the dataset's datatype -is declared as open **and** if the field's type is provided along with its name and if the `ENFORCED` keyword is -specified at the end of the index definition. -`ENFORCING` an open field introduces a check that makes sure that the actual type of the indexed field -(if the optional field exists in the object) always matches this specified (open) field type. - -The following example creates a btree index called gbAuthorIdx on the authorId field of the GleambookMessages dataset. -This index can be useful for accelerating exact-match queries, range search queries, and joins involving the author-id -field. - -#### Example - - CREATE INDEX gbAuthorIdx ON GleambookMessages(authorId) TYPE BTREE; - -The following example creates an open btree index called gbSendTimeIdx on the (non-declared) `sendTime` field of the GleambookMessages dataset having datetime type. -This index can be useful for accelerating exact-match queries, range search queries, and joins involving the `sendTime` field. 
-The index is enforced so that records that do not have the `sendTime` field or have a mismatched type on the field -cannot be inserted into the dataset. - -#### Example - - CREATE INDEX gbSendTimeIdx ON GleambookMessages(sendTime: datetime?) TYPE BTREE ENFORCED; - -The following example creates an open btree index called gbReadTimeIdx on the (non-declared) `readTime` -field of the GleambookMessages dataset having datetime type. -This index can be useful for accelerating exact-match queries, range search queries, -and joins involving the `readTime` field. -The index is not enforced so that records that do not have the `readTime` field or have a mismatched type on the field -can still be inserted into the dataset. - -#### Example - - CREATE INDEX gbReadTimeIdx ON GleambookMessages(readTime: datetime?); - -The following example creates a btree index called crpUserScrNameIdx on screenName, -a nested field residing within an object-valued user field in the ChirpMessages dataset. -This index can be useful for accelerating exact-match queries, range search queries, -and joins involving the nested screenName field. -Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field. - -#### Example - - CREATE INDEX crpUserScrNameIdx ON ChirpMessages(user.screenName) TYPE BTREE; - -The following example creates an rtree index called gbSenderLocIndex on the sender-location field of the GleambookMessages dataset. This index can be useful for accelerating queries that use the [`spatial-intersect` function](functions.html#spatial-intersect) in a predicate involving the sender-location field. - -#### Example - - CREATE INDEX gbSenderLocIndex ON GleambookMessages("sender-location") TYPE RTREE; - -The following example creates a 3-gram index called fbUserIdx on the name field of the GleambookUsers dataset. This index can be used to accelerate some similarity or substring matching queries on the name field.
For details refer to the document on [similarity queries](similarity.html#NGram_Index). - -#### Example - - CREATE INDEX fbUserIdx ON GleambookUsers(name) TYPE NGRAM(3); - -The following example creates a keyword index called fbMessageIdx on the message field of the GleambookMessages dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the message field. For details refer to the document on [similarity queries](similarity.html#Keyword_Index). - -#### Example - - CREATE INDEX fbMessageIdx ON GleambookMessages(message) TYPE KEYWORD; - -The following example creates a special secondary index which holds only the primary keys. -This index is useful for speeding up aggregation queries which involve only primary keys. -The name of the index is optional. If the name is not specified, the system will generate -one. When the user would like to drop this index, the metadata can be queried to find the system-generated name. - -#### Example - - CREATE PRIMARY INDEX gb_pk_idx ON GleambookMessages; - -An example query that can be accelerated using the primary-key index: - - SELECT COUNT(*) FROM GleambookMessages; - -To look up the above primary-key index, issue the following query: - - SELECT VALUE i - FROM Metadata.`Index` i - WHERE i.DataverseName = "TinySocial" AND i.DatasetName = "GleambookMessages"; - -The query returns: - - [ { "DataverseName": "TinySocial", "DatasetName": "GleambookMessages", "IndexName": "GleambookMessages", "IndexStructure": "BTREE", "SearchKey": [ [ "messageId" ] ], "IsPrimary": true, "Timestamp": "Wed Nov 07 17:25:11 PST 2018", "PendingOp": 0 } - , { "DataverseName": "TinySocial", "DatasetName": "GleambookMessages", "IndexName": "gb_pk_idx", "IndexStructure": "BTREE", "SearchKey": [ ], "IsPrimary": false, "Timestamp": "Wed Nov 07 17:25:11 PST 2018", "PendingOp": 0 } - ] - -Remember that `CREATE PRIMARY INDEX` creates a secondary index. -That is the reason the `IsPrimary` field is false.
-The primary-key index can be identified by the fact that the `SearchKey` field is empty since it only contains primary key fields. \ No newline at end of file diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dml.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dml.md deleted file mode 100644 index 7a2223e6790..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_dml.md +++ /dev/null @@ -1,92 +0,0 @@ - - -## Modification statements - -### INSERTs - - InsertStatement ::= QualifiedName Query - -The INSERT statement is used to insert new data into a dataset. -The data to be inserted comes from a query expression. -This expression can be as simple as a constant expression, or in general it can be any legal query. -In case the dataset has an auto-generated primary key, when performing an INSERT operation, the system allows the user to manually add the -auto-generated key field in the INSERT statement, or skip that field and the system will automatically generate it and add it. However, -it is important to note that if a record already exists in the dataset with the auto-generated key provided by the user, then -that operation is going to fail. As a general rule, insertion will fail if the dataset already has data with the primary key value(s) -being inserted. - -Inserts are processed transactionally by the system. -The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). -If the query part of an insert returns a single object, then the INSERT statement will be a single, atomic transaction. -If the query part returns multiple objects, each object being inserted will be treated as a separate transaction. - -The target dataset name may be a synonym introduced by CREATE SYNONYM statement. - -The following example illustrates a query-based insertion.
- -##### Example - - INSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user) - -### UPSERTs - - UpsertStatement ::= QualifiedName Query - -The UPSERT statement syntactically mirrors the INSERT statement discussed above. -The difference lies in its semantics, which for UPSERT are "add or replace" instead of the INSERT "add if not present, else error" semantics. -Whereas an INSERT can fail if another object already exists with the specified key, the analogous UPSERT will replace the previous object's value -with that of the new object in such cases. Like the INSERT statement, the system allows the user to manually provide the auto-generated key -for datasets with an auto-generated key as its primary key. This operation will insert the record if no record with that key already exists, but -if a record with the key already exists, then the operation will be converted to a replace/update operation. - -The target dataset name may be a synonym introduced by CREATE SYNONYM statement. - -The following example illustrates a query-based upsert operation. - -##### Example - - UPSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user) - -*Editor's note: Upserts currently work in AQL but are not yet enabled (at the moment) in the current query language. - -### DELETEs - - DeleteStatement ::= QualifiedName ( ( )? Variable )? ( Expression )? - -The DELETE statement is used to delete data from a target dataset. -The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the DELETE statement. - -Deletes are processed transactionally by the system. -The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). -If the boolean expression for a delete identifies a single object, then the DELETE statement itself will be a single, atomic transaction. 
-If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction. - -The target dataset name may be a synonym introduced by CREATE SYNONYM statement. - -The following examples illustrate single-object deletions. - -##### Example - - DELETE FROM GleambookUsers user WHERE user.id = 8; - -##### Example - - DELETE FROM GleambookUsers WHERE id = 5; - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_function_removal.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_function_removal.md deleted file mode 100644 index 34b611f875c..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_function_removal.md +++ /dev/null @@ -1,110 +0,0 @@ - - -### Functions - -The CREATE FUNCTION statement creates a **named** function that can then be used and reused in queries. -The body of a function can be any query expression involving the function's parameters. - - FunctionSpecification ::= "FUNCTION" FunctionOrTypeName IfNotExists ParameterList "{" Expression "}" - -The following is an example of a CREATE FUNCTION statement which is similar to our earlier DECLARE FUNCTION example. -It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified). - -##### Example - - CREATE FUNCTION friendInfo(userId) { - (SELECT u.id, u.name, len(u.friendIds) AS friendCount - FROM GleambookUsers u - WHERE u.id = userId)[0] - }; - -### Synonyms - - SynonymSpecification ::= "SYNONYM" QualifiedName "FOR" QualifiedName IfNotExists - -The CREATE SYNONYM statement creates a synonym for a given dataset. -This synonym may be used instead of the dataset name in SELECT, INSERT, UPSERT, DELETE, and LOAD statements. -The target dataset does not need to exist when the synonym is created.
- -##### Example - - CREATE DATASET GleambookUsers(GleambookUserType) PRIMARY KEY id; - - CREATE SYNONYM GleambookUsersSynonym FOR GleambookUsers; - - SELECT * FROM GleambookUsersSynonym; - -More information on how synonyms are resolved can be found in the appendix section on Variable Resolution. - -### Removal - - DropStatement ::= "DROP" ( "DATAVERSE" Identifier IfExists - | "TYPE" FunctionOrTypeName IfExists - | "DATASET" QualifiedName IfExists - | "INDEX" DoubleQualifiedName IfExists - | "SYNONYM" QualifiedName IfExists - | "FUNCTION" FunctionSignature IfExists ) - IfExists ::= ( "IF" "EXISTS" )? - -The DROP statement is the inverse of the CREATE statement. It can be used to drop dataverses, datatypes, datasets, indexes, functions, and synonyms. - -The following examples illustrate some uses of the DROP statement. - -##### Example - - DROP DATASET GleambookUsers IF EXISTS; - - DROP INDEX GleambookMessages.gbSenderLocIndex; - - DROP TYPE TinySocial2.GleambookUserType; - - DROP FUNCTION friendInfo@1; - - DROP SYNONYM GleambookUsersSynonym; - - DROP DATAVERSE TinySocial; - -When an artifact is dropped, it will be dropped from the current dataverse if none is specified -(see the DROP DATASET example above) or from the specified dataverse (see the DROP TYPE example above) -if one is specified by fully qualifying the artifact name in the DROP statement. -When specifying an index to drop, the index name must be qualified by the dataset that it indexes. -When specifying a function to drop, since the query language allows functions to be overloaded by their number of arguments, -the identifying name of the function to be dropped must explicitly include that information. -(`friendInfo@1` above denotes the 1-argument function named friendInfo in the current dataverse.) - -### Load Statement - - LoadStatement ::= QualifiedName AdapterName Configuration ( )? - -The LOAD statement is used to initially populate a dataset via bulk loading of data from an external file.
-An appropriate adapter must be selected to handle the nature of the desired external data. -The LOAD statement accepts the same adapters and the same parameters as discussed earlier for External datasets. -(See the [guide to external data](externaldata.html) for more information on the available adapters.) -If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it. - -The target dataset name may be a synonym introduced by CREATE SYNONYM statement. - -The following example shows how to bulk load the GleambookUsers dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format. - -##### Example - - LOAD DATASET GleambookUsers USING localfs - (("path"="127.0.0.1:///Users/bignosqlfan/tinysocialnew/gbu.adm"),("format"="adm")); - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_head.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_head.md deleted file mode 100644 index 83fa4c9243b..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_ddl_head.md +++ /dev/null @@ -1,36 +0,0 @@ - - -# 5. DDL and DML statements - - Statement ::= ( ( SingleStatement )? ( ";" )+ )* - SingleStatement ::= DatabaseDeclaration - | FunctionDeclaration - | CreateStatement - | DropStatement - | LoadStatement - | SetStatement - | InsertStatement - | DeleteStatement - | Query - -In addition to queries, an implementation of the query language needs to support statements for data definition -and manipulation purposes as well as controlling the context to be used in evaluating query expressions. -This section details the DDL and DML statements supported in the query language as realized today in Apache AsterixDB. 
- diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_keywords.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_keywords.md deleted file mode 100644 index d7c02d8db82..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_keywords.md +++ /dev/null @@ -1,42 +0,0 @@ - - -All reserved keywords are listed in the following table: - -| | | | | | | -| ----|-----|-------|----|-----|----| -| AND | ANY | APPLY | AS | ASC | AT | -| AUTOGENERATED | BETWEEN | BTREE | BY | CASE | CLOSED | -| CREATE | COMPACTION | COMPACT | CONNECT | CORRELATE | DATASET | -| COLLECTION | DATAVERSE | DECLARE | DEFINITION | DECLARE | DEFINITION | -| DELETE | DESC | DISCONNECT | DISTINCT | DROP | ELEMENT | -| ELEMENT | EXPLAIN | ELSE | ENFORCED | END | EVERY | -| EXCEPT | EXIST | EXTERNAL | FEED | FILTER | FLATTEN | -| FOR | FROM | FULL | FUNCTION | GROUP | HAVING | -| HINTS | IF | INTO | IN | INDEX | INGESTION | -| INNER | INSERT | INTERNAL | INTERSECT | IS | JOIN | -| KEYWORD | LEFT | LETTING | LET | LIKE | LIMIT | -| LOAD | NODEGROUP | NGRAM | NOT | OFFSET | ON | -| OPEN | OR | ORDER | OUTER | OUTPUT | OVER | -| PATH | POLICY | PRE-SORTED | PRIMARY | RAW | REFRESH | -| RETURN | RTREE | RUN | SATISFIES | SECONDARY | SELECT | -| SET | SOME | TEMPORARY | THEN | TYPE | UNKNOWN | -| UNNEST | UPDATE | USE | USING | VALUE | WHEN | -| WHERE | WITH | WRITE | | | | - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_title.md deleted file mode 100644 index 108f6379062..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_1_title.md +++ /dev/null @@ -1,20 +0,0 @@ - - -# Appendix 1. 
Reserved keywords diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md deleted file mode 100644 index 7a7125974d3..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md +++ /dev/null @@ -1,37 +0,0 @@ - - -## Controlling Index-Only-Plan Parameter -By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible. -For example, if a SELECT or JOIN query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer -checks whether a secondary-index search alone can generate the result that the query asks for. It -mainly checks two conditions: (1) predicates used in WHERE use only the primary key field and/or secondary key field -and (2) the result does not return any other fields. If these two conditions hold, it builds an index-only plan. -Since an index-only plan only searches a secondary-index to answer a query, it is faster than -a non-index-only plan that needs to search the primary index. -However, this index-only plan can be turned off per query by setting the following parameter. - -* **noindexonly**: if this is set to true, the index-only-plan will not be applied; the default value is false. - -##### Example - - SET noindexonly 'true'; - - SELECT m.message AS message - FROM GleambookMessages m where m.message = " love product-b its shortcut-menu is awesome:)"; diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md deleted file mode 100644 index 350117b3db5..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md +++ /dev/null @@ -1,41 +0,0 @@ - - -## Parallel Sort Parameter -The following parameter enables you to activate or deactivate full parallel sort for order-by operations.
- -When full parallel sort is inactive (`false`), each existing data partition is sorted (in parallel), -and then all data partitions are merged into a single node. - -When full parallel sort is active (`true`), the data is first sampled, and then repartitioned -so that each partition contains data that is greater than the previous partition. -The data in each partition is then sorted (in parallel), -but the sorted partitions are not merged into a single node. - -* **compiler.sort.parallel**: A boolean specifying whether full parallel sort is active (`true`) or inactive (`false`). - The default value is `true`. - -##### Example - - SET `compiler.sort.parallel` "true"; - - SELECT VALUE user - FROM GleambookUsers AS user - ORDER BY ARRAY_LENGTH(user.friendIds) DESC; - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md deleted file mode 100644 index f87771b2600..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md +++ /dev/null @@ -1,97 +0,0 @@ - - -The SET statement can be used to override some cluster-wide configuration parameters for a specific request: - - SET - -As parameter identifiers are qualified names (containing a '.') they have to be escaped using backticks (\`\`). -Note that changing query parameters will not affect query correctness but only impact performance -characteristics, such as response time and throughput. - -## Parallelism Parameter -The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) -in a cluster. A user can manually specify the maximum execution parallelism for a request to scale it up and down -using the following parameter: - -* **compiler.parallelism**: the maximum number of CPU cores that can be used to process a query.
-There are three cases of the value *p* for compiler.parallelism: - - - *p* \< 0 or *p* \> the total number of cores in a cluster: the system will use all available cores in the - cluster; - - - *p* = 0 (the default): the system will use the storage parallelism (the number of partitions of stored datasets) - as the maximum parallelism for query processing; - - - all other cases: the system will use the user-specified number as the maximum number of CPU cores to use for - executing the query. - -##### Example - - SET `compiler.parallelism` "16"; - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; - -## Memory Parameters -In the system, each blocking runtime operator such as join, group-by and order-by -works within a fixed memory budget, and can gracefully spill to disks if -the memory budget is smaller than the amount of data they have to hold. -A user can manually configure the memory budget of those operators within a query. -The supported configurable memory parameters are: - -* **compiler.groupmemory**: the memory budget that each parallel group-by operator instance can use; - 32MB is the default budget. - -* **compiler.sortmemory**: the memory budget that each parallel sort operator instance can use; - 32MB is the default budget. - -* **compiler.joinmemory**: the memory budget that each parallel hash join operator instance can use; - 32MB is the default budget. - -* **compiler.windowmemory**: the memory budget that each parallel window aggregate operator instance can use; - 32MB is the default budget. - -For each memory budget value, you can use a 64-bit integer value -with a 1024-based binary unit suffix (for example, B, KB, MB, GB). -If there is no user-provided suffix, "B" is the default suffix. See the following examples. 
- -##### Example - - SET `compiler.groupmemory` "64MB"; - - SELECT msg.authorId, COUNT(*) - FROM GleambookMessages msg - GROUP BY msg.authorId; - -##### Example - - SET `compiler.sortmemory` "67108864"; - - SELECT VALUE user - FROM GleambookUsers AS user - ORDER BY ARRAY_LENGTH(user.friendIds) DESC; - -##### Example - - SET `compiler.joinmemory` "132000KB"; - - SELECT u.name AS uname, m.message AS message - FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; - diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_title.md deleted file mode 100644 index 41a3845c8b3..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_title.md +++ /dev/null @@ -1,20 +0,0 @@ - - -## Appendix 2. Performance Tuning diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md deleted file mode 100644 index 988d89f3122..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md +++ /dev/null @@ -1,273 +0,0 @@ - - -In this Appendix, we'll look at how variables are bound and how names are resolved. -Names can appear in every clause of a query. -Sometimes a name consists of just a single identifier, e.g., `region` or `revenue`. -More often a name will consist of two identifiers separated by a dot, e.g., `customer.address`. -Occasionally a name may have more than two identifiers, e.g., `policy.owner.address.zipcode`. -*Resolving* a name means determining exactly what the (possibly multi-part) name refers to. -It is necessary to have well-defined rules for how to resolve a name in cases of ambiguity. -(In the absence of schemas, such cases arise more commonly, and also differently, than they do in SQL.) - -The basic job of each clause in a query block is to bind variables. 
-Each clause sees the variables bound by previous clauses and may bind additional variables. -Names are always resolved with respect to the variables that are bound ("in scope") at the place where the name use in question occurs. -It is possible that the name resolution process will fail, which may lead to an empty result or an error message. - -One important bit of background: When the system is reading a query and resolving its names, it has a list of all the available dataverses and datasets. -As a result, it knows whether `a.b` is a valid name for dataset `b` in dataverse `a`. -However, the system does not in general have knowledge of the schemas of the data inside the datasets; remember that this is a much more open world. -As a result, in general the system cannot know whether any object in a particular dataset will have a field named `c`. -These assumptions affect how errors are handled. -If you try to access dataset `a.b` and no dataset by that name exists, you will get an error and your query will not run. -However, if you try to access a field `c` in a collection of objects, your query will run and return `missing` for each object that doesn't have a field named `c` – this is because it’s possible that some object (someday) could have such a field. - -## Binding Variables - -Variables can be bound in the following ways: - -1. WITH and LET clauses bind a variable to the result of an expression in a straightforward way - - Examples: - - `WITH cheap_parts AS (SELECT partno FROM parts WHERE price < 100)` - binds the variable `cheap_parts` to the result of the subquery. - - `LET pay = salary + bonus` - binds the variable `pay` to the result of evaluating the expression `salary + bonus`. - -2. FROM, GROUP BY, and SELECT clauses have optional AS subclauses that contain an expression and a name (called an *iteration variable* in a FROM clause, or an alias in GROUP BY or SELECT.) 
- - Examples: - - `FROM customer AS c, order AS o` - - `GROUP BY salary + bonus AS total_pay` - - `SELECT MAX(price) AS highest_price` - - An AS subclause always binds the name (as a variable) to the result of the expression (or, in the case of a FROM clause, to the *individual members* of the collection identified by the expression.) - - It's always a good practice to use the keyword AS when defining an alias or iteration variable. - However, as in SQL, the syntax allows the keyword AS to be omitted. - For example, the FROM clause above could have been written like this: - - `FROM customer c, order o` - - Omitting the keyword AS does not affect the binding of variables. - The FROM clause in this example binds variables c and o whether the keyword AS is used or not. - - In certain cases, a variable is automatically bound even if no alias or variable-name is specified. - Whenever an expression could have been followed by an AS subclause, if the expression consists of a simple name or a path expression, that expression binds a variable whose name is the same as the simple name or the last step in the path expression. - Here are some examples: - - `FROM customer, order` binds iteration variables named `customer` and `order` - - `GROUP BY address.zipcode` binds a variable named `zipcode` - - `SELECT item[0].price` binds a variable named `price` - - Note that a FROM clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn. - The name of the collection remains in scope, but it is not a variable. - For example, consider this FROM clause used in a self-join: - - `FROM customer AS c1, customer AS c2` - - This FROM clause joins the customer dataset to itself, binding the iteration variables c1 and c2 to objects in the left-hand-side and right-hand-side of the join, respectively. - After the FROM clause, c1 and c2 are in scope as variables, and customer remains accessible as a dataset name but not as a variable. - -3. 
Special rules for GROUP BY: - - 1. If a GROUP BY clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself. - For example: - - `GROUP BY salary + bonus` binds a pseudo-variable named `salary + bonus`. - - This rule allows subsequent clauses to refer to the grouping expression (salary + bonus) even though its constituent variables (salary and bonus) are no longer in scope. - For example, the following query is valid: - - FROM employee - GROUP BY salary + bonus - HAVING salary + bonus > 1000 - SELECT salary + bonus, COUNT(*) AS how_many - - While it might have been more elegant to explicitly require an alias in cases like this, the pseudo-variable rule is retained for SQL compatibility. - Note that the expression `salary + bonus` is not *actually* evaluated in the HAVING and SELECT clauses (and could not be since `salary` and `bonus` are no longer individually in scope). - Instead, the expression `salary + bonus` is treated as a reference to the pseudo-variable defined in the GROUP BY clause. - - 2. A GROUP BY clause may be followed by a GROUP AS clause that binds a variable to the group. - The purpose of this variable is to make the individual objects inside the group visible to subqueries that may need to iterate over them. - - The GROUP AS variable is bound to a multiset of objects. - Each object represents one of the members of the group. - Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest FROM clause (and its LET subclause, if any). - These nested objects, in turn, contain the actual fields of the group-member. - To understand this process, consider the following query fragment: - - FROM parts AS p, suppliers AS s - WHERE p.suppno = s.suppno - GROUP BY p.color GROUP AS g - - Suppose that the objects in `parts` have fields `partno`, `color`, and `suppno`. 
- Suppose that the objects in suppliers have fields `suppno` and `location`. - - Then, for each group formed by the GROUP BY, the variable g will be bound to a multiset with the following structure: - - [ { "p": { "partno": "p1", "color": "red", "suppno": "s1" }, - "s": { "suppno": "s1", "location": "Denver" } }, - { "p": { "partno": "p2", "color": "red", "suppno": "s2" }, - "s": { "suppno": "s2", "location": "Atlanta" } }, - ... - ] - -## Scoping - -In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a WITH clause at the beginning of the query. -More specific rules follow. - -The clauses in a query block are conceptually processed in the following order: - -* FROM (followed by LET subclause, if any) -* WHERE -* GROUP BY (followed by LET subclause, if any) -* HAVING -* SELECT or SELECT VALUE -* ORDER BY -* OFFSET -* LIMIT - -During processing of each clause, the variables that are in scope are those variables that are bound in the following places: - -1. In earlier clauses of the same query block (as defined by the ordering given above). - - Example: `FROM orders AS o SELECT o.date` - The variable `o` in the SELECT clause is bound, in turn, to each object in the dataset `orders`. - -2. In outer query blocks in which the current query block is nested. - In case of duplication, the innermost binding wins. - -3. In the WITH clause (if any) at the beginning of the query. - -However, in a query block where a GROUP BY clause is present: - -1. In clauses processed before GROUP BY, scoping rules are the same as though no GROUP BY were present. - -2. In clauses processed after GROUP BY, the variables bound in the nearest FROM-clause (and its LET subclause, if any) are removed from scope and replaced by the variables bound in the GROUP BY clause (and its LET subclause, if any). 
- However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (MIN, MAX, AVG, SUM, and COUNT). - These functions still need to see the individual data items over which they are computing an aggregation. - For example, after `FROM employee AS e GROUP BY deptno`, it would not be valid to reference `e.salary`, but `AVG(e.salary)` would be valid. - -Special case: In an expression inside a FROM clause, a variable is in scope if it was bound in an earlier expression in the same FROM clause. -Example: - - FROM orders AS o, o.items AS i - -The reason for this special case is to support iteration over nested collections. - -Note that, since the SELECT clause comes *after* the WHERE and GROUP BY clauses in conceptual processing order, any variables defined in SELECT are not visible in WHERE or GROUP BY. -Therefore the following query will not return what might be the expected result (since in the WHERE clause, `pay` will be interpreted as a field in the `emp` object rather than as the computed value `salary + bonus`): - - SELECT name, salary + bonus AS pay - FROM emp - WHERE pay > 1000 - ORDER BY pay - -The likely intent of the query above can be accomplished as follows: - - FROM emp AS e - LET pay = e.salary + e.bonus - WHERE pay > 1000 - SELECT e.name, pay - ORDER BY pay - -Note that variables defined by `JOIN` subclauses are not visible to other subclauses in the same `FROM` clause. -This also applies to the `FROM` variable that starts the `JOIN` subclause. - -## Resolving Names - -The process of name resolution begins with the leftmost identifier in the name. -The rules for resolving the leftmost identifier are: - -1. _In a FROM clause_: Names in a FROM clause identify the collections over which the query block will iterate. - These collections may be stored datasets or may be the results of nested query blocks. - A stored dataset may be in a named dataverse or in the default dataverse. 
- Thus, if the two-part name `a.b` is in a FROM clause, a might represent a dataverse and `b` might represent a dataset in that dataverse. - Another example of a two-part name in a FROM clause is `FROM orders AS o, o.items AS i`. - In `o.items`, `o` represents an order object bound earlier in the FROM clause, and items represents the items object inside that order. - - The rules for resolving the leftmost identifier in a FROM clause (including a JOIN subclause), or in the expression following IN in a quantified predicate, are as follows: - - 1. If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. - (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery.) - - 2. Otherwise, if the identifier is the first part of a two-part name like `a.b`, the name is treated as `dataverse.dataset`. - If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. - If the designated dataset exists then the identifier is resolved to that dataset, - otherwise if a synonym with given name exists then the identifier is resolved to the target dataset of that - synonym (potentially recursively if this synonym points to another synonym). An error will result if the designated - dataset or a synonym with this name does not exist. - - Datasets take precedence over synonyms, so if both a dataset and a synonym have the same name then the - resolution is to the dataset. - -2. _Elsewhere in a query block_: In clauses other than FROM, a name typically identifies a field of some object. - For example, if the expression `a.b` is in a SELECT or WHERE clause, it's likely that `a` represents an object and `b` represents a field in that object. - - The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are: - - 1. 
If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. - (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block.) - - 2. (The "Single Variable Rule"): Otherwise, if the FROM clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. - For example, in the query `FROM customer SELECT address`, the identifier address is treated as a field in the object bound to the variable customer. - At runtime, if the object bound to customer has no `address` field, the `address` expression will return `missing`. - If the FROM clause in the current query block binds multiple variables, name resolution fails with an "ambiguous name" error. - If there's no FROM clause in the current query block, name resolution fails with an "undefined identifier" error. - Note that the Single Variable Rule searches for bound variables only in the current query block, not in outer (containing) blocks. - The purpose of this rule is to permit the compiler to resolve field-references unambiguously without relying on any schema information. - Also note that variables defined by LET clauses do not participate in the resolution process performed by this rule. - - Exception: In a query that has a GROUP BY clause, the Single Variable Rule does not apply in any clauses that occur after the GROUP BY because, in these clauses, the variables bound by the FROM clause are no longer in scope. - In clauses after GROUP BY, only Rule 2.1 applies. - -3. In an ORDER BY clause following a UNION ALL expression: - - The leftmost identifier is treated as a field-access on the objects that are generated by the UNION ALL. 
- For example: - - query-block-1 - UNION ALL - query-block-2 - ORDER BY salary - - In the result of this query, objects that have a `salary` field will be ordered by the value of this field; objects that have no `salary` field will appear at the beginning of the query result (in ascending order) or at the end (in descending order.) - -4. _In a standalone expression_: If a query consists of a standalone expression then identifiers inside that - expression are resolved according to Rule 1. - For example, if the whole query is `ARRAY_COUNT(a.b)` then `a.b` will be treated as dataset `b` contained in - dataverse `a`. - Note that this rule only applies to identifiers which are located directly inside a standalone expression. - Identifiers inside SELECT statements in a standalone expression are still resolved according to Rules 1-3. - For example, if the whole query is `ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )` then `salary` is resolved - as `e.salary` following the "Single Variable Rule" (Rule 2.2). - -5. Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. - The name resolves to the field at the end of the path. - If this field does not exist, the value `missing` is returned. diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_title.md deleted file mode 100644 index ef1202e55f6..00000000000 --- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_title.md +++ /dev/null @@ -1,20 +0,0 @@ - - -## Appendix 3.
Variable Bindings and Name Resolution From 8b6df7d2840da94bee4898cebce136e5685263f9 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 16:26:12 +0000 Subject: [PATCH 09/22] Reorganise main directory Change-Id: I1def33b3a84a27c1b2f44d8cd32be7416160fd44 --- .../src/main/{markdown => }/datamodel/datamodel_composite.md | 0 .../src/main/{markdown => }/datamodel/datamodel_header.md | 0 .../src/main/{markdown => }/datamodel/datamodel_incomplete.md | 0 .../main/{markdown => }/datamodel/datamodel_primitive_common.md | 0 .../main/{markdown => }/datamodel/datamodel_primitive_delta.md | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename asterixdb/asterix-doc/src/main/{markdown => }/datamodel/datamodel_composite.md (100%) rename asterixdb/asterix-doc/src/main/{markdown => }/datamodel/datamodel_header.md (100%) rename asterixdb/asterix-doc/src/main/{markdown => }/datamodel/datamodel_incomplete.md (100%) rename asterixdb/asterix-doc/src/main/{markdown => }/datamodel/datamodel_primitive_common.md (100%) rename asterixdb/asterix-doc/src/main/{markdown => }/datamodel/datamodel_primitive_delta.md (100%) diff --git a/asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_composite.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md similarity index 100% rename from asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_composite.md rename to asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md diff --git a/asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_header.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md similarity index 100% rename from asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_header.md rename to asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md diff --git a/asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_incomplete.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md similarity index 100% rename from 
asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_incomplete.md rename to asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md diff --git a/asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_primitive_common.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md similarity index 100% rename from asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_primitive_common.md rename to asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md diff --git a/asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_primitive_delta.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md similarity index 100% rename from asterixdb/asterix-doc/src/main/markdown/datamodel/datamodel_primitive_delta.md rename to asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md From df312b7fc41d215f94afcfef830cf6a804ba2dda Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 16:26:31 +0000 Subject: [PATCH 10/22] Update asterix-docs POM Change-Id: I1f2d14fee99c6dc42e151d1495fe04d5ac901257 --- asterixdb/asterix-doc/pom.xml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml index 2cde42eca81..704ee568eef 100644 --- a/asterixdb/asterix-doc/pom.xml +++ b/asterixdb/asterix-doc/pom.xml @@ -51,17 +51,8 @@ pre-site - - - - - - - - - - + From caa6347fb1adcc6a18e0abca386d7e491c79910a Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Fri, 24 Jan 2020 16:40:04 +0000 Subject: [PATCH 11/22] Correct path to partials Change-Id: I7152de058f6bb8b6f7fa262c689e81770fb59437 --- asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc | 2 +- asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc | 2 +- asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc 
b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc index dd40dead182..770645d35a2 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc @@ -1,5 +1,5 @@ = Builtin Functions -:includedir: ../../../shared/modules/builtins/partials +:includedir: ../../shared/modules/builtins/partials :toc: include::{includedir}/0_toc_common.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc index ca4aa0fceae..ca42d4be836 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc @@ -1,5 +1,5 @@ = Builtin Functions -:includedir: ../../../shared/modules/builtins/partials +:includedir: ../../shared/modules/builtins/partials :toc: include::{includedir}/0_toc_common.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc index 0149b81aaf4..75300f8f0d0 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc @@ -1,5 +1,5 @@ = The Query Language -:includedir: ../../../shared/modules/sqlpp/partials +:includedir: ../../shared/modules/sqlpp/partials :toc: include::{includedir}/1_intro.adoc[] From caa74681428410354b67543f4b582696ba64bfd2 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Sat, 25 Jan 2020 12:22:28 +0000 Subject: [PATCH 12/22] Correcting builtin partials Change-Id: I696d2f0700aca45dfddc07d7d75fa07b47b13408 --- .../builtins/partials/10_comparison.adoc | 10 +- .../modules/builtins/partials/11_type.adoc | 54 ++++------- .../modules/builtins/partials/12_misc.adoc | 25 ++--- .../builtins/partials/13_conditional.adoc | 36 +++---- .../modules/builtins/partials/14_window.adoc | 39 +++----- .../modules/builtins/partials/15_bitwise.adoc | 27 ++---- 
.../builtins/partials/1_numeric_common.adoc | 79 ++++++--------- .../builtins/partials/1_numeric_delta.adoc | 4 +- .../builtins/partials/2_string_common.adoc | 67 +++++-------- .../builtins/partials/2_string_delta.adoc | 19 ++-- .../modules/builtins/partials/3_binary.adoc | 18 ++-- .../modules/builtins/partials/4_spatial.adoc | 40 +++----- .../builtins/partials/5_similarity.adoc | 19 ++-- .../builtins/partials/6_tokenizing.adoc | 7 +- .../modules/builtins/partials/7_allens.adoc | 22 ++--- .../modules/builtins/partials/7_temporal.adoc | 97 +++++++------------ .../modules/builtins/partials/8_record.adoc | 49 ++++------ .../builtins/partials/9_aggregate_aql.adoc | 34 +++---- .../builtins/partials/9_aggregate_sql.adoc | 70 +++++-------- 19 files changed, 248 insertions(+), 468 deletions(-) diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc index 663a0d3fdfb..4970fefd1f7 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/10_comparison.adoc @@ -1,10 +1,8 @@ [[comparison-functions]] -Comparison Functions -~~~~~~~~~~~~~~~~~~~~ +== Comparison Functions [[greatest]] -greatest -^^^^^^^^ +=== greatest * Syntax: + @@ -37,8 +35,7 @@ items. ------------------------- [[least]] -least -^^^^^ +=== least * Syntax: + @@ -69,3 +66,4 @@ items. 
----------------------- { "v1": 1, "v2": -0.5 } ----------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc index d7126486016..22c6777e7d1 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc @@ -1,10 +1,8 @@ [[type-functions]] -Type Functions -~~~~~~~~~~~~~~ +== Type Functions [[is_array]] -is_array -^^^^^^^^ +=== is_array * Syntax: + @@ -43,8 +41,7 @@ value. The function has an alias `isarray`. [[is_atomic-is_atom]] -is_atomic (is_atom) -^^^^^^^^^^^^^^^^^^^ +=== is_atomic (is_atom) * Syntax: + @@ -83,8 +80,7 @@ link:../datamodel.html#PrimitiveTypes[primitive] type. The function has three aliases: `isatomic`, `is_atom`, and `isatom`. [[is_boolean-is_bool]] -is_boolean (is_bool) -^^^^^^^^^^^^^^^^^^^^ +=== is_boolean (is_bool) * Syntax: + @@ -123,8 +119,7 @@ value. The function has three aliases: `isboolean`, `is_bool`, and `isbool`. [[is_number-is_num]] -is_number (is_num) -^^^^^^^^^^^^^^^^^^ +=== is_number (is_num) * Syntax: + @@ -164,8 +159,7 @@ value. The function has three aliases: `isnumber`, `is_num`, and `isnum`. [[is_object-is_obj]] -is_object (is_obj) -^^^^^^^^^^^^^^^^^^ +=== is_object (is_obj) * Syntax: + @@ -203,8 +197,7 @@ false, "h": false, "i": true } The function has three aliases: `isobject`, `is_obj`, and `isobj`. [[is_string-is_str]] -is_string (is_str) -^^^^^^^^^^^^^^^^^^ +=== is_string (is_str) * Syntax: + @@ -243,8 +236,7 @@ value. The function has three aliases: `isstring`, `is_str`, and `isstr`. [[is_null]] -is_null -^^^^^^^ +=== is_null * Syntax: + @@ -271,8 +263,7 @@ is_null(expr) The function has an alias `isnull`. [[is_missing]] -is_missing -^^^^^^^^^^ +=== is_missing * Syntax: + @@ -299,8 +290,7 @@ value. The function has an alias `ismissing`. 
[[is_unknown]] -is_unknown -^^^^^^^^^^ +=== is_unknown * Syntax: + @@ -328,8 +318,7 @@ value. The function has an alias `isunknown`. [[to_array]] -to_array -^^^^^^^^ +=== to_array * Syntax: + @@ -364,8 +353,7 @@ item is returned The function has an alias `toarray`. [[to_atomic-to_atom]] -to_atomic (to_atom) -^^^^^^^^^^^^^^^^^^^ +=== to_atomic (to_atom) * Syntax: + @@ -406,8 +394,7 @@ result of invoking to_atomic() on the value of that field is returned The function has three aliases: `toatomic`, `to_atom`, and `toatom`. [[to_boolean-to_bool]] -to_boolean (to_bool) -^^^^^^^^^^^^^^^^^^^^ +=== to_boolean (to_bool) * Syntax: + @@ -449,8 +436,7 @@ has no fields, otherwise `true` The function has three aliases: `toboolean`, `to_bool`, and `tobool`. [[to_bigint]] -to_bigint -^^^^^^^^^ +=== to_bigint * Syntax: + @@ -495,8 +481,7 @@ returned The function has an alias `tobigint`. [[to_double]] -to_double -^^^^^^^^^ +=== to_double * Syntax: + @@ -538,8 +523,7 @@ returned The function has an alias `todouble`. [[to_number-to_num]] -to_number (to_num) -^^^^^^^^^^^^^^^^^^ +=== to_number (to_num) * Syntax: + @@ -582,8 +566,7 @@ returned The function has three aliases: `tonumber`, `to_num`, and `tonum`. [[to_object-to_obj]] -to_object (to_obj) -^^^^^^^^^^^^^^^^^^ +=== to_object (to_obj) * Syntax: + @@ -615,8 +598,7 @@ to_object(expr) The function has three aliases: `toobject`, `to_obj`, and `toobj`. 
[[to_string-to_str]] -to_string (to_str) -^^^^^^^^^^^^^^^^^^ +=== to_string (to_str) * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc index 4b5141ddb9c..827d5921f30 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/12_misc.adoc @@ -1,10 +1,8 @@ [[miscellaneous-functions]] -Miscellaneous Functions -~~~~~~~~~~~~~~~~~~~~~~~ +== Miscellaneous Functions [[uuid]] -uuid -^^^^ +=== uuid * Syntax: + @@ -18,8 +16,7 @@ uuid() ** a generated, random `uuid`. [[len]] -len -^^^ +=== len * Syntax: + @@ -46,8 +43,7 @@ len(["Hello", "World"]) - [[not]] -not -^^^ +=== not * Syntax: + @@ -75,8 +71,7 @@ not(expr) --------------------------------------- [[random]] -random -^^^^^^ +=== random * Syntax: + @@ -102,8 +97,7 @@ representing the seed number. ----------------------------------------------------------------- [[range]] -range -^^^^^ +=== range * Syntax: + @@ -134,8 +128,7 @@ range(0, 3); -------------- [[switch_case]] -switch_case -^^^^^^^^^^^ +=== switch_case * Syntax: + @@ -191,8 +184,7 @@ switch_case( - [[deep_equal]] -deep_equal -^^^^^^^^^^ +=== deep_equal * Syntax: + @@ -233,3 +225,4 @@ deep_equal( ----- false ----- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc index b0c9166eccc..0b4ba1fd667 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/13_conditional.adoc @@ -1,10 +1,8 @@ [[conditional-functions]] -Conditional Functions -~~~~~~~~~~~~~~~~~~~~~ +== Conditional Functions [[if_null-ifnull]] -if_null (ifnull) -^^^^^^^^^^^^^^^^ +=== if_null (ifnull) * Syntax: + @@ -37,8 +35,7 @@ specified The function has an alias `ifnull`. 
[[if_missing-ifmissing]] -if_missing (ifmissing) -^^^^^^^^^^^^^^^^^^^^^^ +=== if_missing (ifmissing) * Syntax: + @@ -72,8 +69,7 @@ specified The function has an alias `ifmissing`. [[if_missing_or_null-ifmissingornull-coalesce]] -if_missing_or_null (ifmissingornull, coalesce) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== if_missing_or_null (ifmissingornull, coalesce) * Syntax: + @@ -106,8 +102,7 @@ no arguments specified The function has two aliases: `ifmissingornull` and `coalesce`. [[if_inf-ifinf]] -if_inf (ifinf) -^^^^^^^^^^^^^^ +=== if_inf (ifinf) * Syntax: + @@ -146,8 +141,7 @@ encountered before the first non-infinite number argument The function has an alias `ifinf`. [[if_nan-ifnan]] -if_nan (ifnan) -^^^^^^^^^^^^^^ +=== if_nan (ifnan) * Syntax: + @@ -186,8 +180,7 @@ encountered before the first non-`NaN` number argument The function has an alias `ifnan`. [[if_nan_or_inf-ifnanorinf]] -if_nan_or_inf (ifnanorinf) -^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== if_nan_or_inf (ifnanorinf) * Syntax: + @@ -226,8 +219,7 @@ encountered before the first non-infinite and non-`NaN` number argument The function has an alias `ifnanorinf`. [[null_if-nullif]] -null_if (nullif) -^^^^^^^^^^^^^^^^ +=== null_if (nullif) * Syntax: + @@ -262,8 +254,7 @@ or The function has an alias `nullif`. [[missing_if-missingif]] -missing_if (missingif) -^^^^^^^^^^^^^^^^^^^^^^ +=== missing_if (missingif) * Syntax: + @@ -298,8 +289,7 @@ otherwise returns the first argument. The function has an alias `missingif`. [[nan_if-nanif]] -nan_if (nanif) -^^^^^^^^^^^^^^ +=== nan_if (nanif) * Syntax: + @@ -333,8 +323,7 @@ otherwise returns the first argument. The function has an alias `nanif`. [[posinf_if-posinfif]] -posinf_if (posinfif) -^^^^^^^^^^^^^^^^^^^^ +=== posinf_if (posinfif) * Syntax: + @@ -368,8 +357,7 @@ otherwise returns the first argument. The function has an alias `posinfif`. 
[[neginf_if-neginfif]] -neginf_if (neginfif) -^^^^^^^^^^^^^^^^^^^^ +=== neginf_if (neginfif) * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc index 8e3fa8f660e..f1a82503de3 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc @@ -1,6 +1,5 @@ [[window-functions]] -Window Functions -~~~~~~~~~~~~~~~~ +== Window Functions Window functions are used to compute an aggregate or cumulative value, based on a portion of the tuples selected by a query. For each input @@ -21,8 +20,7 @@ described in the section on link:manual.html#SELECT_statements[SELECT Statements]. [[cume_dist]] -cume_dist -^^^^^^^^^ +=== cume_dist * Syntax: + @@ -101,8 +99,7 @@ FROM GleambookMessages AS m; -------------------- [[dense_rank]] -dense_rank -^^^^^^^^^^ +=== dense_rank * Syntax: + @@ -192,8 +189,7 @@ FROM GleambookMessages AS m; ---------------------- [[first_value]] -first_value -^^^^^^^^^^^ +=== first_value * Syntax: + @@ -301,8 +297,7 @@ FROM GleambookMessages AS m; --------------------------- [[lag]] -lag -^^^ +=== lag * Syntax: + @@ -403,8 +398,7 @@ FROM GleambookMessages AS m; ------------------------------------------------- [[last_value]] -last_value -^^^^^^^^^^ +=== last_value * Syntax: + @@ -519,8 +513,7 @@ window frame would always be the current tuple. This would mean that the longest message would always be the same as the current message. [[lead]] -lead -^^^^ +=== lead * Syntax: + @@ -621,8 +614,7 @@ FROM GleambookMessages AS m; ----------------------------------------------- [[nth_value]] -nth_value -^^^^^^^^^ +=== nth_value * Syntax: + @@ -820,8 +812,7 @@ messages. ➁ The second longest message from this author. 
[[ntile]] -ntile -^^^^^ +=== ntile * Syntax: + @@ -904,8 +895,7 @@ FROM GleambookMessages AS m; ------------------- [[percent_rank]] -percent_rank -^^^^^^^^^^^^ +=== percent_rank * Syntax: + @@ -983,8 +973,7 @@ FROM GleambookMessages AS m; -------------------- [[rank]] -rank -^^^^ +=== rank * Syntax: + @@ -1076,8 +1065,7 @@ FROM GleambookMessages AS m; ---------------------- [[ratio_to_report]] -ratio_to_report -^^^^^^^^^^^^^^^ +=== ratio_to_report * Syntax: + @@ -1156,8 +1144,7 @@ FROM GleambookMessages AS m; ---------------------------------------- [[row_number]] -row_number -^^^^^^^^^^ +=== row_number * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc index d5faf446d63..b40820853c0 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc @@ -1,6 +1,5 @@ [[bitwise-functions]] -Bitwise Functions -~~~~~~~~~~~~~~~~~ +== Bitwise Functions All Bit/Binary functions can only operate on 64-bit signed integers. @@ -14,8 +13,7 @@ Bit (LSB) and bit 32 is the Most Significant Bit (MSB). (MSB) Bit 32 → `0000 0000 0000 0000 0000 0000 0000 0000` ← Bit 1 (LSB) [[bitand]] -bitand -^^^^^^ +=== bitand * Syntax: + @@ -100,8 +98,7 @@ This results in 2 (0010 in binary) because only the 2nd bit from the right is 1 in all three numbers. [[bitclear]] -bitclear -^^^^^^^^ +=== bitclear * Syntax: + @@ -169,8 +166,7 @@ This results in 4 (0*00*1*00*) because bits 1, 2, 4, and 5 changed to zero. [[bitnot]] -bitnot -^^^^^^ +=== bitnot * Syntax: + @@ -213,8 +209,7 @@ This results in -4 (*1111 1111 1111 1111 1111 1111 1111 1100* in binary) because all bits changed. [[bitor]] -bitor -^^^^^ +=== bitor * Syntax: + @@ -286,8 +281,7 @@ This results in 15 (1111 in binary) because there is at least one 1 in each of the four rightmost bits. 
[[bitset]] -bitset -^^^^^^ +=== bitset * Syntax: + @@ -356,8 +350,7 @@ This results in 15 (*1*11*1* in binary) because bit 1 and 4 changed to ones. [[bitshift]] -bitshift -^^^^^^^^ +=== bitshift * Syntax: + @@ -483,8 +476,7 @@ This results in 1 because the 1-bit wrapped left, around to the Least Significant Digit position. [[bittest]] -bittest -^^^^^^^ +=== bittest * Syntax: + @@ -588,8 +580,7 @@ set to 0 (zero). The function has an alias `isbitset`. [[bitxor]] -bitxor -^^^^^^ +=== bitxor * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc index ae9453fa406..fa88266c8f6 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_common.adoc @@ -1,10 +1,8 @@ [[numeric-functions]] -Numeric Functions -~~~~~~~~~~~~~~~~~ +== Numeric Functions [[abs]] -abs -^^^ +=== abs * Syntax: + @@ -33,8 +31,7 @@ argument, --------------------------------------------------------------------------- [[acos]] -acos -^^^^ +=== acos * Syntax: + @@ -64,8 +61,7 @@ is in the range of -1 (inclusive) to 1 (inclusive), -------------------------------------------------------------------------------------------------------- [[asin]] -asin -^^^^ +=== asin * Syntax: + @@ -95,8 +91,7 @@ in the range of -1 (inclusive) to 1 (inclusive), --------------------------------------------------------------------------------------------------------- [[atan]] -atan -^^^^ +=== atan * Syntax: + @@ -124,8 +119,7 @@ atan(numeric_value) --------------------------------------------------------------------------------------------------------------------- [[atan2]] -atan2 -^^^^^ +=== atan2 * Syntax: + @@ -157,8 +151,7 @@ atan2(numeric_value1, numeric_value2) ---------------------------------------------------------------- [[ceil]] -ceil -^^^^ +=== ceil * Syntax: + @@ -196,8 
+189,7 @@ argument, -------------------------------------------------------------------- [[cos]] -cos -^^^ +=== cos * Syntax: + @@ -225,8 +217,7 @@ cos(numeric_value) --------------------------------------------------------------------------------------------------------------------- [[cosh]] -cosh -^^^^ +=== cosh * Syntax: + @@ -254,8 +245,7 @@ cosh(numeric_value) -------------------------------------------------------------------------------------------------------------------- [[degrees]] -degrees -^^^^^^^ +=== degrees * Syntax: + @@ -284,8 +274,7 @@ type `double`, --------------- [[e]] -e -^ +=== e * Syntax: + @@ -306,8 +295,7 @@ e() --------------------------- [[exp]] -exp -^^^ +=== exp * Syntax: + @@ -335,8 +323,7 @@ exp(numeric_value) ---------------------------------------------------------------------------------------------------------- [[floor]] -floor -^^^^^ +=== floor * Syntax: + @@ -374,8 +361,7 @@ argument, -------------------------------------------------------------------- [[ln]] -ln -^^ +=== ln * Syntax: + @@ -403,8 +389,7 @@ ln(numeric_value) -------------------------------------------------------------------------------------------------------------- [[log]] -log -^^^ +=== log * Syntax: + @@ -432,8 +417,7 @@ log(numeric_value) ------------------------------------------------------------------------------------------------ [[pi]] -pi -^^ +=== pi * Syntax: + @@ -454,8 +438,7 @@ pi() --------------------------- [[power]] -power -^^^^^ +=== power * Syntax: + @@ -486,8 +469,7 @@ power(numeric_value1, numeric_value2) ---------------------------------------------- [[radians]] -radians -^^^^^^^ +=== radians * Syntax: + @@ -516,8 +498,7 @@ type `double`, --------------------------- [[round]] -round -^^^^^ +=== round * Syntax: + @@ -571,8 +552,7 @@ following type: ----------------------------------------------------------------------------------------------------------------------- [[sign]] -sign -^^^^ +=== sign * Syntax: + @@ -601,8 +581,7 @@ 
sign(numeric_value) ------------------------------------------------ [[sin]] -sin -^^^ +=== sin * Syntax: + @@ -630,8 +609,7 @@ sin(numeric_value) -------------------------------------------------------------------------------------------------------------------- [[sinh]] -sinh -^^^^ +=== sinh * Syntax: + @@ -659,8 +637,7 @@ sinh(numeric_value) -------------------------------------------------------------------------------------------------------------------- [[sqrt]] -sqrt -^^^^ +=== sqrt * Syntax: + @@ -688,8 +665,7 @@ sqrt(numeric_value) ------------------------------------------------------------------------------------------------------ [[tan]] -tan -^^^ +=== tan * Syntax: + @@ -717,8 +693,7 @@ tan(numeric_value) --------------------------------------------------------------------------------------------------------------------- [[tanh]] -tanh -^^^^ +=== tanh * Syntax: + @@ -746,8 +721,7 @@ tanh(numeric_value) ------------------------------------------------------------------------------------------------------------------- [[trunc]] -trunc -^^^^^ +=== trunc * Syntax: + @@ -780,3 +754,4 @@ non-integer, and non-bigint value. ------------------------------------------------------------ { "v1": 1, "v2": 2, "v3": 0.12, "v4": 10.0, "v5": 1000.525 } ------------------------------------------------------------ + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc index 01ea20333d1..5adde87a6ff 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/1_numeric_delta.adoc @@ -1,6 +1,5 @@ [[round_half_to_even]] -round_half_to_even -^^^^^^^^^^^^^^^^^^ +=== round_half_to_even * Syntax: + @@ -45,3 +44,4 @@ non-integer, or non-bigint value. 
----------------------------------------------------------------------------------------------------------------- { "v1": 2013, "v2": -4036, "v3": 1.0, "v4": -2013.0, "v5": -2014.0, "v6": -2013.89, "v7": 2013, "v8": -2013.256 } ----------------------------------------------------------------------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc index a98a9863b91..13e916f7896 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc @@ -1,10 +1,8 @@ [[string-functions]] -String Functions -~~~~~~~~~~~~~~~~ +== String Functions [[concat]] -concat -^^^^^^ +=== concat * Syntax: + @@ -34,8 +32,7 @@ concat("test ", "driven ", "development"); ------------------------- [[contains]] -contains -^^^^^^^^ +=== contains * Syntax: + @@ -70,8 +67,7 @@ index] can be utilized for this function. --------------------------- [[ends_with]] -ends_with -^^^^^^^^^ +=== ends_with * Syntax: + @@ -107,8 +103,7 @@ ending substring. --------------------------- [[initcap-or-title]] -initcap (or title) -^^^^^^^^^^^^^^^^^^ +=== initcap (or title) * Syntax: + @@ -137,8 +132,7 @@ alias called "title". ---------------------------------------------------------- [[length]] -length -^^^^^^ +=== length * Syntax: + @@ -166,8 +160,7 @@ length("test string"); -- [[lower]] -lower -^^^^^ +=== lower * Syntax: + @@ -194,8 +187,7 @@ lower("ASTERIXDB"); ----------- [[ltrim]] -ltrim -^^^^^ +=== ltrim * Syntax: + @@ -225,8 +217,7 @@ ltrim("me like x-phone", "eml"); --------------- [[position]] -position -^^^^^^^^ +=== position * Syntax: + @@ -266,8 +257,7 @@ returns the 1-based position. 
Below are the aliases for each version: --------------------------------------- [[regexp_contains]] -regexp_contains -^^^^^^^^^^^^^^^ +=== regexp_contains * Syntax: + @@ -308,8 +298,7 @@ multiline (m), case_insensitive (i), and comments and whitespace (x). --------------------------- [[regexp_like]] -regexp_like -^^^^^^^^^^^ +=== regexp_like * Syntax: + @@ -349,8 +338,7 @@ multiline (m), case_insensitive (i), and comments and whitespace (x). --------------------------- [[regexp_position]] -regexp_position -^^^^^^^^^^^^^^^ +=== regexp_position * Syntax: + @@ -398,8 +386,7 @@ appears in `string` (starting at 0), or -1 if it does not appear. ---------------------------------------- [[regexp_replace]] -regexp_replace -^^^^^^^^^^^^^^ +=== regexp_replace * Syntax: + @@ -443,8 +430,7 @@ regexp_replace(" like x-phone the voicemail_service is awesome", " like x-phone" ------------------------------------------------- [[repeat]] -repeat -^^^^^^ +=== repeat * Syntax: + @@ -477,8 +463,7 @@ repeat("test", 3); -------------- [[replace]] -replace -^^^^^^^ +=== replace * Syntax: + @@ -520,8 +505,7 @@ type error, ---------------------------------------------------------- [[reverse]] -reverse -^^^^^^^ +=== reverse * Syntax: + @@ -551,8 +535,7 @@ reverse("hello"); ------- [[rtrim]] -rtrim -^^^^^ +=== rtrim * Syntax: + @@ -585,8 +568,7 @@ rtrim(string[, chars]); ------------------------------------ [[split]] -split -^^^^^ +=== split * Syntax: + @@ -614,8 +596,7 @@ split("test driven development", " "); ----------------------------------- [[starts_with]] -starts_with -^^^^^^^^^^^ +=== starts_with * Syntax: + @@ -651,8 +632,7 @@ starting substring. --------------------------- [[substr]] -substr -^^^^^^ +=== substr * Syntax: + @@ -699,8 +679,7 @@ starting offset is not within string bounds or `length` is negative. The function has an alias `substring`. 
[[trim]] -trim -^^^^ +=== trim * Syntax: + @@ -730,8 +709,7 @@ trim("i like x-phone", "xphoen"); -------- [[upper]] -upper -^^^^^ +=== upper * Syntax: + @@ -756,3 +734,4 @@ upper("hello") ------- "HELLO" ------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc index 6dc453c3b78..27d1e3eae57 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_delta.adoc @@ -1,6 +1,5 @@ [[string_concat]] -string_concat -^^^^^^^^^^^^^ +=== string_concat * Syntax: + @@ -33,8 +32,7 @@ string_concat(["ASTERIX", " ", "ROCKS!"]); ---------------- [[string_join]] -string_join -^^^^^^^^^^^ +=== string_join * Syntax: + @@ -71,8 +69,7 @@ string_join(["ASTERIX", "ROCKS~"], "!! "); ------------------ [[string_to_codepoint]] -string_to_codepoint -^^^^^^^^^^^^^^^^^^^ +=== string_to_codepoint * Syntax: + @@ -99,8 +96,7 @@ string_to_codepoint("Hello ASTERIX!"); -------------------------------------------------------------- [[codepoint_to_string]] -codepoint_to_string -^^^^^^^^^^^^^^^^^^^ +=== codepoint_to_string * Syntax: + @@ -132,8 +128,7 @@ codepoint_to_string([72, 101, 108, 108, 111, 32, 65, 83, 84, 69, 82, 73, 88, 33] ---------------- [[substring_before]] -substring_before -^^^^^^^^^^^^^^^^ +=== substring_before * Syntax: + @@ -163,8 +158,7 @@ substring_before(" like x-phone", "x-phone"); -------- [[substring_after]] -substring_after -^^^^^^^^^^^^^^^ +=== substring_after * Syntax: + @@ -190,3 +184,4 @@ substring_after(" like x-phone", "xph"); ----- "one" ----- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc index 22410f7e9d6..803315d9cca 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc +++ 
b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc @@ -1,10 +1,8 @@ [[binary-functions]] -Binary Functions -~~~~~~~~~~~~~~~~ +== Binary Functions [[parse_binary]] -parse_binary -^^^^^^^^^^^^ +=== parse_binary * Syntax: + @@ -31,8 +29,7 @@ parse_binary('QXN0ZXJpeAE=',"base64") ]; hex("4173746572697801") ] [[print_binary]] -print_binary -^^^^^^^^^^^^ +=== print_binary * Syntax: + @@ -60,8 +57,7 @@ Currently we support `hex` and `base64` format. -------------------------------------- [[binary_length]] -binary_length -^^^^^^^^^^^^^ +=== binary_length * Syntax: + @@ -84,8 +80,7 @@ binary_length(hex("00AA")) 2 [[sub_binary]] -sub_binary -^^^^^^^^^^ +=== sub_binary * Syntax: + @@ -120,8 +115,7 @@ hex("DD") --------- [[binary_concat]] -binary_concat -^^^^^^^^^^^^^ +=== binary_concat * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc index 96f6a300648..35b830d5311 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/4_spatial.adoc @@ -1,10 +1,8 @@ [[spatial-functions]] -Spatial Functions -~~~~~~~~~~~~~~~~~ +== Spatial Functions [[create_point]] -create_point -^^^^^^^^^^^^ +=== create_point * Syntax: + @@ -33,8 +31,7 @@ create_point(x, y) ------------------------------- [[create_line]] -create_line -^^^^^^^^^^^ +=== create_line * Syntax: + @@ -64,8 +61,7 @@ create_line(point1, point2) --------------------------------------- [[create_rectangle]] -create_rectangle -^^^^^^^^^^^^^^^^ +=== create_rectangle * Syntax: + @@ -97,8 +93,7 @@ and `point2`, ------------------------------------------------- [[create_circle]] -create_circle -^^^^^^^^^^^^^ +=== create_circle * Syntax: + @@ -130,8 +125,7 @@ provided in `point` and `radius`. 
------------------------------------- [[create_polygon]] -create_polygon -^^^^^^^^^^^^^^ +=== create_polygon * Syntax: + @@ -167,8 +161,7 @@ array will cause a type error. --------------------------------------------------------- [[get_xget_y]] -get_x/get_y -^^^^^^^^^^^ +=== get_x/get_y * Syntax: + @@ -195,8 +188,7 @@ get_x(point) or get_y(point) -------------------------------------------- [[get_points]] -get_points -^^^^^^^^^^ +=== get_points * Syntax: + @@ -225,8 +217,7 @@ get_points(create_polygon([1.0,1.0,2.0,2.0,3.0,3.0,4.0,4.0])) -------------------------------------------------------------------------- [[get_centerget_radius]] -get_center/get_radius -^^^^^^^^^^^^^^^^^^^^^ +=== get_center/get_radius * Syntax: + @@ -258,8 +249,7 @@ respectively. ----------------------------------------------------------- [[spatial_distance]] -spatial_distance -^^^^^^^^^^^^^^^^ +=== spatial_distance * Syntax: + @@ -288,8 +278,7 @@ spatial_distance(point("47.44,80.65"), create_point(30.0,70.0)); ------------------ [[spatial_area]] -spatial_area -^^^^^^^^^^^^ +=== spatial_area * Syntax: + @@ -316,8 +305,7 @@ spatial_area(create_circle(create_point(0.0,0.0), 5.0)); ----------- [[spatial_intersect]] -spatial_intersect -^^^^^^^^^^^^^^^^^ +=== spatial_intersect * Syntax: + @@ -349,8 +337,7 @@ true ---- [[spatial_cell]] -spatial_cell -^^^^^^^^^^^^ +=== spatial_cell * Syntax: + @@ -382,3 +369,4 @@ spatial_cell(point("39.28,70.48"), create_point(20.0,50.0), 5.5, 6.0); --------------------------------- rectangle("36.5,68.0 42.0,74.0"); --------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc index dd83c9bd482..db5cfe54390 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/5_similarity.adoc @@ -1,14 +1,12 @@ [[similarity-functions]] 
-Similarity Functions -~~~~~~~~~~~~~~~~~~~~ +== Similarity Functions AsterixDB supports queries with different similarity functions, including http://en.wikipedia.org/wiki/Levenshtein_distance[edit distance] and https://en.wikipedia.org/wiki/Jaccard_index[Jaccard]. [[edit_distance]] -edit_distance -^^^^^^^^^^^^^ +=== edit_distance * Syntax: + @@ -42,8 +40,7 @@ edit_distance("SuzannaTillson", "Suzanna Tilson"); - [[edit_distance_check]] -edit_distance_check -^^^^^^^^^^^^^^^^^^^ +=== edit_distance_check * Syntax: + @@ -87,8 +84,7 @@ edit_distance_check("happy","hapr",2); ----------- [[edit_distance_contains]] -edit_distance_contains -^^^^^^^^^^^^^^^^^^^^^^ +=== edit_distance_contains * Syntax: + @@ -131,8 +127,7 @@ edit_distance_contains("happy","hapr",2); ----------- [[similarity_jaccard]] -similarity_jaccard -^^^^^^^^^^^^^^^^^^ +=== similarity_jaccard * Syntax: + @@ -170,8 +165,7 @@ similarity_jaccard([1,5,8,9], [1,5,9,10]); --- [[similarity_jaccard_check]] -similarity_jaccard_check -^^^^^^^^^^^^^^^^^^^^^^^^ +=== similarity_jaccard_check * Syntax: + @@ -215,3 +209,4 @@ similarity_jaccard_check([1,5,8,9], [1,5,9,10], 0.6); -------------- [ false, 0.0 ] -------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc index 391e7f02c3e..d161992c7f1 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/6_tokenizing.adoc @@ -1,10 +1,8 @@ [[tokenizing-functions]] -Tokenizing Functions -~~~~~~~~~~~~~~~~~~~~ +== Tokenizing Functions [[word_tokens]] -word_tokens -^^^^^^^^^^^ +=== word_tokens * Syntax: + @@ -30,3 +28,4 @@ word_tokens("I like the phone, awesome!"); ------------------------------------------ [ "i", "like", "the", "phone", "awesome" ] ------------------------------------------ + diff --git 
a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc index ccc611d4253..5b552998466 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_allens.adoc @@ -1,6 +1,5 @@ [[interval_before-interval_after]] -interval_before, interval_after -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_before, interval_after * Syntax: + @@ -39,8 +38,7 @@ another interval. --------------------------------------------------- [[interval_covers-interval_covered_by]] -interval_covers, interval_covered_by -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_covers, interval_covered_by * Syntax: + @@ -82,8 +80,7 @@ interval2.start <= interval1.start AND interval2.end >= interval1.end -------------------------------------------------------- [[interval_overlaps-interval_overlapped_by]] -interval_overlaps, interval_overlapped_by -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_overlaps, interval_overlapped_by * Syntax: + @@ -132,8 +129,7 @@ the Allen's relations on the definition of overlap. ------------------------------------------- [[interval_overlapping]] -interval_overlapping -^^^^^^^^^^^^^^^^^^^^ +=== interval_overlapping Note that `interval_overlapping` is not an Allen's Relation, but syntactic sugar we added for the case that the intersect of two @@ -176,8 +172,7 @@ interval1.start < interval2.end AND interval1.end > interval2.start ---------------------------------------------- [[interval_meets-interval_met_by]] -interval_meets, interval_met_by -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_meets, interval_met_by * Syntax: + @@ -217,8 +212,7 @@ interval. 
-------------------------------- [[interval_starts-interval_started_by]] -interval_starts, interval_started_by -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_starts, interval_started_by * Syntax: + @@ -263,8 +257,7 @@ AND interval2.end <= interval1.end -------------------------------------------------------- [[interval_ends-interval_ended_by]] -interval_ends, interval_ended_by -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_ends, interval_ended_by * Syntax: + @@ -304,3 +297,4 @@ interval2.end = interval1.end AND interval2.start >= interval1.start ---------------------------------------------------- { "interval_ends": true, "interval_ended_by": true } ---------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc index c2276773474..be349cd3f75 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc @@ -1,10 +1,8 @@ [[temporal-functions]] -Temporal Functions -~~~~~~~~~~~~~~~~~~ +== Temporal Functions [[get_yearget_monthget_dayget_hourget_minuteget_secondget_millisecond]] -get_year/get_month/get_day/get_hour/get_minute/get_second/get_millisecond -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== get_year/get_month/get_day/get_hour/get_minute/get_second/get_millisecond * Syntax: + @@ -41,8 +39,7 @@ following types: `date`, `datetime`, `time`, and `duration`. 
-------------------------------------------------------------------------------------- [[adjust_datetime_for_timezone]] -adjust_datetime_for_timezone -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== adjust_datetime_for_timezone * Syntax: + @@ -75,8 +72,7 @@ adjust_datetime_for_timezone(datetime("2008-04-26T10:10:00"), "+08:00"); ------------------------------- [[adjust_time_for_timezone]] -adjust_time_for_timezone -^^^^^^^^^^^^^^^^^^^^^^^^ +=== adjust_time_for_timezone * Syntax: + @@ -109,8 +105,7 @@ adjust_time_for_timezone(get_time_from_datetime(datetime("2008-04-26T10:10:00")) -------------------- [[calendar_duration_from_datetime]] -calendar_duration_from_datetime -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== calendar_duration_from_datetime * Syntax: + @@ -147,8 +142,7 @@ duration("P5Y2M24DT10H10M") --------------------------- [[get_year_month_durationget_day_time_duration]] -get_year_month_duration/get_day_time_duration -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== get_year_month_duration/get_day_time_duration * Syntax: + @@ -175,8 +169,7 @@ year_month_duration("P1Y") -------------------------- [[months_from_year_month_durationms_from_day_time_duration]] -months_from_year_month_duration/ms_from_day_time_duration -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== months_from_year_month_duration/ms_from_day_time_duration * Syntax: + @@ -207,8 +200,7 @@ months_from_year_month_duration/ms_from_day_time_duration(duration_value) --------------------------------------- [[duration_from_monthsduration_from_ms]] -duration_from_months/duration_from_ms -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== duration_from_months/duration_from_ms * Syntax: + @@ -236,8 +228,7 @@ duration("P8M") --------------- [[duration_from_interval]] -duration_from_interval -^^^^^^^^^^^^^^^^^^^^^^ +=== duration_from_interval * Syntax: + @@ -274,8 +265,7 @@ duration_from_interval(interval_value) ---------------------------------------------- [[current_date]] -current_date -^^^^^^^^^^^^ +=== current_date 
* Syntax: + @@ -288,8 +278,7 @@ current_date() ** a `date` value of the date when the function is called. [[current_time]] -current_time -^^^^^^^^^^^^ +=== current_time * Syntax: + @@ -302,8 +291,7 @@ current_time() ** a `time` value of the time when the function is called. [[current_datetime]] -current_datetime -^^^^^^^^^^^^^^^^ +=== current_datetime * Syntax: + @@ -316,8 +304,7 @@ current_datetime() ** a `datetime` value of the datetime when the function is called. [[get_date_from_datetime]] -get_date_from_datetime -^^^^^^^^^^^^^^^^^^^^^^ +=== get_date_from_datetime * Syntax: + @@ -332,8 +319,7 @@ get_date_from_datetime(datetime) ** any other non-datetime input value will cause a type error. [[get_time_from_datetime]] -get_time_from_datetime -^^^^^^^^^^^^^^^^^^^^^^ +=== get_time_from_datetime * Syntax: + @@ -360,8 +346,7 @@ time("10:10:00.000Z") --------------------- [[day_of_week]] -day_of_week -^^^^^^^^^^^ +=== day_of_week * Syntax: + @@ -388,8 +373,7 @@ day_of_week(datetime("2012-12-30T12:12:12.039Z")); - [[date_from_unix_time_in_days]] -date_from_unix_time_in_days -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== date_from_unix_time_in_days * Syntax: + @@ -409,8 +393,7 @@ representing the number of days. ** any other non-numeric input value will cause a type error. [[datetime_from_unix_time_in_ms]] -datetime_from_unix_time_in_ms -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== datetime_from_unix_time_in_ms * Syntax: + @@ -430,8 +413,7 @@ since 1970-01-01T00:00:00Z, ** any other non-numeric input value will cause a type error. [[datetime_from_unix_time_in_secs]] -datetime_from_unix_time_in_secs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== datetime_from_unix_time_in_secs * Syntax: + @@ -451,8 +433,7 @@ representing the number of seconds. ** any other non-numeric input value will cause a type error. 
[[datetime_from_date_time]] -datetime_from_date_time -^^^^^^^^^^^^^^^^^^^^^^^ +=== datetime_from_date_time * Syntax: @@ -472,8 +453,7 @@ datetime_from_date_time(date,time) *** or, the second argument is any other non-time value. [[time_from_unix_time_in_ms]] -time_from_unix_time_in_ms -^^^^^^^^^^^^^^^^^^^^^^^^^ +=== time_from_unix_time_in_ms * Syntax: + @@ -507,8 +487,7 @@ representing the number of milliseconds. --------------------------------------------------------------------------------------------------------------- [[unix_time_from_date_in_days]] -unix_time_from_date_in_days -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== unix_time_from_date_in_days * Syntax: + @@ -526,8 +505,7 @@ for `date_value`. ** any other non-date input value will cause a type error. [[unix_time_from_datetime_in_ms]] -unix_time_from_datetime_in_ms -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== unix_time_from_datetime_in_ms * Syntax: + @@ -545,8 +523,7 @@ unix_time_from_datetime_in_ms(datetime_value) ** any other non-datetime input value will cause a type error. [[unix_time_from_datetime_in_secs]] -unix_time_from_datetime_in_secs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== unix_time_from_datetime_in_secs * Syntax: + @@ -564,8 +541,7 @@ unix_time_from_datetime_in_secs(datetime_value) ** any other non-datetime input value will cause a type error. 
[[unix_time_from_time_in_ms]] -unix_time_from_time_in_ms -^^^^^^^^^^^^^^^^^^^^^^^^^ +=== unix_time_from_time_in_ms * Syntax: + @@ -597,8 +573,7 @@ unix_time_from_time_in_ms(time_value) --------------------------------------------------------------------------------------------------------------- [[parse_dateparse_timeparse_datetime]] -parse_date/parse_time/parse_datetime -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== parse_date/parse_time/parse_datetime * Syntax: @@ -641,8 +616,7 @@ time("00:30:30.000Z") --------------------- [[print_dateprint_timeprint_datetime]] -print_date/print_time/print_datetime -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== print_date/print_time/print_datetime * Syntax: + @@ -686,8 +660,7 @@ print_time(time("00:30:30.000Z"),"m:s"); ------- [[get_interval_start-get_interval_end]] -get_interval_start, get_interval_end -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== get_interval_start, get_interval_end * Syntax: + @@ -719,8 +692,7 @@ the interval) representing the starting or ending time, [[get_interval_start_dateget_interval_start_datetimeget_interval_start_time-get_interval_end_dateget_interval_end_datetimeget_interval_end_time]] -get_interval_start_date/get_interval_start_datetimeget_interval_start_time, -get_interval_end_date/get_interval_end_datetime/get_interval_end_time -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== get_interval_start_date/get_interval_start_datetime/get_interval_start_time, get_interval_end_date/get_interval_end_datetime/get_interval_end_time + * Syntax: + @@ -763,8 +735,7 @@ representing the starting or ending time, ------------------------------------------------- [[get_overlapping_interval]] -get_overlapping_interval -^^^^^^^^^^^^^^^^^^^^^^^^ +=== get_overlapping_interval * Syntax: + @@ -808,8 +779,7 @@ interval must be of the same type.
--------------------------------------------------------------------------------------------------- [[interval_bin]] -interval_bin -^^^^^^^^^^^^ +=== interval_bin * Syntax: + @@ -869,8 +839,7 @@ non-year_month_duration/non-day_time_duration value. ----------------------------------------------------------------------------------------------- [[interval_start_from_datetimedatetime]] -interval_start_from_date/time/datetime -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +=== interval_start_from_date/time/datetime * Syntax: + @@ -915,8 +884,7 @@ non-date/non-time/non-datetime value, --------------------------------------------------------------------------------------------------- [[overlap_bins]] -overlap_bins -^^^^^^^^^^^^ +=== overlap_bins * Return Value: ** a `interval` value representing the bin containing the `time_to_bin` @@ -991,3 +959,4 @@ non-year_month_duration/non-day_time_duration value. ] }; --------------------------------------------------------------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc index 3c66c12a71a..ed8deae4c2d 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/8_record.adoc @@ -1,10 +1,8 @@ [[object-functions]] -Object Functions -~~~~~~~~~~~~~~~~ +== Object Functions [[get_object_fields]] -get_object_fields -^^^^^^^^^^^^^^^^^ +=== get_object_fields * Syntax: + @@ -98,8 +96,7 @@ get_object_field_value({ ----------- [[object_remove_fields]] -object_remove_fields -^^^^^^^^^^^^^^^^^^^^ +=== object_remove_fields * Syntax: + @@ -143,8 +140,7 @@ object_remove_fields( ----------------------------- [[object_add_fields]] -object_add_fields -^^^^^^^^^^^^^^^^^ +=== object_add_fields * Syntax: + @@ -191,8 +187,7 @@ object_add_fields( ------------------------------------------------------- 
[[object_merge]] -object_merge -^^^^^^^^^^^^ +=== object_merge * Syntax: + @@ -249,8 +244,7 @@ object_merge( ------------------------------- [[object_length]] -object_length -^^^^^^^^^^^^^ +=== object_length * Syntax: + @@ -284,8 +278,7 @@ object_length( - [[object_names]] -object_names -^^^^^^^^^^^^ +=== object_names * Syntax: + @@ -318,8 +311,7 @@ object_names( ------------------------------ [[object_remove]] -object_remove -^^^^^^^^^^^^^ +=== object_remove * Syntax: + @@ -360,8 +352,7 @@ object_remove( ------------------------- [[object_rename]] -object_rename -^^^^^^^^^^^^^ +=== object_rename * Syntax: + @@ -407,8 +398,7 @@ object_rename( ----------------------------------------------- [[object_unwrap]] -object_unwrap -^^^^^^^^^^^^^ +=== object_unwrap * Syntax: + @@ -445,8 +435,7 @@ object_unwrap( --- [[object_replace]] -object_replace -^^^^^^^^^^^^^^ +=== object_replace * Syntax: + @@ -490,8 +479,7 @@ object_replace( ----------------------------------------------- [[object_add]] -object_add -^^^^^^^^^^ +=== object_add * Syntax: + @@ -537,8 +525,7 @@ object_add( ------------------------------------------------ [[object_put]] -object_put -^^^^^^^^^^ +=== object_put * Syntax: + @@ -583,8 +570,7 @@ object_put( ----------------------------------------------- [[object_values]] -object_values -^^^^^^^^^^^^^ +=== object_values * Syntax: + @@ -620,8 +606,7 @@ object_values( ----------------------------------- [[object_pairs]] -object_pairs -^^^^^^^^^^^^ +=== object_pairs * Syntax: + @@ -660,8 +645,7 @@ object_pairs( ------------------------------------------------------------------- [[pairs]] -pairs -^^^^^ +=== pairs * Syntax: + @@ -705,3 +689,4 @@ pairs( [ "state", "CA" ] ] ----------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc index 53b23c68b63..88505313683 100644 --- 
a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc @@ -1,13 +1,11 @@ [[aggregate-functions-array-functions]] -Aggregate Functions (Array Functions) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +== Aggregate Functions (Array Functions) This section contains detailed descriptions of each AQL aggregate function (i.e., array function). [[sql-count]] -sql-count -^^^^^^^^^ +=== sql-count * Syntax: + @@ -38,8 +36,7 @@ sql-count( ['hello', 'world', 1, 2, 3, null, missing] ); - [[sql-avg]] -sql-avg -^^^^^^^ +=== sql-avg * Syntax: + @@ -76,8 +73,7 @@ sql-avg( [1.2, 2.3, 3.4, 0, null] ); ----- [[sql-sum]] -sql-sum -^^^^^^^ +=== sql-sum * Syntax: + @@ -116,8 +112,7 @@ sql-sum( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[sql-sql_min]] -sql-sql_min -^^^^^^^^^^^ +=== sql-min * Syntax: + @@ -155,8 +150,7 @@ sql-min( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[sql-max]] -sql-max -^^^^^^^ +=== sql-max * Syntax: + @@ -194,8 +188,7 @@ sql-max( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[count]] -count -^^^^^ +=== count * Syntax: + @@ -224,8 +217,7 @@ count( [1, 2, null, missing] ); - [[avg]] -avg -^^^ +=== avg * Syntax: + @@ -259,8 +251,7 @@ avg( [100, 200, 300] ); --------- [[sum]] -sum -^^^ +=== sum * Syntax: + @@ -296,8 +287,7 @@ sum( [100, 200, 300] ); --- [[sql-min]] -sql-min -^^^^^^^ +=== min * Syntax: + @@ -334,8 +324,7 @@ min( [10.2, 100, 5] ); --- [[sql-max-1]] -sql-max -^^^^^^^ +=== max * Syntax: + @@ -370,3 +359,4 @@ max( [10.2, 100, 5] ); ----- 100.0 ----- + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc index 740825f55e9..e80e01c6df4 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc @@ -1,6 +1,5 @@
[[aggregate-functions-array-functions]] -Aggregate Functions (Array Functions) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +== Aggregate Functions (Array Functions) This section contains detailed descriptions of the built-in aggregate functions in the query language. @@ -25,8 +24,7 @@ with an OVER clause. Refer to link:manual.html#Over_clauses[OVER Clauses] for details. [[array_count]] -array_count -^^^^^^^^^^^ +=== array_count * Syntax: + @@ -57,8 +55,7 @@ array_count( ['hello', 'world', 1, 2, 3, null, missing] ); - [[array_avg]] -array_avg -^^^^^^^^^ +=== array_avg * Syntax: + @@ -95,8 +92,7 @@ array_avg( [1.2, 2.3, 3.4, 0, null] ); ----- [[array_sum]] -array_sum -^^^^^^^^^ +=== array_sum * Syntax: + @@ -135,8 +131,7 @@ array_sum( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[array_min]] -array_min -^^^^^^^^^ +=== array_min * Syntax: + @@ -174,8 +169,7 @@ array_min( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[array_max]] -array_max -^^^^^^^^^ +=== array_max * Syntax: + @@ -213,8 +207,7 @@ array_max( [1.2, 2.3, 3.4, 0, null, missing] ); --- [[array_stddev_samp]] -array_stddev_samp -^^^^^^^^^^^^^^^^^ +=== array_stddev_samp * Syntax: + @@ -251,8 +244,7 @@ array_stddev_samp( [1.2, 2.3, 3.4, 0, null] ); ------------------ [[array_stddev_pop]] -array_stddev_pop -^^^^^^^^^^^^^^^^ +=== array_stddev_pop * Syntax: + @@ -289,8 +281,7 @@ array_stddev_pop( [1.2, 2.3, 3.4, 0, null] ); ------------------ [[array_var_samp]] -array_var_samp -^^^^^^^^^^^^^^ +=== array_var_samp * Syntax: + @@ -327,8 +318,7 @@ array_var_samp( [1.2, 2.3, 3.4, 0, null] ); ------------------ [[array_var_pop]] -array_var_pop -^^^^^^^^^^^^^ +=== array_var_pop * Syntax: + @@ -365,8 +355,7 @@ array_var_pop( [1.2, 2.3, 3.4, 0, null] ); ------------------ [[array_skewness]] -array_skewness -^^^^^^^^^^^^^^ +=== array_skewness * Syntax: + @@ -403,8 +392,7 @@ array_skewness( [1.2, 2.3, 3.4, 0, null] ); -------------------- [[array_kurtosis]] -array_kurtosis -^^^^^^^^^^^^^^ +=== array_kurtosis * Syntax: + @@ -441,8 
+429,7 @@ array_kurtosis( [1.2, 2.3, 3.4, 0, null] ); ------------------ [[strict_count]] -strict_count -^^^^^^^^^^^^ +=== strict_count * Syntax: + @@ -471,8 +458,7 @@ strict_count( [1, 2, null, missing] ); - [[strict_avg]] -strict_avg -^^^^^^^^^^ +=== strict_avg * Syntax: + @@ -506,8 +492,7 @@ strict_avg( [100, 200, 300] ); ----- [[strict_sum]] -strict_sum -^^^^^^^^^^ +=== strict_sum * Syntax: + @@ -543,8 +528,7 @@ strict_sum( [100, 200, 300] ); --- [[strict_min]] -strict_min -^^^^^^^^^^ +=== strict_min * Syntax: + @@ -581,8 +565,7 @@ strict_min( [10.2, 100, 5] ); --- [[strict_max]] -strict_max -^^^^^^^^^^ +=== strict_max * Syntax: + @@ -619,8 +602,7 @@ strict_max( [10.2, 100, 5] ); ----- [[strict_stddev_samp]] -strict_stddev_samp -^^^^^^^^^^^^^^^^^^ +=== strict_stddev_samp * Syntax: + @@ -655,8 +637,7 @@ strict_stddev_samp( [100, 200, 300] ); ----- [[strict_stddev_pop]] -strict_stddev_pop -^^^^^^^^^^^^^^^^^ +=== strict_stddev_pop * Syntax: + @@ -691,8 +672,7 @@ strict_stddev_pop( [100, 200, 300] ); ----------------- [[strict_var_samp]] -strict_var_samp -^^^^^^^^^^^^^^^ +=== strict_var_samp * Syntax: + @@ -727,8 +707,7 @@ strict_var_samp( [100, 200, 300] ); ------- [[strict_var_pop]] -strict_var_pop -^^^^^^^^^^^^^^ +=== strict_var_pop * Syntax: + @@ -763,8 +742,7 @@ strict_var_pop( [100, 200, 300] ); ----------------- [[strict_skewness]] -strict_skewness -^^^^^^^^^^^^^^^ +=== strict_skewness * Syntax: + @@ -798,8 +776,7 @@ strict_skewness( [100, 200, 300] ); --- [[strict_kurtosis]] -strict_kurtosis -^^^^^^^^^^^^^^^ +=== strict_kurtosis * Syntax: + @@ -832,3 +809,4 @@ strict_kurtosis( [100, 200, 300] ); ---- -1.5 ---- + From 69c0795a8c29a9cbdbdfa42d3876c55246abd6c3 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Sat, 25 Jan 2020 13:01:28 +0000 Subject: [PATCH 13/22] Correcting manual partials Change-Id: I1d6b75374dd5f540978e01e83cbce6590bb43e6c --- .../modules/sqlpp/partials/1_intro.adoc | 3 +- .../shared/modules/sqlpp/partials/2_expr.adoc | 46 ++--- 
.../modules/sqlpp/partials/2_expr_title.adoc | 3 +- .../sqlpp/partials/3_declare_dataverse.adoc | 4 +- .../sqlpp/partials/3_declare_function.adoc | 1 + .../modules/sqlpp/partials/3_query.adoc | 164 ++++++------------ .../modules/sqlpp/partials/3_query_title.adoc | 4 +- .../modules/sqlpp/partials/4_error.adoc | 12 +- .../modules/sqlpp/partials/4_error_title.adoc | 3 +- .../sqlpp/partials/5_ddl_dataset_index.adoc | 48 ++--- .../modules/sqlpp/partials/5_ddl_dml.adoc | 13 +- .../partials/5_ddl_function_removal.adoc | 13 +- .../modules/sqlpp/partials/5_ddl_head.adoc | 3 +- .../sqlpp/partials/appendix_1_title.adoc | 3 +- .../sqlpp/partials/appendix_2_index_only.adoc | 4 +- .../partials/appendix_2_parallel_sort.adoc | 4 +- .../sqlpp/partials/appendix_2_parameters.adoc | 7 +- .../sqlpp/partials/appendix_2_title.adoc | 3 +- .../sqlpp/partials/appendix_3_resolution.adoc | 23 +-- .../sqlpp/partials/appendix_3_title.adoc | 3 +- 20 files changed, 134 insertions(+), 230 deletions(-) diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc index 29b6328aa36..f2933767a10 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/1_intro.adoc @@ -1,6 +1,5 @@ [[introduction]] -1. Introduction ---------------- +== 1. 
Introduction This document is intended as a reference guide to the full syntax and semantics of AsterixDB's query language, a SQL-based language for diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc index 07a1fa1817e..3aa8734a943 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc @@ -14,8 +14,7 @@ Note that in the following text, words enclosed in angle brackets denote keywords that are not case-sensitive. [[operator-expressions]] -Operator Expressions -~~~~~~~~~~~~~~~~~~~~ +=== Operator Expressions Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows: @@ -66,8 +65,7 @@ exceptions listed in link:#Comparison_operators[comparison operators] and link:#Logical_operators[logical operators]. [[arithmetic-operators]] -Arithmetic Operators -^^^^^^^^^^^^^^^^^^^^ +==== Arithmetic Operators Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values. @@ -96,8 +94,7 @@ integers) |SELECT VALUE 5 / 2; |======================================================================= [[collection-operators]] -Collection Operators -^^^^^^^^^^^^^^^^^^^^ +==== Collection Operators Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS). @@ -119,8 +116,7 @@ ChirpMessages cm WHERE NOT EXISTS cm.referredTopics; |======================================================================= [[comparison-operators]] -Comparison Operators -^^^^^^^^^^^^^^^^^^^^ +==== Comparison Operators Comparison operators are used to compare values. The comparison operators fall into one of two sub-categories: missing value comparisons @@ -222,8 +218,7 @@ operators work. 
|=================================================== [[logical-operators]] -Logical Operators -^^^^^^^^^^^^^^^^^ +==== Logical Operators Logical operators perform logical `NOT`, `AND`, and `OR` operations over Boolean values (`TRUE` and `FALSE`) plus `NULL` and `MISSING`. @@ -271,8 +266,7 @@ inputs. |================ [[quantified-expressions]] -Quantified Expressions -~~~~~~~~~~~~~~~~~~~~~~ +=== Quantified Expressions ---------------------------------------------------------------------------------------------------------------- QuantifiedExpression ::= ( (<ANY>|<SOME>) | <EVERY> ) Variable <IN> Expression ( "," Variable "in" Expression )* @@ -307,8 +301,7 @@ SOME x IN [ 1, 2, 3 ] SATISFIES x < 3 -------------------------------------- [[path-expressions]] -Path Expressions -~~~~~~~~~~~~~~~~ +=== Path Expressions -------------------------------------------------------------- PathExpression ::= PrimaryExpression ( Field | Index )* @@ -358,8 +351,7 @@ Examples -------------------------------------------------------- [[primary-expressions]] -Primary Expressions -~~~~~~~~~~~~~~~~~~~ +=== Primary Expressions --------------------------------------- PrimaryExpr ::= Literal @@ -379,8 +371,7 @@ model (such as a newly constructed object, array, or multiset of data model instances). [[literals]] -Literals -~~~~~~~~ +=== Literals ----------------------------------------------------------- Literal ::= StringLiteral @@ -454,8 +445,7 @@ Different from standard SQL, double quotes play the same role as single quotes and may be used for string literals in queries as well.
[[variable-references]] -Variable References -^^^^^^^^^^^^^^^^^^^ +==== Variable References ---------------------------------------------------------------------------- VariableReference ::= <IDENTIFIER> | <DelimitedIdentifier> @@ -495,8 +485,7 @@ id ------------- [[parameter-references]] -Parameter References -^^^^^^^^^^^^^^^^^^^^ +==== Parameter References ------------------------------------------------------------------------------------------ ParameterReference ::= NamedParameterReference | PositionalParameterReference @@ -521,8 +510,7 @@ $1 --- [[parenthesized-expressions]] -Parenthesized Expressions -^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Parenthesized Expressions --------------------------------------------------------- ParenthesizedExpression ::= "(" Expression ")" | Subquery @@ -542,8 +530,7 @@ Example --------- [[function-call-expressions]] -Function Call Expressions -^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Function Call Expressions ------------------------------------------------------------------------------------------------------------ FunctionCallExpression ::= ( FunctionName "(" ( Expression ( "," Expression )* )?
")" ) | WindowFunctionCall @@ -571,8 +558,7 @@ length('a string') ------------------ [[case-expressions]] -Case Expressions -~~~~~~~~~~~~~~~~ +=== Case Expressions ---------------------------------------------------------------------------------------------------------------- CaseExpression ::= SimpleCaseExpression | SearchedCaseExpression @@ -603,8 +589,7 @@ CASE (2 < 3) WHEN true THEN "yes" ELSE "no" END ----------------------------------------------- [[constructors]] -Constructors -^^^^^^^^^^^^ +==== Constructors --------------------------------------------------------------------------------------- Constructor ::= ArrayConstructor | MultisetConstructor | ObjectConstructor @@ -675,3 +660,4 @@ This query outputs: "userSince": "2012-08-20T10:10:00" } ] -------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc index 2bdb60b2190..f13195404db 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr_title.adoc @@ -1,3 +1,2 @@ [[expressions]] -2. Expressions --------------- +== 2. 
Expressions diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc index 87ccfb713b0..b93760c35d4 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_dataverse.adoc @@ -1,6 +1,5 @@ [[declarations]] -Declarations -~~~~~~~~~~~~ +=== Declarations ---------------------------------------- DatabaseDeclaration ::= "USE" Identifier @@ -19,3 +18,4 @@ Example --------------- USE TinySocial; --------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc index 2d622d28213..72e86c61177 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_declare_function.adoc @@ -32,3 +32,4 @@ For our sample data set, this returns: { "id": 2, "name": "IsbelDull", "friendCount": 2 } ] ---------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc index 6f164f4d527..78453154d6f 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc @@ -1,6 +1,5 @@ [[select-statements]] -SELECT Statements -~~~~~~~~~~~~~~~~~ +=== SELECT Statements The following shows the (rich) grammar for the `SELECT` statement in the query language. 
@@ -162,15 +161,13 @@ example collections are as follows: ----------------------------------------------------------------- [[select-clause]] -SELECT Clause -~~~~~~~~~~~~~ +=== SELECT Clause The `SELECT` clause always returns a collection value as its result (even if the result is empty or a singleton). [[select-elementvalueraw]] -Select Element/Value/Raw -^^^^^^^^^^^^^^^^^^^^^^^^ +==== Select Element/Value/Raw The `SELECT VALUE` clause returns an array or multiset that contains the results of evaluating the `VALUE` expression, with one evaluation being @@ -241,8 +238,7 @@ This query returns: --------------------------------------------- [[sql-style-select]] -SQL-style SELECT -^^^^^^^^^^^^^^^^ +==== SQL-style SELECT The traditional SQL-style `SELECT` syntax is also supported in the query language. This syntax can also be reformulated in a `SELECT VALUE` based @@ -269,13 +265,13 @@ Returns: } ] ------------------------------------- -[[select-select-returns-an-object-with-a-nested-field-for-each-input-tuple.-each-field-has-as-its-field-name-the-name-of-a-binding-variable-generated-by-either-the-from-clause-or-group-by-clause-in-the-current-enclosing-select-statement-and-its-field-value-is-the-value-of-that-binding-variable.]] -SELECT * +[[select-star]] +==== SELECT * + `SELECT *` returns an object with a nested field for each input tuple. Each field has as its field name the name of a binding variable generated by either the `FROM` clause or `GROUP BY` clause in the current enclosing `SELECT` statement, and its field value is the value of that binding variable.
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Note that the result of `SELECT *` is different from the result of query that selects all the fields of an object. @@ -434,8 +430,7 @@ returns: ----------------------------------------------------------------- [[select-variable.]] -SELECT _variable_.* -^^^^^^^^^^^^^^^^^^^ +==== SELECT _variable_.* Whereas `SELECT *` returns all the fields bound to all the variables which are currently defined, the notation `SELECT c.*` returns all the @@ -522,8 +517,7 @@ the results: ----------------------------------------- [[select-distinct]] -SELECT DISTINCT -^^^^^^^^^^^^^^^ +==== SELECT DISTINCT The `DISTINCT` keyword is used to eliminate duplicate items in results. The following example shows how it works. @@ -564,8 +558,7 @@ This version of the query returns: --- [[unnamed-projections]] -Unnamed Projections -^^^^^^^^^^^^^^^^^^^ +==== Unnamed Projections Similar to standard SQL, the query language supports unnamed projections (a.k.a, unnamed `SELECT` clause items), for which names are generated. @@ -599,8 +592,7 @@ In the result, `$1` is the generated name for `substr(user.name, 1)`, while `alias` is the generated name for `user.alias`. [[abbreviated-field-access-expressions]] -Abbreviated Field Access Expressions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Abbreviated Field Access Expressions As in standard SQL, field access expressions can be abbreviated (not recommended!) when there is no ambiguity. 
In the next example, the @@ -628,8 +620,7 @@ Outputs: ------------------------ [[unnest-clause]] -UNNEST Clause -~~~~~~~~~~~~~ +=== UNNEST Clause For each of its input tuples, the `UNNEST` clause flattens a collection-valued expression into individual items, producing multiple @@ -637,8 +628,7 @@ tuples, each of which is one of the expression's original input tuples augmented with a flattened item from its collection. [[inner-unnest]] -Inner UNNEST -^^^^^^^^^^^^ +==== Inner UNNEST The following example is a query that retrieves the names of the organizations that a selected user has worked for. It uses the `UNNEST` @@ -672,8 +662,7 @@ has no employment history, no tuple corresponding to that user will be emitted in the result. [[left-outer-unnest]] -Left Outer UNNEST -^^^^^^^^^^^^^^^^^ +==== Left Outer UNNEST As an alternative, the `LEFT OUTER UNNEST` clause offers SQL's left outer join semantics. For example, no collection-valued field named @@ -705,8 +694,7 @@ corresponding binding value for variable `h` for an input tuple. A still be propagated. [[expressing-joins-using-unnest]] -Expressing Joins Using UNNEST -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Expressing Joins Using UNNEST The `UNNEST` clause is similar to SQL's `JOIN` clause except that it allows its right argument to be correlated to its left argument, as in @@ -772,15 +760,13 @@ UNNEST ( -------------------------------------------- [[from-clauses]] -FROM clauses -~~~~~~~~~~~~ +=== FROM clauses A `FROM` clause is used for enumerating (i.e., conceptually iterating over) the contents of collections, as in SQL. [[binding-expressions]] -Binding expressions -^^^^^^^^^^^^^^^^^^^ +==== Binding expressions In addition to stored collections, a `FROM` clause can iterate over any intermediate collection returned by a valid query expression. 
In the @@ -805,8 +791,7 @@ Returns: --- [[multiple-from-terms]] -Multiple FROM Terms -^^^^^^^^^^^^^^^^^^^ +==== Multiple FROM Terms The query language permits correlations among `FROM` terms. Specifically, a `FROM` binding expression can refer to variables defined @@ -823,8 +808,7 @@ WHERE u.id = 1; ---------------------------------------------------- [[expressing-joins-using-from-terms]] -Expressing Joins Using FROM Terms -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Expressing Joins Using FROM Terms Similarly, the join intentions of the other `UNNEST`-based join examples above could be expressed as: @@ -855,8 +839,7 @@ Note that the first alternative is one of the SQL-92 approaches to expressing a join. [[implicit-binding-variables]] -Implicit Binding Variables -^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Implicit Binding Variables Similar to standard SQL, the query language supports implicit `FROM` binding variables (i.e., aliases), for which a binding variable is @@ -933,15 +916,13 @@ More information on implicit binding variables can be found in the appendix section on Variable Resolution. [[join-clauses]] -JOIN Clauses -~~~~~~~~~~~~ +=== JOIN Clauses The join clause in the query language supports both inner joins and left outer joins from standard SQL. [[inner-joins]] -Inner joins -^^^^^^^^^^^ +==== Inner joins Using a `JOIN` clause, the inner join intent from the preceding examples can also be expressed as follows: @@ -955,8 +936,7 @@ FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; -------------------------------------------------------------------- [[left-outer-joins]] -Left Outer Joins -^^^^^^^^^^^^^^^^ +==== Left Outer Joins The query language supports SQL's notion of left outer join. The following query is an example: @@ -1022,8 +1002,7 @@ clauses and left outer join queries can be expressed by `LEFT OUTER UNNESTs`. 
[[variable-scope-in-join-clauses]] -Variable scope in JOIN clauses -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Variable scope in JOIN clauses Variables defined by `JOIN` subclauses are not visible to other subclauses in the same `FROM` clause. This also applies to the `FROM` @@ -1044,16 +1023,14 @@ The variable `u` defined by the `FROM` clause is not visible inside the `JOIN` subclause, so this query returns no results. [[group-by-clauses]] -GROUP BY Clauses -~~~~~~~~~~~~~~~~ +=== GROUP BY Clauses The `GROUP BY` clause generalizes standard SQL's grouping and aggregation semantics, but it also retains backward compatibility with the standard (relational) SQL `GROUP BY` and aggregation features. [[group-variables]] -Group variables -^^^^^^^^^^^^^^^ +==== Group variables In a `GROUP BY` clause, in addition to the binding variable(s) defined for the grouping key(s), the query language allows a user to define a @@ -1354,8 +1331,7 @@ This example query returns: --------------------------------------------------------------------- [[implicit-grouping-key-variables]] -Implicit Grouping Key Variables -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== Implicit Grouping Key Variables In the query language syntax, providing named binding variables for `GROUP BY` key expressions is optional. If a grouping key is missing a @@ -1438,8 +1414,7 @@ the grouping key expression `message.authorId` is `authorId` (which is how it is referred to in the example's `SELECT` clause). [[implicit-group-variables]] -Implicit Group Variables -^^^^^^^^^^^^^^^^^^^^^^^^ +==== Implicit Group Variables The group variable itself is also optional in the `GROUP BY` syntax. If a user's query does not declare the name and structure of the group @@ -1451,8 +1426,7 @@ group variable, but is able to call SQL-92 aggregation functions as in SQL-92. 
[[aggregation-functions]] -Aggregation Functions -^^^^^^^^^^^^^^^^^^^^^ +==== Aggregation Functions In the traditional SQL, which doesn't support nested data, grouping always also involves the use of aggregation to compute properties of the @@ -1562,8 +1536,7 @@ This query returns: - [[sql-92-aggregation-functions]] -SQL-92 Aggregation Functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== SQL-92 Aggregation Functions For compatibility with the traditional SQL aggregation functions, the query language also offers SQL-92's aggregation function symbols @@ -1629,8 +1602,7 @@ the result of the generated subquery, without applying any built-in function. [[sql-92-compliant-group-by-aggregations]] -SQL-92 Compliant GROUP BY Aggregations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +==== SQL-92 Compliant GROUP BY Aggregations The query language provides full support for SQL-92 `GROUP BY` aggregation queries. The following query is such an example: @@ -1672,8 +1644,7 @@ GROUP AS `$1`(msg AS msg); ------------------------------------------------------------------------- [[column-aliases]] -Column Aliases -^^^^^^^^^^^^^^ +==== Column Aliases The query language also allows column aliases to be used as `ORDER BY` keys. @@ -1701,8 +1672,7 @@ This query returns: ------------ [[where-clauses-and-having-clauses]] -WHERE Clauses and HAVING Clauses -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=== WHERE Clauses and HAVING Clauses Both `WHERE` clauses and `HAVING` clauses are used to filter input data based on a condition expression. Only tuples for which the condition @@ -1711,8 +1681,7 @@ condition expression evaluates to `NULL` or `MISSING` the input tuple will be discarded. [[order-by-clauses]] -ORDER BY Clauses -~~~~~~~~~~~~~~~~ +=== ORDER BY Clauses The `ORDER BY` clause is used to globally sort data in either ascending order (i.e., `ASC`) or descending order (i.e., `DESC`). 
During ordering, @@ -1798,8 +1767,7 @@ This query returns: ----------------------------------------------- [[limit-clauses]] -LIMIT Clauses -~~~~~~~~~~~~~ +=== LIMIT Clauses The `LIMIT` clause is used to limit the result set to a specified constant size. The use of the `LIMIT` clause is illustrated in the next @@ -1846,8 +1814,7 @@ This query returns: ----------------------------------------------- [[with-clauses]] -WITH Clauses -~~~~~~~~~~~~ +=== WITH Clauses As in standard SQL, `WITH` clauses are available to improve the modularity of a query. The next query shows an example. @@ -1947,8 +1914,7 @@ element in the singleton array and obtain the desired scalar for the comparison. [[let-clauses]] -LET Clauses -~~~~~~~~~~~ +=== LET Clauses Similar to `WITH` clauses, `LET` clauses can be useful when a (complex) expression is used several times within a query, allowing it to be @@ -2068,8 +2034,7 @@ WHERE EXISTS ( SELECT VALUE m -------------------------------------------------- [[union-all]] -UNION ALL -~~~~~~~~~ +=== UNION ALL UNION ALL can be used to combine two input arrays or multisets into one. As in SQL, there is no ordering guarantee on the contents of the output @@ -2104,8 +2069,7 @@ This query returns: ------------------------------------------------ [[over-clauses]] -OVER Clauses -~~~~~~~~~~~~ +=== OVER Clauses All window functions must have an OVER clause to define the window partitions, the order of tuples within those partitions, and the extent @@ -2117,8 +2081,7 @@ functions can also be used as window functions, when they are used with an OVER clause. 
[[window-function-call]] -Window Function Call -^^^^^^^^^^^^^^^^^^^^ +==== Window Function Call ------------------------------------------------------------------------- WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")" @@ -2126,8 +2089,7 @@ WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")" ------------------------------------------------------------------------- [[window-function-type]] -Window Function Type -++++++++++++++++++++ +===== Window Function Type --------------------------------------------------------- WindowFunctionType ::= AggregateFunction | WindowFunction @@ -2140,8 +2102,7 @@ Refer to the link:builtins.html#WindowFunctions[Window Functions] section for a list of window functions. [[window-function-arguments]] -Window Function Arguments -+++++++++++++++++++++++++ +===== Window Function Arguments -------------------------------------------------------- WindowFunctionArguments ::= ( ()? Expression | @@ -2153,8 +2114,7 @@ section or the link:builtins.html#WindowFunctions[Window Functions] section for details of the arguments for individual functions. [[window-function-options]] -Window Function Options -^^^^^^^^^^^^^^^^^^^^^^^ +==== Window Function Options --------------------------------------------------------- WindowFunctionOptions ::= (NthValFrom)? (NullsTreatment)? @@ -2168,8 +2128,7 @@ link:builtins.html#WindowFunctions[window functions], as described below. [[nth-val-from]] -Nth Val From -++++++++++++ +===== Nth Val From ------------------------------------------ NthValFrom ::= ( | ) @@ -2184,8 +2143,7 @@ This modifier is optional. If omitted, the default setting is `FROM FIRST`. [[nulls-treatment]] -Nulls Treatment -+++++++++++++++ +===== Nulls Treatment --------------------------------------------------- NullsTreatment ::= ( | ) @@ -2202,8 +2160,7 @@ This modifier is optional. If omitted, the default setting is `RESPECT NULLS`. 
[[window-frame-variable]] -Window Frame Variable -^^^^^^^^^^^^^^^^^^^^^ +==== Window Frame Variable The AS keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents @@ -2223,8 +2180,7 @@ link:builtins.html#WindowFunctions[window function], or when using a standard SQL aggregate function with the OVER clause. [[standard-sql-aggregate-functions-with-the-over-clause]] -Standard SQL Aggregate Functions with the OVER Clause -+++++++++++++++++++++++++++++++++++++++++++++++++++++ +===== Standard SQL Aggregate Functions with the OVER Clause A standard SQL aggregate function with an OVER clause is rewritten by the query compiler using a built-in aggregate function over a frame @@ -2248,8 +2204,7 @@ rewritten as built-in aggregate functions in the presence of the GROUP BY clause. [[window-definition]] -Window Definition -^^^^^^^^^^^^^^^^^ +==== Window Definition ---------------------------------------------------------------- WindowDefinition ::= (WindowPartitionClause)? (WindowOrderClause @@ -2260,8 +2215,7 @@ The *window definition* specifies the partitioning, ordering, and framing for window functions. [[window-partition-clause]] -Window Partition Clause -+++++++++++++++++++++++ +===== Window Partition Clause ----------------------------------------------------------------------- WindowPartitionClause ::= Expression ("," Expression)* @@ -2279,8 +2233,7 @@ This clause is optional. If omitted, all tuples are united in a single partition. [[window-order-clause]] -Window Order Clause -+++++++++++++++++++ +===== Window Order Clause ------------------------------------------------------------------- WindowOrderClause ::= OrderingTerm ("," OrderingTerm)* @@ -2319,8 +2272,7 @@ To guarantee the order of the final results, use the query ORDER BY clause. [[ordering-term]] -Ordering Term -+++++++++++++ +===== Ordering Term ----------------------------------------------- OrderingTerm ::= Expression ( | )? 
@@ -2333,8 +2285,7 @@ queries. Refer to the link:#Order_By_clauses[ORDER BY Clauses] section for details. [[window-frame-clause]] -Window Frame Clause -+++++++++++++++++++ +===== Window Frame Clause ----------------------------------------------------------------------- WindowFrameClause ::= ( | | ) WindowFrameExtent @@ -2401,8 +2352,7 @@ on date or time, the expression in `Expression PRECEDING` or ordering expression. [[window-frame-extent]] -Window Frame Extent -+++++++++++++++++++ +===== Window Frame Extent ------------------------------------------------------------------------------------------ WindowFrameExtent ::= ( ( | Expression ) | ) | @@ -2454,8 +2404,7 @@ evaluates as a positive number. For `ROWS` or `GROUPS`, the `Expression` must be an integer. [[window-frame-exclusion]] -Window Frame Exclusion -++++++++++++++++++++++ +===== Window Frame Exclusion ------------------------------------------------------------------------- WindowFrameExclusion ::= ( | | | @@ -2489,8 +2438,7 @@ If the current tuple is already removed from the window frame, then it remains removed from the window frame. [[subqueries]] -Subqueries -~~~~~~~~~~ +=== Subqueries In the query language, an arbitrary subquery can appear anywhere that an expression can appear. Unlike SQL-92, as was just alluded to, the @@ -2544,8 +2492,7 @@ a collection -- regardless of where within a query the subquery occurs -- and again, its result is never automatically cast into a scalar. 
[[differences-from-sql-92]] -Differences from SQL-92 -~~~~~~~~~~~~~~~~~~~~~~~ +=== Differences from SQL-92 The query language offers the following additional features beyond SQL-92: @@ -2613,3 +2560,4 @@ query language does not conflict with these features: * MATCH predicate (tests for referential integrity) * Row and Table constructors * Preserved order for expressions in a SELECT list + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query_title.adoc index f41cdac425e..bc3d1551736 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query_title.adoc @@ -1,6 +1,5 @@ [[queries]] -3. Queries ----------- +== 3. Queries A query can be any legal expression or `SELECT` statement. A query always ends with a semicolon. @@ -8,3 +7,4 @@ always ends with a semicolon. -------------------------------------------- Query ::= (Expression | SelectStatement) ";" -------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error.adoc index 1a8451af89e..103175a69e4 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error.adoc @@ -10,8 +10,7 @@ ongoing processing of the query and immediately return an error message to the client. [[syntax-errors]] -Syntax Errors -~~~~~~~~~~~~~ +=== Syntax Errors A valid query must satisfy the grammar rules of the query language. Otherwise, a syntax error will be raised. 
@@ -49,8 +48,7 @@ Error: Syntax error: In line 3 >>WHERE type="advertiser";<< Encountered 'type' " -------------------------------------------------------------------------------------------------- [[identifier-resolution-errors]] -Identifier Resolution Errors -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=== Identifier Resolution Errors Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access. @@ -89,8 +87,7 @@ Error: Cannot resolve ambiguous alias reference for undefined identifier name ----------------------------------------------------------------------------- [[type-errors]] -Type Errors -~~~~~~~~~~~ +=== Type Errors The query compiler does type checks based on its available type information. In addition, the query runtime also reports type errors if @@ -112,8 +109,7 @@ Error: Type mismatch: function abs expects its 1st input parameter to be of type ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [[resource-errors]] -Resource Errors -~~~~~~~~~~~~~~~ +=== Resource Errors A query can potentially exhaust system resources, such as the number of open files and disk spaces. For instance, the following two resource diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error_title.adoc index 546deabebbe..6a44d815138 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/4_error_title.adoc @@ -1,3 +1,2 @@ [[errors]] -4. Errors ---------- +== 4. 
Errors diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dataset_index.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dataset_index.adoc index 503a18e2473..77ba3496f2f 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dataset_index.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dataset_index.adoc @@ -1,6 +1,5 @@ [[lifecycle-management-statements]] -Lifecycle Management Statements -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=== Lifecycle Management Statements --------------------------------------------------------------------- CreateStatement ::= "CREATE" ( DatabaseSpecification @@ -20,8 +19,7 @@ dataverses, datatypes, datasets, indexes, and user-defined query functions. [[dataverses]] - Dataverses -^^^^^^^^^^^ +==== Dataverses ------------------------------------------------------------ DatabaseSpecification ::= "DATAVERSE" Identifier IfNotExists @@ -45,8 +43,7 @@ CREATE DATAVERSE TinySocial IF NOT EXISTS; ------------------------------------------ [[types]] - Types -^^^^^^ +==== Types --------------------------------------------------------------------------------------------- TypeSpecification ::= "TYPE" FunctionOrTypeName IfNotExists "AS" ObjectTypeDef @@ -116,8 +113,7 @@ CREATE TYPE MyUserTupleType AS CLOSED { --------------------------------------- [[datasets]] - Datasets -^^^^^^^^^ +==== Datasets ------------------------------------------------------------------------------------------------------------------- DatasetSpecification ::= ( )? QualifiedName "(" QualifiedName ")" IfNotExists @@ -206,8 +202,7 @@ FacefookUserType objects. It specifies that their id field is their primary key. [[example-3]] -Example -+++++++ +===== Example ------------------------------------------------------------------------- CREATE INTERNAL DATASET GleambookUsers(GleambookUserType) PRIMARY KEY id; @@ -223,8 +218,7 @@ provide a value for this field.) 
Note that the id field's declared type must be UUID in this case. [[example-4]] -Example -+++++++ +===== Example --------------------------------------------------------------------- CREATE DATASET MyUsers(MyUserTupleType) PRIMARY KEY id AUTOGENERATED; @@ -237,8 +231,7 @@ parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format. [[example-5]] -Example -+++++++ +===== Example ----------------------------------------------------------- CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs ( @@ -250,8 +243,7 @@ CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs ( ----------------------------------------------------------- [[indices]] -Indices -^^^^^^^ +==== Indices ----------------------------------------------------------------------------------------------------- IndexSpecification ::= ( Identifier IfNotExists QualifiedName @@ -282,8 +274,7 @@ useful for accelerating exact-match queries, range search queries, and joins involving the author-id field. [[example-6]] -Example -+++++++ +===== Example ------------------------------------------------------------------- CREATE INDEX gbAuthorIdx ON GleambookMessages(authorId) TYPE BTREE; @@ -298,8 +289,7 @@ the `sendTime` field or have a mismatched type on the field cannot be inserted into the dataset. [[example-7]] -Example -+++++++ +===== Example ----------------------------------------------------------------------------------------- CREATE INDEX gbSendTimeIdx ON GleambookMessages(sendTime: datetime?) TYPE BTREE ENFORCED; @@ -314,8 +304,7 @@ have the `readTime` field or have a mismatched type on the field can still be inserted into the dataset. [[example-8]] -Example -+++++++ +===== Example --------------------------------------------------------------------- CREATE INDEX gbReadTimeIdx ON GleambookMessages(readTime: datetime?); @@ -329,8 +318,7 @@ nested screenName field. 
Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field. [[example-9]] -Example -+++++++ +===== Example ---------------------------------------------------------------------------- CREATE INDEX crpUserScrNameIdx ON ChirpMessages(user.screenName) TYPE BTREE; @@ -343,8 +331,7 @@ link:functions.html#spatial-intersect[`spatial-intersect` function] in a predicate involving the sender-location field. [[example-10]] -Example -+++++++ +===== Example --------------------------------------------------------------------------------- CREATE INDEX gbSenderLocIndex ON GleambookMessages("sender-location") TYPE RTREE; @@ -357,8 +344,7 @@ field. For details refer to the document on link:similarity.html#NGram_Index[similarity queries]. [[example-11]] -Example -+++++++ +===== Example ------------------------------------------------------------- CREATE INDEX fbUserIdx ON GleambookUsers(name) TYPE NGRAM(3); @@ -371,8 +357,7 @@ the message field. For details refer to the document on link:similarity.html#Keyword_Index[similarity queries]. [[example-12]] -Example -+++++++ +===== Example --------------------------------------------------------------------- CREATE INDEX fbMessageIdx ON GleambookMessages(message) TYPE KEYWORD; @@ -386,8 +371,7 @@ When the user would like to drop this index, the metadata can be queried to find the system-generated name. 
[[example-13]] -Example -+++++++ +===== Example ---------------------------------------------------- CREATE PRIMARY INDEX gb_pk_idx ON GleambookMessages; diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dml.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dml.adoc index f8e1d0c0d60..6a096f5f340 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dml.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_dml.adoc @@ -1,10 +1,8 @@ [[modification-statements]] -Modification statements -~~~~~~~~~~~~~~~~~~~~~~~ +=== Modification statements [[inserts]] -INSERTs -^^^^^^^ +==== INSERTs ------------------------------------------------------- InsertStatement ::= QualifiedName Query @@ -44,8 +42,7 @@ INSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user) ------------------------------------------------------------------ [[upserts]] -UPSERTs -^^^^^^^ +==== UPSERTs ------------------------------------------------------- UpsertStatement ::= QualifiedName Query @@ -79,8 +76,7 @@ UPSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user) (at the moment) in the current query language. [[deletes]] -DELETEs -^^^^^^^ +==== DELETEs ------------------------------------------------------------------------------------------------- DeleteStatement ::= QualifiedName ( ( )? Variable )? ( Expression )? 
@@ -116,3 +112,4 @@ Example ---------------------------------------- DELETE FROM GleambookUsers WHERE id = 5; ---------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_function_removal.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_function_removal.adoc index d0fe14e88b9..d02c5156c6b 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_function_removal.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_function_removal.adoc @@ -1,6 +1,5 @@ [[functions]] - Functions -^^^^^^^^^^ +==== Functions The CREATE FUNCTION statement creates a *named* function that can then be used and reused in queries. The body of a function can be any query @@ -28,8 +27,7 @@ CREATE FUNCTION friendInfo(userId) { --------------------------------------------------------- [[synonyms]] - Synonyms -^^^^^^^^^ +==== Synonyms -------------------------------------------------------------------------------- SynonymSpecification ::= "SYNONYM" QualifiedName "FOR" QualifiedName IfNotExists @@ -55,8 +53,7 @@ More information on how synonyms are resolved can be found in the appendix section on Variable Resolution. [[removal]] - Removal -^^^^^^^^ +==== Removal ------------------------------------------------------------------------ DropStatement ::= "DROP" ( "DATAVERSE" Identifier IfExists @@ -103,8 +100,7 @@ explicitly include that information. (`friendInfo@1` above denotes the 1-argument function named friendInfo in the current dataverse.) [[load-statement]] -Load Statement -^^^^^^^^^^^^^^ +==== Load Statement ----------------------------------------------------------------------------------------------------- LoadStatement ::= QualifiedName AdapterName Configuration ( )? 
@@ -133,3 +129,4 @@ Example LOAD DATASET GleambookUsers USING localfs (("path"="127.0.0.1:///Users/bignosqlfan/tinysocialnew/gbu.adm"),("format"="adm")); --------------------------------------------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_head.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_head.adoc index 571bf160d42..c7853d51eef 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_head.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/5_ddl_head.adoc @@ -1,6 +1,5 @@ [[ddl-and-dml-statements]] -5. DDL and DML statements -------------------------- +== 5. DDL and DML statements ------------------------------------------------------ Statement ::= ( ( SingleStatement )? ( ";" )+ )* diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_title.adoc index 8d7e96eae8c..6b4e0d4dedb 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_1_title.adoc @@ -1,3 +1,2 @@ [[appendix-1.-reserved-keywords]] -Appendix 1. Reserved keywords ------------------------------ +== Appendix 1. 
Reserved keywords diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_index_only.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_index_only.adoc index aeaa70e3e52..bceb6e7ed96 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_index_only.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_index_only.adoc @@ -1,6 +1,5 @@ [[controlling-index-only-plan-parameter]] -Controlling Index-Only-Plan Parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +=== Controlling Index-Only-Plan Parameter By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible. For example, if a SELECT or @@ -27,3 +26,4 @@ SET noindexonly 'true'; SELECT m.message AS message FROM GleambookMessages m where m.message = " love product-b its shortcut-menu is awesome:)"; -------------------------------------------------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parallel_sort.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parallel_sort.adoc index 77d517b1479..f90fb49de58 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parallel_sort.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parallel_sort.adoc @@ -1,6 +1,5 @@ [[parallel-sort-parameter]] -Parallel Sort Parameter -~~~~~~~~~~~~~~~~~~~~~~~ +=== Parallel Sort Parameter The following parameter enables you to activate or deactivate full parallel sort for order-by operations. 
@@ -29,3 +28,4 @@ SELECT VALUE user FROM GleambookUsers AS user ORDER BY ARRAY_LENGTH(user.friendIds) DESC; ------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parameters.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parameters.adoc index b9d49b20e1f..5dd11fdbee7 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parameters.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_parameters.adoc @@ -11,8 +11,7 @@ parameters will not affect query correctness but only impact performance characteristics, such as response time and throughput. [[parallelism-parameter]] -Parallelism Parameter -~~~~~~~~~~~~~~~~~~~~~ +=== Parallelism Parameter The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) in a cluster. A user can @@ -41,8 +40,7 @@ FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; -------------------------------------------------------------------- [[memory-parameters]] -Memory Parameters -~~~~~~~~~~~~~~~~~ +=== Memory Parameters In the system, each blocking runtime operator such as join, group-by and order-by works within a fixed memory budget, and can gracefully spill to @@ -96,3 +94,4 @@ SET `compiler.joinmemory` "132000KB"; SELECT u.name AS uname, m.message AS message FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id; -------------------------------------------------------------------- + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_title.adoc index 60d11568903..b1f1ceb8555 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_2_title.adoc @@ -1,3 +1,2 @@ [[appendix-2.-performance-tuning]] -Appendix 2. 
Performance Tuning -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +== Appendix 2. Performance Tuning diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc index 8bd03e8f474..b7fb982a313 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc @@ -32,8 +32,7 @@ because it’s possible that some object (someday) could have such a field. [[binding-variables]] -Binding Variables -~~~~~~~~~~~~~~~~~ +=== Binding Variables Variables can be bound in the following ways: @@ -163,8 +162,7 @@ bound to a multiset with the following structure: ------------------------------------------------------------ [[scoping]] -Scoping -~~~~~~~ +=== Scoping In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in @@ -249,8 +247,7 @@ other subclauses in the same `FROM` clause. This also applies to the `FROM` variable that starts the `JOIN` subclause. [[resolving-names]] -Resolving Names -~~~~~~~~~~~~~~~ +=== Resolving Names The process of name resolution begins with the leftmost identifier in the name. The rules for resolving the leftmost identifier are: @@ -268,11 +265,12 @@ clause, and items represents the items object inside that order. The rules for resolving the leftmost identifier in a FROM clause (including a JOIN subclause), or in the expression following IN in a quantified predicate, are as follows: -1. If the identifier matches a variable-name that is in scope, it + +i) If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery.) -2.
Otherwise, if the identifier is the first part of a two-part name +ii) Otherwise, if the identifier is the first part of a two-part name like `a.b`, the name is treated as `dataverse.dataset`. If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. If the designated dataset exists @@ -284,6 +282,7 @@ synonym with this name does not exist. + Datasets take precedence over synonyms, so if both a dataset and a synonym have the same name then the resolution is to the dataset. + 2. _Elsewhere in a query block_: In clauses other than FROM, a name typically identifies a field of some object. For example, if the expression `a.b` is in a SELECT or WHERE clause, it's likely that `a` @@ -291,11 +290,12 @@ represents an object and `b` represents a field in that object. + The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are: -1. If the identifier matches a variable-name that is in scope, it + +i) If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block.) -2. (The "Single Variable Rule"): Otherwise, if the FROM clause in the +ii) (The "Single Variable Rule"): Otherwise, if the FROM clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. For example, in the query `FROM customer SELECT address`, the identifier @@ -316,6 +316,7 @@ Exception: In a query that has a GROUP BY clause, the Single Variable Rule does not apply in any clauses that occur after the GROUP BY because, in these clauses, the variables bound by the FROM clause are no longer in scope. In clauses after GROUP BY, only Rule 2.1 applies. + 3.
In an ORDER BY clause following a UNION ALL expression: + The leftmost identifier is treated as a field-access on the objects that @@ -332,6 +333,7 @@ In the result of this query, objects that have a foo field will be ordered by the value of this field; objects that have no foo field will appear at at the beginning of the query result (in ascending order) or at the end (in descending order.) + 4. _In a standalone expression_: If a query consists of a standalone expression then identifiers inside that expression are resolved according to Rule 1. For example, if the whole query is @@ -342,6 +344,7 @@ SELECT statements in a standalone expression are still resolved according to Rules 1-3. For example, if the whole query is `ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )` then `salary` is resolved as `e.salary` following the "Single Variable Rule" (Rule 2.2). + 5. Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. The name resolves diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_title.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_title.adoc index 6ca6356c432..8ed20df51c3 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_title.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_title.adoc @@ -1,3 +1,2 @@ [[appendix-3.-variable-bindings-and-name-resolution]] -Appendix 3. Variable Bindings and Name Resolution -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +== Appendix 3. 
Variable Bindings and Name Resolution From 5573eaa1cebf96a8ed04d9972ef130078f9ccde4 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Sat, 25 Jan 2020 13:01:42 +0000 Subject: [PATCH 14/22] Setting toc levels Change-Id: Ic39c81c5fe5a8b883e627e3d0d0941673872ae7e --- asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc | 1 + asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc | 1 + asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc | 1 + 3 files changed, 3 insertions(+) diff --git a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc index 770645d35a2..e51d2b37141 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc @@ -1,6 +1,7 @@ = Builtin Functions :includedir: ../../shared/modules/builtins/partials :toc: +:toclevels: 1 include::{includedir}/0_toc_common.adoc[] include::{includedir}/1_numeric_common.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc index ca42d4be836..5a3c2c547f1 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc @@ -1,6 +1,7 @@ = Builtin Functions :includedir: ../../shared/modules/builtins/partials :toc: +:toclevels: 1 include::{includedir}/0_toc_common.adoc[] include::{includedir}/1_numeric_common.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc index 75300f8f0d0..0c2011d8af0 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc @@ -1,6 +1,7 @@ = The Query Language :includedir: ../../shared/modules/sqlpp/partials :toc: +:toclevels: 1 include::{includedir}/1_intro.adoc[] include::{includedir}/2_expr_title.adoc[] From 
d0496ef6d9af16415e855b6a07785bf1fa2bd6ef Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 19:46:55 +0000 Subject: [PATCH 15/22] Create datamodel Asciidoc partials Change-Id: Ic6f81ecaa6bd959649235268f1799b3ee3361bde --- .../src/main/datamodel/datamodel_composite.md | 57 --- .../main/datamodel/datamodel_incomplete.md | 54 --- .../datamodel/datamodel_primitive_common.md | 49 --- .../datamodel/datamodel_primitive_delta.md | 269 ------------- .../datamodel/datamodel_composite.adoc | 84 ++++ .../datamodel/datamodel_incomplete.adoc | 56 +++ .../datamodel/datamodel_primitive_common.adoc | 48 +++ .../datamodel/datamodel_primitive_delta.adoc | 372 ++++++++++++++++++ 8 files changed, 560 insertions(+), 429 deletions(-) delete mode 100644 asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md delete mode 100644 asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md delete mode 100644 asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md delete mode 100644 asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md create mode 100644 asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc create mode 100644 asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc diff --git a/asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md deleted file mode 100644 index 92b037405d0..00000000000 --- a/asterixdb/asterix-doc/src/main/datamodel/datamodel_composite.md +++ /dev/null @@ -1,57 +0,0 @@ - - - -## Derived Types ## - -### Object ### -An `object` contains a set of fields, where each field is described by its name and type. An object type may be defined as either open or closed. 
Open objects (instances of open object types) are permitted to contain fields that are not part of the type definition, while closed objects do not permit their instances to carry extra fields. An example type definition for an object is: - - create type SoldierType as open { - name: string?, - rank: string, - serialno: int - }; - -Syntactically, object constructors are surrounded by curly braces "{...}". -Some examples of legitimate instances of the above type include: - - { "name": "Joe Blow", "rank": "Sergeant", "serialno": 1234567 } - { "rank": "Private", "serialno": 9876543 } - { "name": "Sally Forth", "rank": "Major", "serialno": 2345678, "gender": "F" } - -The first instance has all of the type's prescribed content. The second instance is missing the name field, which is fine because it is optional (due to the ?). The third instance has an extra field; that is fine because the type definition specifies that it is open (which is also true by default, if open is not specified). To more tightly control object content, specifying closed instead of open in the type definition for SoldierType would have made the third example instance an invalid instance of the type. - -### Array ### -An `array` is a container that holds a fixed number of values. Array constructors are denoted by brackets: "[...]". - -An example would be - - - ["alice", 123, "bob", null] - - -### Multiset ### -A `multiset` is a generalization of the concept of a set that, unlike a set, allows multiple instances of the multiset's elements. - Multiset constructors are denoted by two opening curly braces followed by data and two closing curly braces, like "{{...}}". 
- -An example would be - - - {{"hello", 9328, "world", [1, 2, null]}} diff --git a/asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md deleted file mode 100644 index c65ed85d4dc..00000000000 --- a/asterixdb/asterix-doc/src/main/datamodel/datamodel_incomplete.md +++ /dev/null @@ -1,54 +0,0 @@ - - -## Incomplete Information Types ## - -### Null ### -`null` is a special value that is often used to represent an unknown value. -For example, a user might not be able to know the value of a field and let it be `null`. - - * Example: - - { "field": null }; - - - * The expected result is: - - { "field": null } - - -### Missing ### -`missing` indicates that a name-value pair is missing from an object. -If a missing name-value pair is accessed, an empty result value is returned by the query. - -As neither the data model nor the system enforces homogeneity for datasets or collections, -items in a dataset or collection can be of heterogeneous types and -so a field can be present in one object and `missing` in another. - - * Example: - - { "field": missing }; - - - * The expected result is: - - { } - -Since a field with value `missing` means the field is absent, we get an empty object. - diff --git a/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md deleted file mode 100644 index 4c0b2e04ff1..00000000000 --- a/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_common.md +++ /dev/null @@ -1,49 +0,0 @@ - - -## Primitive Types ## - -### Boolean ### -`boolean` data type can have one of the two values: _*true*_ or _*false*_. - - * Example: - - { "true": true, "false": false }; - - - * The expected result is: - - { "true": true, "false": false } - - -### String ### -`string` represents a sequence of characters. The total length of the sequence can be up to 2,147,483,648. 
- - * Example: - - { "v1": string("This is a string."), "v2": string("\"This is a quoted string\"") }; - - - * The expected result is: - - { "v1": "This is a string.", "v2": "\"This is a quoted string\"" } - - - - diff --git a/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md deleted file mode 100644 index dc353817158..00000000000 --- a/asterixdb/asterix-doc/src/main/datamodel/datamodel_primitive_delta.md +++ /dev/null @@ -1,269 +0,0 @@ - - -### Tinyint / Smallint / Integer (Int) / Bigint ### -Integer types using 8, 16, 32, or 64 bits. The ranges of these types are: - -- `tinyint`: -128 to 127 -- `smallint`: -32768 to 32767 -- `integer`: -2147483648 to 2147483647 -- `bigint`: -9223372036854775808 to 9223372036854775807 - -`int` is an abbreviated alias for integer. - - * Example: - - { "tinyint": tiny("125"), "smallint": smallint("32765"), "integer": 294967295, "bigint": bigint("1700000000000000000")}; - - - * The expected result is: - - { "tinyint": 125, "smallint": 32765, "integer": 294967295, "bigint": 1700000000000000000 } - -### Float ### -`float` represents approximate numeric data values using 4 bytes. The range of a float value can be -from 2^(-149) to (2-2^(-23)·2^(127) for both positive and negative. Beyond these ranges will get `INF` or `-INF`. - - * Example: - - { "v1": float("NaN"), "v2": float("INF"), "v3": float("-INF"), "v4": float("-2013.5") }; - - - * The expected result is: - - { "v1": "NaN", "v2": "INF", "v3": "-INF", "v4": -2013.5 } - - -### Double (double precision) ### -`double` represents approximate numeric data values using 8 bytes. The range of a double value can be from (2^(-1022)) to (2-2^(-52))·2^(1023) -for both positive and negative. Beyond these ranges will get `INF` or `-INF`. 
- - * Example: - - { "v1": double("NaN"), "v2": double("INF"), "v3": double("-INF"), "v4": "-2013.593823748327284" }; - - - * The expected result is: - - { "v1": "NaN", "v2": "INF", "v3": "-INF", "v4": -2013.5938237483274 } - -`Double precision` is an alias of `double`. - -### Binary ### -`binary` represents a sequence of bytes. It can be constructed from a `hex` or a `base64` string sequence. -The total length of the byte sequence can be up to 2,147,483,648. - - * Example: - - { - "hex1" : hex("ABCDEF0123456789"), - "hex2": hex("abcdef0123456789"), - "base64_1" : base64("0123456789qwertyui+/"), - "base64_2" : base64('QXN0ZXJpeA==') - }; - - * The default output format is in `hex` format. Thus, the expected result is: - - { - "hex1": hex("ABCDEF0123456789"), - "hex2": hex("ABCDEF0123456789"), - "base64_1": hex("D35DB7E39EBBF3DAB07ABB72BA2FBF"), - "base64_2": hex("41737465726978") - } - - -### Point ### -`point` is the fundamental two-dimensional building block for spatial types. It consists of two `double` coordinates x and y. - - * Example: - - { "v1": point("80.10d, -10E5"), "v2": point("5.10E-10d, -10E5") }; - - - * The expected result is: - - { "v1": point("80.1,-1000000.0"), "v2": point("5.1E-10,-1000000.0") } - - -### Line ### -`line` consists of two points that represent the start and the end points of a line segment. - - * Example: - - { "v1": line("10.1234,11.1e-1 +10.2E-2,-11.22"), "v2": line("0.1234,-1.00e-10 +10.5E-2,-01.02") }; - - - * The expected result is: - - { "v1": line("10.1234,1.11 0.102,-11.22"), "v2": line("0.1234,-1.0E-10 0.105,-1.02") } - - -### Rectangle ### -`rectangle` consists of two points that represent the _*bottom left*_ and _*upper right*_ corners of a rectangle. 
- - * Example: - - { "v1": rectangle("5.1,11.8 87.6,15.6548"), "v2": rectangle("0.1234,-1.00e-10 5.5487,0.48765") }; - - - * The expected result is: - - { "v1": rectangle("5.1,11.8 87.6,15.6548"), "v2": rectangle("0.1234,-1.0E-10 5.5487,0.48765") } - - -### Circle ### -`circle` consists of one point that represents the center of the circle and a radius of type `double`. - - * Example: - - { "v1": circle("10.1234,11.1e-1 +10.2E-2"), "v2": circle("0.1234,-1.00e-10 +10.5E-2") }; - - - * The expected result is: - - { "v1": circle("10.1234,1.11 0.102"), "v2": circle("0.1234,-1.0E-10 0.105") } - - -### Polygon ### -`polygon` consists of _*n*_ points that represent the vertices of a _*simple closed*_ polygon. - - * Example: - - { - "v1": polygon("-1.2,+1.3e2 -2.14E+5,2.15 -3.5e+2,03.6 -4.6E-3,+4.81"), - "v2": polygon("-1.0,+10.5e2 -02.15E+50,2.5 -1.0,+3.3e3 -2.50E+05,20.15 +3.5e+2,03.6 -4.60E-3,+4.75 -2,+1.0e2 -2.00E+5,20.10 30.5,03.25 -4.33E-3,+4.75") - }; - - - * The expected result is: - - { - "v1": polygon("-1.2,130.0 -214000.0,2.15 -350.0,3.6 -0.0046,4.81"), - "v2": polygon("-1.0,1050.0 -2.15E50,2.5 -1.0,3300.0 -250000.0,20.15 350.0,3.6 -0.0046,4.75 -2.0,100.0 -200000.0,20.1 30.5,3.25 -0.00433,4.75") } - } - - -### Date ### -`date` represents a time point along the Gregorian calendar system specified by the year, month and day. ASTERIX supports the date from `-9999-01-01` to `9999-12-31`. - -A date value can be represented in two formats, extended format and basic format. - - * Extended format is represented as `[-]yyyy-mm-dd` for `year-month-day`. Each field should be padded if there are less digits than the format specified. - * Basic format is in the format of `[-]yyyymmdd`. - - * Example: - - { "v1": date("2013-01-01"), "v2": date("-19700101") }; - - - * The expected result is: - - { "v1": date("2013-01-01"), "v2": date("-1970-01-01") } - - -### Time ### -`time` type describes the time within the range of a day. 
It is represented by three fields: hour, minute and second. Millisecond field is optional as the fraction of the second field. Its extended format is as `hh:mm:ss[.mmm]` and the basic format is `hhmmss[mmm]`. The value domain is from `00:00:00.000` to `23:59:59.999`. - -Timezone field is optional for a time value. Timezone is represented as `[+|-]hh:mm` for extended format or `[+|-]hhmm` for basic format. Note that the sign designators cannot be omitted. `Z` can also be used to represent the UTC local time. If no timezone information is given, it is UTC by default. - - * Example: - - { "v1": time("12:12:12.039Z"), "v2": time("000000000-0800") }; - - - * The expected result is: - - { "v1": time("12:12:12.039Z"), "v2": time("08:00:00.000Z") } - - -### Datetime (Timestamp) ### -A `datetime` value is a combination of an `date` and `time`, representing a fixed time point along the Gregorian calendar system. The value is among `-9999-01-01 00:00:00.000` and `9999-12-31 23:59:59.999`. - -A `datetime` value is represented as a combination of the representation of its `date` part and `time` part, separated by a separator `T`. Either extended or basic format can be used, and the two parts should be the same format. - -Millisecond field and timezone field are optional, as specified in the `time` type. - - * Example: - - { "v1": datetime("2013-01-01T12:12:12.039Z"), "v2": datetime("-19700101T000000000-0800") }; - - - * The expected result is: - - { "v1": datetime("2013-01-01T12:12:12.039Z"), "v2": datetime("-1970-01-01T08:00:00.000Z") } - -`timestamp` is an alias of `datetime`. - -### Duration/Year_month_duration/Day_time_duration ### -`duration` represents a duration of time. A duration value is specified by integers on at least one of the following fields: year, month, day, hour, minute, second, and millisecond. - -A duration value is in the format of `[-]PnYnMnDTnHnMn.mmmS`. 
The millisecond part (as the fraction of the second field) is optional, and when no millisecond field is used, the decimal point should also be absent. - -Negative durations are also supported for the arithmetic operations between time instance types (`date`, `time` and `datetime`), and is used to roll the time back for the given duration. For example `date("2012-01-01") + duration("-P3D")` will return `date("2011-12-29")`. - -There are also two sub-duration types, namely `year_month_duration` and `day_time_duration`. -`year_month_duration` represents only the years and months of a duration, -while `day_time_duration` represents only the day to millisecond fields. -Different from the `duration` type, both these two subtypes are totally ordered, so they can be used for comparison and -index construction. - -Note that a canonical representation of the duration is always returned, regardless whether the duration is in the canonical representation or not from the user's input. More information about canonical representation can be found from [XPath dayTimeDuration Canonical Representation](http://www.w3.org/TR/xpath-functions/#canonical-dayTimeDuration) and [yearMonthDuration Canonical Representation](http://www.w3.org/TR/xpath-functions/#canonical-yearMonthDuration). - - * Example: - - { "v1": duration("P100Y12MT12M"), "v2": duration("-PT20.943S") }; - - - * The expected result is: - - { "v1": duration("P101YT12M"), "v2": duration("-PT20.943S") } - - -### Interval ### -`interval` represents inclusive-exclusive ranges of time. It is defined by two time point values with the same temporal type(`date`, `time` or `datetime`). 
- - * Example: - - { - "v1": interval(date("2013-01-01"), date("20130505")), - "v2": interval(time("00:01:01"), time("213901049+0800")), - "v3": interval(datetime("2013-01-01T00:01:01"), datetime("20130505T213901049+0800")) - }; - - - * The expected result is: - - { - "v1": interval(date("2013-01-01"), date("2013-05-05")), - "v2": interval(time("00:01:01.000Z"), time("13:39:01.049Z")), - "v3": interval(datetime("2013-01-01T00:01:01.000Z"), datetime("2013-05-05T13:39:01.049Z")) - } - -### UUID ### -`uuid` represents a UUID value, which stands for Universally unique identifier. It is defined by a canonical format using hexadecimal text with inserted hyphen characters. (E.g.: 5a28ce1e-6a74-4201-9e8f-683256e5706f). This type is generally used to store auto-generated primary key values. - - * Example: - - return { "v1":uuid("5c848e5c-6b6a-498f-8452-8847a2957421") } - - - * The expected result is: - - { "v1": uuid("5c848e5c-6b6a-498f-8452-8847a2957421") } - diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc new file mode 100644 index 00000000000..255f405177b --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc @@ -0,0 +1,84 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. 
See the License for the +specific language governing permissions and limitations +under the License. +//// + +[[derived-types]] +== Derived Types + +[[object]] +=== Object + +An `object` contains a set of fields, where each field is described by its +name and type. An object type may be defined as either open or closed. +Open objects (instances of open object types) are permitted to contain +fields that are not part of the type definition, while closed objects do +not permit their instances to carry extra fields. An example type +definition for an object is: + +------------------------------------- + create type SoldierType as open { + name: string?, + rank: string, + serialno: int + }; +------------------------------------- + +Syntactically, object constructors are surrounded by curly braces +"\{...}". Some examples of legitimate instances of the above type +include: + +---------------------------------------------------------------------------------- + { "name": "Joe Blow", "rank": "Sergeant", "serialno": 1234567 } + { "rank": "Private", "serialno": 9876543 } + { "name": "Sally Forth", "rank": "Major", "serialno": 2345678, "gender": "F" } +---------------------------------------------------------------------------------- + +The first instance has all of the type's prescribed content. The second +instance is missing the name field, which is fine because it is optional +(due to the ?). The third instance has an extra field; that is fine +because the type definition specifies that it is open (which is also +true by default, if open is not specified). To more tightly control +object content, specifying closed instead of open in the type definition +for SoldierType would have made the third example instance an invalid +instance of the type. + +[[array]] +=== Array + +An `array` is a container that holds a fixed number of values. Array +constructors are denoted by brackets: "[...]". 
+ +An example would be + +------------------------------- + ["alice", 123, "bob", null] +------------------------------- + +[[multiset]] +=== Multiset + +A `multiset` is a generalization of the concept of a set that, unlike a +set, allows multiple instances of the multiset's elements. Multiset +constructors are denoted by two opening curly braces followed by data +and two closing curly braces, like "\{\{...}}". + +An example would be + +-------------------------------------------- + {{"hello", 9328, "world", [1, 2, null]}} +-------------------------------------------- diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc new file mode 100644 index 00000000000..1b9d4fb6a57 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc @@ -0,0 +1,56 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +//// + +[#IncompleteInformationTypes] +== Incomplete Information Types + +[#IncompleteInformationTypesNull] +=== Null + +`null` is a special value that is often used to represent an unknown value. +For example, a user might not be able to know the value of a field and let it be `null`. 
+ +* Example: + + { "field": null }; + +* The expected result is: + + { "field": null } + +[#IncompleteInformationTypesMissing] +=== Missing + +`missing` indicates that a name-value pair is missing from an object. +If a missing name-value pair is accessed, an empty result value is returned by the query. + +As neither the data model nor the system enforces homogeneity for datasets or collections, +items in a dataset or collection can be of heterogeneous types and +so a field can be present in one object and `missing` in another. + +* Example: + + { "field": missing }; + +* The expected result is: + + { } + +Since a field with value `missing` means the field is absent, we get an empty object. + diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc new file mode 100644 index 00000000000..5ed3651ab5a --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc @@ -0,0 +1,48 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +//// + +[#PrimitiveTypes] +== Primitive Types + +[#PrimitiveTypesBoolean] +=== Boolean + +`boolean` data type can have one of the two values: _*true*_ or _*false*_. 
+ +* Example: + + { "true": true, "false": false }; + +* The expected result is: + + { "true": true, "false": false } + +[#PrimitiveTypesString] +=== String + +`string` represents a sequence of characters. The total length of the sequence can be up to 2,147,483,648. + +* Example: + + { "v1": string("This is a string."), "v2": string("\"This is a quoted string\"") }; + +* The expected result is: + + { "v1": "This is a string.", "v2": "\"This is a quoted string\"" } + diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc new file mode 100644 index 00000000000..75513c1fd86 --- /dev/null +++ b/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc @@ -0,0 +1,372 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +//// + +[[tinyint-smallint-integer-int-bigint]] +=== Tinyint / Smallint / Integer (Int) / Bigint + +Integer types using 8, 16, 32, or 64 bits. The ranges of these types +are: + +* `tinyint`: -128 to 127 +* `smallint`: -32768 to 32767 +* `integer`: -2147483648 to 2147483647 +* `bigint`: -9223372036854775808 to 9223372036854775807 + +`int` is an abbreviated alias for integer. 
+ +* Example: ++ +------------------------------------------------------------------------------------------------------------------------ +{ "tinyint": tiny("125"), "smallint": smallint("32765"), "integer": 294967295, "bigint": bigint("1700000000000000000")}; +------------------------------------------------------------------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------------------------------------------ +{ "tinyint": 125, "smallint": 32765, "integer": 294967295, "bigint": 1700000000000000000 } +------------------------------------------------------------------------------------------ + +[[float]] +=== Float + +`float` represents approximate numeric data values using 4 bytes. The +range of a float value can be from 2^-149^ to (2-2^-23^)·2^127^ for +both positive and negative. Values beyond this range become `INF` or +`-INF`. + +* Example: ++ +---------------------------------------------------------------------------------------- +{ "v1": float("NaN"), "v2": float("INF"), "v3": float("-INF"), "v4": float("-2013.5") }; +---------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------- +{ "v1": "NaN", "v2": "INF", "v3": "-INF", "v4": -2013.5 } +--------------------------------------------------------- + +[[double-double-precision]] +=== Double (double precision) + +`double` represents approximate numeric data values using 8 bytes. The +range of a double value can be from 2^-1022^ to (2-2^-52^)·2^1023^ +for both positive and negative. Values beyond this range become `INF` or +`-INF`. 
+ +* Example: ++ +-------------------------------------------------------------------------------------------------- +{ "v1": double("NaN"), "v2": double("INF"), "v3": double("-INF"), "v4": double("-2013.593823748327284") }; +-------------------------------------------------------------------------------------------------- +* The expected result is: ++ +--------------------------------------------------------------------- +{ "v1": "NaN", "v2": "INF", "v3": "-INF", "v4": -2013.5938237483274 } +--------------------------------------------------------------------- + +`Double precision` is an alias of `double`. + +[[binary]] +=== Binary + +`binary` represents a sequence of bytes. It can be constructed from a +`hex` or a `base64` string sequence. The total length of the byte +sequence can be up to 2,147,483,648. + +* Example: ++ +---------------------------------------------- +{ + "hex1" : hex("ABCDEF0123456789"), + "hex2": hex("abcdef0123456789"), + "base64_1" : base64("0123456789qwertyui+/"), + "base64_2" : base64('QXN0ZXJpeA==') +}; +---------------------------------------------- +* The default output format is `hex`. Thus, the expected +result is: ++ +---------------------------------------------------- +{ + "hex1": hex("ABCDEF0123456789"), + "hex2": hex("ABCDEF0123456789"), + "base64_1": hex("D35DB7E39EBBF3DAB07ABB72BA2FBF"), + "base64_2": hex("41737465726978") +} +---------------------------------------------------- + +[[point]] +=== Point + +`point` is the fundamental two-dimensional building block for spatial +types. It consists of two `double` coordinates x and y. 
+ +* Example: ++ +------------------------------------------------------------------ +{ "v1": point("80.10d, -10E5"), "v2": point("5.10E-10d, -10E5") }; +------------------------------------------------------------------ +* The expected result is: ++ +--------------------------------------------------------------------- +{ "v1": point("80.1,-1000000.0"), "v2": point("5.1E-10,-1000000.0") } +--------------------------------------------------------------------- + +[[line]] +=== Line + +`line` consists of two points that represent the start and the end +points of a line segment. + +* Example: ++ +-------------------------------------------------------------------------------------------------- +{ "v1": line("10.1234,11.1e-1 +10.2E-2,-11.22"), "v2": line("0.1234,-1.00e-10 +10.5E-2,-01.02") }; +-------------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------- +{ "v1": line("10.1234,1.11 0.102,-11.22"), "v2": line("0.1234,-1.0E-10 0.105,-1.02") } +-------------------------------------------------------------------------------------- + +[[rectangle]] +=== Rectangle + +`rectangle` consists of two points that represent the __bottom left__ +and __upper right__ corners of a rectangle. 
+ +* Example: ++ +------------------------------------------------------------------------------------------------- +{ "v1": rectangle("5.1,11.8 87.6,15.6548"), "v2": rectangle("0.1234,-1.00e-10 5.5487,0.48765") }; +------------------------------------------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------------------------- +{ "v1": rectangle("5.1,11.8 87.6,15.6548"), "v2": rectangle("0.1234,-1.0E-10 5.5487,0.48765") } +----------------------------------------------------------------------------------------------- + +[[circle]] +=== Circle + +`circle` consists of one point that represents the center of the circle +and a radius of type `double`. + +* Example: ++ +---------------------------------------------------------------------------------------- +{ "v1": circle("10.1234,11.1e-1 +10.2E-2"), "v2": circle("0.1234,-1.00e-10 +10.5E-2") }; +---------------------------------------------------------------------------------------- +* The expected result is: ++ +----------------------------------------------------------------------------- +{ "v1": circle("10.1234,1.11 0.102"), "v2": circle("0.1234,-1.0E-10 0.105") } +----------------------------------------------------------------------------- + +[[polygon]] +=== Polygon + +`polygon` consists of __n__ points that represent the vertices of a +__simple closed__ polygon. 
+ +* Example: ++ +--------------------------------------------------------------------------------------------------------------------------------------------------------- +{ + "v1": polygon("-1.2,+1.3e2 -2.14E+5,2.15 -3.5e+2,03.6 -4.6E-3,+4.81"), + "v2": polygon("-1.0,+10.5e2 -02.15E+50,2.5 -1.0,+3.3e3 -2.50E+05,20.15 +3.5e+2,03.6 -4.60E-3,+4.75 -2,+1.0e2 -2.00E+5,20.10 30.5,03.25 -4.33E-3,+4.75") +}; +--------------------------------------------------------------------------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------------------------------------------------------------- +{ + "v1": polygon("-1.2,130.0 -214000.0,2.15 -350.0,3.6 -0.0046,4.81"), + "v2": polygon("-1.0,1050.0 -2.15E50,2.5 -1.0,3300.0 -250000.0,20.15 350.0,3.6 -0.0046,4.75 -2.0,100.0 -200000.0,20.1 30.5,3.25 -0.00433,4.75") +} +-------------------------------------------------------------------------------------------------------------------------------------------------- + +[[date]] +=== Date + +`date` represents a time point along the Gregorian calendar system +specified by the year, month and day. ASTERIX supports dates from +`-9999-01-01` to `9999-12-31`. + +A date value can be represented in two formats, extended format and +basic format. + +* Extended format is represented as `[-]yyyy-mm-dd` for +`year-month-day`. Each field should be padded if there are fewer digits +than the format specified. +* Basic format is in the format of `[-]yyyymmdd`. 
+* Example: ++ +------------------------------------------------------ +{ "v1": date("2013-01-01"), "v2": date("-19700101") }; +------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------- +{ "v1": date("2013-01-01"), "v2": date("-1970-01-01") } +------------------------------------------------------- + +[[time]] +=== Time + +The `time` type describes the time within the range of a day. It is +represented by three fields: hour, minute and second. The millisecond field +is optional, as the fraction of the second field. Its extended format is +`hh:mm:ss[.mmm]` and the basic format is `hhmmss[mmm]`. The value +domain is from `00:00:00.000` to `23:59:59.999`. + +The timezone field is optional for a time value. Timezone is represented as +`[+|-]hh:mm` for extended format or `[+|-]hhmm` for basic format. Note +that the sign designators cannot be omitted. `Z` can also be used to +represent the UTC local time. If no timezone information is given, it is +UTC by default. + +* Example: ++ +-------------------------------------------------------------- +{ "v1": time("12:12:12.039Z"), "v2": time("000000000-0800") }; +-------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------ +{ "v1": time("12:12:12.039Z"), "v2": time("08:00:00.000Z") } +------------------------------------------------------------ + +[[datetime-timestamp]] +=== Datetime (Timestamp) + +A `datetime` value is a combination of a `date` and a `time`, +representing a fixed time point along the Gregorian calendar system. The +value is between `-9999-01-01 00:00:00.000` and `9999-12-31 23:59:59.999`. + +A `datetime` value is represented as a combination of the representation +of its `date` part and `time` part, separated by a separator `T`. Either +extended or basic format can be used, and the two parts should be the +same format. 
+ +Millisecond field and timezone field are optional, as specified in the +`time` type. + +* Example: ++ +------------------------------------------------------------------------------------------- +{ "v1": datetime("2013-01-01T12:12:12.039Z"), "v2": datetime("-19700101T000000000-0800") }; +------------------------------------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------------------------------------- +{ "v1": datetime("2013-01-01T12:12:12.039Z"), "v2": datetime("-1970-01-01T08:00:00.000Z") } +------------------------------------------------------------------------------------------- + +`timestamp` is an alias of `datetime`. + +[[durationyear_month_durationday_time_duration]] +=== Duration/Year_month_duration/Day_time_duration + +`duration` represents a duration of time. A duration value is specified +by integers on at least one of the following fields: year, month, day, +hour, minute, second, and millisecond. + +A duration value is in the format of `[-]PnYnMnDTnHnMn.mmmS`. The +millisecond part (as the fraction of the second field) is optional, and +when no millisecond field is used, the decimal point should also be +absent. + +Negative durations are also supported for the arithmetic operations +between time instance types (`date`, `time` and `datetime`), and are used +to roll the time back for the given duration. For example +`date("2012-01-01") + duration("-P3D")` will return +`date("2011-12-29")`. + +There are also two sub-duration types, namely `year_month_duration` and +`day_time_duration`. `year_month_duration` represents only the years and +months of a duration, while `day_time_duration` represents only the day +to millisecond fields. Unlike the `duration` type, both of these +subtypes are totally ordered, so they can be used for comparison and +index construction. 
+ +Note that a canonical representation of the duration is always returned, +regardless of whether the duration in the user's input is in the canonical +representation or not. More information about canonical +representation can be found in +http://www.w3.org/TR/xpath-functions/#canonical-dayTimeDuration[XPath +dayTimeDuration Canonical Representation] and +http://www.w3.org/TR/xpath-functions/#canonical-yearMonthDuration[yearMonthDuration +Canonical Representation]. + +* Example: ++ +----------------------------------------------------------------- +{ "v1": duration("P100Y12MT12M"), "v2": duration("-PT20.943S") }; +----------------------------------------------------------------- +* The expected result is: ++ +------------------------------------------------------------- +{ "v1": duration("P101YT12M"), "v2": duration("-PT20.943S") } +------------------------------------------------------------- + +[[interval]] +=== Interval + +`interval` represents inclusive-exclusive ranges of time. It is defined +by two time point values with the same temporal type (`date`, `time` or +`datetime`). 
+ +* Example: ++ +-------------------------------------------------------------------------------------- +{ + "v1": interval(date("2013-01-01"), date("20130505")), + "v2": interval(time("00:01:01"), time("213901049+0800")), + "v3": interval(datetime("2013-01-01T00:01:01"), datetime("20130505T213901049+0800")) +}; +-------------------------------------------------------------------------------------- +* The expected result is: ++ +-------------------------------------------------------------------------------------------- +{ + "v1": interval(date("2013-01-01"), date("2013-05-05")), + "v2": interval(time("00:01:01.000Z"), time("13:39:01.049Z")), + "v3": interval(datetime("2013-01-01T00:01:01.000Z"), datetime("2013-05-05T13:39:01.049Z")) +} +-------------------------------------------------------------------------------------------- + +[[uuid]] +=== UUID + +`uuid` represents a UUID value, which stands for Universally unique +identifier. It is defined by a canonical format using hexadecimal text +with inserted hyphen characters. (E.g.: +5a28ce1e-6a74-4201-9e8f-683256e5706f). This type is generally used to +store auto-generated primary key values. 
+ +* Example: ++ +------------------------------------------------------------ +return { "v1":uuid("5c848e5c-6b6a-498f-8452-8847a2957421") } +------------------------------------------------------------ +* The expected result is: ++ +------------------------------------------------------ +{ "v1": uuid("5c848e5c-6b6a-498f-8452-8847a2957421") } +------------------------------------------------------ From 0ef7d5c583deb780b7ee00fb67a7b0aeb786d170 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 19:47:18 +0000 Subject: [PATCH 16/22] Add datamodel main file Change-Id: I3b58343583c8943d264ea5a7bc008472c93a6cd3 --- .../src/main/datamodel/datamodel_header.md | 55 ------------------- .../src/site/asciidoc/datamodel.adoc | 36 ++++++++++++ 2 files changed, 36 insertions(+), 55 deletions(-) delete mode 100644 asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md create mode 100644 asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc diff --git a/asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md b/asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md deleted file mode 100644 index cc66a3f7861..00000000000 --- a/asterixdb/asterix-doc/src/main/datamodel/datamodel_header.md +++ /dev/null @@ -1,55 +0,0 @@ - - -# The Asterix Data Model (ADM) # - -## Table of Contents ## - -* [Primitive Types](#PrimitiveTypes) - * [Boolean](#PrimitiveTypesBoolean) - * [String](#PrimitiveTypesString) - * [Tinyint / Smallint / Integer (Int) / Bigint](#PrimitiveTypesInt) - * [Float](#PrimitiveTypesFloat) - * [Double (Double Precision)](#PrimitiveTypesDouble) - * [Binary](#PrimitiveTypesBinary) - * [Point](#PrimitiveTypesPoint) - * [Line](#PrimitiveTypesLine) - * [Rectangle](#PrimitiveTypesRectangle) - * [Circle](#PrimitiveTypesCircle) - * [Polygon](#PrimitiveTypesPolygon) - * [Date](#PrimitiveTypesDate) - * [Time](#PrimitiveTypesTime) - * [Datetime (Timestamp)](#PrimitiveTypesDateTime) - * [Duration/Year_month_duration/Day_time_duration](#PrimitiveTypesDuration) 
- * [Interval](#PrimitiveTypesInterval) - * [UUID](#PrimitiveTypesUUID) -* [Incomplete Information Types](#IncompleteInformationTypes) - * [Null](#IncompleteInformationTypesNull) - * [Missing](#IncompleteInformationTypesMissing) -* [Derived Types](#DerivedTypes) - * [Object](#DerivedTypesObject) - * [Array](#DerivedTypesArray) - * [Multiset](#DerivedTypesMultiset) - -An instance of Asterix data model (ADM) can be a _*primitive type*_ (`boolean`, -`tinyint`, `smallint`, `integer`, `bigint`, `string`, `float`, `double`, `date`, -`time`, `datetime`, etc.), a _*special type*_ (`null` or `missing`), or a _*derived type*_. - -The type names are case-insensitive, e.g., both `BIGINT` and `bigint` are acceptable. - diff --git a/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc b/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc new file mode 100644 index 00000000000..522ff52cca5 --- /dev/null +++ b/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc @@ -0,0 +1,36 @@ +//// +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+//// + += The Asterix Data Model (ADM) +:includedir: ../shared/modules/datamodel/partials +:toc: +:toclevels: 2 + +An instance of Asterix data model (ADM) can be a __primitive type__ +(`boolean`, `tinyint`, `smallint`, `integer`, `bigint`, `string`, +`float`, `double`, `date`, `time`, `datetime`, etc.), a __special type__ +(`null` or `missing`), or a __derived type__. + +The type names are case-insensitive, e.g., both `BIGINT` and `bigint` +are acceptable. + +include::{includedir}/datamodel_primitive_common.adoc[] +include::{includedir}/datamodel_primitive_delta.adoc[] +include::{includedir}/datamodel_incomplete.adoc[] +include::{includedir}/datamodel_composite.adoc[] From d0a6a624cf80efa0ede043e24a7d7999c6057b73 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 19:55:22 +0000 Subject: [PATCH 17/22] Update POM for datamodel Change-Id: I1b61b267f78f6cc4df33477e90cce1f2b63f75e6 --- asterixdb/asterix-doc/pom.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml index 704ee568eef..8f21e7d1903 100644 --- a/asterixdb/asterix-doc/pom.xml +++ b/asterixdb/asterix-doc/pom.xml @@ -51,9 +51,6 @@ pre-site - - - From ec4c28929602330a39f405190b6dda51f16f7c62 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 19:55:57 +0000 Subject: [PATCH 18/22] Correct path for datamodel partials Change-Id: Ia11ec778bc279b48e65456824c4acb5e3a803410 --- .../modules/datamodel/{ => partials}/datamodel_composite.adoc | 0 .../modules/datamodel/{ => partials}/datamodel_incomplete.adoc | 0 .../datamodel/{ => partials}/datamodel_primitive_common.adoc | 0 .../datamodel/{ => partials}/datamodel_primitive_delta.adoc | 0 asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc | 2 +- 5 files changed, 1 insertion(+), 1 deletion(-) rename asterixdb/asterix-doc/src/shared/modules/datamodel/{ => partials}/datamodel_composite.adoc (100%) rename asterixdb/asterix-doc/src/shared/modules/datamodel/{ => 
partials}/datamodel_incomplete.adoc (100%) rename asterixdb/asterix-doc/src/shared/modules/datamodel/{ => partials}/datamodel_primitive_common.adoc (100%) rename asterixdb/asterix-doc/src/shared/modules/datamodel/{ => partials}/datamodel_primitive_delta.adoc (100%) diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_composite.adoc similarity index 100% rename from asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_composite.adoc rename to asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_composite.adoc diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_incomplete.adoc similarity index 100% rename from asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_incomplete.adoc rename to asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_incomplete.adoc diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_primitive_common.adoc similarity index 100% rename from asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_common.adoc rename to asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_primitive_common.adoc diff --git a/asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc b/asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_primitive_delta.adoc similarity index 100% rename from asterixdb/asterix-doc/src/shared/modules/datamodel/datamodel_primitive_delta.adoc rename to asterixdb/asterix-doc/src/shared/modules/datamodel/partials/datamodel_primitive_delta.adoc diff --git a/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc b/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc index 
522ff52cca5..3704c6d5141 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/datamodel.adoc @@ -18,7 +18,7 @@ under the License. //// = The Asterix Data Model (ADM) -:includedir: ../shared/modules/datamodel/partials +:includedir: ../../shared/modules/datamodel/partials :toc: :toclevels: 2 From e934d64df251e07b5612c4e82e8789d47559af88 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 21:13:55 +0000 Subject: [PATCH 19/22] Fix errors after testing Change-Id: Ibcc340d88b176303a2926f3954150e6b2a6fbc5f --- .../src/shared/modules/builtins/partials/15_bitwise.adoc | 1 + 1 file changed, 1 insertion(+) diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc index b40820853c0..4670643efce 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/15_bitwise.adoc @@ -654,3 +654,4 @@ and 15 (1111 in binary). + This returns 10 (1010 in binary) because 3 XOR 6 equals 5 (0101 in binary), and then 5 XOR 15 equals 10 (1010 in binary). 
+ From e465e8b7b63a4b326235662262d3d2cbc3ab9e3d Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Mon, 27 Jan 2020 21:25:00 +0000 Subject: [PATCH 20/22] Further corrections after testing Change-Id: Ifb4e8b9762a4baadda70e1d49dbb6c8562b003c5 --- .../modules/builtins/partials/3_binary.adoc | 1 + .../modules/builtins/partials/7_temporal.adoc | 3 +-- .../sqlpp/partials/appendix_3_resolution.adoc | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc index 803315d9cca..b778c25c706 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/3_binary.adoc @@ -139,3 +139,4 @@ binary_concat([hex("42"), hex(""), hex('42')]); * The expected result is + hex("4242") + diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc index be349cd3f75..2ed62c60679 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/7_temporal.adoc @@ -691,8 +691,7 @@ the interval) representing the starting or ending time, ---------------------------------------------------------- [[get_interval_start_dateget_interval_start_datetimeget_interval_start_time-get_interval_end_dateget_interval_end_datetimeget_interval_end_time]] -get_interval_start_date/get_interval_start_datetimeget_interval_start_time, -=== get_interval_end_date/get_interval_end_datetime/get_interval_end_time +=== get_interval_start_date/get_interval_start_datetime/get_interval_start_time, get_interval_end_date/get_interval_end_datetime/get_interval_end_time * Syntax: + diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc 
b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc index b7fb982a313..7920ad6fb54 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/appendix_3_resolution.adoc @@ -252,7 +252,7 @@ other subclauses in the same `FROM` clause. This also applies to the The process of name resolution begins with the leftmost identifier in the name. The rules for resolving the leftmost identifier are: -1. _In a FROM clause_: Names in a FROM clause identify the collections +. _In a FROM clause_: Names in a FROM clause identify the collections over which the query block will iterate. These collections may be stored datasets or may be the results of nested query blocks. A stored dataset may be in a named dataverse or in the default dataverse. Thus, if the @@ -266,11 +266,11 @@ The rules for resolving the leftmost identifier in a FROM clause (including a JOIN subclause), or in the expression following IN in a quantified predicate, are as follows: -i. If the identifier matches a variable-name that is in scope, it + .. If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery.) -ii. Otherwise, if the identifier is the first part of a two-part name + .. Otherwise, if the identifier is the first part of a two-part name like `a.b`, the name is treated as `dataverse.dataset`. If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. If the designated dataset exists @@ -283,7 +283,7 @@ synonym with this name does not exist. Datasets take precedence over synonyms, so if both a dataset and a synonym have the same name then the resolution is to the dataset. -2. 
_Elsewhere in a query block_: In clauses other than FROM, a name +. _Elsewhere in a query block_: In clauses other than FROM, a name typically identifies a field of some object. For example, if the expression `a.b` is in a SELECT or WHERE clause, it's likely that `a` represents an object and `b` represents a field in that object. @@ -291,11 +291,11 @@ represents an object and `b` represents a field in that object. The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are: -i. If the identifier matches a variable-name that is in scope, it + .. If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block.) -ii. (The "Single Variable Rule"): Otherwise, if the FROM clause in the + .. (The "Single Variable Rule"): Otherwise, if the FROM clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. For example, in the query `FROM customer SELECT address`, the identifier @@ -317,7 +317,7 @@ Rule does not apply in any clauses that occur after the GROUP BY because, in these clauses, the variables bound by the FROM clause are no longer in scope. In clauses after GROUP BY, only Rule 2.1 applies. -3. In an ORDER BY clause following a UNION ALL expression: +. In an ORDER BY clause following a UNION ALL expression: + The leftmost identifier is treated as a field-access on the objects that are generated by the UNION ALL. For example: @@ -334,7 +334,7 @@ ordered by the value of this field; objects that have no foo field will appear at at the beginning of the query result (in ascending order) or at the end (in descending order.) -4. _In a standalone expression_: If a query consists of a standalone +. 
_In a standalone expression_: If a query consists of a standalone expression then identifiers inside that expression are resolved according to Rule 1. For example, if the whole query is `ARRAY_COUNT(a.b)` then `a.b` will be treated as dataset `b` contained @@ -345,7 +345,7 @@ according to Rules 1-3. For example, if the whole query is `ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )` then `salary` is resolved as `e.salary` following the "Single Variable Rule" (Rule 2.2). -5. Once the leftmost identifier has been resolved, the following dots +. Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. The name resolves to the field at the end of the path. If this field does not exist, the From 4e79e763ac309f55f956fa7707e52575299120c1 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Tue, 28 Jan 2020 00:34:02 +0000 Subject: [PATCH 21/22] Attributes and conditionals Change-Id: Ie961aa0fc0b524dd9ee52a4735d74f227e78ac44 --- .../modules/builtins/partials/11_type.adoc | 4 +- .../modules/builtins/partials/14_window.adoc | 91 +++++++++---------- .../builtins/partials/2_string_common.adoc | 14 ++- .../builtins/partials/9_aggregate_sql.adoc | 6 +- .../shared/modules/sqlpp/partials/2_expr.adoc | 24 ++--- .../modules/sqlpp/partials/3_query.adoc | 81 ++++++++--------- .../src/site/asciidoc/aql/builtins.adoc | 19 ++++ .../src/site/asciidoc/sqlpp/builtins.adoc | 19 ++++ .../src/site/asciidoc/sqlpp/manual.adoc | 5 + 9 files changed, 153 insertions(+), 110 deletions(-) diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc index 22c6777e7d1..4dca5988eb8 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/11_type.adoc @@ -49,7 +49,7 @@ The function has 
an alias `isarray`. is_atomic(expr) --------------- * Checks whether the given expression is evaluated to be a value of a -link:../datamodel.html#PrimitiveTypes[primitive] type. +{primitive-types}[primitive] type. * Arguments: ** `expr` : an expression (any type is allowed). * Return Value: @@ -361,7 +361,7 @@ The function has an alias `toarray`. to_atomic(expr) --------------- * Converts input value to a -link:../datamodel.html#PrimitiveTypes[primitive] value +{primitive-types}[primitive] value * Arguments: ** `expr` : an expression * Return Value: diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc index f1a82503de3..9e1131da191 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc @@ -10,13 +10,13 @@ The tuples are not grouped into a single output tuple — each tuple remains separate in the query output. All window functions must be used with an OVER clause. Refer to -link:manual.html#Over_clauses[OVER Clauses] for details. +{over-clauses}[OVER Clauses] for details. Window functions cannot appear in the FROM clause clause or LIMIT clause. The examples in this section use the `GleambookMessages` dataset, -described in the section on link:manual.html#SELECT_statements[SELECT +described in the section on {select-statements}[SELECT Statements]. [[cume_dist]] @@ -38,9 +38,9 @@ window order clause is omitted, the function returns the same result * Arguments: ** None. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** A number greater than 0 and less than or equal to 1. 
The higher the value, the higher the ranking. @@ -121,9 +121,9 @@ ranked 2, the next dense rank is 3. * Arguments: ** None. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** An integer, greater than or equal to 1. * Example: @@ -200,9 +200,11 @@ FIRST_VALUE(expr) [nulls-treatment] OVER (window-definition) frame, where the window frame is specified by the window definition. * Arguments: ** `expr`: The value that you want to return from the first tuple in the -window frame. [link:#fn_1[1]] +window frame. footnote:fn_1[If the query contains the GROUP BY clause or any +{aggregate-functions}[aggregate functions], this expression must only +depend on GROUP BY expressions or aggregate functions.] * Modifiers: -** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +** {nulls-treatment}[Nulls Treatment]: (Optional) Determines how NULL or MISSING values are treated when finding the first value in the window frame. *** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or @@ -213,10 +215,10 @@ MISSING, those tuples are included when finding the first tuple. + If this modifier is omitted, the default is `RESPECT NULLS`. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. -** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. +** (Optional) {window-frame-clause}[Window Frame Clause]. * Return Value: ** The specified value from the first tuple. The order of the tuples is determined by the window order clause. 
@@ -311,13 +313,13 @@ The window order clause determines the sort order of the tuples. If the window order clause is omitted, the return values may be unpredictable. * Arguments: ** `expr`: The value that you want to return from the offset tuple. -[link:#fn_1[1]] +footnote:fn_1[] ** `offset`: (Optional) A positive integer. If omitted, the default is 1. ** `default`: (Optional) The value to return when the offset goes out of partition scope. If omitted, the default is NULL. * Modifiers: -** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +** {nulls-treatment}[Nulls Treatment]: (Optional) Determines how NULL or MISSING values are treated when finding the offset tuple in the window partition. *** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or @@ -327,9 +329,9 @@ MISSING, those tuples are included when finding the offset tuple. + If this modifier is omitted, the default is `RESPECT NULLS`. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** The specified value from the offset tuple. ** If the offset tuple is out of partition scope, it returns the default @@ -409,9 +411,9 @@ LAST_VALUE(expr) [nulls-treatment] OVER (window-definition) frame, where the window frame is specified by the window definition. * Arguments: ** `expr`: The value that you want to return from the last tuple in the -window frame. [link:#fn_1[1]] +window frame. footnote:fn_1[] * Modifiers: -** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +** {nulls-treatment}[Nulls Treatment]: (Optional) Determines how NULL or MISSING values are treated when finding the last tuple in the window frame. 
*** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or @@ -422,10 +424,10 @@ MISSING, those tuples are included when finding the last tuple. + If this modifier is omitted, the default is `RESPECT NULLS`. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. -** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. +** (Optional) {window-frame-clause}[Window Frame Clause]. * Return Value: ** The specified value from the last tuple. The order of the tuples is determined by the window order clause. @@ -527,13 +529,13 @@ The window order clause determines the sort order of the tuples. If the window order clause is omitted, the return values may be unpredictable. * Arguments: ** `expr`: The value that you want to return from the offset tuple. -[link:#fn_1[1]] +footnote:fn_1[] ** `offset`: (Optional) A positive integer. If omitted, the default is 1. ** `default`: (Optional) The value to return when the offset goes out of window partition scope. If omitted, the default is NULL. * Modifiers: -** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +** {nulls-treatment}[Nulls Treatment]: (Optional) Determines how NULL or MISSING values are treated when finding the offset tuple in the window partition. *** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or @@ -543,9 +545,9 @@ MISSING, those tuples are included when finding the offset tuple. + If this modifier is omitted, the default is `RESPECT NULLS`. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. 
* Return Value: ** The specified value from the offset tuple. ** If the offset tuple is out of partition scope, it returns the default @@ -625,11 +627,11 @@ NTH_VALUE(expr, offset) [nthval-from] [nulls-treatment] OVER (window-definition) where the window frame is specified by the window definition. * Arguments: ** `expr`: The value that you want to return from the offset tuple in -the window frame. [link:#fn_1[1]] +the window frame. footnote:fn_1[] ** `offset`: The number of the offset tuple within the window frame, counting from 1. * Modifiers: -** link:manual.html#Nth_val_from[Nth Val From]: (Optional) Determines +** {nth-val-from}[Nth Val From]: (Optional) Determines where the function starts counting the offset. *** `FROM FIRST`: Counting starts at the first tuple in the window frame. In this case, an offset of 1 is the first tuple in the window @@ -640,7 +642,7 @@ the second-to-last tuple, and so on. + The order of the tuples is determined by the window order clause. If this modifier is omitted, the default is `FROM FIRST`. -** link:manual.html#Nulls_treatment[Nulls Treatment]: (Optional) +** {nulls-treatment}[Nulls Treatment]: (Optional) Determines how NULL or MISSING values are treated when finding the offset tuple in the window frame. *** `IGNORE NULLS`: If the values for any tuples evaluate to NULL or @@ -650,10 +652,10 @@ MISSING, those tuples are included when finding the offset tuple. + If this modifier is omitted, the default is `RESPECT NULLS`. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. -** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. +** (Optional) {window-frame-clause}[Window Frame Clause]. * Return Value: ** The specified value from the offset tuple. 
** In the following cases, this function may return unpredictable @@ -835,9 +837,9 @@ undefined order. window partition. This argument can be an expression and must evaluate to a number. If the number is not an integer, it will be truncated. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** An value greater than or equal to 1 and less than or equal to the number of tiles. @@ -912,9 +914,9 @@ for each tuple. * Arguments: ** None. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** A number between 0 and 1. The higher the value, the higher the ranking. @@ -998,9 +1000,9 @@ instead. * Arguments: ** None. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** An integer, greater than or equal to 1. * Example: @@ -1076,12 +1078,12 @@ RATIO_TO_REPORT(expr) OVER (window-definition) the sum of values for all tuples in the window frame. * Arguments: ** `expr`: The value for which you want to calculate the fractional -ratio. [link:#fn_1[1]] +ratio. footnote:fn_1[] * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. 
-** (Optional) link:manual.html#Window_frame_clause[Window Frame Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. +** (Optional) {window-frame-clause}[Window Frame Clause]. * Return Value: ** A number between 0 and 1, representing the fractional ratio of the value for the current tuple to the sum of values for all tuples in the @@ -1159,9 +1161,9 @@ window order clause is omitted, the return values may be unpredictable. * Arguments: ** None. * Clauses: -** (Optional) link:manual.html#Window_partition_clause[Window Partition +** (Optional) {window-partition-clause}[Window Partition Clause]. -** (Optional) link:manual.html#Window_order_clause[Window Order Clause]. +** (Optional) {window-order-clause}[Window Order Clause]. * Return Value: ** An integer, greater than or equal to 1. * Example: @@ -1218,8 +1220,3 @@ FROM GleambookMessages AS m; ] -------------------- -''''' - -\1. If the query contains the GROUP BY clause or any -link:#AggregateFunctions[aggregate functions], this expression must only -depend on GROUP BY expressions or aggregate functions. diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc index 13e916f7896..59e441284b1 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/2_string_common.adoc @@ -52,9 +52,11 @@ contains(string, substring_to_contain) `missing` value, ** any other non-string input value will cause a type error, ** `false` otherwise. +ifeval::[{n_gram} == true] * Note: an link:similarity.html#UsingIndexesToSupportSimilarityQueries[n_gram index] can be utilized for this function. +endif::[] * Example: + ------------------------------------------------------------------------------ @@ -122,13 +124,15 @@ alias called "title". ** any other non-string input value will cause a type error. 
* Example: + +[subs=normal] --------------------------------------------------------------------------- -{ "v1": initcap("ASTERIXDB is here!"), "v2": title("ASTERIXDB is here!") }; +{ "v1": initcap("{upper-name} is here!"), "v2": title("{upper-name} is here!") }; --------------------------------------------------------------------------- * The expected result is: + +[subs=normal] ---------------------------------------------------------- -{ "v1": "Asterixdb Is Here!", "v2": "Asterixdb Is Here!" } +{ "v1": "{title-name} Is Here!", "v2": "{title-name} Is Here!" } ---------------------------------------------------------- [[length]] @@ -177,13 +181,15 @@ lower(string) ** any other non-string input value will cause a type error. * Example: + +[subs=normal] ------------------- -lower("ASTERIXDB"); +lower("{upper-name}"); ------------------- * The expected result is: + +[subs=normal] ----------- -"asterixdb" +"{lower-name}" ----------- [[ltrim]] diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc index e80e01c6df4..4f9442ccf8b 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc @@ -9,18 +9,18 @@ The query language also supports standard SQL aggregate functions (e.g., functions in the query language, but just syntactic sugars over corresponding builtin aggregate functions (e.g., `ARRAY_MIN`, `ARRAY_MAX`, `ARRAY_SUM`, `ARRAY_COUNT`, and `ARRAY_AVG`). Refer to -link:manual.html#SQL-92_aggregation_functions[SQL-92 Aggregation +{sql-92-aggregation-functions}[SQL-92 Aggregation Functions] for details. The `DISTINCT` keyword may be used with built-in aggregate functions and standard SQL aggregate functions. It may also be used with aggregate functions used as window functions. 
It determines whether the function aggregates all values in the group, or distinct values only. Refer to -link:manual.html#Aggregation_functions[Aggregation Functions] for +{aggregation-functions}[Aggregation Functions] for details. Aggregate functions may be used as window functions when they are used -with an OVER clause. Refer to link:manual.html#Over_clauses[OVER +with an OVER clause. Refer to {over-clauses}[OVER Clauses] for details. [[array_count]] diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc index 3aa8734a943..4a321550512 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/2_expr.adoc @@ -29,13 +29,13 @@ OperatorExpression ::= PathExpression The language provides a full set of operators that you can use within its statements. Here are the categories of operators: -* link:#Arithmetic_operators[Arithmetic Operators], to perform basic +* <<Arithmetic_operators,Arithmetic Operators>>, to perform basic mathematical operations; -* link:#Collection_operators[Collection Operators], to evaluate +* <<Collection_operators,Collection Operators>>, to evaluate expressions on collections or objects; -* link:#Comparison_operators[Comparison Operators], to compare two +* <<Comparison_operators,Comparison Operators>>, to compare two expressions; -* link:#Logical_operators[Logical Operators], to combine operators using +* <<Logical_operators,Logical Operators>>, to combine operators using Boolean logic. The following table summarizes the precedence order (from higher to @@ -61,10 +61,10 @@ In general, if any operand evaluates to a `MISSING` value, the enclosing operator will return `MISSING`; if none of operands evaluates to a `MISSING` value but there is an operand evaluates to a `NULL` value, the enclosing operator will return `NULL`. However, there are a few -exceptions listed in link:#Comparison_operators[comparison operators] -and link:#Logical_operators[logical operators]. +exceptions listed in <<Comparison_operators,comparison operators>> +and <<Logical_operators,logical operators>>.
-[[arithmetic-operators]] +[[Arithmetic_operators]] ==== Arithmetic Operators Arithmetic operators are used to exponentiate, add, subtract, multiply, @@ -93,7 +93,7 @@ integers) |SELECT VALUE 5 / 2; ||| |String concatenation |SELECT VALUE "ab"||"c"||"d"; |======================================================================= -[[collection-operators]] +[[Collection_operators]] ==== Collection Operators Collection operators are used for membership tests (IN, NOT IN) or empty @@ -115,7 +115,7 @@ ChirpMessages cm WHERE EXISTS cm.referredTopics; ChirpMessages cm WHERE NOT EXISTS cm.referredTopics; |======================================================================= -[[comparison-operators]] +[[Comparison_operators]] ==== Comparison Operators Comparison operators are used to compare values. The comparison @@ -217,7 +217,7 @@ operators work. |IS NOT KNOWN (IS NOT VALUED) |FALSE |TRUE |TRUE |=================================================== -[[logical-operators]] +[[Logical_operators]] ==== Logical Operators Logical operators perform logical `NOT`, `AND`, and `OR` operations over @@ -494,7 +494,7 @@ PositionalParameterReference ::= ("$" ) | "?" ------------------------------------------------------------------------------------------ A statement parameter is an external variable which value is provided -through the link:../api.html#queryservice[statement execution API]. An +through the {service-api}[statement execution API]. An error will be raised if the parameter is not bound at the query execution time. Positional parameter numbering starts at 1. "?" parameters are interpreted as latexmath:[$1, .. $]N in the order in @@ -545,7 +545,7 @@ bindings can themselves be any expressions in the query language. Note that Window functions, and aggregate functions used as window functions, have a more complex syntax. Window function calls are -described in the section on link:#Over_clauses[OVER Clauses]. +described in the section on {over-clauses}[OVER Clauses]. 
The following example is a (built-in) function call expression whose value is 8. diff --git a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc index 78453154d6f..c4f5013d3ad 100644 --- a/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/sqlpp/partials/3_query.adoc @@ -265,9 +265,10 @@ Returns: } ] ------------------------------------- -// TODO: check markup from original +[[Select_star]] +==== SELECT * -SELECT * `SELECT *` returns an object with a nested field for each input +`SELECT *` returns an object with a nested field for each input tuple. Each field has as its field name the name of a binding variable generated by either the `FROM` clause or `GROUP BY` clause in the current enclosing `SELECT` statement, and its field value is the value @@ -365,7 +366,7 @@ WHERE m.authorId = u.id and u.id = 2; ------------------------------------------ This query does an inner join that we will discuss in -link:#Multiple_from_terms[multiple from terms]. Since both `u` and `m` +<<Multiple_from_terms,multiple from terms>>. Since both `u` and `m` are binding variables generated in the `FROM` clause, this query returns: @@ -446,7 +447,7 @@ FROM GleambookUsers user; ------------------------- Compare this query with the first example given under -link:#Select_star[SELECT *]. This query returns all users from the +<<Select_star,SELECT *>>. This query returns all users from the `GleambookUsers` dataset, but the `user` variable name is omitted from the results: @@ -790,7 +791,7 @@ Returns: ] --- -[[multiple-from-terms]] +[[Multiple_from_terms]] ==== Multiple FROM Terms The query language permits correlations among `FROM` terms. @@ -1413,7 +1414,7 @@ Based on the three variable generation rules, the generated variable for the grouping key expression `message.authorId` is `authorId` (which is how it is referred to in the example's `SELECT` clause).
-[[implicit-group-variables]] +[[Implicit_group_variables]] ==== Implicit Group Variables The group variable itself is also optional in the `GROUP BY` syntax. If @@ -1630,7 +1631,7 @@ This query outputs: In principle, a `msg` reference in the query's `SELECT` clause would be "sugarized" as a collection (as described in -link:#Implicit_group_variables[Implicit Group Variables]). However, +<<Implicit_group_variables,Implicit Group Variables>>). However, since the SELECT expression `msg.authorId` is syntactically identical to a GROUP BY key expression, it will be internally replaced by the generated group key variable. The following is the equivalent rewritten @@ -1680,7 +1681,7 @@ expression evaluates to `TRUE` are propagated. Note that if the condition expression evaluates to `NULL` or `MISSING` the input tuple will be discarded. -[[order-by-clauses]] +[[Order_By_clauses]] === ORDER BY Clauses The `ORDER BY` clause is used to globally sort data in either ascending @@ -2095,10 +2096,10 @@ WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")" WindowFunctionType ::= AggregateFunction | WindowFunction --------------------------------------------------------- -Refer to the link:builtins.html#AggregateFunctions[Aggregate Functions] +Refer to the {aggregate-functions}[Aggregate Functions] section for a list of aggregate functions. -Refer to the link:builtins.html#WindowFunctions[Window Functions] +Refer to the {window-functions}[Window Functions] section for a list of window functions. [[window-function-arguments]] @@ -2109,8 +2110,8 @@ WindowFunctionArguments ::= ( ()? Expression | (Expression ("," Expression ("," Expression)? )? )? ) -------------------------------------------------------- -Refer to the link:builtins.html#AggregateFunctions[Aggregate Functions] -section or the link:builtins.html#WindowFunctions[Window Functions] +Refer to the {aggregate-functions}[Aggregate Functions] +section or the {window-functions}[Window Functions] section for details of the arguments for individual functions.
[[window-function-options]] @@ -2121,10 +2122,10 @@ WindowFunctionOptions ::= (NthValFrom)? (NullsTreatment)? --------------------------------------------------------- Window function options cannot be used with -link:builtins.html#AggregateFunctions[aggregate functions]. +{aggregate-functions}[aggregate functions]. Window function options can only be used with some -link:builtins.html#WindowFunctions[window functions], as described +{window-functions}[window functions], as described below. [[nth-val-from]] @@ -2165,7 +2166,7 @@ This modifier is optional. If omitted, the default setting is The AS keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents of the frame. When using a built-in -link:builtins.html#AggregateFunctions[aggregate function] as a window +{aggregate-functions}[aggregate function] as a window function, the function’s argument must be a subquery which refers to this alias, for example: @@ -2176,7 +2177,7 @@ FROM source AS src ---------------------------------------------------------------------- The alias is not necessary when using a -link:builtins.html#WindowFunctions[window function], or when using a +{window-functions}[window function], or when using a standard SQL aggregate function with the OVER clause. [[standard-sql-aggregate-functions-with-the-over-clause]] @@ -2225,14 +2226,14 @@ The *window partition clause* divides the tuples into logical partitions using one or more expressions. This clause may be used with any -link:builtins.html#WindowFunctions[window function], or any -link:builtins.html#AggregateFunctions[aggregate function] used as a +{window-functions}[window function], or any +{aggregate-functions}[aggregate function] used as a window function. This clause is optional. If omitted, all tuples are united in a single partition. 
-[[window-order-clause]] +[[Window_order_clause]] ===== Window Order Clause ------------------------------------------------------------------- @@ -2244,8 +2245,8 @@ partition. The window function works on tuples in the order specified by this clause. This clause may be used with any -link:builtins.html#WindowFunctions[window function], or any -link:builtins.html#AggregateFunctions[aggregate function] used as a +{window-functions}[window function], or any +{aggregate-functions}[aggregate function] used as a window function. This clause is optional. If omitted, all tuples are considered peers, @@ -2256,13 +2257,12 @@ each window function behaves differently. If tuples are tied, the results may be unpredictable. * The `rank()`, `dense_rank()`, `percent_rank()`, and `cume_dist()` functions return the same result for each tuple. -* For other functions, if the link:#Window_frame_clause[window frame] is +* For other functions, if the <<Window_frame_clause,window frame>> is defined by `ROWS`, the results may be unpredictable. If the window frame is defined by `RANGE` or `GROUPS`, the results are same for each tuple. -This clause may have multiple link:#Ordering_term[ordering terms]. To -reduce the number of ties, add additional link:#Ordering_term[ordering -terms]. +This clause may have multiple <<Ordering_term,ordering terms>>. To +reduce the number of ties, add additional <<Ordering_term,ordering terms>>. [[note]] Note @@ -2271,7 +2271,7 @@ This clause does not guarantee the overall order of the query results. To guarantee the order of the final results, use the query ORDER BY clause. -[[ordering-term]] +[[Ordering_term]] ===== Ordering Term ----------------------------------------------- @@ -2281,10 +2281,10 @@ OrderingTerm ::= Expression ( | )? The *ordering term* specifies an ordering expression and collation. This clause has the same syntax and semantics as the ordering term for -queries. Refer to the link:#Order_By_clauses[ORDER BY Clauses] section +queries. Refer to the <<Order_By_clauses,ORDER BY Clauses>> section for details.
-[[window-frame-clause]] +[[Window_frame_clause]] ===== Window Frame Clause ----------------------------------------------------------------------- @@ -2294,20 +2294,19 @@ WindowFrameClause ::= ( | | ) WindowFrameExtent The *window frame clause* defines the window frame. This clause can be used with all -link:builtins.html#AggregateFunctions[aggregate functions] and some -link:builtins.html#WindowFunctions[window functions] — refer to the +{aggregate-functions}[aggregate functions] and some +{window-functions}[window functions] — refer to the descriptions of individual functions for more details. -This clause is allowed only when the link:#Window_order_clause[window -order clause] is present. +This clause is allowed only when the <<Window_order_clause,window order clause>> is present. This clause is optional. * If this clause is omitted and there is no -link:#Window_order_clause[window order clause], the window frame is the +<<Window_order_clause,window order clause>>, the window frame is the entire partition. * If this clause is omitted but there is a -link:#Window_order_clause[window order clause], the window frame becomes +<<Window_order_clause,window order clause>>, the window frame becomes all tuples in the partition preceding the current tuple and its peers — the same as `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`. @@ -2326,8 +2325,7 @@ function produces deterministic results. Note If this clause uses `RANGE` with either `Expression PRECEDING` or -`Expression FOLLOWING`, the link:#Window_order_clause[window order -clause] must have only a single ordering term. +`Expression FOLLOWING`, the <<Window_order_clause,window order clause>> must have only a single ordering term. The ordering term expression must evaluate to a number. @@ -2395,7 +2393,7 @@ result sets will generate an empty window frame. * `BETWEEN Expression FOLLOWING AND UNBOUNDED FOLLOWING` — if `Expression` is too high, some tuples may generate an empty window frame. -* If the link:#Window_frame_exclusion[window frame exclusion clause] is +* If the <<Window_frame_exclusion,window frame exclusion clause>> is present, any window frame specification may result in empty window frame.
@@ -2403,7 +2401,7 @@ The `Expression` must be a positive constant or an expression that evaluates as a positive number. For `ROWS` or `GROUPS`, the `Expression` must be an integer. -[[window-frame-exclusion]] +[[Window_frame_exclusion]] ===== Window Frame Exclusion ------------------------------------------------------------------------- @@ -2415,12 +2413,11 @@ The *window frame exclusion clause* enables you to exclude specified tuples from the window frame. This clause can be used with all -link:builtins.html#AggregateFunctions[aggregate functions] and some -link:builtins.html#WindowFunctions[window functions] — refer to the +{aggregate-functions}[aggregate functions] and some +{window-functions}[window functions] — refer to the descriptions of individual functions for more details. -This clause is allowed only when the link:#Window_frame_clause[window -frame clause] is present. +This clause is allowed only when the <<Window_frame_clause,window frame clause>> is present. This clause is optional. If this clause is omitted, the default is no exclusion — the same as `EXCLUDE NO OTHERS`.
diff --git a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc index e51d2b37141..244f3096037 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/aql/builtins.adoc @@ -3,6 +3,25 @@ :toc: :toclevels: 1 +:data-model: xref:../datamodel.adoc +:primitive-types: xref:../datamodel.adoc#PrimitiveTypes +:over-clauses: xref:manual.adoc#Over_clauses +:window-partition-clause: xref:manual.adoc#Window_partition_clause +:window-order-clause: xref:manual.adoc#Window_order_clause +:window-frame-clause: xref:manual.adoc#Window_frame_clause +:nulls-treatment: xref:manual.adoc#Nulls_treatment +:nth-val-from: xref:manual.adoc#Nth_val_from +:select-statements: xref:manual.adoc#SELECT_statements +:sql-92-aggregation-functions: xref:manual.adoc#SQL-92_aggregation_functions +:aggregation-functions: xref:manual.adoc#Aggregation_functions +:aggregate-functions: xref:builtins.adoc#AggregateFunctions + +:n_gram: true +:upper-name: ASTERIXDB +:camel-name: AsterixDB +:title-name: Asterixdb +:lower-name: asterixdb + include::{includedir}/0_toc_common.adoc[] include::{includedir}/1_numeric_common.adoc[] include::{includedir}/1_numeric_delta.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc index 5a3c2c547f1..02f80554680 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/builtins.adoc @@ -3,6 +3,25 @@ :toc: :toclevels: 1 +:data-model: xref:../datamodel.adoc +:primitive-types: xref:../datamodel.adoc#PrimitiveTypes +:over-clauses: xref:manual.adoc#Over_clauses +:window-partition-clause: xref:manual.adoc#Window_partition_clause +:window-order-clause: xref:manual.adoc#Window_order_clause +:window-frame-clause: xref:manual.adoc#Window_frame_clause +:nulls-treatment: xref:manual.adoc#Nulls_treatment 
+:nth-val-from: xref:manual.adoc#Nth_val_from +:select-statements: xref:manual.adoc#SELECT_statements +:sql-92-aggregation-functions: xref:manual.adoc#SQL-92_aggregation_functions +:aggregation-functions: xref:manual.adoc#Aggregation_functions +:aggregate-functions: xref:builtins.adoc#AggregateFunctions + +:n_gram: true +:upper-name: ASTERIXDB +:camel-name: AsterixDB +:title-name: Asterixdb +:lower-name: asterixdb + include::{includedir}/0_toc_common.adoc[] include::{includedir}/1_numeric_common.adoc[] include::{includedir}/1_numeric_delta.adoc[] diff --git a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc index 0c2011d8af0..0272b607626 100644 --- a/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc +++ b/asterixdb/asterix-doc/src/site/asciidoc/sqlpp/manual.adoc @@ -3,6 +3,11 @@ :toc: :toclevels: 1 +:aggregate-functions: xref:builtins.adoc#AggregateFunctions +:window-functions: xref:builtins.adoc#WindowFunctions +:over-clauses: xref:manual.adoc#Over_clauses +:service-api: link:../api.html#queryservice + include::{includedir}/1_intro.adoc[] include::{includedir}/2_expr_title.adoc[] include::{includedir}/2_expr.adoc[] From 486807cfa6327140213f2282cd731c2984388473 Mon Sep 17 00:00:00 2001 From: Simon Dew Date: Tue, 28 Jan 2020 00:38:28 +0000 Subject: [PATCH 22/22] Fix IDs Change-Id: I779f43404d73e6ed5bf48757dc2626f1a68f23b1 --- .../src/shared/modules/builtins/partials/14_window.adoc | 2 +- .../src/shared/modules/builtins/partials/9_aggregate_aql.adoc | 2 +- .../src/shared/modules/builtins/partials/9_aggregate_sql.adoc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc index 9e1131da191..8ce22e19f94 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc +++ 
b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/14_window.adoc @@ -1,4 +1,4 @@ -[[window-functions]] +[[WindowFunctions]] == Window Functions Window functions are used to compute an aggregate or cumulative value, diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc index 88505313683..57286f865bb 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_aql.adoc @@ -1,4 +1,4 @@ -[[aggregate-functions-array-functions]] +[[AggregateFunctions]] == Aggregate Functions (Array Functions) This section contains detailed descriptions of each AQL aggregate diff --git a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc index 4f9442ccf8b..1ceb5e8eb2c 100644 --- a/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc +++ b/asterixdb/asterix-doc/src/shared/modules/builtins/partials/9_aggregate_sql.adoc @@ -1,4 +1,4 @@ -[[aggregate-functions-array-functions]] +[[AggregateFunctions]] == Aggregate Functions (Array Functions) This section contains detailed descriptions of the built-in aggregate