@@ -195,6 +195,21 @@ public void parquetWritePrimitivesGZipClassName() throws Exception {
runWritePrimitivesScenario("pxf_parquet_write_primitives_gzip_classname", "pxf_parquet_read_primitives_gzip_classname", "parquet_write_primitives_gzip_classname", new String[]{"COMPRESSION_CODEC=org.apache.hadoop.io.compress.GzipCodec"});
}

+@Test(groups = {"features", "gpdb", "security", "hcfs"})
+public void parquetWritePrimitivesSnappy() throws Exception {
+runWritePrimitivesScenario("pxf_parquet_write_primitives_snappy", "pxf_parquet_read_primitives_snappy", "parquet_write_primitives_snappy", new String[]{"COMPRESSION_CODEC=snappy"});
+}
+
+@Test(groups = {"features", "gpdb", "security", "hcfs"})
+public void parquetWritePrimitivesUncompressed() throws Exception {
+runWritePrimitivesScenario("pxf_parquet_write_primitives_uncompressed", "pxf_parquet_read_primitives_uncompressed", "parquet_write_primitives_uncompressed", new String[]{"COMPRESSION_CODEC=uncompressed"});
+}
+
+@Test(groups = {"features", "gpdb", "security", "hcfs"})
+public void parquetWritePrimitivesZStd() throws Exception {
+runWritePrimitivesScenario("pxf_parquet_write_primitives_zstd", "pxf_parquet_read_primitives_zstd", "parquet_write_primitives_zstd", new String[]{"COMPRESSION_CODEC=zstd"});
+}
+
// Numeric precision not defined, test writing data precision in [1, 38]. All the data should be written correctly.
@Test(groups = {"features", "gpdb", "security", "hcfs"})
public void parquetWriteUndefinedPrecisionNumeric() throws Exception {
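These three additions exercise the short codec aliases (`snappy`, `uncompressed`, `zstd`), while the existing `parquetWritePrimitivesGZipClassName` test above passes a full Hadoop codec class name instead. As a rough sketch of the difference between the two forms — `resolve` is a hypothetical helper, not PXF's actual resolution code — parquet-mr's `CompressionCodecName` enum can service both:

```java
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

final class CodecOptionSketch {
    // Hypothetical helper: sketches how a COMPRESSION_CODEC option value
    // could be mapped to a Parquet codec. PXF's real logic may differ.
    static CompressionCodecName resolve(String option) {
        if (option == null || option.isEmpty()) {
            return CompressionCodecName.SNAPPY; // the documented PXF default
        }
        if (option.contains(".")) {
            // Class-name form, e.g. org.apache.hadoop.io.compress.GzipCodec
            try {
                return CompressionCodecName.fromCompressionCodec(Class.forName(option));
            } catch (ClassNotFoundException e) {
                throw new IllegalArgumentException("unknown codec class: " + option, e);
            }
        }
        // Alias form, e.g. "zstd" -> ZSTD, "uncompressed" -> UNCOMPRESSED
        return CompressionCodecName.valueOf(option.toUpperCase());
    }
}
```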
docs/content/hdfs_parquet.html.md.erb (2 additions, 2 deletions)
@@ -23,7 +23,7 @@ under the License.

Use the PXF HDFS connector to read and write Parquet-format data. This section describes how to read and write HDFS files that are stored in Parquet format, including how to create, query, and insert into external tables that reference files in the HDFS data store.

-PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, and `lzo`.
+PXF supports reading or writing Parquet files compressed with these codecs: `snappy`, `gzip`, and `zstd`.

PXF currently supports reading and writing primitive Parquet data types only.

@@ -182,7 +182,7 @@ The PXF `hdfs:parquet` profile supports encoding- and compression-related write

| Write Option | Value Description |
|-------|-------------------------------------|
-| COMPRESSION_CODEC | The compression codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `lzo`, and `uncompressed` . If this option is not provided, PXF compresses the data using `snappy` compression. |
+| COMPRESSION_CODEC | The compression codec alias. Supported compression codecs for writing Parquet data include: `snappy`, `gzip`, `zstd`, and `uncompressed`. If this option is not provided, PXF compresses the data using `snappy` compression. |
| ROWGROUP_SIZE | A Parquet file consists of one or more row groups, a logical partitioning of the data into rows. `ROWGROUP_SIZE` identifies the size (in bytes) of the row group. The default row group size is `8 * 1024 * 1024` bytes. |
| PAGE_SIZE | A row group consists of column chunks that are divided up into pages. `PAGE_SIZE` is the size (in bytes) of such a page. The default page size is `1 * 1024 * 1024` bytes. |
| ENABLE\_DICTIONARY | A boolean value that specifies whether or not to enable dictionary encoding. The default value is `true`; dictionary encoding is enabled when PXF writes Parquet files. |
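A minimal sketch of how these four write options surface in parquet-mr itself, assuming `ExampleParquetWriter` from `parquet-hadoop` with a placeholder schema and output path (this illustrates the underlying builder API, not PXF's internal writer wiring):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ZstdWriteSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder schema and path, for illustration only.
        MessageType schema = MessageTypeParser.parseMessageType(
                "message example { required int32 id; }");
        try (ParquetWriter<Group> writer = ExampleParquetWriter
                .builder(new Path("/tmp/example.zstd.parquet"))
                .withConf(new Configuration())
                .withType(schema)
                .withCompressionCodec(CompressionCodecName.ZSTD) // COMPRESSION_CODEC=zstd
                .withRowGroupSize(8 * 1024 * 1024)               // ROWGROUP_SIZE default
                .withPageSize(1024 * 1024)                       // PAGE_SIZE default
                .withDictionaryEncoding(true)                    // ENABLE_DICTIONARY default
                .build()) {
            // writer.write(group) calls would follow here.
        }
    }
}
```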
server/build.gradle (2 additions, 1 deletion)
@@ -164,7 +164,7 @@ configure(javaProjects) {
}

// Parquet dependencies
dependency("org.apache.parquet:parquet-format:2.7.0")
dependency("org.apache.parquet:parquet-format:2.11.0")
dependencySet(group:"org.apache.parquet", version:"${parquetVersion}") {
entry("parquet-column")
entry("parquet-common")
@@ -173,6 +173,7 @@ configure(javaProjects) {
entry("parquet-hadoop")
entry("parquet-jackson")
entry("parquet-pig")
entry("parquet-format-structures")
}

// Thrift dependencies
server/gradle.properties (1 addition, 1 deletion)
@@ -23,7 +23,7 @@ hiveVersion=2.3.8
hiveStorageApiVersion=2.7.3
hbaseVersion=1.3.2
junitVersion=4.11
-parquetVersion=1.11.1
+parquetVersion=1.12.3
awsJavaSdk=1.12.261
springBootVersion=2.7.18
org.gradle.daemon=true
server/pxf-hdfs/build.gradle (1 addition, 0 deletions)
@@ -38,6 +38,7 @@ dependencies {
implementation("org.apache.hadoop:hadoop-hdfs") { transitive = false }
implementation("org.apache.hadoop:hadoop-hdfs-client") { transitive = false }
implementation("org.apache.parquet:parquet-format") { transitive = false }
implementation("org.apache.parquet:parquet-format-structures") { transitive = false }
implementation("org.apache.parquet:parquet-column") { transitive = false }
implementation("org.apache.parquet:parquet-common") { transitive = false }
implementation("org.apache.parquet:parquet-encoding") { transitive = false }