-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Add snippets for JDBC using Managed I/O #10239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3c15a74
e518462
dd86fb7
91593b6
57d2b5a
be51dc1
e21d4d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.example.dataflow; | ||
|
|
||
| // [START dataflow_postgres_read] | ||
| import com.google.common.collect.ImmutableMap; | ||
| import org.apache.beam.sdk.Pipeline; | ||
| import org.apache.beam.sdk.PipelineResult; | ||
| import org.apache.beam.sdk.io.TextIO; | ||
| import org.apache.beam.sdk.managed.Managed; | ||
| import org.apache.beam.sdk.options.Description; | ||
| import org.apache.beam.sdk.options.PipelineOptions; | ||
| import org.apache.beam.sdk.options.PipelineOptionsFactory; | ||
| import org.apache.beam.sdk.transforms.MapElements; | ||
| import org.apache.beam.sdk.values.TypeDescriptors; | ||
|
|
||
| public class PostgresRead { | ||
|
|
||
| public interface Options extends PipelineOptions { | ||
| @Description("The jdbc url of PostgreSQL database to read from.") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "The jdbc url" -> "The JDBC URL" |
||
| String getJdbcUrl(); | ||
|
|
||
| void setJdbcUrl(String value); | ||
|
|
||
| @Description("The table of PostgresSQL to read from.") | ||
| String getTable(); | ||
|
|
||
| void setTable(String value); | ||
|
|
||
| @Description("The username of PostgreSQL database.") | ||
| String getUsername(); | ||
|
|
||
| void setUsername(String value); | ||
|
|
||
| @Description("The password of PostgreSQL database.") | ||
| String getPassword(); | ||
|
|
||
| void setPassword(String value); | ||
|
|
||
| @Description("Path to write the output file") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add period at end to match other strings. Also is this a local path, GCS, other? Might be worth being explicit about that unless it's clear / widely known. |
||
| String getOutputPath(); | ||
|
|
||
| void setOutputPath(String value); | ||
| } | ||
|
|
||
| /** | ||
| * Builds the read pipeline from the command-line arguments, runs it, and waits for it to finish. | ||
| * | ||
| * @param args command-line pipeline options | ||
| * @return the state returned by {@code waitUntilFinish()} | ||
| */ | ||
| public static PipelineResult.State main(String[] args) { | ||
| // Turn the command-line flags into a validated Options instance. Typical flags: | ||
| // --runner=DirectRunner --jdbcUrl=$JDBC_URL --table=$TABLE | ||
| // --username=$USERNAME --password=$PASSWORD --outputPath=$OUTPUT_FILE | ||
| // Reference: | ||
| // https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options | ||
| Options pipelineOptions = | ||
| PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); | ||
| PipelineResult result = createPipeline(pipelineOptions).run(); | ||
| return result.waitUntilFinish(); | ||
| } | ||
|
|
||
| /** | ||
| * Builds a pipeline that reads rows from the configured PostgreSQL table, formats each row as | ||
| * an "id,name" line, and writes the lines to a text file. | ||
| * | ||
| * @param options the parsed pipeline options | ||
| * @return the constructed pipeline; the caller is responsible for running it | ||
| */ | ||
| public static Pipeline createPipeline(Options options) { | ||
|
|
||
| // Configuration parameters for the Managed I/O transform. | ||
| ImmutableMap<String, Object> config = | ||
| ImmutableMap.of( | ||
| "jdbc_url", options.getJdbcUrl(), | ||
| "location", options.getTable(), | ||
| "username", options.getUsername(), | ||
| "password", options.getPassword()); | ||
|
|
||
| // Assemble the pipeline steps. | ||
| Pipeline pipeline = Pipeline.create(options); | ||
| pipeline | ||
| // Read rows from the PostgreSQL table by using Managed I/O. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Options, depending on which makes most sense: |
||
| .apply(Managed.read(Managed.POSTGRES).withConfig(config)) | ||
| .getSinglePCollection() | ||
| // Format each row as "id,name". | ||
| .apply( | ||
| MapElements.into(TypeDescriptors.strings()) | ||
| .via(row -> String.format("%d,%s", row.getInt32("id"), row.getString("name")))) | ||
| // Write the formatted lines to a single-shard text file. | ||
| .apply(TextIO.write().to(options.getOutputPath()).withSuffix(".txt").withNumShards(1)); | ||
| return pipeline; | ||
| } | ||
| } | ||
| // [END dataflow_postgres_read] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.example.dataflow; | ||
|
|
||
| // [START dataflow_postgres_write] | ||
| import static org.apache.beam.sdk.schemas.Schema.toSchema; | ||
|
|
||
| import com.google.common.collect.ImmutableMap; | ||
| import java.util.Arrays; | ||
| import java.util.List; | ||
| import java.util.stream.Stream; | ||
| import org.apache.beam.sdk.Pipeline; | ||
| import org.apache.beam.sdk.PipelineResult; | ||
| import org.apache.beam.sdk.managed.Managed; | ||
| import org.apache.beam.sdk.options.Description; | ||
| import org.apache.beam.sdk.options.PipelineOptions; | ||
| import org.apache.beam.sdk.options.PipelineOptionsFactory; | ||
| import org.apache.beam.sdk.schemas.Schema; | ||
| import org.apache.beam.sdk.transforms.Create; | ||
| import org.apache.beam.sdk.values.Row; | ||
|
|
||
| public class PostgresWrite { | ||
|
|
||
| private static Schema INPUT_SCHEMA = | ||
| Stream.of( | ||
| Schema.Field.of("id", Schema.FieldType.INT32), | ||
| Schema.Field.of("name", Schema.FieldType.STRING)) | ||
| .collect(toSchema()); | ||
|
|
||
| private static List<Row> ROWS = | ||
| Arrays.asList( | ||
| Row.withSchema(INPUT_SCHEMA) | ||
| .withFieldValue("id", 1) | ||
| .withFieldValue("name", "John Doe") | ||
| .build(), | ||
| Row.withSchema(INPUT_SCHEMA) | ||
| .withFieldValue("id", 2) | ||
| .withFieldValue("name", "Jane Smith") | ||
| .build()); | ||
|
|
||
| /** Pipeline options for writing rows to a PostgreSQL table. */ | ||
| public interface Options extends PipelineOptions { | ||
| @Description("The JDBC URL of the PostgreSQL database to write to.") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. JDBC URL |
||
| String getJdbcUrl(); | ||
|
|
||
| void setJdbcUrl(String value); | ||
|
|
||
| @Description("The PostgreSQL table to write to.") | ||
| String getTable(); | ||
|
|
||
| void setTable(String value); | ||
|
|
||
| @Description("The username of PostgreSQL database.") | ||
| String getUsername(); | ||
|
|
||
| void setUsername(String value); | ||
|
|
||
| @Description("The password of PostgreSQL database.") | ||
| String getPassword(); | ||
|
|
||
| void setPassword(String value); | ||
| } | ||
|
|
||
| /** | ||
| * Builds the write pipeline from the command-line arguments, runs it, and waits for it to | ||
| * finish. | ||
| * | ||
| * @param args command-line pipeline options | ||
| * @return the state returned by {@code waitUntilFinish()} | ||
| */ | ||
| public static PipelineResult.State main(String[] args) { | ||
| // Turn the command-line flags into a validated Options instance. Typical flags: | ||
| // --runner=DirectRunner --jdbcUrl=$JDBC_URL --table=$TABLE | ||
| // --username=$USERNAME --password=$PASSWORD | ||
| // Reference: | ||
| // https://beam.apache.org/documentation/programming-guide/#configuring-pipeline-options | ||
| Options pipelineOptions = | ||
| PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); | ||
| PipelineResult result = createPipeline(pipelineOptions).run(); | ||
| return result.waitUntilFinish(); | ||
| } | ||
|
|
||
| /** | ||
| * Builds a pipeline that creates the sample rows and writes them to the configured PostgreSQL | ||
| * table. | ||
| * | ||
| * @param options the parsed pipeline options | ||
| * @return the constructed pipeline; the caller is responsible for running it | ||
| */ | ||
| public static Pipeline createPipeline(Options options) { | ||
|
|
||
| // Configuration parameters for the Managed I/O transform. | ||
| ImmutableMap<String, Object> config = | ||
| ImmutableMap.of( | ||
| "jdbc_url", options.getJdbcUrl(), | ||
| "location", options.getTable(), | ||
| "username", options.getUsername(), | ||
| "password", options.getPassword()); | ||
|
|
||
| // Assemble the pipeline steps. | ||
| Pipeline pipeline = Pipeline.create(options); | ||
| pipeline | ||
| // Create the in-memory rows to write, and attach their schema. | ||
| .apply(Create.of(ROWS)) | ||
| .setRowSchema(INPUT_SCHEMA) | ||
| // Write the rows to the PostgreSQL table by using Managed I/O. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment as other file |
||
| .apply(Managed.write(Managed.POSTGRES).withConfig(config)) | ||
| .getSinglePCollection(); | ||
| return pipeline; | ||
| } | ||
| } | ||
| // [END dataflow_postgres_write] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| /* | ||
| * Copyright 2026 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package com.example.dataflow; | ||
|
|
||
| import static org.junit.Assert.assertEquals; | ||
| import static org.junit.Assert.assertTrue; | ||
|
|
||
| import java.io.File; | ||
| import java.io.IOException; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Paths; | ||
| import java.sql.Connection; | ||
| import java.sql.DriverManager; | ||
| import java.sql.Statement; | ||
| import org.apache.beam.sdk.PipelineResult; | ||
| import org.junit.After; | ||
| import org.junit.Before; | ||
| import org.junit.Test; | ||
| import org.testcontainers.containers.PostgreSQLContainer; | ||
|
|
||
| public class PostgresReadIT { | ||
|
|
||
| // Name of the table that setUp() creates and the pipeline reads. | ||
| private static final String TABLE_NAME = "test_read_table"; | ||
| // Output path prefix passed to the pipeline through --outputPath. | ||
| private static final String OUTPUT_PATH = "test-output"; | ||
| // The TextIO connector appends this suffix to the pipeline output file. | ||
| private static final String OUTPUT_FILE_SUFFIX = "-00000-of-00001.txt"; | ||
| // Name of the file that the test expects the pipeline to produce. | ||
| private static final String OUTPUT_FILE_NAME = OUTPUT_PATH + OUTPUT_FILE_SUFFIX; | ||
|
|
||
| // PostgreSQL test container; started in setUp() and stopped in tearDown(). | ||
| private static final PostgreSQLContainer<?> postgres = | ||
| new PostgreSQLContainer<>("postgres:15-alpine"); | ||
shunping marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| @Before | ||
| public void setUp() throws Exception { | ||
| postgres.start(); | ||
|
|
||
| // Initialize the database with table and data | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add period at end |
||
| try (Connection conn = | ||
| DriverManager.getConnection( | ||
| postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword())) { | ||
|
|
||
| Statement stmt = conn.createStatement(); | ||
| stmt.execute( | ||
| String.format("CREATE TABLE %s (id INT PRIMARY KEY, name VARCHAR(255))", TABLE_NAME)); | ||
| stmt.execute(String.format("INSERT INTO %s (id, name) VALUES (1, 'John Doe')", TABLE_NAME)); | ||
| stmt.execute(String.format("INSERT INTO %s (id, name) VALUES (2, 'Jane Smith')", TABLE_NAME)); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Stops the PostgreSQL container and removes the pipeline output file. | ||
| * | ||
| * @throws IOException if the output file cannot be deleted | ||
| */ | ||
| @After | ||
| public void tearDown() throws IOException { | ||
| try { | ||
| // postgres is a static final field that is always initialized, so no null check is needed. | ||
| postgres.stop(); | ||
| } finally { | ||
| // Delete the output file even if stopping the container fails. | ||
| Files.deleteIfExists(Paths.get(OUTPUT_FILE_NAME)); | ||
| } | ||
| } | ||
|
|
||
| /** Runs the read pipeline against the test container and checks its final state and output. */ | ||
| @Test | ||
| public void testPostgresRead() throws IOException { | ||
| // Execute the Beam pipeline. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. period at end |
||
| String[] pipelineArgs = { | ||
| "--runner=DirectRunner", | ||
| "--jdbcUrl=" + postgres.getJdbcUrl(), | ||
| "--table=" + TABLE_NAME, | ||
| "--username=" + postgres.getUsername(), | ||
| "--password=" + postgres.getPassword(), | ||
| "--outputPath=" + OUTPUT_PATH | ||
| }; | ||
| PipelineResult.State finalState = PostgresRead.main(pipelineArgs); | ||
|
|
||
| assertEquals(PipelineResult.State.DONE, finalState); | ||
| verifyOutput(); | ||
| } | ||
|
|
||
| /** | ||
| * Asserts that the output file exists and contains a line for each seeded row. | ||
| * | ||
| * @throws IOException if the output file cannot be read | ||
| */ | ||
| private void verifyOutput() throws IOException { | ||
| File producedFile = new File(OUTPUT_FILE_NAME); | ||
| assertTrue("Output file should exist", producedFile.exists()); | ||
|
|
||
| String fileText = Files.readString(producedFile.toPath()); | ||
|
|
||
| // Each seeded row must appear in "id,name" form. | ||
| for (String expectedLine : new String[] {"1,John Doe", "2,Jane Smith"}) { | ||
| assertTrue(fileText.contains(expectedLine)); | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.