From 2aec497a137c373d60069523212735e4419d284b Mon Sep 17 00:00:00 2001
From: Daniel Chen <dchen1@linkedin.com>
Date: Wed, 23 Jan 2019 16:46:46 -0800
Subject: [PATCH] Documentation updates

---
 README.md                                     | 17 ++++++++++-------
 .../apache/beam/examples/KafkaWordCount.java  | 13 ++++++++-----
 .../org/apache/beam/examples/WordCount.java   | 19 +++++++++++--------
 3 files changed, 29 insertions(+), 20 deletions(-)
diff --git a/README.md b/README.md
index 8ac19a9..f58d8b3 100644
--- a/README.md
+++ b/README.md
@@ -62,8 +62,9 @@ $ scripts/grid start zookeeper
 You can run directly within the project using maven:
 
 ```
+$ mvn package
 $ mvn compile exec:java -Dexec.mainClass=org.apache.beam.examples.WordCount \
-    -Dexec.args="--runner=SamzaRunner" -P samza-runner
+    -Dexec.args="--inputFile=pom.xml --output=counts --runner=SamzaRunner" -P samza-runner
 ```
 
 ### Packaging Your Application
@@ -77,15 +78,15 @@ After packaging, we deploy and explode the tgz in the deploy folder:
 
 ### Standalone Cluster with Zookeeper
 You can use the `run-beam-standalone.sh` script included in this repo to run an example
-in standalone mode. The config file is provided as `config/standalone.properties`. Note by
-default we create one single input partition for the whole input. To set the number of 
+in standalone mode. The config file is provided as `config/standalone.properties`. Note that by
+default we create a single input partition for the whole input. To set the number of 
 partitions, you can add "--maxSourceParallelism=" argument. For example, "--maxSourceParallelism=2"
 will create two partitions of the input file, based on size.  
 
 ```
 $ deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.WordCount \
     --configFilePath=$PWD/deploy/examples/config/standalone.properties \
-    --inputFile=/Users/xiliu/opensource/samza-beam-examples/pom.xml --output=word-counts.txt \
+    --inputFile=$PWD/pom.xml --output=word-counts.txt \
     --maxSourceParallelism=2
 ```
 
@@ -101,13 +102,15 @@ $ deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.KafkaWordC
 
 ###  Yarn Cluster
 Similar to running standalone, we can use the `run-beam-yarn.sh` to run the examples
-in Yarn cluster. The config file is provided as `config/yarn.properties`. To run the 
-WordCount example in yarn:
+in Yarn cluster. The config file is provided as `config/yarn.properties`. 
+Note that for yarn, we don't need to wait after submitting the job, so there is no need for `waitUntilFinish()`. 
+Please change `p.run().waitUtilFinish()` to `p.run()` in the `WordCount.java` class.
+To run the WordCount example in yarn:
 
 ```
  $ deploy/examples/bin/run-beam-yarn.sh org.apache.beam.examples.WordCount \
     --configFilePath=$PWD/deploy/examples/config/yarn.properties \
-    --inputFile=/Users/xiliu/opensource/samza-beam-examples/pom.xml \
+    --inputFile=$PWD/pom.xml \
     --output=/tmp/word-counts.txt --maxSourceParallelism=2
 ```
 
diff --git a/src/main/java/org/apache/beam/examples/KafkaWordCount.java b/src/main/java/org/apache/beam/examples/KafkaWordCount.java
index 7d1658d..02d4e5a 100644
--- a/src/main/java/org/apache/beam/examples/KafkaWordCount.java
+++ b/src/main/java/org/apache/beam/examples/KafkaWordCount.java
@@ -67,13 +67,16 @@
  * <p>To run in standalone with zookeeper:
  * (large parallelism will enforce each partition in a task)
  * <pre>{@code
- * $ deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.KafkaWordCount --configFilePath=$PWD/deploy/examples/config/standalone.properties --maxSourceParallelism=1024
+ * $ ./deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.KafkaWordCount --configFilePath=$PWD/deploy/examples/config/standalone.properties --maxSourceParallelism=1024
  * }</pre>
  *
  * <p>To run in yarn:
+ * For yarn, we don't need to wait after submitting the job, so there is no need for
+ * waitUntilFinish(). Please change p.run().waitUtilFinish() to p.run().
+ *
  * (large parallelism will enforce each partition in a task)
  * <pre>{@code
- * $ deploy/examples/bin/run-beam-yarn.sh org.apache.beam.examples.KafkaWordCount --configFilePath=$PWD/deploy/examples/config/yarn.properties --maxSourceParallelism=1024
+ * $ ./deploy/examples/bin/run-beam-yarn.sh org.apache.beam.examples.KafkaWordCount --configFilePath=$PWD/deploy/examples/config/yarn.properties --maxSourceParallelism=1024
  * }</pre>
  *
  * <p>To produce some test data:
@@ -127,9 +130,9 @@ public static void main(String[] args) {
             .withKeySerializer(StringSerializer.class)
             .withValueSerializer(StringSerializer.class));
 
-    //For yarn, we don't need to wait after submitting the job,
-    //so there is no need for waitUntilFinish(). Please use
-    //p.run()
+    // For yarn, we don't need to wait after submitting the job,
+    // so there is no need for waitUntilFinish(). Please use
+    // p.run()
     p.run().waitUntilFinish();
   }
 }
diff --git a/src/main/java/org/apache/beam/examples/WordCount.java b/src/main/java/org/apache/beam/examples/WordCount.java
index a647c0a..dedc28f 100644
--- a/src/main/java/org/apache/beam/examples/WordCount.java
+++ b/src/main/java/org/apache/beam/examples/WordCount.java
@@ -70,18 +70,21 @@
  * <p>To execute this example in standalone with zookeeper:
  * (split the input by 2)
  * <pre>{@code
- * $ deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.WordCount \
+ * $ ./deploy/examples/bin/run-beam-standalone.sh org.apache.beam.examples.WordCount \
  *     --configFilePath=$PWD/deploy/examples/config/standalone.properties \
- *     --inputFile=/Users/xiliu/opensource/samza-beam-examples/pom.xml --output=word-counts.txt \
+ *     --inputFile=$PWD/pom.xml --output=word-counts.txt \
  *     --maxSourceParallelism=2
  * }</pre>
  *
  * <p>To execute this example in yarn:
+ * For yarn, we don't need to wait after submitting the job, so there is no need for
+ * waitUntilFinish(). Please change p.run().waitUtilFinish() to p.run().
+ *
  * (split the input by 2)
  * <pre>{@code
- * $ deploy/examples/bin/run-beam-yarn.sh org.apache.beam.examples.WordCount \
+ * $ ./deploy/examples/bin/run-beam-yarn.sh org.apache.beam.examples.WordCount \
  *     --configFilePath=$PWD/deploy/examples/config/yarn.properties \
- *     --inputFile=/Users/xiliu/opensource/samza-beam-examples/pom.xml \
+ *     --inputFile=$PWD/pom.xml \
  *     --output=/tmp/word-counts.txt --maxSourceParallelism=2
  * }</pre>
  */
@@ -187,10 +190,10 @@ static void runWordCount(WordCountOptions options) {
         .apply(MapElements.via(new FormatAsTextFn()))
         .apply("WriteCounts", TextIO.write().to(options.getOutput()).withoutSharding());
 
-    //For yarn, we don't need to wait after submitting the job,
-    //so there is no need for waitUntilFinish(). Please use
-    //p.run()
-    p.run().waitUntilFinish();
+    // For yarn, we don't need to wait after submitting the job,
+    // so there is no need for waitUntilFinish(). Please use
+    // p.run()
+    p.run();
   }
 
   public static void main(String[] args) {