From 36f125998121bd0a6245575538ec3dfbc8db2022 Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Sat, 30 Nov 2013 19:28:01 +0000 Subject: [PATCH 1/2] switch to a valid path in ExampleArcMicroformat --- src/java/org/commoncrawl/examples/ExampleArcMicroformat.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java b/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java index 0e3b971..70d7c54 100644 --- a/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java +++ b/src/java/org/commoncrawl/examples/ExampleArcMicroformat.java @@ -180,7 +180,7 @@ public int run(String[] args) configFile = args[1]; // For this example, only look at a single ARC files. - String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/1341690163490/1341782443295_1551.arc.gz"; + String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/1346823845675/1346871947461_4036.arc.gz"; // Switch to this if you'd like to look at all ARC files. May take many minutes just to read the file listing. //String inputPath = "s3n://aws-publicdatasets/common-crawl/parse-output/segment/*/*.arc.gz"; From 376b62a44908f232af37a24062ecb5686ef4951c Mon Sep 17 00:00:00 2001 From: Yida Wang Date: Sat, 30 Nov 2013 20:17:37 +0000 Subject: [PATCH 2/2] switch to a valid path in ExampleMetadataStats --- src/java/org/commoncrawl/examples/ExampleMetadataStats.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java/org/commoncrawl/examples/ExampleMetadataStats.java b/src/java/org/commoncrawl/examples/ExampleMetadataStats.java index bbfaebc..d90230a 100644 --- a/src/java/org/commoncrawl/examples/ExampleMetadataStats.java +++ b/src/java/org/commoncrawl/examples/ExampleMetadataStats.java @@ -208,7 +208,7 @@ public int run(String[] args) // If you would like to process all segments, comment this out and // uncomment the block of code below - String inputPath = baseInputPath + "/1341690154994/metadata-00062"; + String inputPath = baseInputPath + "/1346823845675/metadata-04379"; LOG.info("adding input path '" + inputPath + "'"); FileInputFormat.addInputPath(job, new Path(inputPath));