From 52a6e47175c5fdbc0d24ecffdccdb8057344343f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rene=CC=81=20Kriegler?= Date: Thu, 16 Oct 2014 22:38:15 +0100 Subject: [PATCH 1/6] Bumped Solr version to 4.8.1 --- run-example.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) mode change 100644 => 100755 run-example.sh diff --git a/run-example.sh b/run-example.sh old mode 100644 new mode 100755 index 04d6783..9b9df9c --- a/run-example.sh +++ b/run-example.sh @@ -13,8 +13,9 @@ tmpdir=/tmp/solr-map-reduce ####################### # Using Solr 4.8 -solr_distrib="solr-4.8.0" -solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/4.8.0/$solr_distrib.tgz" +solr_version="4.8.1" +solr_distrib="solr-$solr_version" +solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/$solr_version/$solr_distrib.tgz" # you should replace with a local mirror. Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ hadoop_distrib="hadoop-2.2.0" From 9e659793108a779ab42717217b6fb0a369cd198b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rene=CC=81=20Kriegler?= Date: Wed, 22 Oct 2014 17:20:27 +0200 Subject: [PATCH 2/6] Setting Solr version to 4.10.1 --- run-example.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run-example.sh b/run-example.sh index 9b9df9c..e329241 100755 --- a/run-example.sh +++ b/run-example.sh @@ -12,8 +12,8 @@ tmpdir=/tmp/solr-map-reduce ## Solr + Hadoop Dists ####################### -# Using Solr 4.8 -solr_version="4.8.1" +# Using Solr 4.10 +solr_version="4.10.1" solr_distrib="solr-$solr_version" solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/$solr_version/$solr_distrib.tgz" From 1bfa57f2b7de78c301d8979572ca71508279ae10 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Wed, 12 Nov 2014 10:01:41 -0500 Subject: [PATCH 3/6] small doco fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ace567f..4832cf8 100644 --- a/README.md 
+++ b/README.md @@ -48,7 +48,7 @@ This is currently using: Hadoop 2.2.0 -Solr 4.8 +Solr 4.10.1 Web URLs From 99b5d897baf933d76419e687efcabc5f7c363060 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 13 Nov 2014 10:09:19 -0500 Subject: [PATCH 4/6] make it easier to play with this demo --- add-third-solr.sh | 21 +++++++++++++++++++ run-just-morphline.sh | 49 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100755 add-third-solr.sh create mode 100755 run-just-morphline.sh diff --git a/add-third-solr.sh b/add-third-solr.sh new file mode 100755 index 0000000..b479626 --- /dev/null +++ b/add-third-solr.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# ADD THIRD SOLR - Demonstrates adding another Solr process, and it replicating data from and into HDFS. +# +####################### + +solr_version="4.10.1" + +# return absolute path +function absPath { + echo $(cd $(dirname $1); pwd)/$(basename $1) +} + +hadoop_conf_dir=`absPath "hadoop_conf/conf"` +echo "hadoop_conf: $hadoop_conf_dir" + +cp -r -f solr-${solr_version}/example solr-${solr_version}/example3 + +cd solr-${solr_version} +cd example3 +java -Xmx512m -Djetty.port=7575 -DzkHost=127.0.0.1:9983 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr3 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=6575 -DSTOP.KEY=key -jar start.jar 1>example3.log 2>&1 & \ No newline at end of file diff --git a/run-just-morphline.sh b/run-just-morphline.sh new file mode 100755 index 0000000..f95edae --- /dev/null +++ b/run-just-morphline.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +# RUN JUST MORPHLINE SCRIPT - Handles all the environment setup, and just runs the morphline step. +# +####################### + +# this gets hard coded in the configs - keep in sync +tmpdir=/tmp/solr-map-reduce + +## Solr + Hadoop Dists +####################### + +# Using Solr 4.10 +solr_distrib="solr-4.10.1" +s +# you should replace with a local mirror. 
Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ +hadoop_distrib="hadoop-2.2.0" + +######################################################### +# NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032 ZooKeeper port: 9983, Solr port: 8983 +# NameNode web port: 50070, DataNodes web port: 50075 +######################################################### + + +# collection to work with +collection=collection1 + +# return absolute path +function absPath { + echo $(cd $(dirname $1); pwd)/$(basename $1) +} + +hadoop_conf_dir=`absPath "hadoop_conf/conf"` +echo "hadoop_conf: $hadoop_conf_dir" + +hadoopHome=`absPath "$hadoop_distrib"` +echo "HADOOP_HOME=$hadoopHome" +export HADOOP_HOME=$hadoopHome +export HADOOP_LOG_DIR=$tmpdir/logs +export HADOOP_CONF_DIR=$hadoop_conf_dir + +# +## Build an index with map-reduce and deploy it to SolrCloud +####################### + +source $solr_distrib/example/scripts/map-reduce/set-map-reduce-classpath.sh + +$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir jar $solr_distrib/dist/solr-map-reduce-*.jar -D 'mapred.child.java.opts=-Xmx500m' -libjars "$HADOOP_LIBJAR" --morphline-file readAvroContainer.conf --zk-host 127.0.0.1:9983 --output-dir hdfs://127.0.0.1:8020/outdir --collection $collection --log4j log4j.properties --go-live --verbose "hdfs://127.0.0.1:8020/indir" + From c90361737c3f7fb3c2be000fc042cf7bfe6160d1 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Fri, 14 Nov 2014 20:31:20 -0500 Subject: [PATCH 5/6] add a mention about the --dry-run parameter --- run-just-morphline.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/run-just-morphline.sh b/run-just-morphline.sh index f95edae..0716626 100755 --- a/run-just-morphline.sh +++ b/run-just-morphline.sh @@ -41,6 +41,8 @@ export HADOOP_CONF_DIR=$hadoop_conf_dir # ## Build an index with map-reduce and deploy it to SolrCloud +## Add --dry-run parameter to the end to see it run without +## actually putting the documents into Solr! 
####################### source $solr_distrib/example/scripts/map-reduce/set-map-reduce-classpath.sh From 5dd6b49dfc71b3dcefdd588b92730b178e72e0e7 Mon Sep 17 00:00:00 2001 From: Eric Pugh Date: Thu, 18 Dec 2014 14:46:18 -0500 Subject: [PATCH 6/6] fix up URLs for current versions of Solr and Hadoop, and provide some documentation. --- run-example.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/run-example.sh b/run-example.sh index e329241..34602a7 100755 --- a/run-example.sh +++ b/run-example.sh @@ -12,14 +12,14 @@ tmpdir=/tmp/solr-map-reduce ## Solr + Hadoop Dists ####################### -# Using Solr 4.10 -solr_version="4.10.1" +# Check the Mirrors to see what the latest version of Hadoop and Solr are +# that they host. Known to work with Solr 4.10.1 and Hadoop 2.2.0 +solr_version="4.10.2" solr_distrib="solr-$solr_version" solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/$solr_version/$solr_distrib.tgz" -# you should replace with a local mirror. Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ -hadoop_distrib="hadoop-2.2.0" -hadoop_distrib_url="http://www.eng.lsu.edu/mirrors/apache/hadoop/common/hadoop-2.2.0/$hadoop_distrib.tar.gz" +hadoop_distrib="hadoop-2.6.0" +hadoop_distrib_url="http://www.eng.lsu.edu/mirrors/apache/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz" ######################################################### # NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032 ZooKeeper port: 9983, Solr port: 8983