diff --git a/README.md b/README.md index ace567f..4832cf8 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ This is currently using: Hadoop 2.2.0 -Solr 4.8 +Solr 4.10.1 Web URLs diff --git a/add-third-solr.sh b/add-third-solr.sh new file mode 100755 index 0000000..b479626 --- /dev/null +++ b/add-third-solr.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# ADD THIRD SOLR - Demonstrates adding another Solr process and having it replicate data from and into HDFS. +# +####################### + +solr_version="4.10.1" + +# return absolute path +function absPath { + echo $(cd $(dirname $1); pwd)/$(basename $1) +} + +hadoop_conf_dir=`absPath "hadoop_conf/conf"` +echo "hadoop_conf: $hadoop_conf_dir" + +cp -r -f solr-${solr_version}/example solr-${solr_version}/example3 + +cd solr-${solr_version} +cd example3 +java -Xmx512m -Djetty.port=7575 -DzkHost=127.0.0.1:9983 -Dsolr.directoryFactory=solr.HdfsDirectoryFactory -Dsolr.lock.type=hdfs -Dsolr.hdfs.home=hdfs://127.0.0.1:8020/solr3 -Dsolr.hdfs.confdir=$hadoop_conf_dir -DSTOP.PORT=6575 -DSTOP.KEY=key -jar start.jar 1>example3.log 2>&1 & \ No newline at end of file diff --git a/run-example.sh b/run-example.sh old mode 100644 new mode 100755 index 04d6783..34602a7 --- a/run-example.sh +++ b/run-example.sh @@ -12,13 +12,14 @@ tmpdir=/tmp/solr-map-reduce ## Solr + Hadoop Dists ####################### -# Using Solr 4.8 -solr_distrib="solr-4.8.0" -solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/4.8.0/$solr_distrib.tgz" - -# you should replace with a local mirror. Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ -hadoop_distrib="hadoop-2.2.0" -hadoop_distrib_url="http://www.eng.lsu.edu/mirrors/apache/hadoop/common/hadoop-2.2.0/$hadoop_distrib.tar.gz" +# Check the mirrors to see what the latest versions of Hadoop and Solr are +# that they host. 
Known to work with Solr 4.10.1 and Hadoop 2.2.0 +solr_version="4.10.2" +solr_distrib="solr-$solr_version" +solr_distrib_url="http://apache.mirrors.lucidnetworks.net/lucene/solr/$solr_version/$solr_distrib.tgz" + +hadoop_distrib="hadoop-2.6.0" +hadoop_distrib_url="http://www.eng.lsu.edu/mirrors/apache/hadoop/common/$hadoop_distrib/$hadoop_distrib.tar.gz" ######################################################### # NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032 ZooKeeper port: 9983, Solr port: 8983 diff --git a/run-just-morphline.sh b/run-just-morphline.sh new file mode 100755 index 0000000..0716626 --- /dev/null +++ b/run-just-morphline.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# RUN JUST MORPHLINE SCRIPT - Handles all the environment setup and runs only the morphline step. +# +####################### + +# this gets hard coded in the configs - keep in sync +tmpdir=/tmp/solr-map-reduce + +## Solr + Hadoop Dists +####################### + +# Using Solr 4.10.1 +solr_distrib="solr-4.10.1" +s +# you should replace with a local mirror. 
Find one at http://www.apache.org/dyn/closer.cgi/hadoop/common/hadoop-2.2.0/ +hadoop_distrib="hadoop-2.2.0" + +######################################################### +# NameNode port: 8020, DataNode ports: 50010, 50020, ResourceManager port: 8032, ZooKeeper port: 9983, Solr port: 8983 +# NameNode web port: 50070, DataNodes web port: 50075 +######################################################### + + +# collection to work with +collection=collection1 + +# return absolute path +function absPath { + echo $(cd $(dirname $1); pwd)/$(basename $1) +} + +hadoop_conf_dir=`absPath "hadoop_conf/conf"` +echo "hadoop_conf: $hadoop_conf_dir" + +hadoopHome=`absPath "$hadoop_distrib"` +echo "HADOOP_HOME=$hadoopHome" +export HADOOP_HOME=$hadoopHome +export HADOOP_LOG_DIR=$tmpdir/logs +export HADOOP_CONF_DIR=$hadoop_conf_dir + +# +## Build an index with map-reduce and deploy it to SolrCloud +## Add the --dry-run parameter to the end to see it run without +## actually putting the documents into Solr! +####################### + +source $solr_distrib/example/scripts/map-reduce/set-map-reduce-classpath.sh + +$hadoop_distrib/bin/hadoop --config $hadoop_conf_dir jar $solr_distrib/dist/solr-map-reduce-*.jar -D 'mapred.child.java.opts=-Xmx500m' -libjars "$HADOOP_LIBJAR" --morphline-file readAvroContainer.conf --zk-host 127.0.0.1:9983 --output-dir hdfs://127.0.0.1:8020/outdir --collection $collection --log4j log4j.properties --go-live --verbose "hdfs://127.0.0.1:8020/indir" +