Spark 支持 standalone、YARN、Mesos 等多种运行模式,其中 standalone 模式主要用于线下环境的测试,线上都采用 YARN 或者 Mesos 进行资源的管控、容错。
1
2
3
4
5
6
7
#!/usr/bin/env bash
# spark-env.sh — environment for Spark 2.3.0 running against Hadoop 2.7.6.
export JAVA_HOME=/export/servers/jdk1.8.0_172
export SPARK_HOME=/export/App/spark-2.3.0
export HADOOP_HOME=/export/App/hadoop-2.7.6
# Point Spark at the Hadoop client configuration (core-site.xml, yarn-site.xml, ...).
export HADOOP_CONF_DIR="${HADOOP_HOME}/etc/hadoop"
# "Hadoop-free" Spark builds need the Hadoop jars on the classpath;
# quote the command substitution so the path survives embedded spaces.
export SPARK_DIST_CLASSPATH="$("${HADOOP_HOME}/bin/hadoop" classpath)"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env bash
# Bring up a Spark standalone cluster and run the bundled examples in
# local, standalone and YARN modes (Spark 2.3.0).
SPARK_HOME=/export/App/spark-2.3.0
SPARK_MASTER_HOST=10.194.138.200
SPARK_MASTER_PORT=7077

# Start the standalone master, then attach one worker to it.
"$SPARK_HOME/sbin/start-master.sh"
"$SPARK_HOME/sbin/start-slave.sh" "spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT}"

# Local mode with 10 threads. The original "--master 10.194.138.200[10]"
# is not a valid master URL — local mode uses local[N]. run-example
# options must also come BEFORE the example class name.
"$SPARK_HOME/bin/run-example" --master 'local[10]' SparkPi 1000 > /tmp/Sparkpilog.txt

# Spark standalone cluster mode.
"$SPARK_HOME/bin/spark-submit" \
  --master "spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT}" \
  --class org.apache.spark.examples.SparkPi \
  examples/jars/spark-examples_2.11-2.3.0.jar 10000

# YARN cluster mode: Pi estimation. (Note: "//" is not a shell comment
# character; the original "// pi" lines would have been run as commands.)
"$SPARK_HOME/bin/spark-submit" --master yarn \
  --deploy-mode cluster \
  --class org.apache.spark.examples.SparkPi \
  examples/jars/spark-examples_2.11-2.3.0.jar 10000

# YARN cluster mode: word count over HDFS input/output.
"$SPARK_HOME/bin/spark-submit" --master yarn \
  --deploy-mode cluster \
  --class org.apache.spark.examples.JavaWordCount \
  examples/jars/spark-examples_2.11-2.3.0.jar hdfs://10.194.138.200:9000/tmp/sample.txt hdfs://10.194.138.200:9000/tmp/output

# "--master yarn-cluster" is deprecated since Spark 2.0; the supported
# spelling is "--master yarn --deploy-mode cluster".
"$SPARK_HOME/bin/spark-submit" --class org.apache.spark.examples.JavaWordCount \
  --master yarn --deploy-mode cluster \
  examples/jars/spark-examples_2.11-2.3.0.jar /tmp/sample.txt

# Spark Streaming demo: feed log lines to a netcat socket, then run the
# NetworkWordCount example against it. The literal "$ " prompts in the
# original were not valid shell syntax; the feeder is backgrounded so
# both commands can run from one script (originally two terminals).
tail -f /data1/cfs/log/*/*.log | nc -lk 9999 &
"$SPARK_HOME/bin/run-example" org.apache.spark.examples.streaming.NetworkWordCount 10.199.136.40 9999 > /tmp/output.txt
需要额外配置(例如开启 History Server / 事件日志)时,修改:
$SPARK_HOME/conf/spark-defaults.conf
- https://blog.csdn.net/u011094454/article/details/78992293
- https://www.jianshu.com/p/dd7c7243e7f9?from=singlemessage
- Spark和MapReduce相比,都有哪些优势? - 简书