#!/bin/bash
# Convenience script to run hashing over a set of repositories
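#
# Usage: ./hash <args...>
# Arguments are passed straight through to HashSparkApp (typically one or
# more paths to the repositories to hash).
#
# Behaviour can be tuned through the MASTER, DRIVER_MEMORY, EXECUTOR_MEMORY
# and SPARK_HOME environment variables, all used below.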
E_NO_SPARK=2
E_BUILD_FAILED=1
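
# Artifacts produced by the sbt assembly build; built on demand below if missing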
jar="target/gemini-uber.jar"
deps_jar="target/gemini-deps.jar"
build_command="./sbt assembly"
build_deps_command="./sbt assemblyPackageDependency"
current_dir="$(dirname "$0")"
app_class="tech.sourced.gemini.cmd.HashSparkApp"
app_name="Gemini - hashing"
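
# 'hash' is a bash builtin; it exits non-zero when the command is not found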
hash java >/dev/null 2>&1 || { echo "Please install Java" >&2; exit 1; }
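
# Build the dependency jar and the application jar if they are missing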
if [[ ! -f "${deps_jar}" ]]; then
echo "${deps_jar} not found. Running build '${build_deps_command}'"
if ! $build_deps_command ; then
exit "${E_BUILD_FAILED}"
fi
fi
if [[ ! -f "${jar}" ]]; then
echo "${jar} not found. Running build '${build_command}'"
if ! $build_command ; then
exit "${E_BUILD_FAILED}"
fi
fi
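
# Use spark-submit from PATH when available, otherwise fall back to SPARK_HOME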
sparkSubmit() {
    if hash spark-submit 2>/dev/null; then
        echo "Using spark-submit from PATH, which is $(which spark-submit)"
        exec spark-submit "$@"
    elif [[ -n "${SPARK_HOME}" ]]; then
        echo "Using spark-submit from ${SPARK_HOME}"
        exec "${SPARK_HOME}/bin/spark-submit" "$@"
    else
        echo "Please install and configure Apache Spark, or set SPARK_HOME" >&2
        exit "${E_NO_SPARK}"
    fi
}
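
# Optional Spark settings; add any of these to the spark-submit call below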
# --conf "spark.local.dir=/spark-temp-data" \
# --conf "spark.executor.extraJavaOptions=-Djava.io.tmpdir=/spark-temp-data" \
# --conf "spark.eventLog.enabled=true" \
# --conf "spark.eventLog.dir=/repositories"
# https://issues.apache.org/jira/browse/SPARK-16784
# spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j-spark.properties
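
# Submit the hashing job; all script arguments are forwarded to the app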
sparkSubmit \
    --class "${app_class}" \
    --master "${MASTER:=local[*]}" \
    --name "${app_name}" \
    --jars "${deps_jar}" \
    --conf "spark.driver.memory=${DRIVER_MEMORY:=2g}" \
    --conf "spark.executor.memory=${EXECUTOR_MEMORY:=4g}" \
    --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
    --conf "spark.tech.sourced.engine.skip.read.errors=true" \
    --conf "spark.files.maxPartitionBytes=12582912" \
    --driver-java-options "-Dlog4j.configuration=jar:file:${jar}!/log4j.properties" \
    "${jar}" \
    "$@"