Different behavior of Spark between spark-submit and spark-shell

I'm running Spark 1.3.1 (standalone, on Ubuntu 14.04) with SBT 0.13.10, and I'm trying to execute the following script:

package co.some.sheker 
import java.sql.Date 
import org.apache.spark.{SparkContext, SparkConf} 
import SparkContext._ 
import org.apache.spark.sql.{Row, SQLContext} 
import com.datastax.spark.connector._ 
import java.sql._ 
import org.apache.spark.sql._ 
import org.apache.spark.sql.cassandra.CassandraSQLContext 
import java.io.PushbackReader 
import java.lang.{ StringBuilder => JavaStringBuilder } 
import java.io.StringReader 
import com.datastax.spark.connector.cql.CassandraConnector 
import org.joda.time.{DateTimeConstants} 

case class TableKey(key1: String, key2: String) 

object myclass {
  def main(args: scala.Array[String]) {
    val conf = ...
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    val csc = new CassandraSQLContext(sc)
    val data_x = csc.sql("select distinct key1, key2 from keyspace.table where key1 = 'sheker'").map(row => (row(0).toString, row(1).toString))
    println("Done cross mapping")
    val snapshotsFiltered = data_x.map(x => TableKey(x._1, x._2)).joinWithCassandraTable("keyspace", "table")
    println("Done join")
    val jsons = snapshotsFiltered.map(_._2.getString("json"))
    ...

    sc.stop()
    println("Done.")
  }
}

I submit it with:

/home/user/spark-1.3.1/bin/spark-submit --master spark://1.1.1.1:7077 --driver-class-path /home/user/spark-cassandra-connector-java-assembly-1.3.1-FAT.jar --properties-file prop.conf --class "myclass" "myjar.jar" 

The prop.conf file is:

spark.cassandra.connection.host myhost 
spark.serializer org.apache.spark.serializer.KryoSerializer 
spark.eventLog.enabled true 
spark.eventLog.dir /var/tmp/eventLog 
spark.executor.extraClassPath /home/ubuntu/spark-cassandra-connector-java-assembly-1.3.1-FAT.jar 

And I get this exception:

Done cross mapping 
Exception in thread "main" java.lang.NoSuchMethodError: com.datastax.spark.connector.mapper.ColumnMapper$.defaultColumnMapper(Lscala/reflect/ClassTag;Lscala/reflect/api/TypeTags$TypeTag;)Lcom/datastax/spark/connector/mapper/ColumnMapper; 
    at co.crowdx.aggregation.CassandraToElasticTransformater$.main(CassandraToElasticTransformater.scala:79) 
    at co.crowdx.aggregation.CassandraToElasticTransformater.main(CassandraToElasticTransformater.scala) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:606) 
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:569) 
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:166) 
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:189) 
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:110) 
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) 
Done Sending Signal aggregation job to Spark 

The strange part is that when I run the same commands from the script in the shell, they work fine. I'm using:

/home/user/spark-1.3.1/bin/spark-shell --master spark://1.1.1.1:7077 --driver-class-path /home/ubuntu/spark-cassandra-connector-java-assembly-1.3.1-FAT.jar --properties-file prop.conf 

The Build.scala file is:

import sbt._ 
import Keys._ 
import sbtassembly.Plugin._ 
import AssemblyKeys._ 

object AggregationsBuild extends Build {
  lazy val buildSettings = Defaults.defaultSettings ++ Seq(
    version := "1.0.0",
    organization := "co.sheker",
    scalaVersion := "2.10.4"
  )

  lazy val app = Project(
    "geo-aggregations",
    file("."),
    settings = buildSettings ++ assemblySettings ++ Seq(
      parallelExecution in Test := false,
      libraryDependencies ++= Seq(
        "com.datastax.spark" %% "spark-cassandra-connector" % "1.2.1",
        // spark will already be on classpath when using spark-submit.
        // marked as provided, so that it isn't included in assembly.
        "org.apache.spark" %% "spark-core" % "1.2.1" % "provided",
        "org.apache.spark" %% "spark-catalyst" % "1.2.1" % "provided",
        "org.apache.spark" %% "spark-sql" % "1.2.1" % "provided",
        "org.scalatest" %% "scalatest" % "2.1.5" % "test",
        "org.postgresql" % "postgresql" % "9.4-1201-jdbc41",
        "com.github.nscala-time" %% "nscala-time" % "2.4.0",
        "org.elasticsearch" % "elasticsearch-hadoop" % "2.2.0" % "provided"
      ),
      resolvers += "conjars.org" at "http://conjars.org/repo",
      resolvers += "clojars" at "https://clojars.org/repo"
    )
  )
}

What is wrong? Why does it fail with spark-submit but work in the shell?

How did you create the jar you are submitting? You need to build an assembly, not a plain package. – Reactormonk

@Reactormonk - Yes, I run "sbt assembly", which produces my "myjar.jar" file. – Rada

You say you are using Spark 1.3, but your build declares Spark 1.2.1 dependencies. Have you checked that? I believe the version of your Spark driver is different from the one in your application, which would cause the error you are getting. – eliasah

Answer

You say you are using Spark 1.3, but your build declares Spark 1.2.1 dependencies.

As I said in the comments, I believe the version of your Spark driver is different from the one used in your application, and that is what causes the error you are getting.
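
For illustration only (not part of the original answer): a minimal sketch of how the dependency block in Build.scala could be aligned with the 1.3.1 cluster. The connector patch version shown is an assumption; verify it against the Spark Cassandra Connector compatibility table before pinning it.

// Sketch: dependencies aligned on Spark 1.3.1; connector 1.3.1 is assumed
// to be the matching 1.3.x release (check the compatibility table).
libraryDependencies ++= Seq(
  "com.datastax.spark" %% "spark-cassandra-connector" % "1.3.1",
  // Spark itself is on the classpath at runtime via spark-submit,
  // so keep it "provided" and out of the assembly jar.
  "org.apache.spark" %% "spark-core"     % "1.3.1" % "provided",
  "org.apache.spark" %% "spark-catalyst" % "1.3.1" % "provided",
  "org.apache.spark" %% "spark-sql"      % "1.3.1" % "provided"
)

After changing the versions, rebuild the fat jar with "sbt assembly" and resubmit; the NoSuchMethodError should go away once the connector in the jar is compiled against the same Spark API the cluster is running.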