Tuesday, 15 March 2011

Spark App exception running in IntelliJ IDEA -



Spark App exception running in IntelliJ IDEA -

Can anyone help me out?

I am running a Spark app in IntelliJ IDEA.

/**
 * Word-frequency sentiment driver: loads positive/negative word lists and a
 * stop-word list, tokenizes the unsupervised review corpus, and prints the
 * top 25 positive and negative words by frequency.
 *
 * NOTE(review): reconstructed from a scrape-mangled (lowercased) paste; the
 * identifier casing below restores the standard Spark API names, and the
 * phrase "bring together" in the paste has been restored to the RDD `join`
 * operator. Runtime string literals are kept exactly as posted.
 */
object maindriver {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("spark sentiment analysis").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Opinion-lexicon word lists (Hu & Liu) and the NLTK English stop words.
    val posWords = sc.textFile("src/main/resources/hu_liu_positive_word_list.txt")
    val negWords = sc.textFile("src/main/resources/hu_liu_negative_word_list.txt")
    val nltkStopWords = sc.textFile("src/main/resources/stopwords/english")

    // Extra corpus-specific stop words (contractions without apostrophes,
    // HTML artifacts like "br", and proper names frequent in movie reviews).
    val moreStopWds = sc.parallelize(List(
      "cant", "didnt", "doesnt", "dont", "goes", "isnt", "hes", "shes",
      "thats", "theres", "theyre", "wont", "youll", "youre", "youve",
      "br", "ve", "re", "vs", "dick", "ginger", "hollywood", "jack",
      "jill", "john", "karloff", "kudrow", "orson", "peter", "tcm",
      "tom", "toni", "welles", "william", "wolheim", "nikita"))

    // Merge both stop-word sources, drop blanks, and broadcast the final
    // list so every task can filter tokens without reshuffling.
    val stopWordsRDD = (nltkStopWords union moreStopWds).filter(_ != "").cache()
    val stopWordsList = sc.broadcast(stopWordsRDD.collect())

    // One (path, fileContents) pair per review file.
    val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup")

    // Tokenize each review: keep letters/digits, split on spaces, drop
    // empty/1-char tokens, lowercase, and remove stop words.
    val parsedTrainUnsup = inTrainUnsup mapValues (
      _ map {
        case c: Char if Character.isLetterOrDigit(c) => c
        case _ => ' '
      } split " " filter (_.trim() != "") filter (_.length() > 1)
        map (_.toLowerCase()) filter (!stopWordsList.value.contains(_))
    )

    // Global word-frequency distribution over all reviews.
    val wordFreqDist = parsedTrainUnsup flatMap { case (x, y) => y } map (w => (w, 1)) reduceByKey (_ + _)

    // Keep only words that appear in the positive lexicon; value is the count.
    val posItems = (posWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedPosItems = posItems map (_.swap) sortByKey (false) map (_.swap) // this is not useful now...

    // Same for the negative lexicon.
    val negItems = (negWords map ((_, -1))) join wordFreqDist mapValues { case (x, y) => y }
    val sortedNegItems = negItems map (_.swap) sortByKey (false) map (_.swap) // this is not useful now...

    // Get top 25 hot items: implicit Ordering so top(25) sorts pairs by
    // their second element (the frequency count).
    implicit val pairSortByValue = new Ordering[(String, Int)] {
      override def compare(a: (String, Int), b: (String, Int)) = a._2 compare b._2
    }

    println("top 25 positive words in unsup dataset")
    posItems.top(25).foreach(println)
    println("top 25 negative words in unsup dataset")
    negItems.top(25).foreach(println)

    sc.stop()
  }
}

This code runs fine if I use spark-submit.

But it throws exceptions when run directly inside IntelliJ IDEA (menu: Run > Run...). After looking into it, the problem seems to be the line `val inTrainUnsup = sc.wholeTextFiles("src/main/resources/reviews/train/unsup")`, because calling `inTrainUnsup.saveAsTextFile("test file")` alone throws the same exception.

14/11/11 10:21:07 error executor.executor: exception in task 0.0 in stage 1.0 (tid 4) java.lang.runtimeexception: java.lang.reflect.invocationtargetexception @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:164) @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.<init>(combinefilerecordreader.java:126) @ org.apache.spark.input.wholetextfileinputformat.createrecordreader(wholetextfileinputformat.scala:44) @ org.apache.spark.rdd.newhadooprdd$$anon$1.<init>(newhadooprdd.scala:115) @ org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:103) @ org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:65) @ org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) @ org.apache.spark.rdd.rdd.iterator(rdd.scala:229) @ org.apache.spark.rdd.mappedrdd.compute(mappedrdd.scala:31) @ org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) @ org.apache.spark.rdd.rdd.iterator(rdd.scala:229) @ org.apache.spark.scheduler.resulttask.runtask(resulttask.scala:62) @ org.apache.spark.scheduler.task.run(task.scala:54) @ org.apache.spark.executor.executor$taskrunner.run(executor.scala:177) @ java.util.concurrent.threadpoolexecutor$worker.runtask(threadpoolexecutor.java:895) @ java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:918) @ java.lang.thread.run(thread.java:695) caused by: java.lang.reflect.invocationtargetexception @ sun.reflect.nativeconstructoraccessorimpl.newinstance0(native method) @ sun.reflect.nativeconstructoraccessorimpl.newinstance(nativeconstructoraccessorimpl.java:39) @ sun.reflect.delegatingconstructoraccessorimpl.newinstance(delegatingconstructoraccessorimpl.java:27) @ java.lang.reflect.constructor.newinstance(constructor.java:513) @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:155) ... 
16 more caused by: java.lang.incompatibleclasschangeerror: found class org.apache.hadoop.mapreduce.taskattemptcontext, interface expected @ org.apache.spark.input.wholetextfilerecordreader.<init>(wholetextfilerecordreader.scala:40) ... 21 more 14/11/11 10:21:07 error executor.executor: exception in task 1.0 in stage 1.0 (tid 5) java.lang.runtimeexception: java.lang.reflect.invocationtargetexception @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:164) @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.<init>(combinefilerecordreader.java:126) @ org.apache.spark.input.wholetextfileinputformat.createrecordreader(wholetextfileinputformat.scala:44) @ org.apache.spark.rdd.newhadooprdd$$anon$1.<init>(newhadooprdd.scala:115) @ org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:103) @ org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:65) @ org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) @ org.apache.spark.rdd.rdd.iterator(rdd.scala:229) @ org.apache.spark.rdd.mappedrdd.compute(mappedrdd.scala:31) @ org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) @ org.apache.spark.rdd.rdd.iterator(rdd.scala:229) @ org.apache.spark.scheduler.resulttask.runtask(resulttask.scala:62) @ org.apache.spark.scheduler.task.run(task.scala:54) @ org.apache.spark.executor.executor$taskrunner.run(executor.scala:177) @ java.util.concurrent.threadpoolexecutor$worker.runtask(threadpoolexecutor.java:895) @ java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:918) @ java.lang.thread.run(thread.java:695) caused by: java.lang.reflect.invocationtargetexception @ sun.reflect.nativeconstructoraccessorimpl.newinstance0(native method) @ sun.reflect.nativeconstructoraccessorimpl.newinstance(nativeconstructoraccessorimpl.java:39) @ sun.reflect.delegatingconstructoraccessorimpl.newinstance(delegatingconstructoraccessorimpl.java:27) @ 
java.lang.reflect.constructor.newinstance(constructor.java:513) @ org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:155) ... 16 more caused by: java.lang.incompatibleclasschangeerror: found class org.apache.hadoop.mapreduce.taskattemptcontext, interface expected @ org.apache.spark.input.wholetextfilerecordreader.<init>(wholetextfilerecordreader.scala:40) ... 21 more 14/11/11 10:21:07 warn scheduler.tasksetmanager: lost task 1.0 in stage 1.0 (tid 5, localhost): java.lang.runtimeexception: java.lang.reflect.invocationtargetexception org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:164) org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.<init>(combinefilerecordreader.java:126) org.apache.spark.input.wholetextfileinputformat.createrecordreader(wholetextfileinputformat.scala:44) org.apache.spark.rdd.newhadooprdd$$anon$1.<init>(newhadooprdd.scala:115) org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:103) org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:65) org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) org.apache.spark.rdd.rdd.iterator(rdd.scala:229) org.apache.spark.rdd.mappedrdd.compute(mappedrdd.scala:31) org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) org.apache.spark.rdd.rdd.iterator(rdd.scala:229) org.apache.spark.scheduler.resulttask.runtask(resulttask.scala:62) org.apache.spark.scheduler.task.run(task.scala:54) org.apache.spark.executor.executor$taskrunner.run(executor.scala:177) java.util.concurrent.threadpoolexecutor$worker.runtask(threadpoolexecutor.java:895) java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:918) java.lang.thread.run(thread.java:695) 14/11/11 10:21:07 error scheduler.tasksetmanager: task 1 in stage 1.0 failed 1 times; aborting job 14/11/11 10:21:07 info scheduler.taskschedulerimpl: removed taskset 1.0, tasks have completed, pool 
14/11/11 10:21:07 info scheduler.tasksetmanager: lost task 0.0 in stage 1.0 (tid 4) on executor localhost: java.lang.runtimeexception (java.lang.reflect.invocationtargetexception) [duplicate 1] 14/11/11 10:21:07 info scheduler.taskschedulerimpl: removed taskset 1.0, tasks have completed, pool 14/11/11 10:21:07 info scheduler.taskschedulerimpl: cancelling stage 1 14/11/11 10:21:07 info scheduler.dagscheduler: failed run saveastextfile @ maindriver.scala:30 exception in thread "main" org.apache.spark.sparkexception: job aborted due stage failure: task 1 in stage 1.0 failed 1 times, recent failure: lost task 1.0 in stage 1.0 (tid 5, localhost): java.lang.runtimeexception: java.lang.reflect.invocationtargetexception org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.initnextrecordreader(combinefilerecordreader.java:164) org.apache.hadoop.mapreduce.lib.input.combinefilerecordreader.<init>(combinefilerecordreader.java:126) org.apache.spark.input.wholetextfileinputformat.createrecordreader(wholetextfileinputformat.scala:44) org.apache.spark.rdd.newhadooprdd$$anon$1.<init>(newhadooprdd.scala:115) org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:103) org.apache.spark.rdd.newhadooprdd.compute(newhadooprdd.scala:65) org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) org.apache.spark.rdd.rdd.iterator(rdd.scala:229) org.apache.spark.rdd.mappedrdd.compute(mappedrdd.scala:31) org.apache.spark.rdd.rdd.computeorreadcheckpoint(rdd.scala:262) org.apache.spark.rdd.rdd.iterator(rdd.scala:229) org.apache.spark.scheduler.resulttask.runtask(resulttask.scala:62) org.apache.spark.scheduler.task.run(task.scala:54) org.apache.spark.executor.executor$taskrunner.run(executor.scala:177) java.util.concurrent.threadpoolexecutor$worker.runtask(threadpoolexecutor.java:895) java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:918) java.lang.thread.run(thread.java:695) driver stacktrace: @ 
org.apache.spark.scheduler.dagscheduler.org$apache$spark$scheduler$dagscheduler$$failjobandindependentstages(dagscheduler.scala:1185) @ org.apache.spark.scheduler.dagscheduler$$anonfun$abortstage$1.apply(dagscheduler.scala:1174) @ org.apache.spark.scheduler.dagscheduler$$anonfun$abortstage$1.apply(dagscheduler.scala:1173) @ scala.collection.mutable.resizablearray$class.foreach(resizablearray.scala:59) @ scala.collection.mutable.arraybuffer.foreach(arraybuffer.scala:47) @ org.apache.spark.scheduler.dagscheduler.abortstage(dagscheduler.scala:1173) @ org.apache.spark.scheduler.dagscheduler$$anonfun$handletasksetfailed$1.apply(dagscheduler.scala:688) @ org.apache.spark.scheduler.dagscheduler$$anonfun$handletasksetfailed$1.apply(dagscheduler.scala:688) @ scala.option.foreach(option.scala:236) @ org.apache.spark.scheduler.dagscheduler.handletasksetfailed(dagscheduler.scala:688) @ org.apache.spark.scheduler.dagschedulereventprocessactor$$anonfun$receive$2.applyorelse(dagscheduler.scala:1391) @ akka.actor.actorcell.receivemessage(actorcell.scala:498) @ akka.actor.actorcell.invoke(actorcell.scala:456) @ akka.dispatch.mailbox.processmailbox(mailbox.scala:237) @ akka.dispatch.mailbox.run(mailbox.scala:219) @ akka.dispatch.forkjoinexecutorconfigurator$akkaforkjointask.exec(abstractdispatcher.scala:386) @ scala.concurrent.forkjoin.forkjointask.doexec(forkjointask.java:260) @ scala.concurrent.forkjoin.forkjoinpool$workqueue.runtask(forkjoinpool.java:1339) @ scala.concurrent.forkjoin.forkjoinpool.runworker(forkjoinpool.java:1979) @ scala.concurrent.forkjoin.forkjoinworkerthread.run(forkjoinworkerthread.java:107)

intellij-idea apache-spark

No comments:

Post a Comment