Example
scala> val rdd = sc.parallelize(Seq(
     |   ("math", 55),
     |   ("math", 56),
     |   ("english", 57),
     |   ("english", 58),
     |   ("science", 59),
     |   ("science", 54)))
rdd: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[31] at parallelize at <console>:21

scala> //Example : countByKey()

scala> val result1 = rdd.countByKey()
result1: scala.collection.Map[String,Long] = Map(math -> 2, english -> 2, science -> 2)

scala> //Example : collectAsMap()

scala> val result2 = rdd.collectAsMap()
result2: scala.collection.Map[String,Int] = Map(math -> 56, science -> 54, english -> 58)

scala> //Example : lookup()

scala> val result3 = rdd.lookup("math")
result3: Seq[Int] = WrappedArray(55, 56)