06 December 2015

join(), leftOuterJoin() & rightOuterJoin() Example

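Example (spark-shell session): join() keeps only the keys present in both RDDs, leftOuterJoin() keeps every key of the left RDD, and rightOuterJoin() keeps every key of the right RDD; in the outer joins the side that may be missing is wrapped in an Option and shows up as None.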
scala> //Input Data
scala> val rdd1 = sc.parallelize(Seq(
     |                ("math",    55),
     |                ("math",    56),
     |                ("english", 57),
     |                ("english", 58),
     |                ("science", 59),
     |                ("science", 54)))
rdd1: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[45] at parallelize at <console>:21

scala> val rdd2 = sc.parallelize(Seq(
     |                ("math",    60),
     |                ("math",    65),
     |                ("science", 61),
     |                ("science", 62),
     |                ("history", 63),
     |                ("history", 64)))
rdd2: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[46] at parallelize at <console>:21

scala> //join() Example
scala> val joined = rdd1.join(rdd2)
joined: org.apache.spark.rdd.RDD[(String, (Int, Int))] = MapPartitionsRDD[49] at join at <console>:25

scala> //Result
scala> joined.collect()
res15: Array[(String, (Int, Int))] = Array((math,(55,60)), (math,(55,65)), (math,(56,60)), (math,(56,65)), (science,(59,61)), (science,(59,62)), (science,(54,61)), (science,(54,62)))

scala> //leftOuterJoin() Example
scala> val leftJoined = rdd1.leftOuterJoin(rdd2)
leftJoined: org.apache.spark.rdd.RDD[(String, (Int, Option[Int]))] = MapPartitionsRDD[52] at leftOuterJoin at <console>:25

scala> //Result
scala> leftJoined.collect()
res16: Array[(String, (Int, Option[Int]))] = Array((math,(55,Some(60))), (math,(55,Some(65))), (math,(56,Some(60))), (math,(56,Some(65))), (english,(57,None)), (english,(58,None)), (science,(59,Some(61))), (science,(59,Some(62))), (science,(54,Some(61))), (science,(54,Some(62))))

scala> //rightOuterJoin() Example
scala> val rightJoined = rdd1.rightOuterJoin(rdd2)
rightJoined: org.apache.spark.rdd.RDD[(String, (Option[Int], Int))] = MapPartitionsRDD[55] at rightOuterJoin at <console>:25

scala> //Result
scala> rightJoined.collect()
res17: Array[(String, (Option[Int], Int))] = Array((math,(Some(55),60)), (math,(Some(55),65)), (math,(Some(56),60)), (math,(Some(56),65)), (history,(None,63)), (history,(None,64)), (science,(Some(59),61)), (science,(Some(59),62)), (science,(Some(54),61)), (science,(Some(54),62)))