Example
scala> //Input Data
scala> val rdd1 = sc.parallelize(Seq(
| ("math", 55),
| ("math", 56),
| ("english", 57),
| ("english", 58),
| ("science", 59),
| ("science", 54)))
rdd1: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[45] at parallelize at <console>:21
scala> val rdd2 = sc.parallelize(Seq(
| ("math", 60),
| ("math", 65),
| ("science", 61),
| ("science", 62),
| ("history", 63),
| ("history", 64)))
rdd2: org.apache.spark.rdd.RDD[(String, Int)] = ParallelCollectionRDD[46] at parallelize at <console>:21
scala> //join() Example
scala> val joined = rdd1.join(rdd2)
joined: org.apache.spark.rdd.RDD[(String, (Int, Int))] = MapPartitionsRDD[49] at join at <console>:25
scala> //Result
scala> joined.collect()
res15: Array[(String, (Int, Int))] = Array((math,(55,60)), (math,(55,65)), (math,(56,60)), (math,(56,65)), (science,(59,61)), (science,(59,62)), (science,(54,61)), (science,(54,62)))
scala> //leftOuterJoin() Example
scala> val leftJoined = rdd1.leftOuterJoin(rdd2)
leftJoined: org.apache.spark.rdd.RDD[(String, (Int, Option[Int]))] = MapPartitionsRDD[52] at leftOuterJoin at <console>:25
scala> //Result
scala> leftJoined.collect()
res16: Array[(String, (Int, Option[Int]))] = Array((math,(55,Some(60))), (math,(55,Some(65))), (math,(56,Some(60))), (math,(56,Some(65))), (english,(57,None)), (english,(58,None)), (science,(59,Some(61))), (science,(59,Some(62))), (science,(54,Some(61))), (science,(54,Some(62))))
scala> //rightOuterJoin() Example
scala> val rightJoined = rdd1.rightOuterJoin(rdd2)
rightJoined: org.apache.spark.rdd.RDD[(String, (Option[Int], Int))] = MapPartitionsRDD[55] at rightOuterJoin at <console>:25
scala> //Result
scala> rightJoined.collect()
res17: Array[(String, (Option[Int], Int))] = Array((math,(Some(55),60)), (math,(Some(55),65)), (math,(Some(56),60)), (math,(Some(56),65)), (history,(None,63)), (history,(None,64)), (science,(Some(59),61)), (science,(Some(59),62)), (science,(Some(54),61)), (science,(Some(54),62)))