// Row schema shared by both example DataFrames.
case class App(id: Int, name: String)

// NOTE(review): `sc` and the implicits that provide `.toDF` are not defined in
// this snippet; presumably it is run in a spark-shell session where both are
// already in scope -- confirm before reusing it elsewhere.

// Left-hand DataFrame: ids 1, 2, 3 with lower-case names.
val data1 = sc.parallelize(Array((1, "a"), (2, "b"), (3, "c")))
// Build df1 from data1 (the original referenced an undefined `data`).
val df1 = data1.map { case (id, name) => App(id, name) }.toDF

// Right-hand DataFrame: ids 1, 3, 5 with upper-case names.
val data2 = sc.parallelize(Array((1, "A"), (3, "B"), (5, "C")))
// Build df2 from data2 (the original referenced an undefined `data`).
val df2 = data2.map { case (id, name) => App(id, name) }.toDF
// Print the contents of df1 as a text table (output follows).
df1.show()
+---+----+
| id|name|
+---+----+
|  1|   a|
|  2|   b|
|  3|   c|
+---+----+
// Print the contents of df2 as a text table (output follows).
df2.show()
+---+----+
| id|name|
+---+----+
|  1|   A|
|  3|   B|
|  5|   C|
+---+----+
// Join with no condition: every row of df1 is paired with every row of df2.
df1.join(df2) // Cartesian join
// Join on the shared column name "id": the result keeps a single `id` column
// (see the table below). `.show()` is added so the statement actually prints
// the output that follows, consistent with the other join examples.
df1.join(df2, "id").show()
+---+----+----+
| id|name|name|
+---+----+----+
|  1|   a|   A|
|  3|   c|   B|
+---+----+----+
// Join on an explicit equality condition between the two `id` columns.
// Unlike the column-name form, the result contains both `id` columns
// (see the table below).
df1.join(df2, df1("id") === df2("id")).show()
+---+----+---+----+
| id|name| id|name|
+---+----+---+----+
|  1|   a|  1|   A|
|  3|   c|  3|   B|
+---+----+---+----+
// Same condition-based join, then drop df2's `id` column so that only one
// `id` column remains in the result (see the table below).
df1.join(df2, df1("id") === df2("id")).drop(df2("id")).show()
+---+----+----+
| id|name|name|
+---+----+----+
|  1|   a|   A|
|  3|   c|   B|
+---+----+----+
没有评论:
发表评论