Skip to content

Instantly share code, notes, and snippets.

@2efPer
Last active May 13, 2019 03:24
Show Gist options
  • Select an option

  • Save 2efPer/433e4d9ef454156bf804bedfdc564250 to your computer and use it in GitHub Desktop.

Select an option

Save 2efPer/433e4d9ef454156bf804bedfdc564250 to your computer and use it in GitHub Desktop.
cheatsheet
// Column helpers used below, imported here so this snippet runs on its own.
import org.apache.spark.sql.functions.{array, col, explode}

// Create a DataFrame with columns (timestamp, value, words) from an in-memory Seq.
// Assumes `sc` and the session implicits (needed for `toDF`) are in scope, as in a Spark shell.
val df = sc.parallelize(Seq(
  (1441637160, 10.0, "hello"),
  (1441637170, 20.0, "world"),
  (1441637180, 30.0, "test"),
  (1441637210, 40.0, "word"),
  (1441637220, 10.0, "test2"),
  (1441637230, 0.0, "test3"))).toDF("timestamp", "value", "words")

// Turn one row into multiple rows: pack `value` and `words` into an array, then
// `explode` emits one output row per array element in a new column `newrow`.
// `array` needs all inputs to share one type, so the Double `value` is cast to
// string explicitly instead of relying on implicit type coercion.
df.withColumn("newrow", explode(array(col("value").cast("string"), col("words")))).
  drop("value", "words")
// JSON manipulation: serialize a struct column to JSON strings and collect them per key
import org.apache.spark.sql.functions._
/** Sample record with three string fields; `type` is a Scala keyword, hence the backticks. */
case class Obj(
  name: String,
  `type`: String,
  code: String
)
// Build (key, object) rows; `object` becomes a struct column derived from Obj.
val keyedObjects = Seq(
  "abc" -> Obj("file", "sample", "123"),
  "abc" -> Obj("image", "sample", "456"),
  "xyz" -> Obj("doc", "sample", "707")
).toDF("key", "object")

// Replace the struct with a one-element array holding its JSON encoding.
val jsonObjects = array(to_json(col("object"))).as("objects")
val df = keyedObjects.select(col("key"), jsonObjects)

// `false` disables truncation of long cell values in the printed table.
df.show(false)
// +---+-----------------------------------------------+
// |key|objects |
// +---+-----------------------------------------------+
// |abc|[{"name":"file","type":"sample","code":"123"}] |
// |abc|[{"name":"image","type":"sample","code":"456"}]|
// |xyz|[{"name":"doc","type":"sample","code":"707"}] |
// +---+-----------------------------------------------+
// For each key, gather element 0 of every row's `objects` array into a single list.
val objectsPerKey = collect_list(col("objects").getItem(0)).as("objects")
df.groupBy(col("key")).agg(objectsPerKey).show(false)
// +---+---------------------------------------------------------------------------------------------+
// |key|objects |
// +---+---------------------------------------------------------------------------------------------+
// |xyz|[{"name":"doc","type":"sample","code":"707"}] |
// |abc|[{"name":"file","type":"sample","code":"123"}, {"name":"image","type":"sample","code":"456"}]|
// +---+---------------------------------------------------------------------------------------------+
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment