Skip to content

Commit 20817ad

Browse files
YegorMaksymchuk (Yehor Maksymchuk)
authored and committed
EZSPA-698: Fix review comments
1 parent fb53052 commit 20817ad

File tree

1 file changed

+4
-124
lines changed

1 file changed

+4
-124
lines changed

examples/spark-3.2.0/apps/src/main/scala/com/hpe/examples/spark/sql/SqlOnGpuExample.scala

Lines changed: 4 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -7,138 +7,18 @@ object SqlOnGpuExample {
77
def main(args: Array[String]): Unit = {
88
val spark = SparkSession.builder()
99
.getOrCreate()
10+
val sc = spark.sparkContext
1011

11-
simpleSelect(spark)
12-
crossJoin(spark)
13-
leftJoin(spark)
14-
whereSelect(spark)
15-
unionSelect(spark)
16-
filesInteraction(spark)
17-
spark.stop()
18-
}
19-
20-
def simpleSelect(spark: SparkSession): Unit = {
21-
println("SIMPLE SELECT")
2212
import spark.implicits._
13+
2314
val viewName = "df"
24-
val df = Seq(1, 2, 3).toDF("value")
15+
val df = sc.parallelize(Seq(1, 2, 3)).toDF("value")
2516
df.createOrReplaceTempView(viewName)
2617

2718
spark.sql(s"SELECT value FROM $viewName WHERE value <>1").explain()
2819
spark.sql(s"SELECT value FROM $viewName WHERE value <>1").show()
29-
}
30-
31-
def crossJoin(spark: SparkSession): Unit = {
32-
println("CROSS JOIN")
33-
import spark.implicits._
34-
35-
val sc = spark.sparkContext
36-
val recordsAmount = 50
37-
38-
39-
val df1 = sc.parallelize(1 to recordsAmount, 1)
40-
.toDF("test")
41-
42-
val df2 = sc.parallelize(recordsAmount to recordsAmount * 2, 1)
43-
.toDF("test")
44-
45-
df1.crossJoin(df2).sample(0.1).explain()
46-
df1.crossJoin(df2).sample(0.1).show()
47-
}
48-
49-
def leftJoin(spark: SparkSession): Unit = {
50-
println("LEFT JOIN")
51-
52-
import spark.implicits._
53-
val firstViewName = "leftJoinFirst"
54-
val secondViewName = "leftJoinSecond"
55-
56-
Seq((1, "test1"), (2, "test2"), (3, "test3"))
57-
.toDF("key", "string")
58-
.createTempView(firstViewName)
59-
60-
Seq((1, "test4"), (4, "test5"), (2, "test6"))
61-
.toDF("key", "string")
62-
.createTempView(secondViewName)
63-
64-
65-
val df = spark.sql(s"SELECT * FROM $firstViewName LEFT JOIN " +
66-
s"leftJoinSecond ON $firstViewName.key=$secondViewName.key")
67-
68-
df.explain()
69-
df.show()
70-
71-
}
72-
73-
def whereSelect(spark: SparkSession): Unit = {
74-
println("WHERE SELECT")
75-
76-
import spark.implicits._
77-
78-
79-
val whereSelectView = "whereSelectView"
80-
Seq(
81-
(1, "first"),
82-
(2, "second"),
83-
(3, "third")
84-
).toDF("key", "string")
85-
.createTempView(whereSelectView)
86-
87-
val df = spark.sql(s"SELECT * FROM $whereSelectView WHERE key >= 2")
88-
89-
df.explain()
90-
df.show()
91-
}
92-
93-
def unionSelect(spark: SparkSession): Unit = {
94-
println("UNION SELECT")
95-
96-
import spark.implicits._
97-
98-
val firstDF = Seq(
99-
(1, "first"),
100-
(2, "second"),
101-
(3, "third")
102-
).toDF("key", "string")
103-
104-
val secondDF = Seq(
105-
(100, "John"),
106-
(200, "Mike"),
107-
(300, "Bob")
108-
).toDF("age", "name")
109-
110-
val resultDF = firstDF.union(secondDF)
111-
112-
resultDF.explain()
113-
resultDF.show()
114-
}
115-
116-
def filesInteraction(spark: SparkSession): Unit = {
117-
println("FILES INTERACTION")
118-
import spark.implicits._
119-
val pathPrefix = s"/tmp/filesInteraction${System.currentTimeMillis()}"
120-
121-
Set("parquet", "orc", "json").foreach(
122-
format => {
123-
println(format)
124-
println("Write")
125-
val path = s"$pathPrefix$format"
126-
(1 to 100)
127-
.toDF("val")
128-
.write
129-
.format(format)
130-
.save(path)
131-
132-
println("Read")
133-
val df = spark.read
134-
.format(format)
135-
.load(path)
136-
.filter($"val" >= 95)
137-
df.explain()
138-
df.show()
139-
}
140-
)
14120

21+
spark.stop()
14222
}
14323

14424
}

0 commit comments

Comments
 (0)