object SqlOnGpuExample {

  /**
   * Entry point for the GPU SQL example.
   *
   * Builds (or reuses) a [[SparkSession]], materializes a tiny in-memory
   * DataFrame via `sc.parallelize`, registers it as a temp view, and runs a
   * simple filtered SELECT — printing the physical plan first (so GPU
   * operators can be inspected) and then the result rows.
   *
   * NOTE(review): reconstructed from a garbled unified-diff paste. The patch
   * this chunk came from deliberately removed the other demo methods
   * (crossJoin, leftJoin, whereSelect, unionSelect, filesInteraction); they
   * are intentionally not restored here.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .getOrCreate()
    val sc = spark.sparkContext

    import spark.implicits._

    val viewName = "df"
    // The patched example builds the DataFrame through an RDD
    // (sc.parallelize) rather than Seq(...).toDF directly.
    val df = sc.parallelize(Seq(1, 2, 3)).toDF("value")
    df.createOrReplaceTempView(viewName)

    // Single source of truth for the query: the original repeated the
    // interpolated string for explain() and show().
    val query = s"SELECT value FROM $viewName WHERE value <> 1"
    spark.sql(query).explain() // physical plan — check for GPU operators here
    spark.sql(query).show()

    spark.stop()
  }
}
0 commit comments