Commit 2be5972

SQL examples for JDBC Dataframes

1 parent 0bc9aba commit 2be5972
2 files changed: +171 -1 lines changed

build.sbt

Lines changed: 2 additions & 1 deletion

@@ -6,5 +6,6 @@ scalaVersion := "2.12.10"
 
 
 libraryDependencies ++= Seq(
-  "org.apache.spark" %% "spark-sql" % "3.1.2"
+  "org.apache.spark" %% "spark-sql" % "3.1.2",
+  "org.postgresql" % "postgresql" % "42.2.23"
 )
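
The new file below imports com.techmonad.learn.SparkSessionProvider, which is not part of this commit. A minimal sketch of such a trait, assuming it only needs to expose a local SparkSession named spark (the app name and master are assumptions):

package com.techmonad.learn

import org.apache.spark.sql.SparkSession

// Hypothetical sketch: the actual trait is not included in this commit.
trait SparkSessionProvider {
  // Local SparkSession used by the JDBC examples.
  lazy val spark: SparkSession =
    SparkSession.builder()
      .appName("jdbc-dataframe-examples")
      .master("local[*]")
      .getOrCreate()
}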
JDBCDataFrameOps.scala (new file)

Lines changed: 169 additions & 0 deletions

@@ -0,0 +1,169 @@
package com.techmonad.learn.jdbc

import com.techmonad.learn.SparkSessionProvider

/**
 * Postgres SQL queries:
 *
 * postgres=# create database test_db;
 * CREATE DATABASE
 * postgres=# \c test_db;
 * You are now connected to database "test_db" as user "postgres".
 * test_db=# \d
 * Did not find any relations.
 * test_db=# create table employee(id int primary key, name varchar(50), salary int, dept_id int, manager_id int);
 * CREATE TABLE
 * test_db=# insert into employee values(1, 'bob', 50, 1, 5);
 * INSERT 0 1
 * test_db=# insert into employee values(2, 'Rob', 500, 1, 5);
 * INSERT 0 1
 * test_db=# insert into employee values(3, 'Ravi', 1000, 1, 4);
 * INSERT 0 1
 * test_db=# insert into employee values(4, 'Rajan', 5000, 1, 0);
 * INSERT 0 1
 * test_db=# insert into employee values(5, 'Ragu', 5000, 1, 0);
 * INSERT 0 1
 * test_db=# insert into employee values(6, 'Boby', 4000, 2, 3);
 * INSERT 0 1
 * test_db=# insert into employee values(7, 'Bably', 3000, 2, 3);
 * INSERT 0 1
 * test_db=# insert into employee values(8, 'Boby singh', 2000, 2, 4);
 * INSERT 0 1
 * test_db=# insert into employee values(9, 'BabaJI', 5000, 2, 4);
 * INSERT 0 1
 * test_db=# insert into employee values(10, 'Bajarangi', 6000, 2, 0);
 * INSERT 0 1
 * test_db=# insert into employee values(11, 'Bajigar', 8000, 2, 10);
 * INSERT 0 1
 *
 * ### MAX salary by department
 * test_db=# select dept_id, max(salary) from employee group by dept_id order by dept_id;
 *  1 | 5000
 *  2 | 8000
 *
 * ### Employee name with manager name
 * test_db=# select e.name, m.name from employee e, employee m where e.manager_id = m.id;
 *  bob        | Ragu
 *  Rob        | Ragu
 *  Ravi       | Rajan
 *  Boby       | Ravi
 *  Bably      | Ravi
 *  Boby singh | Rajan
 *  BabaJI     | Rajan
 *  Bajigar    | Bajarangi
 *
 * ### Find the employees whose salary is greater than their manager's
 * test_db=# select e.name, e.salary, m.name as manager, m.salary manager_salary from employee e, employee m where e.manager_id = m.id and e.salary > m.salary;
 *   name    | salary | manager   | manager_salary
 * ----------+--------+-----------+----------------
 *  Boby     |   4000 | Ravi      |           1000
 *  Bably    |   3000 | Ravi      |           1000
 *  Bajigar  |   8000 | Bajarangi |           6000
 * (3 rows)
 *
 * ### Top two salaries by department
 * test_db=# select e.dept_id, e.name, e.salary from (select dept_id, name, salary, row_number() over (partition by dept_id order by salary desc) as rank from employee) e where rank < 3;
 *  dept_id |   name    | salary
 * ---------+-----------+--------
 *        1 | Rajan     |   5000
 *        1 | Ragu      |   5000
 *        2 | Bajigar   |   8000
 *        2 | Bajarangi |   6000
 * (4 rows)
 */
object JDBCDataFrameOps extends SparkSessionProvider {

  /**
   * SQL queries
   */
  def main(args: Array[String]): Unit = {
    // Read the employee table from Postgres over JDBC and register it as a temp view.
    val df =
      spark
        .read
        .format("jdbc")
        .option("driver", "org.postgresql.Driver")
        .option("url", "jdbc:postgresql://localhost/test_db?user=postgres&password=postgres")
        .option("dbtable", "employee")
        .load()
    df.createOrReplaceTempView("emp")

    /* ########### All rows ########### */
    spark.sql("select * from emp").show()
    /*
    +---+----------+------+-------+----------+
    | id|      name|salary|dept_id|manager_id|
    +---+----------+------+-------+----------+
    |  1|       bob|    50|      1|         5|
    |  2|       Rob|   500|      1|         5|
    |  3|      Ravi|  1000|      1|         4|
    |  4|     Rajan|  5000|      1|         0|
    |  5|      Ragu|  5000|      1|         0|
    |  6|      Boby|  4000|      2|         3|
    |  7|     Bably|  3000|      2|         3|
    |  8|Boby singh|  2000|      2|         4|
    |  9|    BabaJI|  5000|      2|         4|
    | 10| Bajarangi|  6000|      2|         0|
    | 11|   Bajigar|  8000|      2|        10|
    +---+----------+------+-------+----------+
    */

    /* ########### MAX salary by department ########### */
    spark.sql("select dept_id, max(salary) max_salary from emp group by dept_id order by dept_id").show()
    /*
    +-------+----------+
    |dept_id|max_salary|
    +-------+----------+
    |      1|      5000|
    |      2|      8000|
    +-------+----------+
    */

    /* ########### Employee name with manager name ########### */
    spark.sql("select e.name, m.name as manager_name from emp e, emp m where e.manager_id = m.id").show()
    /*
    +----------+------------+
    |      name|manager_name|
    +----------+------------+
    |      Boby|        Ravi|
    |     Bably|        Ravi|
    |       bob|        Ragu|
    |       Rob|        Ragu|
    |      Ravi|       Rajan|
    |Boby singh|       Rajan|
    |    BabaJI|       Rajan|
    |   Bajigar|   Bajarangi|
    +----------+------------+
    */

    /* ########### Find the employees whose salary is greater than their manager's ########### */
    spark.sql("select e.name, e.salary, m.name m_name, m.salary m_salary from emp e, emp m where e.manager_id = m.id and e.salary > m.salary").show()
    /*
    +-------+------+---------+--------+
    |   name|salary|   m_name|m_salary|
    +-------+------+---------+--------+
    |   Boby|  4000|     Ravi|    1000|
    |  Bably|  3000|     Ravi|    1000|
    |Bajigar|  8000|Bajarangi|    6000|
    +-------+------+---------+--------+
    */

    /* ########### Top two salaries by department ########### */
    val sql = "select er.dept_id, er.salary from (select dept_id, salary, rank() over (partition by dept_id order by salary desc) as rank from emp) er where rank < 3"
    spark.sql(sql).show()
    /*
    +-------+------+
    |dept_id|salary|
    +-------+------+
    |      1|  5000|
    |      1|  5000|
    |      2|  8000|
    |      2|  6000|
    +-------+------+
    */
  }

}
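
For comparison, the last query can also be expressed with the DataFrame/Window API instead of Spark SQL. A sketch against the same df loaded above, assuming only the imports shown:

import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, rank}

// Same "top two salaries by department" logic without SQL:
// rank rows within each department by salary (descending) and keep rank < 3.
val byDept = Window.partitionBy("dept_id").orderBy(col("salary").desc)
df.withColumn("rank", rank().over(byDept))
  .filter(col("rank") < 3)
  .select("dept_id", "salary")
  .show()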

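The commit only reads from Postgres. If the query results needed to be persisted back, the same JDBC options work with the DataFrame writer; a sketch, where the target table name top_salaries is an assumption:

// Write the "top two salaries" result back to Postgres over JDBC.
spark.sql(sql)
  .write
  .format("jdbc")
  .option("driver", "org.postgresql.Driver")
  .option("url", "jdbc:postgresql://localhost/test_db?user=postgres&password=postgres")
  .option("dbtable", "top_salaries") // hypothetical target table
  .mode("append")
  .save()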