-
Notifications
You must be signed in to change notification settings - Fork 528
/
Copy path06_Advanced_SQL.sql
873 lines (706 loc) · 23.7 KB
/
06_Advanced_SQL.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
/******** 1) GROUP BY **********/
/*
When we group by, we apply the function PER GROUP, NOT on the ENTIRE DATA SET.
Group by use Split, Apply, Combine strategry.
*/
/* How many employees worked in each department ? */
SELECT d.dept_name AS "Department Name" ,COUNT(e.emp_no) AS "Number Of Employee"
FROM employees e
JOIN dept_emp de ON e.emp_no = de.emp_no
JOIN departments d ON d.dept_no = de.dept_no
GROUP BY d.dept_no
ORDER BY 1;
/*------------------------------------------------------------------------------------------------------------*/
/************ 2) HAVING Keyword *************/
/*
"Having" applies filters to a group as a whole
**** Order of Operations ****
FROM
WHERE
GROUP BY
HAVING
SELECT
ORDER
*/
/* How many employees worked in each department, but with employees more than 25000 ? */
SELECT d.dept_name AS "Department Name" ,COUNT(e.emp_no) AS "Number Of Employee"
FROM employees e
JOIN dept_emp de ON e.emp_no = de.emp_no
JOIN departments d ON d.dept_no = de.dept_no
GROUP BY d.dept_name
HAVING COUNT(e.emp_no) > 25000
ORDER BY 1;
/* How many Female employees worked in each department, but with employees more than 25000 ? */
SELECT d.dept_name AS "Department Name" ,COUNT(e.emp_no) AS "Number Of Employee"
FROM employees e
JOIN dept_emp de ON e.emp_no = de.emp_no
JOIN departments d ON d.dept_no = de.dept_no
WHERE e.gender='F'
GROUP BY d.dept_name
HAVING COUNT(e.emp_no) > 25000
ORDER BY 1;
/*------------------------------------------------------------------------------------------------------------*/
/********** 3) Ordering Group Data **********/
SELECT d.dept_name AS "Department Name" ,COUNT(e.emp_no) AS "Number Of Employee"
FROM employees e
JOIN dept_emp de ON e.emp_no = de.emp_no
JOIN departments d ON d.dept_no = de.dept_no
GROUP BY d.dept_name
HAVING COUNT(e.emp_no) > 25000
ORDER BY 2 DESC;
/********* 4) GROUP BY Mental Model ***********/
/* What are the 8 employees who got the most salary bumps? */
-- SELECT e.emp_no, CONCAT(e.first_name, e.last_name) AS "Name", s.salary, s.from_date, s.to_date
SELECT emp_no, MAX(from_date)
FROM salaries
GROUP BY emp_no;
/*------------------------------------------------------------------------------------------------------------*/
/*********** 5) GROUPING SETS**********/
/******* UNION / UNION ALL *********/
/*
SELECT col1, SUM(col2)
FROM table
GROUP BY col1
UNION / UNION ALL
SELECT SUM(col2)
FROM table
UNION ALL doesn't remove DUPLICATE Records.
*/
SELECT NULL AS "prod_id", sum(ol.quantity)
FROM orderlines AS ol
UNION
SELECT prod_id AS "prod_id", sum(ol.quantity)
FROM orderlines AS ol
GROUP BY prod_id
ORDER BY prod_id DESC;
/*------------------------------------------------------------------------------------------------------------*/
/*********** GROUPING SETS ***********/
/*
A Subclause of GROUP BY that allows you to define multiple grouping
It is very useful when we want to combine multiple grouping
*/
-- same result as using above UNION code, but in same query
-- here we are combining Two Sets (one for getting Total, one for per each product)
SELECT prod_id, sum(ol.quantity)
FROM orderlines AS ol
GROUP BY
GROUPING SETS(
(),
(prod_id)
)
ORDER BY prod_id DESC;
/* we can add in multiple groups as we need */
SELECT prod_id, orderlineid, sum(ol.quantity)
FROM orderlines AS ol
GROUP BY
GROUPING SETS(
(),
(prod_id),
(orderlineid)
)
ORDER BY prod_id DESC, orderlineid DESC;
/*------------------------------------------------------------------------------------------------------------*/
/************ GROUPING SETS for info from High Level to Details Level ***********/
SELECT
EXTRACT(YEAR FROM orderdate) AS "YEAR",
EXTRACT(MONTH FROM orderdate) AS "MONTH",
EXTRACT(DAY FROM orderdate) AS "DAY",
SUM(quantity)AS "TOTAL QUANTITY"
FROM orderlines
GROUP BY
GROUPING SETS(
(EXTRACT(YEAR FROM orderdate)), -- yearly
(EXTRACT(MONTH FROM orderdate)), -- monthly
(EXTRACT(DAY FROM orderdate)), -- daily
(
EXTRACT(YEAR FROM orderdate), -- month and year
EXTRACT(MONTH FROM orderdate)
),
(
EXTRACT(MONTH FROM orderdate), -- month and day
EXTRACT(DAY FROM orderdate)
),
(
EXTRACT(YEAR FROM orderdate), -- year, month and day
EXTRACT(MONTH FROM orderdate),
EXTRACT(DAY FROM orderdate)
),
() -- nothing in particular (TOTAL AMOUNT)
)
ORDER BY 1,2,3;
/*------------------------------------------------------------------------------------------------------------*/
/************ 6) ROLLUP ***************/
/* roll up can provide a very similar result as above using grouping sets, but with less code */
SELECT
EXTRACT(YEAR FROM orderdate) AS "YEAR",
EXTRACT(MONTH FROM orderdate) AS "MONTH",
EXTRACT(DAY FROM orderdate) AS "DAY",
SUM(quantity)AS "TOTAL QUANTITY"
FROM orderlines
GROUP BY
ROLLUP(
EXTRACT(YEAR FROM orderdate),
EXTRACT(MONTH FROM orderdate),
EXTRACT(DAY FROM orderdate)
)
ORDER BY 1,2,3;
/*------------------------------------------------------------------------------------------------------------*/
/******************** 8/9) WINDOW Functions ******************/
/*
Window functions CREATE a NEW COLUMN based on functions performed on a SUBSET or "WINDOW" of the data.
window_function(agr1, agr2) OVER(
[PARTITION BY partition_expression]
[ORDER BY sort_expression [ASC | DESC] [NULLS {FIRST | LAST}]]
)
*/
-- Here we can see in the result that max salary is 158,220. Because query returns all data, then LIMIT say cut it off for 100 rows only.
-- That's why OVER() is calculated on the window or subset of data (in this case the entire data were returned).
SELECT *,
MAX(salary) OVER()
FROM salaries
LIMIT 100;
-- in this case, the maximum salary is 69,999. Because of WHERE conditions, the data were filtered out.
-- and OVER() is using on that subset or window of the returned data (in this case the results of WHERE filtered data).
SELECT
*,
MAX(salary) OVER()
FROM salaries
WHERE salary < 70000
ORDER BY salary DESC;
/******************** 10) PARTITON BY ******************/
/*
Divide Rows into Groups to apply the function against (Optional)
*/
/* Employee salary compairing average salary of departments */
SELECT
s.emp_no, s.salary,d.dept_name,
AVG(s.salary)
OVER(
PARTITION BY(d.dept_name)
)
FROM salaries s
JOIN dept_emp de ON s.emp_no = de.emp_no
JOIN departments d ON d.dept_no = de.dept_no;
/******************** 11) ORDER BY ******************/
/*
ORDER BY changes the FRAME of the window
It tells SQL to take into account of everything before up until to this point (becoming Cumulative)
*/
-- against the window of entire data
SELECT emp_no,
COUNT(salary) OVER()
FROM salaries;
-- using PARTION BY
-- Counting salary by each unique emp_no partion
SELECT emp_no,
COUNT(salary) OVER(
PARTITION BY(emp_no)
)
FROM salaries;
-- using ORDER BY
-- Count number are becoming Cumulative
SELECT emp_no,
COUNT(salary) OVER(
ORDER BY emp_no
)
FROM salaries;
/********************************* FRAME Clause *******************************************************/
/*
When using Frame clause in a window function, we can create a SUB-RANGE or FRAME
For example: when we use ORDER BY, we look at the PARTATIONED data in a different len (FRAME).
NOTE:
Without ORDER BY, by default the framing is usually ALL PARTITION ROWs (Entire Window)
With ORDER BY, by default the framing is usually everything before the CURRENT ROW AND the CURRENT ROW (Cumulatively)
|----------------------------------------------------------------------------------------------------
| Keys | Meaning |
|---------------------------------------|-----------------------------------------------------------|
| ROWS or RANGE | Whether you want to use a RANGE or ROWS as a FRAME |
| PRECEDING | Rows Before the CURRENT ONE |
| FOLLOWING | Rows After the CURRENT ONE |
| UNBOUNDED PRECEDING or FOLLOWING | Returns All Before and After |
| CURRENT ROW | Your Current Row |
-----------------------------------------------------------------------------------------------------
*/
-- In this case, we can see that every salary is unique. Because we are using ORDER BY, each row is cumulativly
-- counted within the partition of each employee's window. As a result, it like like 1, (1+1 becomes 2), etc.
SELECT emp_no,
salary,
COUNT(salary) OVER(
PARTITION BY emp_no
ORDER BY salary
)
FROM Salaries;
-- This one returns the same results as using PARTION BY only. The reason is we are looking at the data through the lends of Range.
-- For range using unbounded precedning and following, we are comparing against data with the entire data within that Partition.
SELECT emp_no,
salary,
COUNT(salary) OVER(
PARTITION BY emp_no
ORDER BY salary
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
FROM Salaries;
-- same reults as RANGE results
SELECT emp_no,
salary,
COUNT(salary) OVER(
PARTITION BY emp_no
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
FROM Salaries;
-- same like ORDER BY
SELECT emp_no,
salary,
COUNT(salary) OVER(
PARTITION BY emp_no
ORDER BY salary
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
)
FROM Salaries;
/*------------------------------------------------------------------------------------------------------------*/
/************* 13) Solving for Current Salary ***********/
-- using GROUP BY isn't a good way to solve this problem because we need to pass in a lot of condition in GROUP BY clause.
SELECT emp_no, salary, to_date
FROM salaries
GROUP BY emp_no, salary, to_date
ORDER BY to_date DESC
LIMIT 10;
-- using window function for this problem
-- within frame, we compare the salary with salary of following and preceding one along the way.
-- LAST VALUE returns that very last value that won the salary comparing competition.
-- We order by from date Ascending order, so we knew ahead that the current salary should be the one on the most bottom.
SELECT
DISTINCT e.emp_no, e.first_name, d.dept_name,
LAST_VALUE(s.salary) OVER(
PARTITION BY e.emp_no
ORDER BY s.from_date
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS "Current Salary"
FROM salaries s
JOIN employees e USING(emp_no)
JOIN dept_emp de USING (emp_no)
JOIN departments d USING (dept_no)
ORDER BY emp_no;
-- checking out the unique salary for each employees
SELECT emp_no, salary, from_date, to_date,
COUNT(salary) OVER(
PARTITION BY emp_no
ORDER BY to_date
)
FROM salaries;
/*-------------------------------------------------------------------------------------------------------------*/
/************************************ WINDOW FUNCTIONS ****************************************************/
/*
---------------------------------------------------------------------------------------------------------------------
| Function | Purpose |
----------------------------|---------------------------------------------------------------------------------------|
| SUM / MIN / MAX / AVG | Get the sum, min, .. of all the records in the partition |
| FIRST_VALUE | Return the value evaluated against the first row within the partition. |
| LAST_VALUE | Return the value evaluated against the last row within the partition. |
| NTH_VALUE | Return the value evaluated against the nth row in ordered partition. |
| PERCENT_RANK | Return the relative rank of the current row (rank-1) / (total rows - 1) |
| RANK | Rank the current row within its partition with gaps. |
| ROW_NUMBER | Number the current row within its partition starting from 1. (regardelss of framing)|
| LAG / LEAD | Access the values from the previous or next row. |
--------------------------------------------------------------------------------------------------------------------
*/
/************* 14) FIRST_VALUE ***********/
/* I want to know how my price compares to the item with the LOWEST price in the SAME category */
SELECT
prod_id, price, category,
FIRST_VALUE(price) OVER(
PARTITION BY category
ORDER BY price
) AS "Cheapest in the category"
FROM products
ORDER BY category, prod_id;
-- getting the same result using MIN which is easier, not needing ORDER BY too.
SELECT
prod_id, price, category,
MIN(price) OVER(
PARTITION BY category
) AS "Cheapest in the category"
FROM products
ORDER BY category, prod_id;
/************* 15) LAST VALUE ****************/
/* I want to know how my price to the item with the HIGHEST PRICE in the SAME category */
SELECT
prod_id, price, category,
LAST_VALUE(price) OVER(
PARTITION BY category
ORDER BY price
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS "Most Expensive Price in Category"
FROM products
ORDER BY category, prod_id;
-- using MAX
SELECT
prod_id, price, category,
MAX(price) OVER(
PARTITION BY category
) AS "Highest Price in Category"
FROM products
ORDER BY category, prod_id;
/****************** 16) SUM ************************/
/* I want to see how much Cumulatively a customer has ordered at our store */
SELECT
customerid, orderid, orderdate, netamount,
SUM(netamount) OVER(
PARTITION BY customerid
ORDER BY orderid
) AS "Cumulative Spending"
FROM orders
ORDER BY customerid, orderid;
/**************** 17) ROW_NUMBER ****************/
-- ROW_NUMBER ignores the framing
-- no need to put parameters in ROW_NUMBER() function
/* I want to know where my product is positioned in the category by price */
SELECT
category, prod_id, price,
ROW_NUMBER() OVER(
PARTITION BY category
ORDER BY price
) AS "Position in category by price"
FROM products
ORDER BY category
/*------------------------------------------------------------------------------------------------------------*/
/********************* 19) Conditional Statements ***********************/
/********** CASE ************/
/*
SELECT a,
CASE
WHEN a=1 THEN 'one'
WHEN a=2 THEN 'two'
ELSE 'other'
END
FROM test;
*/
-- 1) CASE statement can be used anywhere
SELECT
orderid, customerid,
CASE
WHEN customerid=1 THEN 'my first customer'
ELSE 'not my first customer'
END AS "customer status",
netamount
FROM orders
ORDER BY customerid;
-- 2) using CASE in combination with WHERE
SELECT
orderid, customerid, netamount
FROM orders
WHERE
CASE
WHEN customerid > 10 THEN netamount < 100
ELSE netamount > 100
END
ORDER BY customerid;
-- 3) using CASE statement with Aggregate function
/* doing gesture of good faith, refunding 100$ for that order where spending is less than 100$ */
SELECT
SUM(
CASE
WHEN netamount < 100 THEN -100
ELSE netamount
END
) AS "Returns",
SUM(netamount) AS "Normal Total",
FROM orders;
/* ----------------------------------------------------------------------------------------------------------- */
/******************* 20) NULL IF *******************/
/*
Use NULLIF to fill in empty spots with a NULL value to avoid divide by zero issues
NULLIF(val1, val2)
if value 1 is equal to value 2, return NULL
*/
SELECT NULLIF(0, 0); -- returns null
SELECT NULLIF('ABC', 'DEF'); -- returns ABC
/* ----------------------------------------------------------------------------------------------------------- */
/******************** 21) VIEWS *********************/
/*
Views allow you to store the results and query of previously run queries.
There are 2 types of views: 1) Materialized and 2) Non-Materialized Views.
1) Materialzed View - stores the data PHYSICIALLY AND PERIODICALLY UPDATES it when tables change.
2) Non-Materialized View - Query gets RE-RUN each time the view is called on.
*/
/*************** 22) VIEW syntax **************/
/*
+ views are OUTPUT of query we ran.
+ views act like TABLES you can query them.
+ (Non-Materialized View): views tak VERY LITTLE SPACE to store. We only store the definition of the view, NOT ALL the data that it returns.
*/
-- Create a view
CREATE VIEW view_name
AS query;
-- Update a view
CREATE OR REPLACE view_name
AS query;
-- Rename a view
ALTER VIEW exisitng_view_name RENAME TO new_view_name;
-- Delete a view
DROP VIEW IF EXISTS view_name;
/*************** 23) Using VIEWS ******************/
-- get the last salary change of each employee
CREATE VIEW last_salary_change AS
SELECT e.emp_no,
MAX(s.from_date)
FROM salaries s
JOIN employees e USING(emp_no)
JOIN dept_emp de USING(emp_no)
JOIN departments d USING(dept_no)
GROUP BY e.emp_no
ORDER BY e.emp_no;
-- query entire data from that view
SELECT * FROM last_salary_change;
-- combine with view to get the latest salary of each employee
SELECT
s.emp_no, d.dept_name, s.from_date, s.salary
FROM last_salary_change lsc
JOIN salaries s USING(emp_no)
JOIN dept_emp de USING(emp_no)
JOIN departments d USING(dept_no)
WHERE s.from_date = lsc.max
ORDER BY s.emp_no;
/*--------------------------------------------------------------------------------------------------------------*/
/**************** 24) Indexes ****************/
/*
Index is the construct to improve Querying Performance.
Think of it like a table of contents, it helps you find where a piece of data is.
Pros: Speed up querying
Cons: Slows down data Insertion and Updates
***** Types of Indexes *****
- Single Column
- Multi Column
- Unique
- Partial
- Implicit Indexes (done by default)
*/
-- Create an index
CREATE UNIQUE INDEX idx_name
ON table_name(column1, column2, ...);
-- Delete an index
DELETE INDEX idx_name;
/*
**** When to Use Indexes *****
- Index Foreign Keys
- Index Primary Keys and Unique Columns
- Index on Columns that end up in the ORDER BY/WHERE clause VERY OFTEN.
***** When NOT to use Indexes ******
- Don't add Index just to add Index
- Don't use Index on Small Table
- Don't use on Tables that are UPDATED FREQUENTLY.
- Don't use on Columns that can contain NULL values
- Don't use on Columns that have Large Values.
*/
/***************** 25) Indexes Types ******************/
/*
Single Column Index : retrieving data that satisfies ONE condition.
Multi Column Index : retrieving data that satisfies MULIPLE Conditions.
UNIQUE : For Speed and Integrity
PARTIAL : Index Over a SUBSET of a Table (CREATE INDEX name ON table (<expression);)
IMPLICIT : Automatically creaed by the database: (Primary Key, Unique Key)
*/
EXPLAIN ANALYZE
SELECT "name", district, countrycode
FROM city
WHERE countrycode IN ('TUN', 'BE', 'NL');
-- Single Index
CREATE INDEX idx_countrycode
ON city(countrycode);
-- Partial Index
CREATE INDEX idx_countrycode
ON city(countrycode) WHERE countrycode IN ('TUN', 'BE', 'NL');
EXPLAIN ANALYZE
SELECT "name", district, countrycode
FROM city
WHERE countrycode IN ('PSE', 'ZWE', 'USA');
/************************** 26) Index Algorithms *********************/
/*
POSTGRESQL provides Several types of indexes:
B-TREE
HASH
GIN
GIST
Each Index types use different algorithms.
*/
-- we can extend which algorithm to use while creating index
CREATE UNIQUE INDEX idx_name
ON tbl_name USING <method> (column1, column2, ...)
-- by default, it is created using B-TREE
CREATE INDEX idx_countrycode
ON city(countrycode);
-- but we can specify which algorithm to use (example: HASH)
CREATE INDEX idx_countrycode
ON city USING HASH (countrycode);
/*************************** When to use which Algorithms? *************************/
/*
********* B-TREE ***********
Default Algorithm
Best Used for COMPARISONS with
<, >
<=, >=
=
BETWEEN
IN
IS NULL
IS NOT NULL
********** HASH **********
Can only handle Equality = Operations.
*********** GIN (Generalized Inverted Index) ************
Best used when Multiple Values are stored in a Single Field.
*********** GIST (Generalized Search Tree) ***********
Useful in Indexing Geometric Data and Full-Text Search.
*/
-- testing for HASH
EXPLAIN ANALYZE
SELECT "name", district, countrycode
FROM city
WHERE countrycode='BEL' OR countrycode='TUN' OR countrycode='NL';
/* ----------------------------------------------------------------------------------------------------------- */
/********************** 27) Subqueries ************************/
/*
Subqueries can be used in SELECT, FROM, HAVING, WHERE.
For HAVING and WHERE clause, subquery must return SINGLE value record.
*/
SELECT
title, price,
(SELECT AVG(price) FROM products) AS "global average price"
FROM products;
-- Subquery can returns A Single Result or Row Sets
SELECT
title, price,
(SELECT AVG(price) FROM products) AS "global average price" -- return single result
FROM (
SELECT * FROM products -- return row sets
) AS "products_sub";
/************ 29) Types of Subqueries *************/
/*
Single Row
Multiple Row
Multiple Column
Correlated
Nested
*/
-- Single Row: returns Zero or One Row
SELECT emp_no, salary
FROM salaries
WHERE salary > (
SELECT AVG(salary) FROM salaries
);
-- Multiple Row: returns One or More Rows
SELECT title, price, category
FROM products
WHERE category IN (
SELECT category FROM categories
WHERE categoryname IN ('Comedy', 'Family', 'Classics')
);
-- Multiple Columns: returns ONE or More columns
SELECT emp_no, salary, dea.avg AS "Department average salary"
FROM salaries s
JOIN dept_emp as de USING(emp_no)
JOIN(
SELECT dept_no, AVG(salary) FROM salaries AS s2
JOIN dept_emp AS de2 USING(emp_no)
GROUP BY dept_no
) AS dea USING (dept_no)
WHERE salary > dea.avg;
-- Correlated: Reference ONE or More columns in the OUTER statement - Runs against Each Row
/* Get the most recent salary of employee */
SELECT emp_no, salary AS "most recent salary", from_date
FROM salaries AS s
WHERE from_date = (
SELECT MAX(s2.from_date) AS max
FROM salaries AS s2
WHERE s2.emp_no = s.emp_no
)
ORDER BY emp_no;
-- Nested : Subquery in Subquery
SELECT orderlineid, prod_id, quantity
FROM orderlines
JOIN(
SELECT prod_id
FROM products
WHERE category IN(
SELECT category FROM categories
WHERE categoryname IN('Comedy', 'Family', 'Classics')
)
) AS limited USING(prod_id);
/*************** 30) Using Subqueries ************/
SELECT
first_name,
last_name,
birth_date,
AGE(birth_date)
FROM employees
WHERE AGE(birth_date) > (SELECT AVG(AGE(birth_date)) FROM employees);
/* Show the salary with title of the employee using Subquery, instead of JOIN */
SELECT emp_no, salary, from_date,
(SELECT title FROM titles AS t
WHERE t.emp_no=s.emp_no AND t.from_date=s.from_date)
FROM salaries s
ORDER BY emp_no;
EXPLAIN ANALYZE
SELECT emp_no, salary AS "most recent salary", from_date
FROM salaries AS s
WHERE from_date = (
SELECT MAX(s2.from_date) AS max
FROM salaries AS s2
WHERE s2.emp_no = s.emp_no
)
ORDER BY emp_no;
/********************** 32) Subqueries Operators *******************/
/*
EXISTS : Check if the subquery returns any rows
*/
SELECT firstname, lastname, income
FROM customers AS c
WHERE EXISTS(
SELECT * FROM orders as o
WHERE c.customerid = o.customerid AND totalamount > 400
) AND income > 90000
/*
IN : Check if the value is equal to any of the rows in the return (NULL yields NULL)
NOT IN : Check if the value is NOT equal to any of the rows in the return (NULL yields NULL)
*/
SELECT prod_id
FROM products
WHERE category IN(
SELECT category FROM categories
WHERE categoryname IN ('Comedy', 'Family', 'Classics')
);
SELECT prod_id
FROM products
WHERE category IN(
SELECT category FROM categories
WHERE categoryname NOT IN ('Comedy', 'Family', 'Classics')
);
/*
ANY / SOME : check each row against the operator and if any comparison matches, return TRUE.
*/
SELECT prod_id
FROM products
WHERE category = ANY(
SELECT category FROM categories
WHERE categoryname IN ('Comedy', 'Family', 'Classics')
);
/*
ALL : check each row against the operator and if all comparisions match, return true.
*/
SELECT prod_id, title, sales
FROM products
JOIN inventory as i USING(prod_id)
WHERE i.sales > ALL(
SELECT AVG(sales) FROM inventory
JOIN products as p1 USING(prod_id)
GROUP BY p1.category
);
/*
Single Value Comparison
*/
SELECT prod_id
FROM products
WHERE category = (
SELECT category FROM categories
WHERE categoryname IN ('Comedy')
);