forked from Rebecca-S-Liu/Feb2023
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SQLQuery_2.sql
343 lines (291 loc) · 11.7 KB
/
SQLQuery_2.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
--SELECT: retrieve
--WHERE: filter
--ORDER BY: sort
--JOIN: work on multile tables in one query
--Aggregation functions: perform a calculation on a set of values a return a single aggregated result
--1. COUNT(): return the number of rows
SELECT COUNT(OrderID) AS TotalNumOfRows
FROM Orders
SELECT COUNT(*) AS TotalNumOfRows
FROM Orders
--COUNT(*) vs. COUNT(colName):
--COUNT(*) will include null values, but COUNT(colName) will not
SELECT FirstName, Region
FROM Employees
SELECT COUNT(Region), COUNT(*)
FROM Employees
--use w/ GROUP BY: group rows that have the same values into summary rows
--find total number of orders placed by each customers
SELECT c.CustomerID, c.ContactName, c.City, c.Country, COUNT(o.OrderID) AS NumOfOrders
FROM Orders o INNER JOIN Customers c ON o.CustomerID = c.CustomerID
GROUP BY c.CustomerID, c.ContactName, c.City, c.Country
ORDER BY NumOfOrders DESC
--a more complex template:
--only retreive total order numbers where customers located in USA or Canada, and order number should be greater than or equal to 10
SELECT c.CustomerID, c.ContactName, c.City, c.Country, COUNT(o.OrderID) AS NumOfOrders
FROM Orders o INNER JOIN Customers c ON o.CustomerID = c.CustomerID
WHERE c.Country IN ('USA', 'Canada')
GROUP BY c.CustomerID, c.ContactName, c.City, c.Country
HAVING COUNT(o.OrderID) >= 10
ORDER BY NumOfOrders DESC
--SELECT fields, aggregate(fileds)
--FROM table JOIN table2 ON ...
--WHERE criteria --optional
--GROUP BY fileds -- use when have both aggregated and non-aggregated fileds
--HAVING criteria --optional
--ORDER BY fields DESC --optional
--WHERE vs. HAVING
--1. both are used as filters, HAVING will apply only to groups as a whole, but WHERE is applied to individual rows
--2. WHERE goes before aggregation, but HAVING goes after aggregations
--sql execution order
--FROM/JOIN -> WHERE -> GROUP BY -> HAVING -> SELECT -> DINSINCT -> ORDER BY
-- |__________________________|
-- cannot use alias in SELECT
SELECT c.CustomerID, c.ContactName, c.City, c.Country AS Cty, COUNT(o.OrderID) AS NumOfOrders
FROM Orders o INNER JOIN Customers c ON o.CustomerID = c.CustomerID
WHERE cty IN ('USA', 'Canada')
GROUP BY c.CustomerID, c.ContactName, c.City, cty
HAVING NumOfOrders >= 10
ORDER BY NumOfOrders DESC
--3. WHERE can be used with SELECT, UPDATE or DELETE, but HAVING can only be used in SELECT
SELECT *
FROM Products
UPDATE Products
SET UnitPrice = 20
WHERE ProductID = 1
--COUNT DISTINCT: only count unique values
SELECT City
FROM Customers
SELECT COUNT(City), COUNT(DISTINCT City)
FROM Customers
--2. AVG(): return the average value of a numeric column
--list average revenue for each customer
SELECT c.CustomerID, c.ContactName, AVG(od.UnitPrice * od.Quantity) AS AvgRevenue
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID JOIN [Order Details] od ON od.OrderID = o.OrderID
GROUP BY c.CustomerID, c.ContactName
--3. SUM():
--list sum of revenue for each customer
SELECT c.CustomerID, c.ContactName, SUM(od.UnitPrice * od.Quantity) AS TotalRevenue
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID JOIN [Order Details] od ON od.OrderID = o.OrderID
GROUP BY c.CustomerID, c.ContactName
--4. MAX():
--list maxinum revenue from each customer
SELECT c.CustomerID, c.ContactName, MAX(od.UnitPrice * od.Quantity) AS MaxRevenue
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID JOIN [Order Details] od ON od.OrderID = o.OrderID
GROUP BY c.CustomerID, c.ContactName
--5.MIN():
--list the cheapeast product bought by each customer
SELECT c.CustomerID, c.ContactName, MIN(od.UnitPrice) AS CheapestProduct
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID JOIN [Order Details] od ON od.OrderID = o.OrderID
GROUP BY c.CustomerID, c.ContactName
--TOP predicate: SELECT a specific number or a certain percentage of records
--retrieve top 5 most expensive products
SELECT ProductName, UnitPrice
FROM Products
ORDER BY UnitPrice DESC
SELECT TOP 5 ProductName, UnitPrice
FROM Products
ORDER BY UnitPrice DESC
--retrieve top 10 percent most expensive products
SELECT TOP 10 PERCENT ProductName, UnitPrice
FROM Products
ORDER BY UnitPrice DESC
--list top 5 customers who created the most total revenue
SELECT TOP 5 c.CustomerID, c.ContactName, SUM(od.UnitPrice * od.Quantity) AS TotalRevenue
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID JOIN [Order Details] od ON od.OrderID = o.OrderID
GROUP BY c.CustomerID, c.ContactName
ORDER BY TotalRevenue DESC
SELECT TOP 5 ContactName
FROM Customers
--LIMIT: we dont have LIMIT in sql server, use TOP instead
--Subquery: a SELECT statement that is embedded in another SQL statement
--find the customers from the same city where Alejandra Camino lives
SELECT ContactName, City
FROM Customers
WHERE City IN (
SELECT City
FROM Customers
WHERE ContactName = 'Alejandra Camino'
)
--find customers who make any orders
--join
SELECT DISTINCT c.CustomerID, c.ContactName, c.City, c.Country
FROM Customers c INNER JOIN Orders o ON c.CustomerID = o.CustomerID
--subquery
SELECT CustomerId, ContactName, City, Country
FROM Customers
WHERE CustomerId IN
(SELECT DISTINCT CustomerID
FROM Orders)
--subquery vs. join
--1) JOIN can only be used in FROM clause, but subquery can be used in SELECT, FROM, WHERE, HAVING, ORDER BY
--JOIN
SELECT o.OrderDate, e.FirstName, e.LastName
FROM Orders o JOIN Employees e ON o.EmployeeID = e.EmployeeID
WHERE e.City = 'London'
ORDER BY o.OrderDate, e.FirstName, e.LastName
--subquery
SELECT o.OrderDate,
(SELECT e1.FirstName FROM Employees e1 WHERE o.EmployeeID = e1.EmployeeID) AS FirstName,
(SELECT e2.LastName FROM Employees e2 WHERE o.EmployeeID = e2.EmployeeID) AS LastName
FROM Orders o
WHERE (
SELECT e3.City
FROM Employees e3
WHERE e3.EmployeeID = o.EmployeeID
) IN ('London')
ORDER BY o.OrderDate, (SELECT e1.FirstName FROM Employees e1 WHERE o.EmployeeID = e1.EmployeeID), (SELECT e2.LastName FROM Employees e2 WHERE o.EmployeeID = e2.EmployeeID)
--2) subquery is easy to understand and maintain
--find customers who never placed any order
--JOIN
SELECT c.CustomerID, c.ContactName, c.City, c.Country
FROM Customers c LEFT JOIN Orders o ON c.CustomerID = o.CustomerID
WHERE o.OrderID is null
--subquery
SELECT c.CustomerID, c.ContactName, c.City, c.Country
FROM Customers c
WHERE CustomerID NOT IN (
SELECT DISTINCT CustomerID
FROM Orders
)
--3) usually JOIN has a better performance than subquery
--query: INNER JOIN/ LEFT JOIN
--physical join: HASH JOIN, MERGE JOIN, NESTED LOOP JOIN
--Correlated Subquery: inner query is dependent on the outer query
--Customer name and total number of orders by customer
--JOIN
SELECT c.ContactName, Count(o.OrderID) AS TotalNumOfOrders
FROM Customers c LEFT JOIN Orders o ON c.CustomerID = o.CustomerID
GROUP BY c.ContactName
ORDER BY TotalNumOfOrders DESC
--correlated subquery
SELECT c.ContactName,
(SELECT COUNT(o.OrderID) FROM Orders o WHERE o.CustomerID = c.CustomerID) AS TotalNumOfOrders
FROM Customers c
ORDER BY TotalNumOfOrders DESC
--correlated subquery
SELECT o.OrderDate,
(SELECT e1.FirstName FROM Employees e1 WHERE e1.EmployeeID = o.EmployeeID) FirstName,
(SELECT e2.LastName FROM Employees e2 WHERE e2.EmployeeID = o.EmployeeID) LastName
FROM Orders o
ORDER BY FirstName, OrderDate
--join
SELECT o.OrderDate, e.FirstName, e.LastName
FROM Orders o JOIN Employees e ON e.EmployeeID = o.EmployeeID
ORDER BY FirstName, OrderDate
--derived table: subquery in from clause
--syntax
SELECT CustomerID, ContactName
FROM
(SELECT *
FROM Customers) dt
--customers and the number of orders they made
SELECT c.ContactName, c.City, c.Country, COUNT(o.OrderID) AS TotalNumOfOrders
FROM Customers c LEFT JOIN Orders o ON c.CustomerID = o.CustomerID
GROUP BY c.ContactName, c.City, c.Country
ORDER BY TotalNumOfOrders DESC
--derived table
SELECT c.ContactName, c.City, c.Country, dt.TotalNumOfOrders
FROM Customers c LEFT JOIN (
SELECT CustomerID, COUNT(OrderID) AS TotalNumOfOrders
FROM Orders
GROUP BY CustomerID
) dt ON c.CustomerID = dt.CustomerID
ORDER BY dt.TotalNumOfOrders DESC
--Union vs. Union ALL:
--common features:
--1. both are used to combine different result sets vertically
SELECT City, Country
FROM Customers
UNION
SELECT City, Country
FROM Employees
SELECT City, Country
FROM Customers
UNION ALL
SELECT City, Country
FROM Employees
--2. criteria
--number of cols must be the same
SELECT City, Country, CustomerID
FROM Customers
UNION
SELECT City, Country
FROM Employees
--data types of each column must be identical
SELECT City, Country, Region
FROM Customers
UNION
SELECT City, Country, EmployeeID
FROM Employees
--differences
--1. UNION will remove duplicate values, UNION ALL will not
--2. UNION: the records from the first column will be sorted ascendingly
--3. UNION cannot be used in recursive cte, but UNION ALL can
--Window Function: operate on a set of rows and return a single aggregated value for each row by adding extra columns
--RANK(): give a rank based on certain order
--rank for product price, when there is a tie, there will be a value gap
SELECT ProductID, ProductName, UnitPrice, RANK() OVER (ORDER BY UnitPrice DESC) RNK
FROM Products
--product with the 2nd highest price
SELECT dt.ProductID, dt.ProductName, dt.UnitPrice, dt.RNK
FROM
(SELECT ProductID, ProductName, UnitPrice, RANK() OVER (ORDER BY UnitPrice DESC) RNK
FROM Products) dt
WHERE dt.RNK = 2
--DENSE_RANK():
SELECT ProductID, ProductName, UnitPrice, RANK() OVER (ORDER BY UnitPrice DESC) RNK, DENSE_RANK() OVER (ORDER BY UnitPrice DESC) DenseRNK
FROM Products
--ROW_NUMBER(): return the row number of the sorted records starting from 1
SELECT ProductID, ProductName, UnitPrice, RANK() OVER (ORDER BY UnitPrice DESC) RNK, DENSE_RANK() OVER (ORDER BY UnitPrice DESC) DenseRNK, ROW_NUMBER() OVER (ORDER BY UnitPrice DESC) RowNum
FROM Products
--partition by: divide the result set into paritions and perform calculation on each subset
--list customers from every country with the ranking for number of orders
SELECT c.ContactName, c.Country, COUNT(o.OrderID) AS NumOfOrders, RANK() OVER (PARTITION BY c.Country ORDER BY COUNT(o.OrderID) DESC) RNK
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID
GROUP BY c.ContactName, c.Country
--- find top 3 customers from every country with maximum orders
SELECT dt.ContactName, dt.Country, dt.NumOfOrders, dt.RNK
FROM
(SELECT c.ContactName, c.Country, COUNT(o.OrderID) AS NumOfOrders, RANK() OVER (PARTITION BY c.Country ORDER BY COUNT(o.OrderID) DESC) RNK
FROM Customers c JOIN Orders o ON c.CustomerID = o.CustomerID
GROUP BY c.ContactName, c.Country) dt
WHERE dt.RNK <= 3
--cte: common table expression -- temporary named result set
SELECT c.ContactName, c.City, c.Country, dt.TotalNumOfOrders
FROM Customers c LEFT JOIN (
SELECT CustomerID, COUNT(OrderID) AS TotalNumOfOrders
FROM Orders
GROUP BY CustomerID
) dt ON c.CustomerID = dt.CustomerID
ORDER BY dt.TotalNumOfOrders DESC
WITH OrderCntCTE
AS
(
SELECT CustomerID, COUNT(OrderID) AS TotalNumOfOrders
FROM Orders
GROUP BY CustomerID
)
SELECT c.ContactName, c.City, c.Country, cte.TotalNumOfOrders
FROM Customers c LEFT JOIN OrderCntCTE cte ON c.CustomerID = cte.CustomerID
ORDER BY cte.TotalNumOfOrders DESC
--lifecycle: created and used in the very next select statement
--recursive CTE:
--initialization: initial call to the cte which passes in some values to get things started
--recursive rule
SELECT EmployeeID, FirstName, ReportsTo
FROM Employees
-- level 1: Andrew
-- level 2: Nancy, Janet, Margaret, Steven, Laura
-- level 3: Michael, Robert, Anne
WITH EmpHierachyCTE
AS
(
SELECT EmployeeID, FirstName, ReportsTo, 1 lvl
FROM Employees
WHERE ReportsTo is null
UNION ALL
SELECT e.EmployeeID, e.FirstName, e.ReportsTo, cte.lvl + 1
FROM Employees e INNER JOIN EmpHierachyCTE cte ON e.ReportsTo = cte.EmployeeID
)
SELECT * FROM EmpHierachyCTE