Ecommerce Data Analysis with SQL
!mkdir -p ~/.aws
!pip install -qq psycopg2-binary awscli boto3 s3fs
%%writefile ~/.aws/credentials
[default]
aws_access_key_id=
aws_secret_access_key=
region=us-east-1
output=json
import boto3
import json
%reload_ext sql
def get_secret(secret_name):
region_name = "us-east-1"
session = boto3.session.Session()
client = session.client(
service_name='secretsmanager',
region_name=region_name)
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
get_secret_value_response = json.loads(get_secret_value_response['SecretString'])
return get_secret_value_response
db_credentials = get_secret(secret_name='wysde')
USERNAME = db_credentials["RDS_POSTGRES_USERNAME"]
PASSWORD = db_credentials["RDS_POSTGRES_PASSWORD"]
HOST = db_credentials["RDS_POSTGRES_HOST"]
PORT = 5432
DBNAME = "postgres"
CONN = f"postgresql://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/{DBNAME}"
%sql {CONN}
Basic SQL
LIMIT
Displays all the data in the occurred_at, account_id, and channel columns of the web_events table, and limits the output to only the first 15 rows
Note - LIMIT must be the last one.
%%sql
SELECT occurred_at, account_id, channel
FROM web_events
LIMIT 15;
occurred_at | account_id | channel |
---|
2015-10-06 17:13:58 | 1001 | direct |
2015-11-05 03:08:26 | 1001 | direct |
2015-12-04 03:57:24 | 1001 | direct |
2016-01-02 00:55:03 | 1001 | direct |
2016-02-01 19:02:33 | 1001 | direct |
2016-03-02 15:15:22 | 1001 | direct |
2016-04-01 10:58:55 | 1001 | direct |
2016-05-01 15:26:44 | 1001 | direct |
2016-05-31 20:53:47 | 1001 | direct |
2016-06-30 12:09:45 | 1001 | direct |
2016-07-30 03:06:26 | 1001 | direct |
2016-08-28 06:42:42 | 1001 | direct |
2016-09-26 23:14:59 | 1001 | direct |
2016-10-26 20:21:09 | 1001 | direct |
2016-11-25 22:52:29 | 1001 | direct |
ORDER BY
Write a query to return the 10 earliest orders in the orders table. Include the id, occurred_at, and total_amt_usd.
%%sql
SELECT id, occurred_at, total_amt_usd
FROM orders
ORDER BY occurred_at
LIMIT 10;
id | occurred_at | total_amt_usd |
---|
5786 | 2013-12-04 04:22:44 | 627.48 |
2415 | 2013-12-04 04:45:54 | 2646.77 |
4108 | 2013-12-04 04:53:25 | 2709.62 |
4489 | 2013-12-05 20:29:16 | 277.13 |
287 | 2013-12-05 20:33:56 | 3001.85 |
1946 | 2013-12-06 02:13:20 | 2802.90 |
6197 | 2013-12-06 12:55:22 | 7009.18 |
3122 | 2013-12-06 12:57:41 | 1992.13 |
6078 | 2013-12-06 13:14:47 | 6680.06 |
2932 | 2013-12-06 13:17:25 | 2075.94 |
Write a query to return the top 5 orders in terms of largest total_amt_usd. Include the id, account_id, and total_amt_usd.
%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd DESC
LIMIT 5;
id | account_id | total_amt_usd |
---|
4016 | 4251 | 232207.07 |
3892 | 4161 | 112875.18 |
3963 | 4211 | 107533.55 |
5791 | 2861 | 95005.82 |
3778 | 4101 | 93547.84 |
Write a query to return the lowest 20 orders in terms of smallest total_amt_usd. Include the id, account_id, and total_amt_usd.
%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd
Limit 20;
id | account_id | total_amt_usd |
---|
6375 | 3651 | 0.00 |
6523 | 3991 | 0.00 |
6323 | 3551 | 0.00 |
6312 | 3541 | 0.00 |
6435 | 3801 | 0.00 |
4446 | 1231 | 0.00 |
5612 | 2601 | 0.00 |
4844 | 1571 | 0.00 |
5057 | 1851 | 0.00 |
4625 | 1411 | 0.00 |
6009 | 3141 | 0.00 |
6281 | 3491 | 0.00 |
4445 | 1221 | 0.00 |
2518 | 2881 | 0.00 |
4490 | 1281 | 0.00 |
124 | 1131 | 0.00 |
4770 | 1521 | 0.00 |
1353 | 1951 | 0.00 |
5001 | 1791 | 0.00 |
6856 | 4451 | 0.00 |
Write a query that displays the order ID, account ID, and total dollar amount for all the orders, sorted first by the account ID (in ascending order), and then by the total dollar amount (in descending order).
%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY account_id, total_amt_usd DESC
LIMIT 10;
id | account_id | total_amt_usd |
---|
4308 | 1001 | 9426.71 |
4309 | 1001 | 9230.67 |
4316 | 1001 | 9134.31 |
4317 | 1001 | 8963.91 |
4314 | 1001 | 8863.24 |
4307 | 1001 | 8757.18 |
4311 | 1001 | 8672.95 |
4310 | 1001 | 8538.26 |
4312 | 1001 | 8343.09 |
4313 | 1001 | 8311.59 |
Write a query that again displays order ID, account ID, and total dollar amount for each order, but this time sorted first by total dollar amount (in descending order), and then by account ID (in ascending order).
%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd DESC, account_id
LIMIT 10;
id | account_id | total_amt_usd |
---|
4016 | 4251 | 232207.07 |
3892 | 4161 | 112875.18 |
3963 | 4211 | 107533.55 |
5791 | 2861 | 95005.82 |
3778 | 4101 | 93547.84 |
6590 | 4111 | 93505.69 |
362 | 1301 | 93106.81 |
731 | 1521 | 92991.05 |
4562 | 1341 | 84099.62 |
3858 | 4151 | 82163.71 |
WHERE
Pulls the first 5 rows and all columns from the orders table that have a dollar amount of gloss_amt_usd greater than or equal to 1000.
%%sql
SELECT *
FROM orders
WHERE gloss_amt_usd >= 1000
LIMIT 5;
id | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
14 | 1001 | 2016-10-26 20:31:30 | 97 | 143 | 54 | 294 | 484.03 | 1071.07 | 438.48 | 1993.58 |
62 | 1091 | 2014-10-13 12:12:55 | 146 | 196 | 3 | 345 | 728.54 | 1468.04 | 24.36 | 2220.94 |
88 | 1101 | 2015-06-24 13:08:15 | 182 | 339 | 17 | 538 | 908.18 | 2539.11 | 138.04 | 3585.33 |
121 | 1131 | 2016-08-10 23:47:41 | 273 | 134 | 0 | 407 | 1362.27 | 1003.66 | 0.00 | 2365.93 |
129 | 1141 | 2016-12-21 15:52:58 | 143 | 1045 | 2157 | 3345 | 713.57 | 7827.05 | 17514.84 | 26055.46 |
Pulls the first 10 rows and all columns from the orders table that have a total_amt_usd less than 500.
%%sql
SELECT *
FROM orders
WHERE total_amt_usd < 500
LIMIT 10;
id | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
67 | 1091 | 2015-04-07 13:29:20 | 95 | 0 | 0 | 95 | 474.05 | 0.00 | 0.00 | 474.05 |
96 | 1101 | 2016-03-15 11:36:03 | 14 | 8 | 16 | 38 | 69.86 | 59.92 | 129.92 | 259.70 |
119 | 1131 | 2016-06-12 12:29:45 | 0 | 30 | 23 | 53 | 0.00 | 224.70 | 186.76 | 411.46 |
124 | 1131 | 2016-11-07 05:10:56 | 0 | 0 | 0 | 0 | 0.00 | 0.00 | 0.00 | 0.00 |
254 | 1251 | 2014-11-01 02:15:24 | 0 | 0 | 17 | 17 | 0.00 | 0.00 | 138.04 | 138.04 |
328 | 1291 | 2015-08-03 08:35:23 | 0 | 19 | 21 | 40 | 0.00 | 142.31 | 170.52 | 312.83 |
542 | 1421 | 2015-11-13 09:07:09 | 0 | 64 | 0 | 64 | 0.00 | 479.36 | 0.00 | 479.36 |
683 | 1501 | 2016-04-14 23:59:50 | 0 | 15 | 16 | 31 | 0.00 | 112.35 | 129.92 | 242.27 |
713 | 1521 | 2014-11-23 16:04:03 | 0 | 8 | 10 | 18 | 0.00 | 59.92 | 81.20 | 141.12 |
730 | 1521 | 2016-05-06 02:34:48 | 0 | 0 | 2 | 2 | 0.00 | 0.00 | 16.24 | 16.24 |
Filter the accounts table to include the company name, website, and the primary point of contact (primary_poc) just for the Exxon Mobil company in the accounts table.
%%sql
SELECT name, website, primary_poc
FROM accounts
WHERE name = 'Exxon Mobil'
name | website | primary_poc |
---|
Exxon Mobil | www.exxonmobil.com | Sung Shields |
Using the orders table:
Create a column that divides the standard_amt_usd by the standard_qty to find the unit price for standard paper for each order. Limit the results to the first 10 orders, and include the id and account_id fields.
%%sql
SELECT id, account_id, (standard_amt_usd/standard_qty) AS unit_price
FROM orders
LIMIT 10;
id | account_id | unit_price |
---|
1 | 1001 | 4.9900000000000000 |
2 | 1001 | 4.9900000000000000 |
3 | 1001 | 4.9900000000000000 |
4 | 1001 | 4.9900000000000000 |
5 | 1001 | 4.9900000000000000 |
6 | 1001 | 4.9900000000000000 |
7 | 1001 | 4.9900000000000000 |
8 | 1001 | 4.9900000000000000 |
9 | 1001 | 4.9900000000000000 |
10 | 1001 | 4.9900000000000000 |
Write a query that finds the percentage of revenue that comes from poster paper for each order. You will need to use only the columns that end with _usd. (Try to do this without using the total column.) Display the id and account_id fields also. NOTE - you will receive an error with the correct solution to this question. This occurs because at least one of the values in the data creates a division by zero in your formula. You will learn later how to fully handle this issue. For now, you can just limit your calculations to the first 10 orders, as we did in question #1, and you'll avoid that set of data that causes the problem.
%%sql
SELECT id, account_id, poster_amt_usd/(standard_amt_usd + gloss_amt_usd + poster_amt_usd) AS post_per
FROM orders
LIMIT 10;
id | account_id | post_per |
---|
1 | 1001 | 0.20019929527546921710 |
2 | 1001 | 0.26940158204455102647 |
3 | 1001 | 0E-20 |
4 | 1001 | 0E-20 |
5 | 1001 | 0.23117672777557473894 |
6 | 1001 | 0.34998360271726399625 |
7 | 1001 | 0.49862501668669069550 |
8 | 1001 | 0.59746613390507747783 |
9 | 1001 | 0.23737326760301367315 |
10 | 1001 | 0.07393917319249681297 |
LIKE
Use the accounts table to find All the companies whose names start with 'C'.
%%sql
SELECT id, name
FROM accounts
WHERE name LIKE 'C%'
LIMIT 10;
id | name |
---|
1061 | CVS Health |
1131 | Chevron |
1141 | Costco |
1201 | Cardinal Health |
1281 | Citigroup |
1361 | Comcast |
1531 | Cisco Systems |
1581 | Caterpillar |
1611 | Coca-Cola |
1781 | Cigna |
Use the accounts table to find All companies whose names contain the string 'one' somewhere in the name.
%%sql
SELECT id, name
FROM accounts
WHERE name LIKE '%one'
LIMIT 10;
id | name |
---|
1821 | INTL FCStone |
3791 | AutoZone |
Use the accounts table to find All companies whose names end with 's'.
%%sql
SELECT id, name
FROM accounts
WHERE name LIKE '%s'
LIMIT 10;
id | name |
---|
1071 | General Motors |
1441 | United Technologies |
1461 | Lowe's |
1531 | Cisco Systems |
1621 | HCA Holdings |
1651 | Tyson Foods |
1671 | Delta Air Lines |
1691 | Johnson Controls |
3361 | Ross Stores |
1791 | United Continental Holdings |
Use the accounts table to find the account name, primary_poc, and sales_rep_id for Walmart, Target, and Nordstrom.
%%sql
SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name IN ('Walmart', 'Target', 'Nordstrom')
LIMIT 10;
name | primary_poc | sales_rep_id |
---|
Walmart | Tamara Tuma | 321500 |
Target | Luba Streett | 321660 |
Nordstrom | Yan Crater | 321820 |
Use the web_events table to find all information regarding individuals who were contacted via the channel of organic or adwords.
%%sql
SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords')
LIMIT 10;
id | account_id | occurred_at | channel |
---|
4395 | 1001 | 2015-10-22 05:02:47 | organic |
4396 | 1001 | 2015-10-22 14:04:20 | adwords |
4399 | 1001 | 2016-01-01 15:45:54 | adwords |
4401 | 1001 | 2016-02-07 17:44:10 | adwords |
4402 | 1001 | 2016-02-27 15:27:22 | organic |
4404 | 1001 | 2016-04-05 03:02:52 | organic |
4405 | 1001 | 2016-04-17 16:41:02 | organic |
4408 | 1001 | 2016-05-21 16:22:01 | organic |
4410 | 1001 | 2016-06-22 13:48:53 | adwords |
4414 | 1001 | 2016-08-12 09:31:22 | organic |
NOT
Use the accounts table to find the account name, primary poc, and sales rep id for all stores except Walmart, Target, and Nordstrom.
%%sql
SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name NOT IN ('Walmart', 'Target', 'Nordstrom')
LIMIT 10;
name | primary_poc | sales_rep_id |
---|
Exxon Mobil | Sung Shields | 321510 |
Apple | Jodee Lupo | 321520 |
Berkshire Hathaway | Serafina Banda | 321530 |
McKesson | Angeles Crusoe | 321540 |
UnitedHealth Group | Savanna Gayman | 321550 |
CVS Health | Anabel Haskell | 321560 |
General Motors | Barrie Omeara | 321570 |
Ford Motor | Kym Hagerman | 321580 |
AT&T | Jamel Mosqueda | 321590 |
General Electric | Parker Hoggan | 321600 |
Use the web_events table to find all information regarding individuals who were contacted via any method except using organic or adwords methods.
%%sql
SELECT *
FROM web_events
WHERE channel NOT in ('organic', 'adwords')
LIMIT 10;
id | account_id | occurred_at | channel |
---|
1 | 1001 | 2015-10-06 17:13:58 | direct |
2 | 1001 | 2015-11-05 03:08:26 | direct |
3 | 1001 | 2015-12-04 03:57:24 | direct |
4 | 1001 | 2016-01-02 00:55:03 | direct |
5 | 1001 | 2016-02-01 19:02:33 | direct |
6 | 1001 | 2016-03-02 15:15:22 | direct |
7 | 1001 | 2016-04-01 10:58:55 | direct |
8 | 1001 | 2016-05-01 15:26:44 | direct |
9 | 1001 | 2016-05-31 20:53:47 | direct |
10 | 1001 | 2016-06-30 12:09:45 | direct |
Use the accounts table to find All the companies whose names do not start with 'C'.
%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE 'C%'
LIMIT 10;
name |
---|
Walmart |
Exxon Mobil |
Apple |
Berkshire Hathaway |
McKesson |
UnitedHealth Group |
General Motors |
Ford Motor |
AT&T |
General Electric |
All companies whose names do not contain the string 'one' somewhere in the name.
%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE '%one%'
LIMIT 10;
name |
---|
Walmart |
Exxon Mobil |
Apple |
Berkshire Hathaway |
McKesson |
UnitedHealth Group |
CVS Health |
General Motors |
Ford Motor |
AT&T |
All companies whose names do not end with 's'.
%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE '%s'
LIMIT 10;
name |
---|
Walmart |
Exxon Mobil |
Apple |
Berkshire Hathaway |
McKesson |
UnitedHealth Group |
CVS Health |
Ford Motor |
AT&T |
General Electric |
AND and BETWEEN
Write a query that returns all the orders where the standard_qty is over 1000, the poster_qty is 0, and the gloss_qty is 0.
%%sql
SELECT *
FROM orders
WHERE standard_qty > 1000 AND poster_qty = 0 AND gloss_qty = 0
LIMIT 10;
id | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
2613 | 2951 | 2016-08-15 00:06:12 | 1171 | 0 | 0 | 1171 | 5843.29 | 0.00 | 0.00 | 5843.29 |
3260 | 3491 | 2014-08-29 22:43:00 | 1552 | 0 | 0 | 1552 | 7744.48 | 0.00 | 0.00 | 7744.48 |
Using the accounts table, find all the companies whose names do not start with 'C' and end with 's'.
%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE 'C%' AND name LIKE '%s'
LIMIT 10;
name |
---|
General Motors |
United Technologies |
Lowe's |
HCA Holdings |
Tyson Foods |
Delta Air Lines |
Johnson Controls |
Ross Stores |
United Continental Holdings |
American Express |
When you use the BETWEEN operator in SQL, do the results include the values of your endpoints, or not?
Figure out the answer to this important question by writing a query that displays the order date and gloss_qty data for all orders where gloss_qty is between 24 and 29.
Then look at your output to see if the BETWEEN operator included the begin and end values or not.
--there are a number of rows in the output of this query where the gloss_qty values are 24 or 29.
So the answer to the question is that yes, the BETWEEN operator in SQL is inclusive; that is, the endpoint values are included.
So the BETWEEN statement in this query is equivalent to having written "WHERE gloss_qty >= 24 AND gloss_qty <= 29.
%%sql
SELECT occurred_at, gloss_qty
FROM orders
WHERE gloss_qty BETWEEN 24 AND 29
LIMIT 10;
occurred_at | gloss_qty |
---|
2016-02-01 19:27:27 | 29 |
2016-03-02 15:29:32 | 24 |
2016-10-14 23:54:21 | 28 |
2015-08-09 18:29:20 | 24 |
2016-02-01 20:00:37 | 26 |
2016-04-30 07:46:13 | 26 |
2016-06-28 06:18:20 | 25 |
2016-08-26 16:47:57 | 25 |
2016-10-24 21:46:10 | 28 |
2016-11-23 04:38:57 | 25 |
Use the web_events table to find all information regarding individuals who were contacted via the organic or adwords channels, and started their account at any point in 2016, sorted from newest to oldest.
%%sql
SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords') AND occurred_at BETWEEN '2016-01-01' AND '2017-01-01'
ORDER BY occurred_at DESC
LIMIT 10;
id | account_id | occurred_at | channel |
---|
8493 | 4141 | 2016-12-31 16:31:23 | organic |
5661 | 1851 | 2016-12-31 06:55:38 | organic |
5562 | 1791 | 2016-12-31 02:08:50 | adwords |
7703 | 3351 | 2016-12-30 21:06:53 | adwords |
7921 | 3521 | 2016-12-30 20:15:48 | organic |
6416 | 2401 | 2016-12-30 17:51:36 | adwords |
4553 | 1151 | 2016-12-30 15:57:41 | organic |
8129 | 3781 | 2016-12-30 06:52:24 | organic |
6200 | 2281 | 2016-12-30 03:43:11 | adwords |
6937 | 2801 | 2016-12-29 14:51:48 | organic |
Find list of orders ids where either gloss_qty or poster_qty is greater than 4000. Only include the id field in the resulting table.
%%sql
SELECT id
FROM orders
WHERE gloss_qty > 4000 OR poster_qty > 4000
LIMIT 10;
id |
---|
362 |
731 |
1191 |
1913 |
1939 |
3778 |
3858 |
3963 |
4016 |
4230 |
Write a query that returns a list of orders where the standard_qty is zero and either the gloss_qty or poster_qty is over 1000.
%%sql
SELECT *
FROM orders
WHERE standard_qty =0 AND (gloss_qty > 1000 OR poster_qty > 1000)
LIMIT 10;
id | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
1913 | 2461 | 2013-12-29 09:50:38 | 0 | 6450 | 45 | 6495 | 0.00 | 48310.50 | 365.40 | 48675.90 |
4369 | 1111 | 2015-11-15 17:47:46 | 0 | 486 | 2988 | 3474 | 0.00 | 3640.14 | 24262.56 | 27902.70 |
4391 | 1161 | 2016-06-04 08:58:10 | 0 | 106 | 2967 | 3073 | 0.00 | 793.94 | 24092.04 | 24885.98 |
4420 | 1191 | 2016-05-21 23:21:14 | 0 | 43 | 1448 | 1491 | 0.00 | 322.07 | 11757.76 | 12079.83 |
4448 | 1231 | 2016-06-25 12:27:15 | 0 | 3178 | 23 | 3201 | 0.00 | 23803.22 | 186.76 | 23989.98 |
4698 | 1451 | 2015-02-26 06:13:21 | 0 | 484 | 4901 | 5385 | 0.00 | 3625.16 | 39796.12 | 43421.28 |
4942 | 1701 | 2015-09-24 21:02:25 | 0 | 10744 | 95 | 10839 | 0.00 | 80472.56 | 771.40 | 81243.96 |
5032 | 1831 | 2016-05-26 17:48:19 | 0 | 1448 | 287 | 1735 | 0.00 | 10845.52 | 2330.44 | 13175.96 |
5191 | 2051 | 2015-05-17 14:17:59 | 0 | 1041 | 0 | 1041 | 0.00 | 7797.09 | 0.00 | 7797.09 |
5791 | 2861 | 2014-10-24 12:06:22 | 0 | 10 | 11691 | 11701 | 0.00 | 74.90 | 94930.92 | 95005.82 |
Find all the company names that start with a 'C' or 'W', and the primary contact contains 'ana' or 'Ana', but it doesn't contain 'eana'.
%%sql
SELECT *
FROM accounts
WHERE (name LIKE 'C%' OR name LIKE 'W%')
AND ((primary_poc LIKE '%ana%' OR primary_poc LIKE '%Ana%')
AND primary_poc NOT LIKE '%eana%');
id | name | website | lat | long | primary_poc | sales_rep_id |
---|
1061 | CVS Health | www.cvshealth.com | 41.46779585 | -73.76763638 | Anabel Haskell | 321560 |
1361 | Comcast | www.comcastcorporation.com | 42.54154764 | -76.24992387 | Shana Sanborn | 321650 |
SQL Joins
We use ON clause to specify a JOIN condition which is a logical statement to combine the table in FROM and JOIN statements.
%%sql
SELECT *
FROM orders
JOIN accounts
ON orders.account_id = accounts.id
LIMIT 10;
id | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd | id_1 | name | website | lat | long | primary_poc | sales_rep_id |
---|
1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
2 | 1001 | 2015-11-05 03:34:33 | 190 | 41 | 57 | 288 | 948.10 | 307.09 | 462.84 | 1718.03 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
3 | 1001 | 2015-12-04 04:21:55 | 85 | 47 | 0 | 132 | 424.15 | 352.03 | 0.00 | 776.18 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
4 | 1001 | 2016-01-02 01:18:24 | 144 | 32 | 0 | 176 | 718.56 | 239.68 | 0.00 | 958.24 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
5 | 1001 | 2016-02-01 19:27:27 | 108 | 29 | 28 | 165 | 538.92 | 217.21 | 227.36 | 983.49 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
6 | 1001 | 2016-03-02 15:29:32 | 103 | 24 | 46 | 173 | 513.97 | 179.76 | 373.52 | 1067.25 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
7 | 1001 | 2016-04-01 11:20:18 | 101 | 33 | 92 | 226 | 503.99 | 247.17 | 747.04 | 1498.20 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
8 | 1001 | 2016-05-01 15:55:51 | 95 | 47 | 151 | 293 | 474.05 | 352.03 | 1226.12 | 2052.20 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
9 | 1001 | 2016-05-31 21:22:48 | 91 | 16 | 22 | 129 | 454.09 | 119.84 | 178.64 | 752.57 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
10 | 1001 | 2016-06-30 12:32:05 | 94 | 46 | 8 | 148 | 469.06 | 344.54 | 64.96 | 878.56 | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 |
As we've learned, the SELECT clause indicates which column(s) of data you'd like to see in the output (For Example, orders.* gives us all the columns in orders table in the output). The FROM clause indicates the first table from which we're pulling data, and the JOIN indicates the second table. The ON clause specifies the column on which you'd like to merge the two tables together
Try pulling all the data from the accounts table, and all the data from the orders table.
%%sql
SELECT accounts.*, orders.*
FROM accounts
JOIN orders
ON orders.account_id = accounts.id
LIMIT 10;
id | name | website | lat | long | primary_poc | sales_rep_id | id_1 | account_id | occurred_at | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 2 | 1001 | 2015-11-05 03:34:33 | 190 | 41 | 57 | 288 | 948.10 | 307.09 | 462.84 | 1718.03 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 3 | 1001 | 2015-12-04 04:21:55 | 85 | 47 | 0 | 132 | 424.15 | 352.03 | 0.00 | 776.18 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 4 | 1001 | 2016-01-02 01:18:24 | 144 | 32 | 0 | 176 | 718.56 | 239.68 | 0.00 | 958.24 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 5 | 1001 | 2016-02-01 19:27:27 | 108 | 29 | 28 | 165 | 538.92 | 217.21 | 227.36 | 983.49 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 6 | 1001 | 2016-03-02 15:29:32 | 103 | 24 | 46 | 173 | 513.97 | 179.76 | 373.52 | 1067.25 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 7 | 1001 | 2016-04-01 11:20:18 | 101 | 33 | 92 | 226 | 503.99 | 247.17 | 747.04 | 1498.20 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 8 | 1001 | 2016-05-01 15:55:51 | 95 | 47 | 151 | 293 | 474.05 | 352.03 | 1226.12 | 2052.20 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 9 | 1001 | 2016-05-31 21:22:48 | 91 | 16 | 22 | 129 | 454.09 | 119.84 | 178.64 | 752.57 |
1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 10 | 1001 | 2016-06-30 12:32:05 | 94 | 46 | 8 | 148 | 469.06 | 344.54 | 64.96 | 878.56 |
Try pulling standard_qty, gloss_qty, and poster_qty from the orders table, and the website and the primary_poc from the accounts table.
%%sql
SELECT accounts.website, accounts.primary_poc, orders.standard_qty, orders.gloss_qty, orders.poster_qty
FROM accounts
JOIN orders
ON orders.account_id = accounts.id
LIMIT 10;
website | primary_poc | standard_qty | gloss_qty | poster_qty |
---|
www.walmart.com | Tamara Tuma | 123 | 22 | 24 |
www.walmart.com | Tamara Tuma | 190 | 41 | 57 |
www.walmart.com | Tamara Tuma | 85 | 47 | 0 |
www.walmart.com | Tamara Tuma | 144 | 32 | 0 |
www.walmart.com | Tamara Tuma | 108 | 29 | 28 |
www.walmart.com | Tamara Tuma | 103 | 24 | 46 |
www.walmart.com | Tamara Tuma | 101 | 33 | 92 |
www.walmart.com | Tamara Tuma | 95 | 47 | 151 |
www.walmart.com | Tamara Tuma | 91 | 16 | 22 |
www.walmart.com | Tamara Tuma | 94 | 46 | 8 |
Join all three of these tables - web_events, orders and accounts.
%%sql
SELECT *
FROM web_events
JOIN accounts
ON web_events.account_id = accounts.id
JOIN orders
ON accounts.id = orders.account_id
LIMIT 10;
id | account_id | occurred_at | channel | id_1 | name | website | lat | long | primary_poc | sales_rep_id | id_2 | account_id_1 | occurred_at_1 | standard_qty | gloss_qty | poster_qty | total | standard_amt_usd | gloss_amt_usd | poster_amt_usd | total_amt_usd |
---|
1 | 1001 | 2015-10-06 17:13:58 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
2 | 1001 | 2015-11-05 03:08:26 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
3 | 1001 | 2015-12-04 03:57:24 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
4 | 1001 | 2016-01-02 00:55:03 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
5 | 1001 | 2016-02-01 19:02:33 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
6 | 1001 | 2016-03-02 15:15:22 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
7 | 1001 | 2016-04-01 10:58:55 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
8 | 1001 | 2016-05-01 15:26:44 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
9 | 1001 | 2016-05-31 20:53:47 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
10 | 1001 | 2016-06-30 12:09:45 | direct | 1001 | Walmart | www.walmart.com | 40.23849561 | -75.10329704 | Tamara Tuma | 321500 | 1 | 1001 | 2015-10-06 17:31:14 | 123 | 22 | 24 | 169 | 613.77 | 164.78 | 194.88 | 973.43 |
Provide a table that provides the region for each sales_rep along with their associated accounts.
This time only for the Midwest region. Your final table should include three columns:
the region name, the sales rep name, and the account name. Sort the accounts alphabetically (A-Z) according to account name.
%%sql
SELECT r.name region, s.name rep, a.name account
FROM sales_reps AS s
JOIN region AS r
ON s.region_id = r.id
JOIN accounts AS a
ON s.id = a.sales_rep_id
WHERE r.name = 'Midwest'
ORDER BY a.name ASC
LIMIT 10;
region | rep | account |
---|
Midwest | Chau Rowles | Abbott Laboratories |
Midwest | Julie Starr | AbbVie |
Midwest | Cliff Meints | Aflac |
Midwest | Chau Rowles | Alcoa |
Midwest | Charles Bidwell | Altria Group |
Midwest | Delilah Krum | Amgen |
Midwest | Charles Bidwell | Arrow Electronics |
Midwest | Delilah Krum | AutoNation |
Midwest | Delilah Krum | Capital One Financial |
Midwest | Cordell Rieder | Centene |
Provide a table that provides the region for each sales_rep along with their associated accounts.
This time only for accounts where the sales rep has a first name starting with S and in the Midwest region.
Your final table should include three columns: the region name, the sales rep name, and the account name.
Sort the accounts alphabetically (A-Z) according to account name.
%%sql
SELECT r.name region, s.name rep, a.name account
FROM sales_reps AS s
JOIN region AS r
ON s.region_id = r.id
JOIN accounts AS a
ON s.id = a.sales_rep_id
WHERE s.name LIKE 'S%'
AND r.name = 'Midwest'
ORDER BY a.name ASC
LIMIT 10;
region | rep | account |
---|
Midwest | Sherlene Wetherington | Community Health Systems |
Midwest | Sherlene Wetherington | Progressive |
Midwest | Sherlene Wetherington | Rite Aid |
Midwest | Sherlene Wetherington | Time Warner Cable |
Midwest | Sherlene Wetherington | U.S. Bancorp |
Provide a table that provides the region for each sales_rep along with their associated accounts.
This time only for accounts where the sales rep has a last name starting with K and in the Midwest region.
Your final table should include three columns: the region name, the sales rep name, and the account name.
Sort the accounts alphabetically (A-Z) according to account name.
%%sql
SELECT r.name region, s.name rep, a.name account
FROM region r
JOIN sales_reps s
ON r.id = s.region_id
JOIN accounts a
ON r.id = a.sales_rep_id
WHERE s.name LIKE 'K%'
AND r.name = 'Midwest'
ORDER BY a.name
LIMIT 10;
SQL Aggregations
If the COUNT result of a column is less than the number of rows in the table, we know that difference is the number of NULLs.
%sql SELECT COUNT(*) AS account_count FROM accounts;
%sql SELECT COUNT(id) AS account_id_count FROM accounts;
Unlike COUNT, you can only use SUM on numeric columns. However, SUM will ignore NULL values, as do the other aggregation functions.
%%sql
SELECT SUM(standard_qty) AS standard,
SUM(gloss_qty) AS gloss,
SUM(poster_qty) AS poster
FROM orders;
standard | gloss | poster |
---|
1938346 | 1013773 | 723646 |
Find the total amount spent on standard_amt_usd and gloss_amt_usd paper for each order in the orders table.
This should give a dollar amount for each order in the table.
%%sql
SELECT SUM(standard_amt_usd) + SUM(gloss_amt_usd) AS total_usd
FROM orders
LIMIT 10;
Find the standard_amt_usd per unit of standard_qty paper.
Your solution should use both an aggregation and a mathematical operator.
%%sql
SELECT SUM(standard_amt_usd)/SUM(standard_qty) AS standard_price_per_unit
FROM orders
LIMIT 10;
standard_price_per_unit |
---|
4.9900000000000000 |
Functionally, MIN and MAX are similar to COUNT in that they can be used on non-numerical columns.
Depending on the column type, MIN will return the lowest number, earliest date, or non-numerical value as early in the alphabet as possible.
As you might suspect, MAX does the opposite—it returns the highest number, the latest date, or the non-numerical value closest alphabetically to “Z.”
%%sql
SELECT MIN(standard_qty) AS standard_min,
MAX(standard_qty) AS standard_max
FROM orders
LIMIT 10;
standard_min | standard_max |
---|
0 | 22591 |
Working with CASE statements
%%sql
SELECT
o.id AS order_id,
a.id AS account_id,
SUM(o.total_amt_usd) AS total_sales,
CASE
WHEN SUM(o.total_amt_usd) >= 3000 THEN 'Large'
ELSE 'Small'
END AS order_size
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC, 1, 2
LIMIT 10;
order_id | account_id | total_sales | order_size |
---|
4016 | 4251 | 232207.07 | Large |
3892 | 4161 | 112875.18 | Large |
3963 | 4211 | 107533.55 | Large |
5791 | 2861 | 95005.82 | Large |
3778 | 4101 | 93547.84 | Large |
6590 | 4111 | 93505.69 | Large |
362 | 1301 | 93106.81 | Large |
731 | 1521 | 92991.05 | Large |
4562 | 1341 | 84099.62 | Large |
3858 | 4151 | 82163.71 | Large |
%%sql
SELECT
CASE
WHEN total > 2000 THEN 'At Least 2000'
WHEN total BETWEEN 1000 AND 2000 THEN 'Between 1000 and 2000'
ELSE 'Less than 1000'
END AS order_size,
COUNT(*) AS count
FROM orders
GROUP BY 1
LIMIT 10;
order_size | count |
---|
Between 1000 and 2000 | 511 |
Less than 1000 | 6331 |
At Least 2000 | 70 |
%%sql
SELECT
a.name,
SUM(o.total_amt_usd) total_sales,
CASE
WHEN SUM(o.total_amt_usd) > 200000 THEN '200,000+'
WHEN SUM(o.total_amt_usd) BETWEEN 100000 AND 200000 THEN 'Between 100,000 and 200,000'
ELSE 'Under 100,000'
END AS group_level
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
name | total_sales | group_level |
---|
EOG Resources | 382873.30 | 200,000+ |
Mosaic | 345618.59 | 200,000+ |
IBM | 326819.48 | 200,000+ |
General Dynamics | 300694.79 | 200,000+ |
Republic Services | 293861.14 | 200,000+ |
Leucadia National | 291047.25 | 200,000+ |
Arrow Electronics | 281018.36 | 200,000+ |
Sysco | 278575.64 | 200,000+ |
Supervalu | 275288.30 | 200,000+ |
Archer Daniels Midland | 272672.84 | 200,000+ |
%%sql
SELECT
a.name,
SUM(o.total_amt_usd) total_sales,
CASE
WHEN SUM(o.total_amt_usd) > 200000 THEN '200,000+'
WHEN SUM(o.total_amt_usd) BETWEEN 100000 AND 200000 THEN 'Between 100,000 and 200,000'
ELSE 'Under 100,000'
END AS group_level
FROM accounts a
JOIN orders o
ON a.id = o.account_id
WHERE date_part('year', o.occurred_at) IN (2016, 2017)
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
name | total_sales | group_level |
---|
Pacific Life | 255319.18 | 200,000+ |
Mosaic | 172180.04 | Between 100,000 and 200,000 |
CHS | 163471.78 | Between 100,000 and 200,000 |
Core-Mark Holding | 148105.93 | Between 100,000 and 200,000 |
Disney | 129157.38 | Between 100,000 and 200,000 |
National Oilwell Varco | 121873.16 | Between 100,000 and 200,000 |
Sears Holdings | 114003.21 | Between 100,000 and 200,000 |
State Farm Insurance Cos. | 111810.55 | Between 100,000 and 200,000 |
Fidelity National Financial | 110027.29 | Between 100,000 and 200,000 |
BB&T Corp. | 107300.05 | Between 100,000 and 200,000 |
%%sql
SELECT
s.name AS rep_name,
COUNT(o.id) AS order_count,
CASE
WHEN COUNT(o.id) > 200 THEN 'Top'
ELSE 'Not'
END AS rep_level
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
rep_name | order_count | rep_level |
---|
Earlie Schleusner | 335 | Top |
Vernita Plump | 299 | Top |
Tia Amato | 267 | Top |
Georgianna Chisholm | 256 | Top |
Moon Torian | 250 | Top |
Nelle Meaux | 241 | Top |
Maren Musto | 224 | Top |
Dorotha Seawell | 208 | Top |
Charles Bidwell | 205 | Top |
Maryanna Fiorentino | 204 | Top |
%%sql
SELECT
s.name AS rep_name,
COUNT(o.id) AS order_count,
SUM(o.total_amt_usd) AS total_sales,
CASE
WHEN COUNT(o.id) > 200 OR
SUM(o.total_amt_usd) > 750000 THEN 'top'
WHEN COUNT(o.id) > 150 OR
SUM(o.total_amt_usd) > 500000 THEN 'middle'
ELSE 'Low'
END AS rep_level
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 3 DESC
LIMIT 10;
rep_name | order_count | total_sales | rep_level |
---|
Earlie Schleusner | 335 | 1098137.72 | top |
Tia Amato | 267 | 1010690.60 | top |
Vernita Plump | 299 | 934212.93 | top |
Georgianna Chisholm | 256 | 886244.12 | top |
Arica Stoltzfus | 186 | 810353.34 | top |
Dorotha Seawell | 208 | 766935.04 | top |
Nelle Meaux | 241 | 749076.16 | top |
Sibyl Lauria | 193 | 722084.27 | middle |
Maren Musto | 224 | 702697.29 | top |
Brandie Riva | 167 | 675917.64 | middle |
Working with Dates
%%sql
SELECT
date_part('year', occurred_at) AS sales_year,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_year | total_sales |
---|
2016.0 | 12864917.92 |
2015.0 | 5752004.94 |
2014.0 | 4069106.54 |
2013.0 | 377331.00 |
2017.0 | 78151.43 |
%%sql
SELECT
date_part('month', occurred_at) AS sales_month,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2
LIMIT 10;
sales_month | total_sales |
---|
2.0 | 1312616.64 |
1.0 | 1337661.87 |
5.0 | 1537082.23 |
4.0 | 1562037.74 |
3.0 | 1659987.88 |
6.0 | 1871118.52 |
8.0 | 1918107.22 |
7.0 | 1978731.15 |
9.0 | 2017216.88 |
11.0 | 2390033.75 |
%%sql
SELECT
date_part('year', occurred_at) AS sales_year,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_year | total_sales |
---|
2016.0 | 12864917.92 |
2015.0 | 5752004.94 |
2014.0 | 4069106.54 |
2013.0 | 377331.00 |
2017.0 | 78151.43 |
%%sql
SELECT
date_part('month', occurred_at) AS sales_month,
COUNT(o.id) AS total_orders
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_month | total_orders |
---|
12.0 | 882 |
11.0 | 713 |
10.0 | 675 |
8.0 | 603 |
9.0 | 602 |
7.0 | 571 |
6.0 | 527 |
5.0 | 518 |
3.0 | 482 |
4.0 | 472 |
%%sql
SELECT
a.name,
date_part('year', occurred_at) AS _year,
date_part('month', occurred_at) AS _month,
SUM(o.gloss_amt_usd) AS total_gloss_saless
FROM orders o
JOIN accounts a
ON a.id = o.account_id
WHERE a.name = 'Walmart'
GROUP BY 1,
2,
3
ORDER BY total_gloss_saless DESC
LIMIT 10;
name | _year | _month | total_gloss_saless |
---|
Walmart | 2016.0 | 5.0 | 9257.64 |
Walmart | 2016.0 | 1.0 | 5070.73 |
Walmart | 2015.0 | 11.0 | 4890.97 |
Walmart | 2016.0 | 4.0 | 4875.99 |
Walmart | 2015.0 | 12.0 | 4823.56 |
Walmart | 2016.0 | 3.0 | 4711.21 |
Walmart | 2016.0 | 9.0 | 4673.76 |
Walmart | 2016.0 | 2.0 | 4673.76 |
Walmart | 2016.0 | 8.0 | 4531.45 |
Walmart | 2016.0 | 11.0 | 4359.18 |
HAVING clause
%%sql
SELECT
s.name,
COUNT(*) num_accts
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
GROUP BY 1
HAVING COUNT(*) > 5
LIMIT 10;
name | num_accts |
---|
Samuel Racine | 6 |
Elwood Shutt | 9 |
Michel Averette | 7 |
Brandie Riva | 10 |
Elba Felder | 6 |
Nelle Meaux | 7 |
Necole Victory | 6 |
Saran Ram | 10 |
Moon Torian | 10 |
Sibyl Lauria | 6 |
%%sql
SELECT
a.name AS account_name,
COUNT(o.id) AS order_id
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY account_name
HAVING COUNT(o.id) > 20
LIMIT 10;
account_name | order_id |
---|
Monsanto | 57 |
KKR | 55 |
Performance Food Group | 21 |
Paccar | 46 |
CST Brands | 49 |
Reynolds American | 23 |
Sears Holdings | 30 |
Aetna | 51 |
Gilead Sciences | 27 |
Altria Group | 51 |
%%sql
SELECT
a.name AS account_name,
COUNT(o.id) AS order_id
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY account_name
HAVING COUNT(o.id) > 20
ORDER BY order_id DESC
LIMIT 10;
account_name | order_id |
---|
Leucadia National | 71 |
Supervalu | 68 |
Sysco | 68 |
Arrow Electronics | 67 |
Mosaic | 66 |
Archer Daniels Midland | 66 |
General Dynamics | 66 |
Fluor | 65 |
Western Digital | 65 |
Philip Morris International | 65 |
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
HAVING SUM(o.total_amt_usd) > 30000
ORDER BY total_usd DESC
LIMIT 10;
account_name | total_usd |
---|
EOG Resources | 382873.30 |
Mosaic | 345618.59 |
IBM | 326819.48 |
General Dynamics | 300694.79 |
Republic Services | 293861.14 |
Leucadia National | 291047.25 |
Arrow Electronics | 281018.36 |
Sysco | 278575.64 |
Supervalu | 275288.30 |
Archer Daniels Midland | 272672.84 |
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
HAVING SUM(o.total_amt_usd) < 10000
ORDER BY total_usd DESC
LIMIT 10;
account_name | total_usd |
---|
General Motors | 9995.05 |
AIG | 9980.93 |
O'Reilly Automotive | 9914.23 |
WellCare Health Plans | 9743.13 |
CBRE Group | 9715.71 |
Newmont Mining | 9618.81 |
Macy's | 9613.32 |
Devon Energy | 9536.55 |
Eli Lilly | 9339.20 |
Xerox | 8759.93 |
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
ORDER BY total_usd DESC
LIMIT 10;
account_name | total_usd |
---|
EOG Resources | 382873.30 |
Mosaic | 345618.59 |
IBM | 326819.48 |
General Dynamics | 300694.79 |
Republic Services | 293861.14 |
Leucadia National | 291047.25 |
Arrow Electronics | 281018.36 |
Sysco | 278575.64 |
Supervalu | 275288.30 |
Archer Daniels Midland | 272672.84 |
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
ORDER BY total_usd ASC
LIMIT 10;
account_name | total_usd |
---|
Nike | 390.25 |
Delta Air Lines | 859.64 |
Level 3 Communications | 881.73 |
Deere | 1036.57 |
Bed Bath & Beyond | 1069.64 |
Las Vegas Sands | 1113.29 |
Assurant | 1235.81 |
Ball | 1982.74 |
Priceline Group | 2129.24 |
Bank of New York Mellon Corp. | 2155.98 |
%%sql
SELECT
a.name AS account_name,
w.channel,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.name,
w.channel
HAVING COUNT(w.channel) > 6
LIMIT 10;
account_name | channel | channel_cnt |
---|
FirstEnergy | facebook | 10 |
Aetna | facebook | 10 |
Fluor | facebook | 8 |
Cisco Systems | facebook | 8 |
Lithia Motors | facebook | 10 |
General Mills | facebook | 9 |
Wells Fargo | facebook | 7 |
PayPal Holdings | facebook | 8 |
United Continental Holdings | facebook | 11 |
Laboratory Corp. of America | facebook | 7 |
%%sql
SELECT
a.name AS account_name,
w.channel,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.name,
w.channel
ORDER BY channel_cnt DESC
LIMIT 10;
account_name | channel | channel_cnt |
---|
Gilead Sciences | facebook | 16 |
TJX | facebook | 15 |
AutoNation | facebook | 14 |
Charter Communications | facebook | 13 |
Disney | facebook | 12 |
Marathon Petroleum | facebook | 12 |
Philip Morris International | facebook | 12 |
United Continental Holdings | facebook | 11 |
EOG Resources | facebook | 11 |
Ecolab | facebook | 11 |
%%sql
SELECT
a.name AS account_name,
w.channel AS channel_name,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
GROUP BY a.name,
w.channel
ORDER BY channel_cnt DESC, channel_name, account_name
LIMIT 10;
account_name | channel_name | channel_cnt |
---|
Leucadia National | direct | 52 |
Colgate-Palmolive | direct | 51 |
New York Life Insurance | direct | 51 |
Philip Morris International | direct | 49 |
ADP | direct | 48 |
AutoNation | direct | 48 |
BlackRock | direct | 48 |
Charter Communications | direct | 48 |
FirstEnergy | direct | 48 |
Altria Group | direct | 47 |
CONCAT
Each company in the accounts table wants to create an email address for each primary_poc. The email address should be the first name of the primary_poc . last name primary_poc @ company name .com.
%%sql
SELECT
concat
(
-- first name
LEFT(a.primary_poc, strpos(a.primary_poc, ' ') - 1),
'.',
-- last name
substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1),
'@',
a.name,
'.com'
)
FROM accounts a
LIMIT 10;
concat |
---|
Tamara.Tuma@Walmart.com |
Sung.Shields@Exxon Mobil.com |
Jodee.Lupo@Apple.com |
Serafina.Banda@Berkshire Hathaway.com |
Angeles.Crusoe@McKesson.com |
Savanna.Gayman@UnitedHealth Group.com |
Anabel.Haskell@CVS Health.com |
Barrie.Omeara@General Motors.com |
Kym.Hagerman@Ford Motor.com |
Jamel.Mosqueda@AT&T.com |
You may have noticed that in the previous question some of the company names include spaces, which will certainly not work in an email address. See if you can create an email address that will work by removing all of the spaces in the account name, but otherwise your solution should be just as above. Some helpful documentation is here: https://www.postgresql.org/docs/8.1/functions-string.html
%%sql
SELECT
concat
(
-- poc first name
LEFT(a.primary_poc, strpos(a.primary_poc, ' ') - 1),
'.',
-- poc last name
substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1),
'@',
-- company name
REPLACE(a.name, ' ', ''),
'.com'
)
FROM accounts a
LIMIT 10;
concat |
---|
Tamara.Tuma@Walmart.com |
Sung.Shields@ExxonMobil.com |
Jodee.Lupo@Apple.com |
Serafina.Banda@BerkshireHathaway.com |
Angeles.Crusoe@McKesson.com |
Savanna.Gayman@UnitedHealthGroup.com |
Anabel.Haskell@CVSHealth.com |
Barrie.Omeara@GeneralMotors.com |
Kym.Hagerman@FordMotor.com |
Jamel.Mosqueda@AT&T.com |
We would also like to create an initial password, which they will change after their first log in. The first password will be:
- the first letter of the primary_poc's first name (lowercase), then
- the last letter of their first name (lowercase),
- the first letter of their last name (lowercase),
- the last letter of their last name (lowercase),
- the number of letters in their first name,
- the number of letters in their last name, and then
- the name of the company they are working with, all capitalized with no spaces.
%%sql
SELECT
concat
(
LOWER(LEFT(a.primary_poc, 1)),
LOWER(substr(a.primary_poc, strpos(a.primary_poc, ' ') - 1, 1)),
LOWER(substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1, 1)),
LOWER(RIGHT(a.primary_poc, 1)),
length(substr(a.primary_poc, 1, strpos(a.primary_poc, ' ') - 1)),
length(substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1)),
UPPER(REPLACE(a.name, ' ', ''))
)
FROM accounts a
LIMIT 10;
concat |
---|
tata64WALMART |
sgss47EXXONMOBIL |
jelo54APPLE |
saba85BERKSHIREHATHAWAY |
asce76MCKESSON |
sagn76UNITEDHEALTHGROUP |
alhl67CVSHEALTH |
beoa66GENERALMOTORS |
kmhn38FORDMOTOR |
jlma58AT&T |
Suppose the company wants to assess the performance of all the sales representatives.
Each sales representative is assigned to work in a particular region.
To make it easier to understand for the HR team, display the concatenated salesreps.id, ‘’ (underscore),
and region.name as EMP_ID_REGION for each sales representative.
%%sql
SELECT
concat(s.id, '_', r.name)
FROM sales_reps s
JOIN region r
ON r.id = s.region_id
LIMIT 10;
concat |
---|
321500_Northeast |
321510_Northeast |
321520_Northeast |
321530_Northeast |
321540_Northeast |
321550_Northeast |
321560_Northeast |
321570_Northeast |
321580_Northeast |
321590_Northeast |
From the accounts table, display:
- the name of the client,
- the coordinate as concatenated (latitude, longitude),
- email id of the primary point of contact as
first letter of the primary_poc
last letter of the primary_poc
@extracted name and domain from the website
.
%%sql
SELECT
a.name,
concat('(', a.lat, ',', a.long, ')') AS lat_long,
concat(LEFT(a.primary_poc, 1), RIGHT(a.primary_poc, 1), '@', substr(a.website, 5)) AS poc_email
FROM accounts a
LIMIT 10;
name | lat_long | poc_email |
---|
Walmart | (40.23849561,-75.10329704) | Ta@walmart.com |
Exxon Mobil | (41.16915630,-73.84937379) | Ss@exxonmobil.com |
Apple | (42.29049481,-76.08400942) | Jo@apple.com |
Berkshire Hathaway | (40.94902131,-75.76389759) | Sa@berkshirehathaway.com |
McKesson | (42.21709326,-75.28499823) | Ae@mckesson.com |
UnitedHealth Group | (40.08792542,-75.57569396) | Sn@unitedhealthgroup.com |
CVS Health | (41.46779585,-73.76763638) | Al@cvshealth.com |
General Motors | (40.80551762,-76.71018140) | Ba@gm.com |
Ford Motor | (41.11394200,-75.85422452) | Kn@ford.com |
AT&T | (42.49746270,-74.90271225) | Ja@att.com |
From the webevents table, display the concatenated value of account_id, '' , channel, '_', count of web events of the particular channel.
%%sql
WITH t1
AS (SELECT
w1.account_id,
w1.channel,
COUNT(*) AS cnt
FROM web_events w1
GROUP BY 1,
2)
SELECT
concat(t1.account_id, '_', t1.channel, '_', t1.cnt)
FROM t1
LIMIT 10;
concat |
---|
2601_organic_3 |
2541_organic_8 |
2461_facebook_8 |
1121_organic_7 |
2191_adwords_2 |
1761_adwords_1 |
4301_organic_2 |
2051_organic_9 |
3631_twitter_2 |
2741_adwords_2 |
Window function
%%sql
SELECT id,
account_id,
DATE_TRUNC('year',occurred_at) AS year,
DENSE_RANK() OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS dense_rank,
total_amt_usd,
SUM(total_amt_usd) OVER account_year_window AS sum_total_amt_usd,
COUNT(total_amt_usd) OVER account_year_window AS count_total_amt_usd,
AVG(total_amt_usd) OVER account_year_window AS avg_total_amt_usd,
MIN(total_amt_usd) OVER account_year_window AS min_total_amt_usd,
MAX(total_amt_usd) OVER account_year_window AS max_total_amt_usd
FROM orders
window account_year_window as
(PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at))
LIMIT 10;
id | account_id | year | dense_rank | total_amt_usd | sum_total_amt_usd | count_total_amt_usd | avg_total_amt_usd | min_total_amt_usd | max_total_amt_usd |
---|
4307 | 1001 | 2015-01-01 00:00:00 | 1 | 8757.18 | 21651.53 | 5 | 4330.3060000000000000 | 776.18 | 9426.71 |
3 | 1001 | 2015-01-01 00:00:00 | 1 | 776.18 | 21651.53 | 5 | 4330.3060000000000000 | 776.18 | 9426.71 |
2 | 1001 | 2015-01-01 00:00:00 | 1 | 1718.03 | 21651.53 | 5 | 4330.3060000000000000 | 776.18 | 9426.71 |
1 | 1001 | 2015-01-01 00:00:00 | 1 | 973.43 | 21651.53 | 5 | 4330.3060000000000000 | 776.18 | 9426.71 |
4308 | 1001 | 2015-01-01 00:00:00 | 1 | 9426.71 | 21651.53 | 5 | 4330.3060000000000000 | 776.18 | 9426.71 |
6 | 1001 | 2016-01-01 00:00:00 | 2 | 1067.25 | 124014.87 | 28 | 4429.1025000000000000 | 752.57 | 9426.71 |
5 | 1001 | 2016-01-01 00:00:00 | 2 | 983.49 | 124014.87 | 28 | 4429.1025000000000000 | 752.57 | 9426.71 |
4 | 1001 | 2016-01-01 00:00:00 | 2 | 958.24 | 124014.87 | 28 | 4429.1025000000000000 | 752.57 | 9426.71 |
4318 | 1001 | 2016-01-01 00:00:00 | 2 | 7924.46 | 124014.87 | 28 | 4429.1025000000000000 | 752.57 | 9426.71 |
4309 | 1001 | 2016-01-01 00:00:00 | 2 | 9230.67 | 124014.87 | 28 | 4429.1025000000000000 | 752.57 | 9426.71 |
Create a running total of standard_amt_usd (in the orders table) over order time with no date truncation. Your final table should have two columns: one with the amount being added for each new row, and a second with the running total.
%%sql
SELECT standard_amt_usd,
SUM(standard_amt_usd) OVER (ORDER BY occurred_at) AS running_total
FROM orders
LIMIT 10;
standard_amt_usd | running_total |
---|
0.00 | 0.00 |
2445.10 | 2445.10 |
2634.72 | 5079.82 |
0.00 | 5079.82 |
2455.08 | 7534.90 |
2504.98 | 10039.88 |
264.47 | 10304.35 |
1536.92 | 11841.27 |
374.25 | 12215.52 |
1402.19 | 13617.71 |
Now, modify your query from the previous question to include partitions. Still create a running total of standard_amt_usd (in the orders table) over order time, but this time, date truncate occurred_at by year and partition by that same year-truncated occurred_at variable. Your final table should have three columns: One with the amount being added for each row, one for the truncated date, and a final column with the running total within each year.
%%sql
SELECT standard_amt_usd, date_trunc('year', occurred_at) as year,
SUM(standard_amt_usd) OVER (partition by date_trunc('year', occurred_at) order by occurred_at) AS running_total
FROM orders
LIMIT 10;
standard_amt_usd | year | running_total |
---|
0.00 | 2013-01-01 00:00:00 | 0.00 |
2445.10 | 2013-01-01 00:00:00 | 2445.10 |
2634.72 | 2013-01-01 00:00:00 | 5079.82 |
0.00 | 2013-01-01 00:00:00 | 5079.82 |
2455.08 | 2013-01-01 00:00:00 | 7534.90 |
2504.98 | 2013-01-01 00:00:00 | 10039.88 |
264.47 | 2013-01-01 00:00:00 | 10304.35 |
1536.92 | 2013-01-01 00:00:00 | 11841.27 |
374.25 | 2013-01-01 00:00:00 | 12215.52 |
1402.19 | 2013-01-01 00:00:00 | 13617.71 |
Subqueries
%%sql
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC
LIMIT 10;
day | channel | event_count |
---|
2017-01-01 00:00:00 | direct | 21 |
2016-12-21 00:00:00 | direct | 21 |
2016-12-31 00:00:00 | direct | 19 |
2016-11-03 00:00:00 | direct | 18 |
2016-12-28 00:00:00 | direct | 17 |
2016-10-28 00:00:00 | direct | 16 |
2016-12-20 00:00:00 | direct | 16 |
2016-10-29 00:00:00 | direct | 15 |
2016-12-26 00:00:00 | direct | 15 |
2016-12-02 00:00:00 | direct | 15 |
Num events for each day for each channel
%%sql
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC
LIMIT 10;
day | channel | event_count |
---|
2017-01-01 00:00:00 | direct | 21 |
2016-12-21 00:00:00 | direct | 21 |
2016-12-31 00:00:00 | direct | 19 |
2016-11-03 00:00:00 | direct | 18 |
2016-12-28 00:00:00 | direct | 17 |
2016-10-28 00:00:00 | direct | 16 |
2016-12-20 00:00:00 | direct | 16 |
2016-10-29 00:00:00 | direct | 15 |
2016-12-26 00:00:00 | direct | 15 |
2016-12-02 00:00:00 | direct | 15 |
Avg events per channel
%%sql
SELECT
sub.channel,
ROUND(AVG(sub.event_count), 2)
FROM (
-- number of events for each day for each channel
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC) sub
GROUP BY 1
LIMIT 10;
channel | round |
---|
facebook | 1.60 |
organic | 1.67 |
twitter | 1.32 |
adwords | 1.57 |
banner | 1.29 |
direct | 4.90 |
Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.
%%sql
SELECT
t3.region_name,
t3.rep_name,
t3.total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t3
JOIN (SELECT
t1.region_name,
MAX(t1.total_sales) AS total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t1
GROUP BY 1) AS t2
ON t3.region_name = t2.region_name
AND t2.total_sales = t3.total_sales
LIMIT 10;
region_name | rep_name | total_sales |
---|
Midwest | Charles Bidwell | 675637.19 |
Northeast | Tia Amato | 1010690.60 |
Southeast | Earlie Schleusner | 1098137.72 |
West | Georgianna Chisholm | 886244.12 |
For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed?
%%sql
SELECT
r.name AS region_name,
COUNT(o.total) AS ttl_cnt
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
WHERE r.name = (SELECT
t1.name
FROM (
-- region with the most sales
SELECT
r.name,
SUM(o.total_amt_usd) AS ttl_sls
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1)
GROUP BY 1;
region_name | ttl_cnt |
---|
Northeast | 2357 |
How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?
%%sql
SELECT
a.name,
SUM(o.total) AS total_qty
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING SUM(o.total) > (
-- what is the max standard_qty purchased by an account
SELECT
t1.ttl_qty
FROM (
-- which account has bought the most standard_qty paper throughout their lifetime as a customer?
SELECT
a.name,
SUM(o.standard_qty) AS max_std_qty,
SUM(o.total) AS ttl_qty
FROM orders o
JOIN accounts a
ON o.account_id = a.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1);
name | total_qty |
---|
EOG Resources | 56410 |
Mosaic | 49246 |
IBM | 47506 |
For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?
%%sql
SELECT
a.name,
a.id,
we.channel,
COUNT(*) AS cnt
FROM accounts a
JOIN web_events we
ON a.id = we.account_id
WHERE a.id = (
-- the id of the customer with the max total spending
SELECT
t1.id
FROM (
-- the customer with the max total spending
SELECT
a.name,
a.id,
SUM(o.total_amt_usd)
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 1) AS t1)
GROUP BY 1,
2,
3
name | id | channel | cnt |
---|
EOG Resources | 4211 | adwords | 12 |
EOG Resources | 4211 | banner | 4 |
EOG Resources | 4211 | direct | 44 |
EOG Resources | 4211 | facebook | 11 |
EOG Resources | 4211 | organic | 13 |
EOG Resources | 4211 | twitter | 5 |
What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?
%%sql
SELECT
AVG(t2.total_sls_usd) AS avg_sls_for_top_10
FROM (
-- top 10 accounts by total spending
SELECT
a.id,
a.name,
SUM(o.total_amt_usd) AS total_sls_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 10) AS t2;
avg_sls_for_top_10 |
---|
304846.969000000000 |
What is the lifetime average amount spent in terms of total_amt_usd, including only the companies that spent more per order, on average, than the average of all orders.
%%sql
SELECT
AVG(tmp.avg_sls_for_company)
FROM (SELECT
a.name,
ROUND(AVG(o.total_amt_usd), 2) AS avg_sls_for_company
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING ROUND(AVG(o.total_amt_usd), 2) > (
-- average total_amt_usd per order for all orders
SELECT
ROUND(AVG(o1.total_amt_usd), 2) AS avg_ttl_per_order
FROM orders o1)) tmp;
Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.
%%sql
WITH sales_for_region_by_rep
AS (
-- total sales by region for rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2
ORDER BY 3 DESC),
max_sales_for_region
AS (
-- max sales by region for rep, rep name removed
SELECT
t1.region_name,
MAX(t1.total_sales) AS total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t1
GROUP BY 1)
SELECT
sales_for_region_by_rep.region_name,
sales_for_region_by_rep.rep_name,
sales_for_region_by_rep.total_sales
FROM sales_for_region_by_rep
JOIN max_sales_for_region
ON sales_for_region_by_rep.region_name = max_sales_for_region.region_name
AND sales_for_region_by_rep.total_sales = max_sales_for_region.total_sales
LIMIT 10;
region_name | rep_name | total_sales |
---|
Midwest | Charles Bidwell | 675637.19 |
Northeast | Tia Amato | 1010690.60 |
Southeast | Earlie Schleusner | 1098137.72 |
West | Georgianna Chisholm | 886244.12 |
For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed?
%%sql
WITH region_with_max_sales_and_sales
AS (
-- region name with the most sales
SELECT
t1.name
FROM (
-- region name with the most sales and sales total
SELECT
r.name,
SUM(o.total_amt_usd) AS ttl_sls
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1)
SELECT
r.name AS region_name,
COUNT(o.total) AS ttl_cnt
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
JOIN region_with_max_sales_and_sales rms
ON rms.name = r.name
GROUP BY 1;
region_name | ttl_cnt |
---|
Northeast | 2357 |
How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?
%%sql
WITH t2
AS (
-- which account has bought the most standard_qty paper throughout their lifetime as a customer?
SELECT
a.name,
SUM(o.standard_qty) AS max_std_qty,
SUM(o.total) AS ttl_qty
FROM orders o
JOIN accounts a
ON o.account_id = a.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1)
SELECT
a.name,
SUM(o.total) AS total_qty
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING SUM(o.total) > (SELECT
t2.ttl_qty
FROM t2);
name | total_qty |
---|
EOG Resources | 56410 |
Mosaic | 49246 |
IBM | 47506 |
For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?
%%sql
WITH t2
AS (
-- the customer with the max total spending
SELECT
a.name,
a.id,
SUM(o.total_amt_usd)
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 1)
SELECT
a.name,
a.id,
we.channel,
COUNT(*) AS cnt
FROM accounts a
JOIN web_events we
ON a.id = we.account_id
WHERE a.id = (SELECT
t2.id
FROM t2)
GROUP BY 1,
2,
3
LIMIT 10;
name | id | channel | cnt |
---|
EOG Resources | 4211 | adwords | 12 |
EOG Resources | 4211 | banner | 4 |
EOG Resources | 4211 | direct | 44 |
EOG Resources | 4211 | facebook | 11 |
EOG Resources | 4211 | organic | 13 |
EOG Resources | 4211 | twitter | 5 |
What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?
%%sql
WITH t2
AS (
-- top 10 accounts by total spending
SELECT
a.id,
a.name,
SUM(o.total_amt_usd) AS total_sls_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 10)
SELECT
AVG(t2.total_sls_usd) AS avg_sls_for_top_10
FROM t2;
avg_sls_for_top_10 |
---|
304846.969000000000 |
What is the lifetime average amount spent in terms of total_amt_usd, including only the companies that spent more per order, on average, than the average of all orders.
%%sql
WITH t2
AS (SELECT
a.name,
ROUND(AVG(o.total_amt_usd), 2) AS avg_sls_for_company
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING ROUND(AVG(o.total_amt_usd), 2) > (
-- average total_amt_usd per order for all orders
SELECT
ROUND(AVG(o1.total_amt_usd), 2) AS avg_ttl_per_order
FROM orders o1))
SELECT
AVG(t2.avg_sls_for_company)
FROM t2
LIMIT 10;