Skip to main content

Ecommerce Data Analysis with SQL

!mkdir -p ~/.aws
!pip install -qq psycopg2-binary awscli boto3 s3fs
%%writefile ~/.aws/credentials
[default]
aws_access_key_id=
aws_secret_access_key=
region=us-east-1
output=json
import boto3
import json

%reload_ext sql
def get_secret(secret_name):
region_name = "us-east-1"
session = boto3.session.Session()
client = session.client(
service_name='secretsmanager',
region_name=region_name)
get_secret_value_response = client.get_secret_value(SecretId=secret_name)
get_secret_value_response = json.loads(get_secret_value_response['SecretString'])
return get_secret_value_response

db_credentials = get_secret(secret_name='wysde')

USERNAME = db_credentials["RDS_POSTGRES_USERNAME"]
PASSWORD = db_credentials["RDS_POSTGRES_PASSWORD"]
HOST = db_credentials["RDS_POSTGRES_HOST"]
PORT = 5432
DBNAME = "postgres"
CONN = f"postgresql://{USERNAME}:{PASSWORD}@{HOST}:{PORT}/{DBNAME}"

%sql {CONN}
# SCHEMA = "retail"
# %sql CREATE SCHEMA IF NOT EXISTS {SCHEMA}
# %sql SET search_path = {SCHEMA}

Basic SQL

LIMIT

Displays all the data in the occurred_at, account_id, and channel columns of the web_events table, and limits the output to only the first 15 rows

Note - LIMIT must be the last one.

%%sql
SELECT occurred_at, account_id, channel
FROM web_events
LIMIT 15;
occurred_ataccount_idchannel
2015-10-06 17:13:581001direct
2015-11-05 03:08:261001direct
2015-12-04 03:57:241001direct
2016-01-02 00:55:031001direct
2016-02-01 19:02:331001direct
2016-03-02 15:15:221001direct
2016-04-01 10:58:551001direct
2016-05-01 15:26:441001direct
2016-05-31 20:53:471001direct
2016-06-30 12:09:451001direct
2016-07-30 03:06:261001direct
2016-08-28 06:42:421001direct
2016-09-26 23:14:591001direct
2016-10-26 20:21:091001direct
2016-11-25 22:52:291001direct

ORDER BY

Write a query to return the 10 earliest orders in the orders table. Include the id, occurred_at, and total_amt_usd.

%%sql
SELECT id, occurred_at, total_amt_usd
FROM orders
ORDER BY occurred_at
LIMIT 10;
idoccurred_attotal_amt_usd
57862013-12-04 04:22:44627.48
24152013-12-04 04:45:542646.77
41082013-12-04 04:53:252709.62
44892013-12-05 20:29:16277.13
2872013-12-05 20:33:563001.85
19462013-12-06 02:13:202802.90
61972013-12-06 12:55:227009.18
31222013-12-06 12:57:411992.13
60782013-12-06 13:14:476680.06
29322013-12-06 13:17:252075.94

Write a query to return the top 5 orders in terms of largest total_amt_usd. Include the id, account_id, and total_amt_usd.

%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd DESC
LIMIT 5;
idaccount_idtotal_amt_usd
40164251232207.07
38924161112875.18
39634211107533.55
5791286195005.82
3778410193547.84

Write a query to return the lowest 20 orders in terms of smallest total_amt_usd. Include the id, account_id, and total_amt_usd.

%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd
Limit 20;
idaccount_idtotal_amt_usd
637536510.00
652339910.00
632335510.00
631235410.00
643538010.00
444612310.00
561226010.00
484415710.00
505718510.00
462514110.00
600931410.00
628134910.00
444512210.00
251828810.00
449012810.00
12411310.00
477015210.00
135319510.00
500117910.00
685644510.00

Write a query that displays the order ID, account ID, and total dollar amount for all the orders, sorted first by the account ID (in ascending order), and then by the total dollar amount (in descending order).

%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY account_id, total_amt_usd DESC
LIMIT 10;
idaccount_idtotal_amt_usd
430810019426.71
430910019230.67
431610019134.31
431710018963.91
431410018863.24
430710018757.18
431110018672.95
431010018538.26
431210018343.09
431310018311.59

Write a query that again displays order ID, account ID, and total dollar amount for each order, but this time sorted first by total dollar amount (in descending order), and then by account ID (in ascending order).

%%sql
SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd DESC, account_id
LIMIT 10;
idaccount_idtotal_amt_usd
40164251232207.07
38924161112875.18
39634211107533.55
5791286195005.82
3778410193547.84
6590411193505.69
362130193106.81
731152192991.05
4562134184099.62
3858415182163.71

WHERE

Pulls the first 5 rows and all columns from the orders table that have a dollar amount of gloss_amt_usd greater than or equal to 1000.

%%sql
SELECT *
FROM orders
WHERE gloss_amt_usd >= 1000
LIMIT 5;
idaccount_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
1410012016-10-26 20:31:309714354294484.031071.07438.481993.58
6210912014-10-13 12:12:551461963345728.541468.0424.362220.94
8811012015-06-24 13:08:1518233917538908.182539.11138.043585.33
12111312016-08-10 23:47:4127313404071362.271003.660.002365.93
12911412016-12-21 15:52:58143104521573345713.577827.0517514.8426055.46

Pulls the first 10 rows and all columns from the orders table that have a total_amt_usd less than 500.

%%sql
SELECT *
FROM orders
WHERE total_amt_usd < 500
LIMIT 10;
idaccount_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
6710912015-04-07 13:29:20950095474.050.000.00474.05
9611012016-03-15 11:36:03148163869.8659.92129.92259.70
11911312016-06-12 12:29:4503023530.00224.70186.76411.46
12411312016-11-07 05:10:5600000.000.000.000.00
25412512014-11-01 02:15:240017170.000.00138.04138.04
32812912015-08-03 08:35:2301921400.00142.31170.52312.83
54214212015-11-13 09:07:090640640.00479.360.00479.36
68315012016-04-14 23:59:5001516310.00112.35129.92242.27
71315212014-11-23 16:04:030810180.0059.9281.20141.12
73015212016-05-06 02:34:4800220.000.0016.2416.24

Filter the accounts table to include the company name, website, and the primary point of contact (primary_poc) just for the Exxon Mobil company in the accounts table.

%%sql
SELECT name, website, primary_poc
FROM accounts
WHERE name = 'Exxon Mobil'
namewebsiteprimary_poc
Exxon Mobilwww.exxonmobil.comSung Shields

Using the orders table: Create a column that divides the standard_amt_usd by the standard_qty to find the unit price for standard paper for each order. Limit the results to the first 10 orders, and include the id and account_id fields.

%%sql
SELECT id, account_id, (standard_amt_usd/standard_qty) AS unit_price
FROM orders
LIMIT 10;
idaccount_idunit_price
110014.9900000000000000
210014.9900000000000000
310014.9900000000000000
410014.9900000000000000
510014.9900000000000000
610014.9900000000000000
710014.9900000000000000
810014.9900000000000000
910014.9900000000000000
1010014.9900000000000000

Write a query that finds the percentage of revenue that comes from poster paper for each order. You will need to use only the columns that end with _usd. (Try to do this without using the total column.) Display the id and account_id fields also. NOTE - you will receive an error with the correct solution to this question. This occurs because at least one of the values in the data creates a division by zero in your formula. You will learn later how to fully handle this issue. For now, you can just limit your calculations to the first 10 orders, as we did in question #1, and you'll avoid that set of data that causes the problem.

%%sql
SELECT id, account_id, poster_amt_usd/(standard_amt_usd + gloss_amt_usd + poster_amt_usd) AS post_per
FROM orders
LIMIT 10;
idaccount_idpost_per
110010.20019929527546921710
210010.26940158204455102647
310010E-20
410010E-20
510010.23117672777557473894
610010.34998360271726399625
710010.49862501668669069550
810010.59746613390507747783
910010.23737326760301367315
1010010.07393917319249681297

LIKE

Use the accounts table to find All the companies whose names start with 'C'.

%%sql
SELECT id, name
FROM accounts
WHERE name LIKE 'C%'
LIMIT 10;
idname
1061CVS Health
1131Chevron
1141Costco
1201Cardinal Health
1281Citigroup
1361Comcast
1531Cisco Systems
1581Caterpillar
1611Coca-Cola
1781Cigna

Use the accounts table to find All companies whose names contain the string 'one' somewhere in the name.

%%sql
SELECT id, name
FROM accounts
WHERE name LIKE '%one'
LIMIT 10;
idname
1821INTL FCStone
3791AutoZone

Use the accounts table to find All companies whose names end with 's'.

%%sql
SELECT id, name
FROM accounts
WHERE name LIKE '%s'
LIMIT 10;
idname
1071General Motors
1441United Technologies
1461Lowe's
1531Cisco Systems
1621HCA Holdings
1651Tyson Foods
1671Delta Air Lines
1691Johnson Controls
3361Ross Stores
1791United Continental Holdings

IN

Use the accounts table to find the account name, primary_poc, and sales_rep_id for Walmart, Target, and Nordstrom.

%%sql
SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name IN ('Walmart', 'Target', 'Nordstrom')
LIMIT 10;
nameprimary_pocsales_rep_id
WalmartTamara Tuma321500
TargetLuba Streett321660
NordstromYan Crater321820

Use the web_events table to find all information regarding individuals who were contacted via the channel of organic or adwords.

%%sql
SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords')
LIMIT 10;
idaccount_idoccurred_atchannel
439510012015-10-22 05:02:47organic
439610012015-10-22 14:04:20adwords
439910012016-01-01 15:45:54adwords
440110012016-02-07 17:44:10adwords
440210012016-02-27 15:27:22organic
440410012016-04-05 03:02:52organic
440510012016-04-17 16:41:02organic
440810012016-05-21 16:22:01organic
441010012016-06-22 13:48:53adwords
441410012016-08-12 09:31:22organic

NOT

Use the accounts table to find the account name, primary poc, and sales rep id for all stores except Walmart, Target, and Nordstrom.

%%sql
SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name NOT IN ('Walmart', 'Target', 'Nordstrom')
LIMIT 10;
nameprimary_pocsales_rep_id
Exxon MobilSung Shields321510
AppleJodee Lupo321520
Berkshire HathawaySerafina Banda321530
McKessonAngeles Crusoe321540
UnitedHealth GroupSavanna Gayman321550
CVS HealthAnabel Haskell321560
General MotorsBarrie Omeara321570
Ford MotorKym Hagerman321580
AT&TJamel Mosqueda321590
General ElectricParker Hoggan321600

Use the web_events table to find all information regarding individuals who were contacted via any method except using organic or adwords methods.

%%sql
SELECT *
FROM web_events
WHERE channel NOT in ('organic', 'adwords')
LIMIT 10;
idaccount_idoccurred_atchannel
110012015-10-06 17:13:58direct
210012015-11-05 03:08:26direct
310012015-12-04 03:57:24direct
410012016-01-02 00:55:03direct
510012016-02-01 19:02:33direct
610012016-03-02 15:15:22direct
710012016-04-01 10:58:55direct
810012016-05-01 15:26:44direct
910012016-05-31 20:53:47direct
1010012016-06-30 12:09:45direct

Use the accounts table to find All the companies whose names do not start with 'C'.

%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE 'C%'
LIMIT 10;
name
Walmart
Exxon Mobil
Apple
Berkshire Hathaway
McKesson
UnitedHealth Group
General Motors
Ford Motor
AT&T
General Electric

All companies whose names do not contain the string 'one' somewhere in the name.

%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE '%one%'
LIMIT 10;
name
Walmart
Exxon Mobil
Apple
Berkshire Hathaway
McKesson
UnitedHealth Group
CVS Health
General Motors
Ford Motor
AT&T

All companies whose names do not end with 's'.

%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE '%s'
LIMIT 10;
name
Walmart
Exxon Mobil
Apple
Berkshire Hathaway
McKesson
UnitedHealth Group
CVS Health
Ford Motor
AT&T
General Electric

AND and BETWEEN

Write a query that returns all the orders where the standard_qty is over 1000, the poster_qty is 0, and the gloss_qty is 0.

%%sql
SELECT *
FROM orders
WHERE standard_qty > 1000 AND poster_qty = 0 AND gloss_qty = 0
LIMIT 10;
idaccount_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
261329512016-08-15 00:06:1211710011715843.290.000.005843.29
326034912014-08-29 22:43:0015520015527744.480.000.007744.48

Using the accounts table, find all the companies whose names do not start with 'C' and end with 's'.

%%sql
SELECT name
FROM accounts
WHERE name NOT LIKE 'C%' AND name LIKE '%s'
LIMIT 10;
name
General Motors
United Technologies
Lowe's
HCA Holdings
Tyson Foods
Delta Air Lines
Johnson Controls
Ross Stores
United Continental Holdings
American Express

When you use the BETWEEN operator in SQL, do the results include the values of your endpoints, or not? Figure out the answer to this important question by writing a query that displays the order date and gloss_qty data for all orders where gloss_qty is between 24 and 29. Then look at your output to see if the BETWEEN operator included the begin and end values or not. --there are a number of rows in the output of this query where the gloss_qty values are 24 or 29. So the answer to the question is that yes, the BETWEEN operator in SQL is inclusive; that is, the endpoint values are included. So the BETWEEN statement in this query is equivalent to having written "WHERE gloss_qty >= 24 AND gloss_qty <= 29.

%%sql
SELECT occurred_at, gloss_qty
FROM orders
WHERE gloss_qty BETWEEN 24 AND 29
LIMIT 10;
occurred_atgloss_qty
2016-02-01 19:27:2729
2016-03-02 15:29:3224
2016-10-14 23:54:2128
2015-08-09 18:29:2024
2016-02-01 20:00:3726
2016-04-30 07:46:1326
2016-06-28 06:18:2025
2016-08-26 16:47:5725
2016-10-24 21:46:1028
2016-11-23 04:38:5725

Use the web_events table to find all information regarding individuals who were contacted via the organic or adwords channels, and started their account at any point in 2016, sorted from newest to oldest.

%%sql
SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords') AND occurred_at BETWEEN '2016-01-01' AND '2017-01-01'
ORDER BY occurred_at DESC
LIMIT 10;
idaccount_idoccurred_atchannel
849341412016-12-31 16:31:23organic
566118512016-12-31 06:55:38organic
556217912016-12-31 02:08:50adwords
770333512016-12-30 21:06:53adwords
792135212016-12-30 20:15:48organic
641624012016-12-30 17:51:36adwords
455311512016-12-30 15:57:41organic
812937812016-12-30 06:52:24organic
620022812016-12-30 03:43:11adwords
693728012016-12-29 14:51:48organic

Find list of orders ids where either gloss_qty or poster_qty is greater than 4000. Only include the id field in the resulting table.

%%sql
SELECT id
FROM orders
WHERE gloss_qty > 4000 OR poster_qty > 4000
LIMIT 10;
id
362
731
1191
1913
1939
3778
3858
3963
4016
4230

Write a query that returns a list of orders where the standard_qty is zero and either the gloss_qty or poster_qty is over 1000.

%%sql
SELECT *
FROM orders
WHERE standard_qty =0 AND (gloss_qty > 1000 OR poster_qty > 1000)
LIMIT 10;
idaccount_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
191324612013-12-29 09:50:38064504564950.0048310.50365.4048675.90
436911112015-11-15 17:47:460486298834740.003640.1424262.5627902.70
439111612016-06-04 08:58:100106296730730.00793.9424092.0424885.98
442011912016-05-21 23:21:14043144814910.00322.0711757.7612079.83
444812312016-06-25 12:27:15031782332010.0023803.22186.7623989.98
469814512015-02-26 06:13:210484490153850.003625.1639796.1243421.28
494217012015-09-24 21:02:2501074495108390.0080472.56771.4081243.96
503218312016-05-26 17:48:190144828717350.0010845.522330.4413175.96
519120512015-05-17 14:17:5901041010410.007797.090.007797.09
579128612014-10-24 12:06:2201011691117010.0074.9094930.9295005.82

Find all the company names that start with a 'C' or 'W', and the primary contact contains 'ana' or 'Ana', but it doesn't contain 'eana'.

%%sql
SELECT *
FROM accounts
WHERE (name LIKE 'C%' OR name LIKE 'W%')
AND ((primary_poc LIKE '%ana%' OR primary_poc LIKE '%Ana%')
AND primary_poc NOT LIKE '%eana%');
idnamewebsitelatlongprimary_pocsales_rep_id
1061CVS Healthwww.cvshealth.com41.46779585-73.76763638Anabel Haskell321560
1361Comcastwww.comcastcorporation.com42.54154764-76.24992387Shana Sanborn321650

SQL Joins

We use ON clause to specify a JOIN condition which is a logical statement to combine the table in FROM and JOIN statements.

%%sql
SELECT *
FROM orders
JOIN accounts
ON orders.account_id = accounts.id
LIMIT 10;
idaccount_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usdid_1namewebsitelatlongprimary_pocsales_rep_id
110012015-10-06 17:31:141232224169613.77164.78194.88973.431001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
210012015-11-05 03:34:331904157288948.10307.09462.841718.031001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
310012015-12-04 04:21:5585470132424.15352.030.00776.181001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
410012016-01-02 01:18:24144320176718.56239.680.00958.241001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
510012016-02-01 19:27:271082928165538.92217.21227.36983.491001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
610012016-03-02 15:29:321032446173513.97179.76373.521067.251001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
710012016-04-01 11:20:181013392226503.99247.17747.041498.201001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
810012016-05-01 15:55:519547151293474.05352.031226.122052.201001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
910012016-05-31 21:22:48911622129454.09119.84178.64752.571001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500
1010012016-06-30 12:32:0594468148469.06344.5464.96878.561001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500

As we've learned, the SELECT clause indicates which column(s) of data you'd like to see in the output (For Example, orders.* gives us all the columns in orders table in the output). The FROM clause indicates the first table from which we're pulling data, and the JOIN indicates the second table. The ON clause specifies the column on which you'd like to merge the two tables together

Try pulling all the data from the accounts table, and all the data from the orders table.

%%sql
SELECT accounts.*, orders.*
FROM accounts
JOIN orders
ON orders.account_id = accounts.id
LIMIT 10;
idnamewebsitelatlongprimary_pocsales_rep_idid_1account_idoccurred_atstandard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500210012015-11-05 03:34:331904157288948.10307.09462.841718.03
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500310012015-12-04 04:21:5585470132424.15352.030.00776.18
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500410012016-01-02 01:18:24144320176718.56239.680.00958.24
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500510012016-02-01 19:27:271082928165538.92217.21227.36983.49
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500610012016-03-02 15:29:321032446173513.97179.76373.521067.25
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500710012016-04-01 11:20:181013392226503.99247.17747.041498.20
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500810012016-05-01 15:55:519547151293474.05352.031226.122052.20
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500910012016-05-31 21:22:48911622129454.09119.84178.64752.57
1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma3215001010012016-06-30 12:32:0594468148469.06344.5464.96878.56

Try pulling standard_qty, gloss_qty, and poster_qty from the orders table, and the website and the primary_poc from the accounts table.

%%sql
SELECT accounts.website, accounts.primary_poc, orders.standard_qty, orders.gloss_qty, orders.poster_qty
FROM accounts
JOIN orders
ON orders.account_id = accounts.id
LIMIT 10;
websiteprimary_pocstandard_qtygloss_qtyposter_qty
www.walmart.comTamara Tuma1232224
www.walmart.comTamara Tuma1904157
www.walmart.comTamara Tuma85470
www.walmart.comTamara Tuma144320
www.walmart.comTamara Tuma1082928
www.walmart.comTamara Tuma1032446
www.walmart.comTamara Tuma1013392
www.walmart.comTamara Tuma9547151
www.walmart.comTamara Tuma911622
www.walmart.comTamara Tuma94468

Join all three of these tables - web_events, orders and accounts.

%%sql
SELECT *
FROM web_events
JOIN accounts
ON web_events.account_id = accounts.id
JOIN orders
ON accounts.id = orders.account_id
LIMIT 10;
idaccount_idoccurred_atchannelid_1namewebsitelatlongprimary_pocsales_rep_idid_2account_id_1occurred_at_1standard_qtygloss_qtyposter_qtytotalstandard_amt_usdgloss_amt_usdposter_amt_usdtotal_amt_usd
110012015-10-06 17:13:58direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
210012015-11-05 03:08:26direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
310012015-12-04 03:57:24direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
410012016-01-02 00:55:03direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
510012016-02-01 19:02:33direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
610012016-03-02 15:15:22direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
710012016-04-01 10:58:55direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
810012016-05-01 15:26:44direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
910012016-05-31 20:53:47direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43
1010012016-06-30 12:09:45direct1001Walmartwww.walmart.com40.23849561-75.10329704Tamara Tuma321500110012015-10-06 17:31:141232224169613.77164.78194.88973.43

Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for the Midwest region. Your final table should include three columns: the region name, the sales rep name, and the account name. Sort the accounts alphabetically (A-Z) according to account name.

%%sql
SELECT r.name region, s.name rep, a.name account
FROM sales_reps AS s
JOIN region AS r
ON s.region_id = r.id
JOIN accounts AS a
ON s.id = a.sales_rep_id
WHERE r.name = 'Midwest'
ORDER BY a.name ASC
LIMIT 10;
regionrepaccount
MidwestChau RowlesAbbott Laboratories
MidwestJulie StarrAbbVie
MidwestCliff MeintsAflac
MidwestChau RowlesAlcoa
MidwestCharles BidwellAltria Group
MidwestDelilah KrumAmgen
MidwestCharles BidwellArrow Electronics
MidwestDelilah KrumAutoNation
MidwestDelilah KrumCapital One Financial
MidwestCordell RiederCentene

Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for accounts where the sales rep has a first name starting with S and in the Midwest region. Your final table should include three columns: the region name, the sales rep name, and the account name. Sort the accounts alphabetically (A-Z) according to account name.

%%sql
SELECT r.name region, s.name rep, a.name account
FROM sales_reps AS s
JOIN region AS r
ON s.region_id = r.id
JOIN accounts AS a
ON s.id = a.sales_rep_id
WHERE s.name LIKE 'S%'
AND r.name = 'Midwest'
ORDER BY a.name ASC
LIMIT 10;
regionrepaccount
MidwestSherlene WetheringtonCommunity Health Systems
MidwestSherlene WetheringtonProgressive
MidwestSherlene WetheringtonRite Aid
MidwestSherlene WetheringtonTime Warner Cable
MidwestSherlene WetheringtonU.S. Bancorp

Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for accounts where the sales rep has a last name starting with K and in the Midwest region. Your final table should include three columns: the region name, the sales rep name, and the account name. Sort the accounts alphabetically (A-Z) according to account name.

%%sql
SELECT r.name region, s.name rep, a.name account
FROM region r
JOIN sales_reps s
ON r.id = s.region_id
JOIN accounts a
ON r.id = a.sales_rep_id
WHERE s.name LIKE 'K%'
AND r.name = 'Midwest'
ORDER BY a.name
LIMIT 10;
regionrepaccount

SQL Aggregations

If the COUNT result of a column is less than the number of rows in the table, we know that difference is the number of NULLs.

%sql SELECT COUNT(*) AS account_count FROM accounts;
account_count
351
%sql SELECT COUNT(id) AS account_id_count FROM accounts;
account_id_count
351

Unlike COUNT, you can only use SUM on numeric columns. However, SUM will ignore NULL values, as do the other aggregation functions.

%%sql
SELECT SUM(standard_qty) AS standard,
SUM(gloss_qty) AS gloss,
SUM(poster_qty) AS poster
FROM orders;
standardglossposter
19383461013773723646

Find the total amount spent on standard_amt_usd and gloss_amt_usd paper for each order in the orders table. This should give a dollar amount for each order in the table.

%%sql
SELECT SUM(standard_amt_usd) + SUM(gloss_amt_usd) AS total_usd
FROM orders
LIMIT 10;
total_usd
17265506.31

Find the standard_amt_usd per unit of standard_qty paper. Your solution should use both an aggregation and a mathematical operator.

%%sql
SELECT SUM(standard_amt_usd)/SUM(standard_qty) AS standard_price_per_unit
FROM orders
LIMIT 10;
standard_price_per_unit
4.9900000000000000

Functionally, MIN and MAX are similar to COUNT in that they can be used on non-numerical columns. Depending on the column type, MIN will return the lowest number, earliest date, or non-numerical value as early in the alphabet as possible. As you might suspect, MAX does the opposite—it returns the highest number, the latest date, or the non-numerical value closest alphabetically to “Z.”

%%sql
SELECT MIN(standard_qty) AS standard_min,
MAX(standard_qty) AS standard_max
FROM orders
LIMIT 10;
standard_minstandard_max
022591

Working with CASE statements

%%sql
SELECT
o.id AS order_id,
a.id AS account_id,
SUM(o.total_amt_usd) AS total_sales,
CASE
WHEN SUM(o.total_amt_usd) >= 3000 THEN 'Large'
ELSE 'Small'
END AS order_size
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC, 1, 2
LIMIT 10;
order_idaccount_idtotal_salesorder_size
40164251232207.07Large
38924161112875.18Large
39634211107533.55Large
5791286195005.82Large
3778410193547.84Large
6590411193505.69Large
362130193106.81Large
731152192991.05Large
4562134184099.62Large
3858415182163.71Large
%%sql
SELECT
CASE
WHEN total > 2000 THEN 'At Least 2000'
WHEN total BETWEEN 1000 AND 2000 THEN 'Between 1000 and 2000'
ELSE 'Less than 1000'
END AS order_size,
COUNT(*) AS count
FROM orders
GROUP BY 1
LIMIT 10;
order_sizecount
Between 1000 and 2000511
Less than 10006331
At Least 200070
%%sql
SELECT
a.name,
SUM(o.total_amt_usd) total_sales,
CASE
WHEN SUM(o.total_amt_usd) > 200000 THEN '200,000+'
WHEN SUM(o.total_amt_usd) BETWEEN 100000 AND 200000 THEN 'Between 100,000 and 200,000'
ELSE 'Under 100,000'
END AS group_level
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
nametotal_salesgroup_level
EOG Resources382873.30200,000+
Mosaic345618.59200,000+
IBM326819.48200,000+
General Dynamics300694.79200,000+
Republic Services293861.14200,000+
Leucadia National291047.25200,000+
Arrow Electronics281018.36200,000+
Sysco278575.64200,000+
Supervalu275288.30200,000+
Archer Daniels Midland272672.84200,000+
%%sql
SELECT
a.name,
SUM(o.total_amt_usd) total_sales,
CASE
WHEN SUM(o.total_amt_usd) > 200000 THEN '200,000+'
WHEN SUM(o.total_amt_usd) BETWEEN 100000 AND 200000 THEN 'Between 100,000 and 200,000'
ELSE 'Under 100,000'
END AS group_level
FROM accounts a
JOIN orders o
ON a.id = o.account_id
WHERE date_part('year', o.occurred_at) IN (2016, 2017)
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
nametotal_salesgroup_level
Pacific Life255319.18200,000+
Mosaic172180.04Between 100,000 and 200,000
CHS163471.78Between 100,000 and 200,000
Core-Mark Holding148105.93Between 100,000 and 200,000
Disney129157.38Between 100,000 and 200,000
National Oilwell Varco121873.16Between 100,000 and 200,000
Sears Holdings114003.21Between 100,000 and 200,000
State Farm Insurance Cos.111810.55Between 100,000 and 200,000
Fidelity National Financial110027.29Between 100,000 and 200,000
BB&T Corp.107300.05Between 100,000 and 200,000
%%sql
SELECT
s.name AS rep_name,
COUNT(o.id) AS order_count,
CASE
WHEN COUNT(o.id) > 200 THEN 'Top'
ELSE 'Not'
END AS rep_level
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
rep_nameorder_countrep_level
Earlie Schleusner335Top
Vernita Plump299Top
Tia Amato267Top
Georgianna Chisholm256Top
Moon Torian250Top
Nelle Meaux241Top
Maren Musto224Top
Dorotha Seawell208Top
Charles Bidwell205Top
Maryanna Fiorentino204Top
%%sql
SELECT
s.name AS rep_name,
COUNT(o.id) AS order_count,
SUM(o.total_amt_usd) AS total_sales,
CASE
WHEN COUNT(o.id) > 200 OR
SUM(o.total_amt_usd) > 750000 THEN 'top'
WHEN COUNT(o.id) > 150 OR
SUM(o.total_amt_usd) > 500000 THEN 'middle'
ELSE 'Low'
END AS rep_level
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
JOIN orders o
ON a.id = o.account_id
GROUP BY 1
ORDER BY 3 DESC
LIMIT 10;
rep_nameorder_counttotal_salesrep_level
Earlie Schleusner3351098137.72top
Tia Amato2671010690.60top
Vernita Plump299934212.93top
Georgianna Chisholm256886244.12top
Arica Stoltzfus186810353.34top
Dorotha Seawell208766935.04top
Nelle Meaux241749076.16top
Sibyl Lauria193722084.27middle
Maren Musto224702697.29top
Brandie Riva167675917.64middle

Working with Dates

%%sql
SELECT
date_part('year', occurred_at) AS sales_year,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_yeartotal_sales
2016.012864917.92
2015.05752004.94
2014.04069106.54
2013.0377331.00
2017.078151.43
%%sql
SELECT
date_part('month', occurred_at) AS sales_month,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2
LIMIT 10;
sales_monthtotal_sales
2.01312616.64
1.01337661.87
5.01537082.23
4.01562037.74
3.01659987.88
6.01871118.52
8.01918107.22
7.01978731.15
9.02017216.88
11.02390033.75
%%sql
SELECT
date_part('year', occurred_at) AS sales_year,
SUM(total_amt_usd) AS total_sales
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_yeartotal_sales
2016.012864917.92
2015.05752004.94
2014.04069106.54
2013.0377331.00
2017.078151.43
%%sql
SELECT
date_part('month', occurred_at) AS sales_month,
COUNT(o.id) AS total_orders
FROM orders o
GROUP BY 1
ORDER BY 2 DESC
LIMIT 10;
sales_monthtotal_orders
12.0882
11.0713
10.0675
8.0603
9.0602
7.0571
6.0527
5.0518
3.0482
4.0472
%%sql
SELECT
a.name,
date_part('year', occurred_at) AS _year,
date_part('month', occurred_at) AS _month,
SUM(o.gloss_amt_usd) AS total_gloss_saless
FROM orders o
JOIN accounts a
ON a.id = o.account_id
WHERE a.name = 'Walmart'
GROUP BY 1,
2,
3
ORDER BY total_gloss_saless DESC
LIMIT 10;
name_year_monthtotal_gloss_saless
Walmart2016.05.09257.64
Walmart2016.01.05070.73
Walmart2015.011.04890.97
Walmart2016.04.04875.99
Walmart2015.012.04823.56
Walmart2016.03.04711.21
Walmart2016.09.04673.76
Walmart2016.02.04673.76
Walmart2016.08.04531.45
Walmart2016.011.04359.18

HAVING clause

%%sql
SELECT
s.name,
COUNT(*) num_accts
FROM sales_reps s
JOIN accounts a
ON s.id = a.sales_rep_id
GROUP BY 1
HAVING COUNT(*) > 5
LIMIT 10;
namenum_accts
Samuel Racine6
Elwood Shutt9
Michel Averette7
Brandie Riva10
Elba Felder6
Nelle Meaux7
Necole Victory6
Saran Ram10
Moon Torian10
Sibyl Lauria6
%%sql
SELECT
a.name AS account_name,
COUNT(o.id) AS order_id
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY account_name
HAVING COUNT(o.id) > 20
LIMIT 10;
account_nameorder_id
Monsanto57
KKR55
Performance Food Group21
Paccar46
CST Brands49
Reynolds American23
Sears Holdings30
Aetna51
Gilead Sciences27
Altria Group51
%%sql
SELECT
a.name AS account_name,
COUNT(o.id) AS order_id
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY account_name
HAVING COUNT(o.id) > 20
ORDER BY order_id DESC
LIMIT 10;
account_nameorder_id
Leucadia National71
Supervalu68
Sysco68
Arrow Electronics67
Mosaic66
Archer Daniels Midland66
General Dynamics66
Fluor65
Western Digital65
Philip Morris International65
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
HAVING SUM(o.total_amt_usd) > 30000
ORDER BY total_usd DESC
LIMIT 10;
account_nametotal_usd
EOG Resources382873.30
Mosaic345618.59
IBM326819.48
General Dynamics300694.79
Republic Services293861.14
Leucadia National291047.25
Arrow Electronics281018.36
Sysco278575.64
Supervalu275288.30
Archer Daniels Midland272672.84
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
HAVING SUM(o.total_amt_usd) < 10000
ORDER BY total_usd DESC
LIMIT 10;
account_nametotal_usd
General Motors9995.05
AIG9980.93
O'Reilly Automotive9914.23
WellCare Health Plans9743.13
CBRE Group9715.71
Newmont Mining9618.81
Macy's9613.32
Devon Energy9536.55
Eli Lilly9339.20
Xerox8759.93
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
ORDER BY total_usd DESC
LIMIT 10;
account_nametotal_usd
EOG Resources382873.30
Mosaic345618.59
IBM326819.48
General Dynamics300694.79
Republic Services293861.14
Leucadia National291047.25
Arrow Electronics281018.36
Sysco278575.64
Supervalu275288.30
Archer Daniels Midland272672.84
%%sql
SELECT
a.name AS account_name,
SUM(o.total_amt_usd) total_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY a.name
ORDER BY total_usd ASC
LIMIT 10;
account_nametotal_usd
Nike390.25
Delta Air Lines859.64
Level 3 Communications881.73
Deere1036.57
Bed Bath & Beyond1069.64
Las Vegas Sands1113.29
Assurant1235.81
Ball1982.74
Priceline Group2129.24
Bank of New York Mellon Corp.2155.98
%%sql
SELECT
a.name AS account_name,
w.channel,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.name,
w.channel
HAVING COUNT(w.channel) > 6
LIMIT 10;
account_namechannelchannel_cnt
FirstEnergyfacebook10
Aetnafacebook10
Fluorfacebook8
Cisco Systemsfacebook8
Lithia Motorsfacebook10
General Millsfacebook9
Wells Fargofacebook7
PayPal Holdingsfacebook8
United Continental Holdingsfacebook11
Laboratory Corp. of Americafacebook7
%%sql
SELECT
a.name AS account_name,
w.channel,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.name,
w.channel
ORDER BY channel_cnt DESC
LIMIT 10;
account_namechannelchannel_cnt
Gilead Sciencesfacebook16
TJXfacebook15
AutoNationfacebook14
Charter Communicationsfacebook13
Disneyfacebook12
Marathon Petroleumfacebook12
Philip Morris Internationalfacebook12
United Continental Holdingsfacebook11
EOG Resourcesfacebook11
Ecolabfacebook11
%%sql
SELECT
a.name AS account_name,
w.channel AS channel_name,
COUNT(w.channel) channel_cnt
FROM accounts a
JOIN web_events w
ON a.id = w.account_id
GROUP BY a.name,
w.channel
ORDER BY channel_cnt DESC, channel_name, account_name
LIMIT 10;
account_namechannel_namechannel_cnt
Leucadia Nationaldirect52
Colgate-Palmolivedirect51
New York Life Insurancedirect51
Philip Morris Internationaldirect49
ADPdirect48
AutoNationdirect48
BlackRockdirect48
Charter Communicationsdirect48
FirstEnergydirect48
Altria Groupdirect47

CONCAT

Each company in the accounts table wants to create an email address for each primary_poc. The email address should be the first name of the primary_poc . last name primary_poc @ company name .com.

%%sql
SELECT
concat
(
-- first name
LEFT(a.primary_poc, strpos(a.primary_poc, ' ') - 1),
'.',
-- last name
substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1),
'@',
a.name,
'.com'
)
FROM accounts a
LIMIT 10;
concat
Tamara.Tuma@Walmart.com
Sung.Shields@Exxon Mobil.com
Jodee.Lupo@Apple.com
Serafina.Banda@Berkshire Hathaway.com
Angeles.Crusoe@McKesson.com
Savanna.Gayman@UnitedHealth Group.com
Anabel.Haskell@CVS Health.com
Barrie.Omeara@General Motors.com
Kym.Hagerman@Ford Motor.com
Jamel.Mosqueda@AT&T.com

You may have noticed that in the previous question some of the company names include spaces, which will certainly not work in an email address. See if you can create an email address that will work by removing all of the spaces in the account name, but otherwise your solution should be just as above. Some helpful documentation is here: https://www.postgresql.org/docs/8.1/functions-string.html

%%sql
SELECT
concat
(
-- poc first name
LEFT(a.primary_poc, strpos(a.primary_poc, ' ') - 1),
'.',
-- poc last name
substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1),
'@',
-- company name
REPLACE(a.name, ' ', ''),
'.com'
)
FROM accounts a
LIMIT 10;
concat
Tamara.Tuma@Walmart.com
Sung.Shields@ExxonMobil.com
Jodee.Lupo@Apple.com
Serafina.Banda@BerkshireHathaway.com
Angeles.Crusoe@McKesson.com
Savanna.Gayman@UnitedHealthGroup.com
Anabel.Haskell@CVSHealth.com
Barrie.Omeara@GeneralMotors.com
Kym.Hagerman@FordMotor.com
Jamel.Mosqueda@AT&T.com

We would also like to create an initial password, which they will change after their first log in. The first password will be:

  • the first letter of the primary_poc's first name (lowercase), then
  • the last letter of their first name (lowercase),
  • the first letter of their last name (lowercase),
  • the last letter of their last name (lowercase),
  • the number of letters in their first name,
  • the number of letters in their last name, and then
  • the name of the company they are working with, all capitalized with no spaces.
%%sql
SELECT
concat
(
LOWER(LEFT(a.primary_poc, 1)),
LOWER(substr(a.primary_poc, strpos(a.primary_poc, ' ') - 1, 1)),
LOWER(substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1, 1)),
LOWER(RIGHT(a.primary_poc, 1)),
length(substr(a.primary_poc, 1, strpos(a.primary_poc, ' ') - 1)),
length(substr(a.primary_poc, strpos(a.primary_poc, ' ') + 1)),
UPPER(REPLACE(a.name, ' ', ''))
)
FROM accounts a
LIMIT 10;
concat
tata64WALMART
sgss47EXXONMOBIL
jelo54APPLE
saba85BERKSHIREHATHAWAY
asce76MCKESSON
sagn76UNITEDHEALTHGROUP
alhl67CVSHEALTH
beoa66GENERALMOTORS
kmhn38FORDMOTOR
jlma58AT&T

Suppose the company wants to assess the performance of all the sales representatives. Each sales representative is assigned to work in a particular region. To make it easier to understand for the HR team, display the concatenated salesreps.id, ‘’ (underscore), and region.name as EMP_ID_REGION for each sales representative.

%%sql
SELECT
concat(s.id, '_', r.name)
FROM sales_reps s
JOIN region r
ON r.id = s.region_id
LIMIT 10;
concat
321500_Northeast
321510_Northeast
321520_Northeast
321530_Northeast
321540_Northeast
321550_Northeast
321560_Northeast
321570_Northeast
321580_Northeast
321590_Northeast

From the accounts table, display:

  • the name of the client,
  • the coordinate as concatenated (latitude, longitude),
  • email id of the primary point of contact as first letter of the primary_poc last letter of the primary_poc@extracted name and domain from the website.
%%sql
SELECT
a.name,
concat('(', a.lat, ',', a.long, ')') AS lat_long,
concat(LEFT(a.primary_poc, 1), RIGHT(a.primary_poc, 1), '@', substr(a.website, 5)) AS poc_email
FROM accounts a
LIMIT 10;
namelat_longpoc_email
Walmart(40.23849561,-75.10329704)Ta@walmart.com
Exxon Mobil(41.16915630,-73.84937379)Ss@exxonmobil.com
Apple(42.29049481,-76.08400942)Jo@apple.com
Berkshire Hathaway(40.94902131,-75.76389759)Sa@berkshirehathaway.com
McKesson(42.21709326,-75.28499823)Ae@mckesson.com
UnitedHealth Group(40.08792542,-75.57569396)Sn@unitedhealthgroup.com
CVS Health(41.46779585,-73.76763638)Al@cvshealth.com
General Motors(40.80551762,-76.71018140)Ba@gm.com
Ford Motor(41.11394200,-75.85422452)Kn@ford.com
AT&T(42.49746270,-74.90271225)Ja@att.com

From the webevents table, display the concatenated value of account_id, '' , channel, '_', count of web events of the particular channel.

%%sql
WITH t1
AS (SELECT
w1.account_id,
w1.channel,
COUNT(*) AS cnt
FROM web_events w1
GROUP BY 1,
2)
SELECT
concat(t1.account_id, '_', t1.channel, '_', t1.cnt)
FROM t1
LIMIT 10;
concat
2601_organic_3
2541_organic_8
2461_facebook_8
1121_organic_7
2191_adwords_2
1761_adwords_1
4301_organic_2
2051_organic_9
3631_twitter_2
2741_adwords_2

Window function

%%sql
SELECT id,
account_id,
DATE_TRUNC('year',occurred_at) AS year,
DENSE_RANK() OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS dense_rank,
total_amt_usd,
SUM(total_amt_usd) OVER account_year_window AS sum_total_amt_usd,
COUNT(total_amt_usd) OVER account_year_window AS count_total_amt_usd,
AVG(total_amt_usd) OVER account_year_window AS avg_total_amt_usd,
MIN(total_amt_usd) OVER account_year_window AS min_total_amt_usd,
MAX(total_amt_usd) OVER account_year_window AS max_total_amt_usd
FROM orders
window account_year_window as
(PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at))
LIMIT 10;
idaccount_idyeardense_ranktotal_amt_usdsum_total_amt_usdcount_total_amt_usdavg_total_amt_usdmin_total_amt_usdmax_total_amt_usd
430710012015-01-01 00:00:0018757.1821651.5354330.3060000000000000776.189426.71
310012015-01-01 00:00:001776.1821651.5354330.3060000000000000776.189426.71
210012015-01-01 00:00:0011718.0321651.5354330.3060000000000000776.189426.71
110012015-01-01 00:00:001973.4321651.5354330.3060000000000000776.189426.71
430810012015-01-01 00:00:0019426.7121651.5354330.3060000000000000776.189426.71
610012016-01-01 00:00:0021067.25124014.87284429.1025000000000000752.579426.71
510012016-01-01 00:00:002983.49124014.87284429.1025000000000000752.579426.71
410012016-01-01 00:00:002958.24124014.87284429.1025000000000000752.579426.71
431810012016-01-01 00:00:0027924.46124014.87284429.1025000000000000752.579426.71
430910012016-01-01 00:00:0029230.67124014.87284429.1025000000000000752.579426.71

Create a running total of standard_amt_usd (in the orders table) over order time with no date truncation. Your final table should have two columns: one with the amount being added for each new row, and a second with the running total.

%%sql
SELECT standard_amt_usd,
SUM(standard_amt_usd) OVER (ORDER BY occurred_at) AS running_total
FROM orders
LIMIT 10;
standard_amt_usdrunning_total
0.000.00
2445.102445.10
2634.725079.82
0.005079.82
2455.087534.90
2504.9810039.88
264.4710304.35
1536.9211841.27
374.2512215.52
1402.1913617.71

Now, modify your query from the previous question to include partitions. Still create a running total of standard_amt_usd (in the orders table) over order time, but this time, date truncate occurred_at by year and partition by that same year-truncated occurred_at variable. Your final table should have three columns: One with the amount being added for each row, one for the truncated date, and a final column with the running total within each year.

%%sql
SELECT standard_amt_usd, date_trunc('year', occurred_at) as year,
SUM(standard_amt_usd) OVER (partition by date_trunc('year', occurred_at) order by occurred_at) AS running_total
FROM orders
LIMIT 10;
standard_amt_usdyearrunning_total
0.002013-01-01 00:00:000.00
2445.102013-01-01 00:00:002445.10
2634.722013-01-01 00:00:005079.82
0.002013-01-01 00:00:005079.82
2455.082013-01-01 00:00:007534.90
2504.982013-01-01 00:00:0010039.88
264.472013-01-01 00:00:0010304.35
1536.922013-01-01 00:00:0011841.27
374.252013-01-01 00:00:0012215.52
1402.192013-01-01 00:00:0013617.71

Subqueries

%%sql
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC
LIMIT 10;
daychannelevent_count
2017-01-01 00:00:00direct21
2016-12-21 00:00:00direct21
2016-12-31 00:00:00direct19
2016-11-03 00:00:00direct18
2016-12-28 00:00:00direct17
2016-10-28 00:00:00direct16
2016-12-20 00:00:00direct16
2016-10-29 00:00:00direct15
2016-12-26 00:00:00direct15
2016-12-02 00:00:00direct15

Num events for each day for each channel

%%sql
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC
LIMIT 10;
daychannelevent_count
2017-01-01 00:00:00direct21
2016-12-21 00:00:00direct21
2016-12-31 00:00:00direct19
2016-11-03 00:00:00direct18
2016-12-28 00:00:00direct17
2016-10-28 00:00:00direct16
2016-12-20 00:00:00direct16
2016-10-29 00:00:00direct15
2016-12-26 00:00:00direct15
2016-12-02 00:00:00direct15

Avg events per channel

%%sql
SELECT
sub.channel,
ROUND(AVG(sub.event_count), 2)
FROM (
-- number of events for each day for each channel
SELECT
date_trunc('day', w.occurred_at) AS day,
w.channel AS channel,
COUNT(*) AS event_count
FROM web_events w
GROUP BY 1,
2
ORDER BY 3 DESC) sub
GROUP BY 1
LIMIT 10;
channelround
facebook1.60
organic1.67
twitter1.32
adwords1.57
banner1.29
direct4.90

Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.

%%sql
SELECT
t3.region_name,
t3.rep_name,
t3.total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t3
JOIN (SELECT
t1.region_name,
MAX(t1.total_sales) AS total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t1
GROUP BY 1) AS t2
ON t3.region_name = t2.region_name
AND t2.total_sales = t3.total_sales
LIMIT 10;
region_namerep_nametotal_sales
MidwestCharles Bidwell675637.19
NortheastTia Amato1010690.60
SoutheastEarlie Schleusner1098137.72
WestGeorgianna Chisholm886244.12

For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed?

%%sql
SELECT
r.name AS region_name,
COUNT(o.total) AS ttl_cnt
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
WHERE r.name = (SELECT
t1.name
FROM (
-- region with the most sales
SELECT
r.name,
SUM(o.total_amt_usd) AS ttl_sls
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1)
GROUP BY 1;
region_namettl_cnt
Northeast2357

How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?

%%sql
SELECT
a.name,
SUM(o.total) AS total_qty
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING SUM(o.total) > (
-- what is the max standard_qty purchased by an account
SELECT
t1.ttl_qty
FROM (
-- which account has bought the most standard_qty paper throughout their lifetime as a customer?
SELECT
a.name,
SUM(o.standard_qty) AS max_std_qty,
SUM(o.total) AS ttl_qty
FROM orders o
JOIN accounts a
ON o.account_id = a.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1);
nametotal_qty
EOG Resources56410
Mosaic49246
IBM47506

For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?

%%sql
SELECT
a.name,
a.id,
we.channel,
COUNT(*) AS cnt
FROM accounts a
JOIN web_events we
ON a.id = we.account_id
WHERE a.id = (
-- the id of the customer with the max total spending
SELECT
t1.id
FROM (
-- the customer with the max total spending
SELECT
a.name,
a.id,
SUM(o.total_amt_usd)
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 1) AS t1)
GROUP BY 1,
2,
3
nameidchannelcnt
EOG Resources4211adwords12
EOG Resources4211banner4
EOG Resources4211direct44
EOG Resources4211facebook11
EOG Resources4211organic13
EOG Resources4211twitter5

What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?

%%sql
SELECT
AVG(t2.total_sls_usd) AS avg_sls_for_top_10
FROM (
-- top 10 accounts by total spending
SELECT
a.id,
a.name,
SUM(o.total_amt_usd) AS total_sls_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 10) AS t2;
avg_sls_for_top_10
304846.969000000000

What is the lifetime average amount spent in terms of total_amt_usd, including only the companies that spent more per order, on average, than the average of all orders.

%%sql
SELECT
AVG(tmp.avg_sls_for_company)
FROM (SELECT
a.name,
ROUND(AVG(o.total_amt_usd), 2) AS avg_sls_for_company
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING ROUND(AVG(o.total_amt_usd), 2) > (
-- average total_amt_usd per order for all orders
SELECT
ROUND(AVG(o1.total_amt_usd), 2) AS avg_ttl_per_order
FROM orders o1)) tmp;
avg
4721.1402958579881657

Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.

%%sql
WITH sales_for_region_by_rep
AS (
-- total sales by region for rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2
ORDER BY 3 DESC),
max_sales_for_region
AS (
-- max sales by region for rep, rep name removed
SELECT
t1.region_name,
MAX(t1.total_sales) AS total_sales
FROM (
-- sales for region by rep
SELECT
r.name AS region_name,
sr.name AS rep_name,
SUM(o.total_amt_usd) total_sales
FROM accounts a
JOIN orders o
ON a.id = o.account_id
JOIN sales_reps sr
ON a.sales_rep_id = sr.id
JOIN region r
ON sr.region_id = r.id
GROUP BY 1,
2) AS t1
GROUP BY 1)
SELECT
sales_for_region_by_rep.region_name,
sales_for_region_by_rep.rep_name,
sales_for_region_by_rep.total_sales
FROM sales_for_region_by_rep
JOIN max_sales_for_region
ON sales_for_region_by_rep.region_name = max_sales_for_region.region_name
AND sales_for_region_by_rep.total_sales = max_sales_for_region.total_sales
LIMIT 10;
region_namerep_nametotal_sales
MidwestCharles Bidwell675637.19
NortheastTia Amato1010690.60
SoutheastEarlie Schleusner1098137.72
WestGeorgianna Chisholm886244.12

For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed?

%%sql
WITH region_with_max_sales_and_sales
AS (
-- region name with the most sales
SELECT
t1.name
FROM (
-- region name with the most sales and sales total
SELECT
r.name,
SUM(o.total_amt_usd) AS ttl_sls
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1) AS t1)
SELECT
r.name AS region_name,
COUNT(o.total) AS ttl_cnt
FROM orders o
JOIN accounts a
ON o.account_id = a.id
JOIN sales_reps s
ON a.sales_rep_id = s.id
JOIN region r
ON s.region_id = r.id
JOIN region_with_max_sales_and_sales rms
ON rms.name = r.name
GROUP BY 1;
region_namettl_cnt
Northeast2357

How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?

%%sql
WITH t2
AS (
-- which account has bought the most standard_qty paper throughout their lifetime as a customer?
SELECT
a.name,
SUM(o.standard_qty) AS max_std_qty,
SUM(o.total) AS ttl_qty
FROM orders o
JOIN accounts a
ON o.account_id = a.id
GROUP BY 1
ORDER BY 2 DESC LIMIT 1)

SELECT
a.name,
SUM(o.total) AS total_qty
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING SUM(o.total) > (SELECT
t2.ttl_qty
FROM t2);
nametotal_qty
EOG Resources56410
Mosaic49246
IBM47506

For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?

%%sql
WITH t2
AS (
-- the customer with the max total spending
SELECT
a.name,
a.id,
SUM(o.total_amt_usd)
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 1)

SELECT
a.name,
a.id,
we.channel,
COUNT(*) AS cnt
FROM accounts a
JOIN web_events we
ON a.id = we.account_id
WHERE a.id = (SELECT
t2.id
FROM t2)
GROUP BY 1,
2,
3
LIMIT 10;
nameidchannelcnt
EOG Resources4211adwords12
EOG Resources4211banner4
EOG Resources4211direct44
EOG Resources4211facebook11
EOG Resources4211organic13
EOG Resources4211twitter5

What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?

%%sql
WITH t2
AS (
-- top 10 accounts by total spending
SELECT
a.id,
a.name,
SUM(o.total_amt_usd) AS total_sls_usd
FROM accounts a
JOIN orders o
ON a.id = o.account_id
GROUP BY 1,
2
ORDER BY 3 DESC LIMIT 10)
SELECT
AVG(t2.total_sls_usd) AS avg_sls_for_top_10
FROM t2;
avg_sls_for_top_10
304846.969000000000

What is the lifetime average amount spent in terms of total_amt_usd, including only the companies that spent more per order, on average, than the average of all orders.

%%sql
WITH t2
AS (SELECT
a.name,
ROUND(AVG(o.total_amt_usd), 2) AS avg_sls_for_company
FROM orders o
JOIN accounts a
ON a.id = o.account_id
GROUP BY 1
HAVING ROUND(AVG(o.total_amt_usd), 2) > (
-- average total_amt_usd per order for all orders
SELECT
ROUND(AVG(o1.total_amt_usd), 2) AS avg_ttl_per_order
FROM orders o1))
SELECT
AVG(t2.avg_sls_for_company)
FROM t2
LIMIT 10;
avg
4721.1402958579881657