practical-sql-2/Chapter_11/Chapter_11.sql

168 lines
4.5 KiB
SQL

--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 11 Code Examples
--------------------------------------------------------------
-- Listing 11-1: Create Census 2011-2015 ACS 5-Year stats table and import data
CREATE TABLE acs_2011_2015_stats (
geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY,
county varchar(50) NOT NULL,
st varchar(20) NOT NULL,
pct_travel_60_min numeric(5,3) NOT NULL,
pct_bachelors_higher numeric(5,3) NOT NULL,
pct_masters_higher numeric(5,3) NOT NULL,
median_hh_income integer,
CHECK (pct_masters_higher <= pct_bachelors_higher)
);
COPY acs_2011_2015_stats
FROM 'C:\YourDirectory\acs_2011_2015_stats.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');
SELECT * FROM acs_2011_2015_stats;
-- Listing 11-2: Using corr(Y, X) to measure the relationship between
-- education and income
SELECT corr(median_hh_income, pct_bachelors_higher)
AS bachelors_income_r
FROM acs_2011_2015_stats;
-- Listing 11-3: Using corr(Y, X) on additional variables
SELECT
round(
corr(median_hh_income, pct_bachelors_higher)::numeric, 2
) AS bachelors_income_r,
round(
corr(pct_travel_60_min, median_hh_income)::numeric, 2
) AS income_travel_r,
round(
corr(pct_travel_60_min, pct_bachelors_higher)::numeric, 2
) AS bachelors_travel_r
FROM acs_2011_2015_stats;
-- Listing 11-4: Regression slope and intercept functions
SELECT
round(
regr_slope(median_hh_income, pct_bachelors_higher)::numeric, 2
) AS slope,
round(
regr_intercept(median_hh_income, pct_bachelors_higher)::numeric, 2
) AS y_intercept
FROM acs_2011_2015_stats;
-- Listing 11-5: Calculating the coefficient of determination, or r-squared
SELECT round(
regr_r2(median_hh_income, pct_bachelors_higher)::numeric, 3
) AS r_squared
FROM acs_2011_2015_stats;
-- Bonus: Additional stats functions.
-- Variance
SELECT var_pop(median_hh_income)
FROM acs_2011_2015_stats;
-- Standard deviation of the entire population
SELECT stddev_pop(median_hh_income)
FROM acs_2011_2015_stats;
-- Covariance
SELECT covar_pop(median_hh_income, pct_bachelors_higher)
FROM acs_2011_2015_stats;
-- Listing 11-6: The rank() and dense_rank() window functions
CREATE TABLE widget_companies (
id bigserial,
company varchar(30) NOT NULL,
widget_output integer NOT NULL
);
INSERT INTO widget_companies (company, widget_output)
VALUES
('Morse Widgets', 125000),
('Springfield Widget Masters', 143000),
('Best Widgets', 196000),
('Acme Inc.', 133000),
('District Widget Inc.', 201000),
('Clarke Amalgamated', 620000),
('Stavesacre Industries', 244000),
('Bowers Widget Emporium', 201000);
SELECT
company,
widget_output,
rank() OVER (ORDER BY widget_output DESC),
dense_rank() OVER (ORDER BY widget_output DESC)
FROM widget_companies;
-- Listing 11-7: Applying rank() within groups using PARTITION BY
CREATE TABLE store_sales (
store varchar(30),
category varchar(30) NOT NULL,
unit_sales bigint NOT NULL,
CONSTRAINT store_category_key PRIMARY KEY (store, category)
);
INSERT INTO store_sales (store, category, unit_sales)
VALUES
('Broders', 'Cereal', 1104),
('Wallace', 'Ice Cream', 1863),
('Broders', 'Ice Cream', 2517),
('Cramers', 'Ice Cream', 2112),
('Broders', 'Beer', 641),
('Cramers', 'Cereal', 1003),
('Cramers', 'Beer', 640),
('Wallace', 'Cereal', 980),
('Wallace', 'Beer', 988);
SELECT
category,
store,
unit_sales,
rank() OVER (PARTITION BY category ORDER BY unit_sales DESC)
FROM store_sales;
-- Listing 11-8: Create and fill a 2015 FBI crime data table
CREATE TABLE fbi_crime_data_2015 (
st varchar(20),
city varchar(50),
population integer,
violent_crime integer,
property_crime integer,
burglary integer,
larceny_theft integer,
motor_vehicle_theft integer,
CONSTRAINT st_city_key PRIMARY KEY (st, city)
);
COPY fbi_crime_data_2015
FROM 'C:\YourDirectory\fbi_crime_data_2015.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');
SELECT * FROM fbi_crime_data_2015
ORDER BY population DESC;
-- Listing 11-9: Find property crime rates per thousand in cities with 500,000
-- or more people
SELECT
city,
st,
population,
property_crime,
round(
(property_crime::numeric / population) * 1000, 1
) AS pc_per_1000
FROM fbi_crime_data_2015
WHERE population >= 500000
ORDER BY (property_crime::numeric / population) DESC;