More renumbering

This commit is contained in:
Anthony DeBarros 2020-02-26 21:43:12 -05:00
parent 01fc09a421
commit 204d3cc55d
33 changed files with 18 additions and 3353 deletions

View File

@ -1,34 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 1 Code Examples
--------------------------------------------------------------
-- Listing 1-1: Create the database named analysis
CREATE DATABASE analysis;

-- Listing 1-2: Create the six-column teachers table
CREATE TABLE teachers (
    id         bigserial,
    first_name varchar(25),
    last_name  varchar(50),
    school     varchar(50),
    hire_date  date,
    salary     numeric
);

-- To remove (drop) the table, uncomment and run:
-- DROP TABLE teachers;

-- Listing 1-3: Insert six rows into teachers.
-- The id column is omitted, so bigserial assigns it.
INSERT INTO teachers (first_name, last_name, school, hire_date, salary)
VALUES
    ('Janet', 'Smith', 'F.D. Roosevelt HS', '2011-10-30', 36200),
    ('Lee', 'Reynolds', 'F.D. Roosevelt HS', '1993-05-22', 65000),
    ('Samuel', 'Cole', 'Myers Middle School', '2005-08-01', 43500),
    ('Samantha', 'Bush', 'Myers Middle School', '2011-10-30', 36200),
    ('Betty', 'Diaz', 'Myers Middle School', '2005-08-30', 43500),
    ('Kathleen', 'Roush', 'F.D. Roosevelt HS', '2010-10-22', 38500);

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 1: Creating Your First Database and Table ### Chapter 2: Creating Your First Database and Table
Introduces PostgreSQL, the pgAdmin user interface, and the code for loading a simple data set about teachers into a new database. Introduces PostgreSQL, the pgAdmin user interface, and the code for loading a simple data set about teachers into a new database.

View File

@ -1,104 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 2 Code Examples
--------------------------------------------------------------
-- Listing 2-1: Every row and column of teachers
SELECT *
FROM teachers;

-- Listing 2-2: Only some of the columns
SELECT
    last_name,
    first_name,
    salary
FROM teachers;

-- Listing 2-3: Each distinct school name
SELECT DISTINCT
    school
FROM teachers;

-- Listing 2-4: Each distinct (school, salary) pair
SELECT DISTINCT
    school,
    salary
FROM teachers;

-- Listing 2-5: Sort by one column, highest salary first
SELECT
    first_name,
    last_name,
    salary
FROM teachers
ORDER BY salary DESC;

-- Listing 2-6: Sort by school A-Z, then newest hire first within a school
SELECT
    last_name,
    school,
    hire_date
FROM teachers
ORDER BY
    school ASC,
    hire_date DESC;

-- Listing 2-7: Keep only the rows for one school
SELECT
    last_name,
    school,
    hire_date
FROM teachers
WHERE school = 'Myers Middle School';
-- WHERE comparison operators, one example each

-- Equality: teachers whose first name is Janet
SELECT
    first_name,
    last_name,
    school
FROM teachers
WHERE first_name = 'Janet';

-- Inequality: school names other than F.D. Roosevelt HS
SELECT
    school
FROM teachers
WHERE school != 'F.D. Roosevelt HS';

-- Less than: teachers hired before Jan. 1, 2000
SELECT
    first_name,
    last_name,
    hire_date
FROM teachers
WHERE hire_date < '2000-01-01';

-- Greater than or equal: teachers earning 43,500 or more
SELECT
    first_name,
    last_name,
    salary
FROM teachers
WHERE salary >= 43500;

-- Inclusive range: teachers earning between $40,000 and $65,000
SELECT
    first_name,
    last_name,
    school,
    salary
FROM teachers
WHERE salary BETWEEN 40000 AND 65000;
-- Listing 2-8: Pattern matching with LIKE (case-sensitive) and
-- ILIKE (case-insensitive)
SELECT
    first_name
FROM teachers
WHERE first_name LIKE 'sam%';

SELECT
    first_name
FROM teachers
WHERE first_name ILIKE 'sam%';

-- Listing 2-9: Combining conditions with AND and OR
SELECT *
FROM teachers
WHERE school = 'Myers Middle School'
  AND salary < 40000;

SELECT *
FROM teachers
WHERE last_name = 'Cole'
   OR last_name = 'Bush';

-- Parentheses group the OR so it is evaluated before the AND
SELECT *
FROM teachers
WHERE school = 'F.D. Roosevelt HS'
  AND (salary < 38000 OR salary > 40000);

-- Listing 2-10: WHERE and ORDER BY in one SELECT statement
SELECT
    first_name,
    last_name,
    school,
    hire_date,
    salary
FROM teachers
WHERE school LIKE '%Roos%'
ORDER BY hire_date DESC;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 2: Beginning Data Exploration with `SELECT` ### Chapter 3: Beginning Data Exploration with `SELECT`
Explores basic SQL query syntax, including how to sort and filter data. Explores basic SQL query syntax, including how to sort and filter data.

View File

@ -1,90 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 3 Code Examples
--------------------------------------------------------------
-- Listing 3-1: Character data types in action
CREATE TABLE char_data_types (
    varchar_column varchar(10),
    char_column    char(10),
    text_column    text
);

-- Columns are named explicitly so the INSERT does not depend on the
-- table's physical column order.
INSERT INTO char_data_types (varchar_column, char_column, text_column)
VALUES
    ('abc', 'abc', 'abc'),
    ('defghi', 'defghi', 'defghi');

-- Export the table as pipe-delimited text with a header row.
-- Replace C:\YourDirectory with a real directory before running.
COPY char_data_types TO 'C:\YourDirectory\typetest.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');
-- Listing 3-2: Number data types in action
CREATE TABLE number_data_types (
    numeric_column numeric(20,5),
    real_column    real,
    double_column  double precision
);

-- The same three literals go into each column so the stored results can be
-- compared across types. Columns are named explicitly so the INSERT does
-- not depend on the table's physical column order.
INSERT INTO number_data_types (numeric_column, real_column, double_column)
VALUES
    (.7, .7, .7),
    (2.13579, 2.13579, 2.13579),
    (2.1357987654, 2.1357987654, 2.1357987654);

SELECT * FROM number_data_types;

-- Listing 3-3: Rounding issues with float columns
-- Assumes table created and loaded with Listing 3-2
SELECT
    numeric_column * 10000000 AS "Fixed",
    real_column * 10000000 AS "Float"
FROM number_data_types
WHERE numeric_column = .7;
-- Listing 3-4: Timestamp and interval types in action
CREATE TABLE date_time_types (
    timestamp_column timestamp with time zone,
    interval_column  interval
);

-- Columns are named explicitly so the INSERT does not depend on the
-- table's physical column order.
INSERT INTO date_time_types (timestamp_column, interval_column)
VALUES
    ('2018-12-31 01:00 EST','2 days'),
    ('2018-12-31 01:00 PST','1 month'),
    ('2018-12-31 01:00 Australia/Melbourne','1 century'),
    (now(),'1 week');

SELECT * FROM date_time_types;

-- Listing 3-5: Using the interval data type
-- Assumes script 3-4 has been run
SELECT
    timestamp_column,
    interval_column,
    timestamp_column - interval_column AS new_date
FROM date_time_types;
-- Listing 3-6: Three CAST() examples
-- These queries use the tables created in Listings 3-1, 3-2 and 3-4.

-- Timestamp to a 10-character string
SELECT
    timestamp_column,
    CAST(timestamp_column AS varchar(10))
FROM date_time_types;

-- Numeric to integer and to a 6-character string
SELECT
    numeric_column,
    CAST(numeric_column AS integer),
    CAST(numeric_column AS varchar(6))
FROM number_data_types;

-- Does not work: the stored text is not a number
SELECT
    CAST(char_column AS integer)
FROM char_data_types;

-- The double-colon is alternate notation for CAST
SELECT
    timestamp_column::varchar(10)
FROM date_time_types;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 3: Understanding Data Types ### Chapter 4: Understanding Data Types
Explains the definitions for setting columns in a table to hold specific types of data, from text to dates to various forms of numbers. Explains the definitions for setting columns in a table to hold specific types of data, from text to dates to various forms of numbers.

View File

@ -1,212 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 4 Code Examples
--------------------------------------------------------------
-- Listing 4-1: Using COPY for data import
-- Syntax template only: table_name and the file path are placeholders,
-- so running this as written will produce an error.
COPY table_name
    FROM 'C:\YourDirectory\your_file.csv'
    WITH (FORMAT CSV, HEADER);
-- Listing 4-2: A CREATE TABLE statement for Census county data
-- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
-- Note: Some columns have been given more descriptive names
CREATE TABLE us_counties_2010 (
    -- Geography and identification
    geo_name varchar(90),                    -- Name of the geography
    state_us_abbreviation varchar(2),        -- State/U.S. abbreviation
    summary_level varchar(3),                -- Summary Level
    region smallint,                         -- Region
    division smallint,                       -- Division
    state_fips varchar(2),                   -- State FIPS code
    county_fips varchar(3),                  -- County code
    area_land bigint,                        -- Area (Land) in square meters
    area_water bigint,                       -- Area (Water) in square meters
    population_count_100_percent integer,    -- Population count (100%)
    housing_unit_count_100_percent integer,  -- Housing Unit count (100%)
    internal_point_lat numeric(10,7),        -- Internal point (latitude)
    internal_point_lon numeric(10,7),        -- Internal point (longitude)

    -- This section is referred to as P1. Race:
    p0010001 integer,   -- Total population
    p0010002 integer,   -- Population of one race:
    p0010003 integer,   -- White Alone
    p0010004 integer,   -- Black or African American alone
    p0010005 integer,   -- American Indian and Alaska Native alone
    p0010006 integer,   -- Asian alone
    p0010007 integer,   -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,   -- Some Other Race alone
    p0010009 integer,   -- Population of two or more races
    p0010010 integer,   -- Population of two races:
    p0010011 integer,   -- White; Black or African American
    p0010012 integer,   -- White; American Indian and Alaska Native
    p0010013 integer,   -- White; Asian
    p0010014 integer,   -- White; Native Hawaiian and Other Pacific Islander
    p0010015 integer,   -- White; Some Other Race
    p0010016 integer,   -- Black or African American; American Indian and Alaska Native
    p0010017 integer,   -- Black or African American; Asian
    p0010018 integer,   -- Black or African American; Native Hawaiian and Other Pacific Islander
    p0010019 integer,   -- Black or African American; Some Other Race
    p0010020 integer,   -- American Indian and Alaska Native; Asian
    p0010021 integer,   -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
    p0010022 integer,   -- American Indian and Alaska Native; Some Other Race
    p0010023 integer,   -- Asian; Native Hawaiian and Other Pacific Islander
    p0010024 integer,   -- Asian; Some Other Race
    p0010025 integer,   -- Native Hawaiian and Other Pacific Islander; Some Other Race
    p0010026 integer,   -- Population of three races
    p0010047 integer,   -- Population of four races
    p0010063 integer,   -- Population of five races
    p0010070 integer,   -- Population of six races

    -- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    p0020001 integer,   -- Total
    p0020002 integer,   -- Hispanic or Latino
    p0020003 integer,   -- Not Hispanic or Latino:
    p0020004 integer,   -- Population of one race:
    p0020005 integer,   -- White Alone
    p0020006 integer,   -- Black or African American alone
    p0020007 integer,   -- American Indian and Alaska Native alone
    p0020008 integer,   -- Asian alone
    p0020009 integer,   -- Native Hawaiian and Other Pacific Islander alone
    p0020010 integer,   -- Some Other Race alone
    p0020011 integer,   -- Two or More Races
    p0020012 integer,   -- Population of two races
    p0020028 integer,   -- Population of three races
    p0020049 integer,   -- Population of four races
    p0020065 integer,   -- Population of five races
    p0020072 integer,   -- Population of six races

    -- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
    p0030001 integer,   -- Total
    p0030002 integer,   -- Population of one race:
    p0030003 integer,   -- White alone
    p0030004 integer,   -- Black or African American alone
    p0030005 integer,   -- American Indian and Alaska Native alone
    p0030006 integer,   -- Asian alone
    p0030007 integer,   -- Native Hawaiian and Other Pacific Islander alone
    p0030008 integer,   -- Some Other Race alone
    p0030009 integer,   -- Two or More Races
    p0030010 integer,   -- Population of two races
    p0030026 integer,   -- Population of three races
    p0030047 integer,   -- Population of four races
    p0030063 integer,   -- Population of five races
    p0030070 integer,   -- Population of six races

    -- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    -- FOR THE POPULATION 18 YEARS AND OVER
    p0040001 integer,   -- Total
    p0040002 integer,   -- Hispanic or Latino
    p0040003 integer,   -- Not Hispanic or Latino:
    p0040004 integer,   -- Population of one race:
    p0040005 integer,   -- White alone
    p0040006 integer,   -- Black or African American alone
    p0040007 integer,   -- American Indian and Alaska Native alone
    p0040008 integer,   -- Asian alone
    p0040009 integer,   -- Native Hawaiian and Other Pacific Islander alone
    p0040010 integer,   -- Some Other Race alone
    p0040011 integer,   -- Two or More Races
    p0040012 integer,   -- Population of two races
    p0040028 integer,   -- Population of three races
    p0040049 integer,   -- Population of four races
    p0040065 integer,   -- Population of five races
    p0040072 integer,   -- Population of six races

    -- This section is referred to as H1. OCCUPANCY STATUS
    h0010001 integer,   -- Total housing units
    h0010002 integer,   -- Occupied
    h0010003 integer    -- Vacant
);

-- Confirm the (still empty) table exists
SELECT *
FROM us_counties_2010;
-- Listing 4-3: Importing Census data using COPY
-- Note! If you run into an import error here, be sure you downloaded the code and
-- data for the book according to the steps listed on page xxvii in the Introduction.
-- Windows users: Please check the Note on page xxvii as well.
COPY us_counties_2010
    FROM 'C:\YourDirectory\us_counties_2010.csv'
    WITH (FORMAT CSV, HEADER);

-- Checking the data: all rows
SELECT *
FROM us_counties_2010;

-- Checking the data: the three counties with the largest land area
SELECT
    geo_name,
    state_us_abbreviation,
    area_land
FROM us_counties_2010
ORDER BY area_land DESC
LIMIT 3;

-- Checking the data: the five counties with the highest longitude value
SELECT
    geo_name,
    state_us_abbreviation,
    internal_point_lon
FROM us_counties_2010
ORDER BY internal_point_lon DESC
LIMIT 5;
-- Listing 4-4: Creating a table to track supervisor salaries
CREATE TABLE supervisor_salaries (
    town       varchar(30),
    county     varchar(30),
    supervisor varchar(30),
    start_date date,
    salary     money,
    benefits   money
);

-- Listing 4-5: Importing salaries data from CSV to three table columns.
-- Only town, supervisor and salary are loaded; the other columns stay NULL.
COPY supervisor_salaries (town, supervisor, salary)
    FROM 'C:\YourDirectory\supervisor_salaries.csv'
    WITH (FORMAT CSV, HEADER);

-- Check the data
SELECT *
FROM supervisor_salaries
LIMIT 2;

-- Listing 4-6: Use a temporary table to add a default value to a column
-- during import

-- Intentionally unfiltered: empties the table before it is reloaded below.
DELETE FROM supervisor_salaries;

-- Staging table with the same column definitions as the target
CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries);

COPY supervisor_salaries_temp (town, supervisor, salary)
    FROM 'C:\YourDirectory\supervisor_salaries.csv'
    WITH (FORMAT CSV, HEADER);

-- Copy the staged rows across, filling county with a constant
INSERT INTO supervisor_salaries (town, county, supervisor, salary)
SELECT
    town,
    'Some County',
    supervisor,
    salary
FROM supervisor_salaries_temp;

DROP TABLE supervisor_salaries_temp;

-- Check the data
SELECT *
FROM supervisor_salaries
LIMIT 2;
-- Listing 4-7: Export an entire table with COPY
-- Output is pipe-delimited text with a header row.
COPY us_counties_2010
    TO 'C:\YourDirectory\us_counties_export.txt'
    WITH (FORMAT CSV, HEADER, DELIMITER '|');

-- Listing 4-8: Exporting selected columns from a table with COPY
COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon)
    TO 'C:\YourDirectory\us_counties_latlon_export.txt'
    WITH (FORMAT CSV, HEADER, DELIMITER '|');

-- Listing 4-9: Exporting query results with COPY
-- The query keeps counties whose name contains "mill" in any letter case.
COPY (
    SELECT
        geo_name,
        state_us_abbreviation
    FROM us_counties_2010
    WHERE geo_name ILIKE '%mill%'
)
    TO 'C:\YourDirectory\us_counties_mill_export.txt'
    WITH (FORMAT CSV, HEADER, DELIMITER '|');

View File

@ -2,6 +2,6 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 4: Importing and Exporting Data ### Chapter 5: Importing and Exporting Data
Explains how to use SQL commands to load data from external files and then export it. You'll load a table of U.S. Census population data that you'll use throughout the book. Explains how to use SQL commands to load data from external files and then export it. You'll load a table of U.S. Census population data that you'll use throughout the book.

View File

@ -1,207 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 5 Code Examples
--------------------------------------------------------------
-- Listing 5-1: Basic addition, subtraction and multiplication with SQL
SELECT 2 + 2;    -- addition
SELECT 9 - 1;    -- subtraction
SELECT 3 * 4;    -- multiplication

-- Listing 5-2: Integer and decimal division with SQL
SELECT 11 / 6;     -- integer division
SELECT 11 % 6;     -- modulo division
SELECT 11.0 / 6;   -- decimal division
SELECT CAST(11 AS numeric(3,1)) / 6;

-- Listing 5-3: Exponents, roots and factorials with SQL
SELECT 3 ^ 4;          -- exponentiation
SELECT |/ 10;          -- square root (operator)
SELECT sqrt(10);       -- square root (function)
SELECT ||/ 10;         -- cube root
-- The postfix ! factorial operator was removed in PostgreSQL 14;
-- the factorial() function works on both older and newer versions.
SELECT factorial(4);   -- factorial

-- Order of operations
SELECT 7 + 8 * 9;      -- answer: 79
SELECT (7 + 8) * 9;    -- answer: 135
SELECT 3 ^ 3 - 1;      -- answer: 26
SELECT 3 ^ (3 - 1);    -- answer: 9
-- Listing 5-4: Selecting Census population columns by race with aliases
SELECT
    geo_name,
    state_us_abbreviation AS "st",
    p0010001 AS "Total Population",
    p0010003 AS "White Alone",
    p0010004 AS "Black or African American Alone",
    p0010005 AS "Am Indian/Alaska Native Alone",
    p0010006 AS "Asian Alone",
    p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
    p0010008 AS "Some Other Race Alone",
    p0010009 AS "Two or More Races"
FROM us_counties_2010;

-- Listing 5-5: Adding two columns in us_counties_2010
SELECT
    geo_name,
    state_us_abbreviation AS "st",
    p0010003 AS "White Alone",
    p0010004 AS "Black Alone",
    p0010003 + p0010004 AS "Total White and Black"
FROM us_counties_2010;

-- Listing 5-6: Checking Census data totals
-- "Difference" is the sum of the race columns minus the reported total.
SELECT
    geo_name,
    state_us_abbreviation AS "st",
    p0010001 AS "Total",
    p0010003 + p0010004 + p0010005 + p0010006 + p0010007
        + p0010008 + p0010009 AS "All Races",
    (p0010003 + p0010004 + p0010005 + p0010006 + p0010007
        + p0010008 + p0010009) - p0010001 AS "Difference"
FROM us_counties_2010
ORDER BY "Difference" DESC;

-- Listing 5-7: Calculating the percent of the population that is
-- Asian by county (percent of the whole).
-- The CAST to numeric keeps the division from being integer division.
SELECT
    geo_name,
    state_us_abbreviation AS "st",
    (CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
FROM us_counties_2010
ORDER BY "pct_asian" DESC;
-- Listing 5-8: Calculating percent change
CREATE TABLE percent_change (
    department varchar(20),
    spend_2014 numeric(10,2),
    spend_2017 numeric(10,2)
);

-- Columns are named explicitly so the INSERT does not depend on the
-- table's physical column order.
INSERT INTO percent_change (department, spend_2014, spend_2017)
VALUES
    ('Building', 250000, 289000),
    ('Assessor', 178556, 179500),
    ('Library', 87777, 90001),
    ('Clerk', 451980, 650000),
    ('Police', 250000, 223000),
    ('Recreation', 199000, 195000);

-- Percent change = (new - old) / old * 100, rounded to one decimal place
SELECT
    department,
    spend_2014,
    spend_2017,
    round( (spend_2017 - spend_2014) /
                    spend_2014 * 100, 1 ) AS "pct_change"
FROM percent_change;
-- Listing 5-9: Using sum() and avg() aggregate functions
SELECT
    sum(p0010001) AS "County Sum",
    round(avg(p0010001), 0) AS "County Average"
FROM us_counties_2010;

-- Listing 5-10: Testing SQL percentile functions
CREATE TABLE percentile_test (
    numbers integer
);

INSERT INTO percentile_test (numbers)
VALUES
    (1), (2), (3), (4), (5), (6);

-- Compare the continuous and discrete 50th percentile of the six values
SELECT
    percentile_cont(.5)
        WITHIN GROUP (ORDER BY numbers),
    percentile_disc(.5)
        WITHIN GROUP (ORDER BY numbers)
FROM percentile_test;

-- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions
SELECT
    sum(p0010001) AS "County Sum",
    round(avg(p0010001), 0) AS "County Average",
    percentile_cont(.5)
        WITHIN GROUP (ORDER BY p0010001) AS "County Median"
FROM us_counties_2010;

-- Listing 5-12: Passing an array of values to percentile_cont()

-- quartiles
SELECT
    percentile_cont(array[.25,.5,.75])
        WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
FROM us_counties_2010;

-- Extra:

-- quintiles
SELECT
    percentile_cont(array[.2,.4,.6,.8])
        WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
FROM us_counties_2010;

-- deciles
SELECT
    percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
        WITHIN GROUP (ORDER BY p0010001) AS "deciles"
FROM us_counties_2010;

-- Listing 5-13: Using unnest() to turn an array into rows
SELECT
    unnest(
        percentile_cont(array[.25,.5,.75])
            WITHIN GROUP (ORDER BY p0010001)
    ) AS "quartiles"
FROM us_counties_2010;
-- Listing 5-14: Creating a median() aggregate function in PostgreSQL
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
--
-- _final_median(anyarray) is the final function of the aggregate below.
-- It unnests the array, discards NULLs, sorts the values, and returns the
-- average of the middle value(s) as float8:
--   * LIMIT 2 - MOD(count, 2) keeps one row for an odd count and two rows
--     for an even count.
--   * OFFSET GREATEST(CEIL(count / 2.0) - 1, 0) skips to the middle; the
--     GREATEST(..., 0) keeps the offset non-negative when the count is 0.
-- With no non-NULL values, AVG sees no rows and the result is NULL.
CREATE OR REPLACE FUNCTION _final_median(anyarray)
RETURNS float8 AS
$$
WITH q AS
(
SELECT val
FROM unnest($1) val
WHERE VAL IS NOT NULL
ORDER BY 1
),
cnt AS
(
SELECT COUNT(*) AS c FROM q
)
SELECT AVG(val)::float8
FROM
(
SELECT val FROM q
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
) q2;
$$
LANGUAGE sql IMMUTABLE;
-- median(anyelement): starts from an empty array ('{}'), appends each input
-- value with array_append, then hands the array to _final_median.
CREATE AGGREGATE median(anyelement) (
SFUNC=array_append,
STYPE=anyarray,
FINALFUNC=_final_median,
INITCOND='{}'
);
-- Listing 5-15: Using a median() aggregate function
-- Assumes the median() aggregate from Listing 5-14 has been created.
-- The last two columns put median() next to percentile_cont(.5).
SELECT
    sum(p0010001) AS "County Sum",
    round(avg(p0010001), 0) AS "County Average",
    median(p0010001) AS "County Median",
    percentile_cont(.5)
        WITHIN GROUP (ORDER BY p0010001) AS "50th Percentile"
FROM us_counties_2010;

-- Listing 5-16: Finding the most-frequent value with mode()
SELECT
    mode() WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 5: Basic Math and Stats with SQL ### Chapter 6: Basic Math and Stats with SQL
Covers arithmetic operations and introduces aggregate functions for finding sums, averages, and medians. Covers arithmetic operations and introduces aggregate functions for finding sums, averages, and medians.

View File

@ -1,196 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 6 Code Examples
--------------------------------------------------------------
-- Listing 6-1: Creating the departments and employees tables
CREATE TABLE departments (
    dept_id bigserial,
    dept    varchar(100),
    city    varchar(100),
    CONSTRAINT dept_key PRIMARY KEY (dept_id),
    CONSTRAINT dept_city_unique UNIQUE (dept, city)
);

CREATE TABLE employees (
    emp_id     bigserial,
    first_name varchar(100),
    last_name  varchar(100),
    salary     integer,
    -- bigint matches the type behind departments.dept_id (bigserial), so the
    -- foreign key can hold every value the referenced key can generate.
    dept_id    bigint REFERENCES departments (dept_id),
    CONSTRAINT emp_key PRIMARY KEY (emp_id),
    CONSTRAINT emp_dept_unique UNIQUE (emp_id, dept_id)
);

-- dept_id is omitted, so bigserial assigns it
INSERT INTO departments (dept, city)
VALUES
    ('Tax', 'Atlanta'),
    ('IT', 'Boston');

-- dept_id values 1 and 2 refer to the two departments inserted above
INSERT INTO employees (first_name, last_name, salary, dept_id)
VALUES
    ('Nancy', 'Jones', 62500, 1),
    ('Lee', 'Smith', 59300, 1),
    ('Soo', 'Nguyen', 83000, 2),
    ('Janet', 'King', 95000, 2);

-- Listing 6-2: Joining the employees and departments tables
SELECT *
FROM employees JOIN departments
ON employees.dept_id = departments.dept_id;
-- Listing 6-3: Creating two tables to explore JOIN types
CREATE TABLE schools_left (
    id          integer CONSTRAINT left_id_key PRIMARY KEY,
    left_school varchar(30)
);

CREATE TABLE schools_right (
    id           integer CONSTRAINT right_id_key PRIMARY KEY,
    right_school varchar(30)
);

-- ids 1, 2 and 6 appear in both tables; 5 only on the left; 3 and 4 only
-- on the right
INSERT INTO schools_left (id, left_school)
VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (5, 'Washington Middle School'),
    (6, 'Jefferson High School');

INSERT INTO schools_right (id, right_school)
VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (3, 'Morrison Elementary'),
    (4, 'Chase Magnet Academy'),
    (6, 'Jefferson High School');
-- Listing 6-4: Using JOIN
SELECT *
FROM schools_left
    JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Bonus: Also can be specified as INNER JOIN
SELECT *
FROM schools_left
    INNER JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Listing 6-5: Using LEFT JOIN
SELECT *
FROM schools_left
    LEFT JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Listing 6-6: Using RIGHT JOIN
SELECT *
FROM schools_left
    RIGHT JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Listing 6-7: Using FULL OUTER JOIN
SELECT *
FROM schools_left
    FULL OUTER JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Listing 6-8: Using CROSS JOIN
SELECT *
FROM schools_left
    CROSS JOIN schools_right;

-- Listing 6-9: Filtering to show missing values with IS NULL
-- Keeps only the left-hand rows that found no match on the right.
SELECT *
FROM schools_left
    LEFT JOIN schools_right
        ON schools_left.id = schools_right.id
WHERE schools_right.id IS NULL;

-- Listing 6-10: Querying specific columns in a join
SELECT
    schools_left.id,
    schools_left.left_school,
    schools_right.right_school
FROM schools_left
    LEFT JOIN schools_right
        ON schools_left.id = schools_right.id;

-- Listing 6-11: Simplifying code with table aliases
SELECT
    lt.id,
    lt.left_school,
    rt.right_school
FROM schools_left AS lt
    LEFT JOIN schools_right AS rt
        ON lt.id = rt.id;
-- Listing 6-12: Joining multiple tables
CREATE TABLE schools_enrollment (
    id         integer,
    enrollment integer
);

CREATE TABLE schools_grades (
    id     integer,
    grades varchar(10)
);

INSERT INTO schools_enrollment (id, enrollment)
VALUES
    (1, 360),
    (2, 1001),
    (5, 450),
    (6, 927);

INSERT INTO schools_grades (id, grades)
VALUES
    (1, 'K-3'),
    (2, '9-12'),
    (5, '6-8'),
    (6, '9-12');

-- Both lookups are LEFT JOINs keyed on the id from schools_left
SELECT
    lt.id,
    lt.left_school,
    en.enrollment,
    gr.grades
FROM schools_left AS lt
    LEFT JOIN schools_enrollment AS en
        ON lt.id = en.id
    LEFT JOIN schools_grades AS gr
        ON lt.id = gr.id;
-- Listing 6-13: Performing math on joined Census tables
-- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf
-- Note: Some non-number columns have been given more descriptive names
CREATE TABLE us_counties_2000 (
    geo_name varchar(90),              -- County/state name,
    state_us_abbreviation varchar(2),  -- State/U.S. abbreviation
    state_fips varchar(2),             -- State FIPS code
    county_fips varchar(3),            -- County code
    p0010001 integer,                  -- Total population
    p0010002 integer,                  -- Population of one race:
    p0010003 integer,                  -- White Alone
    p0010004 integer,                  -- Black or African American alone
    p0010005 integer,                  -- American Indian and Alaska Native alone
    p0010006 integer,                  -- Asian alone
    p0010007 integer,                  -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,                  -- Some Other Race alone
    p0010009 integer,                  -- Population of two or more races
    p0010010 integer,                  -- Population of two races
    p0020002 integer,                  -- Hispanic or Latino
    p0020003 integer                   -- Not Hispanic or Latino:
);

COPY us_counties_2000
    FROM 'C:\YourDirectory\us_counties_2000.csv'
    WITH (FORMAT CSV, HEADER);

-- Population change between the 2000 and 2010 tables, matched on state and
-- county FIPS codes. The join also drops counties whose total is unchanged.
SELECT
    c2010.geo_name,
    c2010.state_us_abbreviation AS state,
    c2010.p0010001 AS pop_2010,
    c2000.p0010001 AS pop_2000,
    c2010.p0010001 - c2000.p0010001 AS raw_change,
    round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
           / c2000.p0010001 * 100, 1 ) AS pct_change
FROM us_counties_2010 c2010
    INNER JOIN us_counties_2000 c2000
        ON c2010.state_fips = c2000.state_fips
       AND c2010.county_fips = c2000.county_fips
       AND c2010.p0010001 <> c2000.p0010001
ORDER BY pct_change DESC;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 6: Joining Tables in a Relational Database ### Chapter 7: Joining Tables in a Relational Database
Explains how to query multiple, related tables by joining them on key columns. You'll learn how and when to use different types of joins. Explains how to query multiple, related tables by joining them on key columns. You'll learn how and when to use different types of joins.

View File

@ -1,184 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 7 Code Examples
--------------------------------------------------------------
-- Listing 7-1: Declaring a single-column natural key as primary key

-- Variant 1: as a column constraint
CREATE TABLE natural_key_example (
    license_id varchar(10) CONSTRAINT license_key PRIMARY KEY,
    first_name varchar(50),
    last_name  varchar(50)
);

-- Drop the table before trying again
DROP TABLE natural_key_example;

-- Variant 2: as a table constraint
CREATE TABLE natural_key_example (
    license_id varchar(10),
    first_name varchar(50),
    last_name  varchar(50),
    CONSTRAINT license_key PRIMARY KEY (license_id)
);

-- Listing 7-2: Example of a primary key violation
-- The second INSERT reuses license_id 'T229901', so it is rejected.
INSERT INTO natural_key_example (license_id, first_name, last_name)
VALUES
    ('T229901', 'Lynn', 'Malero');

INSERT INTO natural_key_example (license_id, first_name, last_name)
VALUES
    ('T229901', 'Sam', 'Tracy');
-- Listing 7-3: Declaring a composite primary key as a natural key
CREATE TABLE natural_key_composite_example (
    student_id varchar(10),
    school_day date,
    present    boolean,
    CONSTRAINT student_key PRIMARY KEY (student_id, school_day)
);

-- Listing 7-4: Example of a composite primary key violation
-- Literals match the column types: student_id is a quoted string because the
-- column is varchar, dates use ISO 8601 (YYYY-MM-DD) so they do not depend on
-- the session's DateStyle, and present uses boolean keywords.
INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES ('775', '2017-01-22', TRUE);

INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES ('775', '2017-01-23', TRUE);

-- Fails: (student_id, school_day) repeats the key of the previous row.
INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES ('775', '2017-01-23', FALSE);
-- Listing 7-5: Declaring a bigserial column as a surrogate key
CREATE TABLE surrogate_key_example (
    order_number bigserial,
    product_name varchar(50),
    order_date   date,
    CONSTRAINT order_key PRIMARY KEY (order_number)
);

-- order_number is omitted, so bigserial assigns it
INSERT INTO surrogate_key_example (product_name, order_date)
VALUES
    ('Beachball Polish', '2015-03-17'),
    ('Wrinkle De-Atomizer', '2017-05-22'),
    ('Flux Capacitor', '1985-10-26');

SELECT *
FROM surrogate_key_example;
-- Listing 7-6: A foreign key example
CREATE TABLE licenses (
    license_id varchar(10),
    first_name varchar(50),
    last_name  varchar(50),
    CONSTRAINT licenses_key PRIMARY KEY (license_id)
);

CREATE TABLE registrations (
    registration_id   varchar(10),
    registration_date date,
    license_id        varchar(10) REFERENCES licenses (license_id),
    CONSTRAINT registration_key PRIMARY KEY (registration_id, license_id)
);

INSERT INTO licenses (license_id, first_name, last_name)
VALUES ('T229901', 'Lynn', 'Malero');

-- Dates use ISO 8601 (YYYY-MM-DD) so they do not depend on the session's
-- DateStyle setting.

-- Succeeds: license 'T229901' exists in licenses.
INSERT INTO registrations (registration_id, registration_date, license_id)
VALUES ('A203391', '2017-03-17', 'T229901');

-- Fails: license 'T000001' was never inserted into licenses, so the
-- foreign key rejects the row.
INSERT INTO registrations (registration_id, registration_date, license_id)
VALUES ('A75772', '2017-03-17', 'T000001');
-- Listing 7-7: CHECK constraint examples
CREATE TABLE check_constraint_example (
    user_id   bigserial,
    user_role varchar(50),
    salary    integer,
    CONSTRAINT user_id_key PRIMARY KEY (user_id),
    CONSTRAINT check_role_in_list CHECK (user_role IN('Admin', 'Staff')),
    CONSTRAINT check_salary_not_zero CHECK (salary > 0)
);

-- Both of these will fail:

-- 'admin' is lowercase, so it is not in the allowed list
INSERT INTO check_constraint_example (user_role)
VALUES
    ('admin');

-- 0 is not greater than 0
INSERT INTO check_constraint_example (salary)
VALUES
    (0);
-- Listing 7-8: UNIQUE constraint example
CREATE TABLE unique_constraint_example (
    contact_id bigserial CONSTRAINT contact_id_key PRIMARY KEY,
    first_name varchar(50),
    last_name  varchar(50),
    email      varchar(200),
    CONSTRAINT email_unique UNIQUE (email)
);

INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES
    ('Samantha', 'Lee', 'slee@example.org');

INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES
    ('Betty', 'Diaz', 'bdiaz@example.org');

-- Repeats the email from the first row, so email_unique rejects it
INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES
    ('Sasha', 'Lee', 'slee@example.org');
-- Listing 7-9: NOT NULL constraint example
CREATE TABLE not_null_example (
    student_id bigserial,
    first_name varchar(50) NOT NULL,
    last_name  varchar(50) NOT NULL,
    CONSTRAINT student_id_key PRIMARY KEY (student_id)
);

-- Listing 7-10: Dropping and adding a primary key and a NOT NULL constraint

-- Primary key: drop
ALTER TABLE not_null_example
    DROP CONSTRAINT student_id_key;

-- Primary key: add
ALTER TABLE not_null_example
    ADD CONSTRAINT student_id_key PRIMARY KEY (student_id);

-- NOT NULL on first_name: drop
ALTER TABLE not_null_example
    ALTER COLUMN first_name DROP NOT NULL;

-- NOT NULL on first_name: add
ALTER TABLE not_null_example
    ALTER COLUMN first_name SET NOT NULL;
-- Listing 7-11: Importing New York City address data
CREATE TABLE new_york_addresses (
    longitude     numeric(9,6),
    latitude      numeric(9,6),
    street_number varchar(10),
    street        varchar(32),
    unit          varchar(7),
    postcode      varchar(5),
    id            integer CONSTRAINT new_york_key PRIMARY KEY
);

COPY new_york_addresses
    FROM 'C:\YourDirectory\city_of_new_york.csv'
    WITH (FORMAT CSV, HEADER);

-- Listing 7-12: Benchmark queries for index performance
-- Each query filters on the street column, which Listing 7-13 indexes.
EXPLAIN ANALYZE
SELECT *
FROM new_york_addresses
WHERE street = 'BROADWAY';

EXPLAIN ANALYZE
SELECT *
FROM new_york_addresses
WHERE street = '52 STREET';

EXPLAIN ANALYZE
SELECT *
FROM new_york_addresses
WHERE street = 'ZWICKY AVENUE';

-- Listing 7-13: Creating a B-Tree index on the new_york_addresses table
CREATE INDEX street_idx
    ON new_york_addresses (street);

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 7: Table Design that Works for You ### Chapter 8: Table Design that Works for You
Covers how to set up tables to improve the organization and integrity of your data as well as how to speed up queries using indexes. Covers how to set up tables to improve the organization and integrity of your data as well as how to speed up queries using indexes.

View File

@ -1,299 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 8 Code Examples
--------------------------------------------------------------
-- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table
-- fscskey is the primary key. Every column is NOT NULL except startdat,
-- enddate, salaries, benefit, and staffexp.
CREATE TABLE pls_fy2014_pupld14a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6),
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio_ph integer NOT NULL,
    audio_dl float NOT NULL,
    video_ph integer NOT NULL,
    video_dl float NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    elmatcir integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    wifisess integer NOT NULL,
    yr_sub integer NOT NULL,
    CONSTRAINT fscskey2014_key PRIMARY KEY (fscskey)
);
-- Load the 2014 survey rows first, then build the secondary indexes.
-- Building an index once over the loaded table is cheaper than updating
-- it incrementally for every row that COPY inserts.
-- The CSV is expected to carry a header row (HEADER option).
COPY pls_fy2014_pupld14a
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
WITH (FORMAT CSV, HEADER);

-- Secondary indexes on columns used for filtering and grouping.
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
-- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table
-- fscskey is the primary key. Every column is NOT NULL except startdat,
-- enddate, salaries, benefit, and staffexp.
CREATE TABLE pls_fy2009_pupld09a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6),
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio integer NOT NULL,
    video integer NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    yr_sub integer NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL,
    CONSTRAINT fscskey2009_key PRIMARY KEY (fscskey)
);
-- Load the 2009 survey rows first, then build the secondary indexes.
-- Building an index once over the loaded table is cheaper than updating
-- it incrementally for every row that COPY inserts.
-- The CSV is expected to carry a header row (HEADER option).
COPY pls_fy2009_pupld09a
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
WITH (FORMAT CSV, HEADER);

-- Secondary indexes on columns used for filtering and grouping.
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
-- Listing 8-3: Using count() for table row counts
-- count(*) counts every row in each survey table.
SELECT count(*) FROM pls_fy2014_pupld14a;

SELECT count(*) FROM pls_fy2009_pupld09a;

-- Listing 8-4: Using count() for the number of values in a column
-- count(column) skips rows where the column is NULL.
SELECT count(salaries) FROM pls_fy2014_pupld14a;

-- Listing 8-5: Using count() for the number of distinct values in a column
-- First all library names, then only the distinct ones, for comparison.
SELECT count(libname) FROM pls_fy2014_pupld14a;

SELECT count(DISTINCT libname) FROM pls_fy2014_pupld14a;
-- Bonus: find duplicate libnames
-- HAVING keeps only names that occur more than once, so the result lists
-- actual duplicates instead of every library name. Names with the same
-- count are ordered alphabetically to make the output deterministic.
SELECT libname,
       count(libname)
FROM pls_fy2014_pupld14a
GROUP BY libname
HAVING count(libname) > 1
ORDER BY count(libname) DESC,
         libname;
-- Bonus: see location of every Oxford Public Library
SELECT
    libname,
    city,
    stabr
FROM pls_fy2014_pupld14a
WHERE libname = 'OXFORD PUBLIC LIBRARY';

-- Listing 8-6: Finding the most and fewest visits using max() and min()
SELECT
    max(visits),
    min(visits)
FROM pls_fy2014_pupld14a;
-- Listing 8-7: Using GROUP BY on the stabr column
-- There are 56 in 2014.
SELECT stabr
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY stabr;

-- Bonus: there are 55 in 2009.
SELECT stabr
FROM pls_fy2009_pupld09a
GROUP BY stabr
ORDER BY stabr;

-- Listing 8-8: Using GROUP BY on the city and stabr columns
-- One row per distinct (city, stabr) pair.
SELECT
    city,
    stabr
FROM pls_fy2014_pupld14a
GROUP BY
    city,
    stabr
ORDER BY
    city,
    stabr;

-- Bonus: We can count some of the combos
SELECT
    city,
    stabr,
    count(*)
FROM pls_fy2014_pupld14a
GROUP BY
    city,
    stabr
ORDER BY count(*) DESC;

-- Listing 8-9: GROUP BY with count() on the stabr column
SELECT
    stabr,
    count(*)
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY count(*) DESC;

-- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns
-- Within each stabr, the largest stataddr groups come first.
SELECT
    stabr,
    stataddr,
    count(*)
FROM pls_fy2014_pupld14a
GROUP BY
    stabr,
    stataddr
ORDER BY
    stabr,
    count(*) DESC;
-- Listing 8-11: Using the sum() aggregate function to total visits to
-- libraries in 2014 and 2009
-- Rows with negative visits are left out of both totals
-- (presumably placeholder codes; confirm against the survey documentation).

-- 2014
SELECT sum(visits) AS visits_2014
FROM pls_fy2014_pupld14a
WHERE visits >= 0;

-- 2009
SELECT sum(visits) AS visits_2009
FROM pls_fy2009_pupld09a
WHERE visits >= 0;

-- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables
-- Only libraries present in both years (same fscskey) with non-negative
-- visits in both years contribute to the totals.
SELECT
    sum(y14.visits) AS visits_2014,
    sum(y09.visits) AS visits_2009
FROM pls_fy2014_pupld14a AS y14
INNER JOIN pls_fy2009_pupld09a AS y09
    ON y14.fscskey = y09.fscskey
WHERE y14.visits >= 0
  AND y09.visits >= 0;
-- Listing 8-13: Using GROUP BY to track percent change in library visits by state
-- Joins the two survey years on fscskey and, per state, reports total
-- visits for each year plus the percent change from 2009 to 2014.
--   * The 2014 total is cast to unconstrained numeric so the subtraction
--     and division are done in exact decimal arithmetic without a
--     precision cap that a large total could overflow.
--   * NULLIF turns a zero 2009 total into NULL, so such a state yields a
--     NULL pct_change instead of a division-by-zero error.
--   * NULLS LAST keeps any such NULL rows below the ranked states.
SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round(
           (CAST(sum(pls14.visits) AS numeric) - sum(pls09.visits))
               / NULLIF(sum(pls09.visits), 0) * 100, 2
       ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
    ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
ORDER BY pct_change DESC NULLS LAST;
-- Listing 8-14: Using HAVING to filter the results of an aggregate query
-- Per-state visit totals and percent change from 2009 to 2014, limited by
-- HAVING to states whose 2014 total exceeds 50 million visits.
--   * The 2014 total is cast to unconstrained numeric so the subtraction
--     and division are done in exact decimal arithmetic without a
--     precision cap that a large total could overflow.
--   * NULLIF turns a zero 2009 total into NULL, so such a state yields a
--     NULL pct_change instead of a division-by-zero error.
--   * NULLS LAST keeps any such NULL rows below the ranked states.
SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round(
           (CAST(sum(pls14.visits) AS numeric) - sum(pls09.visits))
               / NULLIF(sum(pls09.visits), 0) * 100, 2
       ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
    ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
HAVING sum(pls14.visits) > 50000000
ORDER BY pct_change DESC NULLS LAST;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 8: Extracting Information by Grouping and Summarizing ### Chapter 9: Extracting Information by Grouping and Summarizing
Explains how to use aggregate functions to find trends in U.S. library use based on annual surveys. Explains how to use aggregate functions to find trends in U.S. library use based on annual surveys.

View File

@ -1,259 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 9 Code Examples
--------------------------------------------------------------
-- Listing 9-1: Importing the FSIS Meat, Poultry, and Egg Inspection Directory
-- https://catalog.data.gov/dataset/meat-poultry-and-egg-inspection-directory-by-establishment-name
-- est_number is the primary key; every other column is nullable.
CREATE TABLE meat_poultry_egg_inspect (
    est_number varchar(50),
    company varchar(100),
    street varchar(100),
    city varchar(30),
    st varchar(2),
    zip varchar(5),
    phone varchar(14),
    grant_date date,
    activities text,
    dbas text,
    CONSTRAINT est_number_key PRIMARY KEY (est_number)
);

-- Comma-delimited CSV with a header row.
COPY meat_poultry_egg_inspect
    FROM 'C:\YourDirectory\MPI_Directory_by_Establishment_Name.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

CREATE INDEX company_idx
    ON meat_poultry_egg_inspect (company);

-- Count the rows imported:
SELECT count(*)
FROM meat_poultry_egg_inspect;
-- Listing 9-2: Finding multiple companies at the same address
-- Returns each (company, street, city, st) combination that appears on
-- more than one row, with the number of rows sharing it.
SELECT
    company,
    street,
    city,
    st,
    count(*) AS address_count
FROM meat_poultry_egg_inspect
GROUP BY
    company,
    street,
    city,
    st
HAVING count(*) > 1
ORDER BY
    company,
    street,
    city,
    st;

-- Listing 9-3: Grouping and counting states
SELECT
    st,
    count(*) AS st_count
FROM meat_poultry_egg_inspect
GROUP BY st
ORDER BY st;

-- Listing 9-4: Using IS NULL to find missing values in the st column
SELECT
    est_number,
    company,
    city,
    st,
    zip
FROM meat_poultry_egg_inspect
WHERE st IS NULL;

-- Listing 9-5: Using GROUP BY and count() to find inconsistent company names
-- Sorting by company puts near-identical spellings next to each other.
SELECT
    company,
    count(*) AS company_count
FROM meat_poultry_egg_inspect
GROUP BY company
ORDER BY company;

-- Listing 9-6: Using length() and count() to test the zip column
-- Shows how many rows have a zip value of each length.
SELECT
    length(zip),
    count(*) AS length_count
FROM meat_poultry_egg_inspect
GROUP BY length(zip)
ORDER BY length(zip);

-- Listing 9-7: Filtering with length() to find short zip values
-- Per-state count of rows whose zip has fewer than five characters.
SELECT
    st,
    count(*) AS st_count
FROM meat_poultry_egg_inspect
WHERE length(zip) < 5
GROUP BY st
ORDER BY st;
-- Listing 9-8: Backing up a table
-- Copies every row and column into a new table.
CREATE TABLE meat_poultry_egg_inspect_backup AS
    SELECT *
    FROM meat_poultry_egg_inspect;

-- Check number of records:
-- The two counts are returned side by side in a single row.
SELECT
    (SELECT count(*) FROM meat_poultry_egg_inspect) AS original,
    (SELECT count(*) FROM meat_poultry_egg_inspect_backup) AS backup;

-- Listing 9-9: Creating and filling the st_copy column with ALTER TABLE and UPDATE
ALTER TABLE meat_poultry_egg_inspect
    ADD COLUMN st_copy varchar(2);

-- No WHERE clause on purpose: every row gets its st value copied.
UPDATE meat_poultry_egg_inspect
SET st_copy = st;

-- Listing 9-10: Checking values in the st and st_copy columns
SELECT
    st,
    st_copy
FROM meat_poultry_egg_inspect
ORDER BY st;

-- Listing 9-11: Updating the st column for three establishments
-- Each statement targets a single establishment by its est_number.
UPDATE meat_poultry_egg_inspect
SET st = 'MN'
WHERE est_number = 'V18677A';

UPDATE meat_poultry_egg_inspect
SET st = 'AL'
WHERE est_number = 'M45319+P45319';

UPDATE meat_poultry_egg_inspect
SET st = 'WI'
WHERE est_number = 'M263A+P263A+V263A';

-- Listing 9-12: Restoring original st column values

-- Restoring from the column backup
-- No WHERE clause on purpose: st is reset on every row.
UPDATE meat_poultry_egg_inspect
SET st = st_copy;

-- Restoring from the table backup
-- Rows are matched to their backup copy by est_number.
UPDATE meat_poultry_egg_inspect AS live
SET st = saved.st
FROM meat_poultry_egg_inspect_backup AS saved
WHERE live.est_number = saved.est_number;
-- Listing 9-13: Creating and filling the company_standard column
ALTER TABLE meat_poultry_egg_inspect
    ADD COLUMN company_standard varchar(100);

-- No WHERE clause on purpose: every row starts with its current company name.
UPDATE meat_poultry_egg_inspect
SET company_standard = company;

-- Listing 9-14: Use UPDATE to modify field values that match a string
-- Every company whose name starts with 'Armour' gets one standard spelling.
UPDATE meat_poultry_egg_inspect
SET company_standard = 'Armour-Eckrich Meats'
WHERE company LIKE 'Armour%';

SELECT
    company,
    company_standard
FROM meat_poultry_egg_inspect
WHERE company LIKE 'Armour%';

-- Listing 9-15: Creating and filling the zip_copy column
ALTER TABLE meat_poultry_egg_inspect
    ADD COLUMN zip_copy varchar(5);

-- No WHERE clause on purpose: zip is copied on every row before it is edited.
UPDATE meat_poultry_egg_inspect
SET zip_copy = zip;

-- Listing 9-16: Modify codes in the zip column missing two leading zeros
-- Applies only to three-character zips in PR and VI.
UPDATE meat_poultry_egg_inspect
SET zip = '00' || zip
WHERE st IN ('PR', 'VI')
  AND length(zip) = 3;

-- Listing 9-17: Modify codes in the zip column missing one leading zero
-- Applies only to four-character zips in the listed states.
UPDATE meat_poultry_egg_inspect
SET zip = '0' || zip
WHERE st IN ('CT', 'MA', 'ME', 'NH', 'NJ', 'RI', 'VT')
  AND length(zip) = 4;
-- Listing 9-18: Creating and filling a state_regions table
-- One row per state code (st is the primary key) with its region name.
CREATE TABLE state_regions (
    st varchar(2),
    region varchar(20) NOT NULL,
    CONSTRAINT st_key PRIMARY KEY (st)
);

-- Comma-delimited CSV with a header row.
COPY state_regions
    FROM 'C:\YourDirectory\state_regions.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

-- Listing 9-19: Adding and updating an inspection_date column
ALTER TABLE meat_poultry_egg_inspect
    ADD COLUMN inspection_date date;

-- Sets the date only on rows whose state belongs to the 'New England'
-- region according to state_regions; all other rows keep NULL.
UPDATE meat_poultry_egg_inspect AS inspect
SET inspection_date = '2019-12-01'
WHERE EXISTS (
    SELECT 1
    FROM state_regions AS sr
    WHERE sr.st = inspect.st
      AND sr.region = 'New England'
);

-- Listing 9-20: Viewing updated inspection_date values
-- One row per distinct (st, inspection_date) pair.
SELECT
    st,
    inspection_date
FROM meat_poultry_egg_inspect
GROUP BY
    st,
    inspection_date
ORDER BY st;
-- Listing 9-21: Delete rows matching an expression
-- Removes every row whose state code is PR or VI.
DELETE FROM meat_poultry_egg_inspect
WHERE st IN ('PR', 'VI');

-- Listing 9-22: Remove a column from a table using DROP
ALTER TABLE meat_poultry_egg_inspect
    DROP COLUMN zip_copy;

-- Listing 9-23: Remove a table from a database using DROP
DROP TABLE meat_poultry_egg_inspect_backup;
-- Listing 9-24: Demonstrating a transaction block

-- Start transaction and perform update
-- The new value below (note the doubled "s") is the change that the
-- ROLLBACK further down discards.
START TRANSACTION;

UPDATE meat_poultry_egg_inspect
SET company = 'AGRO Merchantss Oakland LLC'
WHERE company = 'AGRO Merchants Oakland, LLC';

-- view changes
SELECT company
FROM meat_poultry_egg_inspect
WHERE company LIKE 'AGRO%'
ORDER BY company;

-- Revert changes
ROLLBACK;

-- See restored state
SELECT company
FROM meat_poultry_egg_inspect
WHERE company LIKE 'AGRO%'
ORDER BY company;

-- Alternately, commit changes at the end:
-- This time the update is made permanent by COMMIT.
START TRANSACTION;

UPDATE meat_poultry_egg_inspect
SET company = 'AGRO Merchants Oakland LLC'
WHERE company = 'AGRO Merchants Oakland, LLC';

COMMIT;
-- Listing 9-25: Backing up a table while adding and filling a new column
-- The copy carries every existing column plus reviewed_date, which holds
-- the same date on every row.
CREATE TABLE meat_poultry_egg_inspect_backup AS
    SELECT
        *,
        CAST('2018-02-07' AS date) AS reviewed_date
    FROM meat_poultry_egg_inspect;

-- Listing 9-26: Swapping table names using ALTER TABLE
-- Three renames through a temporary name: the original table ends up as
-- the backup and the copy made above takes over the original name.
ALTER TABLE meat_poultry_egg_inspect
    RENAME TO meat_poultry_egg_inspect_temp;

ALTER TABLE meat_poultry_egg_inspect_backup
    RENAME TO meat_poultry_egg_inspect;

ALTER TABLE meat_poultry_egg_inspect_temp
    RENAME TO meat_poultry_egg_inspect_backup;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 9: Inspecting and Modifying Data ### Chapter 10: Inspecting and Modifying Data
Explores how to nd and x incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example. Explores how to find and remedy incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example.

View File

@ -1,167 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 10 Code Examples
--------------------------------------------------------------
-- Listing 10-1: Create Census 2011-2015 ACS 5-Year stats table and import data
-- geoid is the primary key. The CHECK constraint rejects any row whose
-- pct_masters_higher exceeds its pct_bachelors_higher.
CREATE TABLE acs_2011_2015_stats (
    geoid varchar(14),
    county varchar(50) NOT NULL,
    st varchar(20) NOT NULL,
    pct_travel_60_min numeric(5,3) NOT NULL,
    pct_bachelors_higher numeric(5,3) NOT NULL,
    pct_masters_higher numeric(5,3) NOT NULL,
    median_hh_income integer,
    CONSTRAINT geoid_key PRIMARY KEY (geoid),
    CHECK (pct_masters_higher <= pct_bachelors_higher)
);

-- Comma-delimited CSV with a header row.
COPY acs_2011_2015_stats
    FROM 'C:\YourDirectory\acs_2011_2015_stats.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

SELECT *
FROM acs_2011_2015_stats;
-- Listing 10-2: Using corr(Y, X) to measure the relationship between
-- education and income
SELECT corr(median_hh_income, pct_bachelors_higher) AS bachelors_income_r
FROM acs_2011_2015_stats;

-- Listing 10-3: Using corr(Y, X) on additional variables
-- Each coefficient is cast to numeric so it can be rounded to two
-- decimal places.
SELECT
    round(
        CAST(corr(median_hh_income, pct_bachelors_higher) AS numeric), 2
    ) AS bachelors_income_r,
    round(
        CAST(corr(pct_travel_60_min, median_hh_income) AS numeric), 2
    ) AS income_travel_r,
    round(
        CAST(corr(pct_travel_60_min, pct_bachelors_higher) AS numeric), 2
    ) AS bachelors_travel_r
FROM acs_2011_2015_stats;

-- Listing 10-4: Regression slope and intercept functions
-- median_hh_income is the dependent (Y) argument and
-- pct_bachelors_higher the independent (X) argument.
SELECT
    round(
        CAST(regr_slope(median_hh_income, pct_bachelors_higher) AS numeric), 2
    ) AS slope,
    round(
        CAST(regr_intercept(median_hh_income, pct_bachelors_higher) AS numeric), 2
    ) AS y_intercept
FROM acs_2011_2015_stats;

-- Listing 10-5: Calculating the coefficient of determination, or r-squared
SELECT
    round(
        CAST(regr_r2(median_hh_income, pct_bachelors_higher) AS numeric), 3
    ) AS r_squared
FROM acs_2011_2015_stats;

-- Bonus: Additional stats functions.

-- Variance
SELECT var_pop(median_hh_income)
FROM acs_2011_2015_stats;

-- Standard deviation of the entire population
SELECT stddev_pop(median_hh_income)
FROM acs_2011_2015_stats;

-- Covariance
SELECT covar_pop(median_hh_income, pct_bachelors_higher)
FROM acs_2011_2015_stats;
-- Listing 10-6: The rank() and dense_rank() window functions
CREATE TABLE widget_companies (
    id bigserial,
    company varchar(30) NOT NULL,
    widget_output integer NOT NULL
);

-- Two companies share an output of 201000, which is what makes the two
-- ranking functions produce different results below.
INSERT INTO widget_companies
    (company, widget_output)
VALUES
    ('Morse Widgets', 125000),
    ('Springfield Widget Masters', 143000),
    ('Best Widgets', 196000),
    ('Acme Inc.', 133000),
    ('District Widget Inc.', 201000),
    ('Clarke Amalgamated', 620000),
    ('Stavesacre Industries', 244000),
    ('Bowers Widget Emporium', 201000);

-- Both rankings order companies from highest to lowest output.
SELECT
    company,
    widget_output,
    rank() OVER (ORDER BY widget_output DESC),
    dense_rank() OVER (ORDER BY widget_output DESC)
FROM widget_companies;
-- Listing 10-7: Applying rank() within groups using PARTITION BY
-- A store can appear once per category: (store, category) is the key.
CREATE TABLE store_sales (
    store varchar(30),
    category varchar(30) NOT NULL,
    unit_sales bigint NOT NULL,
    CONSTRAINT store_category_key PRIMARY KEY (store, category)
);

INSERT INTO store_sales
    (store, category, unit_sales)
VALUES
    ('Broders', 'Cereal', 1104),
    ('Wallace', 'Ice Cream', 1863),
    ('Broders', 'Ice Cream', 2517),
    ('Cramers', 'Ice Cream', 2112),
    ('Broders', 'Beer', 641),
    ('Cramers', 'Cereal', 1003),
    ('Cramers', 'Beer', 640),
    ('Wallace', 'Cereal', 980),
    ('Wallace', 'Beer', 988);

-- Ranking restarts for each category; within a category the store with
-- the highest unit_sales is ranked first.
SELECT
    category,
    store,
    unit_sales,
    rank() OVER (
        PARTITION BY category
        ORDER BY unit_sales DESC
    )
FROM store_sales;
-- Listing 10-8: Create and fill a 2015 FBI crime data table
-- Each row is identified by its (st, city) pair.
CREATE TABLE fbi_crime_data_2015 (
    st varchar(20),
    city varchar(50),
    population integer,
    violent_crime integer,
    property_crime integer,
    burglary integer,
    larceny_theft integer,
    motor_vehicle_theft integer,
    CONSTRAINT st_city_key PRIMARY KEY (st, city)
);

-- Comma-delimited CSV with a header row.
COPY fbi_crime_data_2015
    FROM 'C:\YourDirectory\fbi_crime_data_2015.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

SELECT *
FROM fbi_crime_data_2015
ORDER BY population DESC;

-- Listing 10-9: Find property crime rates per thousand in cities with 500,000
-- or more people
-- property_crime is cast to numeric so the division is not integer
-- division. Rows are sorted by the unrounded rate, highest first.
SELECT
    city,
    st,
    population,
    property_crime,
    round(
        (CAST(property_crime AS numeric) / population) * 1000, 1
    ) AS pc_per_1000
FROM fbi_crime_data_2015
WHERE population >= 500000
ORDER BY (CAST(property_crime AS numeric) / population) DESC;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 10: Statistical Functions in SQL ### Chapter 11: Statistical Functions in SQL
Introduces correlation, regression, and ranking functions in SQL to help you derive more meaning from data sets. Introduces correlation, regression, and ranking functions in SQL to help you derive more meaning from data sets.

View File

@ -1,233 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 11 Code Examples
--------------------------------------------------------------
-- Listing 11-1: Extracting components of a timestamp value using date_part()
-- The sample timestamp is written once in a derived table and every
-- component is extracted from that single value.
SELECT
    date_part('year', sample.ts) AS "year",
    date_part('month', sample.ts) AS "month",
    date_part('day', sample.ts) AS "day",
    date_part('hour', sample.ts) AS "hour",
    date_part('minute', sample.ts) AS "minute",
    date_part('seconds', sample.ts) AS "seconds",
    date_part('timezone_hour', sample.ts) AS "tz",
    date_part('week', sample.ts) AS "week",
    date_part('quarter', sample.ts) AS "quarter",
    date_part('epoch', sample.ts) AS "epoch"
FROM (
    SELECT CAST('2019-12-01 18:37:12 EST' AS timestamptz) AS ts
) AS sample;

-- Bonus: Using the SQL-standard extract() for similar datetime parsing:
SELECT extract('year' from CAST('2019-12-01 18:37:12 EST' AS timestamptz)) AS "year";

-- Listing 11-2: Three functions for making datetimes from components

-- make a date
SELECT make_date(2018, 2, 22);

-- make a time
SELECT make_time(18, 4, 30.3);

-- make a timestamp with time zone
SELECT make_timestamptz(2018, 2, 22, 18, 4, 30.3, 'Europe/Lisbon');

-- Bonus: Retrieving the current date and time
SELECT
    current_date,
    current_time,
    current_timestamp,
    localtime,
    localtimestamp,
    now();
-- Listing 11-3: Comparing current_timestamp and clock_timestamp() during row insert
CREATE TABLE current_time_example (
    time_id bigserial,
    current_timestamp_col timestamp with time zone,
    clock_timestamp_col timestamp with time zone
);

-- Inserts 1000 rows in a single statement. Per the PostgreSQL
-- documentation, current_timestamp reports the start time of the current
-- transaction while clock_timestamp() reports the actual current time,
-- so the two columns can be compared row by row.
INSERT INTO current_time_example
    (current_timestamp_col, clock_timestamp_col)
SELECT
    current_timestamp,
    clock_timestamp()
FROM generate_series(1, 1000);

SELECT *
FROM current_time_example;

-- Time Zones

-- Listing 11-4: Showing your PostgreSQL server's default time zone
SHOW timezone; -- Note: You can see all run-time defaults with SHOW ALL;

-- Listing 11-5: Showing time zone abbreviations and names
SELECT *
FROM pg_timezone_abbrevs;

SELECT *
FROM pg_timezone_names;

-- Filter to find one
SELECT *
FROM pg_timezone_names
WHERE name LIKE 'Europe%';
-- Listing 11-6: Setting the time zone for a client session
-- Stores one timestamptz value while the session is in US/Pacific, then
-- reads it back under other zones to show the same instant displayed
-- differently.
SET timezone TO 'US/Pacific';

CREATE TABLE time_zone_test (
    test_date timestamp with time zone
);

-- The literal carries no zone, so it is interpreted in the session time
-- zone set above. The target column is named explicitly so the INSERT
-- stays valid if columns are ever added to the table.
INSERT INTO time_zone_test (test_date)
VALUES ('2020-01-01 4:00');

-- Displayed in US/Pacific.
SELECT test_date
FROM time_zone_test;

SET timezone TO 'US/Eastern';

-- Same stored value, now displayed in US/Eastern.
SELECT test_date
FROM time_zone_test;

-- Same stored value converted to Asia/Seoul without changing the session.
SELECT test_date AT TIME ZONE 'Asia/Seoul'
FROM time_zone_test;
-- Math with dates!
-- Subtracting one date from another, then adding an interval to a date.
SELECT CAST('9/30/1929' AS date) - CAST('9/27/1929' AS date);

SELECT CAST('9/30/1929' AS date) + CAST('5 years' AS interval);
-- Taxi Rides

-- Listing 11-7: Creating a table and importing NYC yellow taxi data
-- trip_id is a generated primary key; every imported column is NOT NULL.
CREATE TABLE nyc_yellow_taxi_trips_2016_06_01 (
    trip_id bigserial,
    vendor_id varchar(1) NOT NULL,
    tpep_pickup_datetime timestamp with time zone NOT NULL,
    tpep_dropoff_datetime timestamp with time zone NOT NULL,
    passenger_count integer NOT NULL,
    trip_distance numeric(8,2) NOT NULL,
    pickup_longitude numeric(18,15) NOT NULL,
    pickup_latitude numeric(18,15) NOT NULL,
    rate_code_id varchar(2) NOT NULL,
    store_and_fwd_flag varchar(1) NOT NULL,
    dropoff_longitude numeric(18,15) NOT NULL,
    dropoff_latitude numeric(18,15) NOT NULL,
    payment_type varchar(1) NOT NULL,
    fare_amount numeric(9,2) NOT NULL,
    extra numeric(9,2) NOT NULL,
    mta_tax numeric(5,2) NOT NULL,
    tip_amount numeric(9,2) NOT NULL,
    tolls_amount numeric(9,2) NOT NULL,
    improvement_surcharge numeric(9,2) NOT NULL,
    total_amount numeric(9,2) NOT NULL,
    PRIMARY KEY (trip_id)
);

-- trip_id is left out of the column list so it is filled from its
-- bigserial default rather than read from the file.
COPY nyc_yellow_taxi_trips_2016_06_01 (
    vendor_id,
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    passenger_count,
    trip_distance,
    pickup_longitude,
    pickup_latitude,
    rate_code_id,
    store_and_fwd_flag,
    dropoff_longitude,
    dropoff_latitude,
    payment_type,
    fare_amount,
    extra,
    mta_tax,
    tip_amount,
    tolls_amount,
    improvement_surcharge,
    total_amount
)
    FROM 'C:\YourDirectory\yellow_tripdata_2016_06_01.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

CREATE INDEX tpep_pickup_idx
    ON nyc_yellow_taxi_trips_2016_06_01 (tpep_pickup_datetime);

SELECT count(*)
FROM nyc_yellow_taxi_trips_2016_06_01;
-- Listing 11-8: Counting taxi trips by hour
-- Groups trips by the hour component of the pickup time.
SELECT
    date_part('hour', tpep_pickup_datetime) AS trip_hour,
    count(*)
FROM nyc_yellow_taxi_trips_2016_06_01
GROUP BY trip_hour
ORDER BY trip_hour;

-- Listing 11-9: Exporting taxi pickups per hour to a CSV file
-- Writes the result of the hourly count query to a comma-delimited file
-- with a header row.
COPY (
    SELECT
        date_part('hour', tpep_pickup_datetime) AS trip_hour,
        count(*)
    FROM nyc_yellow_taxi_trips_2016_06_01
    GROUP BY trip_hour
    ORDER BY trip_hour
)
    TO 'C:\YourDirectory\hourly_pickups_2016_06_01.csv'
    WITH (FORMAT CSV, HEADER, DELIMITER ',');

-- Listing 11-10: Calculating median trip time by hour
-- The median is taken over dropoff minus pickup, grouped by the hour
-- component of the pickup time.
SELECT
    date_part('hour', tpep_pickup_datetime) AS trip_hour,
    percentile_cont(.5) WITHIN GROUP (
        ORDER BY tpep_dropoff_datetime - tpep_pickup_datetime
    ) AS median_trip
FROM nyc_yellow_taxi_trips_2016_06_01
GROUP BY trip_hour
ORDER BY trip_hour;
-- Listing 11-11: Creating a table to hold train trip data
-- The session zone only affects how the stored instants are displayed;
-- every literal below names its own zone abbreviation.
SET timezone TO 'US/Central';

CREATE TABLE train_rides (
    trip_id bigserial,
    segment varchar(50) NOT NULL,
    departure timestamp with time zone NOT NULL,
    arrival timestamp with time zone NOT NULL,
    PRIMARY KEY (trip_id)
);

-- Six consecutive segments; departure and arrival are given in the
-- local zone of the respective city.
INSERT INTO train_rides
    (segment, departure, arrival)
VALUES
    ('Chicago to New York', '2017-11-13 21:30 CST', '2017-11-14 18:23 EST'),
    ('New York to New Orleans', '2017-11-15 14:15 EST', '2017-11-16 19:32 CST'),
    ('New Orleans to Los Angeles', '2017-11-17 13:45 CST', '2017-11-18 9:00 PST'),
    ('Los Angeles to San Francisco', '2017-11-19 10:10 PST', '2017-11-19 21:24 PST'),
    ('San Francisco to Denver', '2017-11-20 9:10 PST', '2017-11-21 18:38 MST'),
    ('Denver to Chicago', '2017-11-22 19:10 MST', '2017-11-23 14:50 CST');

SELECT *
FROM train_rides;
-- Listing 11-12: Calculating the length of each trip segment
-- departure is formatted as text; segment_time is arrival minus departure.
SELECT
    segment,
    to_char(departure, 'YYYY-MM-DD HH12:MI a.m. TZ') AS departure,
    arrival - departure AS segment_time
FROM train_rides;

-- Listing 11-13: Calculating cumulative intervals using OVER
-- Running total of segment times in trip_id order.
SELECT
    segment,
    arrival - departure AS segment_time,
    sum(arrival - departure) OVER (ORDER BY trip_id) AS cume_time
FROM train_rides;

-- Listing 11-14: Better formatting for cumulative trip time
-- Each segment time is converted to epoch seconds, summed as a running
-- total, and the total is turned back into an interval.
SELECT
    segment,
    arrival - departure AS segment_time,
    sum(date_part('epoch', (arrival - departure)))
        OVER (ORDER BY trip_id) * interval '1 second' AS cume_time
FROM train_rides;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 11: Working with Dates and Times ### Chapter 12: Working with Dates and Times
Explains how to create, manipulate, and query dates and times in your database, including working with time zones, using data on New York City taxi trips and Amtrak train schedules. Explains how to create, manipulate, and query dates and times in your database, including working with time zones, using data on New York City taxi trips and Amtrak train schedules.

View File

@ -1,310 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 12 Code Examples
--------------------------------------------------------------
-- Listing 12-1: Using a subquery in a WHERE clause
-- Keeps counties whose population is at or above the 90th percentile.
SELECT
    geo_name,
    state_us_abbreviation,
    p0010001
FROM us_counties_2010
WHERE p0010001 >= (
    SELECT percentile_cont(0.9) WITHIN GROUP (ORDER BY p0010001)
    FROM us_counties_2010
)
ORDER BY p0010001 DESC;
-- Listing 12-2: Using a subquery in a WHERE clause for DELETE
-- Work on a copy so the source census table is left untouched.
CREATE TABLE us_counties_2010_top10 AS
    SELECT *
    FROM us_counties_2010;

-- Remove every county below the copy's 90th-percentile population.
DELETE FROM us_counties_2010_top10
WHERE p0010001 < (
    SELECT percentile_cont(0.9) WITHIN GROUP (ORDER BY p0010001)
    FROM us_counties_2010_top10
);

-- Count the rows that remain.
SELECT count(*)
FROM us_counties_2010_top10;
-- Listing 12-3: Subquery as a derived table in a FROM clause
-- The derived table calcs computes the average and median county population;
-- the outer query rounds them and reports the difference.
SELECT
    round(calcs.average, 0) AS average,
    calcs.median,
    round(calcs.average - calcs.median, 0) AS median_average_diff
FROM (
    SELECT
        avg(p0010001) AS average,
        percentile_cont(0.5)
            WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median
    FROM us_counties_2010
) AS calcs;
-- Listing 12-4: Joining two derived tables
-- plants holds a plant count per state, census a population total per state;
-- joining them on the state code gives plants per million residents.
SELECT
    census.state_us_abbreviation AS st,
    census.st_population,
    plants.plant_count,
    round(
        (plants.plant_count / census.st_population::numeric(10,1)) * 1000000,
        1
    ) AS plants_per_million
FROM (
    SELECT
        st,
        count(*) AS plant_count
    FROM meat_poultry_egg_inspect
    GROUP BY st
) AS plants
INNER JOIN (
    SELECT
        state_us_abbreviation,
        sum(p0010001) AS st_population
    FROM us_counties_2010
    GROUP BY state_us_abbreviation
) AS census
    ON plants.st = census.state_us_abbreviation
ORDER BY plants_per_million DESC;
-- Listing 12-5: Adding a subquery to a column list
-- The scalar subquery repeats the national median on every output row.
SELECT
    geo_name,
    state_us_abbreviation AS st,
    p0010001 AS total_pop,
    (
        SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010
    ) AS us_median
FROM us_counties_2010;
-- Listing 12-6: Using a subquery expression in a calculation
-- Shows counties whose population is within 1,000 of the national median.
-- The same median subquery appears three times: as a column, inside the
-- difference, and inside the filter.
SELECT
    geo_name,
    state_us_abbreviation AS st,
    p0010001 AS total_pop,
    (
        SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010
    ) AS us_median,
    p0010001 - (
        SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010
    ) AS diff_from_median
FROM us_counties_2010
WHERE (
    p0010001 - (
        SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010
    )
) BETWEEN -1000 AND 1000;
-- BONUS: Subquery expressions
-- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200,
-- here's the code to create a retirees table. The queries below are similar
-- to the hypothetical examples on pages 199 and 200. You will need the
-- employees table you created in Chapter 6.
-- Create table and insert data
CREATE TABLE retirees (
    id         int,
    first_name varchar(50),
    last_name  varchar(50)
);

-- Name the target columns explicitly so the load does not depend on the
-- table's column order.
INSERT INTO retirees
    (id, first_name, last_name)
VALUES
    (2, 'Lee', 'Smith'),
    (4, 'Janet', 'King');
-- Generating values for the IN operator:
-- employees whose emp_id appears among the retiree ids.
SELECT
    first_name,
    last_name
FROM employees
WHERE emp_id IN (
    SELECT retirees.id
    FROM retirees
);

-- Checking whether values exist. The subquery is not correlated, so this
-- returns all rows from employees whenever retirees has at least one row.
SELECT
    first_name,
    last_name
FROM employees
WHERE EXISTS (
    SELECT retirees.id
    FROM retirees
);

-- Using a correlated subquery to find matching values from employees
-- in retirees.
SELECT
    first_name,
    last_name
FROM employees
WHERE EXISTS (
    SELECT retirees.id
    FROM retirees
    WHERE retirees.id = employees.emp_id
);
-- Listing 12-7: Using a simple CTE to find large counties
-- large_counties holds counties of at least 100,000 people; the main query
-- counts them per state.
WITH large_counties (geo_name, st, p0010001) AS (
    SELECT
        geo_name,
        state_us_abbreviation,
        p0010001
    FROM us_counties_2010
    WHERE p0010001 >= 100000
)
SELECT
    st,
    count(*)
FROM large_counties
GROUP BY st
ORDER BY count(*) DESC;
-- Bonus: the same count written without a CTE
SELECT
    state_us_abbreviation,
    count(*)
FROM us_counties_2010
WHERE p0010001 >= 100000
GROUP BY state_us_abbreviation
ORDER BY count(*) DESC;
-- Listing 12-8: Using CTEs in a table join
-- counties: population total per state.
-- plants:   processing-plant count per state.
WITH counties (st, population) AS (
    SELECT
        state_us_abbreviation,
        sum(population_count_100_percent)
    FROM us_counties_2010
    GROUP BY state_us_abbreviation
),
plants (st, plants) AS (
    SELECT
        st,
        count(*) AS plants
    FROM meat_poultry_egg_inspect
    GROUP BY st
)
SELECT
    counties.st,
    population,
    plants,
    round((plants / population::numeric(10,1)) * 1000000, 1) AS per_million
FROM counties
INNER JOIN plants
    ON counties.st = plants.st
ORDER BY per_million DESC;
-- Listing 12-9: Using CTEs to minimize redundant code
-- us_median is a one-row CTE; cross joining it attaches the median to every
-- county so the expression is written only once.
WITH us_median AS (
    SELECT
        percentile_cont(0.5)
            WITHIN GROUP (ORDER BY p0010001) AS us_median_pop
    FROM us_counties_2010
)
SELECT
    geo_name,
    state_us_abbreviation AS st,
    p0010001 AS total_pop,
    us_median_pop,
    p0010001 - us_median_pop AS diff_from_median
FROM us_counties_2010
CROSS JOIN us_median
WHERE (p0010001 - us_median_pop) BETWEEN -1000 AND 1000;
-- Cross tabulations
-- Install the crosstab() function via the tablefunc module.
-- IF NOT EXISTS keeps the script re-runnable in a database where the
-- extension is already installed.
CREATE EXTENSION IF NOT EXISTS tablefunc;
-- Listing 12-10: Creating and filling the ice_cream_survey table
CREATE TABLE ice_cream_survey (
    response_id integer PRIMARY KEY,
    office      varchar(20),
    flavor      varchar(20)
);

-- Replace the path with the actual location of the CSV file.
COPY ice_cream_survey
FROM 'C:\YourDirectory\ice_cream_survey.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 12-11: Generating the ice cream survey crosstab
-- crosstab() receives two SQL strings. The first returns one row per
-- office/flavor pair with its response count; the second returns the list of
-- flavors that become output columns. The AS clause names and types the
-- result columns.
-- NOTE(review): the column list assumes the survey contains exactly the
-- flavors chocolate, strawberry and vanilla -- confirm against the CSV.
SELECT *
FROM crosstab('SELECT office,
flavor,
count(*)
FROM ice_cream_survey
GROUP BY office, flavor
ORDER BY office',
'SELECT flavor
FROM ice_cream_survey
GROUP BY flavor
ORDER BY flavor')
AS (office varchar(20),
chocolate bigint,
strawberry bigint,
vanilla bigint);
-- Listing 12-12: Creating and filling a temperature_readings table
CREATE TABLE temperature_readings (
    reading_id       bigserial PRIMARY KEY,
    station_name     varchar(50),
    observation_date date,
    max_temp         integer,
    min_temp         integer
);

-- reading_id is left out of the column list, so the bigserial default
-- fills it in during the load.
COPY temperature_readings
    (station_name, observation_date, max_temp, min_temp)
FROM 'C:\YourDirectory\temperature_readings.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 12-13: Generating the temperature readings crosstab
-- First SQL string: one row per station and month number holding the median
-- (percentile_cont(.5)) of max_temp for that month.
-- Second SQL string: the category list, the integers 1 through 12 produced
-- by generate_series(), one per month column.
-- The AS clause supplies one output column per month, in January-December
-- order to line up with categories 1-12.
SELECT *
FROM crosstab('SELECT
station_name,
date_part(''month'', observation_date),
percentile_cont(.5)
WITHIN GROUP (ORDER BY max_temp)
FROM temperature_readings
GROUP BY station_name,
date_part(''month'', observation_date)
ORDER BY station_name',
'SELECT month
FROM generate_series(1,12) month')
AS (station varchar(50),
jan numeric(3,0),
feb numeric(3,0),
mar numeric(3,0),
apr numeric(3,0),
may numeric(3,0),
jun numeric(3,0),
jul numeric(3,0),
aug numeric(3,0),
sep numeric(3,0),
oct numeric(3,0),
nov numeric(3,0),
dec numeric(3,0)
);
-- Listing 12-14: Re-classifying temperature data with CASE
-- CASE stops at the first branch that is true, and max_temp is an integer
-- column, so descending lower bounds select the bands 90+, 70-89, 50-69,
-- 33-49 and 20-32. Anything else, including NULL, falls to ELSE.
SELECT
    max_temp,
    CASE
        WHEN max_temp >= 90 THEN 'Hot'
        WHEN max_temp >= 70 THEN 'Warm'
        WHEN max_temp >= 50 THEN 'Pleasant'
        WHEN max_temp >= 33 THEN 'Cold'
        WHEN max_temp >= 20 THEN 'Freezing'
        ELSE 'Inhumane'
    END AS temperature_group
FROM temperature_readings;
-- Listing 12-15: Using CASE in a Common Table Expression
-- temps_collapsed labels every reading with a temperature band; the main
-- query counts readings per station and band.
WITH temps_collapsed (station_name, max_temperature_group) AS (
    SELECT
        station_name,
        CASE
            WHEN max_temp >= 90 THEN 'Hot'
            WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
            WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
            WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
            WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
            ELSE 'Inhumane'
        END
    FROM temperature_readings
)
SELECT
    station_name,
    max_temperature_group,
    count(*)
FROM temps_collapsed
GROUP BY
    station_name,
    max_temperature_group
ORDER BY
    station_name,
    count(*) DESC;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 12: Advanced Query Techniques ### Chapter 13: Advanced Query Techniques
Explains how to use more complex SQL operations, such as subqueries and cross tabulations, and the CASE statement to reclassify values in a data set on temperature readings. Explains how to use more complex SQL operations, such as subqueries and cross tabulations, and the CASE statement to reclassify values in a data set on temperature readings.

View File

@ -1,363 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 13 Code Examples
--------------------------------------------------------------
-- Commonly used string functions
-- Full list at https://www.postgresql.org/docs/current/static/functions-string.html
-- Case formatting
SELECT upper('Neal7');                    -- NEAL7
SELECT lower('Randy');                    -- randy
SELECT initcap('at the end of the day');  -- At The End Of The Day
-- initcap() lowercases everything after a word's first letter,
-- so acronyms come out wrong:
SELECT initcap('Practical SQL');          -- Practical Sql

-- Character information
SELECT char_length(' Pat ');
SELECT length(' Pat ');
SELECT position(', ' IN 'Tan, Bella');    -- 4

-- Removing characters
SELECT trim('s' FROM 'socks');            -- ock
SELECT trim(TRAILING 's' FROM 'socks');   -- sock
SELECT trim(' Pat ');                     -- Pat
SELECT char_length(trim(' Pat '));        -- 3; note the length change
SELECT ltrim('socks', 's');               -- ocks
SELECT rtrim('socks', 's');               -- sock

-- Extracting and replacing characters
SELECT left('703-555-1212', 3);           -- 703
SELECT right('703-555-1212', 8);          -- 555-1212
SELECT replace('bat', 'b', 'c');          -- cat
-- Table 13-2: Regular Expression Matching Examples
-- Each query applies one pattern to the same sentence; the trailing comment
-- shows the substring returned.

-- Any character one or more times
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM '.+');
-- returns the whole sentence

-- One or two digits followed by a space and a.m. or p.m.
-- The dots are escaped so they match literal periods only; an unescaped
-- dot would match any character.
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM '\d{1,2} (?:a\.m\.|p\.m\.)');
-- 7 p.m.

-- One or more word characters at the start
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM '^\w+');
-- The

-- One or more word characters followed by any character at the end.
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM '\w+.$');
-- 2019.

-- The words May or June
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM 'May|June');
-- May

-- Four digits
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM '\d{4}');
-- 2019

-- May followed by a space, digit, comma, space, and four digits.
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' FROM 'May \d, \d{4}');
-- May 2, 2019
-- Turning Text to Data with Regular Expression Functions
-- Listing 13-2: Creating and loading the crime_reports table
-- Data from https://sheriff.loudoun.gov/dailycrime
-- Only original_text is loaded from the file; the remaining columns start
-- out NULL.
CREATE TABLE crime_reports (
    crime_id      bigserial PRIMARY KEY,
    date_1        timestamp with time zone,
    date_2        timestamp with time zone,
    street        varchar(250),
    city          varchar(100),
    crime_type    varchar(100),
    description   text,
    case_number   varchar(50),
    original_text text NOT NULL
);

COPY crime_reports
    (original_text)
FROM 'C:\YourDirectory\crime_reports.csv'
WITH (FORMAT CSV, HEADER OFF, QUOTE '"');

SELECT original_text
FROM crime_reports;
-- Listing 13-3: Using regexp_match() to find the first date
-- Returns only the first M/D/YY-style match per report.
SELECT
    crime_id,
    regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}')
FROM crime_reports;

-- Listing 13-4: Using the regexp_matches() function with the 'g' flag
-- The 'g' flag returns every match, one output row per match.
SELECT
    crime_id,
    regexp_matches(original_text, '\d{1,2}\/\d{1,2}\/\d{2}', 'g')
FROM crime_reports;
-- Listing 13-5: Using regexp_match() to find the second date
-- The pattern starts at the hyphen before the second date, so the result
-- includes that unwanted hyphen.
SELECT
    crime_id,
    regexp_match(original_text, '-\d{1,2}\/\d{1,2}\/\d{1,2}')
FROM crime_reports;

-- Listing 13-6: Using a capture group to return only the date
-- Only the parenthesized part is returned, which drops the hyphen.
SELECT
    crime_id,
    regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
FROM crime_reports;
-- Listing 13-7: Matching case number, date, crime type, and city
-- One regexp_match() call per element; each result is a text array.
SELECT
    regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number,
    regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
    regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
    regexp_match(
        original_text,
        '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'
    ) AS city
FROM crime_reports;
-- Bonus: Get all parsed elements at once
-- regexp_match() already returns NULL when its pattern finds nothing, so the
-- optional second date and second hour need no CASE WHEN EXISTS guard, which
-- would evaluate each pattern twice per row.
SELECT
    crime_id,
    regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
    regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') AS date_2,
    regexp_match(original_text, '\/\d{2}\n(\d{4})') AS hour_1,
    regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') AS hour_2,
    regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))') AS street,
    regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') AS city,
    regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
    regexp_match(original_text, ':\s(.+)(?:C0|SO)') AS description,
    regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number
FROM crime_reports;
-- Listing 13-8: Retrieving a value from within an array
-- [1] picks the first element of the text array regexp_match() returns.
SELECT
    crime_id,
    (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1] AS case_number
FROM crime_reports;
-- Listing 13-9: Updating the crime_reports date_1 column
-- Builds "<date> <hour> US/Eastern" from two regex captures and casts the
-- text to timestamptz. There is no WHERE clause: every row is updated.
UPDATE crime_reports
SET date_1 = (
        (regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
        || ' '
        || (regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
        || ' US/Eastern'
    )::timestamptz;

SELECT
    crime_id,
    date_1,
    original_text
FROM crime_reports;
-- Listing 13-10: Updating all crime_reports columns
-- Fills every parsed column from original_text in one statement. There is no
-- WHERE clause, so all rows are updated.
UPDATE crime_reports
-- date_1: first date + first hour + time zone name, cast to timestamptz
SET date_1 =
(
(regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|| ' ' ||
(regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
||' US/Eastern'
)::timestamptz,
-- date_2: built from the second hour, paired with either the first date or
-- the second date depending on which of them the text contains
date_2 =
CASE
-- if there is no second date but there is a second hour
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NULL)
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
THEN
-- reuse the first date with the second hour
((regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|| ' ' ||
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
||' US/Eastern'
)::timestamptz
-- if there is both a second date and second hour
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NOT NULL)
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
THEN
-- pair the second date with the second hour
((regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))[1]
|| ' ' ||
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
||' US/Eastern'
)::timestamptz
-- if neither of those conditions exist, provide a NULL
ELSE NULL
END,
-- Remaining columns: first element of each regexp_match() result
street = (regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))'))[1],
city = (regexp_match(original_text,
'(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'))[1],
crime_type = (regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):'))[1],
description = (regexp_match(original_text, ':\s(.+)(?:C0|SO)'))[1],
case_number = (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1];
-- Listing 13-11: Viewing selected crime data
SELECT
    date_1,
    street,
    city,
    crime_type
FROM crime_reports;
-- Listing 13-12: Using regular expressions in a WHERE clause
-- ~* is a case-insensitive regex match.
SELECT geo_name
FROM us_counties_2010
WHERE geo_name ~* '(.+lade.+|.+lare.+)'
ORDER BY geo_name;

-- !~ is the case-sensitive negation, used here to exclude names that
-- contain "Wash" followed by more characters.
SELECT geo_name
FROM us_counties_2010
WHERE geo_name ~* '.+ash.+'
  AND geo_name !~ 'Wash.+'
ORDER BY geo_name;
-- Listing 13-13: Regular expression functions to replace and split
-- Replace the four-digit year.
SELECT regexp_replace('05/12/2018', '\d{4}', '2017');  -- 05/12/2017

-- Split on commas into one row per word.
SELECT regexp_split_to_table('Four,score,and,seven,years,ago', ',');

-- Split on spaces into a single array.
SELECT regexp_split_to_array('Phil Mike Tony Steve', ' ');  -- {Phil,Mike,Tony,Steve}

-- Listing 13-14: Finding an array length
-- The second argument, 1, selects the array's first dimension.
SELECT array_length(
    regexp_split_to_array('Phil Mike Tony Steve', ' '),
    1
);  -- 4
-- FULL TEXT SEARCH
-- Full-text search operators:
-- & (AND)
-- | (OR)
-- ! (NOT)
-- Listing 13-15: Converting text to tsvector data
-- The 'english' configuration is named explicitly so stemming and stop-word
-- removal do not depend on the session's default_text_search_config.
SELECT to_tsvector('english', 'I am walking across the sitting room to sit with you.');

-- Listing 13-16: Converting search terms to tsquery data
SELECT to_tsquery('english', 'walking & sitting');

-- Listing 13-17: Querying a tsvector type with a tsquery
-- True: both terms are present.
SELECT to_tsvector('english', 'I am walking across the sitting room')
       @@ to_tsquery('english', 'walking & sitting');

-- False: "running" does not appear in the text.
SELECT to_tsvector('english', 'I am walking across the sitting room')
       @@ to_tsquery('english', 'walking & running');
-- Listing 13-18: Creating and filling the president_speeches table
-- Sources:
-- https://archive.org/details/State-of-the-Union-Addresses-1945-2006
-- http://www.presidency.ucsb.edu/ws/index.php
-- https://www.eisenhower.archives.gov/all_about_ike/speeches.html
CREATE TABLE president_speeches (
    sotu_id            serial PRIMARY KEY,
    president          varchar(100) NOT NULL,
    title              varchar(250) NOT NULL,
    speech_date        date NOT NULL,
    speech_text        text NOT NULL,
    search_speech_text tsvector
);

-- The file is pipe-delimited, has no header row and uses @ as its quote
-- character.
COPY president_speeches
    (president, title, speech_date, speech_text)
FROM 'C:\YourDirectory\sotu-1946-1977.csv'
WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@');

SELECT *
FROM president_speeches;
-- Listing 13-19: Converting speeches to tsvector in the search_speech_text column
-- Every row is updated, so there is deliberately no WHERE clause.
UPDATE president_speeches
SET search_speech_text = to_tsvector('english', speech_text);

-- Listing 13-20: Creating a GIN index for text search
CREATE INDEX search_idx
    ON president_speeches
    USING gin (search_speech_text);
-- Listing 13-21: Finding speeches containing the word "Vietnam"
-- search_speech_text is built with the 'english' configuration, so the query
-- names the same configuration rather than relying on the session default.
SELECT
    president,
    speech_date
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'Vietnam')
ORDER BY speech_date;
-- Listing 13-22: Displaying search results with ts_headline()
-- The 'english' configuration is passed to both ts_headline() and
-- to_tsquery() to match the configuration used for search_speech_text.
-- The options string sets the highlight markers (< and >) and limits the
-- excerpt to 5-7 words in a single fragment.
SELECT
    president,
    speech_date,
    ts_headline(
        'english',
        speech_text,
        to_tsquery('english', 'Vietnam'),
        'StartSel = <,
StopSel = >,
MinWords=5,
MaxWords=7,
MaxFragments=1'
    )
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'Vietnam');
-- Listing 13-23: Finding speeches with the word "transportation" but not "roads"
-- 'english' is named explicitly to match the configuration used to build
-- search_speech_text.
SELECT
    president,
    speech_date,
    ts_headline(
        'english',
        speech_text,
        to_tsquery('english', 'transportation & !roads'),
        'StartSel = <,
StopSel = >,
MinWords=5,
MaxWords=7,
MaxFragments=1'
    )
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'transportation & !roads');
-- Listing 13-24: Find speeches where "defense" follows "military"
-- <-> requires the second term to come immediately after the first.
-- 'english' is named explicitly to match the configuration used to build
-- search_speech_text.
SELECT
    president,
    speech_date,
    ts_headline(
        'english',
        speech_text,
        to_tsquery('english', 'military <-> defense'),
        'StartSel = <,
StopSel = >,
MinWords=5,
MaxWords=7,
MaxFragments=1'
    )
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'military <-> defense');
-- Bonus: Example with a distance of 2:
-- <2> requires "defense" to appear exactly two positions after "military".
-- 'english' is named explicitly to match the configuration used to build
-- search_speech_text.
SELECT
    president,
    speech_date,
    ts_headline(
        'english',
        speech_text,
        to_tsquery('english', 'military <2> defense'),
        'StartSel = <,
StopSel = >,
MinWords=5,
MaxWords=7,
MaxFragments=2'
    )
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'military <2> defense');
-- Listing 13-25: Scoring relevance with ts_rank()
-- Returns the five highest-ranked speeches containing all four terms.
-- 'english' is named explicitly to match the configuration used to build
-- search_speech_text.
SELECT
    president,
    speech_date,
    ts_rank(
        search_speech_text,
        to_tsquery('english', 'war & security & threat & enemy')
    ) AS score
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'war & security & threat & enemy')
ORDER BY score DESC
LIMIT 5;
-- Listing 13-26: Normalizing ts_rank() by speech length
-- The third argument, 2, is the normalization option that divides the rank
-- by the document length. 'english' is named explicitly to match the
-- configuration used to build search_speech_text.
SELECT
    president,
    speech_date,
    ts_rank(
        search_speech_text,
        to_tsquery('english', 'war & security & threat & enemy'),
        2
    )::numeric AS score
FROM president_speeches
WHERE search_speech_text @@ to_tsquery('english', 'war & security & threat & enemy')
ORDER BY score DESC
LIMIT 5;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 13: Mining Text to Find Meaningful Data ### Chapter 14: Mining Text to Find Meaningful Data
Covers how to use PostgreSQL's full text search engine and regular expressions to extract data from unstructured text, using a collection of speeches by U.S. presidents as an example. Covers how to use PostgreSQL's full text search engine and regular expressions to extract data from unstructured text, using a collection of speeches by U.S. presidents as an example.

View File

@ -1,252 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 14 Code Examples
--------------------------------------------------------------
-- Listing 14-1: Creating a gis_analysis database
CREATE DATABASE gis_analysis;
-- Note: Switch to this new database before continuing the examples
-- Listing 14-2: Loading the PostGIS extension
-- IF NOT EXISTS keeps the script re-runnable in a database where PostGIS is
-- already installed.
CREATE EXTENSION IF NOT EXISTS postgis;

SELECT postgis_full_version(); -- shows PostGIS version
-- Listing 14-3: Retrieving the well-known text for SRID 4326
SELECT srtext
FROM spatial_ref_sys
WHERE srid = 4326;
-- Listing 14-4: Using ST_GeomFromText() to create spatial objects
-- Each call passes a well-known text string and the SRID 4326.
-- A single point
SELECT ST_GeomFromText('POINT(-74.9233606 42.699992)', 4326);
-- A line between two points
SELECT ST_GeomFromText('LINESTRING(-74.9 42.7, -75.1 42.7)', 4326);
-- A polygon; the ring ends on the coordinate pair it starts with
SELECT ST_GeomFromText('POLYGON((-74.9 42.7, -75.1 42.7,
-75.1 42.6, -74.9 42.7))', 4326);
-- Two points in one object
SELECT ST_GeomFromText('MULTIPOINT (-74.9 42.7, -75.1 42.7)', 4326);
-- Two lines in one object
SELECT ST_GeomFromText('MULTILINESTRING((-76.27 43.1, -76.06 43.08),
(-76.2 43.3, -76.2 43.4,
-76.4 43.1))', 4326);
-- A multipolygon made of two coordinate rings
SELECT ST_GeomFromText('MULTIPOLYGON((
(-74.92 42.7, -75.06 42.71,
-75.07 42.64, -74.92 42.7),
(-75.0 42.66, -75.0 42.64,
-74.98 42.64, -74.98 42.66,
-75.0 42.66)))', 4326);
-- Listing 14-5: Using ST_GeogFromText() to create spatial objects
-- The SRID is given inside the text itself.
SELECT ST_GeogFromText(
    'SRID=4326;MULTIPOINT(-74.9 42.7, -75.1 42.7, -74.924 42.6)'
);

-- Listing 14-6: Functions specific to making points
SELECT ST_PointFromText('POINT(-74.9233606 42.699992)', 4326);

-- ST_MakePoint() takes the two coordinates as numbers and no SRID.
SELECT ST_MakePoint(-74.9233606, 42.699992);

-- Wrap it in ST_SetSRID() to attach SRID 4326.
SELECT ST_SetSRID(
    ST_MakePoint(-74.9233606, 42.699992),
    4326
);

-- Listing 14-7: Functions specific to making LineStrings
SELECT ST_LineFromText('LINESTRING(-105.90 35.67,-105.91 35.67)', 4326);

SELECT ST_MakeLine(
    ST_MakePoint(-74.92, 42.69),
    ST_MakePoint(-74.12, 42.45)
);
-- Listing 14-8: Functions specific to making Polygons
-- Polygon from well-known text with SRID 4326
SELECT ST_PolygonFromText('POLYGON((-74.9 42.7, -75.1 42.7,
-75.1 42.6, -74.9 42.7))', 4326);
-- Polygon built from a LineString that ends on its starting coordinate
SELECT ST_MakePolygon(
ST_GeomFromText('LINESTRING(-74.92 42.7, -75.06 42.71,
-75.07 42.64, -74.92 42.7)', 4326));
-- Multipolygon from well-known text
SELECT ST_MPolyFromText('MULTIPOLYGON((
(-74.92 42.7, -75.06 42.71,
-75.07 42.64, -74.92 42.7),
(-75.0 42.66, -75.0 42.64,
-74.98 42.64, -74.98 42.66,
-75.0 42.66)
))', 4326);
-- ANALYZING FARMERS MARKETS DATA
-- https://catalog.data.gov/dataset/farmers-markets-geographic-data
-- https://www.ams.usda.gov/local-food-directories/farmersmarkets
-- Listing 14-9: Create and load the farmers_markets table
CREATE TABLE farmers_markets (
    fmid        bigint PRIMARY KEY,
    market_name varchar(100) NOT NULL,
    street      varchar(180),
    city        varchar(60),
    county      varchar(25),
    st          varchar(20) NOT NULL,
    zip         varchar(10),
    longitude   numeric(10,7),
    latitude    numeric(10,7),
    organic     varchar(1) NOT NULL
);

-- The column list ties the load to the ten columns above, so the COPY keeps
-- working after more columns are added to the table (Listing 14-10 adds
-- geog_point).
COPY farmers_markets
    (fmid, market_name, street, city, county, st, zip,
     longitude, latitude, organic)
FROM 'C:\YourDirectory\farmers_markets.csv'
WITH (FORMAT CSV, HEADER);

SELECT count(*) FROM farmers_markets; -- should return 8,681 rows
-- Listing 14-10: Creating and indexing a geography column
-- There's also a function: https://postgis.net/docs/AddGeometryColumn.html

-- Add column
ALTER TABLE farmers_markets
    ADD COLUMN geog_point geography(POINT,4326);

-- Fill the new column from each row's longitude and latitude.
-- Every row is updated, so there is deliberately no WHERE clause.
UPDATE farmers_markets
SET geog_point =
    ST_SetSRID(
        ST_MakePoint(longitude, latitude),
        4326
    )::geography;

-- Add a GiST index
CREATE INDEX market_pts_idx
    ON farmers_markets
    USING GIST (geog_point);

-- View the geography column next to its text form
SELECT
    longitude,
    latitude,
    geog_point,
    ST_AsText(geog_point)
FROM farmers_markets
WHERE longitude IS NOT NULL
LIMIT 5;
-- Listing 14-11: Using ST_DWithin() to locate farmers' markets within 10 kilometers of a point
-- The distance argument 10000 is in meters.
SELECT
    market_name,
    city,
    st
FROM farmers_markets
WHERE ST_DWithin(
    geog_point,
    ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
    10000
)
ORDER BY market_name;
-- Listing 14-12: Using ST_Distance() to calculate the miles between Yankee Stadium
-- and Citi Field (Mets)
-- The distance is divided by 1609.344 meters/mile to express it in miles.
SELECT
    ST_Distance(
        ST_GeogFromText('POINT(-73.9283685 40.8296466)'),
        ST_GeogFromText('POINT(-73.8480153 40.7570917)')
    ) / 1609.344 AS mets_to_yanks;
-- Listing 14-13: Using ST_Distance() for each row in farmers_markets
-- For markets within 10,000 meters of the point, report the distance in
-- miles (meters / 1609.344), rounded to two decimals, nearest first.
SELECT
    market_name,
    city,
    round(
        (
            ST_Distance(
                geog_point,
                ST_GeogFromText('POINT(-93.6204386 41.5853202)')
            ) / 1609.344
        )::numeric(8,5),
        2
    ) AS miles_from_dt
FROM farmers_markets
WHERE ST_DWithin(
    geog_point,
    ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
    10000
)
ORDER BY miles_from_dt ASC;
-- WORKING WITH SHAPEFILES
-- Resources:
-- TIGER/Line® Shapefiles and TIGER/Line® Files
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
-- Cartographic Boundary Shapefiles - Counties
-- https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html
-- Listing 14-14: Checking the geom column's well-known text representation
SELECT ST_AsText(geom)
FROM us_counties_2010_shp
LIMIT 1;

-- Listing 14-15: Find the largest counties by area using ST_Area()
-- The area of the geography cast is divided by 2589988.110336 to express it
-- in square miles.
SELECT
    name10,
    statefp10 AS st,
    round(
        (ST_Area(geom::geography) / 2589988.110336)::numeric,
        2
    ) AS square_miles
FROM us_counties_2010_shp
ORDER BY square_miles DESC
LIMIT 5;

-- Listing 14-16: Using ST_Within() to find the county belonging to a pair of coordinates
-- The point literal carries SRID 4269.
SELECT
    name10,
    statefp10
FROM us_counties_2010_shp
WHERE ST_Within(
    'SRID=4269;POINT(-118.3419063 34.0977076)'::geometry,
    geom
);
-- SPATIAL JOINS
-- SANTA FE WATER AND ROAD DATA
-- http://www.santafenm.gov/santa_fe_river
-- Census 2016 Tiger/Line roads, water
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Roads
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Water
-- RTTYP - Route Type Code Description
-- https://www.census.gov/geo/reference/rttyp.html
-- C County
-- I Interstate
-- M Common Name
-- O Other
-- S State recognized
-- U U.S.
-- MTFCC MAF/TIGER feature class code
-- https://www.census.gov/geo/reference/mtfcc.html
-- Here, H3010: A natural flowing waterway
-- Listing 14-17: Using ST_GeometryType() to determine geometry
-- Check one row from each Santa Fe table.
SELECT ST_GeometryType(geom)
FROM santafe_linearwater_2016
LIMIT 1;

SELECT ST_GeometryType(geom)
FROM santafe_roads_2016
LIMIT 1;
-- Listing 14-18: Spatial join with ST_Intersects() to find roads crossing the Santa Fe river
-- The join condition is spatial: a road row pairs with a water row when
-- their geometries intersect.
SELECT
    water.fullname AS waterway,
    roads.rttyp,
    roads.fullname AS road
FROM santafe_linearwater_2016 AS water
INNER JOIN santafe_roads_2016 AS roads
    ON ST_Intersects(water.geom, roads.geom)
WHERE water.fullname = 'Santa Fe Riv'
ORDER BY roads.fullname;
-- Listing 14-19: Using ST_Intersection() to show where roads cross the river
-- Same join as Listing 14-18, plus the text form of each intersection.
SELECT
    water.fullname AS waterway,
    roads.rttyp,
    roads.fullname AS road,
    ST_AsText(ST_Intersection(water.geom, roads.geom))
FROM santafe_linearwater_2016 AS water
INNER JOIN santafe_roads_2016 AS roads
    ON ST_Intersects(water.geom, roads.geom)
WHERE water.fullname = 'Santa Fe Riv'
ORDER BY roads.fullname
LIMIT 5;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 14: Analyzing Spatial Data with PostGIS ### Chapter 15: Analyzing Spatial Data with PostGIS
Introduces data types and queries related to spatial objects, which will let you analyze geographical features like states, roads, and rivers. Introduces data types and queries related to spatial objects, which will let you analyze geographical features like states, roads, and rivers.

View File

@ -1,321 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 15 Code Examples
--------------------------------------------------------------
-- VIEWS
-- Listing 15-1: Creating a view that displays Nevada 2010 counties
-- Four columns for rows whose state abbreviation is 'NV', ordered by
-- county FIPS code.
CREATE OR REPLACE VIEW nevada_counties_pop_2010 AS
    SELECT
        geo_name,
        state_fips,
        county_fips,
        p0010001 AS pop_2010
    FROM us_counties_2010
    WHERE state_us_abbreviation = 'NV'
    ORDER BY county_fips;

-- Listing 15-2: Querying the nevada_counties_pop_2010 view
SELECT *
FROM nevada_counties_pop_2010
LIMIT 5;
-- Listing 15-3: Creating a view showing population change for US counties
-- Joins the 2010 and 2000 county tables on state and county FIPS codes and
-- computes the percent change, rounded to one decimal place.
CREATE OR REPLACE VIEW county_pop_change_2010_2000 AS
    SELECT
        c2010.geo_name,
        c2010.state_us_abbreviation AS st,
        c2010.state_fips,
        c2010.county_fips,
        c2010.p0010001 AS pop_2010,
        c2000.p0010001 AS pop_2000,
        round(
            (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
                / c2000.p0010001 * 100,
            1
        ) AS pct_change_2010_2000
    FROM us_counties_2010 AS c2010
    INNER JOIN us_counties_2000 AS c2000
        ON c2010.state_fips = c2000.state_fips
        AND c2010.county_fips = c2000.county_fips
    ORDER BY
        c2010.state_fips,
        c2010.county_fips;

-- Listing 15-4: Selecting columns from the county_pop_change_2010_2000 view
SELECT
    geo_name,
    st,
    pop_2010,
    pct_change_2010_2000
FROM county_pop_change_2010_2000
WHERE st = 'NV'
LIMIT 5;
-- Listing 15-5: Creating a view on the employees table
-- The view shows only dept_id = 1; WITH LOCAL CHECK OPTION applies that same
-- condition to rows inserted or updated through the view.
CREATE OR REPLACE VIEW employees_tax_dept AS
    SELECT
        emp_id,
        first_name,
        last_name,
        dept_id
    FROM employees
    WHERE dept_id = 1
    ORDER BY emp_id
    WITH LOCAL CHECK OPTION;

SELECT *
FROM employees_tax_dept;
-- Listing 15-6: Successful and rejected inserts via the employees_tax_dept view
-- dept_id 1 meets the view's condition.
INSERT INTO employees_tax_dept
    (first_name, last_name, dept_id)
VALUES
    ('Suzanne', 'Legere', 1);

-- dept_id 2 does not meet the view's dept_id = 1 condition.
INSERT INTO employees_tax_dept
    (first_name, last_name, dept_id)
VALUES
    ('Jamil', 'White', 2);

-- optional:
SELECT *
FROM employees_tax_dept;

SELECT *
FROM employees;

-- Listing 15-7: Updating a row via the employees_tax_dept view
UPDATE employees_tax_dept
SET last_name = 'Le Gere'
WHERE emp_id = 5;

SELECT *
FROM employees_tax_dept;

-- Bonus: This will fail because the salary column is not in the view
UPDATE employees_tax_dept
SET salary = 100000
WHERE emp_id = 5;

-- Listing 15-8: Deleting a row via the employees_tax_dept view
DELETE FROM employees_tax_dept
WHERE emp_id = 5;
-- FUNCTIONS
-- https://www.postgresql.org/docs/current/static/plpgsql.html
-- Listing 15-9: Creating a percent_change function
-- To delete this function: DROP FUNCTION percent_change(numeric,numeric,integer);
--
-- Returns the percent change from old_value to new_value, rounded to
-- decimal_places (default 1).
--   new_value      the later value
--   old_value      the baseline value
--   decimal_places digits to keep after the decimal point
-- Returns NULL when any argument is NULL (RETURNS NULL ON NULL INPUT) or when
-- old_value is 0: NULLIF turns a zero baseline into NULL so the function
-- yields NULL instead of raising a division-by-zero error.
-- The body is dollar-quoted so it needs no quote escaping.
CREATE OR REPLACE FUNCTION
percent_change(new_value numeric,
               old_value numeric,
               decimal_places integer DEFAULT 1)
RETURNS numeric AS
$$
    SELECT round(
        ((new_value - old_value) / NULLIF(old_value, 0)) * 100,
        decimal_places
    );
$$
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;

-- Listing 15-10: Testing the percent_change() function
SELECT percent_change(110, 108, 2);  -- 1.85
-- Listing 15-11: Testing percent_change() on Census data
-- Shows the function result beside the hand-written formula for the five
-- counties with the largest percent change.
SELECT
    c2010.geo_name,
    c2010.state_us_abbreviation AS st,
    c2010.p0010001 AS pop_2010,
    percent_change(c2010.p0010001, c2000.p0010001) AS pct_chg_func,
    round(
        (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
            / c2000.p0010001 * 100,
        1
    ) AS pct_chg_formula
FROM us_counties_2010 AS c2010
INNER JOIN us_counties_2000 AS c2000
    ON c2010.state_fips = c2000.state_fips
    AND c2010.county_fips = c2000.county_fips
ORDER BY pct_chg_func DESC
LIMIT 5;
-- Listing 15-12: Adding a column to the teachers table and seeing the data
ALTER TABLE teachers
    ADD COLUMN personal_days integer;

SELECT
    first_name,
    last_name,
    hire_date,
    personal_days
FROM teachers;
-- Listing 15-13: Creating an update_personal_days() function
-- Sets personal_days for every teacher from length of service:
--   more than 10 years -> 5
--   5 to 10 years      -> 4
--   otherwise          -> 3 (this includes rows with a NULL hire_date)
-- Takes no arguments and returns nothing; raises a NOTICE when finished.
--
-- Service length is tested by stepping back from now() by calendar years and
-- comparing with hire_date. Subtracting the date from now() instead gives an
-- interval measured in days, and comparing that with '5 years' treats a year
-- as 360 days, which moves teachers to the next tier weeks too early.
CREATE OR REPLACE FUNCTION update_personal_days()
RETURNS void AS $$
BEGIN
    -- Intentionally unfiltered: every row in teachers is recalculated.
    UPDATE teachers
    SET personal_days =
        CASE
            WHEN now() - '10 years'::interval > hire_date THEN 5
            WHEN now() - '5 years'::interval >= hire_date THEN 4
            ELSE 3
        END;
    RAISE NOTICE 'personal_days updated!';
END;
$$ LANGUAGE plpgsql;
-- To run the function:
SELECT update_personal_days();
-- Listing 15-14: Enabling the PL/Python procedural language
-- plpython3u is the Python 3 variant of PL/Python. The Python 2 language
-- "plpythonu" is no longer shipped as of PostgreSQL 15.
-- IF NOT EXISTS keeps the statement re-runnable.
CREATE EXTENSION IF NOT EXISTS plpython3u;

-- Listing 15-15: Using PL/Python to create the trim_county() function
-- Returns input_string with every occurrence of the text ' County' removed.
CREATE OR REPLACE FUNCTION trim_county(input_string text)
RETURNS text AS $$
    import re
    cleaned = re.sub(r' County', '', input_string)
    return cleaned
$$ LANGUAGE plpython3u;
-- Listing 15-16: Testing the trim_county() function
-- Shows the first five counties in state/county FIPS order, with the original
-- name beside the value returned by trim_county().
SELECT
    counties.geo_name,
    trim_county(counties.geo_name) AS trim_county
FROM us_counties_2010 AS counties
ORDER BY
    counties.state_fips,
    counties.county_fips
LIMIT 5;
-- TRIGGERS
-- Listing 15-17: Creating the grades and grades_history tables

-- One row per student per course, holding the grade.
CREATE TABLE grades (
    student_id bigint,
    course_id bigint,
    course varchar(30) NOT NULL,
    grade varchar(5) NOT NULL,
    PRIMARY KEY (student_id, course_id)
);

-- Target columns are named explicitly so the INSERT does not depend on the
-- table's column order and keeps working if columns are added later.
INSERT INTO grades (student_id, course_id, course, grade)
VALUES
    (1, 1, 'Biology 2', 'F'),
    (1, 2, 'English 11B', 'D'),
    (1, 3, 'World History 11B', 'C'),
    (1, 4, 'Trig 2', 'B');

-- Log of grade changes: the old and new grade plus the time of the change.
-- change_time is part of the key, so one student/course pair can have
-- several history rows.
CREATE TABLE grades_history (
    student_id bigint NOT NULL,
    course_id bigint NOT NULL,
    change_time timestamp with time zone NOT NULL,
    course varchar(30) NOT NULL,
    old_grade varchar(5) NOT NULL,
    new_grade varchar(5) NOT NULL,
    PRIMARY KEY (student_id, course_id, change_time)
);
-- Listing 15-18: Creating the record_if_grade_changed() function

-- Trigger function. When the grade in the NEW row differs from the grade in
-- the OLD row, inserts one row into grades_history with the OLD row's
-- student_id, course_id and course, the old and new grade, and now() as
-- change_time. Always returns NEW.
CREATE OR REPLACE FUNCTION record_if_grade_changed()
    RETURNS trigger
    LANGUAGE plpgsql
AS $body$
BEGIN
    IF OLD.grade <> NEW.grade THEN
        INSERT INTO grades_history
            (student_id, course_id, change_time, course, old_grade, new_grade)
        VALUES
            (OLD.student_id, OLD.course_id, now(), OLD.course, OLD.grade, NEW.grade);
    END IF;

    RETURN NEW;
END;
$body$;
-- Listing 15-19: Creating the grades_update trigger
-- Calls record_if_grade_changed() once for each row of grades after that row
-- has been updated.
CREATE TRIGGER grades_update
    AFTER UPDATE ON grades
    FOR EACH ROW
    EXECUTE PROCEDURE record_if_grade_changed();
-- Listing 15-20: Testing the grades_update trigger

-- Initially, there should be 0 records in the history
SELECT student_id, course_id, change_time, course, old_grade, new_grade
FROM grades_history;

-- Check the grades
SELECT student_id, course_id, course, grade
FROM grades;

-- Update a grade
UPDATE grades
SET grade = 'C'
WHERE (student_id, course_id) = (1, 1);

-- Now check the history
SELECT
    student_id,
    change_time,
    course,
    old_grade,
    new_grade
FROM grades_history;
-- Listing 15-21: Creating a temperature_test table
-- One row per station per observation date. max_temp_group holds a text
-- label for max_temp.
CREATE TABLE temperature_test (
    station_name     character varying(50),
    observation_date date,
    max_temp         int,
    min_temp         int,
    max_temp_group   character varying(40),
    CONSTRAINT temperature_test_pkey
        PRIMARY KEY (station_name, observation_date)
);
-- Listing 15-22: Creating the classify_max_temp() function

-- Trigger function. Sets NEW.max_temp_group from NEW.max_temp and returns the
-- modified NEW row:
--     >= 90     'Hot'
--     70 to 89  'Warm'
--     50 to 69  'Pleasant'
--     33 to 49  'Cold'
--     20 to 32  'Freezing'
--     below 20  'Inhumane'
--     NULL      NULL
-- A missing max_temp is handled first and leaves the group NULL. Without
-- that branch every comparison on NULL is not true, so the row would fall
-- through to ELSE and be labelled 'Inhumane'.
-- Branches are tested top to bottom, so each one only needs a lower bound.
CREATE OR REPLACE FUNCTION classify_max_temp()
RETURNS trigger AS
$$
BEGIN
    CASE
        WHEN NEW.max_temp IS NULL THEN
            NEW.max_temp_group := NULL;
        WHEN NEW.max_temp >= 90 THEN
            NEW.max_temp_group := 'Hot';
        WHEN NEW.max_temp >= 70 THEN
            NEW.max_temp_group := 'Warm';
        WHEN NEW.max_temp >= 50 THEN
            NEW.max_temp_group := 'Pleasant';
        WHEN NEW.max_temp >= 33 THEN
            NEW.max_temp_group := 'Cold';
        WHEN NEW.max_temp >= 20 THEN
            NEW.max_temp_group := 'Freezing';
        ELSE
            NEW.max_temp_group := 'Inhumane';
    END CASE;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Listing 15-23: Creating the temperature_insert trigger
-- Calls classify_max_temp() for each row before it is inserted into
-- temperature_test. The trigger fires on INSERT only.
CREATE TRIGGER temperature_insert
    BEFORE INSERT ON temperature_test
    FOR EACH ROW
    EXECUTE PROCEDURE classify_max_temp();
-- Listing 15-24: Inserting rows to test the temperature_insert trigger
-- Dates are written in ISO 8601 form (YYYY-MM-DD) so they are read the same
-- way under any DateStyle setting. A literal such as '5/2/2019' is May 2 with
-- month-first input but February 5 with day-first input, and '1/19/2019' is
-- rejected outright when input is day-first.
-- max_temp_group is not supplied here; it is left for the trigger to fill in.
INSERT INTO temperature_test (station_name, observation_date, max_temp, min_temp)
VALUES
    ('North Station', '2019-01-19', 10, -3),
    ('North Station', '2019-03-20', 28, 19),
    ('North Station', '2019-05-02', 65, 42),
    ('North Station', '2019-08-09', 93, 74);

SELECT station_name,
       observation_date,
       max_temp,
       min_temp,
       max_temp_group
FROM temperature_test;

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 15: Saving Time with Views, Functions, and Triggers: ### Chapter 17: Saving Time with Views, Functions, and Triggers:
Explains how to automate database tasks so you can avoid repeating routine work. Explains how to automate database tasks so you can avoid repeating routine work.

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 16: Using PostgreSQL from the Command Line ### Chapter 18: Using PostgreSQL from the Command Line
Covers how to use text commands at your computer's command prompt to connect to your database and run queries. Covers how to use text commands at your computer's command prompt to connect to your database and run queries.

View File

@ -1,104 +0,0 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 17 Code Examples
--------------------------------------------------------------
-- VACUUM
-- Listing 17-1: Creating a table to test vacuuming
-- Single-column table used by the listings that follow.
CREATE TABLE vacuum_test (integer_column int);

-- Listing 17-2: Determining the size of vacuum_test
SELECT pg_size_pretty(pg_total_relation_size('vacuum_test'));

-- optional: Determine database size
SELECT pg_size_pretty(pg_database_size('analysis'));
-- Listing 17-3: Inserting 500,000 rows into vacuum_test
-- The target column and the generated column are both named, so the INSERT
-- does not depend on the table's column order.
INSERT INTO vacuum_test (integer_column)
SELECT series.n
FROM generate_series(1, 500000) AS series(n);

-- Test its size again
SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );

-- Listing 17-4: Updating all rows in vacuum_test
-- No WHERE clause on purpose: the listing updates every row.
UPDATE vacuum_test
SET integer_column = integer_column + 1;

-- Test its size again (35 MB)
SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );
-- Listing 17-5: Viewing autovacuum statistics for vacuum_test
-- Reads the vacuum timestamps and counters kept for vacuum_test in
-- pg_stat_all_tables.
SELECT
    stats.relname,
    stats.last_vacuum,
    stats.last_autovacuum,
    stats.vacuum_count,
    stats.autovacuum_count
FROM pg_stat_all_tables AS stats
WHERE stats.relname = 'vacuum_test';

-- To see all columns available
SELECT stats.*
FROM pg_stat_all_tables AS stats
WHERE stats.relname = 'vacuum_test';
-- Listing 17-6: Running VACUUM manually
-- Three forms are shown: one named table, no table name, and VERBOSE.
VACUUM vacuum_test;
VACUUM; -- vacuums the whole database
VACUUM VERBOSE; -- provides messages
-- Listing 17-7: Using VACUUM FULL to reclaim disk space
VACUUM FULL vacuum_test;
-- Test its size again
-- Run after VACUUM FULL so the result can be compared with the earlier
-- size checks on vacuum_test.
SELECT pg_size_pretty(
pg_table_size('vacuum_test')
);
-- SETTINGS
-- Listing 17-8: Showing the location of postgresql.conf
SHOW config_file;
-- Listing 17-10: Show the location of the data directory
SHOW data_directory;
-- reload settings
-- The two commands below are left commented out; -D takes the path to the
-- data directory (presumably the value reported by SHOW data_directory --
-- confirm for your installation).
-- Mac and Linux: pg_ctl reload -D '/path/to/data/directory/'
-- Windows: pg_ctl reload -D "C:\path\to\data\directory\"
-- BACKUP AND RESTORE
-- Listing 17-11: Backing up the analysis database with pg_dump
pg_dump -d analysis -U [user_name] -Fc > analysis_backup.sql
-- Back up just a table
pg_dump -t 'train_rides' -d analysis -U [user_name] -Fc > train_backup.sql
-- Listing 17-12: Restoring the analysis database with pg_restore
pg_restore -C -d postgres -U postgres analysis_backup_custom.sql

View File

@ -2,7 +2,7 @@
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
### Chapter 17: Maintaining Your Database: ### Chapter 19: Maintaining Your Database:
Provides tips and procedures for tracking the size of your database, customizing settings, and backing up data. Provides tips and procedures for tracking the size of your database, customizing settings, and backing up data.