More renumbering
This commit is contained in:
parent
01fc09a421
commit
204d3cc55d
@ -1,34 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 1 Code Examples
--------------------------------------------------------------

-- Listing 1-1: Creating a database named analysis

CREATE DATABASE analysis;

-- Listing 1-2: Creating a table named teachers with six columns

CREATE TABLE teachers (
    id bigserial,
    first_name varchar(25),
    last_name varchar(50),
    school varchar(50),
    hire_date date,
    salary numeric
);

-- This command will remove (drop) the table.
-- DROP TABLE teachers;

-- Listing 1-3 Inserting data into the teachers table

INSERT INTO teachers (first_name, last_name, school, hire_date, salary)
VALUES ('Janet', 'Smith', 'F.D. Roosevelt HS', '2011-10-30', 36200),
       ('Lee', 'Reynolds', 'F.D. Roosevelt HS', '1993-05-22', 65000),
       ('Samuel', 'Cole', 'Myers Middle School', '2005-08-01', 43500),
       ('Samantha', 'Bush', 'Myers Middle School', '2011-10-30', 36200),
       ('Betty', 'Diaz', 'Myers Middle School', '2005-08-30', 43500),
       ('Kathleen', 'Roush', 'F.D. Roosevelt HS', '2010-10-22', 38500);
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 1: Creating Your First Database and Table
|
||||
### Chapter 2: Creating Your First Database and Table
|
||||
|
||||
Introduces PostgreSQL, the pgAdmin user interface, and the code for loading a simple data set about teachers into a new database.
|
||||
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 2 Code Examples
--------------------------------------------------------------

-- Listing 2-1: Querying all rows and columns from the teachers table

SELECT * FROM teachers;

-- Listing 2-2: Querying a subset of columns

SELECT last_name, first_name, salary FROM teachers;

-- Listing 2-3: Querying distinct values in the school column

SELECT DISTINCT school
FROM teachers;

-- Listing 2-4: Querying distinct pairs of values in the school and salary
-- columns

SELECT DISTINCT school, salary
FROM teachers;

-- Listing 2-5: Sorting a column with ORDER BY

SELECT first_name, last_name, salary
FROM teachers
ORDER BY salary DESC;

-- Listing 2-6: Sorting multiple columns with ORDER BY

SELECT last_name, school, hire_date
FROM teachers
ORDER BY school ASC, hire_date DESC;

-- Listing 2-7: Filtering rows using WHERE

SELECT last_name, school, hire_date
FROM teachers
WHERE school = 'Myers Middle School';

-- Examples of WHERE comparison operators

-- Teachers with first name of Janet
SELECT first_name, last_name, school
FROM teachers
WHERE first_name = 'Janet';

-- School names not equal to F.D. Roosevelt HS
SELECT school
FROM teachers
WHERE school != 'F.D. Roosevelt HS';

-- Teachers hired before Jan. 1, 2000
SELECT first_name, last_name, hire_date
FROM teachers
WHERE hire_date < '2000-01-01';

-- Teachers earning 43,500 or more
SELECT first_name, last_name, salary
FROM teachers
WHERE salary >= 43500;

-- Teachers who earn between $40,000 and $65,000
SELECT first_name, last_name, school, salary
FROM teachers
WHERE salary BETWEEN 40000 AND 65000;

-- Listing 2-8: Filtering with LIKE AND ILIKE

SELECT first_name
FROM teachers
WHERE first_name LIKE 'sam%';

SELECT first_name
FROM teachers
WHERE first_name ILIKE 'sam%';

-- Listing 2-9: Combining operators using AND and OR

SELECT *
FROM teachers
WHERE school = 'Myers Middle School'
      AND salary < 40000;

SELECT *
FROM teachers
WHERE last_name = 'Cole'
      OR last_name = 'Bush';

SELECT *
FROM teachers
WHERE school = 'F.D. Roosevelt HS'
      AND (salary < 38000 OR salary > 40000);

-- Listing 2-10: A SELECT statement including WHERE and ORDER BY

SELECT first_name, last_name, school, hire_date, salary
FROM teachers
WHERE school LIKE '%Roos%'
ORDER BY hire_date DESC;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 2: Beginning Data Exploration with `SELECT`
|
||||
### Chapter 3: Beginning Data Exploration with `SELECT`
|
||||
|
||||
Explores basic SQL query syntax, including how to sort and filter data.
|
||||
|
||||
|
||||
@ -1,90 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 3 Code Examples
--------------------------------------------------------------

-- Listing 3-1: Character data types in action

CREATE TABLE char_data_types (
    varchar_column varchar(10),
    char_column char(10),
    text_column text
);

INSERT INTO char_data_types
VALUES
    ('abc', 'abc', 'abc'),
    ('defghi', 'defghi', 'defghi');

COPY char_data_types TO 'C:\YourDirectory\typetest.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');

-- Listing 3-2: Number data types in action

CREATE TABLE number_data_types (
    numeric_column numeric(20,5),
    real_column real,
    double_column double precision
);

INSERT INTO number_data_types
VALUES
    (.7, .7, .7),
    (2.13579, 2.13579, 2.13579),
    (2.1357987654, 2.1357987654, 2.1357987654);

SELECT * FROM number_data_types;

-- Listing 3-3: Rounding issues with float columns
-- Assumes table created and loaded with Listing 3-2

SELECT
    numeric_column * 10000000 AS "Fixed",
    real_column * 10000000 AS "Float"
FROM number_data_types
WHERE numeric_column = .7;

-- Listing 3-4: Timestamp and interval types in action

CREATE TABLE date_time_types (
    timestamp_column timestamp with time zone,
    interval_column interval
);

INSERT INTO date_time_types
VALUES
    ('2018-12-31 01:00 EST','2 days'),
    ('2018-12-31 01:00 PST','1 month'),
    ('2018-12-31 01:00 Australia/Melbourne','1 century'),
    (now(),'1 week');

SELECT * FROM date_time_types;

-- Listing 3-5: Using the interval data type
-- Assumes script 3-4 has been run

SELECT
    timestamp_column,
    interval_column,
    timestamp_column - interval_column AS new_date
FROM date_time_types;

-- Listing 3-6: Three CAST() examples

SELECT timestamp_column, CAST(timestamp_column AS varchar(10))
FROM date_time_types;

SELECT numeric_column,
       CAST(numeric_column AS integer),
       CAST(numeric_column AS varchar(6))
FROM number_data_types;

-- Does not work:
SELECT CAST(char_column AS integer) FROM char_data_types;

-- Alternate notation for CAST is the double-colon:
SELECT timestamp_column::varchar(10)
FROM date_time_types;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 3: Understanding Data Types
|
||||
### Chapter 4: Understanding Data Types
|
||||
|
||||
Explains the definitions for setting columns in a table to hold specific types of data, from text to dates to various forms of numbers.
|
||||
|
||||
|
||||
@ -1,212 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 4 Code Examples
--------------------------------------------------------------

-- Listing 4-1: Using COPY for data import
-- This is example syntax only; running it will produce an error

COPY table_name
FROM 'C:\YourDirectory\your_file.csv'
WITH (FORMAT CSV, HEADER);

-- Listing 4-2: A CREATE TABLE statement for Census county data
-- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
-- Note: Some columns have been given more descriptive names

CREATE TABLE us_counties_2010 (
    geo_name varchar(90),                    -- Name of the geography
    state_us_abbreviation varchar(2),        -- State/U.S. abbreviation
    summary_level varchar(3),                -- Summary Level
    region smallint,                         -- Region
    division smallint,                       -- Division
    state_fips varchar(2),                   -- State FIPS code
    county_fips varchar(3),                  -- County code
    area_land bigint,                        -- Area (Land) in square meters
    area_water bigint,                       -- Area (Water) in square meters
    population_count_100_percent integer,    -- Population count (100%)
    housing_unit_count_100_percent integer,  -- Housing Unit count (100%)
    internal_point_lat numeric(10,7),        -- Internal point (latitude)
    internal_point_lon numeric(10,7),        -- Internal point (longitude)

    -- This section is referred to as P1. Race:
    p0010001 integer,   -- Total population
    p0010002 integer,   -- Population of one race:
    p0010003 integer,       -- White Alone
    p0010004 integer,       -- Black or African American alone
    p0010005 integer,       -- American Indian and Alaska Native alone
    p0010006 integer,       -- Asian alone
    p0010007 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,       -- Some Other Race alone
    p0010009 integer,   -- Population of two or more races
    p0010010 integer,   -- Population of two races:
    p0010011 integer,       -- White; Black or African American
    p0010012 integer,       -- White; American Indian and Alaska Native
    p0010013 integer,       -- White; Asian
    p0010014 integer,       -- White; Native Hawaiian and Other Pacific Islander
    p0010015 integer,       -- White; Some Other Race
    p0010016 integer,       -- Black or African American; American Indian and Alaska Native
    p0010017 integer,       -- Black or African American; Asian
    p0010018 integer,       -- Black or African American; Native Hawaiian and Other Pacific Islander
    p0010019 integer,       -- Black or African American; Some Other Race
    p0010020 integer,       -- American Indian and Alaska Native; Asian
    p0010021 integer,       -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
    p0010022 integer,       -- American Indian and Alaska Native; Some Other Race
    p0010023 integer,       -- Asian; Native Hawaiian and Other Pacific Islander
    p0010024 integer,       -- Asian; Some Other Race
    p0010025 integer,       -- Native Hawaiian and Other Pacific Islander; Some Other Race
    p0010026 integer,   -- Population of three races
    p0010047 integer,   -- Population of four races
    p0010063 integer,   -- Population of five races
    p0010070 integer,   -- Population of six races

    -- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    p0020001 integer,   -- Total
    p0020002 integer,   -- Hispanic or Latino
    p0020003 integer,   -- Not Hispanic or Latino:
    p0020004 integer,   -- Population of one race:
    p0020005 integer,       -- White Alone
    p0020006 integer,       -- Black or African American alone
    p0020007 integer,       -- American Indian and Alaska Native alone
    p0020008 integer,       -- Asian alone
    p0020009 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0020010 integer,       -- Some Other Race alone
    p0020011 integer,   -- Two or More Races
    p0020012 integer,   -- Population of two races
    p0020028 integer,   -- Population of three races
    p0020049 integer,   -- Population of four races
    p0020065 integer,   -- Population of five races
    p0020072 integer,   -- Population of six races

    -- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
    p0030001 integer,   -- Total
    p0030002 integer,   -- Population of one race:
    p0030003 integer,       -- White alone
    p0030004 integer,       -- Black or African American alone
    p0030005 integer,       -- American Indian and Alaska Native alone
    p0030006 integer,       -- Asian alone
    p0030007 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0030008 integer,       -- Some Other Race alone
    p0030009 integer,   -- Two or More Races
    p0030010 integer,   -- Population of two races
    p0030026 integer,   -- Population of three races
    p0030047 integer,   -- Population of four races
    p0030063 integer,   -- Population of five races
    p0030070 integer,   -- Population of six races

    -- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    -- FOR THE POPULATION 18 YEARS AND OVER
    p0040001 integer,   -- Total
    p0040002 integer,   -- Hispanic or Latino
    p0040003 integer,   -- Not Hispanic or Latino:
    p0040004 integer,   -- Population of one race:
    p0040005 integer,       -- White alone
    p0040006 integer,       -- Black or African American alone
    p0040007 integer,       -- American Indian and Alaska Native alone
    p0040008 integer,       -- Asian alone
    p0040009 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0040010 integer,       -- Some Other Race alone
    p0040011 integer,   -- Two or More Races
    p0040012 integer,   -- Population of two races
    p0040028 integer,   -- Population of three races
    p0040049 integer,   -- Population of four races
    p0040065 integer,   -- Population of five races
    p0040072 integer,   -- Population of six races

    -- This section is referred to as H1. OCCUPANCY STATUS
    h0010001 integer,   -- Total housing units
    h0010002 integer,   -- Occupied
    h0010003 integer    -- Vacant
);

SELECT * FROM us_counties_2010;

-- Listing 4-3: Importing Census data using COPY
-- Note! If you run into an import error here, be sure you downloaded the code and
-- data for the book according to the steps listed on page xxvii in the Introduction.
-- Windows users: Please check the Note on page xxvii as well.

COPY us_counties_2010
FROM 'C:\YourDirectory\us_counties_2010.csv'
WITH (FORMAT CSV, HEADER);

-- Checking the data

SELECT * FROM us_counties_2010;

SELECT geo_name, state_us_abbreviation, area_land
FROM us_counties_2010
ORDER BY area_land DESC
LIMIT 3;

SELECT geo_name, state_us_abbreviation, internal_point_lon
FROM us_counties_2010
ORDER BY internal_point_lon DESC
LIMIT 5;

-- Listing 4-4: Creating a table to track supervisor salaries

CREATE TABLE supervisor_salaries (
    town varchar(30),
    county varchar(30),
    supervisor varchar(30),
    start_date date,
    salary money,
    benefits money
);

-- Listing 4-5: Importing salaries data from CSV to three table columns

COPY supervisor_salaries (town, supervisor, salary)
FROM 'C:\YourDirectory\supervisor_salaries.csv'
WITH (FORMAT CSV, HEADER);

-- Check the data
SELECT * FROM supervisor_salaries LIMIT 2;

-- Listing 4-6 Use a temporary table to add a default value to a column during
-- import

DELETE FROM supervisor_salaries;

CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries);

COPY supervisor_salaries_temp (town, supervisor, salary)
FROM 'C:\YourDirectory\supervisor_salaries.csv'
WITH (FORMAT CSV, HEADER);

INSERT INTO supervisor_salaries (town, county, supervisor, salary)
SELECT town, 'Some County', supervisor, salary
FROM supervisor_salaries_temp;

DROP TABLE supervisor_salaries_temp;

-- Check the data
SELECT * FROM supervisor_salaries LIMIT 2;

-- Listing 4-7: Export an entire table with COPY

COPY us_counties_2010
TO 'C:\YourDirectory\us_counties_export.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');

-- Listing 4-8: Exporting selected columns from a table with COPY

COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon)
TO 'C:\YourDirectory\us_counties_latlon_export.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');

-- Listing 4-9: Exporting query results with COPY

COPY (
    SELECT geo_name, state_us_abbreviation
    FROM us_counties_2010
    WHERE geo_name ILIKE '%mill%'
     )
TO 'C:\YourDirectory\us_counties_mill_export.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');
|
||||
@ -2,6 +2,6 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 4: Importing and Exporting Data
|
||||
### Chapter 5: Importing and Exporting Data
|
||||
|
||||
Explains how to use SQL commands to load data from external files and then export it. You’ll load a table of U.S. Census population data that you’ll use throughout the book.
|
||||
|
||||
@ -1,207 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 5 Code Examples
--------------------------------------------------------------

-- Listing 5-1: Basic addition, subtraction and multiplication with SQL

SELECT 2 + 2;    -- addition
SELECT 9 - 1;    -- subtraction
SELECT 3 * 4;    -- multiplication

-- Listing 5-2: Integer and decimal division with SQL

SELECT 11 / 6;   -- integer division
SELECT 11 % 6;   -- modulo division
SELECT 11.0 / 6; -- decimal division
SELECT CAST(11 AS numeric(3,1)) / 6;

-- Listing 5-3: Exponents, roots and factorials with SQL

SELECT 3 ^ 4;    -- exponentiation
SELECT |/ 10;    -- square root (operator)
SELECT sqrt(10); -- square root (function)
SELECT ||/ 10;   -- cube root
SELECT 4 !;      -- factorial

-- Order of operations

SELECT 7 + 8 * 9;    -- answer: 79
SELECT (7 + 8) * 9;  -- answer: 135

SELECT 3 ^ 3 - 1;    -- answer: 26
SELECT 3 ^ (3 - 1);  -- answer: 9

-- Listing 5-4: Selecting Census population columns by race with aliases

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total Population",
       p0010003 AS "White Alone",
       p0010004 AS "Black or African American Alone",
       p0010005 AS "Am Indian/Alaska Native Alone",
       p0010006 AS "Asian Alone",
       p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
       p0010008 AS "Some Other Race Alone",
       p0010009 AS "Two or More Races"
FROM us_counties_2010;

-- Listing 5-5: Adding two columns in us_counties_2010

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010003 AS "White Alone",
       p0010004 AS "Black Alone",
       p0010003 + p0010004 AS "Total White and Black"
FROM us_counties_2010;

-- Listing 5-6: Checking Census data totals

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total",
       p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009 AS "All Races",
       (p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009) - p0010001 AS "Difference"
FROM us_counties_2010
ORDER BY "Difference" DESC;

-- Listing 5-7: Calculating the percent of the population that is
-- Asian by county (percent of the whole)

SELECT geo_name,
       state_us_abbreviation AS "st",
       (CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
FROM us_counties_2010
ORDER BY "pct_asian" DESC;

-- Listing 5-8: Calculating percent change

CREATE TABLE percent_change (
    department varchar(20),
    spend_2014 numeric(10,2),
    spend_2017 numeric(10,2)
);

INSERT INTO percent_change
VALUES
    ('Building', 250000, 289000),
    ('Assessor', 178556, 179500),
    ('Library', 87777, 90001),
    ('Clerk', 451980, 650000),
    ('Police', 250000, 223000),
    ('Recreation', 199000, 195000);

SELECT department,
       spend_2014,
       spend_2017,
       round( (spend_2017 - spend_2014) /
              spend_2014 * 100, 1 ) AS "pct_change"
FROM percent_change;

-- Listing 5-9: Using sum() and avg() aggregate functions

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average"
FROM us_counties_2010;

-- Listing 5-10: Testing SQL percentile functions

CREATE TABLE percentile_test (
    numbers integer
);

INSERT INTO percentile_test (numbers) VALUES
    (1), (2), (3), (4), (5), (6);

SELECT
    percentile_cont(.5)
    WITHIN GROUP (ORDER BY numbers),
    percentile_disc(.5)
    WITHIN GROUP (ORDER BY numbers)
FROM percentile_test;

-- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY p0010001) AS "County Median"
FROM us_counties_2010;

-- Listing 5-12: Passing an array of values to percentile_cont()

-- quartiles
SELECT percentile_cont(array[.25,.5,.75])
       WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
FROM us_counties_2010;

-- Extra:
-- quintiles
SELECT percentile_cont(array[.2,.4,.6,.8])
       WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
FROM us_counties_2010;

-- deciles
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
       WITHIN GROUP (ORDER BY p0010001) AS "deciles"
FROM us_counties_2010;

-- Listing 5-13: Using unnest() to turn an array into rows

SELECT unnest(
            percentile_cont(array[.25,.5,.75])
            WITHIN GROUP (ORDER BY p0010001)
            ) AS "quartiles"
FROM us_counties_2010;

-- Listing 5-14: Creating a median() aggregate function in PostgreSQL
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median

CREATE OR REPLACE FUNCTION _final_median(anyarray)
   RETURNS float8 AS
$$
  WITH q AS
  (
     SELECT val
     FROM unnest($1) val
     WHERE VAL IS NOT NULL
     ORDER BY 1
  ),
  cnt AS
  (
    SELECT COUNT(*) AS c FROM q
  )
  SELECT AVG(val)::float8
  FROM
  (
    SELECT val FROM q
    LIMIT  2 - MOD((SELECT c FROM cnt), 2)
    OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
  ) q2;
$$
LANGUAGE sql IMMUTABLE;

CREATE AGGREGATE median(anyelement) (
  SFUNC=array_append,
  STYPE=anyarray,
  FINALFUNC=_final_median,
  INITCOND='{}'
);

-- Listing 5-15: Using a median() aggregate function

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
       median(p0010001) AS "County Median",
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
FROM us_counties_2010;

-- Listing 5-16: Finding the most-frequent value with mode()

SELECT mode() WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 5: Basic Math and Stats with SQL
|
||||
### Chapter 6: Basic Math and Stats with SQL
|
||||
|
||||
Covers arithmetic operations and introduces aggregate functions for finding sums, averages, and medians.
|
||||
|
||||
|
||||
@ -1,196 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 6 Code Examples
--------------------------------------------------------------

-- Listing 6-1: Creating the departments and employees tables

CREATE TABLE departments (
    dept_id bigserial,
    dept varchar(100),
    city varchar(100),
    CONSTRAINT dept_key PRIMARY KEY (dept_id),
    CONSTRAINT dept_city_unique UNIQUE (dept, city)
);

CREATE TABLE employees (
    emp_id bigserial,
    first_name varchar(100),
    last_name varchar(100),
    salary integer,
    dept_id integer REFERENCES departments (dept_id),
    CONSTRAINT emp_key PRIMARY KEY (emp_id),
    CONSTRAINT emp_dept_unique UNIQUE (emp_id, dept_id)
);

INSERT INTO departments (dept, city)
VALUES
    ('Tax', 'Atlanta'),
    ('IT', 'Boston');

INSERT INTO employees (first_name, last_name, salary, dept_id)
VALUES
    ('Nancy', 'Jones', 62500, 1),
    ('Lee', 'Smith', 59300, 1),
    ('Soo', 'Nguyen', 83000, 2),
    ('Janet', 'King', 95000, 2);

-- Listing 6-2: Joining the employees and departments tables

SELECT *
FROM employees JOIN departments
ON employees.dept_id = departments.dept_id;

-- Listing 6-3: Creating two tables to explore JOIN types

CREATE TABLE schools_left (
    id integer CONSTRAINT left_id_key PRIMARY KEY,
    left_school varchar(30)
);

CREATE TABLE schools_right (
    id integer CONSTRAINT right_id_key PRIMARY KEY,
    right_school varchar(30)
);

INSERT INTO schools_left (id, left_school) VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (5, 'Washington Middle School'),
    (6, 'Jefferson High School');

INSERT INTO schools_right (id, right_school) VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (3, 'Morrison Elementary'),
    (4, 'Chase Magnet Academy'),
    (6, 'Jefferson High School');

-- Listing 6-4: Using JOIN

SELECT *
FROM schools_left JOIN schools_right
ON schools_left.id = schools_right.id;

-- Bonus: Also can be specified as INNER JOIN

SELECT *
FROM schools_left INNER JOIN schools_right
ON schools_left.id = schools_right.id;

-- Listing 6-5: Using LEFT JOIN

SELECT *
FROM schools_left LEFT JOIN schools_right
ON schools_left.id = schools_right.id;

-- Listing 6-6: Using RIGHT JOIN

SELECT *
FROM schools_left RIGHT JOIN schools_right
ON schools_left.id = schools_right.id;

-- Listing 6-7: Using FULL OUTER JOIN

SELECT *
FROM schools_left FULL OUTER JOIN schools_right
ON schools_left.id = schools_right.id;

-- Listing 6-8: Using CROSS JOIN

SELECT *
FROM schools_left CROSS JOIN schools_right;

-- Listing 6-9: Filtering to show missing values with IS NULL

SELECT *
FROM schools_left LEFT JOIN schools_right
ON schools_left.id = schools_right.id
WHERE schools_right.id IS NULL;

-- Listing 6-10: Querying specific columns in a join
SELECT schools_left.id,
       schools_left.left_school,
       schools_right.right_school
FROM schools_left LEFT JOIN schools_right
ON schools_left.id = schools_right.id;

-- Listing 6-11: Simplifying code with table aliases
SELECT lt.id,
       lt.left_school,
       rt.right_school
FROM schools_left AS lt LEFT JOIN schools_right AS rt
ON lt.id = rt.id;

-- Listing 6-12: Joining multiple tables
CREATE TABLE schools_enrollment (
    id integer,
    enrollment integer
);

CREATE TABLE schools_grades (
    id integer,
    grades varchar(10)
);

INSERT INTO schools_enrollment (id, enrollment)
VALUES
    (1, 360),
    (2, 1001),
    (5, 450),
    (6, 927);

INSERT INTO schools_grades (id, grades)
VALUES
    (1, 'K-3'),
    (2, '9-12'),
    (5, '6-8'),
    (6, '9-12');

SELECT lt.id, lt.left_school, en.enrollment, gr.grades
FROM schools_left AS lt LEFT JOIN schools_enrollment AS en
    ON lt.id = en.id
LEFT JOIN schools_grades AS gr
    ON lt.id = gr.id;

-- Listing 6-13: Performing math on joined Census tables
-- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf
-- Note: Some non-number columns have been given more descriptive names

CREATE TABLE us_counties_2000 (
    geo_name varchar(90),              -- County/state name,
    state_us_abbreviation varchar(2),  -- State/U.S. abbreviation
    state_fips varchar(2),             -- State FIPS code
    county_fips varchar(3),            -- County code
    p0010001 integer,                  -- Total population
    p0010002 integer,                  -- Population of one race:
    p0010003 integer,                      -- White Alone
    p0010004 integer,                      -- Black or African American alone
    p0010005 integer,                      -- American Indian and Alaska Native alone
    p0010006 integer,                      -- Asian alone
    p0010007 integer,                      -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,                      -- Some Other Race alone
    p0010009 integer,                  -- Population of two or more races
    p0010010 integer,                  -- Population of two races
    p0020002 integer,                  -- Hispanic or Latino
    p0020003 integer                   -- Not Hispanic or Latino:
);

COPY us_counties_2000
FROM 'C:\YourDirectory\us_counties_2000.csv'
WITH (FORMAT CSV, HEADER);

SELECT c2010.geo_name,
       c2010.state_us_abbreviation AS state,
       c2010.p0010001 AS pop_2010,
       c2000.p0010001 AS pop_2000,
       c2010.p0010001 - c2000.p0010001 AS raw_change,
       round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
           / c2000.p0010001 * 100, 1 ) AS pct_change
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
   AND c2010.p0010001 <> c2000.p0010001
ORDER BY pct_change DESC;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 6: Joining Tables in a Relational Database
|
||||
### Chapter 7: Joining Tables in a Relational Database
|
||||
|
||||
Explains how to query multiple, related tables by joining them on key columns. You’ll learn how and when to use different types of joins.
|
||||
|
||||
|
||||
@ -1,184 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 7 Code Examples
--------------------------------------------------------------

-- Listing 7-1: Declaring a single-column natural key as primary key

-- As a column constraint
CREATE TABLE natural_key_example (
    license_id varchar(10) CONSTRAINT license_key PRIMARY KEY,
    first_name varchar(50),
    last_name varchar(50)
);

-- Drop the table before trying again
DROP TABLE natural_key_example;

-- As a table constraint
CREATE TABLE natural_key_example (
    license_id varchar(10),
    first_name varchar(50),
    last_name varchar(50),
    CONSTRAINT license_key PRIMARY KEY (license_id)
);

-- Listing 7-2: Example of a primary key violation
-- The second INSERT reuses license_id 'T229901' and is EXPECTED to fail
-- with a unique-violation error — that is the point of the listing.
INSERT INTO natural_key_example (license_id, first_name, last_name)
VALUES ('T229901', 'Lynn', 'Malero');

INSERT INTO natural_key_example (license_id, first_name, last_name)
VALUES ('T229901', 'Sam', 'Tracy');

-- Listing 7-3: Declaring a composite primary key as a natural key
CREATE TABLE natural_key_composite_example (
    student_id varchar(10),
    school_day date,
    present boolean,
    CONSTRAINT student_key PRIMARY KEY (student_id, school_day)
);

-- Listing 7-4: Example of a composite primary key violation
-- The third INSERT repeats the (student_id, school_day) pair and is
-- expected to fail. NOTE(review): date literals like '1/22/2017' assume
-- a month-day-year DateStyle; they would be misread under DMY settings.

INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES(775, '1/22/2017', 'Y');

INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES(775, '1/23/2017', 'Y');

INSERT INTO natural_key_composite_example (student_id, school_day, present)
VALUES(775, '1/23/2017', 'N');

-- Listing 7-5: Declaring a bigserial column as a surrogate key

CREATE TABLE surrogate_key_example (
    order_number bigserial,   -- auto-incrementing surrogate key
    product_name varchar(50),
    order_date date,
    CONSTRAINT order_key PRIMARY KEY (order_number)
);

-- order_number is omitted so the sequence fills it in automatically.
INSERT INTO surrogate_key_example (product_name, order_date)
VALUES ('Beachball Polish', '2015-03-17'),
       ('Wrinkle De-Atomizer', '2017-05-22'),
       ('Flux Capacitor', '1985-10-26');

SELECT * FROM surrogate_key_example;

-- Listing 7-6: A foreign key example

CREATE TABLE licenses (
    license_id varchar(10),
    first_name varchar(50),
    last_name varchar(50),
    CONSTRAINT licenses_key PRIMARY KEY (license_id)
);

CREATE TABLE registrations (
    registration_id varchar(10),
    registration_date date,
    license_id varchar(10) REFERENCES licenses (license_id),
    CONSTRAINT registration_key PRIMARY KEY (registration_id, license_id)
);

INSERT INTO licenses (license_id, first_name, last_name)
VALUES ('T229901', 'Lynn', 'Malero');

INSERT INTO registrations (registration_id, registration_date, license_id)
VALUES ('A203391', '3/17/2017', 'T229901');

-- Expected to fail: 'T000001' has no parent row in licenses.
INSERT INTO registrations (registration_id, registration_date, license_id)
VALUES ('A75772', '3/17/2017', 'T000001');

-- Listing 7-7: CHECK constraint examples

CREATE TABLE check_constraint_example (
    user_id bigserial,
    user_role varchar(50),
    salary integer,
    CONSTRAINT user_id_key PRIMARY KEY (user_id),
    CONSTRAINT check_role_in_list CHECK (user_role IN('Admin', 'Staff')),
    CONSTRAINT check_salary_not_zero CHECK (salary > 0)
);

-- Both of these will fail:
-- ('admin' is lowercase; the CHECK list is case-sensitive)
INSERT INTO check_constraint_example (user_role)
VALUES ('admin');

INSERT INTO check_constraint_example (salary)
VALUES (0);

-- Listing 7-8: UNIQUE constraint example

CREATE TABLE unique_constraint_example (
    contact_id bigserial CONSTRAINT contact_id_key PRIMARY KEY,
    first_name varchar(50),
    last_name varchar(50),
    email varchar(200),
    CONSTRAINT email_unique UNIQUE (email)
);

INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES ('Samantha', 'Lee', 'slee@example.org');

INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES ('Betty', 'Diaz', 'bdiaz@example.org');

-- Expected to fail: duplicate email violates email_unique.
INSERT INTO unique_constraint_example (first_name, last_name, email)
VALUES ('Sasha', 'Lee', 'slee@example.org');

-- Listing 7-9: NOT NULL constraint example

CREATE TABLE not_null_example (
    student_id bigserial,
    first_name varchar(50) NOT NULL,
    last_name varchar(50) NOT NULL,
    CONSTRAINT student_id_key PRIMARY KEY (student_id)
);

-- Listing 7-10: Dropping and adding a primary key and a NOT NULL constraint

-- Drop
ALTER TABLE not_null_example DROP CONSTRAINT student_id_key;

-- Add
ALTER TABLE not_null_example ADD CONSTRAINT student_id_key PRIMARY KEY (student_id);

-- Drop
ALTER TABLE not_null_example ALTER COLUMN first_name DROP NOT NULL;

-- Add
ALTER TABLE not_null_example ALTER COLUMN first_name SET NOT NULL;

-- Listing 7-11: Importing New York City address data

CREATE TABLE new_york_addresses (
    longitude numeric(9,6),
    latitude numeric(9,6),
    street_number varchar(10),
    street varchar(32),
    unit varchar(7),
    postcode varchar(5),
    id integer CONSTRAINT new_york_key PRIMARY KEY
);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY new_york_addresses
FROM 'C:\YourDirectory\city_of_new_york.csv'
WITH (FORMAT CSV, HEADER);

-- Listing 7-12: Benchmark queries for index performance
-- Run these before and after Listing 7-13 to compare plans/timings.

EXPLAIN ANALYZE SELECT * FROM new_york_addresses
WHERE street = 'BROADWAY';

EXPLAIN ANALYZE SELECT * FROM new_york_addresses
WHERE street = '52 STREET';

EXPLAIN ANALYZE SELECT * FROM new_york_addresses
WHERE street = 'ZWICKY AVENUE';

-- Listing 7-13: Creating a B-Tree index on the new_york_addresses table

CREATE INDEX street_idx ON new_york_addresses (street);
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 7: Table Design that Works for You
|
||||
### Chapter 8: Table Design that Works for You
|
||||
|
||||
Covers how to set up tables to improve the organization and integrity of your data as well as how to speed up queries using indexes.
|
||||
|
||||
|
||||
@ -1,299 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 8 Code Examples
--------------------------------------------------------------

-- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table
-- One row per library agency; fscskey is the survey's unique agency ID.
-- NOTE(review): negative values in count columns presumably encode
-- missing/suppressed data (later queries filter with "visits >= 0") —
-- confirm against the IMLS survey documentation.

CREATE TABLE pls_fy2014_pupld14a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio_ph integer NOT NULL,
    audio_dl float NOT NULL,
    video_ph integer NOT NULL,
    video_dl float NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    elmatcir integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    wifisess integer NOT NULL,
    yr_sub integer NOT NULL
);

-- Indexes on the columns the chapter's queries group and filter by.
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY pls_fy2014_pupld14a
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
WITH (FORMAT CSV, HEADER);

-- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table
-- Same survey five years earlier; column order differs from 2014 and a few
-- columns differ (audio/video instead of _ph/_dl split; no wifisess/elmatcir).

CREATE TABLE pls_fy2009_pupld09a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio integer NOT NULL,
    video integer NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    yr_sub integer NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL
);

CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);

COPY pls_fy2009_pupld09a
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 8-3: Using count() for table row counts

SELECT count(*)
FROM pls_fy2014_pupld14a;

SELECT count(*)
FROM pls_fy2009_pupld09a;

-- Listing 8-4: Using count() for the number of values in a column
-- count(salaries) counts only non-NULL values, unlike count(*).

SELECT count(salaries)
FROM pls_fy2014_pupld14a;

-- Listing 8-5: Using count() for the number of distinct values in a column

SELECT count(libname)
FROM pls_fy2014_pupld14a;

SELECT count(DISTINCT libname)
FROM pls_fy2014_pupld14a;

-- Bonus: find duplicate libnames
SELECT libname, count(libname)
FROM pls_fy2014_pupld14a
GROUP BY libname
ORDER BY count(libname) DESC;

-- Bonus: see location of every Oxford Public Library
SELECT libname, city, stabr
FROM pls_fy2014_pupld14a
WHERE libname = 'OXFORD PUBLIC LIBRARY';

-- Listing 8-6: Finding the most and fewest visits using max() and min()
SELECT max(visits), min(visits)
FROM pls_fy2014_pupld14a;

-- Listing 8-7: Using GROUP BY on the stabr column

-- There are 56 in 2014.
SELECT stabr
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY stabr;

-- Bonus: there are 55 in 2009.
SELECT stabr
FROM pls_fy2009_pupld09a
GROUP BY stabr
ORDER BY stabr;

-- Listing 8-8: Using GROUP BY on the city and stabr columns

SELECT city, stabr
FROM pls_fy2014_pupld14a
GROUP BY city, stabr
ORDER BY city, stabr;

-- Bonus: We can count some of the combos
SELECT city, stabr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY city, stabr
ORDER BY count(*) DESC;

-- Listing 8-9: GROUP BY with count() on the stabr column

SELECT stabr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY count(*) DESC;

-- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns

SELECT stabr, stataddr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY stabr, stataddr
ORDER BY stabr ASC, count(*) DESC;

-- Listing 8-11: Using the sum() aggregate function to total visits to
-- libraries in 2014 and 2009
-- "visits >= 0" excludes negative codes, which presumably flag
-- missing data — NOTE(review): confirm against the survey data dictionary.

-- 2014
SELECT sum(visits) AS visits_2014
FROM pls_fy2014_pupld14a
WHERE visits >= 0;

-- 2009
SELECT sum(visits) AS visits_2009
FROM pls_fy2009_pupld09a
WHERE visits >= 0;

-- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables
-- INNER JOIN on fscskey keeps only agencies present in both survey years.

SELECT sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0;

-- Listing 8-13: Using GROUP BY to track percent change in library visits by state
-- CAST to decimal forces decimal division; round(..., 2) keeps two places.

SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
           sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
ORDER BY pct_change DESC;

-- Listing 8-14: Using HAVING to filter the results of an aggregate query
-- HAVING filters the grouped sums; WHERE (above) filtered the input rows.

SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
           sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
HAVING sum(pls14.visits) > 50000000
ORDER BY pct_change DESC;
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 8: Extracting Information by Grouping and Summarizing
|
||||
### Chapter 9: Extracting Information by Grouping and Summarizing
|
||||
|
||||
Explains how to use aggregate functions to find trends in U.S. library use based on annual surveys.
|
||||
|
||||
|
||||
@ -1,259 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 9 Code Examples
--------------------------------------------------------------

-- Listing 9-1: Importing the FSIS Meat, Poultry, and Egg Inspection Directory
-- https://catalog.data.gov/dataset/meat-poultry-and-egg-inspection-directory-by-establishment-name

CREATE TABLE meat_poultry_egg_inspect (
    est_number varchar(50) CONSTRAINT est_number_key PRIMARY KEY,
    company varchar(100),
    street varchar(100),
    city varchar(30),
    st varchar(2),
    zip varchar(5),
    phone varchar(14),
    grant_date date,
    activities text,
    dbas text
);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY meat_poultry_egg_inspect
FROM 'C:\YourDirectory\MPI_Directory_by_Establishment_Name.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');

CREATE INDEX company_idx ON meat_poultry_egg_inspect (company);

-- Count the rows imported:
SELECT count(*) FROM meat_poultry_egg_inspect;

-- Listing 9-2: Finding multiple companies at the same address
-- HAVING count(*) > 1 keeps only addresses shared by more than one row.
SELECT company,
       street,
       city,
       st,
       count(*) AS address_count
FROM meat_poultry_egg_inspect
GROUP BY company, street, city, st
HAVING count(*) > 1
ORDER BY company, street, city, st;

-- Listing 9-3: Grouping and counting states
SELECT st,
       count(*) AS st_count
FROM meat_poultry_egg_inspect
GROUP BY st
ORDER BY st;

-- Listing 9-4: Using IS NULL to find missing values in the st column
SELECT est_number,
       company,
       city,
       st,
       zip
FROM meat_poultry_egg_inspect
WHERE st IS NULL;

-- Listing 9-5: Using GROUP BY and count() to find inconsistent company names

SELECT company,
       count(*) AS company_count
FROM meat_poultry_egg_inspect
GROUP BY company
ORDER BY company ASC;

-- Listing 9-6: Using length() and count() to test the zip column
-- ZIPs shorter than 5 indicate leading zeros lost on import.

SELECT length(zip),
       count(*) AS length_count
FROM meat_poultry_egg_inspect
GROUP BY length(zip)
ORDER BY length(zip) ASC;

-- Listing 9-7: Filtering with length() to find short zip values

SELECT st,
       count(*) AS st_count
FROM meat_poultry_egg_inspect
WHERE length(zip) < 5
GROUP BY st
ORDER BY st ASC;

-- Listing 9-8: Backing up a table
-- CREATE TABLE ... AS copies data but NOT constraints or indexes.

CREATE TABLE meat_poultry_egg_inspect_backup AS
SELECT * FROM meat_poultry_egg_inspect;

-- Check number of records:
SELECT
    (SELECT count(*) FROM meat_poultry_egg_inspect) AS original,
    (SELECT count(*) FROM meat_poultry_egg_inspect_backup) AS backup;

-- Listing 9-9: Creating and filling the st_copy column with ALTER TABLE and UPDATE
-- Keeps a column-level backup of st before edits.

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN st_copy varchar(2);

-- Whole-table UPDATE (no WHERE) is intentional here: copy every st value.
UPDATE meat_poultry_egg_inspect
SET st_copy = st;

-- Listing 9-10: Checking values in the st and st_copy columns

SELECT st,
       st_copy
FROM meat_poultry_egg_inspect
ORDER BY st;

-- Listing 9-11: Updating the st column for three establishments
-- Fills in the NULL states found in Listing 9-4.

UPDATE meat_poultry_egg_inspect
SET st = 'MN'
WHERE est_number = 'V18677A';

UPDATE meat_poultry_egg_inspect
SET st = 'AL'
WHERE est_number = 'M45319+P45319';

UPDATE meat_poultry_egg_inspect
SET st = 'WI'
WHERE est_number = 'M263A+P263A+V263A';

-- Listing 9-12: Restoring original st column values

-- Restoring from the column backup (whole-table UPDATE is intentional)
UPDATE meat_poultry_egg_inspect
SET st = st_copy;

-- Restoring from the table backup
UPDATE meat_poultry_egg_inspect original
SET st = backup.st
FROM meat_poultry_egg_inspect_backup backup
WHERE original.est_number = backup.est_number;
||||
-- Listing 9-13: Creating and filling the company_standard column
-- Working copy of company so the original names stay untouched.

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN company_standard varchar(100);

UPDATE meat_poultry_egg_inspect
SET company_standard = company;

-- Listing 9-14: Use UPDATE to modify field values that match a string
-- Collapses every 'Armour...' variant to one canonical spelling.

UPDATE meat_poultry_egg_inspect
SET company_standard = 'Armour-Eckrich Meats'
WHERE company LIKE 'Armour%';

SELECT company, company_standard
FROM meat_poultry_egg_inspect
WHERE company LIKE 'Armour%';

-- Listing 9-15: Creating and filling the zip_copy column

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN zip_copy varchar(5);

UPDATE meat_poultry_egg_inspect
SET zip_copy = zip;

-- Listing 9-16: Modify codes in the zip column missing two leading zeros
-- || is string concatenation; PR/VI ZIPs start with '00'.

UPDATE meat_poultry_egg_inspect
SET zip = '00' || zip
WHERE st IN('PR','VI') AND length(zip) = 3;

-- Listing 9-17: Modify codes in the zip column missing one leading zero

UPDATE meat_poultry_egg_inspect
SET zip = '0' || zip
WHERE st IN('CT','MA','ME','NH','NJ','RI','VT') AND length(zip) = 4;

-- Listing 9-18: Creating and filling a state_regions table

CREATE TABLE state_regions (
    st varchar(2) CONSTRAINT st_key PRIMARY KEY,
    region varchar(20) NOT NULL
);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY state_regions
FROM 'C:\YourDirectory\state_regions.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');

-- Listing 9-19: Adding and updating an inspection_date column
-- EXISTS correlates each row's st with state_regions to limit the
-- update to New England establishments.

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN inspection_date date;

UPDATE meat_poultry_egg_inspect inspect
SET inspection_date = '2019-12-01'
WHERE EXISTS (SELECT state_regions.region
              FROM state_regions
              WHERE inspect.st = state_regions.st
                    AND state_regions.region = 'New England');

-- Listing 9-20: Viewing updated inspection_date values

SELECT st, inspection_date
FROM meat_poultry_egg_inspect
GROUP BY st, inspection_date
ORDER BY st;

-- Listing 9-21: Delete rows matching an expression

DELETE FROM meat_poultry_egg_inspect
WHERE st IN('PR','VI');

-- Listing 9-22: Remove a column from a table using DROP

ALTER TABLE meat_poultry_egg_inspect DROP COLUMN zip_copy;

-- Listing 9-23: Remove a table from a database using DROP

DROP TABLE meat_poultry_egg_inspect_backup;

-- Listing 9-24: Demonstrating a transaction block

-- Start transaction and perform update
START TRANSACTION;

-- The misspelling 'Merchantss' is DELIBERATE: it demonstrates an
-- erroneous change that the ROLLBACK below undoes.
UPDATE meat_poultry_egg_inspect
SET company = 'AGRO Merchantss Oakland LLC'
WHERE company = 'AGRO Merchants Oakland, LLC';

-- view changes
SELECT company
FROM meat_poultry_egg_inspect
WHERE company LIKE 'AGRO%'
ORDER BY company;

-- Revert changes
ROLLBACK;

-- See restored state
SELECT company
FROM meat_poultry_egg_inspect
WHERE company LIKE 'AGRO%'
ORDER BY company;

-- Alternately, commit changes at the end:
START TRANSACTION;

UPDATE meat_poultry_egg_inspect
SET company = 'AGRO Merchants Oakland LLC'
WHERE company = 'AGRO Merchants Oakland, LLC';

COMMIT;

-- Listing 9-25: Backing up a table while adding and filling a new column
-- '2018-02-07'::date is a PostgreSQL cast of a literal to date.

CREATE TABLE meat_poultry_egg_inspect_backup AS
SELECT *,
       '2018-02-07'::date AS reviewed_date
FROM meat_poultry_egg_inspect;

-- Listing 9-26: Swapping table names using ALTER TABLE
-- Three-way rename so the backup becomes the live table atomically
-- from the script's point of view.

ALTER TABLE meat_poultry_egg_inspect RENAME TO meat_poultry_egg_inspect_temp;
ALTER TABLE meat_poultry_egg_inspect_backup RENAME TO meat_poultry_egg_inspect;
ALTER TABLE meat_poultry_egg_inspect_temp RENAME TO meat_poultry_egg_inspect_backup;
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 9: Inspecting and Modifying Data
|
||||
### Chapter 10: Inspecting and Modifying Data
|
||||
|
||||
Explores how to find and fix incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example.
|
||||
Explores how to find and remedy incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example.
|
||||
|
||||
|
||||
@ -1,167 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 10 Code Examples
--------------------------------------------------------------

-- Listing 10-1: Create Census 2011-2015 ACS 5-Year stats table and import data

CREATE TABLE acs_2011_2015_stats (
    geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY,
    county varchar(50) NOT NULL,
    st varchar(20) NOT NULL,
    pct_travel_60_min numeric(5,3) NOT NULL,
    pct_bachelors_higher numeric(5,3) NOT NULL,
    pct_masters_higher numeric(5,3) NOT NULL,
    median_hh_income integer,
    -- Sanity check: masters-or-higher is a subset of bachelors-or-higher.
    CHECK (pct_masters_higher <= pct_bachelors_higher)
);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY acs_2011_2015_stats
FROM 'C:\YourDirectory\acs_2011_2015_stats.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');

SELECT * FROM acs_2011_2015_stats;

-- Listing 10-2: Using corr(Y, X) to measure the relationship between
-- education and income
-- corr() returns the Pearson correlation coefficient (dependent var first).

SELECT corr(median_hh_income, pct_bachelors_higher)
    AS bachelors_income_r
FROM acs_2011_2015_stats;

-- Listing 10-3: Using corr(Y, X) on additional variables
-- ::numeric casts corr()'s double precision result so round() can apply.

SELECT
    round(
      corr(median_hh_income, pct_bachelors_higher)::numeric, 2
      ) AS bachelors_income_r,
    round(
      corr(pct_travel_60_min, median_hh_income)::numeric, 2
      ) AS income_travel_r,
    round(
      corr(pct_travel_60_min, pct_bachelors_higher)::numeric, 2
      ) AS bachelors_travel_r
FROM acs_2011_2015_stats;

-- Listing 10-4: Regression slope and intercept functions
-- Least-squares fit: income = slope * pct_bachelors_higher + y_intercept.

SELECT
    round(
        regr_slope(median_hh_income, pct_bachelors_higher)::numeric, 2
        ) AS slope,
    round(
        regr_intercept(median_hh_income, pct_bachelors_higher)::numeric, 2
        ) AS y_intercept
FROM acs_2011_2015_stats;

-- Listing 10-5: Calculating the coefficient of determination, or r-squared

SELECT round(
        regr_r2(median_hh_income, pct_bachelors_higher)::numeric, 3
        ) AS r_squared
FROM acs_2011_2015_stats;

-- Bonus: Additional stats functions.
-- Variance
SELECT var_pop(median_hh_income)
FROM acs_2011_2015_stats;

-- Standard deviation of the entire population
SELECT stddev_pop(median_hh_income)
FROM acs_2011_2015_stats;

-- Covariance
SELECT covar_pop(median_hh_income, pct_bachelors_higher)
FROM acs_2011_2015_stats;
||||
-- Listing 10-6: The rank() and dense_rank() window functions

CREATE TABLE widget_companies (
    id bigserial,
    company varchar(30) NOT NULL,
    widget_output integer NOT NULL
);

INSERT INTO widget_companies (company, widget_output)
VALUES
    ('Morse Widgets', 125000),
    ('Springfield Widget Masters', 143000),
    ('Best Widgets', 196000),
    ('Acme Inc.', 133000),
    ('District Widget Inc.', 201000),
    ('Clarke Amalgamated', 620000),
    ('Stavesacre Industries', 244000),
    ('Bowers Widget Emporium', 201000);

-- rank() leaves gaps after ties (the two 201000 rows); dense_rank() does not.
SELECT
    company,
    widget_output,
    rank() OVER (ORDER BY widget_output DESC),
    dense_rank() OVER (ORDER BY widget_output DESC)
FROM widget_companies;

-- Listing 10-7: Applying rank() within groups using PARTITION BY

CREATE TABLE store_sales (
    store varchar(30),
    category varchar(30) NOT NULL,
    unit_sales bigint NOT NULL,
    CONSTRAINT store_category_key PRIMARY KEY (store, category)
);

INSERT INTO store_sales (store, category, unit_sales)
VALUES
    ('Broders', 'Cereal', 1104),
    ('Wallace', 'Ice Cream', 1863),
    ('Broders', 'Ice Cream', 2517),
    ('Cramers', 'Ice Cream', 2112),
    ('Broders', 'Beer', 641),
    ('Cramers', 'Cereal', 1003),
    ('Cramers', 'Beer', 640),
    ('Wallace', 'Cereal', 980),
    ('Wallace', 'Beer', 988);

-- PARTITION BY restarts the ranking within each category.
SELECT
    category,
    store,
    unit_sales,
    rank() OVER (PARTITION BY category ORDER BY unit_sales DESC)
FROM store_sales;

-- Listing 10-8: Create and fill a 2015 FBI crime data table

CREATE TABLE fbi_crime_data_2015 (
    st varchar(20),
    city varchar(50),
    population integer,
    violent_crime integer,
    property_crime integer,
    burglary integer,
    larceny_theft integer,
    motor_vehicle_theft integer,
    CONSTRAINT st_city_key PRIMARY KEY (st, city)
);

-- 'C:\YourDirectory\' is a placeholder path for the reader to edit.
COPY fbi_crime_data_2015
FROM 'C:\YourDirectory\fbi_crime_data_2015.csv'
WITH (FORMAT CSV, HEADER, DELIMITER ',');

SELECT * FROM fbi_crime_data_2015
ORDER BY population DESC;

-- Listing 10-9: Find property crime rates per thousand in cities with 500,000
-- or more people
-- ::numeric forces decimal division before scaling to a per-1,000 rate.

SELECT
    city,
    st,
    population,
    property_crime,
    round(
        (property_crime::numeric / population) * 1000, 1
        ) AS pc_per_1000
FROM fbi_crime_data_2015
WHERE population >= 500000
ORDER BY (property_crime::numeric / population) DESC;
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 10: Statistical Functions in SQL
|
||||
### Chapter 11: Statistical Functions in SQL
|
||||
|
||||
Introduces correlation, regression, and ranking functions in SQL to help you derive more meaning from data sets.
|
||||
|
||||
|
||||
@ -1,233 +0,0 @@
|
||||
--------------------------------------------------------------
|
||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||
-- by Anthony DeBarros
|
||||
|
||||
-- Chapter 11 Code Examples
|
||||
--------------------------------------------------------------
|
||||
|
||||
-- Listing 11-1: Extracting components of a timestamp value using date_part()
|
||||
|
||||
SELECT
|
||||
date_part('year', '2019-12-01 18:37:12 EST'::timestamptz) AS "year",
|
||||
date_part('month', '2019-12-01 18:37:12 EST'::timestamptz) AS "month",
|
||||
date_part('day', '2019-12-01 18:37:12 EST'::timestamptz) AS "day",
|
||||
date_part('hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "hour",
|
||||
date_part('minute', '2019-12-01 18:37:12 EST'::timestamptz) AS "minute",
|
||||
date_part('seconds', '2019-12-01 18:37:12 EST'::timestamptz) AS "seconds",
|
||||
date_part('timezone_hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "tz",
|
||||
date_part('week', '2019-12-01 18:37:12 EST'::timestamptz) AS "week",
|
||||
date_part('quarter', '2019-12-01 18:37:12 EST'::timestamptz) AS "quarter",
|
||||
date_part('epoch', '2019-12-01 18:37:12 EST'::timestamptz) AS "epoch";
|
||||
|
||||
-- Bonus: Using the SQL-standard extract() for similar datetime parsing:
|
||||
|
||||
SELECT extract('year' from '2019-12-01 18:37:12 EST'::timestamptz) AS "year";
|
||||
|
||||
-- Listing 11-2: Three functions for making datetimes from components
|
||||
|
||||
-- make a date
|
||||
SELECT make_date(2018, 2, 22);
|
||||
-- make a time
|
||||
SELECT make_time(18, 4, 30.3);
|
||||
-- make a timestamp with time zone
|
||||
SELECT make_timestamptz(2018, 2, 22, 18, 4, 30.3, 'Europe/Lisbon');
|
||||
|
||||
-- Bonus: Retrieving the current date and time
|
||||
|
||||
SELECT
|
||||
current_date,
|
||||
current_time,
|
||||
current_timestamp,
|
||||
localtime,
|
||||
localtimestamp,
|
||||
now();
|
||||
|
||||
-- Listing 11-3: Comparing current_timestamp and clock_timestamp() during row insert
|
||||
|
||||
CREATE TABLE current_time_example (
|
||||
time_id bigserial,
|
||||
current_timestamp_col timestamp with time zone,
|
||||
clock_timestamp_col timestamp with time zone
|
||||
);
|
||||
|
||||
INSERT INTO current_time_example (current_timestamp_col, clock_timestamp_col)
|
||||
(SELECT current_timestamp,
|
||||
clock_timestamp()
|
||||
FROM generate_series(1,1000));
|
||||
|
||||
SELECT * FROM current_time_example;
|
||||
|
||||
-- Time Zones
|
||||
|
||||
-- Listing 11-4: Showing your PostgreSQL server's default time zone
|
||||
|
||||
SHOW timezone; -- Note: You can see all run-time defaults with SHOW ALL;
|
||||
|
||||
-- Listing 11-5: Showing time zone abbreviations and names
|
||||
|
||||
SELECT * FROM pg_timezone_abbrevs;
|
||||
SELECT * FROM pg_timezone_names;
|
||||
|
||||
-- Filter to find one
|
||||
SELECT * FROM pg_timezone_names
|
||||
WHERE name LIKE 'Europe%';
|
||||
|
||||
-- Listing 11-6: Setting the time zone for a client session
|
||||
|
||||
SET timezone TO 'US/Pacific';
|
||||
|
||||
CREATE TABLE time_zone_test (
|
||||
test_date timestamp with time zone
|
||||
);
|
||||
INSERT INTO time_zone_test VALUES ('2020-01-01 4:00');
|
||||
|
||||
SELECT test_date
|
||||
FROM time_zone_test;
|
||||
|
||||
SET timezone TO 'US/Eastern';
|
||||
|
||||
SELECT test_date
|
||||
FROM time_zone_test;
|
||||
|
||||
SELECT test_date AT TIME ZONE 'Asia/Seoul'
|
||||
FROM time_zone_test;
|
||||
|
||||
|
||||
-- Math with dates!
|
||||
|
||||
SELECT '9/30/1929'::date - '9/27/1929'::date;
|
||||
SELECT '9/30/1929'::date + '5 years'::interval;
|
||||
|
||||
|
||||
-- Taxi Rides
|
||||
|
||||
-- Listing 11-7: Creating a table and importing NYC yellow taxi data
|
||||
|
||||
CREATE TABLE nyc_yellow_taxi_trips_2016_06_01 (
|
||||
trip_id bigserial PRIMARY KEY,
|
||||
vendor_id varchar(1) NOT NULL,
|
||||
tpep_pickup_datetime timestamp with time zone NOT NULL,
|
||||
tpep_dropoff_datetime timestamp with time zone NOT NULL,
|
||||
passenger_count integer NOT NULL,
|
||||
trip_distance numeric(8,2) NOT NULL,
|
||||
pickup_longitude numeric(18,15) NOT NULL,
|
||||
pickup_latitude numeric(18,15) NOT NULL,
|
||||
rate_code_id varchar(2) NOT NULL,
|
||||
store_and_fwd_flag varchar(1) NOT NULL,
|
||||
dropoff_longitude numeric(18,15) NOT NULL,
|
||||
dropoff_latitude numeric(18,15) NOT NULL,
|
||||
payment_type varchar(1) NOT NULL,
|
||||
fare_amount numeric(9,2) NOT NULL,
|
||||
extra numeric(9,2) NOT NULL,
|
||||
mta_tax numeric(5,2) NOT NULL,
|
||||
tip_amount numeric(9,2) NOT NULL,
|
||||
tolls_amount numeric(9,2) NOT NULL,
|
||||
improvement_surcharge numeric(9,2) NOT NULL,
|
||||
total_amount numeric(9,2) NOT NULL
|
||||
);
|
||||
|
||||
COPY nyc_yellow_taxi_trips_2016_06_01 (
|
||||
vendor_id,
|
||||
tpep_pickup_datetime,
|
||||
tpep_dropoff_datetime,
|
||||
passenger_count,
|
||||
trip_distance,
|
||||
pickup_longitude,
|
||||
pickup_latitude,
|
||||
rate_code_id,
|
||||
store_and_fwd_flag,
|
||||
dropoff_longitude,
|
||||
dropoff_latitude,
|
||||
payment_type,
|
||||
fare_amount,
|
||||
extra,
|
||||
mta_tax,
|
||||
tip_amount,
|
||||
tolls_amount,
|
||||
improvement_surcharge,
|
||||
total_amount
|
||||
)
|
||||
FROM 'C:\YourDirectory\yellow_tripdata_2016_06_01.csv'
|
||||
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||
|
||||
CREATE INDEX tpep_pickup_idx
|
||||
ON nyc_yellow_taxi_trips_2016_06_01 (tpep_pickup_datetime);
|
||||
|
||||
SELECT count(*) FROM nyc_yellow_taxi_trips_2016_06_01;
|
||||
|
||||
-- Listing 11-8: Counting taxi trips by hour
|
||||
|
||||
SELECT
|
||||
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||
count(*)
|
||||
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||
GROUP BY trip_hour
|
||||
ORDER BY trip_hour;
|
||||
|
||||
-- Listing 11-9: Exporting taxi pickups per hour to a CSV file
|
||||
|
||||
COPY
|
||||
(SELECT
|
||||
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||
count(*)
|
||||
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||
GROUP BY trip_hour
|
||||
ORDER BY trip_hour
|
||||
)
|
||||
TO 'C:\YourDirectory\hourly_pickups_2016_06_01.csv'
|
||||
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||
|
||||
-- Listing 11-10: Calculating median trip time by hour
|
||||
|
||||
SELECT
|
||||
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||
percentile_cont(.5)
|
||||
WITHIN GROUP (ORDER BY
|
||||
tpep_dropoff_datetime - tpep_pickup_datetime) AS median_trip
|
||||
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||
GROUP BY trip_hour
|
||||
ORDER BY trip_hour;
|
||||
|
||||
-- Listing 11-11: Creating a table to hold train trip data
|
||||
|
||||
SET timezone TO 'US/Central';
|
||||
|
||||
CREATE TABLE train_rides (
|
||||
trip_id bigserial PRIMARY KEY,
|
||||
segment varchar(50) NOT NULL,
|
||||
departure timestamp with time zone NOT NULL,
|
||||
arrival timestamp with time zone NOT NULL
|
||||
);
|
||||
|
||||
INSERT INTO train_rides (segment, departure, arrival)
|
||||
VALUES
|
||||
('Chicago to New York', '2017-11-13 21:30 CST', '2017-11-14 18:23 EST'),
|
||||
('New York to New Orleans', '2017-11-15 14:15 EST', '2017-11-16 19:32 CST'),
|
||||
('New Orleans to Los Angeles', '2017-11-17 13:45 CST', '2017-11-18 9:00 PST'),
|
||||
('Los Angeles to San Francisco', '2017-11-19 10:10 PST', '2017-11-19 21:24 PST'),
|
||||
('San Francisco to Denver', '2017-11-20 9:10 PST', '2017-11-21 18:38 MST'),
|
||||
('Denver to Chicago', '2017-11-22 19:10 MST', '2017-11-23 14:50 CST');
|
||||
|
||||
SELECT * FROM train_rides;
|
||||
|
||||
-- Listing 11-12: Calculating the length of each trip segment
|
||||
|
||||
SELECT segment,
|
||||
to_char(departure, 'YYYY-MM-DD HH12:MI a.m. TZ') AS departure,
|
||||
arrival - departure AS segment_time
|
||||
FROM train_rides;
|
||||
|
||||
-- Listing 11-13: Calculating cumulative intervals using OVER
|
||||
|
||||
SELECT segment,
|
||||
arrival - departure AS segment_time,
|
||||
sum(arrival - departure) OVER (ORDER BY trip_id) AS cume_time
|
||||
FROM train_rides;
|
||||
|
||||
-- Listing 11-14: Better formatting for cumulative trip time
|
||||
|
||||
SELECT segment,
|
||||
arrival - departure AS segment_time,
|
||||
sum(date_part('epoch', (arrival - departure)))
|
||||
OVER (ORDER BY trip_id) * interval '1 second' AS cume_time
|
||||
FROM train_rides;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 11: Working with Dates and Times
|
||||
### Chapter 12: Working with Dates and Times
|
||||
|
||||
Explains how to create, manipulate, and query dates and times in your database, including working with time zones, using data on New York City taxi trips and Amtrak train schedules.
|
||||
|
||||
|
||||
@ -1,310 +0,0 @@
|
||||
--------------------------------------------------------------
|
||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||
-- by Anthony DeBarros
|
||||
|
||||
-- Chapter 12 Code Examples
|
||||
--------------------------------------------------------------
|
||||
|
||||
-- Listing 12-1: Using a subquery in a WHERE clause
|
||||
|
||||
SELECT geo_name,
|
||||
state_us_abbreviation,
|
||||
p0010001
|
||||
FROM us_counties_2010
|
||||
WHERE p0010001 >= (
|
||||
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010
|
||||
)
|
||||
ORDER BY p0010001 DESC;
|
||||
|
||||
-- Listing 12-2: Using a subquery in a WHERE clause for DELETE
|
||||
|
||||
CREATE TABLE us_counties_2010_top10 AS
|
||||
SELECT * FROM us_counties_2010;
|
||||
|
||||
DELETE FROM us_counties_2010_top10
|
||||
WHERE p0010001 < (
|
||||
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010_top10
|
||||
);
|
||||
|
||||
SELECT count(*) FROM us_counties_2010_top10;
|
||||
|
||||
-- Listing 12-3: Subquery as a derived table in a FROM clause
|
||||
|
||||
SELECT round(calcs.average, 0) as average,
|
||||
calcs.median,
|
||||
round(calcs.average - calcs.median, 0) AS median_average_diff
|
||||
FROM (
|
||||
SELECT avg(p0010001) AS average,
|
||||
percentile_cont(.5)
|
||||
WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median
|
||||
FROM us_counties_2010
|
||||
)
|
||||
AS calcs;
|
||||
|
||||
-- Listing 12-4: Joining two derived tables
|
||||
|
||||
SELECT census.state_us_abbreviation AS st,
|
||||
census.st_population,
|
||||
plants.plant_count,
|
||||
round((plants.plant_count/census.st_population::numeric(10,1)) * 1000000, 1)
|
||||
AS plants_per_million
|
||||
FROM
|
||||
(
|
||||
SELECT st,
|
||||
count(*) AS plant_count
|
||||
FROM meat_poultry_egg_inspect
|
||||
GROUP BY st
|
||||
)
|
||||
AS plants
|
||||
JOIN
|
||||
(
|
||||
SELECT state_us_abbreviation,
|
||||
sum(p0010001) AS st_population
|
||||
FROM us_counties_2010
|
||||
GROUP BY state_us_abbreviation
|
||||
)
|
||||
AS census
|
||||
ON plants.st = census.state_us_abbreviation
|
||||
ORDER BY plants_per_million DESC;
|
||||
|
||||
-- Listing 12-5: Adding a subquery to a column list
|
||||
|
||||
SELECT geo_name,
|
||||
state_us_abbreviation AS st,
|
||||
p0010001 AS total_pop,
|
||||
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010) AS us_median
|
||||
FROM us_counties_2010;
|
||||
|
||||
-- Listing 12-6: Using a subquery expression in a calculation
|
||||
|
||||
SELECT geo_name,
|
||||
state_us_abbreviation AS st,
|
||||
p0010001 AS total_pop,
|
||||
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010) AS us_median,
|
||||
p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010) AS diff_from_median
|
||||
FROM us_counties_2010
|
||||
WHERE (p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||
FROM us_counties_2010))
|
||||
BETWEEN -1000 AND 1000;
|
||||
|
||||
|
||||
-- BONUS: Subquery expressions
|
||||
-- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200,
|
||||
-- here's the code to create a retirees table. The queries below are similar
|
||||
-- to the hypothetical examples on pages 199 and 200. You will need the
|
||||
-- employees table you created in Chapter 6.
|
||||
|
||||
-- Create table and insert data
|
||||
CREATE TABLE retirees (
|
||||
id int,
|
||||
first_name varchar(50),
|
||||
last_name varchar(50)
|
||||
);
|
||||
|
||||
INSERT INTO retirees
|
||||
VALUES (2, 'Lee', 'Smith'),
|
||||
(4, 'Janet', 'King');
|
||||
|
||||
-- Generating values for the IN operator
|
||||
SELECT first_name, last_name
|
||||
FROM employees
|
||||
WHERE emp_id IN (
|
||||
SELECT id
|
||||
FROM retirees);
|
||||
|
||||
-- Checking whether values exist (returns all rows from employees
|
||||
-- if the expression evaluates as true)
|
||||
SELECT first_name, last_name
|
||||
FROM employees
|
||||
WHERE EXISTS (
|
||||
SELECT id
|
||||
FROM retirees);
|
||||
|
||||
-- Using a correlated subquery to find matching values from employees
|
||||
-- in retirees.
|
||||
SELECT first_name, last_name
|
||||
FROM employees
|
||||
WHERE EXISTS (
|
||||
SELECT id
|
||||
FROM retirees
|
||||
WHERE id = employees.emp_id);
|
||||
|
||||
|
||||
|
||||
-- Listing 12-7: Using a simple CTE to find large counties
|
||||
|
||||
WITH
|
||||
large_counties (geo_name, st, p0010001)
|
||||
AS
|
||||
(
|
||||
SELECT geo_name, state_us_abbreviation, p0010001
|
||||
FROM us_counties_2010
|
||||
WHERE p0010001 >= 100000
|
||||
)
|
||||
SELECT st, count(*)
|
||||
FROM large_counties
|
||||
GROUP BY st
|
||||
ORDER BY count(*) DESC;
|
||||
|
||||
-- Bonus: You can also write this query as:
|
||||
SELECT state_us_abbreviation, count(*)
|
||||
FROM us_counties_2010
|
||||
WHERE p0010001 >= 100000
|
||||
GROUP BY state_us_abbreviation
|
||||
ORDER BY count(*) DESC;
|
||||
|
||||
-- Listing 12-8: Using CTEs in a table join
|
||||
|
||||
WITH
|
||||
counties (st, population) AS
|
||||
(SELECT state_us_abbreviation, sum(population_count_100_percent)
|
||||
FROM us_counties_2010
|
||||
GROUP BY state_us_abbreviation),
|
||||
|
||||
plants (st, plants) AS
|
||||
(SELECT st, count(*) AS plants
|
||||
FROM meat_poultry_egg_inspect
|
||||
GROUP BY st)
|
||||
|
||||
SELECT counties.st,
|
||||
population,
|
||||
plants,
|
||||
round((plants/population::numeric(10,1))*1000000, 1) AS per_million
|
||||
FROM counties JOIN plants
|
||||
ON counties.st = plants.st
|
||||
ORDER BY per_million DESC;
|
||||
|
||||
-- Listing 12-9: Using CTEs to minimize redundant code
|
||||
|
||||
WITH us_median AS
|
||||
(SELECT percentile_cont(.5)
|
||||
WITHIN GROUP (ORDER BY p0010001) AS us_median_pop
|
||||
FROM us_counties_2010)
|
||||
|
||||
SELECT geo_name,
|
||||
state_us_abbreviation AS st,
|
||||
p0010001 AS total_pop,
|
||||
us_median_pop,
|
||||
p0010001 - us_median_pop AS diff_from_median
|
||||
FROM us_counties_2010 CROSS JOIN us_median
|
||||
WHERE (p0010001 - us_median_pop)
|
||||
BETWEEN -1000 AND 1000;
|
||||
|
||||
|
||||
-- Cross tabulations
|
||||
-- Install the crosstab() function via the tablefunc module
|
||||
|
||||
CREATE EXTENSION tablefunc;
|
||||
|
||||
-- Listing 12-10: Creating and filling the ice_cream_survey table
|
||||
|
||||
CREATE TABLE ice_cream_survey (
|
||||
response_id integer PRIMARY KEY,
|
||||
office varchar(20),
|
||||
flavor varchar(20)
|
||||
);
|
||||
|
||||
COPY ice_cream_survey
|
||||
FROM 'C:\YourDirectory\ice_cream_survey.csv'
|
||||
WITH (FORMAT CSV, HEADER);
|
||||
|
||||
-- Listing 12-11: Generating the ice cream survey crosstab
|
||||
|
||||
SELECT *
|
||||
FROM crosstab('SELECT office,
|
||||
flavor,
|
||||
count(*)
|
||||
FROM ice_cream_survey
|
||||
GROUP BY office, flavor
|
||||
ORDER BY office',
|
||||
|
||||
'SELECT flavor
|
||||
FROM ice_cream_survey
|
||||
GROUP BY flavor
|
||||
ORDER BY flavor')
|
||||
|
||||
AS (office varchar(20),
|
||||
chocolate bigint,
|
||||
strawberry bigint,
|
||||
vanilla bigint);
|
||||
|
||||
-- Listing 12-12: Creating and filling a temperature_readings table
|
||||
|
||||
CREATE TABLE temperature_readings (
|
||||
reading_id bigserial PRIMARY KEY,
|
||||
station_name varchar(50),
|
||||
observation_date date,
|
||||
max_temp integer,
|
||||
min_temp integer
|
||||
);
|
||||
|
||||
COPY temperature_readings
|
||||
(station_name, observation_date, max_temp, min_temp)
|
||||
FROM 'C:\YourDirectory\temperature_readings.csv'
|
||||
WITH (FORMAT CSV, HEADER);
|
||||
|
||||
-- Listing 12-13: Generating the temperature readings crosstab
|
||||
|
||||
SELECT *
|
||||
FROM crosstab('SELECT
|
||||
station_name,
|
||||
date_part(''month'', observation_date),
|
||||
percentile_cont(.5)
|
||||
WITHIN GROUP (ORDER BY max_temp)
|
||||
FROM temperature_readings
|
||||
GROUP BY station_name,
|
||||
date_part(''month'', observation_date)
|
||||
ORDER BY station_name',
|
||||
|
||||
'SELECT month
|
||||
FROM generate_series(1,12) month')
|
||||
|
||||
AS (station varchar(50),
|
||||
jan numeric(3,0),
|
||||
feb numeric(3,0),
|
||||
mar numeric(3,0),
|
||||
apr numeric(3,0),
|
||||
may numeric(3,0),
|
||||
jun numeric(3,0),
|
||||
jul numeric(3,0),
|
||||
aug numeric(3,0),
|
||||
sep numeric(3,0),
|
||||
oct numeric(3,0),
|
||||
nov numeric(3,0),
|
||||
dec numeric(3,0)
|
||||
);
|
||||
|
||||
-- Listing 12-14: Re-classifying temperature data with CASE
|
||||
|
||||
SELECT max_temp,
|
||||
CASE WHEN max_temp >= 90 THEN 'Hot'
|
||||
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
|
||||
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
|
||||
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
|
||||
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
|
||||
ELSE 'Inhumane'
|
||||
END AS temperature_group
|
||||
FROM temperature_readings;
|
||||
|
||||
-- Listing 12-15: Using CASE in a Common Table Expression
|
||||
|
||||
WITH temps_collapsed (station_name, max_temperature_group) AS
|
||||
(SELECT station_name,
|
||||
CASE WHEN max_temp >= 90 THEN 'Hot'
|
||||
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
|
||||
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
|
||||
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
|
||||
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
|
||||
ELSE 'Inhumane'
|
||||
END
|
||||
FROM temperature_readings)
|
||||
|
||||
SELECT station_name, max_temperature_group, count(*)
|
||||
FROM temps_collapsed
|
||||
GROUP BY station_name, max_temperature_group
|
||||
ORDER BY station_name, count(*) DESC;
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 12: Advanced Query Techniques
|
||||
### Chapter 13: Advanced Query Techniques
|
||||
|
||||
Explains how to use more complex SQL operations, such as subqueries and cross tabulations, and the CASE statement to reclassify values in a data set on temperature readings.
|
||||
|
||||
|
||||
@ -1,363 +0,0 @@
|
||||
--------------------------------------------------------------
|
||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||
-- by Anthony DeBarros
|
||||
|
||||
-- Chapter 13 Code Examples
|
||||
--------------------------------------------------------------
|
||||
|
||||
-- Commonly used string functions
|
||||
-- Full list at https://www.postgresql.org/docs/current/static/functions-string.html
|
||||
|
||||
-- Case formatting
|
||||
SELECT upper('Neal7');
|
||||
SELECT lower('Randy');
|
||||
SELECT initcap('at the end of the day');
|
||||
-- Note initcap's imperfect for acronyms
|
||||
SELECT initcap('Practical SQL');
|
||||
|
||||
-- Character Information
|
||||
SELECT char_length(' Pat ');
|
||||
SELECT length(' Pat ');
|
||||
SELECT position(', ' in 'Tan, Bella');
|
||||
|
||||
-- Removing characters
|
||||
SELECT trim('s' from 'socks');
|
||||
SELECT trim(trailing 's' from 'socks');
|
||||
SELECT trim(' Pat ');
|
||||
SELECT char_length(trim(' Pat ')); -- note the length change
|
||||
SELECT ltrim('socks', 's');
|
||||
SELECT rtrim('socks', 's');
|
||||
|
||||
-- Extracting and replacing characters
|
||||
SELECT left('703-555-1212', 3);
|
||||
SELECT right('703-555-1212', 8);
|
||||
SELECT replace('bat', 'b', 'c');
|
||||
|
||||
|
||||
-- Table 13-2: Regular Expression Matching Examples
|
||||
|
||||
-- Any character one or more times
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '.+');
|
||||
-- One or two digits followed by a space and p.m.
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{1,2} (?:a.m.|p.m.)');
|
||||
-- One or more word characters at the start
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '^\w+');
|
||||
-- One or more word characters followed by any character at the end.
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\w+.$');
|
||||
-- The words May or June
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May|June');
|
||||
-- Four digits
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{4}');
|
||||
-- May followed by a space, digit, comma, space, and four digits.
|
||||
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May \d, \d{4}');
|
||||
|
||||
|
||||
-- Turning Text to Data with Regular Expression Functions
|
||||
|
||||
-- Listing 13-2: Creating and loading the crime_reports table
|
||||
-- Data from https://sheriff.loudoun.gov/dailycrime
|
||||
|
||||
CREATE TABLE crime_reports (
|
||||
crime_id bigserial PRIMARY KEY,
|
||||
date_1 timestamp with time zone,
|
||||
date_2 timestamp with time zone,
|
||||
street varchar(250),
|
||||
city varchar(100),
|
||||
crime_type varchar(100),
|
||||
description text,
|
||||
case_number varchar(50),
|
||||
original_text text NOT NULL
|
||||
);
|
||||
|
||||
COPY crime_reports (original_text)
|
||||
FROM 'C:\YourDirectory\crime_reports.csv'
|
||||
WITH (FORMAT CSV, HEADER OFF, QUOTE '"');
|
||||
|
||||
SELECT original_text FROM crime_reports;
|
||||
|
||||
-- Listing 13-3: Using regexp_match() to find the first date
|
||||
SELECT crime_id,
|
||||
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}')
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-4: Using the regexp_matches() function with the 'g' flag
|
||||
SELECT crime_id,
|
||||
regexp_matches(original_text, '\d{1,2}\/\d{1,2}\/\d{2}', 'g')
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-5: Using regexp_match() to find the second date
|
||||
-- Note that the result includes an unwanted hyphen
|
||||
SELECT crime_id,
|
||||
regexp_match(original_text, '-\d{1,2}\/\d{1,2}\/\d{1,2}')
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-6: Using a capture group to return only the date
|
||||
-- Eliminates the hyphen
|
||||
SELECT crime_id,
|
||||
regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-7: Matching case number, date, crime type, and city
|
||||
|
||||
SELECT
|
||||
regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number,
|
||||
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
|
||||
regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
|
||||
regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n')
|
||||
AS city
|
||||
FROM crime_reports;
|
||||
|
||||
-- Bonus: Get all parsed elements at once
|
||||
|
||||
SELECT crime_id,
|
||||
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
|
||||
CASE WHEN EXISTS (SELECT regexp_matches(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))
|
||||
THEN regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
|
||||
ELSE NULL
|
||||
END AS date_2,
|
||||
regexp_match(original_text, '\/\d{2}\n(\d{4})') AS hour_1,
|
||||
CASE WHEN EXISTS (SELECT regexp_matches(original_text, '\/\d{2}\n\d{4}-(\d{4})'))
|
||||
THEN regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})')
|
||||
ELSE NULL
|
||||
END AS hour_2,
|
||||
regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))') AS street,
|
||||
regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') AS city,
|
||||
regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
|
||||
regexp_match(original_text, ':\s(.+)(?:C0|SO)') AS description,
|
||||
regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-8: Retrieving a value from within an array
|
||||
|
||||
SELECT
|
||||
crime_id,
|
||||
(regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1]
|
||||
AS case_number
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-9: Updating the crime_reports date_1 column
|
||||
|
||||
UPDATE crime_reports
|
||||
SET date_1 =
|
||||
(
|
||||
(regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||
|| ' ' ||
|
||||
(regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
|
||||
||' US/Eastern'
|
||||
)::timestamptz;
|
||||
|
||||
SELECT crime_id,
|
||||
date_1,
|
||||
original_text
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-10: Updating all crime_reports columns
|
||||
|
||||
UPDATE crime_reports
|
||||
SET date_1 =
|
||||
(
|
||||
(regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||
|| ' ' ||
|
||||
(regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
|
||||
||' US/Eastern'
|
||||
)::timestamptz,
|
||||
|
||||
date_2 =
|
||||
CASE
|
||||
-- if there is no second date but there is a second hour
|
||||
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NULL)
|
||||
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
|
||||
THEN
|
||||
((regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||
|| ' ' ||
|
||||
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
|
||||
||' US/Eastern'
|
||||
)::timestamptz
|
||||
|
||||
-- if there is both a second date and second hour
|
||||
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NOT NULL)
|
||||
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
|
||||
THEN
|
||||
((regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))[1]
|
||||
|| ' ' ||
|
||||
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
|
||||
||' US/Eastern'
|
||||
)::timestamptz
|
||||
-- if neither of those conditions exist, provide a NULL
|
||||
ELSE NULL
|
||||
END,
|
||||
street = (regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))'))[1],
|
||||
city = (regexp_match(original_text,
|
||||
'(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'))[1],
|
||||
crime_type = (regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):'))[1],
|
||||
description = (regexp_match(original_text, ':\s(.+)(?:C0|SO)'))[1],
|
||||
case_number = (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1];
|
||||
|
||||
-- Listing 13-11: Viewing selected crime data
|
||||
|
||||
SELECT date_1,
|
||||
street,
|
||||
city,
|
||||
crime_type
|
||||
FROM crime_reports;
|
||||
|
||||
-- Listing 13-12: Using regular expressions in a WHERE clause
|
||||
|
||||
SELECT geo_name
|
||||
FROM us_counties_2010
|
||||
WHERE geo_name ~* '(.+lade.+|.+lare.+)'
|
||||
ORDER BY geo_name;
|
||||
|
||||
SELECT geo_name
|
||||
FROM us_counties_2010
|
||||
WHERE geo_name ~* '.+ash.+' AND geo_name !~ 'Wash.+'
|
||||
ORDER BY geo_name;
|
||||
|
||||
|
||||
-- Listing 13-13: Regular expression functions to replace and split
|
||||
|
||||
SELECT regexp_replace('05/12/2018', '\d{4}', '2017');
|
||||
|
||||
SELECT regexp_split_to_table('Four,score,and,seven,years,ago', ',');
|
||||
|
||||
SELECT regexp_split_to_array('Phil Mike Tony Steve', ' ');
|
||||
|
||||
-- Listing 13-14: Finding an array length
|
||||
|
||||
SELECT array_length(regexp_split_to_array('Phil Mike Tony Steve', ' '), 1);
|
||||
|
||||
|
||||
-- FULL TEXT SEARCH
|
||||
|
||||
-- Full-text search operators:
|
||||
-- & (AND)
|
||||
-- | (OR)
|
||||
-- ! (NOT)
|
||||
|
||||
-- Listing 13-15: Converting text to tsvector data
|
||||
|
||||
SELECT to_tsvector('I am walking across the sitting room to sit with you.');
|
||||
|
||||
-- Listing 13-16: Converting search terms to tsquery data
|
||||
|
||||
SELECT to_tsquery('walking & sitting');
|
||||
|
||||
-- Listing 13-17: Querying a tsvector type with a tsquery
|
||||
|
||||
SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & sitting');
|
||||
|
||||
SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & running');
|
||||
|
||||
-- Listing 13-18: Creating and filling the president_speeches table
|
||||
|
||||
-- Sources:
|
||||
-- https://archive.org/details/State-of-the-Union-Addresses-1945-2006
|
||||
-- http://www.presidency.ucsb.edu/ws/index.php
|
||||
-- https://www.eisenhower.archives.gov/all_about_ike/speeches.html
|
||||
|
||||
CREATE TABLE president_speeches (
|
||||
sotu_id serial PRIMARY KEY,
|
||||
president varchar(100) NOT NULL,
|
||||
title varchar(250) NOT NULL,
|
||||
speech_date date NOT NULL,
|
||||
speech_text text NOT NULL,
|
||||
search_speech_text tsvector
|
||||
);
|
||||
|
||||
COPY president_speeches (president, title, speech_date, speech_text)
|
||||
FROM 'C:\YourDirectory\sotu-1946-1977.csv'
|
||||
WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@');
|
||||
|
||||
SELECT * FROM president_speeches;
|
||||
|
||||
-- Listing 13-19: Converting speeches to tsvector in the search_speech_text column
|
||||
|
||||
UPDATE president_speeches
|
||||
SET search_speech_text = to_tsvector('english', speech_text);
|
||||
|
||||
-- Listing 13-20: Creating a GIN index for text search
|
||||
|
||||
CREATE INDEX search_idx ON president_speeches USING gin(search_speech_text);
|
||||
|
||||
-- Listing 13-21: Finding speeches containing the word "Vietnam"
|
||||
|
||||
SELECT president, speech_date
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('Vietnam')
|
||||
ORDER BY speech_date;
|
||||
|
||||
-- Listing 13-22: Displaying search results with ts_headline()
|
||||
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_headline(speech_text, to_tsquery('Vietnam'),
|
||||
'StartSel = <,
|
||||
StopSel = >,
|
||||
MinWords=5,
|
||||
MaxWords=7,
|
||||
MaxFragments=1')
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('Vietnam');
|
||||
|
||||
-- Listing 13-23: Finding speeches with the word "transportation" but not "roads"
|
||||
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_headline(speech_text, to_tsquery('transportation & !roads'),
|
||||
'StartSel = <,
|
||||
StopSel = >,
|
||||
MinWords=5,
|
||||
MaxWords=7,
|
||||
MaxFragments=1')
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('transportation & !roads');
|
||||
|
||||
-- Listing 13-24: Find speeches where "defense" follows "military"
|
||||
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_headline(speech_text, to_tsquery('military <-> defense'),
|
||||
'StartSel = <,
|
||||
StopSel = >,
|
||||
MinWords=5,
|
||||
MaxWords=7,
|
||||
MaxFragments=1')
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('military <-> defense');
|
||||
|
||||
-- Bonus: Example with a distance of 2:
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_headline(speech_text, to_tsquery('military <2> defense'),
|
||||
'StartSel = <,
|
||||
StopSel = >,
|
||||
MinWords=5,
|
||||
MaxWords=7,
|
||||
MaxFragments=2')
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('military <2> defense');
|
||||
|
||||
-- Listing 13-25: Scoring relevance with ts_rank()
|
||||
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_rank(search_speech_text,
|
||||
to_tsquery('war & security & threat & enemy')) AS score
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
|
||||
ORDER BY score DESC
|
||||
LIMIT 5;
|
||||
|
||||
-- Listing 13-26: Normalizing ts_rank() by speech length
|
||||
|
||||
SELECT president,
|
||||
speech_date,
|
||||
ts_rank(search_speech_text,
|
||||
to_tsquery('war & security & threat & enemy'), 2)::numeric
|
||||
AS score
|
||||
FROM president_speeches
|
||||
WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
|
||||
ORDER BY score DESC
|
||||
LIMIT 5;
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 13: Mining Text to Find Meaningful Data
|
||||
### Chapter 14: Mining Text to Find Meaningful Data
|
||||
|
||||
Covers how to use PostgreSQL’s full text search engine and regular expressions to extract data from unstructured text, using a collection of speeches by U.S. presidents as an example.
|
||||
|
||||
|
||||
@ -1,252 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 14 Code Examples
--------------------------------------------------------------

-- Listing 14-1: Creating a gis_analysis database

CREATE DATABASE gis_analysis;
-- Note: Switch to this new database before continuing the examples

-- Listing 14-2: Loading the PostGIS extension

CREATE EXTENSION postgis;

SELECT postgis_full_version(); -- shows PostGIS version

-- Listing 14-3: Retrieving the well-known text for SRID 4326

SELECT srtext
FROM spatial_ref_sys
WHERE srid = 4326;

-- Listing 14-4: Using ST_GeomFromText() to create spatial objects
-- Each example passes well-known text (WKT) plus the SRID 4326 (WGS 84).

SELECT ST_GeomFromText('POINT(-74.9233606 42.699992)', 4326);

SELECT ST_GeomFromText('LINESTRING(-74.9 42.7, -75.1 42.7)', 4326);

SELECT ST_GeomFromText('POLYGON((-74.9 42.7, -75.1 42.7,
                                 -75.1 42.6, -74.9 42.7))', 4326);

SELECT ST_GeomFromText('MULTIPOINT (-74.9 42.7, -75.1 42.7)', 4326);

SELECT ST_GeomFromText('MULTILINESTRING((-76.27 43.1, -76.06 43.08),
                                        (-76.2 43.3, -76.2 43.4,
                                         -76.4 43.1))', 4326);

-- The second ring here is a hole inside the first polygon.
SELECT ST_GeomFromText('MULTIPOLYGON((
                        (-74.92 42.7, -75.06 42.71,
                         -75.07 42.64, -74.92 42.7),
                        (-75.0 42.66, -75.0 42.64,
                         -74.98 42.64, -74.98 42.66,
                         -75.0 42.66)))', 4326);

-- Listing 14-5: Using ST_GeogFromText() to create spatial objects
-- geography type: coordinates treated as points on a sphere, not a plane.

SELECT
ST_GeogFromText('SRID=4326;MULTIPOINT(-74.9 42.7, -75.1 42.7, -74.924 42.6)');

-- Listing 14-6: Functions specific to making points

SELECT ST_PointFromText('POINT(-74.9233606 42.699992)', 4326);

SELECT ST_MakePoint(-74.9233606, 42.699992);
SELECT ST_SetSRID(ST_MakePoint(-74.9233606, 42.699992), 4326);

-- Listing 14-7: Functions specific to making LineStrings

SELECT ST_LineFromText('LINESTRING(-105.90 35.67,-105.91 35.67)', 4326);
SELECT ST_MakeLine(ST_MakePoint(-74.92, 42.69), ST_MakePoint(-74.12, 42.45));

-- Listing 14-8: Functions specific to making Polygons

SELECT ST_PolygonFromText('POLYGON((-74.9 42.7, -75.1 42.7,
                                    -75.1 42.6, -74.9 42.7))', 4326);

SELECT ST_MakePolygon(
           ST_GeomFromText('LINESTRING(-74.92 42.7, -75.06 42.71,
                                       -75.07 42.64, -74.92 42.7)', 4326));

SELECT ST_MPolyFromText('MULTIPOLYGON((
                         (-74.92 42.7, -75.06 42.71,
                          -75.07 42.64, -74.92 42.7),
                         (-75.0 42.66, -75.0 42.64,
                          -74.98 42.64, -74.98 42.66,
                          -75.0 42.66)
                         ))', 4326);
-- ANALYZING FARMERS MARKETS DATA
-- https://catalog.data.gov/dataset/farmers-markets-geographic-data
-- https://www.ams.usda.gov/local-food-directories/farmersmarkets


-- Listing 14-9: Create and load the farmers_markets table

CREATE TABLE farmers_markets (
    fmid bigint PRIMARY KEY,
    market_name varchar(100) NOT NULL,
    street varchar(180),
    city varchar(60),
    county varchar(25),
    st varchar(20) NOT NULL,
    zip varchar(10),
    longitude numeric(10,7),
    latitude numeric(10,7),
    organic varchar(1) NOT NULL
);

COPY farmers_markets
FROM 'C:\YourDirectory\farmers_markets.csv'
WITH (FORMAT CSV, HEADER);

SELECT count(*) FROM farmers_markets; -- should return 8,681 rows

-- Listing 14-10: Creating and indexing a geography column
-- There's also a function: https://postgis.net/docs/AddGeometryColumn.html

-- Add column
ALTER TABLE farmers_markets ADD COLUMN geog_point geography(POINT,4326);

-- Now fill that column with the lat/long
-- Note: ST_MakePoint() takes longitude first, then latitude.
UPDATE farmers_markets
SET geog_point = ST_SetSRID(
                     ST_MakePoint(longitude,latitude),4326
                 )::geography;

-- Add a GiST index so spatial searches can avoid full scans
CREATE INDEX market_pts_idx ON farmers_markets USING GIST (geog_point);

-- View the geography column
SELECT longitude,
       latitude,
       geog_point,
       ST_AsText(geog_point)
FROM farmers_markets
WHERE longitude IS NOT NULL
LIMIT 5;

-- Listing 14-11: Using ST_DWithin() to locate farmers' markets within 10 kilometers of a point
-- The distance argument (10000) is in meters for geography inputs.

SELECT market_name,
       city,
       st
FROM farmers_markets
WHERE ST_DWithin(geog_point,
                 ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
                 10000)
ORDER BY market_name;

-- Listing 14-12: Using ST_Distance() to calculate the miles between Yankee Stadium
-- and Citi Field (Mets)
-- 1609.344 meters/mile

SELECT ST_Distance(
           ST_GeogFromText('POINT(-73.9283685 40.8296466)'),
           ST_GeogFromText('POINT(-73.8480153 40.7570917)')
       ) / 1609.344 AS mets_to_yanks;

-- Listing 14-13: Using ST_Distance() for each row in farmers_markets

SELECT market_name,
       city,
       round(
           (ST_Distance(geog_point,
                        ST_GeogFromText('POINT(-93.6204386 41.5853202)')
                        ) / 1609.344)::numeric(8,5), 2
       ) AS miles_from_dt
FROM farmers_markets
WHERE ST_DWithin(geog_point,
                 ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
                 10000)
ORDER BY miles_from_dt ASC;
-- WORKING WITH SHAPEFILES

-- Resources:
-- TIGER/Line® Shapefiles and TIGER/Line® Files
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
-- Cartographic Boundary Shapefiles - Counties
-- https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html

-- Listing 14-14: Checking the geom column's well-known text representation

SELECT ST_AsText(geom)
FROM us_counties_2010_shp
LIMIT 1;

-- Listing 14-15: Find the largest counties by area using ST_Area()
-- 2589988.110336 = square meters per square mile; the geography cast
-- makes ST_Area() return square meters.

SELECT name10,
       statefp10 AS st,
       round(
           ( ST_Area(geom::geography) / 2589988.110336 )::numeric, 2
       ) AS square_miles
FROM us_counties_2010_shp
ORDER BY square_miles DESC
LIMIT 5;

-- Listing 14-16: Using ST_Within() to find the county belonging to a pair of coordinates
-- The point's SRID (4269) must match the SRID of the geom column.

SELECT name10,
       statefp10
FROM us_counties_2010_shp
WHERE ST_Within('SRID=4269;POINT(-118.3419063 34.0977076)'::geometry, geom);
-- SPATIAL JOINS
-- SANTA FE WATER AND ROAD DATA
-- http://www.santafenm.gov/santa_fe_river

-- Census 2016 Tiger/Line roads, water
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Roads
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Water

-- RTTYP - Route Type Code Description
-- https://www.census.gov/geo/reference/rttyp.html
-- C County
-- I Interstate
-- M Common Name
-- O Other
-- S State recognized
-- U U.S.

-- MTFCC MAF/TIGER feature class code
-- https://www.census.gov/geo/reference/mtfcc.html
-- Here, H3010: A natural flowing waterway

-- Listing 14-17: Using ST_GeometryType() to determine geometry

SELECT ST_GeometryType(geom)
FROM santafe_linearwater_2016
LIMIT 1;

SELECT ST_GeometryType(geom)
FROM santafe_roads_2016
LIMIT 1;

-- Listing 14-18: Spatial join with ST_Intersects() to find roads crossing the Santa Fe river

SELECT water.fullname AS waterway,
       roads.rttyp,
       roads.fullname AS road
FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
    ON ST_Intersects(water.geom, roads.geom)
WHERE water.fullname = 'Santa Fe Riv'
ORDER BY roads.fullname;

-- Listing 14-19: Using ST_Intersection() to show where roads cross the river
-- ST_Intersection() returns the geometry shared by both inputs (here, points).

SELECT water.fullname AS waterway,
       roads.rttyp,
       roads.fullname AS road,
       ST_AsText(ST_Intersection(water.geom, roads.geom))
FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
    ON ST_Intersects(water.geom, roads.geom)
WHERE water.fullname = 'Santa Fe Riv'
ORDER BY roads.fullname
LIMIT 5;
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 14: Analyzing Spatial Data with PostGIS
|
||||
### Chapter 15: Analyzing Spatial Data with PostGIS
|
||||
|
||||
Introduces data types and queries related to spatial objects, which will let you analyze geographical features like states, roads, and rivers.
|
||||
|
||||
|
||||
@ -1,321 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 15 Code Examples
--------------------------------------------------------------

-- VIEWS

-- Listing 15-1: Creating a view that displays Nevada 2010 counties

CREATE OR REPLACE VIEW nevada_counties_pop_2010 AS
    SELECT geo_name,
           state_fips,
           county_fips,
           p0010001 AS pop_2010
    FROM us_counties_2010
    WHERE state_us_abbreviation = 'NV'
    ORDER BY county_fips;

-- Listing 15-2: Querying the nevada_counties_pop_2010 view

SELECT *
FROM nevada_counties_pop_2010
LIMIT 5;

-- Listing 15-3: Creating a view showing population change for US counties
-- The CAST to numeric(8,1) forces decimal (not integer) division.

CREATE OR REPLACE VIEW county_pop_change_2010_2000 AS
    SELECT c2010.geo_name,
           c2010.state_us_abbreviation AS st,
           c2010.state_fips,
           c2010.county_fips,
           c2010.p0010001 AS pop_2010,
           c2000.p0010001 AS pop_2000,
           round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
               / c2000.p0010001 * 100, 1 ) AS pct_change_2010_2000
    FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
        ON c2010.state_fips = c2000.state_fips
        AND c2010.county_fips = c2000.county_fips
    ORDER BY c2010.state_fips, c2010.county_fips;

-- Listing 15-4: Selecting columns from the county_pop_change_2010_2000 view

SELECT geo_name,
       st,
       pop_2010,
       pct_change_2010_2000
FROM county_pop_change_2010_2000
WHERE st = 'NV'
LIMIT 5;

-- Listing 15-5: Creating a view on the employees table
-- WITH LOCAL CHECK OPTION rejects inserts/updates whose rows would not
-- satisfy the view's WHERE clause (dept_id = 1).

CREATE OR REPLACE VIEW employees_tax_dept AS
    SELECT emp_id,
           first_name,
           last_name,
           dept_id
    FROM employees
    WHERE dept_id = 1
    ORDER BY emp_id
    WITH LOCAL CHECK OPTION;

SELECT * FROM employees_tax_dept;

-- Listing 15-6: Successful and rejected inserts via the employees_tax_dept view

INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
VALUES ('Suzanne', 'Legere', 1);

-- This insert fails the check option because dept_id is not 1.
INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
VALUES ('Jamil', 'White', 2);

-- optional:
SELECT * FROM employees_tax_dept;

SELECT * FROM employees;

-- Listing 15-7: Updating a row via the employees_tax_dept view

UPDATE employees_tax_dept
SET last_name = 'Le Gere'
WHERE emp_id = 5;

SELECT * FROM employees_tax_dept;

-- Bonus: This will fail because the salary column is not in the view
UPDATE employees_tax_dept
SET salary = 100000
WHERE emp_id = 5;

-- Listing 15-8: Deleting a row via the employees_tax_dept view

DELETE FROM employees_tax_dept
WHERE emp_id = 5;
-- FUNCTIONS
-- https://www.postgresql.org/docs/current/static/plpgsql.html

-- Listing 15-9: Creating a percent_change function
-- To delete this function: DROP FUNCTION percent_change(numeric,numeric,integer);
-- NOTE(review): raises division_by_zero if old_value = 0; callers below
-- pass census populations, which are never zero.

CREATE OR REPLACE FUNCTION
percent_change(new_value numeric,
               old_value numeric,
               decimal_places integer DEFAULT 1)
RETURNS numeric AS
'SELECT round(
        ((new_value - old_value) / old_value) * 100, decimal_places
);'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;

-- Listing 15-10: Testing the percent_change() function

SELECT percent_change(110, 108, 2);

-- Listing 15-11: Testing percent_change() on Census data
-- The function result should match the hand-written formula column.

SELECT c2010.geo_name,
       c2010.state_us_abbreviation AS st,
       c2010.p0010001 AS pop_2010,
       percent_change(c2010.p0010001, c2000.p0010001) AS pct_chg_func,
       round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
           / c2000.p0010001 * 100, 1 ) AS pct_chg_formula
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
    ON c2010.state_fips = c2000.state_fips
    AND c2010.county_fips = c2000.county_fips
ORDER BY pct_chg_func DESC
LIMIT 5;

-- Listing 15-12: Adding a column to the teachers table and seeing the data

ALTER TABLE teachers ADD COLUMN personal_days integer;

SELECT first_name,
       last_name,
       hire_date,
       personal_days
FROM teachers;

-- Listing 15-13: Creating an update_personal_days() function
-- Assigns personal days based on each teacher's length of service.

CREATE OR REPLACE FUNCTION update_personal_days()
RETURNS void AS $$
BEGIN
    UPDATE teachers
    SET personal_days =
        CASE WHEN (now() - hire_date) BETWEEN '5 years'::interval
                  AND '10 years'::interval THEN 4
             WHEN (now() - hire_date) > '10 years'::interval THEN 5
             ELSE 3
        END;
    RAISE NOTICE 'personal_days updated!';
END;
$$ LANGUAGE plpgsql;

-- To run the function:
SELECT update_personal_days();

-- Listing 15-14: Enabling the PL/Python procedural language

CREATE EXTENSION plpythonu;

-- Listing 15-15: Using PL/Python to create the trim_county() function

CREATE OR REPLACE FUNCTION trim_county(input_string text)
RETURNS text AS $$
import re
cleaned = re.sub(r' County', '', input_string)
return cleaned
$$ LANGUAGE plpythonu;

-- Listing 15-16: Testing the trim_county() function

SELECT geo_name,
       trim_county(geo_name)
FROM us_counties_2010
ORDER BY state_fips, county_fips
LIMIT 5;
-- TRIGGERS

-- Listing 15-17: Creating the grades and grades_history tables

CREATE TABLE grades (
    student_id bigint,
    course_id bigint,
    course varchar(30) NOT NULL,
    grade varchar(5) NOT NULL,
    PRIMARY KEY (student_id, course_id)
);

INSERT INTO grades
VALUES
    (1, 1, 'Biology 2', 'F'),
    (1, 2, 'English 11B', 'D'),
    (1, 3, 'World History 11B', 'C'),
    (1, 4, 'Trig 2', 'B');

CREATE TABLE grades_history (
    student_id bigint NOT NULL,
    course_id bigint NOT NULL,
    change_time timestamp with time zone NOT NULL,
    course varchar(30) NOT NULL,
    old_grade varchar(5) NOT NULL,
    new_grade varchar(5) NOT NULL,
    PRIMARY KEY (student_id, course_id, change_time)
);

-- Listing 15-18: Creating the record_if_grade_changed() function
-- Audit function: writes a history row only when a grade actually changes.
-- OLD and NEW hold the row's values before and after the UPDATE.

CREATE OR REPLACE FUNCTION record_if_grade_changed()
    RETURNS trigger AS
$$
BEGIN
    IF NEW.grade <> OLD.grade THEN
    INSERT INTO grades_history (
        student_id,
        course_id,
        change_time,
        course,
        old_grade,
        new_grade)
    VALUES
        (OLD.student_id,
         OLD.course_id,
         now(),
         OLD.course,
         OLD.grade,
         NEW.grade);
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Listing 15-19: Creating the grades_update trigger

CREATE TRIGGER grades_update
  AFTER UPDATE
  ON grades
  FOR EACH ROW
  EXECUTE PROCEDURE record_if_grade_changed();

-- Listing 15-20: Testing the grades_update trigger

-- Initially, there should be 0 records in the history
SELECT * FROM grades_history;

-- Check the grades
SELECT * FROM grades;

-- Update a grade
UPDATE grades
SET grade = 'C'
WHERE student_id = 1 AND course_id = 1;

-- Now check the history
SELECT student_id,
       change_time,
       course,
       old_grade,
       new_grade
FROM grades_history;

-- Listing 15-21: Creating a temperature_test table

CREATE TABLE temperature_test (
    station_name varchar(50),
    observation_date date,
    max_temp integer,
    min_temp integer,
    max_temp_group varchar(40),
    PRIMARY KEY (station_name, observation_date)
);

-- Listing 15-22: Creating the classify_max_temp() function
-- BEFORE INSERT trigger function: fills max_temp_group from max_temp
-- before the row is stored.

CREATE OR REPLACE FUNCTION classify_max_temp()
    RETURNS trigger AS
$$
BEGIN
    CASE
       WHEN NEW.max_temp >= 90 THEN
           NEW.max_temp_group := 'Hot';
       WHEN NEW.max_temp BETWEEN 70 AND 89 THEN
           NEW.max_temp_group := 'Warm';
       WHEN NEW.max_temp BETWEEN 50 AND 69 THEN
           NEW.max_temp_group := 'Pleasant';
       WHEN NEW.max_temp BETWEEN 33 AND 49 THEN
           NEW.max_temp_group := 'Cold';
       WHEN NEW.max_temp BETWEEN 20 AND 32 THEN
           NEW.max_temp_group := 'Freezing';
       ELSE NEW.max_temp_group := 'Inhumane';
    END CASE;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Listing 15-23: Creating the temperature_insert trigger

CREATE TRIGGER temperature_insert
    BEFORE INSERT
    ON temperature_test
    FOR EACH ROW
    EXECUTE PROCEDURE classify_max_temp();

-- Listing 15-24: Inserting rows to test the temperature_update trigger

INSERT INTO temperature_test (station_name, observation_date, max_temp, min_temp)
VALUES
    ('North Station', '1/19/2019', 10, -3),
    ('North Station', '3/20/2019', 28, 19),
    ('North Station', '5/2/2019', 65, 42),
    ('North Station', '8/9/2019', 93, 74);

SELECT * FROM temperature_test;
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 15: Saving Time with Views, Functions, and Triggers:
|
||||
### Chapter 17: Saving Time with Views, Functions, and Triggers:
|
||||
|
||||
Explains how to automate database tasks so you can avoid repeating routine work.
|
||||
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 16: Using PostgreSQL from the Command Line
|
||||
### Chapter 18: Using PostgreSQL from the Command Line
|
||||
|
||||
Covers how to use text commands at your computer’s command prompt to connect to your database and run queries.
|
||||
|
||||
|
||||
@ -1,104 +0,0 @@
|
||||
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros

-- Chapter 17 Code Examples
--------------------------------------------------------------

-- VACUUM

-- Listing 17-1: Creating a table to test vacuuming

CREATE TABLE vacuum_test (
    integer_column integer
);

-- Listing 17-2: Determining the size of vacuum_test

SELECT pg_size_pretty(
           pg_total_relation_size('vacuum_test')
       );

-- optional: Determine database size
SELECT pg_size_pretty(
           pg_database_size('analysis')
       );

-- Listing 17-3: Inserting 500,000 rows into vacuum_test

INSERT INTO vacuum_test
SELECT * FROM generate_series(1,500000);

-- Test its size again
SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );

-- Listing 17-4: Updating all rows in vacuum_test
-- Intentionally updates every row (no WHERE clause) so each row leaves
-- behind a dead tuple, roughly doubling the table's size.

UPDATE vacuum_test
SET integer_column = integer_column + 1;

-- Test its size again (35 MB)
SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );

-- Listing 17-5: Viewing autovacuum statistics for vacuum_test

SELECT relname,
       last_vacuum,
       last_autovacuum,
       vacuum_count,
       autovacuum_count
FROM pg_stat_all_tables
WHERE relname = 'vacuum_test';

-- To see all columns available
SELECT *
FROM pg_stat_all_tables
WHERE relname = 'vacuum_test';

-- Listing 17-6: Running VACUUM manually

VACUUM vacuum_test;

VACUUM; -- vacuums the whole database

VACUUM VERBOSE; -- provides messages

-- Listing 17-7: Using VACUUM FULL to reclaim disk space
-- VACUUM FULL rewrites the table and returns space to the OS, but takes
-- an exclusive lock while it runs.

VACUUM FULL vacuum_test;

-- Test its size again
SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );
-- SETTINGS

-- Listing 17-8: Showing the location of postgresql.conf

SHOW config_file;

-- Listing 17-10: Show the location of the data directory

SHOW data_directory;

-- reload settings
-- Mac and Linux: pg_ctl reload -D '/path/to/data/directory/'
-- Windows: pg_ctl reload -D "C:\path\to\data\directory\"


-- BACKUP AND RESTORE
-- NOTE: pg_dump and pg_restore are operating-system command-line programs,
-- not SQL statements; run them from a terminal, not from psql or pgAdmin.

-- Listing 17-11: Backing up the analysis database with pg_dump
-- -Fc writes PostgreSQL's compressed custom archive format (readable only
-- by pg_restore), despite the .sql extension used here.
pg_dump -d analysis -U [user_name] -Fc > analysis_backup.sql

-- Back up just a table
pg_dump -t 'train_rides' -d analysis -U [user_name] -Fc > train_backup.sql

-- Listing 17-12: Restoring the analysis database with pg_restore
-- Fixed: restore the file created in Listing 17-11 (was
-- analysis_backup_custom.sql, a file no listing here creates).
-- -C creates the database before restoring into it.

pg_restore -C -d postgres -U postgres analysis_backup.sql
@ -2,7 +2,7 @@
|
||||
|
||||
[PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL).
|
||||
|
||||
### Chapter 17: Maintaining Your Database:
|
||||
### Chapter 19: Maintaining Your Database:
|
||||
|
||||
Provides tips and procedures for tracking the size of your database, customizing settings, and backing up data.
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user