--------------------------------------------------------------------------- -- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition -- by Anthony DeBarros -- Chapter 16 Code Examples ---------------------------------------------------------------------------- -- Listing 16-1: JSON with information on two films [{ "title": "The Incredibles", "year": 2004, "rating": { "MPAA": "PG" }, "characters": [{ "name": "Mr. Incredible", "actor": "Craig T. Nelson" }, { "name": "Elastigirl", "actor": "Holly Hunter" }, { "name": "Frozone", "actor": "Samuel L. Jackson" }], "genre": ["animation", "action", "sci-fi"] }, { "title": "Cinema Paradiso", "year": 1988, "characters": [{ "name": "Salvatore", "actor": "Salvatore Cascio" }, { "name": "Alfredo", "actor": "Philippe Noiret" }], "genre": ["romance", "drama"] }] -- Listing 16-2: Creating a table to hold JSON data and adding an index CREATE TABLE films ( id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, film jsonb NOT NULL ); COPY films (film) FROM 'C:\YourDirectory\films.json'; CREATE INDEX idx_film ON films USING GIN (film); SELECT * FROM films; -- JSON AND JSONB EXTRACTION OPERATORS -- Listing 16-3: Retrieving a JSON key value with field extraction operators -- Returns the key value as a JSON data type SELECT id, film -> 'title' AS title FROM films ORDER BY id; -- Returns the key value as text SELECT id, film ->> 'title' AS title FROM films ORDER BY id; -- Returns the entire array as a JSON data type SELECT id, film -> 'genre' AS genre FROM films ORDER BY id; -- Listing 16-4: Retrieving a JSON array value with element extraction operators -- Extracts first element of the JSON array -- (array elements are indexed from zero, but negative integers count from the end). SELECT id, film -> 'genre' -> 0 AS genres FROM films ORDER BY id; SELECT id, film -> 'genre' -> -1 AS genres FROM films ORDER BY id; SELECT id, film -> 'genre' -> 2 AS genres FROM films ORDER BY id; -- Return the array element as text SELECT id, film -> 'genre' ->> 0 AS genres FROM films ORDER BY id; -- Listing 16-5: Retrieving a JSON key value with path extraction operators -- Retrieve the film's MPAA rating. SELECT id, film #> '{rating, MPAA}' AS mpaa_rating FROM films ORDER BY id; -- Retrieve the name of the first character SELECT id, film #> '{characters, 0, name}' AS name FROM films ORDER BY id; -- Same as above but return it as text SELECT id, film #>> '{characters, 0, name}' AS name FROM films ORDER BY id; -- JSONB CONTAINMENT AND EXISTENCE OPERATORS -- Listing 16-6: Demonstrating containment operators -- Does the film JSON value contain the following key/value pair? SELECT id, film ->> 'title' AS title, film @> '{"title": "The Incredibles"}'::jsonb AS is_incredible FROM films ORDER BY id; -- Using the containment operator in WHERE SELECT film ->> 'title' AS title, film ->> 'year' AS year FROM films WHERE film @> '{"title": "The Incredibles"}'::jsonb; -- Is the first JSON value contained in the second? SELECT film ->> 'title' AS title, film ->> 'year' AS year FROM films WHERE '{"title": "The Incredibles"}'::jsonb <@ film; -- Listing 16-7: Demonstrating existence operators -- Does the text string exist as a top-level key or array element within the JSON value? SELECT film ->> 'title' AS title FROM films WHERE film ? 'rating'; -- Do any of the strings in the text array exist as top-level keys or array elements? SELECT film ->> 'title' AS title, film ->> 'rating' AS rating, film ->> 'genre' AS genre FROM films WHERE film ?| '{rating, genre}'; -- Do all of the strings in the text array exist as top-level keys or array elements? SELECT film ->> 'title' AS title, film ->> 'rating' AS rating, film ->> 'genre' AS genre FROM films WHERE film ?& '{rating, genre}'; -- ANALYZING EARTHQUAKE DATA -- Listing 16-8: JSON with data on one earthquake { "type": "Feature", "properties": { "mag": 1.44, "place": "134 km W of Adak, Alaska", "time": 1612051063470, "updated": 1612139465880, "tz": null, "url": "https://earthquake.usgs.gov/earthquakes/eventpage/av91018173", "detail": "https://earthquake.usgs.gov/fdsnws/event/1/query?eventid=av91018173&format=geojson", "felt": null, "cdi": null, "mmi": null, "alert": null, "status": "reviewed", "tsunami": 0, "sig": 32, "net": "av", "code": "91018173", "ids": ",av91018173,", "sources": ",av,", "types": ",origin,phase-data,", "nst": 10, "dmin": null, "rms": 0.15, "gap": 174, "magType": "ml", "type": "earthquake", "title": "M 1.4 - 134 km W of Adak, Alaska" }, "geometry": { "type": "Point", "coordinates": [-178.581, 51.8418333333333, 22.48] }, "id": "av91018173" } -- Listing 16-9: Creating and loading an earthquakes table CREATE TABLE earthquakes ( id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY, earthquake jsonb NOT NULL ); COPY earthquakes (earthquake) FROM 'C:\YourDirectory\earthquakes.json'; CREATE INDEX idx_earthquakes ON earthquakes USING GIN (earthquake); SELECT * FROM earthquakes; -- Listing 16-10: Retrieving the earthquake time -- Note that the time is stored in epoch format SELECT id, earthquake #>> '{properties, time}' AS time FROM earthquakes ORDER BY id LIMIT 5; -- Listing 16-11: Converting the time value to a timestamp SELECT id, earthquake #>> '{properties, time}' as time, to_timestamp( (earthquake #>> '{properties, time}')::bigint / 1000 ) AS time_formatted FROM earthquakes ORDER BY id LIMIT 5; -- See and set time zone if desired SHOW timezone; SET timezone TO 'US/Eastern'; SET timezone TO 'UTC'; -- Listing 16-12: Finding the minimum and maximum earthquake times SELECT min(to_timestamp( (earthquake #>> '{properties, time}')::bigint / 1000 )) AT TIME ZONE 'UTC' AS min_timestamp, max(to_timestamp( (earthquake #>> '{properties, time}')::bigint / 1000 )) AT TIME ZONE 'UTC' AS max_timestamp FROM earthquakes; -- Listing 16-13: Finding the five earthquakes with the largest magnitude SELECT earthquake #>> '{properties, place}' AS place, to_timestamp((earthquake #>> '{properties, time}')::bigint / 1000) AT TIME ZONE 'UTC' AS time, (earthquake #>> '{properties, mag}')::numeric AS magnitude FROM earthquakes ORDER BY (earthquake #>> '{properties, mag}')::numeric DESC NULLS LAST LIMIT 5; -- Bonus: Instead of using a path extraction operator (#>>), you can also use field extraction: SELECT earthquake -> 'properties' ->> 'place' AS place, to_timestamp((earthquake -> 'properties' ->> 'time')::bigint / 1000) AT TIME ZONE 'UTC' AS time, (earthquake #>> '{properties, mag}')::numeric AS magnitude FROM earthquakes ORDER BY (earthquake #>> '{properties, mag}')::numeric DESC NULLS LAST LIMIT 5; -- Listing 16-14: Finding earthquakes with most Did You Feel It? reports -- https://earthquake.usgs.gov/data/dyfi/ SELECT earthquake #>> '{properties, place}' AS place, to_timestamp((earthquake #>> '{properties, time}')::bigint / 1000) AT TIME ZONE 'UTC' AS time, (earthquake #>> '{properties, mag}')::numeric AS magnitude, (earthquake #>> '{properties, felt}')::integer AS felt FROM earthquakes ORDER BY (earthquake #>> '{properties, felt}')::integer DESC NULLS LAST LIMIT 5; -- Listing 16-15: Extracting the earthquake's location data SELECT id, earthquake #>> '{geometry, coordinates}' AS coordinates, earthquake #>> '{geometry, coordinates, 0}' AS longitude, earthquake #>> '{geometry, coordinates, 1}' AS latitude FROM earthquakes ORDER BY id LIMIT 5; -- Listing 16-16: Converting JSON location data to PostGIS geography SELECT ST_SetSRID( ST_MakePoint( (earthquake #>> '{geometry, coordinates, 0}')::numeric, (earthquake #>> '{geometry, coordinates, 1}')::numeric ), 4326)::geography AS earthquake_point FROM earthquakes ORDER BY id; -- Listing 16-17: Finding earthquakes within 50 miles of downtown Tulsa, Oklahoma -- Add a column of the geography data type ALTER TABLE earthquakes ADD COLUMN earthquake_point geography(POINT, 4326); -- Update the earthquakes table with a Point UPDATE earthquakes SET earthquake_point = ST_SetSRID( ST_MakePoint( (earthquake #>> '{geometry, coordinates, 0}')::numeric, (earthquake #>> '{geometry, coordinates, 1}')::numeric ), 4326)::geography; CREATE INDEX quake_pt_idx ON earthquakes USING GIST (earthquake_point); -- Earthquakes 50 miles from Tulsa SELECT earthquake #>> '{properties, place}' AS place, to_timestamp((earthquake -> 'properties' ->> 'time')::bigint / 1000) AT TIME ZONE 'UTC' AS time, (earthquake #>> '{properties, mag}')::numeric AS magnitude, earthquake_point FROM earthquakes WHERE ST_DWithin(earthquake_point, ST_GeogFromText('POINT(-95.989505 36.155007)'), 80468) ORDER BY time; -- GENERATING AND MANIPULATING JSON -- Listing 16-18: Turning query results into JSON with row_to_json() and json_agg() -- Convert an entire row from the table SELECT to_json(employees) AS json_rows FROM employees; -- Selected columns from the table (returns key names as f1, f2, etc.) SELECT to_json(row(emp_id, last_name)) AS json_rows FROM employees; -- Generating key names with a subquery SELECT to_json(employees) AS json_rows FROM ( SELECT emp_id, last_name AS ln FROM employees ) AS employees; -- Aggregating the rows and converting to JSON SELECT json_agg(to_json(employees)) AS json FROM ( SELECT emp_id, last_name AS ln FROM employees ) AS employees; -- Listing 16-19: Adding a top-level key/value pair via concatenation -- Two examples UPDATE films SET film = film || '{"studio": "Pixar"}'::jsonb WHERE film @> '{"title": "The Incredibles"}'::jsonb; UPDATE films SET film = film || jsonb_build_object('studio', 'Pixar') WHERE film @> '{"title": "The Incredibles"}'::jsonb; SELECT film FROM films -- check the updated data WHERE film @> '{"title": "The Incredibles"}'::jsonb; -- Listing 16-20: Setting an array value at a path UPDATE films SET film = jsonb_set(film, '{genre}', film #> '{genre}' || '["World War II"]', false) WHERE film @> '{"title": "Cinema Paradiso"}'::jsonb; SELECT film FROM films -- check the updated data WHERE film @> '{"title": "Cinema Paradiso"}'::jsonb; -- Listing 16-21: Deleting values from JSON -- Removes the studio key/value pair from The Incredibles UPDATE films SET film = film - 'studio' WHERE film @> '{"title": "The Incredibles"}'::jsonb; -- Removes the third element in the genre array of Cinema Paradiso UPDATE films SET film = film #- '{genre, 2}' WHERE film @> '{"title": "Cinema Paradiso"}'::jsonb; -- JSON PROCESSING FUNCTIONS -- Listing 16-22: Finding the length of an array SELECT id, film ->> 'title' AS title, jsonb_array_length(film -> 'characters') AS num_characters FROM films ORDER BY id; -- Listing 16-23: Returning array elements as rows SELECT id, jsonb_array_elements(film -> 'genre') AS genre_jsonb, jsonb_array_elements_text(film -> 'genre') AS genre_text FROM films ORDER BY id; -- Listing 16-24: Returning key values from each item in an array SELECT id, jsonb_array_elements(film -> 'characters') FROM films ORDER BY id; WITH characters (id, json) AS ( SELECT id, jsonb_array_elements(film -> 'characters') FROM films ) SELECT id, json ->> 'name' AS name, json ->> 'actor' AS actor FROM characters ORDER BY id;