From 4815546ed1373fc8213a82147b819d58805e705c Mon Sep 17 00:00:00 2001 From: Tom Arns Date: Mon, 6 Apr 2026 17:20:09 -0700 Subject: [PATCH] Made the examples for the advanced database lesson executable. Students complained previously that there was no database which matched the queries. --- ...10Advanced SQL and Database Integration.md | 273 +++++++++++++++--- 1 file changed, 239 insertions(+), 34 deletions(-) diff --git a/lessons/10Advanced SQL and Database Integration.md b/lessons/10Advanced SQL and Database Integration.md index 6d2c099..c88b7bc 100644 --- a/lessons/10Advanced SQL and Database Integration.md +++ b/lessons/10Advanced SQL and Database Integration.md @@ -17,6 +17,105 @@ --- +## **Create the database for the lesson examples** + +Run this code in your lesson working area to populate a database which will be used to run the examples. + +```python +import sqlite3 +import os + +# Note, you need to create a 'db' directory if it isn't already in your workspace +DB_PATH = "db/company.db" + +# Start fresh so results are predictable when re-running this script +if os.path.exists(DB_PATH): + os.remove(DB_PATH) + +conn = sqlite3.connect(DB_PATH) +cur = conn.cursor() +cur.execute("PRAGMA foreign_keys = ON;") + +# --- Schema --- +cur.executescript(""" +CREATE TABLE Departments ( + department_id INTEGER PRIMARY KEY, + department_name TEXT NOT NULL UNIQUE, + manager_id INTEGER -- (left without FK to avoid circular reference) +); +CREATE TABLE Employees ( + employee_id INTEGER PRIMARY KEY, + first_name TEXT NOT NULL, + last_name TEXT NOT NULL, + department_id INTEGER NOT NULL, + title TEXT NOT NULL, + salary INTEGER NOT NULL, + hire_date TEXT DEFAULT (date('now')), + FOREIGN KEY (department_id) REFERENCES Departments(department_id) +); +""") + +# --- Seed data --- +cur.executemany( + "INSERT INTO Departments(department_id, department_name) VALUES (?, ?);", + [ + (10, "Engineering"), + (20, "Sales"), + (30, "HR"), + (40, "Finance"), + (50, "R&D"), + ], +) + +employees = [ + # Engineering (dept 10) + (1, "Alice", "Nguyen", 10, "Software Engineer", 120000, "2019-05-01"), + (2, "Bob", "Smith", 10, "Senior Software Engineer", 135000, "2018-07-15"), + (3, "Carol", "Zhang", 10, "Staff Engineer", 135000, "2017-03-20"), # tie + (4, "David", "Lee", 10, "QA Engineer", 95000, "2021-11-02"), + + # Sales (dept 20) + (5, "Eve", "Martinez", 20, "Sales Associate", 90000, "2020-01-10"), + (6, "Frank", "O'Connor", 20, "Account Executive", 110000, "2016-09-29"), + (7, "Grace", "Kim", 20, "Sales Manager", 105000, "2015-04-12"), + + # HR (dept 30) -> avg ~68.5k (so it will be filtered out by HAVING > 70000) + (8, "Heidi", "Brown", 30, "HR Generalist", 65000, "2022-06-03"), + (9, "Ivan", "Garcia", 30, "HR Manager", 72000, "2019-08-21"), + + # Finance (dept 40) + (10, "Judy", "Wilson", 40, "Financial Analyst", 125000, "2017-02-17"), + (11, "Karl", "Davis", 40, "Finance Director", 130000, "2014-12-09"), + + # R&D (dept 50) + (12, "Liam", "Patel", 50, "Research Scientist", 150000, "2018-10-31"), + (13, "Mia", "Chen", 50, "Principal Scientist", 150000, "2013-05-07"), # tie +] + +cur.executemany( + """INSERT INTO Employees + (employee_id, first_name, last_name, department_id, title, salary, hire_date) + VALUES (?, ?, ?, ?, ?, ?, ?);""", + employees, +) + +# Assign managers by employee_id for each department +cur.executemany( + "UPDATE Departments SET manager_id = ? WHERE department_id = ?;", + [ + (2, 10), # Engineering -> Bob Smith + (7, 20), # Sales -> Grace Kim + (9, 30), # HR -> Ivan Garcia + (11, 40), # Finance -> Karl Davis + (13, 50), # R&D -> Mia Chen + ], +) + +conn.commit() +conn.close() +print("company.db created with Departments(department_name, manager_id) and Employees.") +``` + ## **10.1 Understanding Subqueries** ### **Overview** @@ -27,7 +126,7 @@ Subqueries are nested SQL queries used to perform intermediate calculations or s - Common use cases include finding maximum, minimum, or aggregated values. ### **Example:** -Find the highest-paid employee in each department using a subquery. +This SQL code will find the highest-paid employee in each department using a subquery. ```sql SELECT department_id, employee_id, salary FROM Employees AS e @@ -86,27 +185,55 @@ Complex `JOIN`s allow you to retrieve data from multiple related tables. - `INNER JOIN`: Retrieves records with matching values in both tables. - `LEFT JOIN`: Retrieves all records from the left table and matching records from the right. -### **SQL Example: Create and Populate a Projects Table** -```sql +### **Python Example: Create and Populate a Projects Table** +```python +conn = sqlite3.connect("db/company.db") +cursor = conn.cursor() + +# create Projects table + +creation = """ CREATE TABLE Projects ( id INTEGER PRIMARY KEY, name TEXT NOT NULL, - department TEXT NOT NULL + department_id INTEGER NOT NULL, + FOREIGN KEY (department_id) REFERENCES Departments(department_id) ); +""" +cursor.execute(creation) + +# add department_ids +insertion = """ +INSERT INTO Projects (name, department_id) VALUES +('Project A', (SELECT department_id FROM Departments WHERE department_name = 'HR')), +('Project B', (SELECT department_id FROM Departments WHERE department_name = 'Engineering')), +('Project C', (SELECT department_id FROM Departments WHERE department_name = 'Finance')); +""" +cursor.execute(insertion) -INSERT INTO Projects (name, department) VALUES -('Project A', 'HR'), -('Project B', 'IT'), -('Project C', 'Finance'); +conn.commit() +conn.close() ``` -### **SQL Example: Complex JOIN** -List employees working in departments responsible for a specific project: -```sql -SELECT Employees.name, Projects.name AS project_name -FROM Employees -JOIN Projects ON Employees.department = Projects.department -WHERE Projects.name = 'Project A'; +### **Python Example: Complex JOIN** + +```python +# List employees working in departments responsible for a specific project: +conn = sqlite3.connect("db/company.db") +cursor = conn.cursor() + +# Execute the query - try it with all the projects +query = """ +SELECT (e.first_name || ' ' || e.last_name) AS employee_name, + p.name AS project_name +FROM Employees AS e +JOIN Projects AS p ON e.department_id = p.department_id +WHERE p.name = 'Project A'; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() ``` --- @@ -117,22 +244,51 @@ WHERE Projects.name = 'Project A'; Aggregation functions like `MIN()`, `MAX()`, `COUNT()`, and `AVG()` allow you to summarize data across groups. ### **Task:** -Calculate the minimum and maximum salaries and the number of employees in each department. +Aggregation: Calculate the minimum and maximum salaries and the number of employees in each department. -### **SQL Example:** -```sql +### **Python Example:** +```python +conn = sqlite3.connect("db/company.db") +cursor = conn.cursor() + +query = """ SELECT department_id, MIN(salary) AS min_salary, MAX(salary) AS max_salary, COUNT(employee_id) AS num_employees FROM Employees GROUP BY department_id; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() ``` ### **Key Notes:** - Use `GROUP BY` to organize results by department. - Ensure fields in the `SELECT` clause are either aggregated or part of the `GROUP BY`. +### **Python example which uses Inner Join so department name is used:** + +```python +""" +SELECT d.department_name AS department, + MIN(e.salary) AS min_salary, + MAX(e.salary) AS max_salary, + COUNT(e.employee_id) AS num_employees +FROM Employees AS e +JOIN Departments AS d + ON e.department_id = d.department_id +GROUP BY d.department_id, d.department_name +ORDER BY d.department_name; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() +``` + --- ## **10.4 Aggregation with HAVING** @@ -143,8 +299,9 @@ The `HAVING` clause filters aggregated results after the `GROUP BY` operation. ### **Task:** List all departments where the average salary exceeds 70,000 and display the department manager. -### **SQL Example:** -```sql +### **Python Example:** +```python +query = """ SELECT d.department_name, d.manager_id, AVG(e.salary) AS avg_salary @@ -152,6 +309,36 @@ FROM Departments AS d JOIN Employees AS e ON d.department_id = e.department_id GROUP BY d.department_id HAVING AVG(e.salary) > 70000; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() +``` + +### **Example which lists the manager by name** +```python +# This version reports the manager's name instead of id +query = """ +WITH dept_avg AS ( + SELECT e.department_id, AVG(e.salary) AS avg_salary + FROM Employees AS e + GROUP BY e.department_id +) +SELECT d.department_name, + (m.first_name || ' ' || m.last_name) AS manager_name, + da.avg_salary +FROM dept_avg AS da +JOIN Departments AS d + ON d.department_id = da.department_id +LEFT JOIN Employees AS m + ON m.employee_id = d.manager_id +WHERE da.avg_salary > 70000; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() ``` ### **Key Notes:** @@ -252,11 +439,22 @@ Security is a critical part of database integration. Let's practice explaining S ### **Overview** SQL window functions allow for advanced analysis over a specified range of rows. For example, calculating the rank of employees within a department based on salary. -### **SQL Example:** -```sql -SELECT name, salary, department_id, - RANK() OVER (PARTITION BY department_id ORDER BY salary DESC) AS rank -FROM Employees; +### **Python Example:** +```python +conn = sqlite3.connect("db/company.db") +cursor = conn.cursor() + +query = """ +SELECT (e.first_name || ' ' || e.last_name) AS employee_name, + e.salary, + e.department_id, + RANK() OVER (PARTITION BY e.department_id ORDER BY e.salary DESC) AS rank +FROM Employees AS e; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() ``` --- @@ -266,11 +464,21 @@ FROM Employees; ### **Overview** SQL provides functions for manipulating and querying date and time data, which are useful when working with time-based analysis. -### **SQL Example:** -```sql -SELECT name, date_of_birth, - JULIANDAY('now') - JULIANDAY(date_of_birth) AS age_in_days -FROM Employees; +### **Python Example:** +```python +conn = sqlite3.connect("db/company.db") +cursor = conn.cursor() + +query = """ +SELECT (e.first_name || ' ' || e.last_name) AS employee_name, + e.hire_date, + ROUND(JULIANDAY('now') - JULIANDAY(e.hire_date), 2) AS tenure_in_days +FROM Employees AS e; +""" +cursor.execute(query) +print(cursor.fetchall()) + +conn.close() ``` --- @@ -290,10 +498,7 @@ FROM Employees; ### **Example Python Script:** ```python -import sqlite3 - -# Connect to the database -conn = sqlite3.connect("company.db") +conn = sqlite3.connect("db/company.db") cursor = conn.cursor() # Aggregation with HAVING