diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..86c7669
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+materials
diff --git a/week06/lab6.org b/week06/lab6.org
index be599bd..ec00f0b 100644
--- a/week06/lab6.org
+++ b/week06/lab6.org
@@ -1,6 +1,9 @@
 #+title: Lab 6 Amirlan Sharipov (BS-CS21-01)
 #+author: Amirlan Sharipov (BS-CS21-01)
 
+* Disclaimer
+Please, use the lab6.sql file to read/copy the source code. Also, the html version of this document looks better than the pdf one.
+
 * Exercise 1
 ** Table creation and insertion
 I used the schema provided in the slides. And then manually inserted data into the tables.
diff --git a/week08/db-faker.py b/week08/db-faker.py
new file mode 100644
index 0000000..8800054
--- /dev/null
+++ b/week08/db-faker.py
@@ -0,0 +1,28 @@
+# Creates the CUSTOMER table in the week08 database and fills it with 10000 fake rows.
+import psycopg2
+from faker import Faker
+# https://stackabuse.com/working-with-postgresql-in-python/
+con = psycopg2.connect(database="week08", user="postgres",
+                       password="postgres", host="127.0.0.1", port="5432")
+
+print("Database opened successfully")
+try:
+    cur = con.cursor()
+    cur.execute('''CREATE TABLE CUSTOMER
+          (ID INT PRIMARY KEY NOT NULL,
+          Name TEXT NOT NULL,
+          Address TEXT NOT NULL,
+          review TEXT);''')
+    print("Table created successfully")
+    fake = Faker()
+    for i in range(10000):
+        # Pass the values as bound parameters so psycopg2 quotes them; generated
+        # text containing a single quote can no longer break the statement.
+        cur.execute(
+            "INSERT INTO CUSTOMER (ID,Name,Address,review) VALUES (%s,%s,%s,%s)",
+            (i, fake.name(), fake.address(), fake.text()))
+    # Commit once, after every row has been inserted.
+    con.commit()
+finally:
+    # Close the connection even if table creation or an insert fails.
+    con.close()
diff --git a/week08/dvdrental.tar b/week08/dvdrental.tar
new file mode 100644
index 0000000..3066c0c
Binary files /dev/null and b/week08/dvdrental.tar differ
diff --git a/week08/ex2.sql b/week08/ex2.sql
new file mode 100644
index 0000000..d5bc28d
--- /dev/null
+++ b/week08/ex2.sql
@@ -0,0 +1,54 @@
+-- query
+SELECT film.title FROM film INNER JOIN film_category ON film.film_id=film_category.film_id INNER JOIN category ON film_category.category_id=category.category_id WHERE (film.rating='R' OR film.rating='PG-13') AND (category.name='Horror' OR
category.name='Sci-Fi') AND film.film_id NOT IN (SELECT DISTINCT film.film_id FROM film INNER JOIN inventory ON film.film_id=inventory.film_id INNER JOIN rental ON inventory.inventory_id=rental.inventory_id);
+
+-- explain analyze: the same film query, run under EXPLAIN ANALYZE to get the executed plan and timings
+EXPLAIN ANALYZE SELECT film.title FROM film INNER JOIN film_category ON film.film_id=film_category.film_id INNER JOIN category ON film_category.category_id=category.category_id WHERE (film.rating='R' OR film.rating='PG-13') AND (category.name='Horror' OR category.name='Sci-Fi') AND film.film_id NOT IN (SELECT DISTINCT film.film_id FROM film INNER JOIN inventory ON film.film_id=inventory.film_id INNER JOIN rental ON inventory.inventory_id=rental.inventory_id);
+--EXPLAIN ANALYZE SELECT film.film_id FROM film INNER JOIN film_category ON film.film_id=film_category.film_id INNER JOIN category ON film_category.category_id=category.category_id INNER JOIN inventory ON film.film_id=inventory.film_id WHERE (film.rating='R' OR film.rating='PG-13') AND (category.name='Horror' OR category.name='Sci-Fi') AND NOT EXISTS (SELECT rental.inventory_id FROM rental WHERE rental.inventory_id=inventory.inventory_id);
+
+SELECT store.store_id FROM store -- ids of stores whose summed payment amount is the highest among the stores of their city
+INNER JOIN
+(
+    SELECT store.store_id, SUM(payment.amount) AS total FROM store
+    INNER JOIN inventory ON inventory.store_id=store.store_id
+    INNER JOIN rental ON rental.inventory_id=inventory.inventory_id
+    INNER JOIN payment ON payment.rental_id=rental.rental_id
+    GROUP BY store.store_id
+) subq2 ON subq2.store_id=store.store_id -- subq2: summed payment amount per store
+INNER JOIN address ON address.address_id=store.address_id
+INNER JOIN city ON city.city_id=address.city_id WHERE (subq2.total, city.city_id) IN
+(
+    SELECT MAX(subq.total), city.city_id FROM (
+        SELECT store.store_id, SUM(payment.amount) AS total FROM store
+        INNER JOIN inventory ON inventory.store_id=store.store_id
+        INNER JOIN rental ON rental.inventory_id=inventory.inventory_id
+        INNER JOIN payment ON payment.rental_id=rental.rental_id
+        GROUP BY store.store_id
+    ) AS subq -- subq: the same per-store sums, reduced below to the maximum per city
+    INNER JOIN store ON store.store_id=subq.store_id
+    INNER JOIN address ON address.address_id=store.address_id
+    INNER JOIN city ON city.city_id=address.city_id GROUP BY city.city_id
+);
+
+EXPLAIN ANALYZE SELECT store.store_id FROM store -- the same store query, run under EXPLAIN ANALYZE
+INNER JOIN
+(
+    SELECT store.store_id, SUM(payment.amount) AS total FROM store
+    INNER JOIN inventory ON inventory.store_id=store.store_id
+    INNER JOIN rental ON rental.inventory_id=inventory.inventory_id
+    INNER JOIN payment ON payment.rental_id=rental.rental_id
+    GROUP BY store.store_id
+) subq2 ON subq2.store_id=store.store_id
+INNER JOIN address ON address.address_id=store.address_id
+INNER JOIN city ON city.city_id=address.city_id WHERE (subq2.total, city.city_id) IN
+(
+    SELECT MAX(subq.total), city.city_id FROM (
+        SELECT store.store_id, SUM(payment.amount) AS total FROM store
+        INNER JOIN inventory ON inventory.store_id=store.store_id
+        INNER JOIN rental ON rental.inventory_id=inventory.inventory_id
+        INNER JOIN payment ON payment.rental_id=rental.rental_id
+        GROUP BY store.store_id
+    ) AS subq
+    INNER JOIN store ON store.store_id=subq.store_id
+    INNER JOIN address ON address.address_id=store.address_id
+    INNER JOIN city ON city.city_id=address.city_id GROUP BY city.city_id
+);
diff --git a/week08/observations.txt b/week08/observations.txt
new file mode 100644
index 0000000..68871a8
--- /dev/null
+++ b/week08/observations.txt
@@ -0,0 +1,86 @@
+# Before indexing
+
+EXPLAIN ANALYZE SELECT * from customer WHERE name='John Smith';
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------
+ Seq Scan on customer (cost=0.00..429.00 rows=3 width=211) (actual time=0.299..3.124 rows=3 loops=1)
+ Filter: (name = 'John Smith'::text)
+ Rows Removed by Filter: 9997
+ Planning Time: 0.129 ms
+ Execution Time: 3.146 ms
+(5 rows)
+
+# After indexing using btree(name):
+
+CREATE INDEX idx_name on customer using btree(name);
+EXPLAIN ANALYZE SELECT * from customer WHERE name='John Smith';
+
QUERY PLAN + +------------------------------------------------------------------------------------------------------- +---------- + Bitmap Heap Scan on customer (cost=4.31..15.45 rows=3 width=211) (actual time=0.051..0.056 rows=3 loo +ps=1) + Recheck Cond: (name = 'John Smith'::text) + Heap Blocks: exact=3 + -> Bitmap Index Scan on idx_name (cost=0.00..4.31 rows=3 width=0) (actual time=0.044..0.044 rows=3 + loops=1) + Index Cond: (name = 'John Smith'::text) + Planning Time: 0.423 ms + Execution Time: 0.086 ms + +# A complicated query with LIKE doesn't benefit from hash indexing (hash indexes support only equality comparisons) + +EXPLAIN ANALYZE SELECT * FROM customer WHERE address LIKE '%Warren%Port%'; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Seq Scan on customer (cost=0.00..429.00 rows=1 width=211) (actual time=3.544..4.110 rows=1 loops=1) + Filter: (address ~~ '%Warren%Port%'::text) + Rows Removed by Filter: 9999 + Planning Time: 0.125 ms + Execution Time: 4.131 ms +(5 rows) + +A query written as a range comparison, in contrast, can benefit from the btree index: +EXPLAIN ANALYZE SELECT * FROM customer WHERE name >= 'Lisa' and name <= 'Lisb'; + QUERY PLAN + +------------------------------------------------------------------------------------------------------- +------------ + Bitmap Heap Scan on customer (cost=4.41..45.44 rows=12 width=211) (actual time=0.068..0.202 rows=90 l +oops=1) + Recheck Cond: ((name >= 'Lisa'::text) AND (name <= 'Lisb'::text)) + Heap Blocks: exact=83 + -> Bitmap Index Scan on idx_name (cost=0.00..4.41 rows=12 width=0) (actual time=0.042..0.042 rows= +90 loops=1) + Index Cond: ((name >= 'Lisa'::text) AND (name <= 'Lisb'::text)) + Planning Time: 0.174 ms + Execution Time: 0.233 ms + +Before indexing address: + +EXPLAIN ANALYZE SELECT * FROM customer WHERE address='USS Oneill FPO AE 11865'; + + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Seq Scan on customer
(cost=0.00..429.00 rows=1 width=211) (actual time=2.639..2.639 rows=0 loops=1) + Filter: (address = 'USS Oneill FPO AE 11865'::text) + Rows Removed by Filter: 10000 + Planning Time: 0.084 ms + Execution Time: 2.657 ms +(5 rows) + +After indexing address: +CREATE INDEX idx_address on customer using HASH(address); +EXPLAIN ANALYZE SELECT * FROM customer WHERE address='USS Oneill FPO AE 11865'; + QUERY PLAN + +------------------------------------------------------------------------------------------------------- +----------------- + Index Scan using idx_address on customer (cost=0.00..8.02 rows=1 width=211) (actual time=0.016..0.016 + rows=0 loops=1) + Index Cond: (address = 'USS Oneill FPO AE 11865'::text) + Planning Time: 0.224 ms + Execution Time: 0.035 ms +(4 rows) + +