Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
pandas = "*"
psycopg2-binary = "*"
pymongo = "*"
dnspython = "*"
sqlalchemy = "*"

[requires]
python_version = "3.8"
236 changes: 236 additions & 0 deletions Pipfile.lock

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions module1-introduction-to-sql/buddymove_holidayiq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pandas as pd
import sqlite3


def connect_to_db(db_name="buddymove_holidayiq.sqlite3"):
    """Open the SQLite database stored at `db_name`.

    Args:
        db_name: Path handed to `sqlite3.connect`; defaults to the
            BuddyMove database file.

    Returns:
        The `sqlite3.Connection` produced by `sqlite3.connect`.
    """
    connection = sqlite3.connect(db_name)
    return connection


def execute_query(cursor, query, params=()):
    """Run `query` on `cursor` and return every resulting row.

    Args:
        cursor: A DB-API cursor (here, a `sqlite3.Cursor`).
        query: SQL text to execute; may contain `?` placeholders.
        params: Values bound to the placeholders in `query`. Defaults to an
            empty tuple, so existing two-argument calls behave as before.

    Returns:
        The list of row tuples returned by `cursor.fetchall()`.
    """
    # Binding values through `params` avoids building SQL via string
    # formatting when a query needs user-supplied input.
    cursor.execute(query, params)
    return cursor.fetchall()


# Count how many rows you have - it should be 249!
TOTAL_ROWS = """
SELECT COUNT(*)
FROM review
"""


# How many users who reviewed at least 100 `Nature` in the category also
# reviewed at least 100 in the `Shopping` category?
# "At least 100" is inclusive, so the comparison must be `>=`; a strict `>`
# would silently drop users with exactly 100 reviews in either category.
TOTAL_USERS_NATURE_SHOPPING = """
SELECT COUNT(*)
FROM review
WHERE Nature >= 100 AND Shopping >= 100
"""


if __name__ == "__main__":
    # Script entry point: run both report queries against the local SQLite
    # file and print the two counts.
    conn = connect_to_db()
    try:
        curs = conn.cursor()

        # One-time setup that loads the CSV into the `review` table; left
        # commented out so re-running the script does not attempt it again.
        # df = pd.read_csv('buddymove_holidayiq.csv')
        # df.to_sql('review', con=conn)

        rows = execute_query(curs, TOTAL_ROWS)
        users = execute_query(curs, TOTAL_USERS_NATURE_SHOPPING)
    finally:
        # Release the database handle even if one of the queries raises.
        conn.close()

    # Each COUNT(*) query returns a single one-column row, hence [0][0].
    print("There are %d total rows." % (rows[0][0]))
    print(
        "There are %d total users who reviewed at least 100 in the 'Nature'"
        " category as well as at least 100 in the 'Shopping' category."
        % (users[0][0])
    )
Binary file not shown.
131 changes: 131 additions & 0 deletions module1-introduction-to-sql/rpg_db_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import sqlite3


def connect_to_db(db_name="rpg_db.sqlite3"):
    """Return a new SQLite connection to the file named by `db_name`.

    Args:
        db_name: Path passed straight to `sqlite3.connect`; defaults to the
            RPG database file.

    Returns:
        The `sqlite3.Connection` created by `sqlite3.connect`.
    """
    rpg_connection = sqlite3.connect(db_name)
    return rpg_connection


def execute_query(cursor, query, params=()):
    """Execute `query` on `cursor` and return all rows it produces.

    Args:
        cursor: A DB-API cursor (here, a `sqlite3.Cursor`).
        query: SQL text to run; may contain `?` placeholders.
        params: Values bound to the placeholders in `query`. Defaults to an
            empty tuple, so existing two-argument calls are unaffected.

    Returns:
        The list of row tuples returned by `cursor.fetchall()`.
    """
    # Accepting bound parameters lets callers avoid interpolating values
    # into the SQL string themselves.
    cursor.execute(query, params)
    return cursor.fetchall()


# Every query in this group counts all rows of a single table, so they are
# all built from one template; only the table name differs.
_COUNT_ALL_ROWS = "\nSELECT COUNT(*)\nFROM {table};\n"

# How many total Characters are there?
TOTAL_CHARACTERS = _COUNT_ALL_ROWS.format(table="charactercreator_character")

# How many of each specific subclass?
TOTAL_FIGHTERS = _COUNT_ALL_ROWS.format(table="charactercreator_fighter")
TOTAL_MAGES = _COUNT_ALL_ROWS.format(table="charactercreator_mage")
TOTAL_CLERICS = _COUNT_ALL_ROWS.format(table="charactercreator_cleric")
TOTAL_THIEFS = _COUNT_ALL_ROWS.format(table="charactercreator_thief")
TOTAL_NECROMANCERS = _COUNT_ALL_ROWS.format(
    table="charactercreator_necromancer"
)

# How many total Items?
TOTAL_ITEMS = _COUNT_ALL_ROWS.format(table="armory_item")


# How many of the Items are weapons? How many are not?
TOTAL_WEAPONS = """
SELECT COUNT(*)
FROM armory_weapon;
"""


# An item is a non-weapon when no armory_weapon row points at it. Deriving
# this from the tables (instead of a hard-coded item_id cutoff) keeps the
# count right even if weapon ids are not one contiguous range at the end.
TOTAL_NON_WEAPONS = """
SELECT COUNT(*)
FROM armory_item
WHERE item_id NOT IN (SELECT item_ptr_id FROM armory_weapon);
"""

# How many Items does each character have?
# The query returns one row per character that owns at least one item; the
# caller slices the first 20 rows for display. ORDER BY makes that "first 20"
# deterministic instead of depending on the engine's grouping order.
TOTAL_INVENTORY = """
SELECT character_id, COUNT(*)
FROM charactercreator_character_inventory
GROUP BY character_id
ORDER BY character_id;
"""


# How many Weapons does each character have?
# Inner join: only inventory rows whose item is a weapon are counted, so
# characters without any weapon do not appear in the result. The caller
# slices the first 20 rows for display.
TOTAL_WEAPON_INVENTORY = """
SELECT inv.character_id, COUNT(*)
FROM charactercreator_character_inventory AS inv
JOIN armory_weapon AS weapon ON inv.item_id = weapon.item_ptr_id
GROUP BY inv.character_id
ORDER BY inv.character_id;
"""


if __name__ == "__main__":
    # Script entry point: run every report query against the local RPG
    # database, then print the counts and per-character averages.
    conn = connect_to_db()
    try:
        curs = conn.cursor()

        characters = execute_query(curs, TOTAL_CHARACTERS)
        fighters = execute_query(curs, TOTAL_FIGHTERS)
        mages = execute_query(curs, TOTAL_MAGES)
        clerics = execute_query(curs, TOTAL_CLERICS)
        thiefs = execute_query(curs, TOTAL_THIEFS)
        necromancers = execute_query(curs, TOTAL_NECROMANCERS)
        items = execute_query(curs, TOTAL_ITEMS)
        weapons = execute_query(curs, TOTAL_WEAPONS)
        non_weapons = execute_query(curs, TOTAL_NON_WEAPONS)
        total_inventory = execute_query(curs, TOTAL_INVENTORY)
        total_weapon_inventory = execute_query(curs, TOTAL_WEAPON_INVENTORY)
    finally:
        # All rows are already fetched; release the handle even on error.
        conn.close()

    # Each COUNT(*) query returns a single one-column row, hence [0][0].
    print("There are %d total Characters." % (characters[0][0]))
    print("There are %d total Fighters." % (fighters[0][0]))
    print("There are %d total Mages." % (mages[0][0]))
    print("There are %d total Clerics." % (clerics[0][0]))
    print("There are %d total Thiefs." % (thiefs[0][0]))
    print("There are %d total Necromancers." % (necromancers[0][0]))
    print("There are %d total Items." % (items[0][0]))
    print("There are %d total Weapons." % (weapons[0][0]))
    print("There are %d total Non-Weapons." % (non_weapons[0][0]))

    # Only the first 20 (character_id, count) rows are shown.
    for character_id, item_count in total_inventory[:20]:
        print("Character %d has %d items." % (character_id, item_count))
    for character_id, weapon_count in total_weapon_inventory[:20]:
        print("Character %d has %d weapons." % (character_id, weapon_count))

    # Averages divide by the total number of characters, so characters that
    # are absent from the grouped results still count in the denominator.
    num_items = sum(count for _, count in total_inventory)
    num_weapons = sum(count for _, count in total_weapon_inventory)
    print(
        "On average, each Character has %f items?"
        % (num_items / characters[0][0])
    )
    print(
        "On average, each Character has %f weapons?"
        % (num_weapons / characters[0][0])
    )
88 changes: 88 additions & 0 deletions module2-sql-for-analysis/insert_titanic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Then, set up a new table for the Titanic data (`titanic.csv`) - spend some time
# thinking about the schema to make sure it is appropriate for the columns.
# [Enumerated types](https://www.postgresql.org/docs/9.1/datatype-enum.html) may
# be useful. Once it is set up, write an `insert_titanic.py` script that uses
# `psycopg2` to connect to and upload the data from the csv, and add the file to
# our repo. Then start writing PostgreSQL queries to explore the data!

import csv
import os

import pandas as pd
import psycopg2

# Looks similar to sqlite3, but needs auth/host info to connect.
# NOTE: this is sensitive info (particularly the password) and shouldn't be
# checked into git. Each setting is read from the standard libpq environment
# variable first; the literals below are only backward-compatible fallbacks.
# TODO(review): rotate the committed password and remove the fallbacks.

dbname = os.environ.get("PGDATABASE", "ajkuvccu")
# ElephantSQL happens to use same name for db and user
user = os.environ.get("PGUSER", "ajkuvccu")
# Sensitive! Don't share/commit
password = os.environ.get("PGPASSWORD", "FBOFpSpFdAFrxYUG-DBqN39wDQ0Mjc4V")
host = os.environ.get("PGHOST", "isilo.db.elephantsql.com")


def create_type_class():
    """Create the PostgreSQL enum type `class` with values '1', '2', '3'.

    Uses the module-level `pg_curs` and `pg_conn` (assigned in the
    `__main__` block), then commits so the new type is persisted.
    """
    pg_curs.execute(
        "\n"
        "CREATE TYPE class as ENUM ('1', '2', '3');\n"
    )
    # "Save" by committing
    pg_conn.commit()


def drop():
    """Drop the `titanic` table and commit the change.

    Uses the module-level `pg_curs` and `pg_conn` (assigned in the
    `__main__` block).
    """
    drop_statement = "DROP TABLE titanic"
    pg_curs.execute(drop_statement)
    # "Save" by committing
    pg_conn.commit()


# Helper that swaps a used connection/cursor pair for a brand-new one.
def refresh_connection_and_cursor(conn, curs):
    """Close `curs` and `conn`, then open and return a fresh pair.

    Args:
        conn: The open connection to close.
        curs: The open cursor to close (closed before the connection).

    Returns:
        A `(connection, cursor)` tuple created with the module-level
        `dbname`, `user`, `password` and `host` settings.
    """
    curs.close()
    conn.close()
    fresh_conn = psycopg2.connect(
        dbname=dbname, user=user, password=password, host=host
    )
    return fresh_conn, fresh_conn.cursor()


if __name__ == "__main__":
    # Script entry point: connect to PostgreSQL, load every data row of
    # titanic.csv into the `titanic` table, then read a sample back.

    # If we make too many connections, the database complains! Be sure to close
    # cursors and connections
    pg_conn = psycopg2.connect(
        dbname=dbname, user=user, password=password, host=host
    )

    pg_curs = pg_conn.cursor()  # Works the same as SQLite!

    # Schema for the Titanic data. `pclass` uses the `class` enum created by
    # create_type_class().
    # NOTE - these types are PostgreSQL specific. This won't work in SQLite!
    create_table_statement = """
    CREATE TABLE titanic (
        survived boolean,
        pclass class,
        name varchar(100),
        sex varchar(6),
        age float,
        siblings_spouses_aboard integer,
        parents_children_aboard integer,
        fare float
    );
    """

    try:
        pg_conn, pg_curs = refresh_connection_and_cursor(pg_conn, pg_curs)

        # Table creation is left disabled here; enable it for the first run
        # against an empty database.
        # pg_curs.execute(create_table_statement)
        pg_conn.commit()  # "Save" by committing

        # newline="" lets the csv module handle line endings itself, as the
        # csv documentation recommends for files passed to csv.reader.
        with open("titanic.csv", "r", newline="") as f:
            reader = csv.reader(f)
            next(reader)  # Skip the header row.
            for row in reader:
                # Values are bound as parameters, never formatted into SQL.
                pg_curs.execute(
                    "INSERT INTO titanic VALUES (%s, %s, %s, %s, %s, %s, %s, %s)",
                    row,
                )
        pg_conn.commit()

        # PostgreSQL identifiers are quoted with double quotes; single quotes
        # denote string literals and make this statement a syntax error.
        pg_curs.execute('SELECT * FROM "public"."titanic" LIMIT 100')
        pg_conn.commit()  # Ends the transaction opened by the SELECT
    finally:
        # Always release the cursor and connection, even when a statement
        # above raises, so the server's connection limit is not exhausted.
        pg_curs.close()
        pg_conn.close()
Binary file added module2-sql-for-analysis/rpg_db.sqlite3
Binary file not shown.
Loading