diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..c7258fab --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.dataScience.jupyterServerURI": "local" +} \ No newline at end of file diff --git a/Pipfile b/Pipfile new file mode 100644 index 00000000..53c32968 --- /dev/null +++ b/Pipfile @@ -0,0 +1,13 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +python-dotenv = "*" +psycopg2-binary = "*" + +[requires] +python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 00000000..e6c5fbb1 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,67 @@ +{ + "_meta": { + "hash": { + "sha256": "d2f7f8cb967ddbd2921f1a455faef3341e4ca3985397f61c4320aa4cf8e395fe" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.8" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "psycopg2-binary": { + "hashes": [ + "sha256:0deac2af1a587ae12836aa07970f5cb91964f05a7c6cdb69d8425ff4c15d4e2c", + "sha256:0e4dc3d5996760104746e6cfcdb519d9d2cd27c738296525d5867ea695774e67", + "sha256:11b9c0ebce097180129e422379b824ae21c8f2a6596b159c7659e2e5a00e1aa0", + "sha256:1fabed9ea2acc4efe4671b92c669a213db744d2af8a9fc5d69a8e9bc14b7a9db", + "sha256:2dac98e85565d5688e8ab7bdea5446674a83a3945a8f416ad0110018d1501b94", + "sha256:42ec1035841b389e8cc3692277a0bd81cdfe0b65d575a2c8862cec7a80e62e52", + "sha256:6a32f3a4cb2f6e1a0b15215f448e8ce2da192fd4ff35084d80d5e39da683e79b", + "sha256:7312e931b90fe14f925729cde58022f5d034241918a5c4f9797cac62f6b3a9dd", + "sha256:7d92a09b788cbb1aec325af5fcba9fed7203897bbd9269d5691bb1e3bce29550", + "sha256:833709a5c66ca52f1d21d41865a637223b368c0ee76ea54ca5bad6f2526c7679", + "sha256:8cd0fb36c7412996859cb4606a35969dd01f4ea34d9812a141cd920c3b18be77", + "sha256:950bc22bb56ee6ff142a2cb9ee980b571dd0912b0334aa3fe0fe3788d860bea2", + 
"sha256:a0c50db33c32594305b0ef9abc0cb7db13de7621d2cadf8392a1d9b3c437ef77", + "sha256:a0eb43a07386c3f1f1ebb4dc7aafb13f67188eab896e7397aa1ee95a9c884eb2", + "sha256:aaa4213c862f0ef00022751161df35804127b78adf4a2755b9f991a507e425fd", + "sha256:ac0c682111fbf404525dfc0f18a8b5f11be52657d4f96e9fcb75daf4f3984859", + "sha256:ad20d2eb875aaa1ea6d0f2916949f5c08a19c74d05b16ce6ebf6d24f2c9f75d1", + "sha256:b4afc542c0ac0db720cf516dd20c0846f71c248d2b3d21013aa0d4ef9c71ca25", + "sha256:b8a3715b3c4e604bcc94c90a825cd7f5635417453b253499664f784fc4da0152", + "sha256:ba28584e6bca48c59eecbf7efb1576ca214b47f05194646b081717fa628dfddf", + "sha256:ba381aec3a5dc29634f20692349d73f2d21f17653bda1decf0b52b11d694541f", + "sha256:bd1be66dde2b82f80afb9459fc618216753f67109b859a361cf7def5c7968729", + "sha256:c2507d796fca339c8fb03216364cca68d87e037c1f774977c8fc377627d01c71", + "sha256:cec7e622ebc545dbb4564e483dd20e4e404da17ae07e06f3e780b2dacd5cee66", + "sha256:d14b140a4439d816e3b1229a4a525df917d6ea22a0771a2a78332273fd9528a4", + "sha256:d1b4ab59e02d9008efe10ceabd0b31e79519da6fb67f7d8e8977118832d0f449", + "sha256:d5227b229005a696cc67676e24c214740efd90b148de5733419ac9aaba3773da", + "sha256:e1f57aa70d3f7cc6947fd88636a481638263ba04a742b4a37dd25c373e41491a", + "sha256:e74a55f6bad0e7d3968399deb50f61f4db1926acf4a6d83beaaa7df986f48b1c", + "sha256:e82aba2188b9ba309fd8e271702bd0d0fc9148ae3150532bbb474f4590039ffb", + "sha256:ee69dad2c7155756ad114c02db06002f4cded41132cc51378e57aad79cc8e4f4", + "sha256:f5ab93a2cb2d8338b1674be43b442a7f544a0971da062a5da774ed40587f18f5" + ], + "index": "pypi", + "version": "==2.8.6" + }, + "python-dotenv": { + "hashes": [ + "sha256:8c10c99a1b25d9a68058a1ad6f90381a62ba68230ca93966882a4dbc3bc9c33d", + "sha256:c10863aee750ad720f4f43436565e4c1698798d763b63234fb5021b6c616e423" + ], + "index": "pypi", + "version": "==0.14.0" + } + }, + "develop": {} +} diff --git a/Unit-3-Sprint-2-SQL-and-Databases-Study-Guide.md b/Unit-3-Sprint-2-SQL-and-Databases-Study-Guide.md new file mode 100644 index 
00000000..49093ed6 --- /dev/null +++ b/Unit-3-Sprint-2-SQL-and-Databases-Study-Guide.md @@ -0,0 +1,118 @@ +# Unit 3 Sprint 2 SQL and Databases Study Guide + +This study guide should reinforce and provide practice for all of the concepts you have seen in the past week. There is a mix of written questions and coding exercises; both are equally important to prepare you for the sprint challenge as well as to be able to speak on these topics comfortably in interviews and on the job. + +If you get stuck or are unsure of something, remember the 20-minute rule. If that doesn't help, then research a solution with [Google](https://www.google.com) or [StackOverflow](https://www.stackoverflow.com). Only once you have exhausted these methods should you turn to your Team Lead - they won't be there on your SC or during an interview. That being said, don't hesitate to ask for help if you truly are stuck. + +Have fun studying! + +## SQL + +**Concepts:** + +1. What is SQL? +2. What is an RDBMS? +3. What is an ETL pipeline? +4. What is a schema? +5. What does each letter in ACID stand for? Give an explanation of each and why it matters. + - **A** + - **C** + - **I** + - **D** +6. Explain each of the table relationships and give an example for each + - One-to-One + - One-to-Many + - Many-to-Many + +## Syntax +For the following section, give a brief explanation of each of the SQL commands. + +1. **SELECT** - +2. **WHERE** - +3. **LIMIT** - +4. **ORDER** - +5. **JOIN** - +6. **CREATE TABLE** - +7. **INSERT** - +8. **DISTINCT** - +9. **GROUP BY** - +10. **ORDER BY** - +11. **AVG** - +12. **MAX** - +13. **AS** - + +## Starting From Scratch +Create a file named `study_part1.py` and complete the exercise below. The only library you should need to import is `sqlite3`. Don't forget to be PEP8 compliant! +1. Create a new database file called `study_part1.sqlite3` +2. 
Create a table with the following columns + ``` + student - string + studied - string + grade - int + age - int + sex - string + ``` + +3. Fill the table with the following data + + ``` + 'Lion-O', 'True', 85, 24, 'Male' + 'Cheetara', 'True', 95, 22, 'Female' + 'Mumm-Ra', 'False', 65, 153, 'Male' + 'Snarf', 'False', 70, 15, 'Male' + 'Panthro', 'True', 80, 30, 'Male' + ``` + +4. Save your data. You can check that everything is working so far if you can view the table and data in DBBrowser + +5. Write the following queries to check your work. Query outputs should be formatted for readability; don't simply print a number to the screen with no explanation, add context. + + ``` + What is the average age? Expected Result - 48.8 + What are the names of the female students? Expected Result - 'Cheetara' + How many students studied? Expected Result - 3 + Return all students and all columns, sorted by student names in alphabetical order. + ``` + +## Query All the Tables! + +### Setup +Before we get started you'll need a few things. +1. Download the [Chinook Database here](https://github.com/bundickm/Study-Guides/blob/master/data/Chinook_Sqlite.sqlite) +2. The schema can be [found here](https://github.com/bundickm/Study-Guides/blob/master/data/Chinook%20Schema.png) +3. Create a file named `study_part2.py` and complete the exercise below. The only library you should need to import is `sqlite3`. Don't forget to be PEP8 compliant! +4. Add a connection to the chinook database so that you can answer the queries below. + +### Queries +**Single Table Queries** +1. Find the average invoice total for each customer, return the details for the first 5 IDs +2. Return all columns in Customer for the first 5 customers residing in the United States +3. Which employee does not report to anyone? +4. Find the number of unique composers +5. How many rows are in the Track table? + +**Joins** + +6. Get the name of all Black Sabbath tracks and the albums they came off of +7. 
What is the most popular genre by number of tracks?
+8. Find all customers that have spent over $45
+9. Find the first and last name, title, and the number of customers each employee has helped. If the customer count is 0 for an employee, it doesn't need to be displayed. Order the employees from most to least customers.
+10. Return the first and last name of each employee and who they report to
+
+## NoSQL
+
+### Questions of Understanding
+
+1. What is a document store?
+
+2. What is a `key:value` pair? What data type in Python uses `key:value` pairs?
+
+3. Give an example of when it would be best to use a SQL Database and when it would be best to use a NoSQL Database
+
+4. What are some of the trade-offs between SQL and NoSQL?
+
+5. What does each letter in BASE stand for? Give an explanation for each and why they matter?
+    - B
+    - A
+    - S
+    - E
\ No newline at end of file
diff --git a/buddymove_holidayiq.py b/buddymove_holidayiq.py
new file mode 100644
index 00000000..e69de29b
diff --git a/module1-introduction-to-sql/Pipfile b/module1-introduction-to-sql/Pipfile
new file mode 100644
index 00000000..b5846df1
--- /dev/null
+++ b/module1-introduction-to-sql/Pipfile
@@ -0,0 +1,11 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+
+[requires]
+python_version = "3.8"
diff --git a/module1-introduction-to-sql/buddymove_holidayiq.py b/module1-introduction-to-sql/buddymove_holidayiq.py
new file mode 100644
index 00000000..c187a687
--- /dev/null
+++ b/module1-introduction-to-sql/buddymove_holidayiq.py
@@ -0,0 +1,41 @@
+"""Load the BuddyMove HolidayIQ reviews CSV into SQLite and query it."""
+import sqlite3
+
+import pandas as pd
+
+
+def execute_query(cursor, query):
+    """Execute ``query`` on ``cursor``, print all fetched rows, return them."""
+    cursor.execute(query)
+    result = cursor.fetchall()
+    print(result)
+    return result
+
+
+ROW_COUNT = """
+SELECT COUNT(*) FROM review;
+"""
+
+USER_COUNT = """
+SELECT COUNT(*)
+FROM review
+WHERE Nature >= 100
+AND Shopping >= 100;
+"""
+
+conn = sqlite3.connect('buddymove_holidayiq.sqlite3')
+curs = conn.cursor()
+
+# Rebuild the `review` table from the CSV on every run so reruns start clean.
+review = pd.read_csv('buddymove_holidayiq.csv')
+review.to_sql('review', con=conn, if_exists='replace')
+
+print('Row Count:')
+execute_query(curs, ROW_COUNT)
+
+print('Users who love nature and shopping count:')
+execute_query(curs, USER_COUNT)
+
+# Release the cursor and the SQLite file handle once the queries are done.
+curs.close()
+conn.close()
diff --git a/module1-introduction-to-sql/buddymove_holidayiq.sqlite3 b/module1-introduction-to-sql/buddymove_holidayiq.sqlite3
new file mode 100644
index 00000000..0dc55aef
Binary files /dev/null and b/module1-introduction-to-sql/buddymove_holidayiq.sqlite3 differ
diff --git a/module1-introduction-to-sql/rpg_db_example.py b/module1-introduction-to-sql/rpg_db_example.py
new file mode 100644
index 00000000..e69de29b
diff --git a/module1-introduction-to-sql/rpg_queries.py b/module1-introduction-to-sql/rpg_queries.py
new file mode 100644
index 00000000..c83a302a
--- /dev/null
+++ b/module1-introduction-to-sql/rpg_queries.py
@@ -0,0 +1,163 @@
+"""Answer the module 1 assignment questions against rpg_db.sqlite3."""
+import sqlite3
+
+
+def execute_query(cursor, query):
+    """Execute ``query`` on ``cursor``, print all fetched rows, return them."""
+    cursor.execute(query)
+    result = cursor.fetchall()
+    print(result)
+    return result
+
+
+conn = sqlite3.connect('rpg_db.sqlite3')
+curs = conn.cursor()
+
+# Count by primary key: character names are not guaranteed to be unique, so
+# COUNT(DISTINCT name) would under-report whenever two characters share one.
+TOTAL_CHARACTERS = """
+    SELECT COUNT(character_id)
+    FROM charactercreator_character;
+"""
+
+print('Total Characters:')
+execute_query(curs, TOTAL_CHARACTERS)
+
+# One scalar sub-select per subclass table, returned as a single row.
+SUBCLASS_TOTALS = """
+    SELECT (
+        SELECT COUNT(character_ptr_id)
+        FROM charactercreator_cleric
+    ) AS Cleric,
+    (
+        SELECT COUNT(character_ptr_id)
+        FROM charactercreator_fighter
+    ) AS Fighter,
+    (
+        SELECT COUNT(character_ptr_id)
+        FROM charactercreator_mage
+    ) AS Mage,
+    (
+        SELECT COUNT(mage_ptr_id)
+        FROM charactercreator_necromancer
+    ) AS Necromancer,
+    (
+        SELECT COUNT(character_ptr_id)
+        FROM charactercreator_thief
+    ) AS Thief;
+"""
+
+print('Subclass Totals:')
+execute_query(curs, SUBCLASS_TOTALS)
+
+ITEM_TOTALS = """
+    SELECT COUNT(item_id)
+    FROM armory_item
+"""
+
+print('Item Totals:')
+execute_query(curs, ITEM_TOTALS)
+
+WEAPONS_TOTAL = """
+    SELECT COUNT(item_ptr_id)
+    FROM armory_weapon
+"""
+
+print('Weapons Totals:')
+execute_query(curs, WEAPONS_TOTAL)
+
+# Items that are not weapons: all items minus the weapon rows.
+WEAPONS_DIFF = """
+    SELECT (
+        SELECT COUNT(item_id) AS val2
+        FROM armory_item
+    ) - (
+        SELECT COUNT(item_ptr_id) AS val1
+        FROM armory_weapon
+    ) AS total_count
+"""
+
+print('Weapons Diff:')
+execute_query(curs, WEAPONS_DIFF)
+
+# Items per character for the 20 characters holding the most distinct items.
+CHAR_ITEM_TOTALS = """
+    SELECT character_id, COUNT(DISTINCT item_id)
+    FROM (
+        SELECT cc.character_id, cc.name AS character_name,
+               ai.item_id, ai.name AS item_name
+        FROM charactercreator_character AS cc,
+             armory_item AS ai,
+             charactercreator_character_inventory AS cci
+        WHERE cc.character_id = cci.character_id
+          AND ai.item_id = cci.item_id
+    )
+    GROUP BY 1 ORDER BY 2 DESC
+    LIMIT 20;
+"""
+
+print('Character Item Totals:')
+execute_query(curs, CHAR_ITEM_TOTALS)
+
+# Same as above, restricted to inventory items that are also weapons.
+CHAR_WEAPON_TOTALS = """
+    SELECT character_id, COUNT(DISTINCT item_id)
+    FROM (
+        SELECT cc.character_id, cc.name AS character_name,
+               ai.item_id, ai.name AS item_name
+        FROM charactercreator_character AS cc,
+             armory_item AS ai,
+             armory_weapon AS aw,
+             charactercreator_character_inventory AS cci
+        WHERE cc.character_id = cci.character_id
+          AND ai.item_id = cci.item_id
+          AND ai.item_id = aw.item_ptr_id
+    )
+    GROUP BY 1 ORDER BY 2 DESC
+    LIMIT 20;
+"""
+
+print('Character Weapon Totals:')
+execute_query(curs, CHAR_WEAPON_TOTALS)
+
+AVG_ITEM = """
+    SELECT AVG(ic)
+    FROM (
+        SELECT character_id, COUNT(DISTINCT item_id) AS ic
+        FROM (
+            SELECT cc.character_id, cc.name AS character_name,
+                   ai.item_id, ai.name AS item_name
+            FROM charactercreator_character AS cc,
+                 armory_item AS ai,
+                 charactercreator_character_inventory AS cci
+            WHERE cc.character_id = cci.character_id
+              AND ai.item_id = cci.item_id
+        )
+        GROUP BY 1 ORDER BY 2 ASC
+    )
+"""
+
+print('Character Items Average:')
+execute_query(curs, AVG_ITEM)
+
+# LEFT JOINs keep characters that own no weapons (counted as 0), so this is
+# the average over all characters, not only over those that hold a weapon.
+AVG_WEAPON = """
+    SELECT AVG(wa)
+    FROM (
+        SELECT cc.character_id, COUNT(DISTINCT aw.item_ptr_id) AS wa
+        FROM charactercreator_character AS cc
+        LEFT JOIN charactercreator_character_inventory AS cci
+            ON cc.character_id = cci.character_id
+        LEFT JOIN armory_weapon AS aw
+            ON cci.item_id = aw.item_ptr_id
+        GROUP BY cc.character_id
+    )
+"""
+
+print('Character Weapons Average:')
+execute_query(curs, AVG_WEAPON)
+
+# Release the cursor and the SQLite file handle once the queries are done.
+curs.close()
+conn.close()
diff --git a/module1-introduction-to-sql/test_db.sqlite3 b/module1-introduction-to-sql/test_db.sqlite3
new file mode 100644
index 00000000..cc940217
Binary files /dev/null and b/module1-introduction-to-sql/test_db.sqlite3 differ
diff --git a/module2-sql-for-analysis/elephant_queries.py b/module2-sql-for-analysis/elephant_queries.py
new file mode 100644
index 00000000..60325e82
--- /dev/null
+++ b/module2-sql-for-analysis/elephant_queries.py
@@ -0,0 +1,87 @@
+"""Insert sample rows and the Titanic CSV into ElephantSQL PostgreSQL."""
+import json
+import os
+
+import pandas as pd
+import psycopg2
+from dotenv import load_dotenv
+from psycopg2.extras import execute_values
+
+load_dotenv()  # loads the .env file into the script's environment
+
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+DB_HOST = os.getenv("DB_HOST")
+
+# Never echo DB_PASSWORD: stdout ends up in terminals, logs and screenshots.
+print(DB_NAME, DB_USER, DB_HOST)
+
+# Connect to ElephantSQL-hosted PostgreSQL
+conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER,
+                        password=DB_PASSWORD, host=DB_HOST)
+
+# A "cursor", a structure to iterate over db records to perform queries
+cur = conn.cursor()
+
+# Create the table first so the SELECT below cannot fail on a fresh database;
+# IF NOT EXISTS makes this a no-op when the table is already there.
+query = """
+CREATE TABLE IF NOT EXISTS test_table (
+    id SERIAL PRIMARY KEY,
+    name varchar(40) NOT NULL,
+    data JSONB
+);
+"""
+cur.execute(query)
+
+# An example query
+cur.execute('SELECT * from test_table;')
+
+# Nothing has been returned yet: rows must be *fetched* from the cursor
+result = cur.fetchone()
+print(result)
+
+my_dict = {"a": 1, "b": ["dog", "cat", 42], "c": 'true'}
+
+# execute_values expands the single %s into one VALUES tuple per row.
+insertion_query = "INSERT INTO test_table (name, data) VALUES %s"
+execute_values(cur, insertion_query, [
+    ('A rowwwww', 'null'),
+    ('Another row, with JSONNNNN', json.dumps(my_dict)),
+    ('Third row', "3")
+])
+
+titanic_df = pd.read_csv('https://raw.githubusercontent.com/timrocar/DS-Unit-3-Sprint-2-SQL-and-Databases/master/module2-sql-for-analysis/titanic.csv')
+
+# One plain Python sequence per passenger, in the CSV's column order.
+tuple_list = [list(row) for row in titanic_df.itertuples(index=False)]
+
+CREATE_TABLE = """
+    DROP TABLE IF EXISTS titanic_passengers;
+    CREATE TABLE titanic_passengers (
+        Survived int,
+        Pclass int,
+        Name varchar(100),
+        Sex varchar(100),
+        Age float,
+        Siblings_Spouses_Aboard int,
+        Parents_Children_Aboard int,
+        Fare float
+    );"""
+
+cur.execute(CREATE_TABLE)
+
+insertion_query = """
+INSERT INTO titanic_passengers (
+    Survived, Pclass, Name, Sex,
+    Age, Siblings_Spouses_Aboard,
+    Parents_Children_Aboard, Fare) VALUES %s
+"""
+execute_values(cur, insertion_query, tuple_list)
+
+# Persist the inserts; nothing is saved until commit() is called.
+conn.commit()
+
+cur.close()
+conn.close()
diff --git a/module2-sql-for-analysis/mod2_rpg_queries.py b/module2-sql-for-analysis/mod2_rpg_queries.py
new file mode 100644
index 00000000..d921bab7
--- /dev/null
+++ 
b/module2-sql-for-analysis/mod2_rpg_queries.py
@@ -0,0 +1,37 @@
+"""Work-in-progress: opens and closes an ElephantSQL PostgreSQL connection."""
+import os
+import sqlite3
+
+import psycopg2
+from dotenv import load_dotenv
+from psycopg2.extras import execute_values
+
+# TODO: presumably the armory_item rows are meant to be copied to PostgreSQL.
+# conn = sqlite3.connect('rpg_db.sqlite3')
+# curs = conn.cursor()
+# armory_query = 'SELECT * FROM armory_item;'
+# curs.execute(armory_query)
+# armory_tuples = curs.fetchone()
+
+load_dotenv()  # loads the .env file into the script's environment
+
+DB_NAME = os.getenv("DB_NAME")
+DB_USER = os.getenv("DB_USER")
+DB_PASSWORD = os.getenv("DB_PASSWORD")
+DB_HOST = os.getenv("DB_HOST")
+
+# Never echo DB_PASSWORD: stdout ends up in terminals, logs and screenshots.
+print(DB_NAME, DB_USER, DB_HOST)
+
+# Connect to ElephantSQL-hosted PostgreSQL
+conn = psycopg2.connect(dbname=DB_NAME, user=DB_USER,
+                        password=DB_PASSWORD, host=DB_HOST)
+
+# A "cursor", a structure to iterate over db records to perform queries
+curs = conn.cursor()
+
+# saving transactions
+conn.commit()
+
+curs.close()
+conn.close()
diff --git a/module3-nosql-and-document-oriented-databases/mongodb_playground.py b/module3-nosql-and-document-oriented-databases/mongodb_playground.py
new file mode 100644
index 00000000..c73ce56e
--- /dev/null
+++ b/module3-nosql-and-document-oriented-databases/mongodb_playground.py
@@ -0,0 +1,59 @@
+"""Scratch script exercising basic pymongo inserts and finds."""
+import os
+
+import dns  # noqa: F401 -- dnspython, required by pymongo for mongodb+srv URIs
+import pymongo
+
+# The connection string embeds the account password, so read it from the
+# MONGO_URI environment variable instead of committing it. The password that
+# was hard-coded here is exposed in git history and must be rotated.
+client = pymongo.MongoClient(os.environ["MONGO_URI"])
+db = client.test
+
+db.test.insert_one({'x': 1})
+
+# Bare expressions only echo in a REPL/notebook; print so a script shows them.
+print(db.test.count_documents({'x': 1}))
+
+# find() returns a cursor object; turn it into a list to see the documents.
+print(list(db.test.find({'x': 1})))
+
+max_doc = {
+    'food': 'rhino',
+    'color': 'orange',
+    'number': 7
+}
+
+jim_doc = {
+    'animal': 'rhino',
+    'color': 'orange',
+    'cities': ['New York', 'Chicago']
+}
+
+all_docs = [jim_doc, max_doc]
+
+db.test.insert_many(all_docs)
+
+print(list(db.test.find()))
+
+db.test.insert_one({
+    'food': 'cookies',
+    'color': 'orange'
+})
+
+print(list(db.test.find({'color': 'orange'})))
+
+# Ten documents flagged even/odd with their index as `value`, so the
+# {'even': ..., 'value': ...} filters below have something to match.
+more_docs = [{'even': i % 2 == 0, 'value': i} for i in range(10)]
+
+db.test.insert_many(more_docs)
+
+print(list(db.test.find({'even': True, 'value': 0})))
+
+# Matches nothing from more_docs: 1 is odd, so its document has even=False.
+print(list(db.test.find({'even': True, 'value': 1})))
+
+print(list(db.test.find({'even': True})))
+
+client.close()