Skip to content
Open
Changes from 1 commit
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
bb5b8e3
explicando sobre pip install request
Jen456 Aug 23, 2025
9b791ef
Update README.md
anmerinoto Aug 23, 2025
6520cbe
Descripción del código
rebe1603 Aug 23, 2025
e496c06
agregando la explicacion de beautifulsoup4
julizzazeime456-afk Aug 23, 2025
2054114
Merge branch 'main' of https://github.com/anmerinoto/Python-Web-Scrap…
julizzazeime456-afk Aug 23, 2025
8cd4556
Merge pull request #1 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 23, 2025
a3a0865
Merge branch 'main' into Saskia_Guerrrero
anmerinoto Aug 23, 2025
490c5f7
Merge pull request #2 from anmerinoto/Saskia_Guerrrero
anmerinoto Aug 23, 2025
7b7f34e
Revert "Descripción del código"
anmerinoto Aug 23, 2025
5645903
Merge pull request #3 from anmerinoto/revert-2-Saskia_Guerrrero
anmerinoto Aug 23, 2025
f153747
agregando la explicacion sobre la libreria time
julizzazeime456-afk Aug 23, 2025
e5cf9a1
Update README.md
anmerinoto Aug 23, 2025
bf3cd02
Traduccion de Python-Web-Scrapping
rebe1603 Aug 23, 2025
34ea12c
Resolucion dep portafolio
anmerinoto Aug 23, 2025
edd0806
Actualizar README.md
anmerinoto Aug 24, 2025
4434ccb
Actualizar README.md
anmerinoto Aug 24, 2025
3468d24
resolviendo conflictos
julizzazeime456-afk Aug 24, 2025
f03e289
Merge pull request #4 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 24, 2025
6db99c7
Upgrade Files
anmerinoto Aug 24, 2025
6647386
Delete solutions/ILGA_Senate_Scraper.ipynb
anmerinoto Aug 24, 2025
b43919c
Delete solutions/debug_list.html
anmerinoto Aug 24, 2025
9633f04
Update 02_web_scraping.ipynb
anmerinoto Aug 24, 2025
05bc11e
Merge branch 'main' of https://github.com/anmerinoto/Python-Web-Scraping
anmerinoto Aug 24, 2025
03a5a5d
Merge pull request #5 from anmerinoto/main
anmerinoto Aug 24, 2025
aed5f90
Merge pull request #6 from anmerinoto/main
anmerinoto Aug 24, 2025
4d764f6
Upgrade rutina
anmerinoto Aug 24, 2025
29e23aa
Merge pull request #7 from anmerinoto/main
anmerinoto Aug 24, 2025
d9351f7
Merge pull request #8 from anmerinoto/main
anmerinoto Aug 24, 2025
daa2980
Merge pull request #9 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 24, 2025
4347d8a
Merge pull request #10 from anmerinoto/Saskia_Guerrrero
anmerinoto Aug 24, 2025
4af9922
agrego traduccionn del paso 1
julizzazeime456-afk Aug 24, 2025
ac1f318
Merge pull request #11 from anmerinoto/main
anmerinoto Aug 25, 2025
0b6330f
Merge pull request #12 from anmerinoto/Saskia_Guerrrero
anmerinoto Aug 25, 2025
da766b4
Merge pull request #13 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 25, 2025
a9846a4
Merge pull request #14 from anmerinoto/main
anmerinoto Aug 25, 2025
5d4965e
Merge pull request #15 from anmerinoto/main
anmerinoto Aug 25, 2025
69d44a3
Merge pull request #16 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 25, 2025
65dcdc8
Merge pull request #17 from anmerinoto/main
anmerinoto Aug 25, 2025
032ac87
Merge pull request #18 from anmerinoto/JENNY_ALAVA
anmerinoto Aug 25, 2025
54e9da7
Merge pull request #19 from anmerinoto/Saskia_Guerrrero
anmerinoto Aug 25, 2025
b3f45a9
Traduccion en español web_scrapping
rebe1603 Aug 26, 2025
57b46d8
Merge pull request #21 from anmerinoto/main
anmerinoto Aug 26, 2025
ebc8fb1
Merge pull request #22 from anmerinoto/Saskia_Guerrrero
anmerinoto Aug 26, 2025
61a3d34
Merge branch 'main' into Saskia_Guerrero
rebe1603 Aug 26, 2025
cde9bfb
Merge pull request #20 from anmerinoto/Saskia_Guerrero
rebe1603 Aug 26, 2025
5701540
Arreglado notebook corrupto por conflictos
rebe1603 Aug 26, 2025
57db881
realizado el desafio de la parte 2
julizzazeime456-afk Aug 26, 2025
822040a
Jenny: reparando conflictos
julizzazeime456-afk Aug 26, 2025
2d90286
Jenny: analisis de la asamblea general de illinois
julizzazeime456-afk Aug 26, 2025
2930566
Merge pull request #23 from anmerinoto/main
anmerinoto Aug 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 15 additions & 14 deletions lessons/02_web_scraping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install requests"
"%pip install requests # Instala la librería requests, que permite hacer peticiones HTTP, con ella puedes conectarte a páginas web y obtener su contenido."
]
},
{
Expand All @@ -60,7 +60,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install beautifulsoup4"
"%pip install beautifulsoup4 # Instala una librería para parsear HTML y XML, sirve para extraer información específica de una página web."
]
},
{
Expand All @@ -76,7 +76,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install lxml"
"%pip install lxml # Parser rápido y eficiente para manejar HTML y XML."
]
},
{
Expand All @@ -87,7 +87,7 @@
},
"outputs": [],
"source": [
"# Import required libraries\n",
"# Importa las librerías necesarias para ejecutar el código.\n",
"from bs4 import BeautifulSoup\n",
"from datetime import datetime\n",
"import requests\n",
Expand Down Expand Up @@ -131,11 +131,11 @@
},
"outputs": [],
"source": [
"# Make a GET request\n",
"# Hace una solicitud HTTP de tipo GET a la página web.\n",
"req = requests.get('http://www.ilga.gov/senate/default.asp')\n",
"# Read the content of the server’s response\n",
"# Se obtiene el contenido de la respuesta del servidor en formato de texto.\n",
"src = req.text\n",
"# View some output\n",
"# Se imprimen solo los primeros 1000 caracteres del contenido.\n",
"print(src[:1000])"
]
},
Expand All @@ -156,9 +156,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Parse the response into an HTML tree\n",
"# Se crea un objeto BeautifulSoup a partir de la variable src (que contiene el HTML obtenido con requests).\n",
"soup = BeautifulSoup(src, 'lxml')\n",
"# Take a look\n",
"# Imprime de manera indentada, gracias a prettify, los primeros 1000 caracteres.\n",
"print(soup.prettify()[:1000])"
]
},
Expand Down Expand Up @@ -194,9 +194,9 @@
"metadata": {},
"outputs": [],
"source": [
"# Find all elements with a certain tag\n",
"# Usa soup.find_all(\"a\") para buscar todos los elementos <a> del documento HTML; la etiqueta <a> hace referencia a los hipervínculos.\n",
"a_tags = soup.find_all(\"a\")\n",
"print(a_tags[:10])"
"print(a_tags[:10])#Imprime los primeros 10 elementos de la lista."
]
},
{
Expand All @@ -216,6 +216,7 @@
},
"outputs": [],
"source": [
"#Obtener todos los elementos <a> de la página y se imprime el primer enlace de cada lista.\n",
"a_tags = soup.find_all(\"a\")\n",
"a_tags_alt = soup(\"a\")\n",
"print(a_tags[0])\n",
Expand All @@ -235,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(len(a_tags))"
"print(len(a_tags)) #Imprime el total de hipervínculos o enlaces que se encontraron en la página."
]
},
{
Expand All @@ -257,7 +258,7 @@
},
"outputs": [],
"source": [
"# Get only the 'a' tags in 'sidemenu' class\n",
"# Busca solo las etiquetas <a> que tengan la clase \"sidemenu\", y muestra los primeros 5 enlaces con la clase sidemenu.\n",
"side_menus = soup(\"a\", class_=\"sidemenu\")\n",
"side_menus[:5]"
]
Expand All @@ -279,7 +280,7 @@
},
"outputs": [],
"source": [
"# Get elements with \"a.sidemenu\" CSS Selector.\n",
"# Se usa soup.select() para buscar elementos con sintaxis CSS y muestra los primeros 5 enlaces con clase sidemenu.\n",
"selected = soup.select(\"a.sidemenu\")\n",
"selected[:5]"
]
Expand Down