diff --git a/Makefile b/Makefile
index 4a3804b..271e575 100644
--- a/Makefile
+++ b/Makefile
@@ -8,9 +8,6 @@ LAMBDA_NAME := check-garbage-day
LAMBDA_HANDLER := refusereminder
LAMBDA_FREQ := 12 hours
-BUILD_CONTAINER_NAME := mke-trash-pickup_libs
-BUILD_CONTAINER_IMAGE := amazonlinux:latest
-
venv:
virtualenv -p python$(PYTHON_VERSION) $(VENV_DIR)
@@ -19,34 +16,13 @@ venv:
deps : venv
$(VENV_DIR)/bin/pip$(PIP_VERSION) install -e .
-site-packages :
- @docker rm -f $(BUILD_CONTAINER_NAME) >&/dev/null || true
- @docker run \
- -id \
- -v $(shell pwd):/code:ro \
- --name $(BUILD_CONTAINER_NAME) \
- $(BUILD_CONTAINER_IMAGE)
- @docker exec -it $(BUILD_CONTAINER_NAME) yum install -y \
- gcc \
- libxml2-devel \
- libxslt-devel \
- python27 \
- python27-devel \
- python27-pip
- @docker exec -it $(BUILD_CONTAINER_NAME) pip install /code
- @docker cp \
- $(BUILD_CONTAINER_NAME):/usr/local/lib64/python2.7/$@ \
- ./$@-64
- @docker cp \
- $(BUILD_CONTAINER_NAME):/usr/local/lib/python2.7/$@ \
- ./$@
- @docker rm -f $(BUILD_CONTAINER_NAME)
+# Print the version of the mkerefuse package as seen by the virtualenv's Python
+.PHONY : version
+version : venv
+	@$(VENV_DIR)/bin/python -c "import mkerefuse; print(mkerefuse.__version__)"
.PHONY : ldist
-ldist : site-packages
+# Zip the Lambda handler and the mkerefuse package. The site-packages rule is
+# deleted by this change, so it can no longer be listed as a prerequisite.
+ldist :
zip -r $(LDIST_ZIP) $(LAMBDA_HANDLER).py mkerefuse -x *.pyc
- cd site-packages-64 && zip -r $(LDIST_ZIP) *
- cd site-packages && zip -r $(LDIST_ZIP) *
.PHONY : s3-bucket
s3-bucket :
diff --git a/README.md b/README.md
index acc4f37..4a8dc1e 100644
--- a/README.md
+++ b/README.md
@@ -5,10 +5,6 @@ I'm lazy.
## Usage
### CloudFormation
-**Important:** To build the [lxml](http://lxml.de/) library for deployment
-to AWS Lambda, a Docker container will be employed during the `make ldist`
-target.
-
```sh
# Create the S3 bucket, build the code, deploy to S3
make s3-bucket ldist s3-deploy
@@ -27,6 +23,7 @@ make cloud \
**Other Variables**
| Name | Default | Description |
+| ---- | ------- | ----------- |
| `STACK_NAME` | `mke-trash-pickup` | CloudFormation stack name |
| `DEPLOY_BUCKET` | `mke-trash-pickup-12241` | S3 bucket for .zip deployment (Must be changed) |
-| `LAMBDA_FREQ` | `12 hours` | How often the the scheduled event will check for changes |
+| `LAMBDA_FREQ` | `12 hours` | How often the scheduled event will check for changes |
@@ -40,30 +37,55 @@ $ mkerefusecheck \
--direction S \
--street 27th \
--street-type st
-2016-04-14 20:23:19 - mke-refuse - DEBUG - Parsing arguments
-2016-04-14 20:23:19 - mke-refuse - DEBUG - Composing query address
-2016-04-14 20:23:19 - mke-refuse - INFO - Executing query...
-2016-04-14 20:23:19 - requests.packages.urllib3.connectionpool - INFO - Starting new HTTP connection (1): mpw.milwaukee.gov
-2016-04-14 20:23:19 - requests.packages.urllib3.connectionpool - DEBUG - "POST /services/garbage_day HTTP/1.1" 200 None
-2016-04-14 20:23:19 - RefusePickup - INFO - Reading through 14152 bytes for 6 properties...
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_after': //*[@id="nConf"]/strong[4]
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'route_recyle': //*[@id="nConf"]/strong[3]
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_before': //*[@id="nConf"]/strong[5]
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'success_msg': //*[@id="nConf"]/h1
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'route_garbage': //*[@id="nConf"]/strong[1]
-2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_garbage': //*[@id="nConf"]/strong[2]
-2016-04-14 20:23:19 - mke-refuse - INFO - Query returned
+2016-12-29 12:50:08 - mke-refuse - DEBUG - Parsing arguments
+2016-12-29 12:50:08 - mke-refuse - DEBUG - Composing query address
+2016-12-29 12:50:08 - mke-refuse - INFO - Executing query...
+2016-12-29 12:50:08 - requests.packages.urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): mpw.milwaukee.gov
+2016-12-29 12:50:08 - requests.packages.urllib3.connectionpool - DEBUG - http://mpw.milwaukee.gov:80 "POST /services/garbage_day HTTP/1.1" 200 None
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Parsing 13813 bytes of HTML
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_garbage' with 'The next garbage collection pickup for this location is: (?P[^<]+)'
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'route_garbage' with 'garbage pickup route for this location is (?P[^<]+)'
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_before' with 'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)'
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'route_recycle' with 'recycling pickup route for this location is (?P[^<]+)'
+2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_after' with 'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)'
+2016-12-29 12:50:08 - mke-refuse - INFO - Query returned
{
- "next_pickup_recycle_after": "TUESDAY MAY 3, 2016",
- "route_recyle": "SR01-3-07",
- "route_garbage": "SP1-3A",
- "success_msg": "2727 S 27TH ST - Address located!",
- "next_pickup_recycle_before": "May 9th - May 13th",
- "next_pickup_garbage": "TUESDAY APRIL 19, 2016"
+ "route_recycle": "NR1-2-3",
+ "next_pickup_garbage": "THURSDAY JANUARY 5, 2017",
+ "route_garbage": "NP1-2A",
+ "next_pickup_recycle_before": "THURSDAY JANUARY 5, 2017",
+ "next_pickup_recycle_after": "WEDNESDAY JANUARY 4, 2017"
}
```
### Advanced
+
+```python
+In [1]: from mkerefuse.refuse import RefuseQuery
+ ...: from mkerefuse.refuse import RefuseQueryAddress
+ ...:
+ ...: address = RefuseQueryAddress(
+ ...: house_number='2727',
+ ...: direction='S',
+ ...: street_name='27th',
+ ...: street_type='ST')
+ ...:
+ ...: pickup = RefuseQuery.Execute(address)
+ ...:
+ ...: print("Found garbage route: {}".format(pickup.route_garbage))
+ ...:
+ ...: pickup.to_dict()
+ ...:
+
+Found garbage route: SP1-3A
+Out[1]:
+{'next_pickup_garbage': u'THURSDAY DECEMBER 29, 2016',
+ 'next_pickup_recycle_after': '',
+ 'next_pickup_recycle_before': '',
+ 'route_garbage': u'SP1-3A',
+ 'route_recycle': ''}
+```
+
*See [Usage.ipynb](Usage.ipynb)*
### Development Setup
@@ -85,6 +107,10 @@ venv/bin/jupyter notebook
- **Submit:** Submit
### Form Response *(XPaths)*
+**Note:** These XPaths are kept for historical reference only. Parsing is now
+done with regular expressions because of issues like
+[#5](https://github.com/tomislacker/python-mke-trash-pickup/issues/5).
+
- **Success Or Note:** `//*[@id="nConf"]/h1`
- **Winter Pickup Route:** `//*[@id="nConf"]/strong[1]`
- **Next Garbage Pickup:** `//*[@id="nConf"]/strong[2]`
@@ -103,5 +129,9 @@ curl \
## References
### Building Libraries for Lambda
+**Note:** These references are kept for historical purposes only. Parsing is
+now done with regular expressions because of issues like
+[#5](https://github.com/tomislacker/python-mke-trash-pickup/issues/5).
+
- [[azavea.com] Using Python's LXML in Amazon Lambda](https://www.azavea.com/blog/2016/06/27/using-python-lxml-amazon-lambda/)
- [[stackoverflow.com] Use LXML on AWS Lambda](http://stackoverflow.com/questions/36387664/use-lxml-on-aws-lambda)
diff --git a/Usage.ipynb b/Usage.ipynb
deleted file mode 100644
index a657667..0000000
--- a/Usage.ipynb
+++ /dev/null
@@ -1,119 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Overview\n",
- "This notebook will show you how to employe the `mkerefuse` module\n",
- "to discover the upcoming garbage & recycle pickups."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "from mkerefuse.refuse import RefuseQuery\n",
- "from mkerefuse.refuse import RefuseQueryAddress"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Define The Address"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "address = RefuseQueryAddress(\n",
- " house_number=2727,\n",
- " direction='S',\n",
- " street_name='27th',\n",
- " street_type='st')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Execute The Query"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "pickup = RefuseQuery.Execute(address)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Check Results"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{\n",
- " \"next_pickup_recycle_before\": \"SATURDAY MARCH 12, 2016\",\n",
- " \"next_pickup_garbage\": \"MONDAY MARCH 14, 2016\",\n",
- " \"route_recyle\": \"SR2-2-16\",\n",
- " \"success_msg\": \"2727 S 27TH ST - Address located!\",\n",
- " \"next_pickup_recycle_after\": \"FRIDAY MARCH 11, 2016\",\n",
- " \"route_garbage\": \"SP1-3A\"\n",
- "}\n"
- ]
- }
- ],
- "source": [
- "print(repr(pickup))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.3.5"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
diff --git a/mkerefuse/__main__.py b/mkerefuse/__main__.py
index 054aa1f..0fb7678 100755
--- a/mkerefuse/__main__.py
+++ b/mkerefuse/__main__.py
@@ -14,12 +14,13 @@
-s, --street STRING Street Name (ex: '27th')
-t, --street-type STRING Street Type
-T, --types List all Street Types
+
+ --html FILE Save the form output HTML for debug
"""
import logging
import sys
from docopt import docopt
from mkerefuse import __version__
-from mkerefuse.util import LogProducer
from mkerefuse.util import setup_logging
setup_logging()
@@ -50,7 +51,8 @@
# Execute the query
log.info("Executing query...")
-pickup = RefuseQuery.Execute(address)
+pickup = RefuseQuery.Execute(address,
+ html_output=args['--html'])
log.info("Query returned")
# Show the results
diff --git a/mkerefuse/refuse.py b/mkerefuse/refuse.py
index de036e5..05daa9c 100644
--- a/mkerefuse/refuse.py
+++ b/mkerefuse/refuse.py
@@ -1,41 +1,86 @@
+import json
+import logging
+import re
import requests
-from .util import XPathObject
+from .util import LogProducer
-class RefusePickup(XPathObject):
- """Defines attribute to XPath specification matching"""
+class RefusePickup(LogProducer):
+ """Parses a refuse pickup response"""
input_properties = {
- 'success_msg': '//*[@id="nConf"]/h1',
- 'route_garbage': '//*[@id="nConf"]/strong[1]',
- 'next_pickup_garbage': '//*[@id="nConf"]/strong[2]',
- 'route_recyle': '//*[@id="nConf"]/strong[3]',
- 'next_pickup_recycle_after': '//*[@id="nConf"]/strong[4]',
- 'next_pickup_recycle_before': '//*[@id="nConf"]/strong[5]',
+ # Every pattern must expose exactly one group named `value`; that is the
+ # group `from_html` reads. `(?:<[^>]*>)?` lets a single tag sit directly
+ # before (or, in the range patterns, after) a captured value.
+ # NOTE(review): the values previously lived in `strong` elements (see the
+ # XPaths removed above) -- confirm the exact markup against a saved response.
+ 'route_garbage': r'garbage pickup route for this location is (?:<[^>]*>)?(?P<value>[^<]+)',
+ 'next_pickup_garbage': r'The next garbage collection pickup for this location is: (?:<[^>]*>)?(?P<value>[^<]+)',
+ 'route_recycle': r'recycling pickup route for this location is (?:<[^>]*>)?(?P<value>[^<]+)',
+ # "between A and B": `..._after` captures A, `..._before` captures B. The
+ # other end of the range is matched but left unnamed because a pattern may
+ # not define two groups with the same name.
+ 'next_pickup_recycle_after': r'The next estimated pickup time is between (?:<[^>]*>)?(?P<value>[^<]+?)(?:<[^>]*>)? and (?:<[^>]*>)?[^<]+',
+ 'next_pickup_recycle_before': r'The next estimated pickup time is between (?:<[^>]*>)?[^<]+?(?:<[^>]*>)? and (?:<[^>]*>)?(?P<value>[^<]+)',
}
- """Maps the key to an attr name & value to an XPath lookup"""
+ """Maps the key to an attr name & value to a regex search"""
pickup_time = '0700'
"""Define what time the refuse must be outside by to make pickup time"""
+ @classmethod
+ def from_html(cls, html_contents):
+     """
+     Builds an instance by regex-searching a response body
+
+     Every `input_properties` pattern is searched for in `html_contents`
+     and the text captured by its `value` group is stored on the attribute
+     named by the key. An attribute whose pattern does not match is set to
+     an empty string.
+
+     :param html_contents: Response HTML to search
+     :type html_contents: str
+     :return: Instance with one attribute per `input_properties` key
+     :rtype: RefusePickup
+     """
+     log = logging.getLogger(cls.__name__)
+
+     log.debug("Parsing {} bytes of HTML".format(len(html_contents)))
+
+     inst = cls()
+     for attr_name, regex in cls.input_properties.items():
+         log.debug("Searching for '{n}' with '{p}'".format(
+             n=attr_name,
+             p=regex
+         ))
+         found = re.search(regex, html_contents)
+
+         # `re.search` returns None when nothing matched; fall back to an
+         # empty string in that case
+         parsed_value = found.group('value') if found is not None else ''
+         setattr(inst, attr_name, parsed_value)
+
+     return inst
+
+ def to_dict(self):
+     """
+     Collects the parsed pickup attributes into a dictionary
+
+     :return: Each `input_properties` key mapped to this instance's value
+     :rtype: dict
+     """
+     return dict(
+         (key, getattr(self, key))
+         for key in self.input_properties)
+
+ def __repr__(self):
+     """Renders the `to_dict` data as JSON text indented by four spaces"""
+     pickup_data = self.to_dict()
+     return json.dumps(pickup_data, indent=4, separators=(',', ': '))
+
class RefuseQueryAddress(object):
"""Defines an address to query for refuse pickup scheduling"""
STREET_TYPES = [
- 'AV', # Avenue
- 'BL', # Boulevard
- 'CR', # Circle
- 'CT', # Court
- 'DR', # Drive
- 'LA', # Lane
- 'PK', # Parkway
- 'PL', # Place
- 'RD', # Road
- 'SQ', # Square
- 'ST', # Street
- 'TR', # Terrace
- 'WY', # Way
+ 'AV', # Avenue
+ 'BL', # Boulevard
+ 'CR', # Circle
+ 'CT', # Court
+ 'DR', # Drive
+ 'LA', # Lane
+ 'PK', # Parkway
+ 'PL', # Place
+ 'RD', # Road
+ 'SQ', # Square
+ 'ST', # Street
+ 'TR', # Terrace
+ 'WY', # Way
]
"""Static list of address suffixes"""
@@ -92,11 +137,13 @@ class RefuseQuery(object):
"""Class to parse XHTML response with"""
@classmethod
- def Execute(cls, refuse_address):
+ def Execute(cls, refuse_address, html_output=None):
"""Queries the form URL & processes the response
:param refuse_address: Address to lookup
:type refuse_address: RefuseQueryAddress
+ :param html_output: Path to file for debugging HTML output
+ :type html_output: None|str
:return: Parsed response
:rtype: mkerefuse.refuse.RefusePickup
"""
@@ -109,5 +156,10 @@ def Execute(cls, refuse_address):
'stype': refuse_address.street_type,
'Submit': 'Submit',
})
- response_method = getattr(cls.parse_xpath, 'FromHTML')
+
+ if html_output is not None:
+ with open(html_output, 'w') as ofile:
+ ofile.write(response.text)
+
+ response_method = getattr(cls.parse_xpath, 'from_html')
return response_method(response.text)
diff --git a/mkerefuse/util.py b/mkerefuse/util.py
index 3e4f1f2..eba5222 100644
--- a/mkerefuse/util.py
+++ b/mkerefuse/util.py
@@ -1,9 +1,6 @@
-import json
import logging
import logging.config
import os.path
-import yaml
-from lxml import html
DEFAULT_LOGGING_CONFIG = {
@@ -26,6 +23,8 @@ def setup_logging(
config_path = config_path if not config_override else config_override
if os.path.exists(config_path):
+ import yaml
+
with open(config_path, "rt") as yaml_file:
config = yaml.load(yaml_file.read())
@@ -45,54 +44,4 @@ def __init__(self, subname=None):
if subname:
logger_name += " ({})".format(subname)
- self._log = logging.getLogger(logger_name)
-
-
-class XPathObject(LogProducer):
- """Helper for importing response [X]HTML into a class instance"""
-
- input_properties = {}
- """Dict of keys (property names) and XPaths (to read vals from)"""
-
- @classmethod
- def FromHTML(cls, html_contents):
- log = logging.getLogger(cls.__name__)
- inst = cls()
- log.info("Reading through {b} bytes for {c} properties...".format(
- b=len(html_contents),
- c=len(cls.input_properties)))
-
- tree = html.fromstring(html_contents)
-
- for attr_name, xpath in cls.input_properties.items():
- log.debug("Searching for '{n}': {x}".format(
- n=attr_name,
- x=xpath))
- elements = tree.xpath(xpath)
-
- if not len(elements):
- log.warn("Failed to find '{n}': {x}".format(
- n=attr_name,
- x=xpath))
- continue
-
- setattr(
- inst,
- attr_name,
- elements[0].text)
-
- return inst
-
- def to_dict(self):
- response_dict = {}
- for key, value in self.input_properties.items():
- response_dict.update({
- key: getattr(self, key),
- })
- return response_dict
-
- def __repr__(self):
- return json.dumps(
- self.to_dict(),
- indent=4,
- separators=(',', ': '))
+ self._log = logging.getLogger(logger_name)
\ No newline at end of file
diff --git a/nose2.cfg b/nose2.cfg
new file mode 100644
index 0000000..25fb599
--- /dev/null
+++ b/nose2.cfg
@@ -0,0 +1,5 @@
+[unittest]
+plugins = nose2.plugins.attrib
+ nose2.plugins.layers
+
+code-directories = tests
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 0000000..cafd3e0
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1 @@
+nose2
diff --git a/tests/data/garbageday.html b/tests/data/garbageday.html
new file mode 100644
index 0000000..1adedee
--- /dev/null
+++ b/tests/data/garbageday.html
@@ -0,0 +1,244 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sanitation Collection Schedule
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sanitation Collection Schedule
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/data/nongarbageday-recycle_unknown.html b/tests/data/nongarbageday-recycle_unknown.html
new file mode 100644
index 0000000..0614b78
--- /dev/null
+++ b/tests/data/nongarbageday-recycle_unknown.html
@@ -0,0 +1,248 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sanitation Collection Schedule
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sanitation Collection Schedule
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_parsing.py b/tests/test_parsing.py
new file mode 100644
index 0000000..87915e6
--- /dev/null
+++ b/tests/test_parsing.py
@@ -0,0 +1,86 @@
+from __future__ import print_function
+from nose2.tools import such
+
+from mkerefuse.refuse import RefusePickup
+
+
+def setup_parser(html_path):
+    """
+    Loads a saved form response and parses it into a `RefusePickup`
+
+    :param html_path: Location of the HTML fixture to read
+    :type html_path: str
+    :return: Pickup parsed from the fixture's contents
+    :rtype: mkerefuse.refuse.RefusePickup
+    """
+    with open(html_path, 'r') as fixture:
+        html_contents = fixture.read()
+    return RefusePickup.from_html(html_contents)
+
+
+# Scenario tree written with nose2's `such` DSL. Each `it.having` group parses
+# one saved response in its setup and stores the result on `it.parser`.
+with such.A('successfully fetched response') as it:
+ # Group 1: parsed from tests/data/garbageday.html
+ with it.having('garbage day'):
+ @it.has_setup
+ def setup():
+ it.parser = setup_parser('tests/data/garbageday.html')
+
+ # Every case below reuses the function name `test`; each definition is
+ # passed to its own `it.should` decorator.
+ @it.should('have the correct garbage route')
+ def test(case):
+ case.assertEqual(
+ it.parser.route_garbage,
+ 'NA1-2A')
+
+ @it.should('have the correct recycle route')
+ def test(case):
+ case.assertEqual(
+ it.parser.route_recycle,
+ 'NR1-2-3')
+
+ @it.should('have the correct next garbage pickup')
+ def test(case):
+ case.assertEqual(
+ it.parser.next_pickup_garbage,
+ 'TUESDAY DECEMBER 27, 2016')
+
+ # Both ends of the recycling range are checked: `after` then `before`
+ @it.should('have the correct next recycle pickup range')
+ def test(case):
+ case.assertEqual(
+ it.parser.next_pickup_recycle_after,
+ 'TUESDAY JANUARY 3, 2017')
+ case.assertEqual(
+ it.parser.next_pickup_recycle_before,
+ 'WEDNESDAY JANUARY 4, 2017')
+
+ # Group 2: parsed from tests/data/nongarbageday-recycle_unknown.html; all
+ # recycling attributes are expected to be empty strings.
+ with it.having('non-garbage day, unknown recycling'):
+ @it.has_setup
+ def setup():
+ it.parser = setup_parser(
+ 'tests/data/nongarbageday-recycle_unknown.html')
+
+ @it.should('have the correct garbage route')
+ def test(case):
+ case.assertEqual(
+ it.parser.route_garbage,
+ 'SP1-3A')
+
+ @it.should('have the correct recycle route')
+ def test(case):
+ case.assertEqual(
+ it.parser.route_recycle,
+ '')
+
+ @it.should('have the correct next garbage pickup')
+ def test(case):
+ case.assertEqual(
+ it.parser.next_pickup_garbage,
+ 'THURSDAY DECEMBER 29, 2016')
+
+ @it.should('have the correct next recycle pickup range')
+ def test(case):
+ case.assertEqual(
+ it.parser.next_pickup_recycle_after,
+ '')
+ case.assertEqual(
+ it.parser.next_pickup_recycle_before,
+ '')
+
+# Hand the scenario tree this module's globals so test cases are created here
+it.createTests(globals())
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..040f216
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,8 @@
+[tox]
+envlist=py27,py33,py34
+skipsdist = True
+
+[testenv]
+deps = -r{toxinidir}/requirements-test.txt
+commands = {envbindir}/python {toxinidir}/setup.py develop --quiet
+ python -m nose2 --log-capture -vv []