diff --git a/Makefile b/Makefile index 4a3804b..271e575 100644 --- a/Makefile +++ b/Makefile @@ -8,9 +8,6 @@ LAMBDA_NAME := check-garbage-day LAMBDA_HANDLER := refusereminder LAMBDA_FREQ := 12 hours -BUILD_CONTAINER_NAME := mke-trash-pickup_libs -BUILD_CONTAINER_IMAGE := amazonlinux:latest - venv: virtualenv -p python$(PYTHON_VERSION) $(VENV_DIR) @@ -19,34 +16,14 @@ venv: deps : venv $(VENV_DIR)/bin/pip$(PIP_VERSION) install -e . -site-packages : - @docker rm -f $(BUILD_CONTAINER_NAME) >&/dev/null || true - @docker run \ - -id \ - -v $(shell pwd):/code:ro \ - --name $(BUILD_CONTAINER_NAME) \ - $(BUILD_CONTAINER_IMAGE) - @docker exec -it $(BUILD_CONTAINER_NAME) yum install -y \ - gcc \ - libxml2-devel \ - libxslt-devel \ - python27 \ - python27-devel \ - python27-pip - @docker exec -it $(BUILD_CONTAINER_NAME) pip install /code - @docker cp \ - $(BUILD_CONTAINER_NAME):/usr/local/lib64/python2.7/$@ \ - ./$@-64 - @docker cp \ - $(BUILD_CONTAINER_NAME):/usr/local/lib/python2.7/$@ \ - ./$@ - @docker rm -f $(BUILD_CONTAINER_NAME) +.PHONY : version +version : venv + @echo "import mkerefuse; print(mkerefuse.__version__)" | $(VENV_DIR)/bin/python .PHONY : ldist -ldist : site-packages +# The site-packages rule is removed above (lxml is no longer built), so ldist must not depend on it +ldist : zip -r $(LDIST_ZIP) $(LAMBDA_HANDLER).py mkerefuse -x *.pyc - cd site-packages-64 && zip -r $(LDIST_ZIP) * - cd site-packages && zip -r $(LDIST_ZIP) * .PHONY : s3-bucket s3-bucket : diff --git a/README.md b/README.md index acc4f37..4a8dc1e 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,6 @@ I'm lazy. ## Usage ### CloudFormation -**Important:** To build the [lxml](http://lxml.de/) library for deployment -to AWS Lambda, a Docker container will be employed during the `make ldist` -target. 
- ```sh # Create the S3 bucket, build the code, deploy to S3 make s3-bucket ldist s3-deploy @@ -27,6 +23,7 @@ make cloud \ **Other Variables** | Name | Default | Description | +| ---- | ------- | ----------- | | `STACK_NAME` | `mke-trash-pickup` | CloudFormation stack name | | `DEPLOY_BUCKET` | `mke-trash-pickup-12241` | S3 bucket for .zip deployment (Must be changed) | | `LAMBDA_FREQ` | `12 hours` | How often the the scheduled event will check for changes | @@ -40,30 +37,55 @@ $ mkerefusecheck \ --direction S \ --street 27th \ --street-type st -2016-04-14 20:23:19 - mke-refuse - DEBUG - Parsing arguments -2016-04-14 20:23:19 - mke-refuse - DEBUG - Composing query address -2016-04-14 20:23:19 - mke-refuse - INFO - Executing query... -2016-04-14 20:23:19 - requests.packages.urllib3.connectionpool - INFO - Starting new HTTP connection (1): mpw.milwaukee.gov -2016-04-14 20:23:19 - requests.packages.urllib3.connectionpool - DEBUG - "POST /services/garbage_day HTTP/1.1" 200 None -2016-04-14 20:23:19 - RefusePickup - INFO - Reading through 14152 bytes for 6 properties... -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_after': //*[@id="nConf"]/strong[4] -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'route_recyle': //*[@id="nConf"]/strong[3] -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_before': //*[@id="nConf"]/strong[5] -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'success_msg': //*[@id="nConf"]/h1 -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'route_garbage': //*[@id="nConf"]/strong[1] -2016-04-14 20:23:19 - RefusePickup - DEBUG - Searching for 'next_pickup_garbage': //*[@id="nConf"]/strong[2] -2016-04-14 20:23:19 - mke-refuse - INFO - Query returned +2016-12-29 12:50:08 - mke-refuse - DEBUG - Parsing arguments +2016-12-29 12:50:08 - mke-refuse - DEBUG - Composing query address +2016-12-29 12:50:08 - mke-refuse - INFO - Executing query... 
+2016-12-29 12:50:08 - requests.packages.urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): mpw.milwaukee.gov +2016-12-29 12:50:08 - requests.packages.urllib3.connectionpool - DEBUG - http://mpw.milwaukee.gov:80 "POST /services/garbage_day HTTP/1.1" 200 None +2016-12-29 12:50:08 - RefusePickup - DEBUG - Parsing 13813 bytes of HTML +2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_garbage' with 'The next garbage collection pickup for this location is: (?P[^<]+)' +2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'route_garbage' with 'garbage pickup route for this location is (?P[^<]+)' +2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_before' with 'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)' +2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'route_recycle' with 'recycling pickup route for this location is (?P[^<]+)' +2016-12-29 12:50:08 - RefusePickup - DEBUG - Searching for 'next_pickup_recycle_after' with 'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)' +2016-12-29 12:50:08 - mke-refuse - INFO - Query returned { - "next_pickup_recycle_after": "TUESDAY MAY 3, 2016", - "route_recyle": "SR01-3-07", - "route_garbage": "SP1-3A", - "success_msg": "2727 S 27TH ST - Address located!", - "next_pickup_recycle_before": "May 9th - May 13th", - "next_pickup_garbage": "TUESDAY APRIL 19, 2016" + "route_recycle": "NR1-2-3", + "next_pickup_garbage": "THURSDAY JANUARY 5, 2017", + "route_garbage": "NP1-2A", + "next_pickup_recycle_before": "THURSDAY JANUARY 5, 2017", + "next_pickup_recycle_after": "WEDNESDAY JANUARY 4, 2017" } ``` ### Advanced + +```python +In [1]: from mkerefuse.refuse import RefuseQuery + ...: from mkerefuse.refuse import RefuseQueryAddress + ...: + ...: address = RefuseQueryAddress( + ...: house_number='2727', + ...: direction='S', + ...: street_name='27th', + ...: street_type='ST') + ...: + ...: pickup = RefuseQuery.Execute(address) + 
...: + ...: print("Found garbage route: {}".format(pickup.route_garbage)) + ...: + ...: pickup.to_dict() + ...: + +Found garbage route: SP1-3A +Out[1]: +{'next_pickup_garbage': u'THURSDAY DECEMBER 29, 2016', + 'next_pickup_recycle_after': '', + 'next_pickup_recycle_before': '', + 'route_garbage': u'SP1-3A', + 'route_recycle': ''} +``` + *See [Usage.ipynb](Usage.ipynb)* ### Development Setup @@ -85,6 +107,10 @@ venv/bin/jupyter notebook - **Submit:** Submit ### Form Response *(XPaths)* +**Note:** These xpaths are still listed for historical reasons since parsing +is now done via regex due to issues like +[#5](https://github.com/tomislacker/python-mke-trash-pickup/issues/5). + - **Success Or Note:** `//*[@id="nConf"]/h1` - **Winter Pickup Route:** `//*[@id="nConf"]/strong[1]` - **Next Garbage Pickup:** `//*[@id="nConf"]/strong[2]` @@ -103,5 +129,9 @@ curl \ ## References ### Building Libraries for Lambda +**Note:** These references are still listed for historical reasons since +parsing is now done via regex due to issues like +[#5](https://github.com/tomislacker/python-mke-trash-pickup/issues/5). + - [[azavea.com] Using Python's LXML in Amazon Lambda](https://www.azavea.com/blog/2016/06/27/using-python-lxml-amazon-lambda/) - [[stackoverflow.com] Use LXML on AWS Lambda](http://stackoverflow.com/questions/36387664/use-lxml-on-aws-lambda) diff --git a/Usage.ipynb b/Usage.ipynb deleted file mode 100644 index a657667..0000000 --- a/Usage.ipynb +++ /dev/null @@ -1,119 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Overview\n", - "This notebook will show you how to employe the `mkerefuse` module\n", - "to discover the upcoming garbage & recycle pickups." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from mkerefuse.refuse import RefuseQuery\n", - "from mkerefuse.refuse import RefuseQueryAddress" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define The Address" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "address = RefuseQueryAddress(\n", - " house_number=2727,\n", - " direction='S',\n", - " street_name='27th',\n", - " street_type='st')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Execute The Query" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "pickup = RefuseQuery.Execute(address)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Check Results" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"next_pickup_recycle_before\": \"SATURDAY MARCH 12, 2016\",\n", - " \"next_pickup_garbage\": \"MONDAY MARCH 14, 2016\",\n", - " \"route_recyle\": \"SR2-2-16\",\n", - " \"success_msg\": \"2727 S 27TH ST - Address located!\",\n", - " \"next_pickup_recycle_after\": \"FRIDAY MARCH 11, 2016\",\n", - " \"route_garbage\": \"SP1-3A\"\n", - "}\n" - ] - } - ], - "source": [ - "print(repr(pickup))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.3.5" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git 
a/mkerefuse/__main__.py b/mkerefuse/__main__.py index 054aa1f..0fb7678 100755 --- a/mkerefuse/__main__.py +++ b/mkerefuse/__main__.py @@ -14,12 +14,13 @@ -s, --street STRING Street Name (ex: '27th') -t, --street-type STRING Street Type -T, --types List all Street Types + + --html FILE Save the form output HTML for debug """ import logging import sys from docopt import docopt from mkerefuse import __version__ -from mkerefuse.util import LogProducer from mkerefuse.util import setup_logging setup_logging() @@ -50,7 +51,8 @@ # Execute the query log.info("Executing query...") -pickup = RefuseQuery.Execute(address) +pickup = RefuseQuery.Execute(address, + html_output=args['--html']) log.info("Query returned") # Show the results diff --git a/mkerefuse/refuse.py b/mkerefuse/refuse.py index de036e5..05daa9c 100644 --- a/mkerefuse/refuse.py +++ b/mkerefuse/refuse.py @@ -1,41 +1,86 @@ +import json +import logging +import re import requests -from .util import XPathObject +from .util import LogProducer -class RefusePickup(XPathObject): - """Defines attribute to XPath specification matching""" +class RefusePickup(LogProducer): + """Parses a refuse pickup response""" input_properties = { - 'success_msg': '//*[@id="nConf"]/h1', - 'route_garbage': '//*[@id="nConf"]/strong[1]', - 'next_pickup_garbage': '//*[@id="nConf"]/strong[2]', - 'route_recyle': '//*[@id="nConf"]/strong[3]', - 'next_pickup_recycle_after': '//*[@id="nConf"]/strong[4]', - 'next_pickup_recycle_before': '//*[@id="nConf"]/strong[5]', + 'route_garbage': r'garbage pickup route for this location is (?P[^<]+)', + 'next_pickup_garbage': r'The next garbage collection pickup for this location is: (?P[^<]+)', + 'route_recycle': r'recycling pickup route for this location is (?P[^<]+)', + 'next_pickup_recycle_after': r'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)', + 'next_pickup_recycle_before': r'The next estimated pickup time is between (?P[^<]+) and (?P[^<]+)', } - """Maps the key to an attr name & 
value to an XPath lookup""" + """Maps the key to an attr name & value to a regex search""" pickup_time = '0700' """Define what time the refuse must be outside by to make pickup time""" + @classmethod + def from_html(cls, html_contents): + log = logging.getLogger(cls.__name__) + + log.debug("Parsing {} bytes of HTML".format(len(html_contents))) + + inst = cls() + for attr_name, regex in cls.input_properties.items(): + log.debug("Searching for '{n}' with '{p}'".format( + n=attr_name, + p=regex + )) + pattern = re.compile(regex) + match = pattern.search(html_contents) + + try: + setattr(inst, attr_name, match.group('value')) + except AttributeError: + # search() returned None (pattern absent): fall back to an empty string + setattr(inst, attr_name, '') + + return inst + + def to_dict(self): + """ + Returns pickup information as a plain dictionary + + :return: Each `input_properties` key mapped to this instance's attribute value + :rtype: dict + """ + response_dict = {} + for key, value in self.input_properties.items(): + response_dict.update({ + key: getattr(self, key), + }) + return response_dict + + def __repr__(self): + return json.dumps( + self.to_dict(), + indent=4, + separators=(',', ': ')) + class RefuseQueryAddress(object): """Defines an address to query for refuse pickup scheduling""" STREET_TYPES = [ - 'AV', # Avenue - 'BL', # Boulevard - 'CR', # Circle - 'CT', # Court - 'DR', # Drive - 'LA', # Lane - 'PK', # Parkway - 'PL', # Place - 'RD', # Road - 'SQ', # Square - 'ST', # Street - 'TR', # Terrace - 'WY', # Way + 'AV', # Avenue + 'BL', # Boulevard + 'CR', # Circle + 'CT', # Court + 'DR', # Drive + 'LA', # Lane + 'PK', # Parkway + 'PL', # Place + 'RD', # Road + 'SQ', # Square + 'ST', # Street + 'TR', # Terrace + 'WY', # Way ] """Static list of address suffixes""" @@ -92,11 +137,13 @@ class RefuseQuery(object): """Class to parse XHTML response with""" @classmethod - def Execute(cls, refuse_address): + def Execute(cls, refuse_address, html_output=None): """Queries the form URL & processes the response :param refuse_address: 
Address to lookup :type refuse_address: RefuseQueryAddress + :param html_output: Optional path; when given, the raw response body is saved there for debugging + :type html_output: None|str :return: Parsed response :rtype: mkerefuse.refuse.RefusePickup """ @@ -109,5 +156,11 @@ def Execute(cls, refuse_address): 'stype': refuse_address.street_type, 'Submit': 'Submit', }) - response_method = getattr(cls.parse_xpath, 'FromHTML') + + if html_output is not None: + # Write the undecoded bytes: text-mode writes of response.text can raise UnicodeEncodeError (py27 / non-UTF-8 locale) + with open(html_output, 'wb') as ofile: + ofile.write(response.content) + + response_method = getattr(cls.parse_xpath, 'from_html') return response_method(response.text) diff --git a/mkerefuse/util.py b/mkerefuse/util.py index 3e4f1f2..eba5222 100644 --- a/mkerefuse/util.py +++ b/mkerefuse/util.py @@ -1,9 +1,6 @@ -import json import logging import logging.config import os.path -import yaml -from lxml import html DEFAULT_LOGGING_CONFIG = { @@ -26,6 +23,8 @@ def setup_logging( config_path = config_path if not config_override else config_override if os.path.exists(config_path): + import yaml + with open(config_path, "rt") as yaml_file: config = yaml.load(yaml_file.read()) @@ -45,54 +44,4 @@ def __init__(self, subname=None): if subname: logger_name += " ({})".format(subname) - self._log = logging.getLogger(logger_name) - - -class XPathObject(LogProducer): - """Helper for importing response [X]HTML into a class instance""" - - input_properties = {} - """Dict of keys (property names) and XPaths (to read vals from)""" - - @classmethod - def FromHTML(cls, html_contents): - log = logging.getLogger(cls.__name__) - inst = cls() - log.info("Reading through {b} bytes for {c} properties...".format( - b=len(html_contents), - c=len(cls.input_properties))) - - tree = html.fromstring(html_contents) - - for attr_name, xpath in cls.input_properties.items(): - log.debug("Searching for '{n}': {x}".format( - n=attr_name, - x=xpath)) - elements = tree.xpath(xpath) - - if not len(elements): - log.warn("Failed to find '{n}': {x}".format( - n=attr_name, - x=xpath)) - continue - - 
setattr( - inst, - attr_name, - elements[0].text) - - return inst - - def to_dict(self): - response_dict = {} - for key, value in self.input_properties.items(): - response_dict.update({ - key: getattr(self, key), - }) - return response_dict - - def __repr__(self): - return json.dumps( - self.to_dict(), - indent=4, - separators=(',', ': ')) + self._log = logging.getLogger(logger_name) \ No newline at end of file diff --git a/nose2.cfg b/nose2.cfg new file mode 100644 index 0000000..25fb599 --- /dev/null +++ b/nose2.cfg @@ -0,0 +1,5 @@ +[unittest] +plugins = nose2.plugins.attrib + nose2.plugins.layers + +code-directories = tests diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..cafd3e0 --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1 @@ +nose2 diff --git a/tests/data/garbageday.html b/tests/data/garbageday.html new file mode 100644 index 0000000..1adedee --- /dev/null +++ b/tests/data/garbageday.html @@ -0,0 +1,244 @@ + + + + + + + + + + + + + + + + + + + + +Sanitation Collection Schedule + + + + + + + + +
+
+
+ City of Milwaukee + +
+
+

Official Website of the City of Milwaukee

+
+
+

CALL for Action (414) 286-CITY | Click for Action

+
+
+ + +
+ + + + +
+ +
+
+
+

+ + + +Sanitation Collection Schedule

+
+ +
+
+ +
+ +
+ +
+

1234 N 15th ST - Address located!

+

Next Scheduled Garbage Pickup:

+The winter garbage pickup route for this location is NA1-2A.

Today is a garbage collection date for this location!

The next garbage collection pickup for this location is: TUESDAY DECEMBER 27, 2016

Click here to see your complete garbage collection schedule.

+

Next Scheduled Recycling Pickup:

+Because of weather events and Holidays, the recycling collection dates are not guaranteed in the winter.

The winter recycling pickup route for this location is NR1-2-3.

The next estimated pickup time is between TUESDAY JANUARY 3, 2017 and WEDNESDAY JANUARY 4, 2017.

+ +
Please make special note of the street and address. Every attempt has been made to match the address to a street name located in the City of Milwaukee database. If the street displayed above is not correct, please return to the request form and confirm street, house number, street direction or street suffix.  If that still does not provide the correct street, you may need to try another address, or an address on a nearby cross-street.
+

+
+ +
+
+ +
+
+ +
+
+ +
+
+
+ +
+
+
+ + +
+ + diff --git a/tests/data/nongarbageday-recycle_unknown.html b/tests/data/nongarbageday-recycle_unknown.html new file mode 100644 index 0000000..0614b78 --- /dev/null +++ b/tests/data/nongarbageday-recycle_unknown.html @@ -0,0 +1,248 @@ + + + + + + + + + + + + + + + + + + + + +Sanitation Collection Schedule + + + + + + + + +
+
+
+ City of Milwaukee + +
+
+

Official Website of the City of Milwaukee

+
+
+

CALL for Action (414) 286-CITY | Click for Action

+
+
+ + +
+ + + + +
+ +
+
+
+

+ + + +Sanitation Collection Schedule

+
+ +
+
+ +
+ +
+ +
+ + + + +

2727 S 27TH ST - Address located!

+

Next Scheduled Garbage Pickup:

+The winter garbage pickup route for this location is SP1-3A.

+The next garbage collection pickup for this location is: THURSDAY DECEMBER 29, 2016

+Click here to see your complete garbage collection schedule.

+

Next Scheduled Recycling Pickup:

+
Please make special note of the street and address. Every attempt has been made to match the address to a street name located in the City of Milwaukee database. If the street displayed above is not correct, please return to the request form and confirm street, house number, street direction or street suffix.  If that still does not provide the correct street, you may need to try another address, or an address on a nearby cross-street.
+

+
+ +
+
+ +
+
+ +
+
+ +
+
+
+ +
+
+
+ + +
+ + \ No newline at end of file diff --git a/tests/test_parsing.py b/tests/test_parsing.py new file mode 100644 index 0000000..87915e6 --- /dev/null +++ b/tests/test_parsing.py @@ -0,0 +1,86 @@ +from __future__ import print_function +from nose2.tools import such + +from mkerefuse.refuse import RefusePickup + + +def setup_parser(html_path): + """ + Reads test HTML & instantiates a new `RefusePickup` + + :param html_path: Path to HTML file with a test response + :type html_path: str + :return: RefusePickup instance + :rtype: mkerefuse.RefusePickup + """ + with open(html_path, 'r') as infile: + return RefusePickup.from_html(infile.read()) + + +with such.A('successfully fetched response') as it: + with it.having('garbage day'): + @it.has_setup + def setup(): + it.parser = setup_parser('tests/data/garbageday.html') + + @it.should('have the correct garbage route') + def test(case): + case.assertEqual( + it.parser.route_garbage, + 'NA1-2A') + + @it.should('have the correct recycle route') + def test(case): + case.assertEqual( + it.parser.route_recycle, + 'NR1-2-3') + + @it.should('have the correct next garbage pickup') + def test(case): + case.assertEqual( + it.parser.next_pickup_garbage, + 'TUESDAY DECEMBER 27, 2016') + + @it.should('have the correct next recycle pickup range') + def test(case): + case.assertEqual( + it.parser.next_pickup_recycle_after, + 'TUESDAY JANUARY 3, 2017') + case.assertEqual( + it.parser.next_pickup_recycle_before, + 'WEDNESDAY JANUARY 4, 2017') + + with it.having('non-garbage day, unknown recycling'): + @it.has_setup + def setup(): + it.parser = setup_parser( + 'tests/data/nongarbageday-recycle_unknown.html') + + @it.should('have the correct garbage route') + def test(case): + case.assertEqual( + it.parser.route_garbage, + 'SP1-3A') + + @it.should('have the correct recycle route') + def test(case): + case.assertEqual( + it.parser.route_recycle, + '') + + @it.should('have the correct next garbage pickup') + def test(case): + case.assertEqual( + 
it.parser.next_pickup_garbage, + 'THURSDAY DECEMBER 29, 2016') + + @it.should('have the correct next recycle pickup range') + def test(case): + case.assertEqual( + it.parser.next_pickup_recycle_after, + '') + case.assertEqual( + it.parser.next_pickup_recycle_before, + '') + +it.createTests(globals()) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..040f216 --- /dev/null +++ b/tox.ini @@ -0,0 +1,8 @@ +[tox] +envlist=py27,py33,py34 +skipsdist = True + +[testenv] +deps = -r{toxinidir}/requirements-test.txt +commands = {envbindir}/python {toxinidir}/setup.py develop --quiet + python -m nose2 --log-capture -vv []