From 2e9db1f5c4df8d8acd05daa112fc1b27110de28a Mon Sep 17 00:00:00 2001
From: Julian Kahnert <mail@juliankahnert.de>
Date: Mon, 27 Aug 2018 09:39:11 +0200
Subject: [PATCH] Fix geizhals price parsing (#15990)

* fix geizhals price parsing

* Fix lint issue

* switch to the geizhals pypi package

* throttle updates

* update geizhals version

* initialize empty device

* minor changes to trigger another TravisCI test

* device => _device

* bump geizhals version
---
 homeassistant/components/sensor/geizhals.py | 109 ++++++--------------
 requirements_all.txt                        |   4 +-
 2 files changed, 35 insertions(+), 78 deletions(-)

diff --git a/homeassistant/components/sensor/geizhals.py b/homeassistant/components/sensor/geizhals.py
index 2c7325866ac..7d215fb6baf 100644
--- a/homeassistant/components/sensor/geizhals.py
+++ b/homeassistant/components/sensor/geizhals.py
@@ -13,15 +13,15 @@ from homeassistant.components.sensor import PLATFORM_SCHEMA
 import homeassistant.helpers.config_validation as cv
 from homeassistant.util import Throttle
 from homeassistant.helpers.entity import Entity
-from homeassistant.const import (CONF_DOMAIN, CONF_NAME)
+from homeassistant.const import CONF_NAME
 
-REQUIREMENTS = ['beautifulsoup4==4.6.3']
+REQUIREMENTS = ['geizhals==0.0.7']
 
 _LOGGER = logging.getLogger(__name__)
 
 CONF_DESCRIPTION = 'description'
 CONF_PRODUCT_ID = 'product_id'
-CONF_REGEX = 'regex'
+CONF_LOCALE = 'locale'
 
 ICON = 'mdi:coin'
 
@@ -31,13 +31,12 @@ PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend({
     vol.Required(CONF_NAME): cv.string,
     vol.Required(CONF_PRODUCT_ID): cv.positive_int,
     vol.Optional(CONF_DESCRIPTION, default='Price'): cv.string,
-    vol.Optional(CONF_DOMAIN, default='geizhals.de'): vol.In(
-        ['geizhals.at',
-         'geizhals.eu',
-         'geizhals.de',
-         'skinflint.co.uk',
-         'cenowarka.pl']),
-    vol.Optional(CONF_REGEX, default=r'\D\s(\d*)[\,|\.](\d*)'): cv.string,
+    vol.Optional(CONF_LOCALE, default='DE'): vol.In(
+        ['AT',
+         'EU',
+         'DE',
+         'UK',
+         'PL']),
 })
 
 
@@ -46,22 +45,27 @@ def setup_platform(hass, config, add_entities, discovery_info=None):
     name = config.get(CONF_NAME)
     description = config.get(CONF_DESCRIPTION)
     product_id = config.get(CONF_PRODUCT_ID)
-    domain = config.get(CONF_DOMAIN)
-    regex = config.get(CONF_REGEX)
+    domain = config.get(CONF_LOCALE)
 
-    add_entities([Geizwatch(name, description, product_id, domain, regex)],
+    add_entities([Geizwatch(name, description, product_id, domain)],
                  True)
 
 
 class Geizwatch(Entity):
     """Implementation of Geizwatch."""
 
-    def __init__(self, name, description, product_id, domain, regex):
+    def __init__(self, name, description, product_id, domain):
         """Initialize the sensor."""
+        from geizhals import Device, Geizhals
+
+        # internal
         self._name = name
+        self._geizhals = Geizhals(product_id, domain)
+        self._device = Device()
+
+        # external
         self.description = description
-        self.data = GeizParser(product_id, domain, regex)
-        self._state = None
+        self.product_id = product_id
 
     @property
     def name(self):
@@ -76,73 +80,24 @@ class Geizwatch(Entity):
     @property
     def state(self):
         """Return the best price of the selected product."""
-        return self._state
+        return self._device.prices[0]
 
     @property
     def device_state_attributes(self):
         """Return the state attributes."""
-        while len(self.data.prices) < 4:
-            self.data.prices.append("None")
-        attrs = {'device_name': self.data.device_name,
+        while len(self._device.prices) < 4:
+            self._device.prices.append('None')
+        attrs = {'device_name': self._device.name,
                  'description': self.description,
-                 'unit_of_measurement': self.data.unit_of_measurement,
-                 'product_id': self.data.product_id,
-                 'price1': self.data.prices[0],
-                 'price2': self.data.prices[1],
-                 'price3': self.data.prices[2],
-                 'price4': self.data.prices[3]}
+                 'unit_of_measurement': self._device.price_currency,
+                 'product_id': self.product_id,
+                 'price1': self._device.prices[0],
+                 'price2': self._device.prices[1],
+                 'price3': self._device.prices[2],
+                 'price4': self._device.prices[3]}
         return attrs
 
-    def update(self):
-        """Get the latest price from geizhals and updates the state."""
-        self.data.update()
-        self._state = self.data.prices[0]
-
-
-class GeizParser:
-    """Pull data from the geizhals website."""
-
-    def __init__(self, product_id, domain, regex):
-        """Initialize the sensor."""
-        # parse input arguments
-        self.product_id = product_id
-        self.domain = domain
-        self.regex = regex
-
-        # set some empty default values
-        self.device_name = ''
-        self.prices = [None, None, None, None]
-        self.unit_of_measurement = ''
-
     @Throttle(MIN_TIME_BETWEEN_UPDATES)
     def update(self):
-        """Update the device prices."""
-        import bs4
-        import requests
-        import re
-
-        sess = requests.session()
-        request = sess.get('https://{}/{}'.format(self.domain,
-                                                  self.product_id),
-                           allow_redirects=True,
-                           timeout=1)
-        soup = bs4.BeautifulSoup(request.text, 'html.parser')
-
-        # parse name
-        raw = soup.find_all('span', attrs={'itemprop': 'name'})
-        self.device_name = raw[1].string
-
-        # parse prices
-        prices = []
-        for tmp in soup.find_all('span', attrs={'class': 'gh_price'}):
-            matches = re.search(self.regex, tmp.string)
-            raw = '{}.{}'.format(matches.group(1),
-                                 matches.group(2))
-            prices += [float(raw)]
-        prices.sort()
-        self.prices = prices[1:]
-
-        # parse unit
-        price_match = soup.find('span', attrs={'class': 'gh_price'})
-        matches = re.search(r'€|£|PLN', price_match.string)
-        self.unit_of_measurement = matches.group()
+        """Get the latest price from geizhals and update the state."""
+        self._device = self._geizhals.parse()
diff --git a/requirements_all.txt b/requirements_all.txt
index 92aeec6aefe..2a238a933b8 100644
--- a/requirements_all.txt
+++ b/requirements_all.txt
@@ -163,7 +163,6 @@ batinfo==0.4.2
 # beacontools[scan]==1.2.3
 
 # homeassistant.components.device_tracker.linksys_ap
-# homeassistant.components.sensor.geizhals
 # homeassistant.components.sensor.scrape
 # homeassistant.components.sensor.sytadin
 beautifulsoup4==4.6.3
@@ -387,6 +386,9 @@ gTTS-token==1.1.1
 # homeassistant.components.sensor.gearbest
 gearbest_parser==1.0.7
 
+# homeassistant.components.sensor.geizhals
+geizhals==0.0.7
+
 # homeassistant.components.sensor.gitter
 gitterpy==0.1.7
 
-- 
GitLab