From 72f600036511c4999fa56bf007bf92ec465e94d7 Mon Sep 17 00:00:00 2001 From: Caeden Perelli-Harris Date: Tue, 5 Sep 2023 05:49:00 +0100 Subject: [PATCH] Fix get amazon product data erroring due to whitespace in headers (#9009) * updating DIRECTORY.md * fix(get-amazon-product-data): Remove whitespace in headers * refactor(get-amazon-product-data): Don't print to_csv --------- Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com> --- web_programming/get_amazon_product_data.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/web_programming/get_amazon_product_data.py b/web_programming/get_amazon_product_data.py index c796793f2..a16175688 100644 --- a/web_programming/get_amazon_product_data.py +++ b/web_programming/get_amazon_product_data.py @@ -19,11 +19,13 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: """ url = f"https://www.amazon.in/laptop/s?k={product}" header = { - "User-Agent": """Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 - (KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36""", + "User-Agent": ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" + "(KHTML, like Gecko)Chrome/44.0.2403.157 Safari/537.36" + ), "Accept-Language": "en-US, en;q=0.5", } - soup = BeautifulSoup(requests.get(url, headers=header).text) + soup = BeautifulSoup(requests.get(url, headers=header).text, features="lxml") # Initialize a Pandas dataframe with the column titles data_frame = DataFrame( columns=[ @@ -74,8 +76,8 @@ def get_amazon_product_data(product: str = "laptop") -> DataFrame: except ValueError: discount = float("nan") except AttributeError: - pass - data_frame.loc[len(data_frame.index)] = [ + continue + data_frame.loc[str(len(data_frame.index))] = [ product_title, product_link, product_price,