From 2ce86bbd2f4c0859cc4aca13ee4f3fcdf3920e12 Mon Sep 17 00:00:00 2001
From: Melora Hugues
Date: Sat, 16 Dec 2023 16:45:40 +0100
Subject: [PATCH] Add scraper script

---
 .gitignore       |   1 +
 Dockerfile       |   7 +++
 requirements.txt |   5 ++
 scraper.py       | 160 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 173 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 requirements.txt
 create mode 100644 scraper.py

diff --git a/.gitignore b/.gitignore
index 5d381cc..d33bc9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,4 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+data/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..0afe941
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,7 @@
+FROM python:3.11-alpine
+
+COPY requirements.txt requirements.txt
+COPY scraper.py scraper.py
+
+RUN pip install -r requirements.txt
+ENTRYPOINT [ "python", "scraper.py", "-c", "/data/config.json" ]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..92e5c45
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+certifi==2023.11.17
+charset-normalizer==3.3.2
+idna==3.6
+requests==2.31.0
+urllib3==2.1.0
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000..139224c
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,160 @@
+"""Scrape the latest Euronext intraday values into a SQLite database."""
+import argparse
+import json
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+import sqlite3
+from typing import List, NamedTuple, Dict
+
+import requests
+
+DEFAULT_CONFIG_PATH = Path("config.json")
+EURONEXT_BASE_URL = "https://live.euronext.com/intraday_chart/getChartData"
+# requests never times out by default; bound each call, in seconds.
+REQUEST_TIMEOUT_SECONDS = 30
+
+
+@dataclass
+class ProgramArgs:
+    """Command-line arguments of the scraper."""
+
+    config_path: Path
+
+
+class Action(NamedTuple):
+    """An action to track: a display name and its Euronext code."""
+
+    name: str
+    code: str
+
+
+class StoredAction(NamedTuple):
+    """The last value fetched for an action, as written to the database."""
+
+    name: str
+    value: float
+    date: str
+
+
+@dataclass
+class Config:
+    """Scraper settings read from the JSON configuration file."""
+
+    actions: List[Action]
+    db_path: Path
+
+
+def load_config(config_path: Path) -> Config:
+    """Read the scraper configuration from a JSON file.
+
+    The file must hold an "actions" list of objects with "name" and "code"
+    keys, and a "db" object with a "path" key.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        ValueError: If the file is not valid JSON.
+        KeyError: If a required key is missing.
+    """
+    # JSON is UTF-8 by specification; do not depend on the locale encoding.
+    with open(config_path, encoding="utf-8") as conf_file:
+        raw_conf = json.load(conf_file)
+    conf = Config(
+        actions=[Action(name=a["name"], code=a["code"]) for a in raw_conf["actions"]],
+        db_path=Path(raw_conf["db"]["path"]),
+    )
+    return conf
+
+
+def get_last_value_for_action(action: Action) -> StoredAction:
+    """Fetch the most recent intraday value published for an action.
+
+    Raises:
+        ValueError: If the response is not a non-empty JSON list, or if its
+            last entry lacks the "price" or "time" key.
+        requests.RequestException: If the request fails or times out.
+    """
+    url = f"{EURONEXT_BASE_URL}/{action.code}/intraday"
+    # Without a timeout a stalled connection would block the scraper forever.
+    resp = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
+    resp_json = resp.json()
+    if not isinstance(resp_json, list):
+        raise ValueError("Invalid format for response")
+    if len(resp_json) < 1:
+        raise ValueError("Empty list of values")
+    last_value = resp_json[-1]
+    try:
+        return StoredAction(
+            name=action.name,
+            value=last_value["price"],
+            date=last_value["time"],
+        )
+    except (KeyError, TypeError) as exc:
+        # TypeError covers a last entry that is not a JSON object.
+        raise ValueError("Invalid format for response") from exc
+
+
+def save_values_to_db(db_path: Path, values: List[StoredAction]) -> None:
+    """Replace the content of the "actions" table with the given values.
+
+    The table must already exist. The delete and the inserts share one
+    transaction, so a failure leaves the previous content in place.
+    """
+    con = sqlite3.connect(db_path)
+    try:
+        # Used as a context manager, the connection commits on success and
+        # rolls back on error, but it is not closed: do that explicitly.
+        with con:
+            cur = con.cursor()
+            cur.execute("DELETE FROM actions")
+            query_data = [(k.name, k.value, k.date) for k in values]
+            cur.executemany(
+                "INSERT INTO actions (name, value, date) VALUES(?, ?, ?)",
+                query_data,
+            )
+    finally:
+        con.close()
+
+
+def main():
+    """Fetch the last value of every configured action and store them.
+
+    An action whose fetch raises ValueError is reported and skipped; any
+    other error propagates to the caller.
+    """
+    args = parse_args()
+    conf = load_config(args.config_path)
+    res = []
+    for action in conf.actions:
+        try:
+            value = get_last_value_for_action(action)
+            res.append(value)
+        except ValueError as exc:
+            print(f"Got error {exc} for action {action.name}", file=sys.stderr)
+    save_values_to_db(conf.db_path, res)
+
+
+def parse_args() -> ProgramArgs:
+    """Parse the command-line arguments of the scraper."""
+    parser = argparse.ArgumentParser(
+        prog="Euronext scraper", description="Scraper for euronext action values"
+    )
+    parser.add_argument(
+        "--config",
+        "-c",
+        type=Path,
+        help="Path for the configuration file",
+        default=DEFAULT_CONFIG_PATH,
+    )
+    args = parser.parse_args()
+    return ProgramArgs(
+        config_path=args.config,
+    )
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except Exception as e:
+        print(f"An error occurred: {e}", file=sys.stderr)
+        sys.exit(1)