Add scraper script

This commit is contained in:
Melora Hugues 2023-12-16 16:45:40 +01:00
parent c523e95856
commit 2ce86bbd2f
4 changed files with 123 additions and 0 deletions

1
.gitignore vendored
View file

@ -160,3 +160,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
data/

7
Dockerfile Normal file
View file

@ -0,0 +1,7 @@
# Small Alpine-based image providing Python 3.11.
FROM python:3.11-alpine
# Install dependencies before copying the script, so that editing scraper.py
# does not invalidate the cached dependency layer.
COPY requirements.txt requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
COPY scraper.py scraper.py
# Run the scraper against the configuration file expected at /data/config.json.
ENTRYPOINT [ "python", "scraper.py", "-c", "/data/config.json" ]

5
requirements.txt Normal file
View file

@ -0,0 +1,5 @@
certifi==2023.11.17
charset-normalizer==3.3.2
idna==3.6
requests==2.31.0
urllib3==2.1.0

110
scraper.py Normal file
View file

@ -0,0 +1,110 @@
import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path
import sqlite3
from typing import List, NamedTuple, Dict
import requests
DEFAULT_CONFIG_PATH = Path("config.json")
EURONEXT_BASE_URL = "https://live.euronext.com/intraday_chart/getChartData"
@dataclass
class ProgramArgs:
    """Command-line arguments accepted by the scraper."""

    # Location of the JSON configuration file to load.
    config_path: Path
class Action(NamedTuple):
    """A tracked action, described by a display name and a lookup code."""

    # Name used when reporting and storing the action.
    name: str
    # Identifier placed in the Euronext request URL.
    code: str
class StoredAction(NamedTuple):
    """One row destined for the ``actions`` table."""

    # Name of the action the value belongs to.
    name: str
    # Value taken from the ``price`` field of the fetched entry.
    value: float
    # Timestamp taken from the ``time`` field of the fetched entry.
    date: str
@dataclass
class Config:
    """Scraper settings loaded from the JSON configuration file."""

    # Actions whose latest value should be fetched.
    actions: List[Action]
    # Location of the SQLite database receiving the values.
    db_path: Path
def load_config(config_path: Path) -> Config:
    """Read the JSON configuration file and build a :class:`Config`.

    The file must contain an ``actions`` list whose items have ``name`` and
    ``code`` keys, and a ``db`` object with a ``path`` key.

    Args:
        config_path: Location of the JSON configuration file.

    Returns:
        The parsed configuration.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If a required key is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(config_path, encoding="utf-8") as conf_file:
        raw_conf = json.load(conf_file)

    actions = [Action(name=a["name"], code=a["code"]) for a in raw_conf["actions"]]
    return Config(actions=actions, db_path=Path(raw_conf["db"]["path"]))
def get_last_value_for_action(
    action: Action, timeout: float = 10.0
) -> StoredAction:
    """Fetch the most recent intraday value of *action* from Euronext.

    Args:
        action: The action to look up; its ``code`` is placed in the URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a silent server.

    Returns:
        The last entry of the response, converted to a :class:`StoredAction`.

    Raises:
        ValueError: If the server answers with an error status, or if the
            body is not a non-empty JSON list whose last item is an object
            with ``price`` and ``time`` keys.
        requests.RequestException: Connection failures and timeouts are not
            converted and propagate unchanged.
    """
    url = f"{EURONEXT_BASE_URL}/{action.code}/intraday"
    resp = requests.get(url, timeout=timeout)

    # An error page is a bad response for this action, reported the same way
    # as any other unusable payload.
    try:
        resp.raise_for_status()
    except requests.HTTPError as exc:
        raise ValueError(f"Unexpected HTTP status {resp.status_code}") from exc

    # An undecodable body raises requests' JSONDecodeError, itself a ValueError.
    resp_json = resp.json()
    if not isinstance(resp_json, list):
        raise ValueError("Invalid format for response")
    if not resp_json:
        raise ValueError("Empty list of values")

    last_value = resp_json[-1]
    try:
        return StoredAction(
            name=action.name,
            value=last_value["price"],
            date=last_value["time"],
        )
    except (KeyError, TypeError) as exc:
        # TypeError covers a last item that is not a JSON object at all.
        raise ValueError("Invalid format for response") from exc
def save_values_to_db(db_path: Path, values: List[StoredAction]) -> None:
    """Replace the contents of the ``actions`` table with *values*.

    All existing rows are deleted and one row per item of *values* is
    inserted, inside a single transaction.

    Args:
        db_path: Location of the SQLite database file. The ``actions`` table
            must already exist; it is not created here.
        values: Rows to store; each item provides ``name``, ``value`` and
            ``date`` attributes.

    Raises:
        sqlite3.Error: If the database cannot be opened or a statement fails.
    """
    query_data = [(k.name, k.value, k.date) for k in values]

    con = sqlite3.connect(db_path)
    try:
        # ``with con`` commits on success and rolls back on error, so the
        # DELETE is never committed without the matching INSERTs.
        with con:
            con.execute("DELETE FROM actions")
            con.executemany(
                "INSERT INTO actions (name, value, date) VALUES(?, ?, ?)",
                query_data,
            )
    finally:
        # The transaction context manager does not close the connection.
        con.close()
def main():
    """Fetch the latest value of every configured action and store them.

    Actions whose lookup raises ``ValueError`` are reported and left out;
    whatever was fetched successfully is then written to the database.
    """
    cli_args = parse_args()
    config = load_config(cli_args.config_path)

    fetched = []
    for action in config.actions:
        try:
            fetched.append(get_last_value_for_action(action))
        except ValueError as exc:
            print(f"Got error {exc} for action {action.name}")

    save_values_to_db(config.db_path, fetched)
def parse_args() -> ProgramArgs:
    """Parse the command line into a :class:`ProgramArgs`.

    Recognises ``--config`` / ``-c``; when the option is omitted,
    ``DEFAULT_CONFIG_PATH`` is used.
    """
    cli = argparse.ArgumentParser(
        prog="Euronext scraper",
        description="Scraper for euronext action values",
    )
    cli.add_argument(
        "--config",
        "-c",
        default=DEFAULT_CONFIG_PATH,
        type=Path,
        help="Path for the configuration file",
    )
    namespace = cli.parse_args()
    return ProgramArgs(config_path=namespace.config)
if __name__ == "__main__":
    # Top-level boundary: report any failure and exit with a non-zero status.
    try:
        main()
    except Exception as exc:
        # Diagnostics go to stderr so stdout carries only regular output.
        print(f"An error occurred: {exc}", file=sys.stderr)
        sys.exit(1)