Add scraper script
parent c523e95856
commit 2ce86bbd2f
4 changed files with 123 additions and 0 deletions
.gitignore (vendored): 1 addition
@@ -160,3 +160,4 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+data/
Dockerfile (new file): 7 additions
@@ -0,0 +1,7 @@
FROM python:3.11-alpine

COPY requirements.txt requirements.txt
COPY scraper.py scraper.py

RUN pip install -r requirements.txt
ENTRYPOINT [ "python", "scraper.py", "-c", "/data/config.json" ]
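The entrypoint hard-codes /data/config.json, so the image is meant to be run with a host directory mounted at /data that holds the config file and the SQLite database it points to. A minimal usage sketch (the euronext-scraper tag and the ./data directory are assumptions, not part of this commit):

docker build -t euronext-scraper .
docker run --rm -v "$(pwd)/data:/data" euronext-scraper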
requirements.txt (new file): 5 additions
@@ -0,0 +1,5 @@
certifi==2023.11.17
charset-normalizer==3.3.2
idna==3.6
requests==2.31.0
urllib3==2.1.0
scraper.py (new file): 110 additions
@@ -0,0 +1,110 @@
import argparse
import json
import sys
from dataclasses import dataclass
from pathlib import Path
import sqlite3
from typing import List, NamedTuple, Dict

import requests

DEFAULT_CONFIG_PATH = Path("config.json")
EURONEXT_BASE_URL = "https://live.euronext.com/intraday_chart/getChartData"


@dataclass
class ProgramArgs:
    config_path: Path


class Action(NamedTuple):
    name: str
    code: str


class StoredAction(NamedTuple):
    name: str
    value: float
    date: str


@dataclass
class Config:
    actions: List[Action]
    db_path: Path


def load_config(config_path: Path) -> Config:
    with open(config_path) as conf_file:
        raw_conf = json.load(conf_file)
    conf = Config(
        actions=[Action(name=a["name"], code=a["code"]) for a in raw_conf["actions"]],
        db_path=Path(raw_conf["db"]["path"]),
    )
    return conf


def get_last_value_for_action(action: Action) -> StoredAction:
    # The intraday endpoint returns a list of data points; keep only the most recent one.
    url = f"{EURONEXT_BASE_URL}/{action.code}/intraday"
    resp = requests.get(url)
    resp_json = resp.json()
    if len(resp_json) < 1:
        raise ValueError("Empty list of values")
    last_value = resp_json[-1]
    try:
        return StoredAction(
            name=action.name,
            value=last_value["price"],
            date=last_value["time"],
        )
    except KeyError as exc:
        raise ValueError("Invalid format for response") from exc


def save_values_to_db(db_path: Path, values: List[StoredAction]) -> None:
    # Replace the previous snapshot: clear the table, then insert the latest values.
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    cur.execute("DELETE FROM actions")
    query_data = [(k.name, k.value, k.date) for k in values]
    cur.executemany(
        "INSERT INTO actions (name, value, date) VALUES(?, ?, ?)", query_data
    )
    con.commit()


def main():
    args = parse_args()
    conf = load_config(args.config_path)
    res = []
    for action in conf.actions:
        try:
            value = get_last_value_for_action(action)
            res.append(value)
        except ValueError as exc:
            print(f"Got error {exc} for action {action.name}")
    save_values_to_db(conf.db_path, res)


def parse_args() -> ProgramArgs:
    parser = argparse.ArgumentParser(
        prog="Euronext scraper", description="Scraper for euronext action values"
    )
    parser.add_argument(
        "--config",
        "-c",
        type=Path,
        help="Path for the configuration file",
        default=DEFAULT_CONFIG_PATH,
    )
    args = parser.parse_args()
    return ProgramArgs(
        config_path=args.config,
    )


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        print(f"An error occurred: {e}")
        sys.exit(1)
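For reference, load_config expects a JSON object with an "actions" list of name/code pairs and a "db" object giving the SQLite file path. A hypothetical config.json matching the Dockerfile's /data mount (the stock name, the instrument code, and the database filename are placeholders, not part of this commit):

{
    "actions": [
        { "name": "Example stock", "code": "FR0000000000-XPAR" }
    ],
    "db": {
        "path": "/data/values.db"
    }
}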
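Note that save_values_to_db inserts into an actions table but never creates it, so the database file referenced by db.path has to be initialized once beforehand. A one-time setup sketch, with column types inferred from StoredAction (an assumption, not part of this commit):

CREATE TABLE IF NOT EXISTS actions (name TEXT, value REAL, date TEXT);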