Add scraper script
This commit is contained in:
parent
c523e95856
commit
2ce86bbd2f
4 changed files with 123 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -160,3 +160,4 @@ cython_debug/
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
data/
|
7
Dockerfile
Normal file
7
Dockerfile
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
FROM python:3.11-alpine

# Install the dependencies before copying the script, so that editing
# scraper.py does not invalidate the cached pip layer.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY scraper.py scraper.py

# The scraper reads its configuration from /data/config.json
# (presumably supplied through a mounted volume; confirm with deployment).
ENTRYPOINT [ "python", "scraper.py", "-c", "/data/config.json" ]
|
5
requirements.txt
Normal file
5
requirements.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
certifi==2023.11.17
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
idna==3.6
|
||||||
|
requests==2.31.0
|
||||||
|
urllib3==2.1.0
|
110
scraper.py
Normal file
110
scraper.py
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
import sqlite3
|
||||||
|
from typing import List, NamedTuple, Dict
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Configuration file used when no --config/-c argument is given.
DEFAULT_CONFIG_PATH = Path("config.json")
|
||||||
|
# URL prefix of the chart-data endpoint; "/<code>/intraday" is appended per action.
EURONEXT_BASE_URL = "https://live.euronext.com/intraday_chart/getChartData"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProgramArgs:
    """Command-line options of the scraper after parsing."""

    # Location of the JSON configuration file to load.
    config_path: Path
|
||||||
|
|
||||||
|
|
||||||
|
class Action(NamedTuple):
    """One configured entry to scrape."""

    # Label under which the scraped value is stored.
    name: str
    # Identifier inserted into the chart-data URL.
    code: str
|
||||||
|
|
||||||
|
|
||||||
|
class StoredAction(NamedTuple):
    """One row destined for the ``actions`` database table."""

    # Label copied from the configured action.
    name: str
    # Taken from the "price" field of the scraped data point.
    value: float
    # Taken from the "time" field of the scraped data point.
    date: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Config:
    """Settings read from the JSON configuration file."""

    # Entries whose latest value should be fetched.
    actions: List[Action]
    # SQLite database file that receives the fetched values.
    db_path: Path
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(config_path: Path) -> Config:
    """Read the JSON configuration file and build a ``Config`` from it.

    The document must contain an ``"actions"`` list whose items each have a
    ``"name"`` and a ``"code"`` key, and a ``"db"`` object with a ``"path"``
    key.

    Args:
        config_path: Location of the JSON configuration file.

    Returns:
        The parsed configuration.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If one of the required keys is missing.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding (which can differ inside a minimal container image).
    with open(config_path, encoding="utf-8") as conf_file:
        raw_conf = json.load(conf_file)
    return Config(
        actions=[Action(name=a["name"], code=a["code"]) for a in raw_conf["actions"]],
        db_path=Path(raw_conf["db"]["path"]),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_last_value_for_action(action: Action, timeout: float = 10.0) -> StoredAction:
    """Fetch the most recent intraday data point for ``action``.

    Args:
        action: The entry to query; its ``code`` is inserted into the URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``StoredAction`` built from the last element of the returned list,
        using its ``"price"`` and ``"time"`` fields.

    Raises:
        ValueError: If the server answers with an error status, the body is
            not valid JSON, the list is empty, or the payload does not have
            the expected shape.
        requests.RequestException: On connection problems or timeouts. These
            are left to propagate on purpose so that a network outage aborts
            the run instead of being treated as a per-action problem.
    """
    url = f"{EURONEXT_BASE_URL}/{action.code}/intraday"
    # Without a timeout requests may block forever on an unresponsive server.
    resp = requests.get(url, timeout=timeout)
    try:
        resp.raise_for_status()
    except requests.HTTPError as exc:
        # A bad status concerns this action only (e.g. an unknown code).
        raise ValueError(f"Unexpected HTTP status {resp.status_code}") from exc
    resp_json = resp.json()
    if not isinstance(resp_json, list):
        # Indexing a dict with -1 would raise an unrelated KeyError below.
        raise ValueError("Invalid format for response")
    if not resp_json:
        raise ValueError("Empty list of values")
    last_value = resp_json[-1]
    try:
        return StoredAction(
            name=action.name,
            value=last_value["price"],
            date=last_value["time"],
        )
    except (KeyError, TypeError) as exc:
        # TypeError covers a last element that is not a mapping.
        raise ValueError("Invalid format for response") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def save_values_to_db(db_path: Path, values: List[StoredAction]) -> None:
    """Replace the content of the ``actions`` table with ``values``.

    The delete and the inserts run in a single transaction: either the table
    ends up holding exactly ``values``, or it is left as it was.

    Args:
        db_path: SQLite database file; the ``actions`` table must already
            exist with ``name``, ``value`` and ``date`` columns.
        values: Rows to store. An empty list empties the table.

    Raises:
        sqlite3.Error: If the table is missing or a statement fails.
    """
    rows = [(v.name, v.value, v.date) for v in values]
    con = sqlite3.connect(db_path)
    try:
        # The connection context manager commits on success and rolls back
        # on error; it does not close the connection, hence the finally.
        with con:
            con.execute("DELETE FROM actions")
            con.executemany(
                "INSERT INTO actions (name, value, date) VALUES(?, ?, ?)", rows
            )
    finally:
        con.close()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Fetch the latest value of every configured action and store them.

    An action whose fetch raises ``ValueError`` is reported on stdout and
    skipped; the values that were fetched are then written to the database.
    """
    cli_args = parse_args()
    config = load_config(cli_args.config_path)

    fetched = []
    for action in config.actions:
        try:
            fetched.append(get_last_value_for_action(action))
        except ValueError as exc:
            print(f"Got error {exc} for action {action.name}")

    save_values_to_db(config.db_path, fetched)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> ProgramArgs:
    """Parse the command line into a ``ProgramArgs`` instance.

    The only option is ``--config``/``-c``, which falls back to
    ``DEFAULT_CONFIG_PATH`` when omitted.
    """
    arg_parser = argparse.ArgumentParser(
        prog="Euronext scraper",
        description="Scraper for euronext action values",
    )
    arg_parser.add_argument(
        "--config",
        "-c",
        default=DEFAULT_CONFIG_PATH,
        type=Path,
        help="Path for the configuration file",
    )
    namespace = arg_parser.parse_args()
    return ProgramArgs(config_path=namespace.config)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Top-level boundary: report any failure and exit with a non-zero status.
    try:
        main()
    except Exception as e:
        # Fatal diagnostics go to stderr so they are not mixed with normal output.
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)
|
Loading…
Reference in a new issue