Load settings from config or from arguments

This commit is contained in:
riot 2022-09-12 16:18:21 +03:00
parent c9d9b8b997
commit dd35c02cdd
2 changed files with 23 additions and 33 deletions

View file

@ -8,13 +8,11 @@ from bs4 import BeautifulSoup
class Scraper:
def __init__(
self, domains: list[str], proxy: list[str], rps_min: int, rps_max: int
self, domains: list[str], rps: int, proxy: list[str] = None
):
self.domains = domains
self.proxy = proxy
self.rps_min = rps_min
self.rps_max = rps_max
self.rps = rps
self.parse_tasks = set()

50
main.py
View file

@ -7,23 +7,21 @@ import sys
from sys import platform
import argparse
from argparse import ArgumentParser
from argparse import ArgumentParser, Namespace
import yaml
import asyncio
from scraper import Scraper
from courts_scraper.scraper import Scraper
def init_argparser() -> ArgumentParser:
argparser = argparse.ArgumentParser(
description="List fish in aquarium.",
argument_default=argparse.SUPPRESS
)
argparser.add_argument(
"--config", "-c",
help="Path to the config file",
type=pathlib.Path,
default="config.yaml",
)
argparser.add_argument(
"--domains", "-d",
@ -35,33 +33,11 @@ def init_argparser() -> ArgumentParser:
help="Path to the proxy file",
type=pathlib.Path,
)
argparser.add_argument("--rps_min", help="", type=int)
argparser.add_argument("--rps_max", help="", type=int)
argparser.add_argument("--rps", type=int)
return argparser
def load_config() -> dict:
    """Build the scraper configuration from the YAML config file and CLI arguments.

    Command-line arguments are parsed with the parser returned by
    ``init_argparser()``. The ``domains`` and ``proxy`` options are paths to
    text files; when supplied, each path is replaced by the list of lines
    read from that file. The YAML file named by the ``config`` option is then
    loaded and its ``"settings"`` mapping is updated with the CLI values, so
    anything given on the command line overrides the file.

    Returns:
        The parsed YAML document, with ``config["settings"]`` updated in
        place and the ``"config"`` path entry removed from it.

    Raises:
        KeyError: if no ``config`` value is available from the parser, or
            the YAML document has no ``"settings"`` key.
        OSError: if any of the referenced files cannot be opened.
    """
    argparser = init_argparser()
    args = vars(argparser.parse_args())

    # The parser is created with argument_default=argparse.SUPPRESS, so an
    # option that was not passed is simply missing from ``args``. Only load
    # the list files that were actually supplied instead of failing with
    # KeyError on an omitted (optional) one.
    for list_option in ("domains", "proxy"):
        if list_option in args:
            with open(args[list_option]) as list_file:
                # splitlines() drops the line terminators that readlines()
                # would leave attached to every entry.
                args[list_option] = list_file.read().splitlines()

    with open(args["config"]) as config_file:
        config = yaml.safe_load(config_file)

    config["settings"].update(args)
    # Remove config path to pass config values to the Scraper
    config["settings"].pop("config")

    return config
async def main():
logger.add("project.log")
logger.info("Starting...")
@ -71,8 +47,24 @@ async def main():
input()
sys.exit(1)
config = load_config()
scraper = Scraper(**config["settings"])
scraper_settings = dict()
argparser = init_argparser()
args = argparser.parse_args()
if args.config is not None:
with open(args.config) as config_file:
scraper_settings = yaml.safe_load(config_file)
else:
scraper_settings["rps"] = args.rps
with open(args.domains) as domains_file:
scraper_settings["domains"] = domains_file.readlines()
if args.proxy is not None: # Optional argument
with open(args.proxy) as proxy_file:
scraper_settings["proxy"] = proxy_file.readlines()
scraper = Scraper(**scraper_settings)
await scraper.scrape()