Load settings from config or from arguments

This commit is contained in:
riot 2022-09-12 16:18:21 +03:00
parent c9d9b8b997
commit dd35c02cdd
2 changed files with 23 additions and 33 deletions

View file

@@ -8,13 +8,11 @@ from bs4 import BeautifulSoup
 class Scraper:
     def __init__(
-        self, domains: list[str], proxy: list[str], rps_min: int, rps_max: int
+        self, domains: list[str], rps: int, proxy: list[str] = None
     ):
         self.domains = domains
         self.proxy = proxy
-        self.rps_min = rps_min
-        self.rps_max = rps_max
+        self.rps = rps
         self.parse_tasks = set()

50
main.py
View file

@@ -7,23 +7,21 @@ import sys
 from sys import platform
 import argparse
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 import yaml
 import asyncio
-from scraper import Scraper
+from courts_scraper.scraper import Scraper


 def init_argparser() -> ArgumentParser:
     argparser = argparse.ArgumentParser(
         description="List fish in aquarium.",
-        argument_default=argparse.SUPPRESS
     )
     argparser.add_argument(
         "--config", "-c",
         help="Path to the config file",
         type=pathlib.Path,
-        default="config.yaml",
     )
     argparser.add_argument(
         "--domains", "-d",
@@ -35,33 +33,11 @@ def init_argparser() -> ArgumentParser:
         help="Path to the proxy file",
         type=pathlib.Path,
     )
-    argparser.add_argument("--rps_min", help="", type=int)
-    argparser.add_argument("--rps_max", help="", type=int)
+    argparser.add_argument("--rps", type=int)
     return argparser


-def load_config() -> dict:
-    argparser = init_argparser()
-    args = vars(argparser.parse_args())
-    with open(args["domains"]) as domains_file:
-        domains = domains_file.readlines()
-        args["domains"] = domains
-    with open(args["proxy"]) as proxy_file:
-        proxy = proxy_file.readlines()
-        args["proxy"] = proxy
-    with open(args["config"]) as config_file:
-        config = yaml.safe_load(config_file)
-    config["settings"].update(args)
-    # Remove config path to pass config values to the Scraper
-    config["settings"].pop("config")
-    return config


 async def main():
     logger.add("project.log")
     logger.info("Starting...")
@@ -71,8 +47,24 @@ async def main():
         input()
         sys.exit(1)

-    config = load_config()
-    scraper = Scraper(**config["settings"])
+    scraper_settings = dict()
+    argparser = init_argparser()
+    args = argparser.parse_args()
+    if args.config is not None:
+        with open(args.config) as config_file:
+            scraper_settings = yaml.safe_load(config_file)
+    else:
+        scraper_settings["rps"] = args.rps
+    with open(args.domains) as domains_file:
+        scraper_settings["domains"] = domains_file.readlines()
+    if args.proxy is not None:  # Optional argument
+        with open(args.proxy) as proxy_file:
+            scraper_settings["proxy"] = proxy_file.readlines()
+    scraper = Scraper(**scraper_settings)
     await scraper.scrape()