Load settings from config or from arguments
This commit is contained in:
parent
c9d9b8b997
commit
dd35c02cdd
|
@ -8,13 +8,11 @@ from bs4 import BeautifulSoup
|
|||
|
||||
class Scraper:
|
||||
def __init__(
|
||||
self, domains: list[str], proxy: list[str], rps_min: int, rps_max: int
|
||||
self, domains: list[str], rps: int, proxy: list[str] = None
|
||||
):
|
||||
self.domains = domains
|
||||
self.proxy = proxy
|
||||
|
||||
self.rps_min = rps_min
|
||||
self.rps_max = rps_max
|
||||
self.rps = rps
|
||||
|
||||
self.parse_tasks = set()
|
||||
|
50
main.py
50
main.py
|
@ -7,23 +7,21 @@ import sys
|
|||
from sys import platform
|
||||
|
||||
import argparse
|
||||
from argparse import ArgumentParser
|
||||
from argparse import ArgumentParser, Namespace
|
||||
import yaml
|
||||
|
||||
import asyncio
|
||||
from scraper import Scraper
|
||||
from courts_scraper.scraper import Scraper
|
||||
|
||||
|
||||
def init_argparser() -> ArgumentParser:
|
||||
argparser = argparse.ArgumentParser(
|
||||
description="List fish in aquarium.",
|
||||
argument_default=argparse.SUPPRESS
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--config", "-c",
|
||||
help="Path to the config file",
|
||||
type=pathlib.Path,
|
||||
default="config.yaml",
|
||||
)
|
||||
argparser.add_argument(
|
||||
"--domains", "-d",
|
||||
|
@ -35,33 +33,11 @@ def init_argparser() -> ArgumentParser:
|
|||
help="Path to the proxy file",
|
||||
type=pathlib.Path,
|
||||
)
|
||||
argparser.add_argument("--rps_min", help="", type=int)
|
||||
argparser.add_argument("--rps_max", help="", type=int)
|
||||
argparser.add_argument("--rps", type=int)
|
||||
|
||||
return argparser
|
||||
|
||||
|
||||
def _read_lines(path) -> list[str]:
    """Return the non-blank lines of the text file at *path*, whitespace-stripped."""
    with open(path) as lines_file:
        stripped = (line.strip() for line in lines_file)
        return [line for line in stripped if line]


def load_config() -> dict:
    """Build the run configuration from the YAML config file and the CLI.

    The YAML file named by ``--config`` is loaded first; any option given on
    the command line then overrides the matching key in its ``settings``
    section. ``--domains`` and ``--proxy`` name text files whose lines
    (one entry per line) replace the corresponding setting.

    Returns:
        The parsed config mapping, whose ``"settings"`` entry holds the
        merged settings with the ``"config"`` path removed.

    Raises:
        OSError: If the config, domains or proxy file cannot be opened.
    """
    args = vars(init_argparser().parse_args())

    # The parser is created with argument_default=argparse.SUPPRESS, so an
    # option that was not given is absent from ``args``. Only read the list
    # files that were actually supplied instead of failing with KeyError.
    for list_option in ("domains", "proxy"):
        if args.get(list_option) is not None:
            args[list_option] = _read_lines(args[list_option])

    with open(args["config"]) as config_file:
        # safe_load returns None for an empty document.
        config = yaml.safe_load(config_file) or {}

    # Tolerate a config file without a (non-empty) "settings" section.
    if config.get("settings") is None:
        config["settings"] = {}

    # Command-line values take precedence; unset (None) values never
    # overwrite what the config file provides.
    config["settings"].update(
        {name: value for name, value in args.items() if value is not None}
    )

    # Remove config path to pass config values to the Scraper
    config["settings"].pop("config", None)
    return config
|
||||
|
||||
|
||||
async def main():
|
||||
logger.add("project.log")
|
||||
logger.info("Starting...")
|
||||
|
@ -71,8 +47,24 @@ async def main():
|
|||
input()
|
||||
sys.exit(1)
|
||||
|
||||
config = load_config()
|
||||
scraper = Scraper(**config["settings"])
|
||||
scraper_settings = dict()
|
||||
argparser = init_argparser()
|
||||
args = argparser.parse_args()
|
||||
|
||||
if args.config is not None:
|
||||
with open(args.config) as config_file:
|
||||
scraper_settings = yaml.safe_load(config_file)
|
||||
else:
|
||||
scraper_settings["rps"] = args.rps
|
||||
|
||||
with open(args.domains) as domains_file:
|
||||
scraper_settings["domains"] = domains_file.readlines()
|
||||
|
||||
if args.proxy is not None: # Optional argument
|
||||
with open(args.proxy) as proxy_file:
|
||||
scraper_settings["proxy"] = proxy_file.readlines()
|
||||
|
||||
scraper = Scraper(**scraper_settings)
|
||||
await scraper.scrape()
|
||||
|
||||
|
||||
|
|
Reference in a new issue