Load settings from config or from arguments

parent c9d9b8b997
commit dd35c02cdd
@@ -8,13 +8,11 @@ from bs4 import BeautifulSoup
 
 
 class Scraper:
     def __init__(
-        self, domains: list[str], proxy: list[str], rps_min: int, rps_max: int
+        self, domains: list[str], rps: int, proxy: list[str] = None
     ):
         self.domains = domains
         self.proxy = proxy
-        self.rps_min = rps_min
-        self.rps_max = rps_max
-
+        self.rps = rps
 
         self.parse_tasks = set()
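The signature change collapses the old rps_min / rps_max pair into a single rps value and makes proxy an optional keyword defaulting to None. After this commit a caller could construct the class roughly like this (a minimal sketch; the domain list and rate below are made-up placeholders, not values from the repository):

    # Hypothetical construction after this commit; values are illustrative.
    scraper = Scraper(
        domains=["example.org\n"],  # readlines() output, so entries keep "\n"
        rps=5,                      # one fixed requests-per-second value
    )                               # proxy can now be omitted entirely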
main.py (50 changed lines)
@@ -7,23 +7,21 @@ import sys
 from sys import platform
 
 import argparse
-from argparse import ArgumentParser
+from argparse import ArgumentParser, Namespace
 import yaml
 
 import asyncio
-from scraper import Scraper
+from courts_scraper.scraper import Scraper
 
 
 def init_argparser() -> ArgumentParser:
     argparser = argparse.ArgumentParser(
         description="List fish in aquarium.",
-        argument_default=argparse.SUPPRESS
     )
     argparser.add_argument(
         "--config", "-c",
         help="Path to the config file",
         type=pathlib.Path,
-        default="config.yaml",
     )
     argparser.add_argument(
         "--domains", "-d",
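Two removals in this hunk work together: --config no longer defaults to "config.yaml", and the parser no longer sets argument_default=argparse.SUPPRESS. With SUPPRESS, options the user did not pass were omitted from the parsed namespace entirely; without it they default to None, which is exactly what the new "is not None" checks in main() below rely on. A small standalone demonstration of the difference (illustrative only, not code from the repository):

    import argparse

    # New behaviour (no SUPPRESS): unpassed options become None.
    p = argparse.ArgumentParser()
    p.add_argument("--config")
    print(p.parse_args([]).config)               # None

    # Old behaviour (with SUPPRESS): unpassed options are simply absent,
    # so attribute access would raise AttributeError.
    p2 = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    p2.add_argument("--config")
    print(hasattr(p2.parse_args([]), "config"))  # False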
@@ -35,33 +33,11 @@ def init_argparser() -> ArgumentParser:
         help="Path to the proxy file",
         type=pathlib.Path,
     )
-    argparser.add_argument("--rps_min", help="", type=int)
-    argparser.add_argument("--rps_max", help="", type=int)
+    argparser.add_argument("--rps", type=int)
 
     return argparser
 
 
-def load_config() -> dict:
-    argparser = init_argparser()
-    args = vars(argparser.parse_args())
-
-    with open(args["domains"]) as domains_file:
-        domains = domains_file.readlines()
-    args["domains"] = domains
-
-    with open(args["proxy"]) as proxy_file:
-        proxy = proxy_file.readlines()
-    args["proxy"] = proxy
-
-    with open(args["config"]) as config_file:
-        config = yaml.safe_load(config_file)
-    config["settings"].update(args)
-
-    # Remove config path to pass config values to the Scraper
-    config["settings"].pop("config")
-    return config
-
-
 async def main():
     logger.add("project.log")
     logger.info("Starting...")
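For context on what the deleted load_config() did: it layered the command-line arguments over the values from config.yaml with dict.update(), and because of SUPPRESS only flags the user actually passed appeared in vars(args), so the override was selective. A toy illustration of that merge (made-up values, not from the repository):

    settings = {"rps_min": 1, "rps_max": 3}  # as loaded from config.yaml
    cli_args = {"rps_max": 5}                # only the flags the user passed
    settings.update(cli_args)                # CLI wins where both are set
    print(settings)                          # {'rps_min': 1, 'rps_max': 5}

This commit drops the merge entirely: the config file and the command-line flags become alternative sources rather than layered ones, as the rewritten main() below shows.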
@@ -71,8 +47,24 @@ async def main():
         input()
         sys.exit(1)
 
-    config = load_config()
-    scraper = Scraper(**config["settings"])
+    scraper_settings = dict()
+    argparser = init_argparser()
+    args = argparser.parse_args()
+
+    if args.config is not None:
+        with open(args.config) as config_file:
+            scraper_settings = yaml.safe_load(config_file)
+    else:
+        scraper_settings["rps"] = args.rps
+
+    with open(args.domains) as domains_file:
+        scraper_settings["domains"] = domains_file.readlines()
+
+    if args.proxy is not None:  # Optional argument
+        with open(args.proxy) as proxy_file:
+            scraper_settings["proxy"] = proxy_file.readlines()
+
+    scraper = Scraper(**scraper_settings)
     await scraper.scrape()
 
 
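After this change main.py takes its settings from one of two sources: a YAML file passed with --config, or the individual --rps flag. The domain list always comes from the --domains file, and --proxy stays optional. Because the loaded mapping is splatted directly into Scraper(**scraper_settings), a config file's top-level keys must now match the __init__ parameters; the old nested settings: block that load_config() read would no longer work. A plausible config (assumed, not taken from the repository):

    # config.yaml, assumed example: keys become Scraper keyword arguments.
    rps: 5

The two invocation styles would then look like:

    python main.py --config config.yaml --domains domains.txt
    python main.py --rps 5 --domains domains.txt --proxy proxy.txt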