project305/courts_scraper/scraper.py

from loguru import logger
import aiohttp
import asyncio
from bs4 import BeautifulSoup  # imported for HTML parsing; not yet used in this snippet


class Scraper:
    def __init__(
        self, domains: list[str], rps: int, proxy: list[str] | None = None
    ):
        self.domains = domains
        self.proxy = proxy
        # Target requests per second; stored but not enforced anywhere below.
        self.rps = rps
        self.parse_tasks = set()

    async def fetch(self, url: str) -> str:
        # Open a short-lived session per request and return the response body.
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                return await resp.text()

    async def parse(self, url: str) -> str:
        resp = await self.fetch(url)
        logger.debug(f"Received response from {url}!")
        return resp

    async def scrape(self):
        # Schedule one parse task per domain, then wait for all of them to finish.
        for domain in self.domains:
            url = f"https://{domain}"
            parse_task = asyncio.create_task(self.parse(url))
            self.parse_tasks.add(parse_task)
        # asyncio.wait raises ValueError on an empty set, so guard against no domains.
        if self.parse_tasks:
            done, _pending = await asyncio.wait(self.parse_tasks)
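

# --- Usage sketch (added; not part of the original file) ---
# A minimal, hedged example of driving Scraper with asyncio.run(). The domain
# names below are hypothetical placeholders, and the rps value is illustrative
# only, since the class does not yet enforce a request rate.
if __name__ == "__main__":
    scraper = Scraper(domains=["example.com", "example.org"], rps=5)
    asyncio.run(scraper.scrape())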