project305/scraper.py


import asyncio

import aiohttp
from bs4 import BeautifulSoup  # imported for HTML parsing, though unused in this file
from loguru import logger


class Scraper:
    def __init__(
        self, domains: list[str], proxy: list[str], rps_min: int, rps_max: int
    ):
        self.domains = domains
        self.proxy = proxy  # proxy pool; stored but not consumed below
        self.rps_min = rps_min  # requests-per-second bounds; stored but not consumed below
        self.rps_max = rps_max
        self.parse_tasks = set()

    async def fetch(self, url: str):
        # One short-lived session per request; a shared session would be
        # cheaper, but this preserves the original one-shot structure.
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                return await resp.text()

    async def parse(self, url: str) -> str:
        resp = await self.fetch(url)
        logger.debug(f"Received response from {url}!")
        return resp
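
    # Sketch of an assumption, not taken from the original file: one way the
    # stored rps_min/rps_max bounds could be consumed is to sleep between
    # requests for a random interval derived from them (assumes rps_min > 0).
    # No method in this file calls this helper.
    async def _throttle(self):
        import random  # local import keeps this sketch self-contained

        delay = 1.0 / random.uniform(self.rps_min, self.rps_max)
        await asyncio.sleep(delay)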
    async def scrape(self):
        for domain in self.domains:
            url = f"https://{domain}"
            parse_task = asyncio.create_task(self.parse(url))
            self.parse_tasks.add(parse_task)
        done, _pending = await asyncio.wait(self.parse_tasks)
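
# Usage sketch: a minimal way to drive the Scraper. The domain list, proxy
# list, and rate bounds below are hypothetical placeholders, not values from
# the original project.
if __name__ == "__main__":
    scraper = Scraper(
        domains=["example.com", "example.org"],
        proxy=[],
        rps_min=1,
        rps_max=5,
    )
    asyncio.run(scraper.scrape())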