123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- import importlib
- from selenium import webdriver
- from stores.data_store_interface import IDataStore
- from stores.default_data_store import DefaultDataStore
- from adapters.data_collection_adapter_interface import IDataCollectionAdapter
- from utils.logger_helper import LoggerHelper
- from utils.config_helper import ConfigHelper
class DataCollector:
    """Collect data from a site through a dynamically loaded adapter.

    The adapter module and class for a given ``type`` are looked up in the
    configuration under ``adapter.<type>.model_name`` and
    ``adapter.<type>.class_name`` and imported from the ``adapters``
    package. The adapter creates the browser driver, performs the login,
    runs searches and writes results to the configured data store.
    """

    logger = LoggerHelper.get_logger()
    config = ConfigHelper()
    # Class-level defaults; each instance overwrites them in __init__.
    _adapter = None
    _driver = None
    _store = None

    def __init__(self,
                 type: str,
                 url: str,
                 un: str,
                 up: str,
                 store: IDataStore = None):
        """Build the adapter, create its driver and log in.

        Args:
            type: Adapter type key used to look up the adapter in config.
            url: URL passed to the adapter's constructor.
            un: User name passed to the adapter's ``login``.
            up: Password passed to the adapter's ``login``.
            store: Data store to collect into. When ``None`` a
                ``DefaultDataStore`` is created.

        Raises:
            ValueError: No adapter class is configured for ``type``.
            ImportError: The configured adapter module cannot be imported.
            AttributeError: The adapter module lacks the configured class.
        """
        self._adapter = self._genAdapter(type, url)
        self._driver = self._adapter.createDriver()
        try:
            self._adapter.login(self._driver, un, up)
        except Exception:
            # The caller never receives this instance when __init__ fails,
            # so it cannot call close(); release the driver here instead.
            self._adapter.teardown(self._driver)
            raise
        # Compare against None explicitly: a caller-supplied store that is
        # falsy (e.g. empty and defining __len__) must still be honoured.
        self._store = store if store is not None else DefaultDataStore()

    @property
    def driver(self) -> webdriver:
        """The driver object returned by the adapter's ``createDriver``."""
        return self._driver

    @property
    def store(self) -> IDataStore:
        """The data store that collected items are written to."""
        return self._store

    @property
    def adapter(self) -> IDataCollectionAdapter:
        """The adapter instance built for this collector."""
        return self._adapter

    def setStore(self, store: IDataStore) -> None:
        """Replace the data store used by subsequent collections."""
        self._store = store

    def collect(self, keyword: str):
        """Search for ``keyword`` and collect the results into the store."""
        items = self.adapter.search(self.driver, keyword)
        self.adapter.collect(self.driver, items, self.store)

    def close(self):
        """Log the shutdown and hand the driver to the adapter's teardown."""
        self.logger.info(f"关闭浏览器驱动,URL: {self.adapter.url}")
        self.adapter.teardown(self.driver)

    def collectWithStore(self, keyword: str, store: IDataStore):
        """Switch to ``store`` and then collect results for ``keyword``.

        The store stays in effect for later ``collect`` calls as well.
        """
        self.setStore(store)
        self.collect(keyword)

    def _genAdapter(self, type: str, url: str):
        """Import and instantiate the adapter configured for ``type``.

        Args:
            type: Adapter type key used in the config lookups.
            url: URL passed to the adapter class constructor.

        Returns:
            A new instance of the configured adapter class.

        Raises:
            ValueError: No class name is configured for ``type``.
            ImportError: The adapter module cannot be imported.
            AttributeError: The module does not define the adapter class.
        """
        adapterModelName = self.config.get(f"adapter.{type}.model_name")
        adapterClassName = self.config.get(f"adapter.{type}.class_name")
        if not adapterClassName:
            raise ValueError("不支持的适配器类型")

        self.logger.info(
            f"生成适配器 TYPE:{type},适配器: {adapterClassName},URL:{url}")

        # Keep each try body to the single operation it guards so that
        # errors raised elsewhere (notably inside the adapter constructor)
        # are not mislabelled as import or lookup failures.
        try:
            adapterModule = importlib.import_module(
                f"adapters.{adapterModelName}")
        except ImportError as e:
            raise ImportError(f"无法导入适配器模块 {adapterModelName}") from e

        try:
            adapterClass = getattr(adapterModule, adapterClassName)
        except AttributeError as e:
            raise AttributeError(
                f"适配器模块 {adapterModelName} 中找不到类 {adapterClassName}"
            ) from e

        return adapterClass(url)
|