1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import importlib
- from selenium import webdriver
- from stores.data_store_interface import IDataStore
- from stores.default_data_store import DefaultDataStore
- from adapters.data_collection_adapter_interface import IDataCollectionAdapter
- from utils.logger_helper import LoggerHelper
- from utils.config_helper import ConfigHelper
class DataCollector:
    """Facade that builds a site adapter, logs in, and collects data into a store.

    The concrete adapter is chosen at runtime from configuration: the keys
    ``adapter.<adapter_type>.model_name`` and ``adapter.<adapter_type>.class_name``
    name the module under ``adapters`` and the class inside it.
    """

    # Shared by every instance; both are created once, when the class body runs.
    logger = LoggerHelper.get_logger()
    config = ConfigHelper()

    # Class-level defaults so the properties below are readable even if
    # ``__init__`` did not run to completion.
    _adapter = None
    _driver = None
    _store = None

    def __init__(self,
                 adapter_type: str,
                 url: str,
                 un: str,
                 up: str,
                 store: IDataStore = None):
        """Create the adapter for ``adapter_type``, log in, and attach a store.

        Args:
            adapter_type: Key used to look up the adapter module and class in
                the configuration.
            url: Passed unchanged to the adapter's constructor.
            un: First argument forwarded to ``adapter.login``.
            up: Second argument forwarded to ``adapter.login``.
            store: Store handed to the adapter on ``collect``. When ``None``,
                a ``DefaultDataStore`` is created.

        Raises:
            ValueError: No adapter class is configured for ``adapter_type``.
            ImportError: The configured adapter module cannot be imported.
            AttributeError: The configured class is missing from the module.
        """
        self._adapter = self._gen_adapter(adapter_type, url)
        self._driver = self.adapter.driver
        try:
            self.adapter.login(un, up)
            # Compare against None explicitly: a caller-supplied store that
            # happens to be falsy (e.g. an empty container-like store) must
            # not be silently swapped for the default one.
            self._store = store if store is not None else DefaultDataStore()
        except BaseException:
            # The caller never receives this instance when construction
            # fails, so it has no way to call close(); release the adapter
            # here instead of leaking it, then let the error propagate.
            self.adapter.teardown()
            raise

    @property
    def driver(self) -> webdriver:
        """Return the ``driver`` attribute read from the adapter at construction."""
        # NOTE(review): ``webdriver`` is the module imported from selenium,
        # not a class; the annotation is kept as written.
        return self._driver

    @property
    def store(self) -> IDataStore:
        """Return the store currently passed to the adapter on ``collect``."""
        return self._store

    @property
    def adapter(self) -> IDataCollectionAdapter:
        """Return the adapter instance built for this collector."""
        return self._adapter

    def set_store(self, store: IDataStore) -> None:
        """Replace the store used by subsequent ``collect`` calls."""
        self._store = store

    def collect(self, keyword: str):
        """Delegate to ``adapter.collect`` with ``keyword`` and the current store."""
        self.adapter.collect(keyword, self.store)

    def close(self):
        """Log the adapter URL and call the adapter's ``teardown``."""
        self.logger.info(f"关闭浏览器驱动,URL: {self.adapter.url}")
        self.adapter.teardown()

    def _gen_adapter(self, adapter_type: str, url: str):
        """Import and instantiate the adapter class configured for ``adapter_type``.

        Args:
            adapter_type: Key interpolated into the configuration lookups.
            url: Sole argument passed to the adapter class.

        Returns:
            A new instance of the configured adapter class.

        Raises:
            ValueError: No class name is configured for ``adapter_type``.
            ImportError: ``adapters.<model_name>`` cannot be imported.
            AttributeError: The module has no attribute named ``class_name``.
        """
        adapter_model_name = self.config.get(f"adapter.{adapter_type}.model_name")
        adapter_class_name = self.config.get(f"adapter.{adapter_type}.class_name")
        if not adapter_class_name:
            # ValueError is a subclass of Exception, so callers that catch
            # Exception for this case keep working.
            raise ValueError("不支持的适配器类型")

        self.logger.info(
            f"生成适配器 TYPE:{adapter_type},适配器: {adapter_class_name},URL:{url}")

        # Each try guards exactly one operation so that an ImportError or
        # AttributeError raised inside the adapter's own constructor is not
        # re-labelled as a missing module or missing class.
        try:
            # Resolve the module dynamically from its configured name.
            adapter_module = importlib.import_module(
                f"adapters.{adapter_model_name}")
        except ImportError as e:
            raise ImportError(f"无法导入适配器模块 {adapter_model_name}") from e

        try:
            adapter_class = getattr(adapter_module, adapter_class_name)
        except AttributeError as e:
            raise AttributeError(
                f"适配器模块 {adapter_model_name} 中找不到类 {adapter_class_name}"
            ) from e

        return adapter_class(url)
|