data_collector.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import importlib
  2. from selenium import webdriver
  3. from stores.data_store_interface import IDataStore
  4. from stores.default_data_store import DefaultDataStore
  5. from adapters.data_collection_adapter_interface import IDataCollectionAdapter
  6. from utils.logger_helper import LoggerHelper
  7. from utils.config_helper import ConfigHelper
  8. class DataCollector:
  9. logger = LoggerHelper.get_logger()
  10. config = ConfigHelper()
  11. _adapter = None
  12. _driver = None
  13. _store = None
  14. # 使用字典映射域名和适配器类
  15. # _adapterModelMap = {"chinabidding": "chinabidding_data_collection_adapter"}
  16. # _adapterClassMap = {"chinabidding": "ChinabiddingDataCollectionAdapter"}
  17. def __init__(self,
  18. type: str,
  19. url: str,
  20. un: str,
  21. up: str,
  22. store: IDataStore = None):
  23. self._adapter = self._genAdapter(type, url)
  24. self._driver = self.adapter.createDriver()
  25. # if type == "chinabidding":
  26. # return
  27. self.adapter.login(self.driver, un, up)
  28. if store:
  29. self._store = store
  30. else:
  31. self._store = DefaultDataStore()
  32. @property
  33. def driver(self) -> webdriver:
  34. return self._driver
  35. @property
  36. def store(self) -> IDataStore:
  37. return self._store
  38. @property
  39. def adapter(self) -> IDataCollectionAdapter:
  40. return self._adapter
  41. def setStore(self, store: IDataStore) -> None:
  42. self._store = store
  43. def collect(self, keyword: str):
  44. items = self.adapter.search(self.driver, keyword)
  45. self.adapter.collect(self.driver, items, self.store)
  46. def close(self):
  47. self.logger.info(f"关闭浏览器驱动,URL: {self.adapter.url}")
  48. self.adapter.teardown(self.driver)
  49. def collectWithStore(self, keyword: str, store: IDataStore):
  50. self.setStore(store)
  51. self.collect(keyword)
  52. def _genAdapter(self, type: str, url: str):
  53. adapterModelName = self.config.get(f"adapter.{type}.model_name")
  54. adapterClassName = self.config.get(f"adapter.{type}.class_name")
  55. if adapterClassName:
  56. try:
  57. self.logger.info(
  58. f"生成适配器 TYPE:{type},适配器: {adapterClassName},URL:{url}")
  59. # 使用 importlib 动态导入模块
  60. adapterModule = importlib.import_module(
  61. f"adapters.{adapterModelName}")
  62. adapterClass = getattr(adapterModule, adapterClassName)
  63. adapter = adapterClass(url)
  64. except ImportError as e:
  65. raise ImportError(f"无法导入适配器模块 {adapterModelName}") from e
  66. except AttributeError as e:
  67. raise AttributeError(
  68. f"适配器模块 {adapterModelName} 中找不到类 {adapterClassName}"
  69. ) from e
  70. else:
  71. raise Exception("不支持的适配器类型")
  72. return adapter