data_collector.py 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import importlib
  2. from selenium import webdriver
  3. import adapters
  4. import utils
  5. from adapters.data_collection_adapter_interface import IDataCollectionAdapter
  6. from stores.data_store_interface import IDataStore
  7. from stores.default_data_store import DefaultDataStore
  8. class DataCollector:
  9. _adapter = None
  10. _driver = None
  11. _store = None
  12. def __init__(
  13. self, adapter_type: str, url: str, un: str, up: str, store: IDataStore = None
  14. ):
  15. self._adapter = self._gen_adapter(adapter_type, url)
  16. self._driver = self.adapter.driver
  17. # if type == "chinabidding":
  18. # return
  19. self.adapter.login(un, up)
  20. if store:
  21. self._store = store
  22. else:
  23. self._store = DefaultDataStore()
  24. @property
  25. def driver(self) -> webdriver:
  26. return self._driver
  27. @property
  28. def store(self) -> IDataStore:
  29. return self._store
  30. @property
  31. def adapter(self) -> IDataCollectionAdapter:
  32. return self._adapter
  33. def set_store(self, store: IDataStore) -> None:
  34. self._store = store
  35. def collect(self, keyword: str):
  36. adapters.collect(self.adapter, keyword, self.store)
  37. def close(self):
  38. utils.get_logger().info(f"关闭浏览器驱动,URL: {self.adapter.url}")
  39. adapters.teardown(self.adapter)
  40. @staticmethod
  41. def _gen_adapter(adapter_type: str, url: str):
  42. adapter_model_name = utils.get_config_value(
  43. f"adapter.{adapter_type}.model_name"
  44. )
  45. adapter_class_name = utils.get_config_value(
  46. f"adapter.{adapter_type}.class_name"
  47. )
  48. if adapter_class_name:
  49. try:
  50. utils.get_logger().info(
  51. f"生成适配器 TYPE:{adapter_type},适配器: {adapter_class_name},URL:{url}"
  52. )
  53. # 使用 importlib 动态导入模块
  54. adapter_module = importlib.import_module(
  55. f"adapters.{adapter_model_name}"
  56. )
  57. adapter_class = getattr(adapter_module, adapter_class_name)
  58. adapter = adapter_class(url)
  59. except ImportError as e:
  60. raise ImportError(f"无法导入适配器模块 {adapter_model_name}") from e
  61. except AttributeError as e:
  62. raise AttributeError(
  63. f"适配器模块 {adapter_model_name} 中找不到类 {adapter_class_name}"
  64. ) from e
  65. else:
  66. raise Exception("不支持的适配器类型")
  67. return adapter