|
@@ -3,7 +3,7 @@ from time import sleep
|
|
|
from selenium import webdriver
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
|
|
-from selenium.webdriver.support import expected_conditions as EC
|
|
|
+from selenium.webdriver.support import expected_conditions as ec
|
|
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
|
|
|
|
|
from drivers.driver_creator import DriverCreator
|
|
@@ -41,29 +41,29 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
@property
|
|
|
def driver(self):
|
|
|
if not self._driver:
|
|
|
- self._driver = self.createDriver()
|
|
|
+ self._driver = self.create_driver()
|
|
|
return self._driver
|
|
|
|
|
|
- def createDriver(self) -> webdriver:
|
|
|
+ def create_driver(self) -> webdriver:
|
|
|
try:
|
|
|
- return DriverCreator().GenRemoteDriver(self.url)
|
|
|
+ return DriverCreator().gen_remote_driver(self.url)
|
|
|
except Exception as e:
|
|
|
raise Exception(f"创建驱动器失败: {e}")
|
|
|
|
|
|
def login(self, driver, username: str, password: str) -> None:
|
|
|
try:
|
|
|
- loginEl = driver.find_element(
|
|
|
+ login_el = driver.find_element(
|
|
|
By.XPATH, "//div[@id='loginRight']/a[@class='login']")
|
|
|
- loginEl.click()
|
|
|
+ login_el.click()
|
|
|
wait = WebDriverWait(driver, 10, 1)
|
|
|
- wait.until(EC.presence_of_element_located((By.ID, "userpass")))
|
|
|
- unEl = driver.find_element(By.ID, "username")
|
|
|
- unEl.send_keys(username)
|
|
|
- passEl = driver.find_element(By.ID, "userpass")
|
|
|
- passEl.send_keys(password)
|
|
|
- loginBtn = driver.find_element(By.ID, "login-button")
|
|
|
- loginBtn.click()
|
|
|
- wait.until(EC.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ wait.until(ec.presence_of_element_located((By.ID, "userpass")))
|
|
|
+ un_el = driver.find_element(By.ID, "username")
|
|
|
+ un_el.send_keys(username)
|
|
|
+ pass_el = driver.find_element(By.ID, "userpass")
|
|
|
+ pass_el.send_keys(password)
|
|
|
+ login_btn = driver.find_element(By.ID, "login-button")
|
|
|
+ login_btn.click()
|
|
|
+ wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
except TimeoutException as e:
|
|
|
raise Exception(f"登录失败 [超时]: {e}")
|
|
|
except NoSuchElementException as e:
|
|
@@ -74,25 +74,25 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
self._keyword = keyword
|
|
|
wait = WebDriverWait(driver, 10, 1)
|
|
|
wait.until(
|
|
|
- EC.presence_of_element_located((By.ID, "projSearchForm")))
|
|
|
- searchEl = driver.find_element(By.ID, "fullText")
|
|
|
- searchEl.send_keys(keyword)
|
|
|
- searchBtn = driver.find_element(
|
|
|
+ ec.presence_of_element_located((By.ID, "projSearchForm")))
|
|
|
+ search_el = driver.find_element(By.ID, "fullText")
|
|
|
+ search_el.send_keys(keyword)
|
|
|
+ search_btn = driver.find_element(
|
|
|
By.XPATH, "//form[@id='projSearchForm']/button")
|
|
|
- searchBtn.click()
|
|
|
- wait.until(EC.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ search_btn.click()
|
|
|
+ wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
# 查询3天内的数据
|
|
|
search_txt = ConfigHelper().get("adapter.chinabidding.search_day")
|
|
|
if not search_txt:
|
|
|
search_txt = "近三天"
|
|
|
self.logger.info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
|
|
|
- lastEl = driver.find_element(By.LINK_TEXT, search_txt)
|
|
|
- lastEl.click()
|
|
|
- wait.until(EC.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ last_el = driver.find_element(By.LINK_TEXT, search_txt)
|
|
|
+ last_el.click()
|
|
|
+ wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
try:
|
|
|
- aLinks = driver.find_elements(
|
|
|
+ a_links = driver.find_elements(
|
|
|
By.XPATH, "//form[@id='pagerSubmitForm']/a")
|
|
|
- count = len(aLinks)
|
|
|
+ count = len(a_links)
|
|
|
if count > 1:
|
|
|
count = count - 1
|
|
|
self.logger.info(f"共查询到 {count} 页")
|
|
@@ -106,7 +106,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
except NoSuchElementException as e:
|
|
|
raise Exception(f"搜索失败 [找不到元素]: {e}")
|
|
|
|
|
|
- def collect(self, driver, items: list, store: IDataStore) -> list:
|
|
|
+ def collect(self, driver, items: list, store: IDataStore) :
|
|
|
if store:
|
|
|
self._store = store
|
|
|
self._process_list(driver, items)
|
|
@@ -115,12 +115,12 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
def _next_page(self, driver) -> list:
|
|
|
try:
|
|
|
wait = WebDriverWait(driver, 10, 1)
|
|
|
- nextPath = "//form[@id='pagerSubmitForm']/a[@class='next']"
|
|
|
- wait.until(EC.presence_of_element_located((By.XPATH, nextPath)))
|
|
|
- btn = driver.find_element(By.XPATH, nextPath)
|
|
|
+ next_path = "//form[@id='pagerSubmitForm']/a[@class='next']"
|
|
|
+ wait.until(ec.presence_of_element_located((By.XPATH, next_path)))
|
|
|
+ btn = driver.find_element(By.XPATH, next_path)
|
|
|
btn.click()
|
|
|
self.logger.info(f"跳转到下页: {driver.current_url}")
|
|
|
- wait.until(EC.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
items = driver.find_elements(By.XPATH,
|
|
|
"//ul[@class='as-pager-body']/li/a")
|
|
|
return items
|
|
@@ -131,8 +131,8 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
return []
|
|
|
|
|
|
def _process_item(self, driver, item):
|
|
|
+ current_handle = driver.current_window_handle
|
|
|
try:
|
|
|
- currentHandle = driver.current_window_handle
|
|
|
url = item.get_attribute('href')
|
|
|
old = self.store.query_one_collect_by_url(url)
|
|
|
if old:
|
|
@@ -140,15 +140,15 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
return
|
|
|
item.click()
|
|
|
wait = WebDriverWait(driver, 10, 1)
|
|
|
- wait.until(EC.number_of_windows_to_be(2))
|
|
|
+ wait.until(ec.number_of_windows_to_be(2))
|
|
|
handles = driver.window_handles
|
|
|
for handle in handles:
|
|
|
- if handle != currentHandle:
|
|
|
+ if handle != current_handle:
|
|
|
driver.switch_to.window(handle)
|
|
|
break
|
|
|
url = driver.current_url
|
|
|
self.logger.info(f"跳转详情: {driver.current_url}")
|
|
|
- wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
|
|
|
+ wait.until(ec.presence_of_element_located((By.TAG_NAME, "body")))
|
|
|
content = driver.find_element(By.TAG_NAME, "body").text
|
|
|
self._save(url, content)
|
|
|
sleep(1)
|
|
@@ -163,7 +163,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
f"采集发生异常 NoSuchElement: {driver.current_url}。Exception: {e}")
|
|
|
raise Exception(f"采集失败 [找不到元素]: {e}")
|
|
|
finally:
|
|
|
- driver.switch_to.window(currentHandle)
|
|
|
+ driver.switch_to.window(current_handle)
|
|
|
|
|
|
def _save(self, url, content):
|
|
|
# self.logger.info(f"保存数据: {url},关键字{self.keyword}")
|