|
@@ -3,7 +3,6 @@ from time import sleep
|
|
|
from selenium.common.exceptions import TimeoutException, NoSuchElementException
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support import expected_conditions as ec
|
|
|
-from selenium.webdriver.support.wait import WebDriverWait
|
|
|
|
|
|
import utils
|
|
|
from adapters.data_collection_adapter_interface import IDataCollectionAdapter
|
|
@@ -21,6 +20,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
self._driver = None
|
|
|
self._keyword = None
|
|
|
self._adapter_type = "chinabidding"
|
|
|
+ self._next_count = 0
|
|
|
|
|
|
def login(self, username: str, password: str) -> None:
|
|
|
try:
|
|
@@ -28,24 +28,20 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
By.XPATH, "//div[@id='loginRight']/a[@class='login']"
|
|
|
)
|
|
|
login_el.click()
|
|
|
- wait = WebDriverWait(self.driver, 10, 1)
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "userpass")))
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "userpass")))
|
|
|
un_el = self.driver.find_element(By.ID, "username")
|
|
|
un_el.send_keys(username)
|
|
|
pass_el = self.driver.find_element(By.ID, "userpass")
|
|
|
pass_el.send_keys(password)
|
|
|
login_btn = self.driver.find_element(By.ID, "login-button")
|
|
|
login_btn.click()
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
except TimeoutException as e:
|
|
|
raise Exception(f"登录失败 [{self._adapter_type}] [超时]: {e}")
|
|
|
except NoSuchElementException as e:
|
|
|
raise Exception(f"登录失败 [{self._adapter_type}] [找不到元素]: {e}")
|
|
|
|
|
|
- def collect(self, keyword: str, store: IDataStore):
|
|
|
- if store:
|
|
|
- self._store = store
|
|
|
- self._keyword = keyword
|
|
|
+ def _collect(self, keyword: str):
|
|
|
items = self._search_by_type(keyword, 0)
|
|
|
self._process_list(items, 0)
|
|
|
sleep(2)
|
|
@@ -68,8 +64,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
By.XPATH, "//div[@id='z-b-jg-gg']/h2/a[@class='more']"
|
|
|
)
|
|
|
el.click()
|
|
|
- wait = WebDriverWait(self.driver, 10, 1)
|
|
|
- wait.until(ec.number_of_windows_to_be(2))
|
|
|
+ self._wait_until(ec.number_of_windows_to_be(2))
|
|
|
self.driver.close()
|
|
|
self.driver.switch_to.window(self.driver.window_handles[0])
|
|
|
return self._search(keyword)
|
|
@@ -79,8 +74,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
raise Exception(f"搜索失败 [{self._adapter_type}] [找不到元素]: {e}")
|
|
|
|
|
|
def _search(self, keyword: str) -> list:
|
|
|
- wait = WebDriverWait(self.driver, 10, 1)
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "searchBidProjForm")))
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "searchBidProjForm")))
|
|
|
search_el = self.driver.find_element(
|
|
|
By.XPATH, "//form[@id='searchBidProjForm']/ul/li/input[@id='fullText']"
|
|
|
)
|
|
@@ -90,15 +84,16 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
By.XPATH, "//form[@id='searchBidProjForm']/ul/li/button"
|
|
|
)
|
|
|
search_btn.click()
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ self._next_count = 0
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
default_search_txt = "全部"
|
|
|
search_txt = utils.get_config_value(self.search_day_key, default_search_txt)
|
|
|
- utils.get_logger().info(f"搜索关键字: {keyword},搜索条件: {search_txt}")
|
|
|
+ utils.get_logger().debug(f"搜索日期条件: {search_txt}")
|
|
|
if search_txt != default_search_txt:
|
|
|
last_el = self.driver.find_element(By.LINK_TEXT, search_txt)
|
|
|
sleep(1)
|
|
|
last_el.click()
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
else:
|
|
|
sleep(1)
|
|
|
try:
|
|
@@ -108,7 +103,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
count = len(a_links)
|
|
|
if count > 1:
|
|
|
count = count - 1
|
|
|
- utils.get_logger().info(f"共查询到 {count} 页,每页 10 条")
|
|
|
+ utils.get_logger().debug(f"共查询到 {count} 页,每页 10 条")
|
|
|
except Exception as e:
|
|
|
utils.get_logger().error(f"搜索失败[尝试查询页数]: {e}")
|
|
|
items = self.driver.find_elements(By.XPATH, "//ul[@class='as-pager-body']/li/a")
|
|
@@ -125,17 +120,19 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
|
|
|
def _next_page(self) -> list:
|
|
|
try:
|
|
|
- wait = WebDriverWait(self.driver, 10, 1)
|
|
|
try:
|
|
|
btn = self.driver.find_element(
|
|
|
By.XPATH, "//form[@id='pagerSubmitForm']/a[@class='next']"
|
|
|
)
|
|
|
except NoSuchElementException:
|
|
|
- utils.get_logger().info(f"翻页结束 [{self._adapter_type}]")
|
|
|
+ utils.get_logger().debug(f"翻页结束 [{self._adapter_type}]")
|
|
|
return []
|
|
|
btn.click()
|
|
|
- utils.get_logger().info(f"跳转到下页: {self.driver.current_url}")
|
|
|
- wait.until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
+ self._next_count += 1
|
|
|
+ utils.get_logger().debug(
|
|
|
+ f"下一页[{self._next_count+1}]: {self.driver.current_url}"
|
|
|
+ )
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.ID, "site-content")))
|
|
|
items = self.driver.find_elements(
|
|
|
By.XPATH, "//ul[@class='as-pager-body']/li/a"
|
|
|
)
|
|
@@ -143,7 +140,7 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
except NoSuchElementException as e:
|
|
|
raise Exception(f"翻页失败 [{self._adapter_type}] [找不到元素]: {e}")
|
|
|
except TimeoutException as e:
|
|
|
- raise Exception(f"翻页结束 [{self._adapter_type}] [超时]: {e}")
|
|
|
+ raise Exception(f"翻页失败 [{self._adapter_type}] [超时]: {e}")
|
|
|
|
|
|
def _process_item(self, item, data_type):
|
|
|
main_handle = self.driver.current_window_handle
|
|
@@ -154,17 +151,15 @@ class ChinabiddingDataCollectionAdapter(IDataCollectionAdapter):
|
|
|
close = False
|
|
|
return
|
|
|
item.click()
|
|
|
- wait = WebDriverWait(self.driver, 10, 1)
|
|
|
- wait.until(ec.number_of_windows_to_be(2))
|
|
|
+ self._wait_until(ec.number_of_windows_to_be(2))
|
|
|
handles = self.driver.window_handles
|
|
|
for handle in handles:
|
|
|
if handle != main_handle:
|
|
|
self.driver.switch_to.window(handle)
|
|
|
break
|
|
|
url = self.driver.current_url
|
|
|
- # utils.get_logger().info(f"跳转详情")
|
|
|
- print(".", end="")
|
|
|
- wait.until(ec.presence_of_element_located((By.CLASS_NAME, "content")))
|
|
|
+ utils.get_logger().debug(f"跳转详情")
|
|
|
+ self._wait_until(ec.presence_of_element_located((By.CLASS_NAME, "content")))
|
|
|
content = self.driver.find_element(By.CLASS_NAME, "content").text
|
|
|
if self._check_content(content):
|
|
|
self._save_db(url, content, data_type)
|