安装环境(pytesseract识别图片上的字符串,主要用来识别验证码)
# Refresh the package index and upgrade installed packages.
apt update && apt upgrade -y
# System packages: Python, Chromium with its driver, Tesseract OCR, and Xvfb.
apt install python3 python3-pip chromium chromium-l10n chromium-driver tesseract-ocr xvfb xauth
# Python libraries used by the test script.
pip3 install selenium requests PyVirtualDisplay pytesseract
测试代码test.py(打开百度,输入python—截图—点击搜索—截图—输出网页源码):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Smoke test: drive headless Chromium with Selenium inside a virtual display.

Opens Baidu, types "python" into the search box, saves a screenshot, clicks
the search button, saves a second screenshot, and prints the page source.
"""
import time

from pyvirtualdisplay import Display
from selenium import webdriver
from selenium.webdriver.common.by import By


def main():
    """Run the Baidu search smoke test.

    Writes ``input.png`` and ``result.png`` to the current directory and
    prints the result page's HTML to stdout. The browser and the virtual
    display are released even if a step raises.
    """
    # Virtual display resolution.
    display = Display(visible=0, size=(1440, 900))
    display.start()
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        # Chrome window size.
        chrome_options.add_argument('--window-size=1280,720')
        chrome_options.add_argument('--disable-gpu')

        # `options=` replaces the deprecated `chrome_options=` keyword.
        browser = webdriver.Chrome(options=chrome_options)
        try:
            browser.get('https://www.baidu.com/')

            # find_element(By.XPATH, ...) replaces the removed
            # find_element_by_xpath helper.
            browser.find_element(By.XPATH, '//*[@id="kw"]').send_keys('python')
            browser.save_screenshot('input.png')
            time.sleep(5)

            browser.find_element(By.XPATH, '//*[@id="su"]').click()
            # Fixed pause to let the result page load before capturing it.
            time.sleep(5)
            browser.save_screenshot('result.png')

            print(browser.page_source)
        finally:
            # quit() ends the whole driver session; close() would only close
            # the current window and leave the driver process behind.
            browser.quit()
    finally:
        display.stop()


if __name__ == '__main__':
    main()
python3 test.py
参考:
https://blog.testproject.io/2018/02/20/chrome-headless-selenium-python-linux-servers/
https://cuiqingcai.com/4886.html