Debian 9/10 server无图形界面配置python3+selenium+chrome模拟浏览网页

安装环境(pytesseract识别图片上的字符串,主要用来识别验证码)

apt update && apt upgrade -y
apt install python3 python3-pip chromium chromium-l10n chromium-driver tesseract-ocr xvfb xauth
pip3 install selenium requests PyVirtualDisplay pytesseract

测试代码test.py(打开百度,输入python—截图—点击搜索—截图—输出网页源码):

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
from selenium import  webdriver
from pyvirtualdisplay import Display

def main():
    """Run a scripted Baidu search in headless Chromium and dump the result.

    Starts a virtual X display, launches Chromium through chromedriver,
    opens https://www.baidu.com/, types ``python`` into the search box,
    saves ``input.png``, clicks the search button, saves ``result.png``
    and prints the page source to stdout. Both screenshots are written to
    the current working directory.

    The browser session and the virtual display are always released, even
    when a step fails (e.g. an element is not found or the page cannot be
    loaded); the exception itself is left to propagate to the caller.
    """
    # Function-scope import so the file's top-level import block stays as is.
    from selenium.webdriver.common.by import By

    # Resolution of the virtual display
    display = Display(visible=0, size=(1440, 900))
    display.start()
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        # Size of the Chrome window
        chrome_options.add_argument('--window-size=1280,720')
        chrome_options.add_argument('--disable-gpu')
        # `options=` replaces the deprecated `chrome_options=` keyword, which
        # newer Selenium releases no longer accept.
        browser = webdriver.Chrome(options=chrome_options)
        try:
            browser.get('https://www.baidu.com/')
            # find_element(By.XPATH, ...) works on both Selenium 3 and 4;
            # the find_element_by_* helpers were removed in Selenium 4.3.
            browser.find_element(By.XPATH, '//*[@id="kw"]').send_keys('python')
            browser.save_screenshot('input.png')
            time.sleep(5)
            browser.find_element(By.XPATH, '//*[@id="su"]').click()
            # Fixed wait to give the result page time to render before the
            # screenshot is taken.
            time.sleep(5)
            browser.save_screenshot('result.png')
            print(browser.page_source)
        finally:
            # quit() ends the whole WebDriver session (browser and
            # chromedriver); close() would only close the current window.
            browser.quit()
    finally:
        display.stop()

# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

运行测试:

python3 test.py

参考:
https://blog.testproject.io/2018/02/20/chrome-headless-selenium-python-linux-servers/
https://cuiqingcai.com/4886.html

上一篇
下一篇