安装环境(pytesseract识别图片上的字符串,主要用来识别验证码)
apt update && apt upgrade -y
apt install python3 python3-pip chromium chromium-l10n chromium-driver tesseract-ocr xvfb xauth
pip3 install selenium requests PyVirtualDisplay pytesseract
测试代码test.py(打开百度,输入python—截图—点击搜索—截图—输出网页源码):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
from selenium import webdriver
from pyvirtualdisplay import Display
def main():
    """Smoke-test headless Chromium driven by Selenium under a virtual display.

    Opens the Baidu home page, types ``python`` into the search box, saves
    ``input.png``, clicks the search button, saves ``result.png`` and prints
    the resulting page source to stdout.

    The browser session and the virtual display are always torn down, even
    when a step in between raises.
    """
    # Local import keeps this block self-contained. ``find_element(By..., ...)``
    # replaces the ``find_element_by_*`` helpers removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    # Virtual display resolution.
    display = Display(visible=0, size=(1440, 900))
    display.start()
    try:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        # Chrome window size.
        chrome_options.add_argument('--window-size=1280,720')
        chrome_options.add_argument('--disable-gpu')

        # ``options=`` is the supported keyword; ``chrome_options=`` was
        # deprecated and later removed.
        browser = webdriver.Chrome(options=chrome_options)
        try:
            browser.get('https://www.baidu.com/')
            browser.find_element(By.XPATH, '//*[@id="kw"]').send_keys('python')
            browser.save_screenshot('input.png')
            time.sleep(5)
            browser.find_element(By.XPATH, '//*[@id="su"]').click()
            # Fixed pause to give the results page time to render before
            # the screenshot is taken.
            time.sleep(5)
            browser.save_screenshot('result.png')
            print(browser.page_source)
        finally:
            # quit() ends the whole WebDriver session (and the chromedriver
            # process); close() would only close the current window.
            browser.quit()
    finally:
        display.stop()
# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
python3 test.py
参考:
https://blog.testproject.io/2018/02/20/chrome-headless-selenium-python-linux-servers/
https://cuiqingcai.com/4886.html