Python学习记录之简单交互（微博热搜）

前言

这次主要通过tkinter进行简单的交互操作，但就目前来看这个功能没什么用处，于是没有深入研究。同时学习了一下更复杂的re正则表达式（虽然这次没用得上）。

python 正则 re模块(详细版) - 风，又奈何 - 博客园 (cnblogs.com)

思路与实践

研究微博源码的时候发现意外地简单（并不是），以为用requests就可以轻松爬取，但爬取之后却返回了一个错误的源码。

这大概是微博的身份识别系统在起作用。经过测试发现，在网页停留几秒之后才能正常进入，于是使用selenium进行操作（requests应该也能做到，以后研究）。

def Askurl(url):
    """Return the rendered page source of *url* using headless Chrome.

    The page is loaded through selenium, the function waits six seconds
    while printing a progress counter, and then the browser's current
    ``page_source`` is returned as a string.

    The browser session is always shut down before returning, even when
    loading the page raises, so repeated calls do not leave headless
    Chrome / chromedriver processes behind.
    """
    s = Service(r"E:\python\PyCharm Community Edition 2021.3.1\plugins\python-ce\helpers\typeshed\stubs\selenium\selenium\webdriver\chrome\chromedriver.exe")
    option = webdriver.ChromeOptions()
    option.add_argument('headless')  # run Chrome without a visible window
    driver = webdriver.Chrome(service=s, options=option)
    try:
        driver.get(url)
        # Per the author's testing, the real page only appears after staying
        # on it for a few seconds. This countdown is equivalent to
        # time.sleep(6), just with progress feedback.
        print("请等待.....")
        for i in range(1, 7):
            print(str(i) + "/6")
            time.sleep(1)
        return driver.page_source
    finally:
        # Release the browser and the chromedriver process.
        driver.quit()

可以正常获取源码

通过tkinter添加一个选择框，分别对应三个榜单

def jh():
    """Show a small chooser window and scrape the selected Weibo ranking.

    Three push-button style radio buttons are displayed, one per ranking.
    Clicking one passes the matching URL to ``Gtedata`` and then closes
    the window, which ends the Tk main loop.
    """
    root = Tk()
    v = IntVar()
    root.geometry('300x100')

    # Radio button value -> ranking page URL.
    urls = {
        1: 'https://s.weibo.com/top/summary/',
        2: 'https://s.weibo.com/top/summary/summary?cate=socialevent',
        3: 'https://s.weibo.com/top/summary/summary?cate=entrank',
    }

    def selection():
        # 'link' is the original placeholder used when no value matches.
        url = urls.get(v.get(), 'link')
        try:
            Gtedata(url)
        finally:
            # Close the window even if scraping fails, so the dialog is not
            # left open after an error in the callback.
            root.destroy()

    Language = [('热搜榜', 1),
                ('要闻榜', 2),
                ('文娱榜', 3),
                ]
    for lang, num in Language:
        b = Radiobutton(root, text=lang, variable=v, value=num,
                        indicatoron=False, padx=30, pady=3, command=selection)
        b.pack(anchor=W, fill=X)
    root.mainloop()

运行结果：

附源码：

点击展开

from tkinter import *
import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import time
from bs4 import BeautifulSoup

def Askurl(url):
    """Return the rendered page source of *url* using headless Chrome.

    The page is loaded through selenium, the function waits six seconds
    while printing a progress counter, and then the browser's current
    ``page_source`` is returned as a string.

    The browser session is always shut down before returning, even when
    loading the page raises, so repeated calls do not leave headless
    Chrome / chromedriver processes behind.
    """
    s = Service(r"E:\python\PyCharm Community Edition 2021.3.1\plugins\python-ce\helpers\typeshed\stubs\selenium\selenium\webdriver\chrome\chromedriver.exe")
    option = webdriver.ChromeOptions()
    option.add_argument('headless')  # run Chrome without a visible window
    driver = webdriver.Chrome(service=s, options=option)
    try:
        driver.get(url)
        # Give the page a few seconds before reading it; the countdown is
        # equivalent to time.sleep(6), just with progress feedback.
        print("请等待.....")
        for i in range(1, 7):
            print(str(i) + "/6")
            time.sleep(1)
        return driver.page_source
    finally:
        # Release the browser and the chromedriver process.
        driver.quit()

# Module-level accumulator: every call to Gtedata appends its rows here.
datalist = []


def Gtedata(url):
    """Scrape the ranking page at *url* and append its entries to ``datalist``.

    The page source is fetched with ``Askurl`` and printed. Every
    ``<td class="td-02">`` cell is then scanned with two regular
    expressions: one for the anchor text (title) and one for the anchor's
    ``href``. Each cell with a title yields a row ``[title]`` or
    ``[title, absolute_link]``. Cells without a matching title are skipped
    instead of raising ``IndexError``.

    After parsing, the whole of ``datalist`` is printed, one row per line.
    """
    Findtitle = re.compile(r'">(.*?)</a>')
    Findlink = re.compile(r'<a href="(.*?)" target="_blank">')

    html = Askurl(url)
    print(html)
    soup = BeautifulSoup(html, "html.parser")
    for item in soup.find_all('td', class_="td-02"):
        item = str(item)

        titles = Findtitle.findall(item)
        if not titles:
            # No anchor text in this cell: nothing to record.
            continue
        data = [titles[0]]

        # The link is optional; the matched href is appended to the site root.
        links = Findlink.findall(item)
        if links:
            data.append('https://s.weibo.com' + str(links[0]))

        datalist.append(data)

    # Test output.
    for index in datalist:
        print(index)

def jh():
    """Show a small chooser window and scrape the selected Weibo ranking.

    Three push-button style radio buttons are displayed, one per ranking.
    Clicking one passes the matching URL to ``Gtedata`` and then closes
    the window, which ends the Tk main loop.
    """
    root = Tk()
    v = IntVar()
    root.geometry('300x100')

    # Radio button value -> ranking page URL.
    urls = {
        1: 'https://s.weibo.com/top/summary/',
        2: 'https://s.weibo.com/top/summary/summary?cate=socialevent',
        3: 'https://s.weibo.com/top/summary/summary?cate=entrank',
    }

    def selection():
        # 'link' is the original placeholder used when no value matches.
        url = urls.get(v.get(), 'link')
        try:
            Gtedata(url)
        finally:
            # Close the window even if scraping fails, so the dialog is not
            # left open after an error in the callback.
            root.destroy()

    Language = [('热搜榜', 1),
                ('要闻榜', 2),
                ('文娱榜', 3),
                ]
    for lang, num in Language:
        b = Radiobutton(root, text=lang, variable=v, value=num,
                        indicatoron=False, padx=30, pady=3, command=selection)
        b.pack(anchor=W, fill=X)
    root.mainloop()

def main():
    """Script entry point: open the ranking chooser window."""
    jh()


if __name__ == "__main__":
    main()