用Python读取yml文件并抓取友链的文章数

手动获取友链的文章数太麻烦了：文章数需要经常更新，而每个人的更新速度又各不相同。所以我先写了一个脚本，用来读取使用butterfly主题的友链的文章数。以后有时间再完善一下，支持更多主题吧。

效果

代码

使用前，请将getLinkList函数中open所打开的文件路径换成你自己的yml文件路径。

import urllib
from urllib.request import urlopen
# NOTE(review): `findall` is not used anywhere in the visible code, and
# distutils was removed from the standard library in Python 3.12 (PEP 632),
# so this import fails there unless a compatibility shim (e.g. the one
# shipped with setuptools) is installed. Consider dropping it.
from distutils.filelist import findall
from bs4 import BeautifulSoup  # third-party: beautifulsoup4
import yaml  # third-party: PyYAML

def getLinkList(yml_path='/Users/zhheo/Desktop/我的项目/blog/zhheo/source/_data/link.yml'):
    """Print the post count of every friend link listed in a YAML file.

    The YAML document is expected to be a list of groups, each holding a
    ``link_list`` of entries that carry at least a ``name`` and a ``link``
    key. For every entry one line is printed in the form
    ``<name><count>：<link>``.

    Args:
        yml_path: Path of the YAML file to read. Defaults to the path the
            script was originally written for; pass your own file instead.

    Raises:
        OSError: If the YAML file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
        KeyError: If an entry lacks the ``name`` or ``link`` key.
    """
    # Read as UTF-8 explicitly so non-ASCII names do not depend on the
    # platform's default locale encoding; `with` guarantees the handle closes.
    with open(yml_path, 'r', encoding='utf-8') as f:
        ystr = f.read()

    # NOTE(review): FullLoader can still build arbitrary Python-tagged values.
    # If this file may ever come from an untrusted source, switch to
    # yaml.safe_load.
    ymllist = yaml.load(ystr, Loader=yaml.FullLoader)

    # An empty document loads as None and a group may omit `link_list`;
    # treat both as "nothing to do" instead of crashing.
    for item in ymllist or []:
        for link in item.get('link_list') or []:
            try:
                count = getCount(link['link'])
            except Exception:
                # Best effort: an unreachable or unparsable site must not
                # abort the whole run. Unlike a bare `except:`, this still
                # lets Ctrl-C (KeyboardInterrupt) stop the script.
                count = '???'
            print(f"{link['name']}{count}：{link['link']}")
    

def getCount(site):
    """Fetch a site's home page and return the post count it displays.

    The count is read from the first ``div.item-count`` nested inside the
    first ``div.webinfo-item`` of the page (per the accompanying article,
    this targets the markup of the Butterfly theme).

    Args:
        site: Base URL of the site. A trailing ``/`` is appended if missing.

    Returns:
        The count text with surrounding whitespace removed, or ``"???"`` when
        the expected elements are absent or contain no text.

    Raises:
        urllib.error.URLError: If the page cannot be fetched. Timeouts and
            malformed URLs raise their own exceptions and also propagate.
    """
    if not site.endswith('/'):
        site += '/'

    # Presumably sent because some servers reject urllib's default user agent.
    headers = {'user-agent': 'mozilla/5.0'}
    add = urllib.request.Request(url=site, headers=headers)
    # `with` closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url=add, timeout=10) as htmlr:
        html = htmlr.read()
    soup = BeautifulSoup(html, "html.parser")

    # `find` returns None when nothing matches; fall back to the placeholder
    # instead of raising AttributeError on pages with different markup.
    webinfo = soup.find('div', class_='webinfo-item')
    if webinfo is None:
        return "???"
    countdiv = webinfo.find('div', class_='item-count')
    if countdiv is None:
        return "???"

    # Whitespace-only text is not a usable count either.
    return countdiv.get_text().strip() or "???"

# Script entry point: run the scan as soon as this file is executed.
getLinkList()