哔哩哔哩Bv号获取信息小程序.py

哔哩哔哩Bv号获取信息小程序.py

四月 02, 2020
  • 学习 Python 爬虫的第一个产物

    输入 BV 号，获取视频的信息（地址、标题、av 号、UP 主）

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import requests, bs4, re, os
from lxml import etree
# Ask for a video id, download its page and parse it; the sections below
# read everything they need from `soup`.
# Example id: BV1u7411S74d
# strip() drops stray whitespace from a pasted id so the URL stays valid.
url = input("输入BV号(BV12345678)\n").strip()
url = 'https://www.bilibili.com/video/'+url
# requests waits indefinitely by default; a timeout keeps the script from
# hanging on a stalled connection.
response = requests.get(url, timeout=10)
# Stop here with a clear HTTP error rather than scraping an error page.
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text, "html.parser")
# Derive the legacy "av" id from the first <link> tag of the page.
# NOTE(review): presumably that tag carries a ".../av<id>/" URL -- confirm
# against the current page markup.
link_tags = soup.select('link')
first_link = str(link_tags[0]) if link_tags else ''
# Capture whatever sits between the first "/av" and the next "/".
av_match = re.search(r'/av([^/]*)/', first_link)
# Fall back to an empty string when the tag is missing or holds no av id;
# the character-by-character scan used to die with IndexError/NameError here.
avnum = ('av' + av_match.group(1)) if av_match else ''

# Extract the video title from the page's <title> element.
title_tag = soup.find('title')
# get_text() yields the element text with HTML entities decoded, so "&" is
# shown as "&" rather than "&amp;"; a missing <title> gives an empty title.
page_title = title_tag.get_text() if title_tag is not None else ''
# Keep the part before the first "_"; when there is no "_" the whole text is
# kept instead of failing on an unset end index.
# NOTE(review): presumably "_" separates the video title from a site suffix,
# which means a title that itself contains "_" is cut short -- confirm.
title = page_title.partition('_')[0]

# Find the uploader's name and profile URL.
# The anchor inspected is the one immediately AFTER the first anchor whose
# markup mentions "//space.bilibili.com".
# NOTE(review): presumably the first such anchor is an avatar link and the
# next one carries the name -- confirm against the current page markup.
anchors = soup.select('a')
space_idx = next(
    (i for i, anchor in enumerate(anchors)
     if "//space.bilibili.com" in str(anchor)),
    None,
)

# Defaults cover "no profile link found" and "no anchor after it"; these
# cases used to parse an unrelated anchor or raise IndexError.
up = ''
upurl = ''
if space_idx is not None and space_idx + 1 < len(anchors):
    anchor_html = str(anchors[space_idx + 1])
    # Profile URL: the text between the first '"//' and the closing quote.
    url_match = re.search(r'"//([^"]*)"', anchor_html)
    if url_match:
        upurl = url_match.group(1)
        # Name: the text between the ">" that closes the opening tag and the
        # next "<", searched only from the URL onwards.
        name_match = re.compile(r'>([^<>]*)<').search(
            anchor_html, url_match.start(1)
        )
        if name_match:
            up = name_match.group(1)

#comic = soup.select('div')
#for i in range(len(comic)):
# print("{}{}".format(i, comic[i]))

# Clear the console, print the collected fields, then wait so a
# double-clicked console window does not close immediately.
# "cls" and "pause" exist only in the Windows shell; elsewhere use "clear"
# and a plain Enter prompt instead of emitting "command not found" errors.
on_windows = os.name == 'nt'
os.system("cls" if on_windows else "clear")
print('地址:'+url)
print('标题:'+title)
print('av号:'+avnum)
print('up主:'+up+' ('+upurl+')')
print('')
if on_windows:
    os.system('pause')
else:
    input('按回车键继续...')