1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
| from selenium import webdriver from selenium.webdriver.common.by import By import pymysql
# Pages and element locators used by the scraper.
RANKING_URL = "https://www.bilibili.com/v/popular/rank/bangumi"
RANKING_ITEMS_XPATH = "//*[@id='app']/div/div[2]/div[2]/ul/li"
RANKING_LINK_XPATH = "div/div[1]/a"  # relative to one ranking <li>
PLAY_PAGE_DETAIL_LINK_XPATH = "//*[@id='__next']/div[2]/div[2]/div[2]/div/div[2]/a"
# Common prefix of every field locator on the detail page.
DETAIL_INFO_XPATH = "//*[@id='app']/div[1]/div[2]/div/div[2]"

# Stored in ct_point_num when the detail page has no rating-count element.
MISSING_POINT_NUM = '评分人数不足'

# Record keys, in the same order as the columns of INSERT_SQL.
RECORD_FIELDS = (
    "rank", "name", "tips", "play_num", "people",
    "date", "status", "point", "point_num",
)

CREATE_TABLE_SQL = """CREATE TABLE IF NOT EXISTS news (
    ct_rank VARCHAR(3) ,
    ct_name VARCHAR(30),
    ct_tips VARCHAR(30),
    ct_play_num VARCHAR(10),
    ct_people VARCHAR(10),
    ct_date VARCHAR(20),
    ct_status VARCHAR(20),
    ct_point VARCHAR(5),
    ct_point_num VARCHAR(10)
    );"""

INSERT_SQL = (
    "insert into news(ct_rank,ct_name,ct_tips,ct_play_num,ct_people,"
    "ct_date,ct_status,ct_point,ct_point_num) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _collect_work_urls(driver):
    """Open the ranking page and return each entry's link, in page order.

    Prints the number of entries found and the collected URL list.
    """
    driver.get(RANKING_URL)
    items = driver.find_elements(By.XPATH, RANKING_ITEMS_XPATH)
    print(len(items))
    urls = [
        item.find_element(By.XPATH, RANKING_LINK_XPATH).get_attribute("href")
        for item in items
    ]
    print(urls)
    return urls


def _scrape_work(driver, rank, play_url):
    """Scrape one ranked entry and return it as a dict keyed by RECORD_FIELDS.

    ``play_url`` is the link taken from the ranking page; the detail page is
    reached through a link found on that page. ``rank`` is the 1-based
    position of the entry in the ranking and is stored as a string.

    Raises:
        NoSuchElementException: if any element other than the rating count
            is missing from the page.
    """
    # Function-scope import keeps this block self-contained.
    from selenium.common.exceptions import NoSuchElementException

    def text_at(suffix):
        return driver.find_element(By.XPATH, DETAIL_INFO_XPATH + suffix).text

    driver.get(play_url)
    detail_url = driver.find_element(
        By.XPATH, PLAY_PAGE_DETAIL_LINK_XPATH
    ).get_attribute("href")
    print(detail_url)
    driver.get(detail_url)

    name = text_at("/div[1]/span[1]")
    tag_elements = driver.find_elements(
        By.XPATH, DETAIL_INFO_XPATH + "/div[1]/span[2]/span"
    )
    # Empty tag texts are dropped so the result never has stray commas.
    tips = ",".join(text for text in (tag.text for tag in tag_elements) if text)
    play_num = text_at("/div[2]/div[1]/span[1]/em")
    people = text_at("/div[2]/div[1]/span[2]/em")
    date = text_at("/div[3]/span[1]")
    status = text_at("/div[3]/span[2]")
    point = text_at("/div[2]/div[2]/div/div[1]")
    try:
        point_num = text_at("/div[2]/div[2]/div/div[2]/div[1]")
    except NoSuchElementException:
        # Only a missing element is expected here; anything else (lost
        # session, crashed browser) should surface instead of being hidden.
        point_num = MISSING_POINT_NUM

    record = {
        "rank": str(rank),
        "name": name,
        "tips": tips,
        "play_num": play_num,
        "people": people,
        "date": date,
        "status": status,
        "point": point,
        "point_num": point_num,
    }
    for key in ("rank", "tips", "name", "play_num", "people",
                "date", "status", "point", "point_num"):
        print(record[key])
    return record


def _scrape_ranking():
    """Scrape every entry of the ranking and return the list of records.

    A single browser is reused for all pages and is always shut down with
    ``quit()``, even when a page lookup raises.
    """
    driver = webdriver.Chrome()
    try:
        work_urls = _collect_work_urls(driver)
        # enumerate gives the true position even if a URL appears twice.
        return [
            _scrape_work(driver, rank, url)
            for rank, url in enumerate(work_urls, start=1)
        ]
    finally:
        driver.quit()


def main():
    """Create the ``cartoon.news`` table if needed, scrape the ranking, store it.

    The database is prepared before scraping so an unreachable server fails
    fast. Rows are committed once, after all inserts succeeded; the cursor
    and connection are closed on every exit path.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from
    # configuration or the environment.
    conn = pymysql.connect(host='localhost', user='root', password='root', charset='utf8mb4')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("create database if not exists cartoon;")
            conn.select_db("cartoon")
            cursor.execute(CREATE_TABLE_SQL)

            records = _scrape_ranking()
            rows = [
                tuple(record[field] for field in RECORD_FIELDS)
                for record in records
            ]
            cursor.executemany(INSERT_SQL, rows)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()


if __name__ == '__main__':
    main()
|