# Date: 2019.5.10
# Author: isenwen
# Description: download-link crawler for dytt8.net (Movie Heaven)

import requests
import re
import time
import pymysql


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'
}
BASE_URL = 'https://www.dytt8.net'


# Collect the detail-page URLs from one list page (roughly 25 per page)
def get_url(url):
    # Some pages contain bytes that are not valid GBK; ignore them instead of crashing
    ret = requests.get(url, headers=headers).content.decode('gbk', errors='ignore')
    link_lists = re.findall(r'<a\shref="(.*?)"\sc', ret)
    detail_urls = map(lambda link: BASE_URL + link, link_lists)
    return detail_urls  # a lazy map object, not a list; consume it with `for ... in ...`
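
# Usage sketch (the page number 1 below is just an illustrative example):
#   for detail_url in get_url(BASE_URL + '/html/gndy/dyzz/list_23_1.html'):
#       print(detail_url)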

# Extract the download link(s) from a movie detail page
def get_parse(detail_url):
    ret = requests.get(detail_url, headers=headers).content.decode('gbk', errors='ignore')
    download_link = re.findall(r'<td\sstyle="WORD-WRAP.*?"><a.*?>(.*?)</a></td>', ret, re.DOTALL)

    return download_link
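
# Shape of the result (an assumption based on dytt8's detail-page markup):
# a list of link strings, typically one ftp:// URL per movie, e.g.
#   ['ftp://.../Some.Movie.2019.mkv']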


def get_save(download_link):
    # Connect to the database (the credentials here are test values)
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        database='test',
        port=3306
    )
    cursor = conn.cursor()

    # Use an unquoted %s placeholder so pymysql escapes the value itself;
    # wrapping it in quotes inside the SQL string would break parameterization
    sql = 'insert into movies(id, link) values(null, %s)'

    cursor.execute(sql, (download_link,))
    conn.commit()
    conn.close()
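
# The script assumes a `movies` table already exists. A minimal schema matching
# the INSERT above would be (an assumption, not from the original source):
#
#   CREATE TABLE movies (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       link TEXT NOT NULL
#   );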


def main():
    first_time = time.time()
    pages = int(input('Number of pages to crawl: '))
    # List pages on the site are numbered from 1: list_23_1.html, list_23_2.html, ...
    for page in range(1, pages + 1):
        url = 'https://www.dytt8.net/html/gndy/dyzz/list_23_{}.html'.format(page)
        detail_urls = get_url(url)
        for detail_url in detail_urls:
            download_links = get_parse(detail_url)
            print(download_links)
            # get_parse() returns a list, so save each link individually
            for download_link in download_links:
                get_save(download_link)

    last_time = time.time()
    print('All saved! Elapsed time: %.2fs' % (last_time - first_time))


if __name__ == '__main__':
    main()