diff --git a/douban.py b/douban.py new file mode 100644 index 0000000..e1fcb9a --- /dev/null +++ b/douban.py @@ -0,0 +1,169 @@ +import urllib.request +import urllib.error +import urllib.parse +from bs4 import BeautifulSoup +import re +import xlwt +import sqlite3 + + +def main(): + baseurl="https://movie.douban.com/top250?start=" + datalist=getdata(baseurl) + #savepath=".\\豆瓣电影top250.xls" + #savedata(datalist,savepath) + dbpath="movie.db" + savedata2DB(datalist,dbpath) + + + +#影片详情链接的规则 +findlink=re.compile(r'') #创建正则表达式规则 + +#影片图片 +findImSrc=re.compile(r'(.*)') + +#影片评分 +findRating=re.compile(r'(.*)') + +#评价人数 +findJudge=re.compile(r'(\d*)人评价') + +#找到概况 +findInq=re.compile(r'(.*)') + +#找到影片的相关内容 +findBd=re.compile(r'

(.*?)

',re.S) + + +def getdata(baseurl): + datalist=[] + for i in range(0,10): + url=baseurl+str(i*25) + html=askurl(url) + soup=BeautifulSoup(html,"html.parser") + for item in soup.find_all('div',class_="item"): #查找符合要求的字符串,形成列表 + #print(item) #测试查看电影item信息 + data=[] #保存一部电影的全部信息 + item=str(item) + + #影片详情链接 + link=re.findall(findlink,item)[0] #通过正则表达式查找指定字符串 + data.append(link) + imgSrc=re.findall(findImSrc,item)[0] + data.append(imgSrc) + titles=re.findall(findTitle,item) + if(len(titles)==2): + ctitle=titles[0] + data.append(ctitle) + otitle=titles[1].replace("/","") + data.append(otitle) + else: + data.append(titles[0]) + data.append(' ') + + rating=re.findall(findRating,item)[0] + data.append(rating) + + judgenum=re.findall(findJudge,item)[0] + data.append(judgenum) + + inq=re.findall(findInq,item) + if len(inq)!=0: + inq=inq[0].replace("。","") + data.append(inq) + else: + data.append(" ") + + bd=re.findall(findBd,item)[0] + bd=re.sub('(\s+)?'," ",bd) + bd=re.sub("/"," ",bd) + data.append(bd.strip()) + + datalist.append(data) + + print(datalist) + return datalist + +def savedata(datalist,savepath): + print("save....") + book = xlwt.Workbook(encoding="utf-8",style_compression=0) # 创建workbook对象 + sheet = book.add_sheet('豆瓣电影Top250',cell_overwrite_ok=True) + col=('电影详情链接',"图片链接","影片中文名","影片外国名","评分","评价数","概况","相关信息") + for i in range(0,8): + sheet.write(0,i,col[i]) + for i in range(0,250): + print("%d"%(i+1)) + data=datalist[i] + for j in range(0,8): + sheet.write(i+1,j,data[j]) + + book.save(savepath) + +def savedata2DB(datalist,dbpath): + init_db(dbpath) + conn=sqlite3.connect(dbpath) + cursor=conn.cursor() + for data in datalist: + for index in range(len(data)): + if index==4 or index==5: + continue + data[index]='"'+data[index]+'"' + sql=''' + insert into movie250( + info_link,pic_link,cname,ename,score,rated,introduction,info + ) + values(%s)'''%",".join(data) + print(sql) + cursor.execute(sql) + conn.commit() + cursor.close() + conn.close() + + + print("") + +def init_db(dbpath): + sql=''' + create table movie250( + id integer primary key autoincrement, + info_link text, + pic_link text, + cname varchar, + ename varchar, + score numeric, + rated numeric, + introduction text, + info text + + ) + + ''' + conn=sqlite3.connect(dbpath) + cursor=conn.cursor() + cursor.execute(sql) + conn.commit() + conn.close() + +def askurl(url): + head={ + "User-Agent" :"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + } + request=urllib.request.Request(url,headers=head) + html="" + try: + response=urllib.request.urlopen(request) + html=response.read().decode("utf-8") + except urllib.error.URLError as e: + if hasattr(e,"code"): + print(e.code) + if hasattr(e,"reason"): + print(e.reason) + return html + +if __name__ =="__main__": + main() + print("爬取完毕") \ No newline at end of file diff --git a/movie.db b/movie.db new file mode 100644 index 0000000..4fc6e59 Binary files /dev/null and b/movie.db differ diff --git a/test/1.py b/test/1.py new file mode 100644 index 0000000..6394697 --- /dev/null +++ b/test/1.py @@ -0,0 +1,369 @@ +import turtle + +turtle.title('PythonBingDwenDwen') + +turtle.speed(1) # 速度 + + # 左手 +turtle.penup() +turtle.goto(177, 112) +turtle.pencolor("lightgray") +turtle.pensize(3) +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(80) +turtle.circle(-45, 200) +turtle.circle(-300, 23) +turtle.end_fill() + + # 左手内 +turtle.penup() +turtle.goto(182, 95) +turtle.pencolor("black") +turtle.pensize(1) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.setheading(95) +turtle.pendown() +turtle.circle(-37, 160) +turtle.circle(-20, 50) +turtle.circle(-200, 30) +turtle.end_fill() + # 轮廓 + # 头顶 +turtle.penup() +turtle.goto(-73, 230) +turtle.pencolor("lightgray") +turtle.pensize(3) +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(20) +turtle.circle(-250, 35) + # 左耳 +turtle.setheading(50) +turtle.circle(-42, 180) + # 左侧 +turtle.setheading(-50) +turtle.circle(-190, 30) +turtle.circle(-320, 45) + # 左腿 +turtle.circle(120, 30) +turtle.circle(200, 12) +turtle.circle(-18, 85) +turtle.circle(-180, 23) +turtle.circle(-20, 110) +turtle.circle(15, 115) +turtle.circle(100, 12) + # 右腿 +turtle.circle(15, 120) +turtle.circle(-15, 110) +turtle.circle(-150, 30) +turtle.circle(-15, 70) +turtle.circle(-150, 10) +turtle.circle(200, 35) +turtle.circle(-150, 20) + # 右手 +turtle.setheading(-120) +turtle.circle(50, 30) +turtle.circle(-35, 200) +turtle.circle(-300, 23) + # 右侧 +turtle.setheading(86) +turtle.circle(-300, 26) + # 右耳 +turtle.setheading(122) +turtle.circle(-53, 160) +turtle.end_fill() + + # 右耳内 +turtle.penup() +turtle.goto(-130, 180) +turtle.pencolor("black") +turtle.pensize(1) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(120) +turtle.circle(-28, 160) +turtle.setheading(210) +turtle.circle(150, 20) +turtle.end_fill() + + # 左耳内 +turtle.penup() +turtle.goto(90, 230) +turtle.setheading(40) +turtle.begin_fill() +turtle.pendown() +turtle.circle(-30, 170) +turtle.setheading(125) +turtle.circle(150, 23) +turtle.end_fill() + + # 右手内 +turtle.penup() +turtle.goto(-180, -55) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.setheading(-120) +turtle.pendown() +turtle.circle(50, 30) +turtle.circle(-27, 200) +turtle.circle(-300, 20) +turtle.setheading(-90) +turtle.circle(300, 14) +turtle.end_fill() + + # 左腿内 +turtle.penup() +turtle.goto(108, -168) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(-115) +turtle.circle(110, 15) +turtle.circle(200, 10) +turtle.circle(-18, 80) +turtle.circle(-180, 13) +turtle.circle(-20, 90) +turtle.circle(15, 60) +turtle.setheading(42) +turtle.circle(-200, 29) +turtle.end_fill() + # 右腿内 +turtle.penup() +turtle.goto(-38, -210) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(-155) +turtle.circle(15, 100) +turtle.circle(-10, 110) +turtle.circle(-100, 30) +turtle.circle(-15, 65) +turtle.circle(-100, 10) +turtle.circle(200, 15) +turtle.setheading(-14) +turtle.circle(-200, 27) +turtle.end_fill() + + # 右眼 + # 眼圈 +turtle.penup() +turtle.goto(-64, 120) +turtle.begin_fill() +turtle.pendown() +turtle.setheading(40) +turtle.circle(-35, 152) +turtle.circle(-100, 50) +turtle.circle(-35, 130) +turtle.circle(-100, 50) +turtle.end_fill() + # 眼珠 +turtle.penup() +turtle.goto(-47, 55) +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(25, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(-45, 62) +turtle.pencolor("darkslategray") +turtle.fillcolor("darkslategray") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(19, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(-45, 68) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(10, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(-47, 86) +turtle.pencolor("white") +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(5, 360) +turtle.end_fill() + + # 左眼 + # 眼圈 +turtle.penup() +turtle.goto(51, 82) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(120) +turtle.circle(-32, 152) +turtle.circle(-100, 55) +turtle.circle(-25, 120) +turtle.circle(-120, 45) +turtle.end_fill() + # 眼珠 +turtle.penup() +turtle.goto(79, 60) +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(24, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(79, 64) +turtle.pencolor("darkslategray") +turtle.fillcolor("darkslategray") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(19, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(79, 70) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(10, 360) +turtle.end_fill() +turtle.penup() +turtle.goto(79, 88) +turtle.pencolor("white") +turtle.fillcolor("white") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(0) +turtle.circle(5, 360) +turtle.end_fill() + + # 鼻子 +turtle.penup() +turtle.goto(37, 80) +turtle.fillcolor("black") +turtle.begin_fill() +turtle.pendown() +turtle.circle(-8, 130) +turtle.circle(-22, 100) +turtle.circle(-8, 130) +turtle.end_fill() + + # 嘴 +turtle.penup() +turtle.goto(-15, 48) +turtle.setheading(-36) +turtle.begin_fill() +turtle.pendown() +turtle.circle(60, 70) +turtle.setheading(-132) +turtle.circle(-45, 100) +turtle.end_fill() + + # 彩虹圈 +turtle.penup() +turtle.goto(-135, 120) +turtle.pensize(5) +turtle.pencolor("cyan") +turtle.pendown() +turtle.setheading(60) +turtle.circle(-165, 150) +turtle.circle(-130, 78) +turtle.circle(-250, 30) +turtle.circle(-138, 105) +turtle.penup() +turtle.goto(-131, 116) +turtle.pencolor("slateblue") +turtle.pendown() +turtle.setheading(60) +turtle.circle(-160, 144) +turtle.circle(-120, 78) +turtle.circle(-242, 30) +turtle.circle(-135, 105) +turtle.penup() +turtle.goto(-127, 112) +turtle.pencolor("orangered") +turtle.pendown() +turtle.setheading(60) +turtle.circle(-155, 136) +turtle.circle(-116, 86) +turtle.circle(-220, 30) +turtle.circle(-134, 103) +turtle.penup() +turtle.goto(-123, 108) +turtle.pencolor("gold") +turtle.pendown() +turtle.setheading(60) +turtle.circle(-150, 136) +turtle.circle(-104, 86) +turtle.circle(-220, 30) +turtle.circle(-126, 102) +turtle.penup() +turtle.goto(-120, 104) +turtle.pencolor("greenyellow") +turtle.pendown() +turtle.setheading(60) +turtle.circle(-145, 136) +turtle.circle(-90, 83) +turtle.circle(-220, 30) +turtle.circle(-120, 100) +turtle.penup() + + # 爱心 +turtle.penup() +turtle.goto(220, 115) +turtle.pencolor("brown") +turtle.pensize(1) +turtle.fillcolor("brown") +turtle.begin_fill() +turtle.pendown() +turtle.setheading(36) +turtle.circle(-8, 180) +turtle.circle(-60, 24) +turtle.setheading(110) +turtle.circle(-60, 24) +turtle.circle(-8, 180) +turtle.end_fill() + + # 五环 +turtle.penup() +turtle.goto(-5, -170) +turtle.pendown() +turtle.pencolor("blue") +turtle.circle(6) +turtle.penup() +turtle.goto(10, -170) +turtle.pendown() +turtle.pencolor("black") +turtle.circle(6) +turtle.penup() +turtle.goto(25, -170) +turtle.pendown() +turtle.pencolor("brown") +turtle.circle(6) +turtle.penup() +turtle.goto(2, -175) +turtle.pendown() +turtle.pencolor("lightgoldenrod") +turtle.circle(6) +turtle.penup() +turtle.goto(16, -175) +turtle.pendown() +turtle.pencolor("green") +turtle.circle(6) +turtle.penup() + +turtle.pencolor("black") +turtle.goto(-16, -160) +turtle.write("BEIJING 2022", font=('Arial', 10, 'bold italic')) +turtle.hideturtle() + +turtle.done() \ No newline at end of file diff --git a/test/demo1.py b/test/demo1.py new file mode 100644 index 0000000..0fd0c25 --- /dev/null +++ b/test/demo1.py @@ -0,0 +1,29 @@ +import urllib.request +import urllib.parse + +#response =urllib.request.urlopen("http://www.baidu.com") +#print(response.read().decode('utf-8')) + +#data=bytes(urllib.parse.urlencode({"hello":"world"}),encoding="utf-8") +#response=urllib.request.urlopen("http://httpbin.org/post",data=data) +#print(response.read().decode("utf-8")) +# try: +# response=urllib.request.urlopen("http://httpbin.org/get",timeout=0.01) +# print(response.read().decode("utf-8")) +# except urllib.error.URLError as e: +# print("time out!") + +# response=urllib.request.urlopen("http://www.baidu.com",) +# print(response.getheader("Server")) + +#url="http://www.douban.com" + +headers={ +"User-Agent" :"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36" + +} + +url="http://www.douban.com" +req=urllib.request.Request(url=url,headers=headers) +response=urllib.request.urlopen(req) +print(response.read().decode("utf-8")) \ No newline at end of file diff --git a/test/demo2bs4.py b/test/demo2bs4.py new file mode 100644 index 0000000..fa5ac15 --- /dev/null +++ b/test/demo2bs4.py @@ -0,0 +1,53 @@ +import re + +from bs4 import BeautifulSoup + + +file=open("./百度一下,你就知道.html","rb") + +html=file.read() +bs=BeautifulSoup(html,"html.parser") +# print(bs.head) +#1.Tag标签及其内容,第一个 + +#print(bs.title.string) +#2.NavigableString 字符串 + +#print(type(bs)) +#3.BeautifulSoup 整个文档 + + +#print(type(bs.a.string)) +#4.Comment 标注 + + +#print(bs.head.contents[0]) + +#find_all字符串过滤:会查找与字符串完全匹配的内容 +#t_list=bs.find_all("a") + +#正则表达式搜索:使用search()搜索 +#t_list=bs.find_all(re.compile("a")) + + +#方法:传入一个函数,根据函数的要求来搜索 +# def name_is_exists(tag): +# return tag.has_attr("name") +# t_list=bs.find_all(name_is_exists) + +#2.kwargs 参数href class_ +# t_list=bs.find_all(class_=True) +# for item in t_list: +# print(item) + +#limit + + +#css选择器 +#print(bs.select("title")) +#print(bs.select(".mnav"))#通过类名来查找 +#print(bs.select("#u1"))#通过id来查找 +#print(bs.select("a[class='bri'])")) +print(bs.select("head > title")) + +file.close() \ No newline at end of file diff --git a/test/demore.py b/test/demore.py new file mode 100644 index 0000000..4f0162c --- /dev/null +++ b/test/demore.py @@ -0,0 +1,21 @@ +import re + + + +#正则表达式:字符串模式判断字符串是否符合一定的标准 + +#创建模式对象 +# pat=re.compile("AA")#标准 +# b=pat.search("CBAAACDSAAAAA") #search方法查找比对 + +#b=re.search("asd","Aasd")#前面的是标准,后面的是要校验的字符串 + +#b=re.findall("a",'asddsaASDACasa') +#b=re.findall("[a-z]+","asddsaASDACasa") + +b=re.sub("a","A","avcdcasd")#找到a用A来替换,在第三个字符串中 + + + + +print(b) \ No newline at end of file diff --git a/test/student.xls b/test/student.xls new file mode 100644 index 0000000..394f049 Binary files /dev/null and b/test/student.xls differ diff --git a/test/test SQlite.py b/test/test SQlite.py new file mode 100644 index 0000000..2c2c9ae --- /dev/null +++ b/test/test SQlite.py @@ -0,0 +1,56 @@ +import sqlite3 + + + + + + +conn=sqlite3.connect("test.db")#打开或创建数据库文件 +# +# +# print("成功打开数据库") +# c=conn.cursor()#获得游标 +# sql=''' +# create table company +# (id int primary key not null, +# name text not null, +# age int not null, +# address char(50), +# salary real); +# +# +# ''' +# c.execute(sql)#执行sql语句 +# conn.commit()#提交数据库操作 +# +# print("成功建表") +#3.插入数据 +c=conn.cursor() +sql1=''' + insert into company (id,name,age,address,salary) + values(1,'张三',32,'chengdu',8000); +''' + +sql2=''' + insert into company (id,name,age,address,salary) + values(2,'李四',38,'chongqing',8800); +''' + +c.execute(sql1) +c.execute(sql2) + +conn.commit() +print("成功插入数据") +#4.查询数据 + +sql3="select id,name,address,salary from company" +cursor=c.execute(sql3) +for row in cursor: + print("id=",row[0]) + print("name=",row[1]) + print("address=",row[2]) + print("salary=",row[3],"\n") + +print("查询完毕") + +conn.close() \ No newline at end of file diff --git a/test/test.db b/test/test.db new file mode 100644 index 0000000..9fb85fe Binary files /dev/null and b/test/test.db differ diff --git a/test/testxlwt.py b/test/testxlwt.py new file mode 100644 index 0000000..70a9b28 --- /dev/null +++ b/test/testxlwt.py @@ -0,0 +1,8 @@ +import xlwt + +workbook=xlwt.Workbook(encoding="utf-8")#创建workbook对象 +worksheet=workbook.add_sheet('sheet1') +for i in range(1,10): + for j in range(i,10): + worksheet.write(j-1,i-1,str(i)+"×"+str(j)+"="+str(i*j)) #第一行参数‘行’,第二参数‘列’,第三参数内容 +workbook.save('student.xls') \ No newline at end of file diff --git "a/test/\347\231\276\345\272\246\344\270\200\344\270\213\357\274\214\344\275\240\345\260\261\347\237\245\351\201\223.html" "b/test/\347\231\276\345\272\246\344\270\200\344\270\213\357\274\214\344\275\240\345\260\261\347\237\245\351\201\223.html" new file mode 100644 index 0000000..f96d42a --- /dev/null +++ "b/test/\347\231\276\345\272\246\344\270\200\344\270\213\357\274\214\344\275\240\345\260\261\347\237\245\351\201\223.html" @@ -0,0 +1,1812 @@ + + + 百度一下,你就知道 + + + + + + + + + + + + + + + + + + + + + + +
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git "a/\350\261\206\347\223\243\347\224\265\345\275\261top250.xls" "b/\350\261\206\347\223\243\347\224\265\345\275\261top250.xls" new file mode 100644 index 0000000..9821dac Binary files /dev/null and "b/\350\261\206\347\223\243\347\224\265\345\275\261top250.xls" differ diff --git "a/\350\276\271\347\274\230\346\243\200\346\265\213/picture.py" "b/\350\276\271\347\274\230\346\243\200\346\265\213/picture.py" deleted file mode 100644 index 155a67a..0000000 --- "a/\350\276\271\347\274\230\346\243\200\346\265\213/picture.py" +++ /dev/null @@ -1,12 +0,0 @@ -import cv2 -def pic_canny(image): - img=cv2.imread(image) - blurred=cv2.GaussianBlur(img,(3,3),0) - gray=cv2.cvtColor(blurred,cv2.COLOR_BGR2GRAY) - xgrad=cv2.Sobel(gray.cv2_16SC1,1,0) - ygrad = cv2.Sobel(gray.cv2_16SC1, 0, 1) - output=cv2.Canny(xgrad,ygrad,50,150) - cv2.imshow("canny",output) - cv2.waitKey(0) - -pic_canny() \ No newline at end of file diff --git "a/\350\276\271\347\274\230\346\243\200\346\265\213/viedo.py" "b/\350\276\271\347\274\230\346\243\200\346\265\213/viedo.py" deleted file mode 100644 index 54e4594..0000000 --- "a/\350\276\271\347\274\230\346\243\200\346\265\213/viedo.py" +++ /dev/null @@ -1,23 +0,0 @@ -import cv2 - -def viedo_canny(video): - cap=cv2.VideoCapture(video) - while 1: - ret,frame=cap.read() - img=frame - if ret : - img_resize=cv2.resize(img,(1080,608)) - blurred = cv2.GaussianBlur(img_resize, (3, 3), 0) - gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY) - xgrad = cv2.Sobel(gray,cv2.CV_16SC1, 1, 0) - ygrad = cv2.Sobel(gray,cv2.CV_16SC1, 0, 1) - output = cv2.Canny(xgrad, ygrad, 50, 150) - cv2.imshow("video", output) - if cv2.waitKey(24) & 0xff==27: - break - else: - break - cap.release() - cv2.destroyAllWindows() - -viedo_canny("《原神》剧情PV-「神女劈观」.《原神》剧情PV-「神女劈观」.mp4")#把MP4格式视频放在根目录下, 然后复制路径到这里,即可运行