-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCrawl_knpu.py
More file actions
45 lines (41 loc) · 1.6 KB
/
Crawl_knpu.py
File metadata and controls
45 lines (41 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from selenium import webdriver
import requests
from bs4 import BeautifulSoup
# Start Chrome through a local chromedriver binary and submit the Kakao login
# form, so that the cafe pages requested later are loaded in the same browser
# session.
path = "D:/python-coding/chromedriver.exe"
driver = webdriver.Chrome(path)
# Session-wide implicit wait: element lookups may poll for up to 3 seconds.
driver.implicitly_wait(3)
driver.get('https://accounts.kakao.com/login?continue=https%3A%2F%2Flogins.daum.net%2Faccounts%2Fksso.do%3Frescue%3Dtrue%26url%3Dhttp%253A%252F%252Ftop.cafe.daum.net%252F')

# Explicit checks instead of `assert`: assert statements are removed when
# Python runs with -O, which would silently disable these sanity checks.
if "카카오계정" not in driver.title:
    raise RuntimeError(
        "Unexpected page title on the Kakao login page: {!r}".format(driver.title)
    )
if "No results found." in driver.page_source:
    raise RuntimeError("Login page reported 'No results found.'")

# Fill in the login form.
# NOTE(review): both credentials are empty strings here — presumably scrubbed
# before publishing; real values must be supplied for the login to succeed.
emailid = driver.find_element_by_name("email")
emailid.clear()
emailid.send_keys("")
pw = driver.find_element_by_name("password")
pw.clear()
pw.send_keys("")
driver.find_element_by_css_selector("button.btn_g.btn_confirm.submit").click()

# Same sentinel check on whatever page is shown right after submitting.
if "No results found." in driver.page_source:
    raise RuntimeError("Page after login reported 'No results found.'")
def _iter_paragraph_texts(soup, first_child=2):
    """Yield the text of consecutive ``#user_contents > p:nth-child(k)`` nodes.

    Iteration starts at ``k = first_child`` and stops at the first ``k`` for
    which the selector matches nothing or the matched element has empty text.

    NOTE(review): the default of 2 mirrors the index the original script used
    for the first article — presumably the first child is not body text;
    confirm against the real page markup.
    """
    child_idx = first_child
    while True:
        matches = soup.select(
            '#user_contents > p:nth-child({})'.format(child_idx)
        )
        if not matches:
            return
        text = matches[0].get_text()
        if text == '':
            return
        yield text
        child_idx += 1


# Walk the cafe articles in order and dump each article's paragraph text into
# its own numbered text file.
n = 2  # article number placed in the cafe URL
t = 1  # sequence number of the next output file
try:
    # The sentinel is spelled exactly like the other checks in this script
    # ("No ...", not "NO ..."); with the wrong case the guard could never fire.
    while "No results found." not in driver.page_source:
        driver.get("http://cafe.daum.net/knpuarchive/IDfT/{}".format(n))
        # Stop cleanly on the sentinel instead of asserting: this is the very
        # condition the loop guard is meant to end on, and asserts vanish
        # under -O.
        if "No results found." in driver.page_source:
            break
        # The article body is read from the frame named 'down'. The implicit
        # wait is a session setting configured once after the driver is
        # created, so it is not re-applied on every iteration.
        driver.switch_to.frame('down')
        soup = BeautifulSoup(driver.page_source, "html.parser")

        out_path = "D:/python-coding/files/textfile{}.txt".format(t)
        # One 'w' open both truncates and writes; the context manager closes
        # the file even if extraction raises. Paragraph texts are written
        # back-to-back with no separator, as before.
        with open(out_path, 'w', encoding='utf-8') as f:
            f.writelines(_iter_paragraph_texts(soup))

        t += 1
        # The article number advances by exactly one per article. It is kept
        # separate from the paragraph index: sharing one counter skipped
        # articles and looped forever on an article with no matching paragraph.
        n += 1
finally:
    # Always release the browser and chromedriver process.
    driver.quit()