Python
python: python 파이썬 # html 태그 제거
최무회
2020. 4. 26. 18:12
import requests
from bs4 import BeautifulSoup
import re
# Matches one HTML tag. ``[^>]`` also matches newlines, so tags whose
# attributes wrap across several lines are removed as well.
_TAG_RE = re.compile(r"<[^>]+>")


def strip_html_tags(markup):
    """Return *markup* with HTML tags removed and outer whitespace trimmed.

    This is a plain regex strip: text between tags is kept as-is, and HTML
    entities such as ``&amp;`` are not decoded.
    """
    return _TAG_RE.sub("", markup).strip()


def main(url="https://www.naver.com"):
    """Fetch *url*, collect its ``<p>`` elements and print their text.

    The intermediate ``type(...)`` prints are kept from the original demo to
    show what each step returns.

    Raises:
        requests.RequestException: on connection problems, a timeout, or an
            HTTP error status.
    """
    # The URL needs an explicit scheme; a bare "naver.com" is rejected by
    # requests before any network traffic happens.
    response = requests.get(url, timeout=10)
    response.raise_for_status()  # do not parse an error page as content
    print(type(response))  # <class 'requests.models.Response'>

    body = response.content
    print(type(body))  # <class 'bytes'>

    soup = BeautifulSoup(body, "html.parser")
    print(type(soup))  # <class 'bs4.BeautifulSoup'>

    # Stringify each tag on its own; str() of the whole result list would
    # leak the list's "[", "]" and ", " separators into the output.
    paragraphs = "\n".join(str(tag) for tag in soup.find_all("p"))
    print(type(paragraphs))  # <class 'str'>

    print(strip_html_tags(paragraphs))  # remove the HTML tags


if __name__ == "__main__":
    main()