Use cloudscraper module instead of requests in order to bypass Cloudflare bot restrictions
This commit is contained in:
parent
eae6f0db24
commit
ca32f51e1a
@ -1,5 +1,5 @@
|
|||||||
|
import cloudscraper
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import requests
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@ -14,12 +14,8 @@ def scrape_news(date: datetime) -> pd.DataFrame:
|
|||||||
formatted_date = convert_datetime_to_url_format(date)
|
formatted_date = convert_datetime_to_url_format(date)
|
||||||
url = f"{base_url}?day={formatted_date}"
|
url = f"{base_url}?day={formatted_date}"
|
||||||
|
|
||||||
# Set headers to mimic a web browser.
|
scraper = cloudscraper.create_scraper()
|
||||||
headers = {
|
response = scraper.get(url)
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
|
||||||
}
|
|
||||||
|
|
||||||
response = requests.get(url, headers=headers)
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
print(f"Failed to retrieve news. Status code: {response.status_code}")
|
print(f"Failed to retrieve news. Status code: {response.status_code}")
|
||||||
return
|
return
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
pandas
|
cloudscraper
|
||||||
requests
|
pandas
|
Loading…
Reference in New Issue
Block a user