Use cloudscraper module instead of requests in order to bypass Cloudflare bot restrictions
This commit is contained in:
parent
eae6f0db24
commit
ca32f51e1a
@@ -1,5 +1,5 @@
|
||||
+import cloudscraper
|
||||
import pandas as pd
|
||||
-import requests
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime, timedelta
|
||||
@@ -14,12 +14,8 @@ def scrape_news(date: datetime) -> pd.DataFrame:
|
||||
formatted_date = convert_datetime_to_url_format(date)
|
||||
url = f"{base_url}?day={formatted_date}"
|
||||
|
||||
-# Set headers to mimic a web browser.
|
||||
-headers = {
|
||||
-"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
||||
-}
|
||||
|
||||
-response = requests.get(url, headers=headers)
|
||||
+scraper = cloudscraper.create_scraper()
|
||||
+response = scraper.get(url)
|
||||
if response.status_code != 200:
|
||||
print(f"Failed to retrieve news. Status code: {response.status_code}")
|
||||
return
|
||||
|
@@ -1,3 +1,3 @@
|
||||
beautifulsoup4
|
||||
+cloudscraper
|
||||
pandas
|
||||
-requests
|
Loading…
Reference in New Issue
Block a user