Is there any way to download .tar files from the web to a local folder faster?

I am trying to download a batch of .tar files from the following website to my local folder, but the download is very slow. The average size of each file is in the range of 30-60 MB. Is there a better way to improve this code so that I can download them faster? Please check the code below:

import requests
from os import mkdir
from os.path import isdir
from bs4 import BeautifulSoup

url = ""

years = [str(year) for year in range(2005, 2021)]
links = [url + year + "/" for year in years]

def get_tarlinks():
    t_links = []
    for page in links:
        # create response object
        r = requests.get(page)
        # create BeautifulSoup object
        soup = BeautifulSoup(r.content, 'html5lib')
        # find all links on the webpage
        a_links = soup.find_all('a')
        # keep only the links ending with .tar
        tar_links = [page + link['href'] for link in a_links
                     if link['href'].endswith('.tar')]
        t_links.extend(tar_links)
    return t_links

t_links = get_tarlinks()

src_path = "D:/Sandeep/Thesis/Data/"

for file_url in t_links:
    # the last two path components are the year folder and the filename
    year, filename = file_url.split('/')[-2:]
    if not isdir(src_path + year):
        mkdir(src_path + year)
    r = requests.get(file_url, allow_redirects=True)
    with open(src_path + year + "/" + filename, "wb") as f:
        f.write(r.content)
Link to the same question posted on Stack Overflow

Note: Please check the indentation when you copy this code to your IDE. Thanks!

I would assume this is very strongly limited by your internet connection speed, by the physical disks on which the data is stored, or by rate limiting the server applies to reduce its traffic.

Are you using a Wi-Fi or a wired connection? As I understand it, you are doing this for your thesis, and universities usually have quite good internet connections, so perhaps download the data at your university facilities. If there is nothing to be done about the connection, I would recommend writing a script that downloads the data automatically (with nohup, tmux, Slurm, or something similar) and leaving it to run for however long it takes. You can start working on the rest of your code once the first files have arrived, and make the most of the time the data is downloading.
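If the server allows it, another common way to speed things up is to download several files at once and stream each response to disk instead of holding it in memory. Below is a minimal sketch of that idea; `download_one`, `download_all`, and the `workers` parameter are names I made up for illustration, and you would pass in the `t_links` list from your own code:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import requests


def download_one(url: str, dest_dir: Path) -> Path:
    """Stream one file to disk so a large archive never sits fully in memory."""
    dest_dir.mkdir(parents=True, exist_ok=True)
    target = dest_dir / url.rstrip("/").rsplit("/", 1)[-1]
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(target, "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)
    return target


def download_all(urls, dest_dir, workers=4):
    """Download several URLs concurrently; returns the saved file paths."""
    dest_dir = Path(dest_dir)
    saved = []
    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = {pool.submit(download_one, u, dest_dir): u for u in urls}
        for fut in as_completed(futures):
            saved.append(fut.result())
    return saved
```

With the list from your question you would call something like `download_all(t_links, "D:/Sandeep/Thesis/Data", workers=4)`. Keep `workers` modest: too many parallel connections can get you throttled or blocked by the server.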

Hey Jorahu,

Thank you for your valuable suggestions. It actually worked.

Thank you.
