# PythonScripts/NewsPaperDownloaderV2.py at master · RajeshReddyG/PythonScripts · GitHub
# -*- coding: utf-8 -*-
"""
Created on Thu Apr  4 19:30:46 2019

@author: Rajesh.Gundupalli
"""

import os
import re

import html5lib
import requests
from bs4 import BeautifulSoup

# Page that carries the link to The Hindu e-paper. The link is either a
# Google Drive share link or a direct newspapertoday.xyz link.
# Alternative source page: "https://www.bitul.in/epaper/indian-express-pdf/"
URL = "https://www.bitul.in/epaper/the-hindu/"

# Substrings used to recognise the two kinds of download link on the page.
DIRECT_LINK_MARKER = "http://www.newspapertoday.xyz"
DRIVE_LINK_MARKER = "drive.google.com"

# Where the PDF is written, relative to the current working directory.
OUTPUT_PATH = "resources/NewsPaper/NewsPaper.pdf"

# Seconds to wait on the network before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# A Drive file id appears either as a ".../d/<id>/..." path segment or as an
# "id=<id>" query parameter, depending on how the link was shared.
_DRIVE_ID_PATTERN = re.compile(r"(?:/d/|[?&]id=)([A-Za-z0-9_-]+)")


def pick_paper_link(hrefs):
    """Return the first usable newspaper link among *hrefs*.

    Args:
        hrefs: iterable of anchor ``href`` values in page order; entries may
            be ``None`` for anchors without an ``href``.

    Returns:
        ``(link, is_direct)`` for the first href that contains either marker,
        where ``is_direct`` is True for a newspapertoday.xyz link (usable
        as-is) and False for a Google Drive link (needs converting to a
        download URL). ``None`` when no href matches.
    """
    for href in hrefs:
        if not href:
            continue
        text = str(href)
        if DIRECT_LINK_MARKER in text:
            return href, True
        if DRIVE_LINK_MARKER in text:
            return href, False
    return None


def drive_download_url(share_link):
    """Convert a Google Drive share link into its direct-download URL.

    The file id is located by pattern rather than by fixed character
    offsets, so ids of any length and both ``/d/<id>`` and ``id=<id>`` link
    styles are handled.

    Raises:
        ValueError: if no file id can be found in *share_link*.
    """
    match = _DRIVE_ID_PATTERN.search(str(share_link))
    if match is None:
        raise ValueError(
            "Could not find a Google Drive file id in: {}".format(share_link)
        )
    return (
        "https://drive.google.com/uc?authuser=0&id="
        + match.group(1)
        + "&export=download"
    )


def main():
    """Find the e-paper link on URL and save the PDF to OUTPUT_PATH."""
    page = requests.get(URL, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html5lib')

    # Check the hyperlinks in page order and stop at the first matching one.
    found = pick_paper_link(anchor.get('href') for anchor in soup.find_all('a'))
    if found is None:
        raise SystemExit(
            "No Google Drive or newspapertoday.xyz link found on " + URL
        )
    link, is_direct = found
    print(link)

    file_url = link if is_direct else drive_download_url(link)

    # The target directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)

    with requests.get(file_url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        # Fail before touching the output file so an HTTP error page is
        # never saved as the PDF.
        response.raise_for_status()
        with open(OUTPUT_PATH, "wb") as pdf:
            # Write one chunk at a time so the whole file is never held in
            # memory.
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    pdf.write(chunk)


if __name__ == "__main__":
    main()