2424"""
2525This module contains the following classes:
2626
27+ * :class:`~aeneas.downloader.DownloadError`, which represents an error that occurred
28+ while downloading a Web resource.
2729* :class:`~aeneas.downloader.Downloader`, which downloads files from various Web sources.
2830
29- .. note:: This module requires Python modules ``youtube-dl`` and ``pafy`` (``pip install youtube-dl pafy ``).
31+ .. note:: This module requires Python module ``youtube-dl`` (``pip install youtube-dl``).
3032"""
3133
3234from __future__ import absolute_import
3335from __future__ import print_function
36+ import time
3437
3538from aeneas .logger import Loggable
3639from aeneas .runtimeconfiguration import RuntimeConfiguration
3740import aeneas .globalfunctions as gf
3841
3942
class DownloadError(Exception):
    """
    Signal that a Web resource could not be downloaded:
    either the given URL is not valid, or the download
    failed, e.g. because of temporary network issues.
    """
50+
51+
4052class Downloader (Loggable ):
4153 """
4254 Download files from various Web sources.
55+ At the moment, only YouTube videos
56+ are officially supported.
4357
4458 :param rconf: a runtime configuration
4559 :type rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
@@ -54,9 +68,8 @@ def audio_from_youtube(
5468 source_url ,
5569 download = True ,
5670 output_file_path = None ,
57- preferred_index = None ,
71+ download_format = None ,
5872 largest_audio = True ,
59- preferred_format = None
6073 ):
6174 """
6275 Download an audio stream from a YouTube video,
@@ -67,8 +80,8 @@ def audio_from_youtube(
6780
6881 Otherwise, download the audio stream best matching
6982 the provided parameters, as follows.
70- If ``preferred_index `` is not ``None``,
71- download the audio stream at that index .
83+ If ``download_format `` is not ``None``,
84+ download the audio stream with the specified format .
7285 If ``largest_audio`` is ``True``,
7386 download the largest audiostream;
7487 otherwise, download the smallest audiostream.
@@ -80,76 +93,150 @@ def audio_from_youtube(
8093
8194 :param string source_url: the URL of the YouTube video
8295 :param bool download: if ``True``, download the audio stream
83- best matching ``preferred_index`` or ``preferred_format``
84- and ``largest_audio``;
96+ best matching ``download_format`` or
97+ ``largest_audio``;
8598 if ``False``, return the list of available audio streams
8699 :param string output_file_path: the path where the downloaded audio should be saved;
87100 if ``None``, create a temporary file
88- :param int preferred_index: preferably download this audio stream
101+ :param string download_format: download the audio stream with the given format
89102 :param bool largest_audio: if ``True``, download the largest audio stream available;
90103 if ``False``, download the smallest one.
91- :param string preferred_format: preferably download this audio format
92- :rtype: string or list of pafy audio streams
93- :raises: ImportError: if ``pafy`` is not installed
104+ :rtype: string or list of dict
105+ :raises: ImportError: if ``youtube-dl`` is not installed
94106 :raises: OSError: if ``output_file_path`` cannot be written
95- :raises: ValueError: if ``source_url`` is not a valid YouTube URL
107+ :raises: :class:`~aeneas.downloader.DownloadError`: if ``source_url`` is not a valid YouTube URL
108+ or it cannot be downloaded e.g. for temporary
109+ network issues
96110 """
97- def select_audiostream (audiostreams ):
98- """ Select the audiostream best matching the given parameters. """
99- if preferred_index is not None :
100- if preferred_index in range (len (audiostreams )):
101- self .log ([u"Selecting audiostream with index %d" , preferred_index ])
102- return audiostreams [preferred_index ]
103- else :
104- self .log_warn ([u"Audio stream index '%d' not allowed" , preferred_index ])
105- self .log_warn (u"Ignoring the requested audio stream index" )
106- # selecting by preferred format
107- streams = audiostreams
108- if preferred_format is not None :
109- self .log ([u"Selecting audiostreams by preferred format %s" , preferred_format ])
110- streams = [audiostream for audiostream in streams if audiostream .extension == preferred_format ]
111- if len (streams ) < 1 :
112- self .log ([u"No audiostream with preferred format %s" , preferred_format ])
113- streams = audiostreams
114- # sort by size
115- streams = sorted ([(audio .get_filesize (), audio ) for audio in streams ])
116- if largest_audio :
117- self .log (u"Selecting largest audiostream" )
118- selected = streams [- 1 ][1 ]
119- else :
120- self .log (u"Selecting smallest audiostream" )
121- selected = streams [0 ][1 ]
111+
def _list_audiostreams(self, source_url):
    """
    Return a list of dicts, each describing
    an available audio-only stream for the given ``source_url``.

    Each dict has the keys ``format`` (the first space-separated
    token of the youtube-dl format string), ``filesize``,
    ``ext`` and ``abr``. ``filesize`` and ``abr`` are ``None``
    when youtube-dl does not report them.

    :param string source_url: the URL to be queried
    :rtype: list of dict
    """
    self.log(u"Getting audiostreams...")
    options = {
        "download": False,
        "quiet": True,
        "skip_download": True,
    }
    with youtube_dl.YoutubeDL(options) as ydl:
        info = ydl.extract_info(source_url, download=False)
        # keep only the formats carrying audio but no video;
        # the codec fields are optional in youtube-dl format dicts,
        # so a missing one must not abort the whole listing
        audio_formats = [
            f for f in info["formats"]
            if f.get("vcodec") == "none" and f.get("acodec") != "none"
        ]
        # filesize and abr are optional too: report None when absent
        audiostreams = [
            {
                "format": a["format"].split(" ")[0],
                "filesize": a.get("filesize"),
                "ext": a["ext"],
                "abr": a.get("abr"),
            }
            for a in audio_formats
        ]
    self.log(u"Getting audiostreams... done")
    return audiostreams
136+
137+ def _select_audiostream (self , audiostreams , download_format = None , largest_audio = False ):
138+ """
139+ Select the best-matching audiostream:
140+ if a ``download_format`` is given, use it,
141+ otherwise act according to ``largest_audio``.
142+ If ``download_format`` is not matching any
143+ of the available audiostreams, then just act
144+ according to ``largest_audio``.
145+ """
146+ self .log (u"Selecting best-matching audiostream..." )
147+ selected = None
148+ if download_format is not None :
149+ matching = [a for a in audiostreams if a ["format" ] == download_format ]
150+ if len (matching ) > 0 :
151+ selected = matching [0 ]
152+ if selected is None :
153+ sa = sorted (audiostreams , key = lambda x : x ["filesize" ])
154+ selected = sa [- 1 ] if largest_audio else sa [0 ]
155+ self .log (u"Selecting best-matching audiostream... done" )
122156 return selected
123157
124- try :
125- import pafy
126- except ImportError as exc :
127- self .log_exc (u"Python module pafy is not installed" , exc , True , ImportError )
def _compose_output_file_path(self, extension, output_file_path=None):
    """
    Determine the path where the downloaded audio will be saved.

    When ``output_file_path`` is ``None``, a temporary file
    with suffix ``.extension`` is obtained via ``gf.tmp_file``
    and immediately handed to ``gf.delete_file``, and its path is returned.
    Otherwise the given path is checked with
    ``gf.file_can_be_written`` and, if the check fails, the problem
    is reported through ``self.log_exc`` with ``OSError``.

    :param string extension: the file extension, without the leading dot
    :param string output_file_path: the requested output path, or ``None``
    :rtype: string
    """
    self.log(u"Determining output file path...")
    if output_file_path is not None:
        self.log(u"output_file_path is not None: cheking that file can be written")
        writable = gf.file_can_be_written(output_file_path)
        if not writable:
            self.log_exc(u"Path '%s' cannot be written. Wrong permissions?" % (output_file_path), None, True, OSError)
    else:
        self.log(u"output_file_path is None: creating temp file")
        tmp_handler, output_file_path = gf.tmp_file(
            root=self.rconf[RuntimeConfiguration.TMP_PATH],
            suffix=(".%s" % extension)
        )
        # only the path is needed from here on
        gf.delete_file(tmp_handler, output_file_path)
    self.log(u"Determining output file path... done")
    self.log([u"Output file path is '%s'", output_file_path])
    return output_file_path
179+
def _download_audiostream(self, source_url, fmt, output_path):
    """
    Download the audiostream of ``source_url`` having format ``fmt``
    through ``youtube_dl.YoutubeDL``, using ``output_path``
    as the output template.

    :param string source_url: the URL to download from
    :param string fmt: the format passed to youtube-dl
    :param string output_path: the value passed as ``outtmpl``
    """
    self.log(u"Downloading audiostream...")
    ydl_options = {
        "download": True,
        "format": fmt,
        "outtmpl": output_path,
        "quiet": True,
        "skip_download": False,
    }
    urls = [source_url]
    with youtube_dl.YoutubeDL(ydl_options) as downloader:
        downloader.download(urls)
    self.log(u"Downloading audiostream... done")
128192
129193 try :
130- video = pafy .new (source_url )
131- except (IOError , OSError , ValueError ) as exc :
132- self .log_exc (u"The specified source URL '%s' is not a valid YouTube URL or you are offline" % (source_url ), exc , True , ValueError )
194+ import youtube_dl
195+ except ImportError as exc :
196+ self .log_exc (u"Python module youtube-dl is not installed" , exc , True , ImportError )
197+
198+ # retry parameters
199+ sleep_delay = self .rconf [RuntimeConfiguration .DOWNLOADER_SLEEP ]
200+ attempts = self .rconf [RuntimeConfiguration .DOWNLOADER_RETRY_ATTEMPTS ]
201+ self .log ([u"Sleep delay: %.3f" , sleep_delay ])
202+ self .log ([u"Retry attempts: %d" , attempts ])
203+
204+ # get audiostreams
205+ att = attempts
206+ while att > 0 :
207+ self .log (u"Sleeping to throttle API usage..." )
208+ time .sleep (sleep_delay )
209+ self .log (u"Sleeping to throttle API usage... done" )
210+ try :
211+ audiostreams = _list_audiostreams (self , source_url )
212+ break
213+ except :
214+ self .log_warn (u"Unable to list audio streams, retry" )
215+ att -= 1
216+ if att <= 0 :
217+ self .log_exc (u"All downloader requests failed: wrong URL or you are offline" , None , True , DownloadError )
133218
134219 if not download :
135- self .log (u"Returning the list of audio streams" )
136- return video .audiostreams
137-
138- output_path = output_file_path
139- if output_file_path is None :
140- self .log (u"output_path is None: creating temp file" )
141- handler , output_path = gf .tmp_file (root = self .rconf [RuntimeConfiguration .TMP_PATH ])
142- else :
143- if not gf .file_can_be_written (output_path ):
144- self .log_exc (u"Path '%s' cannot be written. Wrong permissions?" % (output_path ), None , True , OSError )
145-
146- audiostream = select_audiostream (video .audiostreams )
147- if output_file_path is None :
148- gf .delete_file (handler , output_path )
149- output_path += "." + audiostream .extension
150-
151- self .log ([u"output_path is '%s'" , output_path ])
152- self .log (u"Downloading..." )
153- audiostream .download (filepath = output_path , quiet = True )
154- self .log (u"Downloading... done" )
220+ self .log (u"Returning list of audiostreams" )
221+ return audiostreams
222+
223+ # download the best-matching audiostream
224+ if len (audiostreams ) == 0 :
225+ self .log_exc (u"No audiostreams available for the provided URL" , None , True , OSError )
226+ audiostream = _select_audiostream (self , audiostreams , download_format , largest_audio )
227+ output_path = _compose_output_file_path (self , audiostream ["ext" ], output_file_path )
228+ att = attempts
229+ while att > 0 :
230+ self .log (u"Sleeping to throttle API usage..." )
231+ time .sleep (sleep_delay )
232+ self .log (u"Sleeping to throttle API usage... done" )
233+ try :
234+ _download_audiostream (self , source_url , audiostream ["format" ], output_path )
235+ break
236+ except :
237+ self .log_warn (u"Unable to download audio streams, retry" )
238+ att -= 1
239+ if att <= 0 :
240+ self .log_exc (u"All downloader requests failed: wrong URL or you are offline" , None , True , DownloadError )
241+
155242 return output_path
0 commit comments