mirror of
https://github.com/coursera-dl/coursera-dl.git
synced 2026-01-23 02:35:37 +00:00
Add tests for downloader wrappers and iter_modules
This commit is contained in:
parent
d72644258d
commit
31d333e63d
3 changed files with 189 additions and 1 deletions
|
|
@ -1,8 +1,14 @@
|
|||
import abc
|
||||
import logging
|
||||
import traceback
|
||||
from multiprocessing.dummy import Pool
|
||||
|
||||
|
||||
class AbstractDownloader(object):
|
||||
"""
|
||||
Base class for download wrappers. Two methods should be implemented:
|
||||
`download` and `join`.
|
||||
"""
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
def __init__(self, file_downloader):
|
||||
|
|
@ -18,13 +24,22 @@ class AbstractDownloader(object):
|
|||
raise NotImplementedError()
|
||||
|
||||
def _download_wrapper(self, url, *args, **kwargs):
|
||||
"""
|
||||
Actual download call. Calls the underlying file downloader,
|
||||
catches all exceptions and returns the result.
|
||||
"""
|
||||
try:
|
||||
return url, self._file_downloader.download(url, *args, **kwargs)
|
||||
except Exception as e:
|
||||
logging.error("AbstractDownloader: %s", traceback.format_exc())
|
||||
return url, e
|
||||
|
||||
|
||||
class ConsecutiveDownloader(AbstractDownloader):
|
||||
"""
|
||||
This class calls underlying file downloader in a sequential order
|
||||
in the same thread where it was created.
|
||||
"""
|
||||
def download(self, callback, url, *args, **kwargs):
    """
    Download ``url`` right away in the calling thread and report the
    outcome through ``callback(url, result)``.

    ``result`` is whatever ``_download_wrapper`` returned as the second
    element of its tuple.
    """
    _wrapped_url, outcome = self._download_wrapper(url, *args, **kwargs)
    callback(url, outcome)
|
||||
|
|
@ -35,7 +50,10 @@ class ConsecutiveDownloader(AbstractDownloader):
|
|||
|
||||
|
||||
class ParallelDownloader(AbstractDownloader):
|
||||
def __init__(self, file_downloader, processes):
|
||||
"""
|
||||
This class uses multiprocessing.dummy.Pool (a thread-based pool) to run download requests in parallel.
|
||||
"""
|
||||
def __init__(self, file_downloader, processes=1):
|
||||
super(ParallelDownloader, self).__init__(file_downloader)
|
||||
self._pool = Pool(processes=processes)
|
||||
|
||||
|
|
|
|||
154
coursera/test/test_workflow.py
Normal file
154
coursera/test/test_workflow.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
import pytest
|
||||
import requests
|
||||
from requests.exceptions import RequestException
|
||||
|
||||
from coursera.workflow import CourseraDownloader, _iter_modules, _walk_modules
|
||||
from coursera.commandline import parse_args
|
||||
from coursera.parallel import ConsecutiveDownloader, ParallelDownloader
|
||||
from coursera.downloaders import Downloader
|
||||
|
||||
|
||||
class MockedCommandLineArgs(object):
    """
    Stand-in for parsed command line arguments.

    The attributes start out as whatever parse_args produces for a minimal
    command line; keyword arguments given to the constructor are applied
    on top of them and therefore win over those values.
    """
    def __init__(self, **kwargs):
        args = parse_args('-u username -p password test_class'.split())
        # Parsed values go in first, explicit overrides second.
        for values in (args.__dict__, kwargs):
            self.__dict__.update(values)

    def __repr__(self):
        return repr(self.__dict__)
|
||||
|
||||
|
||||
class MockedFailingDownloader(Downloader):
    """
    Downloader mock whose _start_download raises the exception object
    supplied to the constructor. When None is supplied, _start_download
    does nothing and returns None.
    """
    def __init__(self, exception_to_throw):
        self._exception_to_throw = exception_to_throw

    def _start_download(self, *args, **kwargs):
        if self._exception_to_throw is not None:
            raise self._exception_to_throw
|
||||
|
||||
|
||||
# URL of the single resource placed in the tree built by make_test_modules;
# also used as the expected failed URL in the parametrized test below.
TEST_URL = "https://www.coursera.org/api/test-url"
|
||||
|
||||
|
||||
def make_test_modules():
    """
    Build the minimal nested structure used by the tests: one "section1"
    entry holding one "module1" entry holding one "lecture1" entry, whose
    only resource is an "en.txt" item pointing at TEST_URL with the title
    "title".
    """
    resources = {"en.txt": [[TEST_URL, "title"]]}
    lecture = ["lecture1", resources]
    module = ["module1", [lecture]]
    section = ["section1", [module]]
    return [section]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'expected_failed_urls,exception_to_throw,downloader_wrapper_class', [
        ([], None, ConsecutiveDownloader),
        ([], None, ParallelDownloader),
        ([TEST_URL], RequestException('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], RequestException('Test exception'), ParallelDownloader),
        ([TEST_URL], Exception('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], Exception('Test exception'), ParallelDownloader),
        ([TEST_URL], ValueError('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], ValueError('Test exception'), ParallelDownloader),
        ([TEST_URL], AttributeError('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], AttributeError('Test exception'), ParallelDownloader),
    ]
)
def test_failed_urls_are_collected(expected_failed_urls,
                                   exception_to_throw,
                                   downloader_wrapper_class):
    """
    When the file downloader raises, the downloader wrapper must intercept
    the exception and the course downloader must record the offending URL
    in failed_urls. With no exception, failed_urls must stay empty.
    """
    wrapper = downloader_wrapper_class(
        MockedFailingDownloader(exception_to_throw))
    mocked_args = MockedCommandLineArgs(overwrite=True)
    course_downloader = CourseraDownloader(
        downloader=wrapper,
        commandline_args=mocked_args,
        class_name='test_class',
        path='',
        ignored_formats=None,
        disable_url_skipping=False)

    course_downloader.download_modules(make_test_modules())
    assert expected_failed_urls == course_downloader.failed_urls
|
||||
|
||||
|
||||
def test_iter_modules():
    """
    Check that _iter_modules visits every level of the sample tree and
    that the intermediate values are formatted as expected.
    Filtering is not covered by this test.
    """
    iterator = _iter_modules(modules=make_test_modules(),
                             class_name='test_class',
                             path='',
                             ignored_formats=None,
                             args=MockedCommandLineArgs())

    expected_output = [
        (0, '01_section1'),
        (0, 'test_class/01_section1/01_module1'),
        (0, 'lecture1', 'en.txt', 'title'),
        ('en.txt', 'https://www.coursera.org/api/test-url', 'title')
    ]

    collected_output = []
    for module in iterator:
        collected_output.append((module.index, module.name))
        for section in module.sections:
            collected_output.append((section.index, section.dir))
            for lecture in section.lectures:
                for resource in lecture.resources:
                    # Two records per resource: one seen from the lecture,
                    # one describing the resource itself.
                    collected_output.extend([
                        (lecture.index, lecture.name,
                         resource.fmt, resource.title),
                        (resource.fmt, resource.url, resource.title),
                    ])

    assert expected_output == collected_output
|
||||
|
||||
def test_walk_modules():
    """
    Check _walk_modules, the flattened counterpart of _iter_modules: the
    sample tree must produce exactly one (module, section, lecture,
    resource) combination with the expected formatted values.
    """
    walker = _walk_modules(modules=make_test_modules(),
                           class_name='test_class',
                           path='',
                           ignored_formats=None,
                           args=MockedCommandLineArgs())

    expected_output = [
        (0, '01_section1',
         0, 'test_class/01_section1/01_module1',
         0, 'lecture1', 'test_class/01_section1/01_module1/01_lecture1_title.en.txt',
         'https://www.coursera.org/api/test-url')]

    collected_output = [
        (module.index, module.name,
         section.index, section.dir,
         lecture.index, lecture.name,
         lecture.filename(resource.fmt, resource.title),
         resource.url)
        for module, section, lecture, resource in walker
    ]

    assert expected_output == collected_output
|
||||
|
|
@ -96,6 +96,22 @@ def _iter_modules(modules, class_name, path, ignored_formats, args):
|
|||
yield IterModule(index, module)
|
||||
|
||||
|
||||
def _walk_modules(modules, class_name, path, ignored_formats, args):
    """
    Helper generator that traverses modules and yields a flattened
    sequence of (module, section, lecture, resource) tuples.

    All arguments are forwarded unchanged to _iter_modules; the nesting
    module -> sections -> lectures -> resources is walked depth-first.
    """
    hierarchy = _iter_modules(modules=modules,
                              class_name=class_name,
                              path=path,
                              ignored_formats=ignored_formats,
                              args=args)
    flattened = (
        (module, section, lecture, resource)
        for module in hierarchy
        for section in module.sections
        for lecture in section.lectures
        for resource in lecture.resources
    )
    for entry in flattened:
        yield entry
|
||||
|
||||
|
||||
class CourseDownloader(object):
|
||||
__metaclass__ = abc.ABCMeta
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue