Add tests for downloader wrappers and iter_modules

This commit is contained in:
Yuri Bochkarev 2016-07-17 16:32:43 +03:00
parent d72644258d
commit 31d333e63d
3 changed files with 189 additions and 1 deletions

View file

@ -1,8 +1,14 @@
import abc
import logging
import traceback
from multiprocessing.dummy import Pool
class AbstractDownloader(object):
"""
Base class for download wrappers. Two methods should be implemented:
`download` and `join`.
"""
__metaclass__ = abc.ABCMeta
def __init__(self, file_downloader):
@ -18,13 +24,22 @@ class AbstractDownloader(object):
raise NotImplementedError()
def _download_wrapper(self, url, *args, **kwargs):
"""
Actual download call. Calls the underlying file downloader,
catches all exceptions and returns the result.
"""
try:
return url, self._file_downloader.download(url, *args, **kwargs)
except Exception as e:
logging.error("AbstractDownloader: %s", traceback.format_exc())
return url, e
class ConsecutiveDownloader(AbstractDownloader):
"""
This class calls underlying file downloader in a sequential order
in the same thread where it was created.
"""
def download(self, callback, url, *args, **kwargs):
    """
    Download `url` right away and report the outcome through `callback`.

    The work is delegated to `self._download_wrapper`; `callback` is then
    invoked as `callback(url, result)` where `result` is the second item
    of the pair returned by the wrapper.
    """
    wrapped = self._download_wrapper(url, *args, **kwargs)
    _ignored_url, outcome = wrapped
    callback(url, outcome)
@ -35,7 +50,10 @@ class ConsecutiveDownloader(AbstractDownloader):
class ParallelDownloader(AbstractDownloader):
def __init__(self, file_downloader, processes):
"""
This class uses a thread pool (multiprocessing.dummy.Pool) to run download requests in parallel.
"""
def __init__(self, file_downloader, processes=1):
"""
Set up the wrapper and its worker pool.

`file_downloader` is passed on to the base-class constructor.
`processes` is forwarded to `Pool(processes=...)`; the `Pool` imported
by this module comes from `multiprocessing.dummy`, so the workers are
threads. It defaults to 1.
"""
super(ParallelDownloader, self).__init__(file_downloader)
self._pool = Pool(processes=processes)

View file

@ -0,0 +1,154 @@
import pytest
import requests
from requests.exceptions import RequestException
from coursera.workflow import CourseraDownloader, _iter_modules, _walk_modules
from coursera.commandline import parse_args
from coursera.parallel import ConsecutiveDownloader, ParallelDownloader
from coursera.downloaders import Downloader
class MockedCommandLineArgs(object):
    """
    Stand-in for parsed command-line arguments.

    Starts from the values `parse_args` produces for a minimal command
    line and lets the caller override any of them through keyword
    arguments passed to the constructor.
    """

    def __init__(self, **kwargs):
        defaults = parse_args('-u username -p password test_class'.split())
        attributes = vars(self)
        attributes.update(vars(defaults))
        # Overrides are applied last so they win over the parsed defaults.
        attributes.update(kwargs)

    def __repr__(self):
        return repr(vars(self))
class MockedFailingDownloader(Downloader):
    """
    Downloader mock whose `_start_download` raises the exception that was
    handed to the constructor. Passing None makes `_start_download` do
    nothing.
    """

    def __init__(self, exception_to_throw):
        # The base-class constructor is not invoked here; only the
        # exception to raise is stored.
        self._exception_to_throw = exception_to_throw

    def _start_download(self, *args, **kwargs):
        failure = self._exception_to_throw
        if failure is not None:
            raise failure
TEST_URL = "https://www.coursera.org/api/test-url"


def make_test_modules():
    """
    Build the smallest nested fixture used by the tests: one section that
    holds one module, which holds one lecture with a single "en.txt"
    resource pointing at TEST_URL. A fresh structure is returned on every
    call.
    """
    resource = [TEST_URL, "title"]
    lecture = ["lecture1", {"en.txt": [resource]}]
    module = ["module1", [lecture]]
    section = ["section1", [module]]
    return [section]
@pytest.mark.parametrize(
    'expected_failed_urls,exception_to_throw,downloader_wrapper_class', [
        ([], None, ConsecutiveDownloader),
        ([], None, ParallelDownloader),
        ([TEST_URL], RequestException('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], RequestException('Test exception'), ParallelDownloader),
        ([TEST_URL], Exception('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], Exception('Test exception'), ParallelDownloader),
        ([TEST_URL], ValueError('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], ValueError('Test exception'), ParallelDownloader),
        ([TEST_URL], AttributeError('Test exception'), ConsecutiveDownloader),
        ([TEST_URL], AttributeError('Test exception'), ParallelDownloader),
    ]
)
def test_failed_urls_are_collected(expected_failed_urls,
                                   exception_to_throw,
                                   downloader_wrapper_class):
    """
    Check that a failure inside the file downloader ends up in the course
    downloader's `failed_urls`.

    Each case builds a file downloader that raises `exception_to_throw`
    (or nothing when it is None), wraps it in `downloader_wrapper_class`,
    downloads the test modules and compares `failed_urls` with
    `expected_failed_urls`.
    """
    file_downloader = MockedFailingDownloader(exception_to_throw)
    wrapper = downloader_wrapper_class(file_downloader)
    options = MockedCommandLineArgs(overwrite=True)

    course_downloader = CourseraDownloader(
        downloader=wrapper,
        commandline_args=options,
        class_name='test_class',
        path='',
        ignored_formats=None,
        disable_url_skipping=False)

    course_downloader.download_modules(make_test_modules())

    assert expected_failed_urls == course_downloader.failed_urls
def test_iter_modules():
"""
Test that all modules are iterated and intermediate values are formatted
correctly. Filtering is not tested at the moment.

The fixture from make_test_modules() is traversed with _iter_modules and
tuples of the attributes exposed at each level are recorded; the recorded
list must equal expected_output exactly, including order.
"""
modules = make_test_modules()
args = MockedCommandLineArgs()
# Note the naming shift between the fixture and the iterator objects: the
# fixture's top-level "section1" entry is expected as module.name, and the
# nested "module1" entry is expected as the last component of section.dir.
expected_output = [
(0, '01_section1'),
(0, 'test_class/01_section1/01_module1'),
(0, 'lecture1', 'en.txt', 'title'),
('en.txt', 'https://www.coursera.org/api/test-url', 'title')
]
collected_output = []
# Record the attributes exposed at each level of the traversal.
for module in _iter_modules(modules=modules, class_name='test_class',
path='', ignored_formats=None, args=args):
collected_output.append((module.index, module.name))
for section in module.sections:
collected_output.append((section.index, section.dir))
for lecture in section.lectures:
for resource in lecture.resources:
collected_output.append((lecture.index, lecture.name,
resource.fmt, resource.title))
collected_output.append((resource.fmt, resource.url, resource.title))
assert expected_output == collected_output
def test_walk_modules():
    """
    Check _walk_modules, the flattened counterpart of _iter_modules.

    Every (module, section, lecture, resource) tuple it yields for the test
    fixture is reduced to a tuple of plain values and compared with the
    single expected row.
    """
    expected_output = [
        (0, '01_section1',
         0, 'test_class/01_section1/01_module1',
         0, 'lecture1',
         'test_class/01_section1/01_module1/01_lecture1_title.en.txt',
         'https://www.coursera.org/api/test-url'),
    ]

    walked = _walk_modules(
        modules=make_test_modules(),
        class_name='test_class',
        path='',
        ignored_formats=None,
        args=MockedCommandLineArgs())

    collected_output = [
        (module.index, module.name,
         section.index, section.dir,
         lecture.index, lecture.name,
         lecture.filename(resource.fmt, resource.title),
         resource.url)
        for module, section, lecture, resource in walked
    ]

    assert expected_output == collected_output

View file

@ -96,6 +96,22 @@ def _iter_modules(modules, class_name, path, ignored_formats, args):
yield IterModule(index, module)
def _walk_modules(modules, class_name, path, ignored_formats, args):
    """
    Helper generator that traverses modules and returns a flattened
    iterator.

    All arguments are passed straight to `_iter_modules`. One
    `(module, section, lecture, resource)` tuple is yielded for every
    resource of every lecture of every section of every module.
    """
    top_level = _iter_modules(modules=modules,
                              class_name=class_name,
                              path=path,
                              ignored_formats=ignored_formats,
                              args=args)
    flattened = (
        (module, section, lecture, resource)
        for module in top_level
        for section in module.sections
        for lecture in section.lectures
        for resource in lecture.resources
    )
    for item in flattened:
        yield item
class CourseDownloader(object):
__metaclass__ = abc.ABCMeta