From 053883eb235d770f06cbc8a27d26286b7cb865df Mon Sep 17 00:00:00 2001 From: Nachikethas Date: Sat, 6 Aug 2016 03:10:46 -0700 Subject: [PATCH 001/105] Fix command typo in alternative Unix installation By default, the git clone command clones the repository to a directory with the repository's name. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 83d9c78..fa36d58 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ following steps (create/adapt first the directory cd my-coursera source bin/activate git clone https://github.com/coursera-dl/coursera-dl - cd coursera + cd coursera-dl pip install -r requirements.txt ./coursera-dl ... @@ -175,7 +175,7 @@ To further download new videos from your classes, simply perform: cd /directory/where/I/want/my/courses/my-coursera source bin/activate - cd coursera + cd coursera-dl ./coursera-dl ... We are working on streamlining this whole process so that it is as simple as From d3d1c4d0f1738c3fb548ba287ac26d88b3d1fbd0 Mon Sep 17 00:00:00 2001 From: Anderson Mesquita Date: Fri, 5 Aug 2016 15:48:31 -0700 Subject: [PATCH 002/105] Make class_names arg optional for certain flags The `class_names` argument shouldn't be required for certain flags (e.g. `--version` or `--list-courses`). This makes it optional while also taking these flags into account to display an appropriate error message when they aren't being used. 
Fixes #562 --- coursera/commandline.py | 21 ++++++++++++++++++++- coursera/test/test_commandline.py | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 coursera/test/test_commandline.py diff --git a/coursera/commandline.py b/coursera/commandline.py index b467d30..4332671 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -14,6 +14,20 @@ from .credentials import get_credentials, CredentialsError, keyring from .utils import decode_input +def class_name_arg_required(args): + """ + Evaluates whether class_name arg is required. + + @param args: Command-line arguments. + @type args: namedtuple + """ + no_class_name_flags = ['list_courses', 'version'] + return not any( + getattr(args, flag) + for flag in no_class_name_flags + ) + + def parse_args(args=None): """ Parse the arguments/options passed to the program on the command line. @@ -27,7 +41,7 @@ def parse_args(args=None): group_basic.add_argument('class_names', action='store', - nargs='+', + nargs='*', help='name(s) of the class(es) (e.g. "ml-005")') group_basic.add_argument('-u', @@ -335,6 +349,11 @@ def parse_args(args=None): logging.basicConfig(level=logging.INFO, format='%(message)s') + if class_name_arg_required(args) and not args.class_names: + parser.print_usage() + logging.error('You must supply at least one class name') + sys.exit(1) + # show version? if args.version: # we use print (not logging) function because version may be used diff --git a/coursera/test/test_commandline.py b/coursera/test/test_commandline.py new file mode 100644 index 0000000..4a1f34c --- /dev/null +++ b/coursera/test/test_commandline.py @@ -0,0 +1,23 @@ +""" +Test command line module. 
+""" + +from coursera import commandline +from coursera.test import test_workflow + + +def test_class_name_arg_required(): + args = {'list_courses': False, 'version': False} + mock_args = test_workflow.MockedCommandLineArgs(**args) + assert commandline.class_name_arg_required(mock_args) + + +def test_class_name_arg_not_required(): + not_required_cases = [ + {'list_courses': True, 'version': False}, + {'list_courses': False, 'version': True}, + {'list_courses': True, 'version': True}, + ] + for args in not_required_cases: + mock_args = test_workflow.MockedCommandLineArgs(**args) + assert not commandline.class_name_arg_required(mock_args) From 84a8cb7b0f8fda8939780235e135275f99e8f307 Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Tue, 16 Aug 2016 16:10:13 -0400 Subject: [PATCH 003/105] Update README.md --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index fa36d58..5d581a6 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ - [Alternative ways of installing missing dependencies](#alternative-ways-of-installing-missing-dependencies) - [Alternative installation method for Unix systems](#alternative-installation-method-for-unix-systems) - [Installing dependencies on your own](#installing-dependencies-on-your-own) + - [Windows](#windows) - [Create an account with Coursera](#create-an-account-with-coursera) - [Running the script](#running-the-script) - [Resuming downloads](#resuming-downloads) @@ -211,6 +212,22 @@ your own, please check that the versions of your modules are at least those listed in the `requirements.txt` file (and, `requirements-dev.txt` file, if applicable). +## Windows + +`python -m pip install coursera-dl` + +Be sure that the Python install path is added to the PATH system environment variables. + +``` +Example: +C:\Python35\Scripts\;C:\Python35\; +``` +Or if you happen to install the 32-bit version. 
+ +`C:\Users\%username%\AppData\Local\Programs\Python\Python35-32\Scripts;C:\Users\%username%\AppData\Local\Programs\Python\Python35-32;` + +Coursera-dl can now be run from commandline or powershell. + ## Create an account with Coursera If you don't already have one, create a [Coursera][1] account and enroll in From 7b4c29cc2fa57501b14e2463af88801bd4ceec19 Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Wed, 17 Aug 2016 02:27:13 -0400 Subject: [PATCH 004/105] Update README.md --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5d581a6..15527fd 100644 --- a/README.md +++ b/README.md @@ -218,13 +218,14 @@ applicable). Be sure that the Python install path is added to the PATH system environment variables. -``` -Example: -C:\Python35\Scripts\;C:\Python35\; -``` -Or if you happen to install the 32-bit version. +`C:\Python35\Scripts\;C:\Python35\;` -`C:\Users\%username%\AppData\Local\Programs\Python\Python35-32\Scripts;C:\Users\%username%\AppData\Local\Programs\Python\Python35-32;` +Or if you have restricted installation permissions and you've installed Python under AppData, add this to your PATH. + +``` +C:\Users\%username%\AppData\Local\Programs\Python\Python35-32\Scripts; +C:\Users\%username%\AppData\Local\Programs\Python\Python35-32; +``` Coursera-dl can now be run from commandline or powershell. From 584cbf326598a379bb90f436b443e197306c3cd6 Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Wed, 17 Aug 2016 08:26:15 -0400 Subject: [PATCH 005/105] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 15527fd..a9c1898 100644 --- a/README.md +++ b/README.md @@ -216,13 +216,18 @@ applicable). `python -m pip install coursera-dl` -Be sure that the Python install path is added to the PATH system environment variables. 
-`C:\Python35\Scripts\;C:\Python35\;` +Be sure that the Python install path is added to the PATH system environment variables. This can be found in Start Menu > Control Panel > System > Advanced System Settings > Environment Variables. + +``` +Example: +C:\Python35\Scripts\;C:\Python35\; +``` Or if you have restricted installation permissions and you've installed Python under AppData, add this to your PATH. ``` +Example: C:\Users\%username%\AppData\Local\Programs\Python\Python35-32\Scripts; C:\Users\%username%\AppData\Local\Programs\Python\Python35-32; ``` From 308fbc1857c9b1b6b3860f8781d46b5670617bc3 Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Wed, 17 Aug 2016 08:30:41 -0400 Subject: [PATCH 006/105] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index a9c1898..8df8f92 100644 --- a/README.md +++ b/README.md @@ -228,8 +228,7 @@ Or if you have restricted installation permissions and you've installed Python u ``` Example: -C:\Users\%username%\AppData\Local\Programs\Python\Python35-32\Scripts; -C:\Users\%username%\AppData\Local\Programs\Python\Python35-32; +C:\Users\\AppData\Local\Programs\Python\Python35-32\Scripts;C:\Users\\AppData\Local\Programs\Python\Python35-32; ``` Coursera-dl can now be run from commandline or powershell. From a8132fdb1c90d6e65103b200d9fa39c168ae3676 Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Wed, 17 Aug 2016 09:04:56 -0400 Subject: [PATCH 007/105] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8df8f92..b5fcb1a 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ applicable). `python -m pip install coursera-dl` -Be sure that the Python install path is added to the PATH system environment variables. This can be found in Start Menu > Control Panel > System > Advanced System Settings > Environment Variables. 
+Be sure that the Python install path is added to the PATH system environment variables. This can be found in Control Panel > System > Advanced System Settings > Environment Variables. ``` Example: From 75e37263461bd5deb73307df78afaf2d99a4cc5a Mon Sep 17 00:00:00 2001 From: Charles Bickel Date: Wed, 17 Aug 2016 09:16:07 -0400 Subject: [PATCH 008/105] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index b5fcb1a..28c3d70 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,6 @@ applicable). `python -m pip install coursera-dl` - Be sure that the Python install path is added to the PATH system environment variables. This can be found in Control Panel > System > Advanced System Settings > Environment Variables. ``` From a1c1d624b4d0c7365b71edbfbdf379b8b2fed1e1 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 17:36:17 +0300 Subject: [PATCH 009/105] Be more robust when extracting quiz/exam/programmingAssignment This is a revised version of #575 pull request by @partobs-mdp fix #568 related #573 --- coursera/api.py | 102 +++++++++++++++++++++++--------------- coursera/define.py | 3 ++ coursera/test/test_api.py | 70 +++++++++++++++++++++++++- 3 files changed, 133 insertions(+), 42 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index 7d94804..ca83f56 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -543,14 +543,20 @@ class CourseraOnDemand(object): if assets is None: assets = [] - links = self._extract_videos_and_subtitles_from_lecture( - video_id, subtitle_language, resolution) + try: + links = self._extract_videos_and_subtitles_from_lecture( + video_id, subtitle_language, resolution) - assets = self._normalize_assets(assets) - extend_supplement_links( - links, self._extract_links_from_lecture_assets(assets)) + assets = self._normalize_assets(assets) + extend_supplement_links( + links, self._extract_links_from_lecture_assets(assets)) - return links + return links + except
requests.exceptions.HTTPError as exception: + logging.error('Could not download lecture %s: %s', video_id, exception) + if is_debug_run(): + logging.exception('Could not download lecture %s: %s', video_id, exception) + return None def _normalize_assets(self, assets): """ @@ -753,20 +759,26 @@ class CourseraOnDemand(object): """ logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) - # Assignment text (instructions) contains asset tags which describe - # supplementary files. - text = ''.join(self._extract_assignment_text(element_id)) - if not text: - return {} + try: + # Assignment text (instructions) contains asset tags which describe + # supplementary files. + text = ''.join(self._extract_assignment_text(element_id)) + if not text: + return {} - supplement_links = self._extract_links_from_text(text) - - instructions = (IN_MEMORY_MARKER + self._markup_to_html(text), - 'instructions') - extend_supplement_links( - supplement_links, {IN_MEMORY_EXTENSION: [instructions]}) - - return supplement_links + supplement_links = self._extract_links_from_text(text) + instructions = (IN_MEMORY_MARKER + self._markup_to_html(text), + 'instructions') + extend_supplement_links( + supplement_links, {IN_MEMORY_EXTENSION: [instructions]}) + return supplement_links + except requests.exceptions.HTTPError as exception: + logging.error('Could not download programming assignment %s: %s', + element_id, exception) + if is_debug_run(): + logging.exception('Could not download programming assignment %s: %s', + element_id, exception) + return None def extract_links_from_supplement(self, element_id): """ @@ -777,33 +789,41 @@ class CourseraOnDemand(object): """ logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) - dom = get_page(self._session, OPENCOURSE_SUPPLEMENT_URL, - json=True, - course_id=self._course_id, - element_id=element_id) + try: + dom = get_page(self._session, OPENCOURSE_SUPPLEMENT_URL, + json=True, + course_id=self._course_id, + 
element_id=element_id) - supplement_content = {} + supplement_content = {} - # Supplement content has structure as follows: - # 'linked' { - # 'openCourseAssets.v1' [ { - # 'definition' { - # 'value' + # Supplement content has structure as follows: + # 'linked' { + # 'openCourseAssets.v1' [ { + # 'definition' { + # 'value' - for asset in dom['linked']['openCourseAssets.v1']: - value = asset['definition']['value'] - # Supplement lecture types are known to contain both tags - # and tags (depending on the course), so we extract - # both of them. - extend_supplement_links( - supplement_content, self._extract_links_from_text(value)) + for asset in dom['linked']['openCourseAssets.v1']: + value = asset['definition']['value'] + # Supplement lecture types are known to contain both tags + # and tags (depending on the course), so we extract + # both of them. + extend_supplement_links( + supplement_content, self._extract_links_from_text(value)) - instructions = (IN_MEMORY_MARKER + self._markup_to_html(value), - 'instructions') - extend_supplement_links( - supplement_content, {IN_MEMORY_EXTENSION: [instructions]}) + instructions = (IN_MEMORY_MARKER + self._markup_to_html(value), + 'instructions') + extend_supplement_links( + supplement_content, {IN_MEMORY_EXTENSION: [instructions]}) - return supplement_content + return supplement_content + except requests.exceptions.HTTPError as exception: + logging.error('Could not download supplement %s: %s', + element_id, exception) + if is_debug_run(): + logging.exception('Could not download supplement %s: %s', + element_id, exception) + return None def _extract_asset_tags(self, text): """ diff --git a/coursera/define.py b/coursera/define.py index 5597a19..1a72241 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -8,6 +8,9 @@ import os import getpass import tempfile + +HTTP_FORBIDDEN = 403 + COURSERA_URL = 'https://www.coursera.org' AUTH_URL = 'https://accounts.coursera.org/api/v1/login' AUTH_URL_V3 = 
'https://www.coursera.org/api/login/v3' diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index d4e54aa..636fbc5 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -13,14 +13,82 @@ from coursera import define from coursera.test.utils import slurp_fixture from coursera.utils import BeautifulSoup +from requests.exceptions import HTTPError +from requests import Response, Session + @pytest.fixture def course(): course = api.CourseraOnDemand( - session=None, course_id='0', course_name='test_course') + session=Session(), course_id='0', course_name='test_course') return course +@patch('coursera.api.get_page') +def test_extract_links_from_programming_http_error(get_page, course): + """ + This test checks that downloader skips locked programming assignments + instead of throwing an error. (Locked == returning 403 error code) + """ + locked_response = Response() + locked_response.status_code = define.HTTP_FORBIDDEN + get_page.side_effect = HTTPError('Mocked HTTP error', + response=locked_response) + assert None == course.extract_links_from_programming('0') + + +@patch('coursera.api.get_page') +def test_extract_links_from_exam_http_error(get_page, course): + """ + This test checks that downloader skips locked exams + instead of throwing an error. (Locked == returning 403 error code) + """ + locked_response = Response() + locked_response.status_code = define.HTTP_FORBIDDEN + get_page.side_effect = HTTPError('Mocked HTTP error', + response=locked_response) + assert None == course.extract_links_from_exam('0') + + +@patch('coursera.api.get_page') +def test_extract_links_from_supplement_http_error(get_page, course): + """ + This test checks that downloader skips locked supplements + instead of throwing an error. 
(Locked == returning 403 error code) + """ + locked_response = Response() + locked_response.status_code = define.HTTP_FORBIDDEN + get_page.side_effect = HTTPError('Mocked HTTP error', + response=locked_response) + assert None == course.extract_links_from_supplement('0') + + +@patch('coursera.api.get_page') +def test_extract_links_from_lecture_http_error(get_page, course): + """ + This test checks that downloader skips locked lectures + instead of throwing an error. (Locked == returning 403 error code) + """ + locked_response = Response() + locked_response.status_code = define.HTTP_FORBIDDEN + get_page.side_effect = HTTPError('Mocked HTTP error', + response=locked_response) + assert None == course.extract_links_from_lecture('0') + + +@patch('coursera.api.get_page') +def test_extract_links_from_quiz_http_error(get_page, course): + """ + This test checks that downloader skips locked quizzes + instead of throwing an error. (Locked == returning 403 error code) + """ + locked_response = Response() + locked_response.status_code = define.HTTP_FORBIDDEN + get_page.side_effect = HTTPError('Mocked HTTP error', + response=locked_response) + assert None == course.extract_links_from_quiz('0') + + @patch('coursera.api.get_page') def test_ondemand_programming_supplement_no_instructions(get_page, course): no_instructions = slurp_fixture('json/supplement-programming-no-instructions.json') From 7f773530fc75b229bdb388489a0776ad5a00c25d Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 17:36:52 +0300 Subject: [PATCH 010/105] Add missing import to playlist.py related #573 --- coursera/playlist.py | 1 + 1 file changed, 1 insertion(+) diff --git a/coursera/playlist.py b/coursera/playlist.py index f273595..ebe4d15 100644 --- a/coursera/playlist.py +++ b/coursera/playlist.py @@ -1,4 +1,5 @@ import os +import glob def create_m3u_playlist(section_dir): From e430da130447a9afbaee1e3d63a877f87550a8f3 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 17:37:39 
+0300 Subject: [PATCH 011/105] Be more verbose when printing login failure message related #573 --- coursera/cookies.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/coursera/cookies.py b/coursera/cookies.py index f9d050e..36fa1df 100644 --- a/coursera/cookies.py +++ b/coursera/cookies.py @@ -150,8 +150,8 @@ def login(session, username, password, class_name=None): # for coursera!!! v = session.cookies.pop('CAUTH') session.cookies.set('CAUTH', v) - except requests.exceptions.HTTPError: - raise AuthenticationFailed('Cannot login on coursera.org.') + except requests.exceptions.HTTPError as e: + raise AuthenticationFailed('Cannot login on coursera.org: %s' % e) logging.info('Logged in on coursera.org.') @@ -169,8 +169,8 @@ def down_the_wabbit_hole(session, class_name): try: r.raise_for_status() - except requests.exceptions.HTTPError: - raise AuthenticationFailed('Cannot login on class.coursera.org.') + except requests.exceptions.HTTPError as e: + raise AuthenticationFailed('Cannot login on class.coursera.org: %s' % e) logging.debug('Exiting "deep" authentication.') From 7579dc9771a9ef31f2216ebf814f3f4f3a5839e5 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 17:53:38 +0300 Subject: [PATCH 012/105] Introduce --download-delay option This option makes the script sleep for N seconds before downloading next course. Default value is 60 seconds. This option proved useful when downloading many courses at once. Otherwise Coursera API starts to respond with errors (cannot login). related #573 --- coursera/commandline.py | 8 ++++++++ coursera/coursera_dl.py | 12 ++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/coursera/commandline.py b/coursera/commandline.py index 4332671..c4884b3 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -66,6 +66,14 @@ def parse_args(args=None): help='number of parallel jobs to use for ' 'downloading resources. 
(Default: 1)') + group_basic.add_argument('--download-delay', + dest='download_delay', + action='store', + default=60, + type=int, + help='number of seconds to wait before downloading ' + 'next course. (Default: 60)') + group_basic.add_argument('-b', # FIXME: kill this one-letter option '--preview', dest='preview', diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index a04c599..13a3bc7 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -46,6 +46,7 @@ import json import logging import os import re +import time import shutil from distutils.version import LooseVersion as V @@ -215,9 +216,10 @@ def main(): list_courses(args) return - for class_name in args.class_names: + for class_index, class_name in enumerate(args.class_names): try: - logging.info('Downloading class: %s', class_name) + logging.info('Downloading class: %s (%d / %d)', + class_name, class_index + 1, len(args.class_names)) if download_class(args, class_name): completed_classes.append(class_name) except requests.exceptions.HTTPError as e: @@ -234,6 +236,12 @@ def main(): except AuthenticationFailed as af: logging.error('Could not authenticate: %s', af) + if class_index + 1 != len(args.class_names): + logging.info('Sleeping for %d seconds before downloading next course. ' + 'You can change this with --download-delay option.', + args.download_delay) + time.sleep(args.download_delay) + if completed_classes: logging.info( "Classes which appear completed: " + " ".join(completed_classes)) From 9f333903d05879861ce258e29dc6c742f98d885b Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 18:21:18 +0300 Subject: [PATCH 013/105] Add --only-syllabus option to download syllabus only and skip course contents Useful for developers. 
related #573 --- coursera/commandline.py | 7 +++++++ coursera/coursera_dl.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/coursera/commandline.py b/coursera/commandline.py index c4884b3..dc9a2bb 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -98,6 +98,13 @@ def parse_args(args=None): # Selection of material to download group_material = parser.add_argument_group('Selection of material to download') + group_material.add_argument('--only-syllabus', + dest='only_syllabus', + action='store_true', + default=False, + help='download only syllabus, skip course content. ' + '(Default: False)') + group_material.add_argument('--download-quizzes', dest='download_quizzes', action='store_true', diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index 13a3bc7..a1393e1 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -136,6 +136,9 @@ def download_on_demand_class(args, class_name): with open(cached_syllabus_filename, 'w') as file_object: json.dump(modules, file_object, indent=4) + if args.only_syllabus: + return False + downloader = get_downloader(session, class_name, args) downloader_wrapper = ParallelDownloader(downloader, args.jobs) \ if args.jobs > 1 else ConsecutiveDownloader(downloader) From 69de37b82ee4e431b4fa5c9ff85a9f5dfb812b46 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 19:13:44 +0300 Subject: [PATCH 014/105] Add support for reflect and mcqReflect question type reflect generates the same narrow edit field as for numerical answers. Although it does not look the same as on the Coursera site, I think it's enough to indicate the intention. mcqReflect is basically the same as just mcq.
related #573 --- coursera/api.py | 10 +++-- .../question-type-mcqReflect-input.json | 44 +++++++++++++++++++ .../question-type-mcqReflect-output.txt | 15 +++++++ .../question-type-reflect-input.json | 27 ++++++++++++ .../question-type-reflect-output.txt | 4 ++ coursera/test/test_api.py | 2 + 6 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-input.json create mode 100644 coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-output.txt create mode 100644 coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-input.json create mode 100644 coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-output.txt diff --git a/coursera/api.py b/coursera/api.py index ca83f56..e22c32d 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -45,18 +45,21 @@ class QuizExamToMarkupConverter(object): The output needs to be further processed by MarkupToHTMLConverter. """ KNOWN_QUESTION_TYPES = ('mcq', + 'mcqReflect', 'checkbox', 'singleNumeric', 'textExactMatch', 'mathExpression', - 'regex') + 'regex', + 'reflect') # TODO: support live MathJAX preview rendering for mathExpression # and regex question types KNOWN_INPUT_TYPES = ('textExactMatch', 'singleNumeric', 'mathExpression', - 'regex') + 'regex', + 'reflect') def __init__(self, session): self._session = session @@ -86,9 +89,10 @@ class QuizExamToMarkupConverter(object): if question_type in self.KNOWN_INPUT_TYPES: result.extend(self._generate_input_field()) - # Convert input_type form JSON reply to HTML input type + # Convert input_type from JSON reply to HTML input type input_type = { 'mcq': 'radio', + 'mcqReflect': 'radio', 'checkbox': 'checkbox' }.get(question_type, '') diff --git a/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-input.json b/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-input.json new file mode 100644 index 0000000..90b530e --- /dev/null +++ 
b/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-input.json @@ -0,0 +1,44 @@ +{ + "questions": [ + { + "id": "8uUpMzm_EeaetxLgjw7H8Q@0", + "variant": { + "detailLevel": "Full", + "definition": { + "prompt": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "Lorem ipsum" + } + }, + "options": [ + { + "id": "0.9109180361318947", + "display": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "Answer 1" + } + } + }, + { + "id": "0.11974743029080992", + "display": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "Answer 2" + } + } + } + ] + } + }, + "question": { + "type": "mcqReflect" + } + } + ] +} diff --git a/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-output.txt b/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-output.txt new file mode 100644 index 0000000..f4a8f24 --- /dev/null +++ b/coursera/test/fixtures/json/quiz-to-markup/question-type-mcqReflect-output.txt @@ -0,0 +1,15 @@ +

Question 1

+Lorem ipsum +
+ + +
+
diff --git a/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-input.json b/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-input.json new file mode 100644 index 0000000..7a75e87 --- /dev/null +++ b/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-input.json @@ -0,0 +1,27 @@ +{ + "evaluation": null, + "questions": [ + { + "id": "jeVDBjnNEeaetxLgjw7H8Q@0", + "variant": { + "detailLevel": "Full", + "definition": { + "prompt": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "Lorem ipsum" + } + } + } + }, + "weightedScoring": { + "maxScore": 1 + }, + "isSubmitAllowed": true, + "question": { + "type": "reflect" + } + } + ] +} diff --git a/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-output.txt b/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-output.txt new file mode 100644 index 0000000..2692cbc --- /dev/null +++ b/coursera/test/fixtures/json/quiz-to-markup/question-type-reflect-output.txt @@ -0,0 +1,4 @@ +

Question 1

+Lorem ipsum +
+
diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index 636fbc5..acae374 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -214,6 +214,8 @@ def test_list_courses(get_page, course): ('question-type-checkbox-input.json', 'question-type-checkbox-output.txt'), ('question-type-mcq-input.json', 'question-type-mcq-output.txt'), ('question-type-singleNumeric-input.json', 'question-type-singleNumeric-output.txt'), + ('question-type-reflect-input.json', 'question-type-reflect-output.txt'), + ('question-type-mcqReflect-input.json', 'question-type-mcqReflect-output.txt'), ('question-type-unknown-input.json', 'question-type-unknown-output.txt'), ('multiple-questions-input.json', 'multiple-questions-output.txt'), ] From 00b36f70351bfd5d5b3d0a414c22df1b8d4cb762 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 19:51:09 +0300 Subject: [PATCH 015/105] Show problematic courses after having downloaded all courses This change makes the script show courses where an error during syllabus parsing occurred. Printing such courses in the end is helpful to bring attention when downloading multiple courses. related #573 --- coursera/coursera_dl.py | 45 +++++++++++++++++++++++++++++++---------- coursera/extractors.py | 26 +++++++++++++++++++----- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index a1393e1..124868e 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -114,9 +114,13 @@ def download_on_demand_class(args, class_name): """ Download all requested resources from the on-demand class given in class_name. - Returns True if the class appears completed. + @return: Tuple of (bool, bool), where the first bool indicates whether + errors occured while parsing syllabus, the second bool indicaters + whether the course appears to be completed. 
+ @rtype: (bool, bool) """ + error_occured = False session = get_session() extractor = CourseraExtractor(session, args.username, args.password) @@ -125,19 +129,20 @@ def download_on_demand_class(args, class_name): with open(cached_syllabus_filename) as syllabus_file: modules = json.load(syllabus_file) else: - modules = extractor.get_modules(class_name, - args.reverse, - args.unrestricted_filenames, - args.subtitle_language, - args.video_resolution, - args.download_quizzes) + error_occured, modules = extractor.get_modules( + class_name, + args.reverse, + args.unrestricted_filenames, + args.subtitle_language, + args.video_resolution, + args.download_quizzes) if is_debug_run or args.cache_syllabus(): with open(cached_syllabus_filename, 'w') as file_object: json.dump(modules, file_object, indent=4) if args.only_syllabus: - return False + return error_occured, False downloader = get_downloader(session, class_name, args) downloader_wrapper = ParallelDownloader(downloader, args.jobs) \ @@ -169,7 +174,7 @@ def download_on_demand_class(args, class_name): if course_downloader.failed_urls: print_failed_urls(course_downloader.failed_urls) - return completed + return error_occured, completed def print_skipped_urls(skipped_urls): @@ -196,7 +201,10 @@ def download_class(args, class_name): """ Try to download on-demand class. - Returns True if the class appears completed. + @return: Tuple of (bool, bool), where the first bool indicates whether + errors occured while parsing syllabus, the second bool indicaters + whether the course appears to be completed. 
+ @rtype: (bool, bool) """ logging.debug('Downloading new style (on demand) class %s', class_name) return download_on_demand_class(args, class_name) @@ -210,6 +218,7 @@ def main(): args = parse_args() logging.info('coursera_dl version %s', __version__) completed_classes = [] + classes_with_errors = [] mkdir_p(PATH_CACHE, 0o700) if args.clear_cache: @@ -223,8 +232,11 @@ def main(): try: logging.info('Downloading class: %s (%d / %d)', class_name, class_index + 1, len(args.class_names)) - if download_class(args, class_name): + error_occured, completed = download_class(args, class_name) + if completed: completed_classes.append(class_name) + if error_occured: + classes_with_errors.append(class_name) except requests.exceptions.HTTPError as e: logging.error('HTTPError %s', e) if is_debug_run(): @@ -246,9 +258,20 @@ def main(): time.sleep(args.download_delay) if completed_classes: + logging.info('-' * 80) logging.info( "Classes which appear completed: " + " ".join(completed_classes)) + if classes_with_errors: + logging.info('-' * 80) + logging.info('The following classes had errors during the syllabus' + ' parsing stage. 
You may want to review error messages and' + ' courses (sometimes enrolling to the course or switching' + ' session helps):') + for class_name in classes_with_errors: + logging.info('%s (https://www.coursera.org/learn/%s)', + class_name, class_name) + if __name__ == '__main__': main() diff --git a/coursera/extractors.py b/coursera/extractors.py index d41c7dc..a4e8ade 100644 --- a/coursera/extractors.py +++ b/coursera/extractors.py @@ -50,11 +50,11 @@ class CourseraExtractor(PlatformExtractor): download_quizzes=False): page = self._get_on_demand_syllabus(class_name) - modules = self._parse_on_demand_syllabus( + error_occured, modules = self._parse_on_demand_syllabus( page, reverse, unrestricted_filenames, subtitle_language, video_resolution, download_quizzes) - return modules + return error_occured, modules def _get_on_demand_syllabus(self, class_name): """ @@ -74,6 +74,11 @@ class CourseraExtractor(PlatformExtractor): download_quizzes=False): """ Parse a Coursera on-demand course listing/syllabus page. + + @return: Tuple of (bool, list), where bool indicates whether + there was at least on error while parsing syllabus, the list + is a list of parsed modules. 
+ @rtype: (bool, list) """ dom = json.loads(page) @@ -96,6 +101,8 @@ class CourseraExtractor(PlatformExtractor): with open('%s-course-material-items.json' % course_name, 'w') as file_object: json.dump(ondemand_material_items._items, file_object, indent=4) + error_occured = False + for module in json_modules: module_slug = module['slug'] logging.info('Processing module %s', module_slug) @@ -121,7 +128,9 @@ class CourseraExtractor(PlatformExtractor): logging.info('Processing lecture %s (%s)', lecture_slug, typename) - links = None + # Empty dictionary means there were no data + # None means an error occured + links = {} if typename == 'lecture': lecture_video_id = lecture['content']['definition']['videoId'] @@ -146,7 +155,14 @@ class CourseraExtractor(PlatformExtractor): if download_quizzes: links = course.extract_links_from_exam(lecture['id']) - if links: + else: + logging.info('Unsupported typename "%s" in lecture "%s"', + typename, lecture_slug) + continue + + if links is None: + error_occured = True + elif links: lectures.append((lecture_slug, links)) if lectures: @@ -158,4 +174,4 @@ class CourseraExtractor(PlatformExtractor): if modules and reverse: modules.reverse() - return modules + return error_occured, modules From f7d451c581049ae01fd48a287381b0a78110003e Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 3 Oct 2016 20:48:03 +0300 Subject: [PATCH 016/105] Fix tests to make sure they use mocked session, not actual Session related #573 --- coursera/test/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index acae374..c5586ef 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -14,13 +14,13 @@ from coursera.test.utils import slurp_fixture from coursera.utils import BeautifulSoup from requests.exceptions import HTTPError -from requests import Response, Session +from requests import Response @pytest.fixture def course(): course = 
api.CourseraOnDemand( - session=Session(), course_id='0', course_name='test_course') + session=Mock(cookies={}), course_id='0', course_name='test_course') return course From f21fd18b4f3d51f5564760de33c5e769b869c7e6 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Tue, 4 Oct 2016 00:25:18 +0300 Subject: [PATCH 017/105] Update CHANGELOG.md --- CHANGELOG.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d74f1aa..ff3668b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Change Log +## 0.8.0 (2016-10-04) + +Features: + - Add `--download-delay` option that adds a specified delay in seconds + before downloading next course. This is useful when downloading many + courses at once. Default value is 60 seconds. + - Add `--only-syllabus` option which is when activated, allows to skip + download of the course content. Only syllabus is parsed. + - Add support for `reflect` and `mcqReflect` question types in quizzes. + - Courses that encountered an error while parsing syllabus will be listed + in the end of the program execution, after all courses have been + processed (hopefully, downloaded). This helps skip vast output and easily + see which courses need user's attention, e.g. enrollment, session + switching or just patience until the course start date. + +Bugfixes: + - Locked programming assignments in syllabus used to crash coursera-dl. + Now the script goes on parsing syllabus and skips locked assignments. + - Add missing import statement to playlist generation module + ## 0.7.0 (2016-07-28) Features: From 759fe78e629976de01a98753db7243218c4e6ff4 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Tue, 4 Oct 2016 00:28:21 +0300 Subject: [PATCH 018/105] Bump version (0.7.0 -> 0.8.0) Features: - Add `--download-delay` option that adds a specified delay in seconds before downloading next course. This is useful when downloading many courses at once. Default value is 60 seconds. 
- Add `--only-syllabus` option which is when activated, allows to skip download of the course content. Only syllabus is parsed. - Add support for `reflect` and `mcqReflect` question types in quizzes. - Courses that encountered an error while parsing syllabus will be listed in the end of the program execution, after all courses have been processed (hopefully, downloaded). This helps skip vast output and easily see which courses need user's attention, e.g. enrollment, session switching or just patience until the course start date. Bugfixes: - Locked programming assignments in syllabus used to crash coursera-dl. Now the script goes on parsing syllabus and skips locked assignments. - Add missing import statement to playlist generation module --- coursera/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/__init__.py b/coursera/__init__.py index a71c5c7..32a90a3 100644 --- a/coursera/__init__.py +++ b/coursera/__init__.py @@ -1 +1 @@ -__version__ = '0.7.0' +__version__ = '0.8.0' From 2ea2e7aa62e175f96531d26151fa941f6ee14f58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=99=E4=B8=9A=E5=86=9B?= Date: Fri, 10 Mar 2017 01:47:48 +0800 Subject: [PATCH 019/105] improve download subtitles. add support to download certain language's subtitle --- coursera/filtering.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/coursera/filtering.py b/coursera/filtering.py index d9ed5d5..14f4116 100644 --- a/coursera/filtering.py +++ b/coursera/filtering.py @@ -94,15 +94,16 @@ def find_resources_to_get(lecture, file_formats, resource_filter, ignored_format logging.info("The following file formats will be ignored: " + ",".join(ignored_formats)) for fmt, resources in iteritems(lecture): - fmt0 = fmt if '.' 
in fmt: - fmt = fmt.split('.')[1] + short_fmt = fmt.split('.')[1] + else: + short_fmt = None - if fmt in ignored_formats: + if fmt in ignored_formats or short_fmt in ignored_formats: continue - if fmt in file_formats or 'all' in file_formats: + if fmt in file_formats or short_fmt in file_formats or 'all' in file_formats: for r in resources: if resource_filter and r[1] and not re.search(resource_filter, r[1]): logging.debug('Skipping b/c of rf: %s %s', From eec99f64a480dbc4cf34d4ca0cc471e189ff44cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=99=E4=B8=9A=E5=86=9B?= Date: Fri, 10 Mar 2017 14:35:53 +0800 Subject: [PATCH 020/105] add support to download certain subtitle --- coursera/filtering.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/coursera/filtering.py b/coursera/filtering.py index 14f4116..4331831 100644 --- a/coursera/filtering.py +++ b/coursera/filtering.py @@ -95,15 +95,15 @@ def find_resources_to_get(lecture, file_formats, resource_filter, ignored_format for fmt, resources in iteritems(lecture): fmt0 = fmt + + short_fmt = None if '.' 
in fmt: short_fmt = fmt.split('.')[1] - else: - short_fmt = None - if fmt in ignored_formats or short_fmt in ignored_formats: + if fmt in ignored_formats or (short_fmt != None and short_fmt in ignored_formats) : continue - if fmt in file_formats or short_fmt in file_formats or 'all' in file_formats: + if fmt in file_formats or (short_fmt != None and short_fmt in file_formats) or 'all' in file_formats: for r in resources: if resource_filter and r[1] and not re.search(resource_filter, r[1]): logging.debug('Skipping b/c of rf: %s %s', From b7f24a772420e10fe477122431ee1916b95c36ed Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 19 Mar 2017 15:41:43 +0300 Subject: [PATCH 021/105] Mention Windows proxy support in the README.md related #205 #594 --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 28c3d70..3207408 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ - [Resuming downloads](#resuming-downloads) - [Troubleshooting](#troubleshooting) - [Found 0 sections and 0 lectures on this page](#found-0-sections-and-0-lectures-on-this-page) + - [Windows: Proxy support](#windows-proxy-support) - [Windows: Failed to create process](#windows-failed-to-create-process) - [SSLError: Errno 1 _ssl.c:504: error:14094410:SSL routines:SSL3_READ_BYTES:sslv3 alert handshake failure](#sslerror-errno-1-_sslc504-error14094410ssl-routinesssl3_read_bytessslv3-alert-handshake-failure) - [Reporting issues](#reporting-issues) @@ -398,6 +399,18 @@ Alternatively you may want to try this Chrome extension: https://chrome.google.c If none of the above works for you, there is nothing we can do. 
+## Windows: proxy support + +If you're on Windows behind a proxy, set up the environment variables +before running the script as follows: + +``` +set HTTP_PROXY=http://host:port +set HTTPS_PROXY=http://host:port +``` + +Related discussion: [#205](https://github.com/coursera-dl/coursera-dl/issues/205) + ## Windows: Failed to create process In `C:\Users\\AppData\Local\Programs\Python\Python35-32\Scripts` From f66e13f66820a61b609f38c119b34e9ce11885b4 Mon Sep 17 00:00:00 2001 From: Tony Yang Date: Tue, 21 Mar 2017 14:55:11 +0800 Subject: [PATCH 022/105] change course URL in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3207408..083ef02 100644 --- a/README.md +++ b/README.md @@ -317,7 +317,7 @@ one of the following actions solve your problem: * Make sure the class name you are using corresponds to the resource name used in the URL for that class: - `https://class.coursera.org//class/index` + `https://www.coursera.org/learn//home/welcome` * Have you tried to clean the cached cookies/credentials with the `--clear-cache` option? 
From f37bc44f510f69e158f814876eaa48ef67713604 Mon Sep 17 00:00:00 2001 From: "Gautam krishna.R" Date: Mon, 1 May 2017 15:02:42 +0530 Subject: [PATCH 023/105] fixes repository misclassifying --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..3ff2dd9 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/* linguist-vendored From 19103f2718d7d4e5aff1ac8a840a265116cf40f3 Mon Sep 17 00:00:00 2001 From: "Gautam krishna.R" Date: Mon, 1 May 2017 15:10:04 +0530 Subject: [PATCH 024/105] Update .gitattributes --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 3ff2dd9..a76c228 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -tests/* linguist-vendored +coursera/test/* linguist-vendored From 08b8ad44c2404586b1c4d3258971e1e630f15ab8 Mon Sep 17 00:00:00 2001 From: ifaint Date: Sat, 13 May 2017 10:04:01 +0800 Subject: [PATCH 025/105] 1. enable multiple subtitles and transcripts with alternatives. 1. enable downloading resources blocks. 1. enable file to store parameters. 1. added unit-test. 
--- README.md | 29 ++- coursera/api.py | 227 +++++++++++++++--- coursera/commandline.py | 30 ++- coursera/coursera_dl.py | 4 +- coursera/define.py | 12 +- coursera/extractors.py | 39 ++- .../fixtures/json/references-poll-output.json | 24 ++ .../fixtures/json/references-poll-reply.json | 47 ++++ ...diate-instructions-empty-instructions.json | 18 ++ ...mmediate-instructions-no-instructions.json | 6 + ...ming-immediate-instructions-one-asset.json | 18 ++ .../fixtures/json/video-output-1-all.json | 16 ++ .../test/fixtures/json/video-output-1-en.json | 4 + .../test/fixtures/json/video-output-1.json | 6 + .../test/fixtures/json/video-output-2.json | 6 + .../test/fixtures/json/video-reply-1.json | 47 ++++ .../test/fixtures/json/video-reply-2.json | 77 ++++++ coursera/test/test_api.py | 164 ++++++++++++- requirements.txt | 1 + tox.ini | 1 + 20 files changed, 730 insertions(+), 46 deletions(-) create mode 100644 coursera/test/fixtures/json/references-poll-output.json create mode 100644 coursera/test/fixtures/json/references-poll-reply.json create mode 100644 coursera/test/fixtures/json/supplement-programming-immediate-instructions-empty-instructions.json create mode 100644 coursera/test/fixtures/json/supplement-programming-immediate-instructions-no-instructions.json create mode 100644 coursera/test/fixtures/json/supplement-programming-immediate-instructions-one-asset.json create mode 100644 coursera/test/fixtures/json/video-output-1-all.json create mode 100644 coursera/test/fixtures/json/video-output-1-en.json create mode 100644 coursera/test/fixtures/json/video-output-1.json create mode 100644 coursera/test/fixtures/json/video-output-2.json create mode 100644 coursera/test/fixtures/json/video-reply-1.json create mode 100644 coursera/test/fixtures/json/video-reply-2.json diff --git a/README.md b/README.md index 083ef02..2ed7098 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,7 @@ I've downloaded many other good videos such as those from Khan Academy. certain resources. 
* File format extension filter to grab resource types you want. * Login credentials accepted on command-line or from `.netrc` file. + * Default arguments loaded from `coursera-dl.conf` file. * Core functionality tested on Linux, Mac and Windows. # Disclaimer @@ -278,6 +279,23 @@ instead. This is especially convenient, as typing usernames (email addresses) and passwords directly on the command line can get tiresome (even more if you happened to choose a "strong" password). +Alternatively, if you want to store your preferred parameters (which might +also include your username and password), create a file named `coursera-dl.conf` +where the script is supposed to be executed, with the following format: + + --username + --password + --subtitle-language en,zh-CN|zh-TW + --download-quizzes True + #--mathjax-cdn https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js + # more other parameters + +Parameter which is stored in the file will be overriden if it is again specifed +in your commandline script + +**Note:** In `coursera-dl.conf`, all the parameters should not be wrapped +with quotes. + ## Resuming downloads In default mode when you interrupt the download process by pressing @@ -341,7 +359,7 @@ one of the following actions solve your problem: * If results show 0 sections, you most likely have provided invalid credentials (username and/or password in the command line or in your - `.netrc` file). + `.netrc` file or in your `coursera-dl.conf` file). * For courses that have not started yet, but have had a previous iteration sometimes a preview is available, containing all the classes from the last @@ -456,6 +474,15 @@ If you still have the problem, please read the following issues for more ideas o This is also worth reading: https://urllib3.readthedocs.io/en/latest/security.html#insecureplatformwarning +## Use an alternative cdn url for `MathJax.js` + +When saving a course page, we enabled `MathJax` rendering for math equations, by +injecting `MathJax.js` in the header. 
The script is using a cdn service provided +by [mathjax.org](https://cdn.mathjax.org/mathjax/latest/MathJax.js). However, that +url is not accessible in some countries/regions, you can provide a +`--mathjax-cdn ` parameter to specify the `MathJax.js` file that is +accessible in your region. + # Reporting issues Before reporting any issue please follow the steps below: diff --git a/coursera/api.py b/coursera/api.py index e22c32d..a680b1a 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -25,12 +25,18 @@ from .define import (OPENCOURSE_SUPPLEMENT_URL, OPENCOURSE_ONDEMAND_COURSE_MATERIALS, OPENCOURSE_VIDEO_URL, OPENCOURSE_MEMBERSHIPS, + OPENCOURSE_REFERENCES_POLL_URL, + OPENCOURSE_REFERENCE_ITEM_URL, + OPENCOURSE_PROGRAMMING_IMMEDIATE_INSTRUCTIOINS_URL, + POST_OPENCOURSE_API_QUIZ_SESSION, POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE, POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS, POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS_GET_STATE, - INSTRUCTIONS_HTML_INJECTION, + INSTRUCTIONS_HTML_INJECTION_PRE, + INSTRUCTIONS_HTML_MATHJAX_URL, + INSTRUCTIONS_HTML_INJECTION_AFTER, IN_MEMORY_EXTENSION, IN_MEMORY_MARKER) @@ -135,9 +141,12 @@ class QuizExamToMarkupConverter(object): class MarkupToHTMLConverter(object): - def __init__(self, session): + def __init__(self, session, mathjax_cdn_url=None): self._session = session self._asset_retriever = AssetRetriever(session) + if not mathjax_cdn_url: + mathjax_cdn_url = INSTRUCTIONS_HTML_MATHJAX_URL + self._mathjax_cdn_url = mathjax_cdn_url def __call__(self, markup): """ @@ -170,7 +179,11 @@ class MarkupToHTMLConverter(object): soup.insert(0, meta) # 1. Inject basic CSS style - css_soup = BeautifulSoup(INSTRUCTIONS_HTML_INJECTION) + css = "".join([ + INSTRUCTIONS_HTML_INJECTION_PRE, + self._mathjax_cdn_url, + INSTRUCTIONS_HTML_INJECTION_AFTER]) + css_soup = BeautifulSoup(css) soup.append(css_soup) # 2. Replace with

@@ -386,7 +399,8 @@ class CourseraOnDemand(object): """ def __init__(self, session, course_id, course_name, - unrestricted_filenames=False): + unrestricted_filenames=False, + mathjax_cdn_url=None): """ Initialize Coursera OnDemand API. @@ -409,7 +423,7 @@ class CourseraOnDemand(object): self._user_id = None self._quiz_to_markup = QuizExamToMarkupConverter(session) - self._markup_to_html = MarkupToHTMLConverter(session) + self._markup_to_html = MarkupToHTMLConverter(session, mathjax_cdn_url=mathjax_cdn_url) self._asset_retriever = AssetRetriever(session) def obtain_user_id(self): @@ -721,29 +735,11 @@ class CourseraOnDemand(object): video_url = sources[0]['formatSources']['video/mp4'] video_content['mp4'] = video_url - # subtitles and transcripts - subtitle_nodes = [ - ('subtitles', 'srt', 'subtitle'), - ('subtitlesTxt', 'txt', 'transcript'), - ] - for (subtitle_node, subtitle_extension, subtitle_description) in subtitle_nodes: - logging.debug('Gathering %s URLs for video_id <%s>.', subtitle_description, video_id) - subtitles = dom.get(subtitle_node) - if subtitles is not None: - if subtitle_language == 'all': - for current_subtitle_language in subtitles: - video_content[current_subtitle_language + '.' + subtitle_extension] = make_coursera_absolute_url(subtitles.get(current_subtitle_language)) - else: - if subtitle_language != 'en' and subtitle_language not in subtitles: - logging.warning("%s unavailable in '%s' language for video " - "with video id: [%s], falling back to 'en' " - "%s", subtitle_description.capitalize(), subtitle_language, video_id, subtitle_description) - subtitle_language = 'en' + subtitle_link = self._extract_subtitles_from_video_dom( + dom, subtitle_language, video_id) - subtitle_url = subtitles.get(subtitle_language) - if subtitle_url is not None: - # some subtitle urls are relative! - video_content[subtitle_language + '.' 
+ subtitle_extension] = make_coursera_absolute_url(subtitle_url) + for key, value in iteritems(subtitle_link): + video_content[key] = value lecture_video_content = {} for key, value in iteritems(video_content): @@ -751,6 +747,102 @@ class CourseraOnDemand(object): return lecture_video_content + def _extract_subtitles_from_video_dom(self, video_dom, + subtitle_language, video_id): + # subtitles and transcripts + subtitle_nodes = [ + ('subtitles', 'srt', 'subtitle'), + ('subtitlesTxt', 'txt', 'transcript'), + ] + subtitle_set_download = set() + subtitle_set_nonexist = set() + subtitle_links = {} + for (subtitle_node, subtitle_extension, subtitle_description) \ + in subtitle_nodes: + logging.debug('Gathering %s URLs for video_id <%s>.', + subtitle_description, video_id) + subtitles = video_dom.get(subtitle_node) + download_all_subtitle = False + if subtitles is not None: + subtitles_set = set(subtitles) + requested_subtitle_list = [s.strip() for s in + subtitle_language.split(",")] + for language_with_alts in requested_subtitle_list: + if download_all_subtitle: + break + grouped_language_list = [l.strip() for l in + language_with_alts.split("|")] + for language in grouped_language_list: + if language == "all": + download_all_subtitle = True + break + elif language in subtitles_set: + subtitle_set_download.update([language]) + break + else: + subtitle_set_nonexist.update([language]) + + if download_all_subtitle and subtitles is not None: + subtitle_set_download = set(subtitles) + + if not download_all_subtitle and subtitle_set_nonexist: + logging.warning("%s unavailable in '%s' language for video " + "with video id: [%s]," + "%s", subtitle_description.capitalize(), + ", ".join(subtitle_set_nonexist), video_id, + subtitle_description) + if not subtitle_set_download: + logging.warning("%s all requested subtitles are unavaliable," + "with video id: [%s], falling back to 'en' " + "%s", subtitle_description.capitalize(), + video_id, + subtitle_description) + 
subtitle_set_download = set(['en']) + + for current_subtitle_language in subtitle_set_download: + subtitle_url = subtitles.get(current_subtitle_language) + if subtitle_url is not None: + # some subtitle urls are relative! + subtitle_links[ + "%s.%s" % (current_subtitle_language, subtitle_extension) + ] = make_coursera_absolute_url(subtitle_url) + return subtitle_links + + def extract_links_from_programming_immediate_instructions(self, element_id): + """ + Return a dictionary with links to supplement files (pdf, csv, zip, + ipynb, html and so on) extracted from graded programming assignment. + + @param element_id: Element ID to extract files from. + @type element_id: str + + @return: @see CourseraOnDemand._extract_links_from_text + """ + logging.debug('Extracting links from programming immediate ' + 'instructions for element_id <%s>.', element_id) + + try: + # Assignment text (instructions) contains asset tags which describe + # supplementary files. + text = ''.join( + self._extract_programming_immediate_instructions_text(element_id)) + if not text: + return {} + + supplement_links = self._extract_links_from_text(text) + instructions = (IN_MEMORY_MARKER + self._markup_to_html(text), + 'instructions') + extend_supplement_links( + supplement_links, {IN_MEMORY_EXTENSION: [instructions]}) + return supplement_links + except requests.exceptions.HTTPError as exception: + logging.error('Could not download programming assignment %s: %s', + element_id, exception) + if is_debug_run(): + logging.exception('Could not download programming assignment %s: %s', + element_id, exception) + return None + def extract_links_from_programming(self, element_id): """ Return a dictionary with links to supplement files (pdf, csv, zip, @@ -876,6 +968,87 @@ class CourseraOnDemand(object): 'url': element['url'].strip()} for element in dom['elements']] + def extract_references_poll(self): + try: + dom = get_page(self._session, + OPENCOURSE_REFERENCES_POLL_URL.format( + course_id=self._course_id), 
+ json=True + ) + logging.info('Downloaded resource poll (%d bytes)', len(dom)) + return dom['elements'] + + except requests.exceptions.HTTPError as exception: + logging.error('Could not download resource section: %s', + exception) + if is_debug_run(): + logging.exception('Could not download resource section: %s', + exception) + return None + + def extract_links_from_reference(self, short_id): + """ + Return a dictionary with supplement files (pdf, csv, zip, ipynb, html + and so on) extracted from supplement page. + + @return: @see CourseraOnDemand._extract_links_from_text + """ + logging.debug('Gathering resource URLs for short_id <%s>.', short_id) + + try: + dom = get_page(self._session, OPENCOURSE_REFERENCE_ITEM_URL, + json=True, + course_id=self._course_id, + short_id=short_id) + + resource_content = {} + + # Supplement content has structure as follows: + # 'linked' { + # 'openCourseAssets.v1' [ { + # 'definition' { + # 'value' + + for asset in dom['linked']['openCourseAssets.v1']: + value = asset['definition']['value'] + # Supplement lecture types are known to contain both tags + # and tags (depending on the course), so we extract + # both of them. + extend_supplement_links( + resource_content, self._extract_links_from_text(value)) + + instructions = (IN_MEMORY_MARKER + self._markup_to_html(value), + 'resources') + extend_supplement_links( + resource_content, {IN_MEMORY_EXTENSION: [instructions]}) + + return resource_content + except requests.exceptions.HTTPError as exception: + logging.error('Could not download supplement %s: %s', + short_id, exception) + if is_debug_run(): + logging.exception('Could not download supplement %s: %s', + short_id, exception) + return None + + def _extract_programming_immediate_instructions_text(self, element_id): + """ + Extract assignment text (instructions). + + @param element_id: Element id to extract assignment instructions from. + @type element_id: str + + @return: List of assignment text (instructions). 
+ @rtype: [str] + """ + dom = get_page(self._session, OPENCOURSE_PROGRAMMING_IMMEDIATE_INSTRUCTIOINS_URL, + json=True, + course_id=self._course_id, + element_id=element_id) + + return [element['assignmentInstructions']['definition']['value'] + for element in dom['elements']] + def _extract_assignment_text(self, element_id): """ Extract assignment text (instructions). diff --git a/coursera/commandline.py b/coursera/commandline.py index dc9a2bb..5b47196 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -6,13 +6,15 @@ handling. The primary candidate is argument parser. import os import sys import logging -import argparse +import configargparse as argparse from coursera import __version__ from .credentials import get_credentials, CredentialsError, keyring from .utils import decode_input +LOCAL_CONF_FILE_NAME = 'coursera-dl.conf' + def class_name_arg_required(args): """ @@ -33,8 +35,14 @@ def parse_args(args=None): Parse the arguments/options passed to the program on the command line. """ - parser = argparse.ArgumentParser( - description='Download Coursera.org lecture material and resources.') + parse_kwargs = { + "description": 'Download Coursera.org lecture material and resources.' + } + + conf_file_path = os.path.join(os.getcwd(), LOCAL_CONF_FILE_NAME) + if os.path.isfile(conf_file_path): + parse_kwargs["default_config_files"] = [conf_file_path] + parser = argparse.ArgParser(**parse_kwargs) # Basic options group_basic = parser.add_argument_group('Basic options') @@ -93,7 +101,15 @@ def parse_args(args=None): action='store', default='all', help='Choose language to download subtitles and transcripts. (Default: all)' - 'Use special value "all" to download all available.') + 'Use special value "all" to download all available.' + 'To download subtitles and transcripts of multiple languages,' + 'use comma(s) (without spaces) to seperate the names of the languages, i.e., "en,zh-CN".' 
+ 'To download subtitles and transcripts of alternative language(s) ' + 'if only the current language is not available,' + 'put an "|" for each of the alternative languages after ' + 'the current language, i.e., "en|fr,zh-CN|zh-TW|de", and make sure the parameter are wrapped with ' + 'quotes when "|" presents.' + ) # Selection of material to download group_material = parser.add_argument_group('Selection of material to download') @@ -316,6 +332,12 @@ def parse_args(args=None): default=False, help='generate M3U playlists for course weeks') + group_adv_misc.add_argument('--mathjax-cdn', + dest='mathjax_cdn_url', + default='https://cdn.mathjax.org/mathjax/latest/MathJax.js', + help='the cdn address of MathJax.js' + ) + # Debug options group_debug = parser.add_argument_group('Debugging options') diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index 124868e..941793d 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -135,7 +135,9 @@ def download_on_demand_class(args, class_name): args.unrestricted_filenames, args.subtitle_language, args.video_resolution, - args.download_quizzes) + args.download_quizzes, + args.mathjax_cdn_url + ) if is_debug_run or args.cache_syllabus(): with open(cached_syllabus_filename, 'w') as file_object: diff --git a/coursera/define.py b/coursera/define.py index 1a72241..7b3fbe5 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -67,6 +67,12 @@ OPENCOURSE_SUPPLEMENT_URL = 'https://www.coursera.org/api/onDemandSupplements.v1 '{course_id}~{element_id}?includes=asset&fields=openCourseAssets.v1%28typeName%29,openCourseAssets.v1%28definition%29' OPENCOURSE_PROGRAMMING_ASSIGNMENTS_URL = \ 'https://www.coursera.org/api/onDemandProgrammingLearnerAssignments.v1/{course_id}~{element_id}?fields=submissionLearnerSchema' +OPENCOURSE_PROGRAMMING_IMMEDIATE_INSTRUCTIOINS_URL = \ + 'https://www.coursera.org/api/onDemandProgrammingImmediateInstructions.v1/{course_id}~{element_id}' +OPENCOURSE_REFERENCES_POLL_URL = \ + 
"https://www.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=courseListed&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" +OPENCOURSE_REFERENCE_ITEM_URL = \ + "https://www.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=shortId&shortId={short_id}&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" # These are ids that are present in tag in assignment text: # @@ -772,7 +778,7 @@ FORMAT_MAX_LENGTH = 20 TITLE_MAX_LENGTH = 200 #: CSS that is usen to prettify instructions -INSTRUCTIONS_HTML_INJECTION = ''' +INSTRUCTIONS_HTML_INJECTION_PRE = ''' ''' + +# The following url is the root url (tree) for a Coursera Course +OPENCOURSE_NOTEBOOK_DESCRIPTIONS = "https://hub.coursera-notebooks.org/hub/coursera_login?token={authId}&next=/" +OPENCOURSE_NOTEBOOK_LAUNCHES = "https://www.coursera.org/api/onDemandNotebookWorkspaceLaunches.v1/?fields=authorizationId%2CcontentPath%2CuseLegacySystem" +OPENCOURSE_NOTEBOOK_TREE = "https://hub.coursera-notebooks.org/user/{jupId}/api/contents/{path}?type=directory&_={timestamp}" +OPENCOURSE_NOTEBOOK_DOWNLOAD = "https://hub.coursera-notebooks.org/user/{jupId}/files/{path}?download=1" diff --git a/coursera/extractors.py b/coursera/extractors.py index 420373f..6e6d631 100644 --- a/coursera/extractors.py +++ b/coursera/extractors.py @@ -29,7 +29,7 @@ class PlatformExtractor(object): class CourseraExtractor(PlatformExtractor): def __init__(self, session, username, password): login(session, username, password) - + self._notebook_downloaded = False self._session = session def list_courses(self): @@ -47,13 +47,14 @@ class CourseraExtractor(PlatformExtractor): def get_modules(self, class_name, reverse=False, unrestricted_filenames=False, subtitle_language='en', video_resolution=None, - download_quizzes=False, mathjax_cdn_url=None): + download_quizzes=False, mathjax_cdn_url=None, + download_notebooks=False): page = self._get_on_demand_syllabus(class_name) error_occured, modules = self._parse_on_demand_syllabus( 
page, reverse, unrestricted_filenames, subtitle_language, video_resolution, - download_quizzes, mathjax_cdn_url) + download_quizzes, mathjax_cdn_url, download_notebooks) return error_occured, modules def _get_on_demand_syllabus(self, class_name): @@ -72,7 +73,8 @@ class CourseraExtractor(PlatformExtractor): subtitle_language='en', video_resolution=None, download_quizzes=False, - mathjax_cdn_url=None + mathjax_cdn_url=None, + download_notebooks=False ): """ Parse a Coursera on-demand course listing/syllabus page. @@ -145,8 +147,7 @@ class CourseraExtractor(PlatformExtractor): video_resolution, assets) elif typename == 'supplement': - links = course.extract_links_from_supplement( - lecture['id']) + links = course.extract_links_from_supplement(lecture['id']) elif typename in ('gradedProgramming', 'ungradedProgramming'): links = course.extract_links_from_programming(lecture['id']) @@ -162,7 +163,12 @@ class CourseraExtractor(PlatformExtractor): elif typename == 'programming': if download_quizzes: links = course.extract_links_from_programming_immediate_instructions(lecture['id']) - + + elif typename == 'notebook': + if download_notebooks and self._notebook_downloaded == False: + logging.warning('According to notebooks platform, content will be downloaded first') + links = course.extract_links_from_notebook(lecture['id']) + self._notebook_downloaded = True else: logging.info('Unsupported typename "%s" in lecture "%s"', typename, lecture_slug) From 788f9539fb4ee0da7767867b2e979e11b8e9ee10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mois=C3=A9s=20Lodeiro?= Date: Mon, 20 Nov 2017 11:27:52 +0000 Subject: [PATCH 033/105] Added info message when skipping file --- coursera/api.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/coursera/api.py b/coursera/api.py index 6376709..de8679b 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -498,6 +498,9 @@ class CourseraOnDemand(object): logging.info('Downloading {} into {}'.format(tail, head)) with open(self._course_name + 
"/notebook/" + head + "/" + tail, 'wb+') as f: f.write(r.content) + else: + logging.info('Skipping {}... (file exists)'.format(tail)) + if not str(extension[1:]) in supplement_links: supplement_links[str(extension[1:])] = [] @@ -520,6 +523,8 @@ class CourseraOnDemand(object): logging.info('Downloading Jupyter {} into {}'.format(tail, head)) with open(self._course_name + "/notebook/" + head + "/" + tail, 'wb+') as f: f.write(r.content) + else: + logging.info('Skipping {}... (file exists)'.format(tail)) if not "ipynb" in supplement_links: supplement_links["ipynb"] = [] From 27fae191840befdb0d10c269c6fe648b98a88719 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Wed, 29 Nov 2017 22:44:08 -0200 Subject: [PATCH 034/105] travis: Remove Python 3.3 from build matrix. This closes #632. Thanks @PrabhanshuAttri for the sharp eye. --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ae0053..6c13a24 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,14 +2,12 @@ language: python python: - "2.6" - "2.7" - - "3.3" - "3.4" - "3.5" - "3.6" - "pypy" matrix: allow_failures: - - python: "3.3" - python: "pypy" # command to install dependencies install: From e16d9c1ae39441ce396004763574d5474e5fc872 Mon Sep 17 00:00:00 2001 From: orlandocr Date: Mon, 8 Jan 2018 20:06:19 -0600 Subject: [PATCH 035/105] Fixed --download-quizzes usage Fixed --download-quizzes parameter usage description --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 28c43ef..599ff0f 100644 --- a/README.md +++ b/README.md @@ -287,7 +287,7 @@ where the script is supposed to be executed, with the following format: --username --password --subtitle-language en,zh-CN|zh-TW - --download-quizzes True + --download-quizzes #--mathjax-cdn https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js # more other parameters From 2250ea6238fdad261f4bc8200d7d487ffd954153 Mon Sep 17 00:00:00 2001 From: SCaffrey Date: Mon, 15 
Jan 2018 22:13:08 +0800 Subject: [PATCH 036/105] Update README.md ref: https://github.com/googlehosts/hosts/blob/master/hosts-files/hosts#L163 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 28c43ef..c563a50 100644 --- a/README.md +++ b/README.md @@ -403,9 +403,9 @@ one of the following actions solve your problem: ## China issues If you are from China and you're having problems downloading videos, -adding "52.84.246.72 d3c33hcgiwev3.cloudfront.net" in the hosts file +adding "52.84.167.78 d3c33hcgiwev3.cloudfront.net" in the hosts file (/etc/hosts) and freshing DNS with "ipconfig/flushdns" may work -(see this [comment](https://github.com/coursera-dl/coursera-dl/issues/606#issuecomment-305698809)). +(see https://github.com/googlehosts/hosts for more info). ## Found 0 sections and 0 lectures on this page From 9cf1af597917c65a42b604ef7437986854a48f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:45:02 -0300 Subject: [PATCH 037/105] CHANGELOG: Update with info of new release. --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5526c73..8806246 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## 0.10.0 (2018-02-19) + +Features: + - Support Coursera Notebooks (option: `--download-notebooks`) + - Add hints in the documentation for users in China + ## 0.9.0 (2017-05-25) Features: From 761c7fb1880ee9769a17abf9f900de9263d23c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:45:31 -0300 Subject: [PATCH 038/105] coursera: Update version number. 
[ci skip] --- coursera/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/__init__.py b/coursera/__init__.py index e4e49b3..9d1bb72 100644 --- a/coursera/__init__.py +++ b/coursera/__init__.py @@ -1 +1 @@ -__version__ = '0.9.0' +__version__ = '0.10.0' From b4ebc526ac81caf88dde936a14cef3e1b461a5a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:53:17 -0300 Subject: [PATCH 039/105] README: Remove unavailable badge. --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 919314b..0ae240f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Build status](https://ci.appveyor.com/api/projects/status/3hru0ycv5fbny5k8/branch/master?svg=true)](https://ci.appveyor.com/project/balta2ar/coursera-dl/branch/master) [![Coverage Status](https://coveralls.io/repos/coursera-dl/coursera-dl/badge.svg)](https://coveralls.io/r/coursera-dl/coursera-dl) [![Latest version on PyPI](https://img.shields.io/pypi/v/coursera-dl.svg)](https://pypi.python.org/pypi/coursera-dl) -[![Downloads from PyPI](https://img.shields.io/pypi/dm/coursera-dl.svg)](https://pypi.python.org/pypi/coursera-dl) [![Code Climate](https://codeclimate.com/github/coursera-dl/coursera-dl/badges/gpa.svg)](https://codeclimate.com/github/coursera-dl/coursera-dl) - [Introduction](#introduction) From 6e933dd0a115bd4c5a0b8e35860693547af369df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:54:32 -0300 Subject: [PATCH 040/105] setup.py: Remove support for Python 3.3 and add for Python 3.6. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 19c0be5..f489078 100644 --- a/setup.py +++ b/setup.py @@ -75,9 +75,9 @@ trove_classifiers = [ 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Programming Language :: Python', From c484e66a45cde3a8422e3d8e9967852cc35ddec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:57:20 -0300 Subject: [PATCH 041/105] README: Adjust Python 3 versions. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ae240f..5560929 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ relevant excerpt: `coursera-dl` requires Python 2 or Python 3 and a free Coursera account enrolled in the class of interest. (As of February of 2016, we test automatically the execution of the program with Python versions 2.6, 2.7, -Pypy, 3.2, 3.3, 3.4, and 3.5). +Pypy, 3.4, 3.5, and 3.6). **Note:** We *strongly* recommend that you use a Python 3 interpreter (3.4 or later). From 2e265ef24e99d2e026e9d7a31de9d15edc92ec45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rog=C3=A9rio=20Brito?= Date: Mon, 19 Feb 2018 22:59:04 -0300 Subject: [PATCH 042/105] README: Remove dead bitdeli badge. --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 5560929..96736a3 100644 --- a/README.md +++ b/README.md @@ -587,5 +587,3 @@ geemail dotcom (twitter: [@jplehmann][12]). 
[issue213]: https://github.com/coursera-dl/coursera-dl/issues/213 [issue500]: https://github.com/coursera-dl/coursera-dl/issues/500 [pipinstallerbug]: http://stackoverflow.com/questions/31808180/installing-pyinstaller-via-pip-leads-to-failed-to-create-process - -[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/coursera-dl/coursera-dl/trend.png)](https://bitdeli.com/free "Bitdeli Badge") From 360aec5f2724a77d718b82617671a35cfc4529af Mon Sep 17 00:00:00 2001 From: OPSXCQ Date: Fri, 3 Nov 2017 21:36:59 -0200 Subject: [PATCH 043/105] dockerfile added --- Dockerfile | 22 ++++++++++++++++++++++ README.md | 13 +++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..65915b4 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.4-slim + +LABEL maintainer "opsxcq@strm.sh" + +WORKDIR /src +COPY requirements.txt /src + +COPY requirements-dev.txt /src + +RUN apt-get update && \ + apt-get install -y --no-install-recommends gcc g++ libssl-dev && \ + rm -rf /var/lib/apt/lists/* && \ + pip install -r requirements.txt && \ + pip install -r requirements-dev.txt && \ + apt-get purge -y --auto-remove gcc g++ libssl-dev + +COPY . /src +RUN python setup.py install + +WORKDIR /courses +ENTRYPOINT ["coursera-dl"] +CMD ["--help"] diff --git a/README.md b/README.md index 96736a3..1a2f8a4 100644 --- a/README.md +++ b/README.md @@ -214,6 +214,19 @@ your own, please check that the versions of your modules are at least those listed in the `requirements.txt` file (and, `requirements-dev.txt` file, if applicable). +## Docker + +If you prefer you can run this software inside Docker: + +``` +docker run --rm -it \ + -v "$(pwd):/courses" \ + strm/coursera-dl \ + -u -p +``` + +The actual working dir for coursera-dl is /courses, all courses will be downloaded there if you don't specify otherwise. 
+ ## Windows `python -m pip install coursera-dl` From 4326937e1207f5ed0e4024e1f5f50234bc124840 Mon Sep 17 00:00:00 2001 From: OPSXCQ Date: Fri, 2 Mar 2018 18:36:46 +0000 Subject: [PATCH 044/105] Removed old Dockerfile and related files. Below is a list of reasons for the change: * It is better to use an official python image instead * UBUNTU:14 is very old, better use a more recent image * The old Docker image used to clone this repo, which isn't necessary. * Old Docker image wasn't ephemeral enough * For layer optimization, it is better to add the dependencies before installing the software * Old image didn't set an entrypoint --- deploy/.netrc | 1 - deploy/Dockerfile | 14 -------------- deploy/README.md | 10 ---------- deploy/build.sh | 7 ------- deploy/download.sh | 15 --------------- 5 files changed, 47 deletions(-) delete mode 100644 deploy/.netrc delete mode 100644 deploy/Dockerfile delete mode 100644 deploy/README.md delete mode 100755 deploy/build.sh delete mode 100755 deploy/download.sh diff --git a/deploy/.netrc b/deploy/.netrc deleted file mode 100644 index bd0c698..0000000 --- a/deploy/.netrc +++ /dev/null @@ -1 +0,0 @@ -machine coursera-dl login password diff --git a/deploy/Dockerfile b/deploy/Dockerfile deleted file mode 100644 index 3a73264..0000000 --- a/deploy/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM ubuntu:14.04 -MAINTAINER Dmitry Senin - -RUN apt-get update -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git build-essential libssl-dev libffi-dev -RUN DEBIAN_FRONTEND=noninteractive apt-get install -y python-pip python-dev -RUN pip install ndg-httpsclient - -COPY .netrc /root/.netrc -RUN chmod 0600 /root/.netrc - -RUN cd /root && git clone https://github.com/coursera-dl/coursera.git -RUN cd /root/coursera && pip install -r requirements.txt -RUN cd /usr/bin && ln -s /root/coursera/coursera-dl coursera-dl diff --git a/deploy/README.md b/deploy/README.md deleted file mode 100644 index 30725b9..0000000 --- a/deploy/README.md +++ /dev/null @@ -1,10 
+0,0 @@ -# How to launch the container - -1. [optional] Insert your username and password in the `.netrc` file if you - plan to use the `-n` optionof `coursera-dl` (edit template in this - directory). -2. Build Docker image: - `./build.sh` -3. Run Docker container to download courses A, B and C: - `./download.sh A B C` -4. All courses will be downloaded in directory `~/courses` diff --git a/deploy/build.sh b/deploy/build.sh deleted file mode 100755 index 55c1661..0000000 --- a/deploy/build.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh - -if groups | grep -q "docker" ; then - docker build --tag coursera-img --rm . -else - sudo docker build --tag coursera-img --rm . -fi diff --git a/deploy/download.sh b/deploy/download.sh deleted file mode 100755 index bcda3b9..0000000 --- a/deploy/download.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -COURSES=$* - -if [ ! -e ~/courses ]; then - mkdir ~/courses -fi - -if groups | grep -q "docker" ; then - docker run --rm --name coursera -v ~/courses:/courses coursera-img \ - coursera-dl -n --path /courses $COURSES -else - sudo docker run --rm --name coursera -v ~/courses:/courses coursera-img \ - coursera-dl -n --path /courses $COURSES -fi From acfa6c5fce19b5cb797a00633ce32908ef086d02 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 25 Mar 2018 16:10:10 +0300 Subject: [PATCH 045/105] Fix style and whitespace --- coursera/api.py | 39 ++++++++++++++++++--------------------- coursera/test/test_api.py | 2 +- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index de8679b..85770a5 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -5,6 +5,7 @@ downloader. 
""" import os +import re import json import base64 import logging @@ -468,7 +469,7 @@ class CourseraOnDemand(object): supplement_links = {} - url = url.format(**kwargs) + url = url.format(**kwargs) reply = get_page( self._session, url, @@ -478,21 +479,21 @@ class CourseraOnDemand(object): headers = self._auth_headers_with_json() for content in reply['content']: - + if content['type'] == 'directory': a = self._get_notebook_folder(OPENCOURSE_NOTEBOOK_TREE, jupyterId, jupId=jupyterId, path=content['path'], timestamp=int(time.time())) supplement_links.update(a) - + elif content['type'] == 'file': tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format(path=content['path'], jupId=jupyterId, timestamp=int(time.time())) filename, extension = os.path.splitext(clean_url(tmpUrl)) - + head, tail = os.path.split(content['path']) - + if os.path.isdir(self._course_name + "/notebook/" + head + "/") == False: logging.info('Creating [{}] directories...'.format(head)) os.makedirs(self._course_name + "/notebook/" + head + "/") - + r = requests.get(tmpUrl.replace(" ", "%20"), cookies=self._session.cookies) if os.path.exists(self._course_name + "/notebook/" + head + "/" + tail) == False: logging.info('Downloading {} into {}'.format(tail, head)) @@ -504,20 +505,19 @@ class CourseraOnDemand(object): if not str(extension[1:]) in supplement_links: supplement_links[str(extension[1:])] = [] - - supplement_links[str(extension[1:])].append((tmpUrl.replace(" ", "%20"), filename)) + supplement_links[str(extension[1:])].append((tmpUrl.replace(" ", "%20"), filename)) elif content['type'] == 'notebook': tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format(path=content['path'], jupId=jupyterId, timestamp=int(time.time())) filename, extension = os.path.splitext(clean_url(tmpUrl)) - + head, tail = os.path.split(content['path']) - + if os.path.isdir(self._course_name + "/notebook/" + head + "/") == False: logging.info('Creating [{}] directories...'.format(head)) os.makedirs(self._course_name + "/notebook/" + head + "/") 
- + r = requests.get(tmpUrl.replace(" ", "%20"), cookies=self._session.cookies) if os.path.exists(self._course_name + "/notebook/" + head + "/" + tail) == False: logging.info('Downloading Jupyter {} into {}'.format(tail, head)) @@ -528,18 +528,16 @@ class CourseraOnDemand(object): if not "ipynb" in supplement_links: supplement_links["ipynb"] = [] - + supplement_links["ipynb"].append((tmpUrl.replace(" ", "%20"), filename)) else: logging.info('Unsupported typename {} in notebook'.format(content['type'])) - + return supplement_links - def _get_notebook_json(self, notebook_id, authorizationId): - - import re, time + headers = self._auth_headers_with_json() reply = get_page( self._session, @@ -553,22 +551,21 @@ class CourseraOnDemand(object): if len(jupyterId) == 0: logging.error('Could not download notebook %s', notebook_id) return None - + jupyterId = jupyterId[0] newReq = requests.Session() req = newReq.get(OPENCOURSE_NOTEBOOK_TREE.format(jupId=jupyterId, path="/", timestamp=int(time.time())), headers=headers) - + return self._get_notebook_folder(OPENCOURSE_NOTEBOOK_TREE, jupyterId, jupId=jupyterId, path="/", timestamp=int(time.time())) - def extract_links_from_notebook(self, notebook_id): - try: + try: authorizationId = self._extract_notebook_text(notebook_id) ret = self._get_notebook_json(notebook_id, authorizationId) return ret - except requests.exceptions.HTTPError as exception: + except requests.exceptions.HTTPError as exception: logging.error('Could not download notebook %s: %s', notebook_id, exception) if is_debug_run(): logging.exception('Could not download notebook %s: %s', notebook_id, exception) diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index 23096ed..fbd445d 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -117,7 +117,7 @@ def test_extract_links_from_reference_http_error(get_page, course): @patch('coursera.api.get_page') def test_extract_links_from_programming_immediate_instructions_http_error( - get_page, 
course): + get_page, course): """ This test checks that downloader skips locked programming immediate instructions instead of throwing an error. (Locked == returning 403 error code) From 26cf38cee3275767fd706d981b999b65f7bb25df Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 25 Mar 2018 16:13:27 +0300 Subject: [PATCH 046/105] Add support for "phasedPeer" typename (peer assignment instructions) The structure is very similar to all other text instructions but peer assignment instructions should be retrieved using the following API method: onDemandPeerAssignmentInstructions.v1 fix #650 --- coursera/api.py | 69 +++++++++++++++++++++++++++ coursera/define.py | 104 +++++++++++++++++++++++++++++++++++++++++ coursera/extractors.py | 11 +++-- 3 files changed, 181 insertions(+), 3 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index 85770a5..a8859e0 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -31,6 +31,7 @@ from .define import (OPENCOURSE_SUPPLEMENT_URL, OPENCOURSE_REFERENCES_POLL_URL, OPENCOURSE_REFERENCE_ITEM_URL, OPENCOURSE_PROGRAMMING_IMMEDIATE_INSTRUCTIOINS_URL, + OPENCOURSE_PEER_ASSIGNMENT_INSTRUCTIONS, # New feature, Notebook (Python Jupyter) OPENCOURSE_NOTEBOOK_DESCRIPTIONS, @@ -992,6 +993,39 @@ class CourseraOnDemand(object): element_id, exception) return None + def extract_links_from_peer_assignment(self, element_id): + """ + Return a dictionary with links to supplement files (pdf, csv, zip, + ipynb, html and so on) extracted from peer assignment. + + @param element_id: Element ID to extract files from. + @type element_id: str + + @return: @see CourseraOnDemand._extract_links_from_text + """ + logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) + + try: + # Assignment text (instructions) contains asset tags which describe + # supplementary files. 
+ text = ''.join(self._extract_peer_assignment_text(element_id)) + if not text: + return {} + + supplement_links = self._extract_links_from_text(text) + instructions = (IN_MEMORY_MARKER + self._markup_to_html(text), + 'peer_assignment_instructions') + extend_supplement_links( + supplement_links, {IN_MEMORY_EXTENSION: [instructions]}) + return supplement_links + except requests.exceptions.HTTPError as exception: + logging.error('Could not download peer assignment %s: %s', + element_id, exception) + if is_debug_run(): + logging.exception('Could not download peer assignment %s: %s', + element_id, exception) + return None + def extract_links_from_supplement(self, element_id): """ Return a dictionary with supplement files (pdf, csv, zip, ipynb, html @@ -1209,6 +1243,41 @@ class CourseraOnDemand(object): ['assignmentInstructions']['definition']['value'] for element in dom['elements']] + def _extract_peer_assignment_text(self, element_id): + """ + Extract peer assignment text (instructions). + + @param element_id: Element id to extract peer assignment instructions from. + @type element_id: str + + @return: List of peer assignment text (instructions). + @rtype: [str] + """ + dom = get_page(self._session, OPENCOURSE_PEER_ASSIGNMENT_INSTRUCTIONS, + json=True, + user_id=self._user_id, + course_id=self._course_id, + element_id=element_id) + + result = [] + + for element in dom['elements']: + # There is only one section with Instructions + if 'introduction' in element['instructions']: + result.append(element['instructions']['introduction']['definition']['value']) + + # But there may be multiple sections in Sections + for section in element['instructions'].get('sections', []): + section_value = section['content']['definition']['value'] + section_title = section.get('title') + if section_title is not None: + # If section title is present, put it in the beginning of + # section value as if it was there. 
+ section_value = ('%s' % section_title) + section_value + result.append(section_value) + + return result + def _extract_links_from_text(self, text): """ Extract supplement links from the html text. Links may be provided diff --git a/coursera/define.py b/coursera/define.py index 1fc9d17..ba9ff66 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -185,6 +185,110 @@ ABOUT_URL = ('https://api.coursera.org/api/catalog.v1/courses?' AUTH_REDIRECT_URL = ('https://class.coursera.org/{class_name}' '/auth/auth_redirector?type=login&subtype=normal') +# Sample URL: +# +# https://www.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?q=latest&userId=4958&courseId=RcnRZHHtEeWxvQr3acyajw&itemId=2yTvX&includes=gradingMetadata%2CreviewSchemas%2CsubmissionSchemas&fields=instructions%2ConDemandPeerAssignmentGradingMetadata.v1(requiredAuthoredReviewCount%2CisMentorGraded%2CassignmentDetails)%2ConDemandPeerReviewSchemas.v1(reviewSchema)%2ConDemandPeerSubmissionSchemas.v1(submissionSchema) +# +# Sample response: +# +# { +# "elements": [ +# { +# "instructions": { +# "introduction": { +# "typeName": "cml", +# "definition": { +# "dtdId": "assess/1", +# "value": "Ваше первое задание заключается в установке Python и библиотек.." +# } +# }, +# "sections": [ +# { +# "typeId": "unknown", +# "title": "Review criteria", +# "content": { +# "typeName": "cml", +# "definition": { +# "dtdId": "assess/1", +# "value": "В результате работы вы установите на компьютер Python и библиотеки, необходимые для дальнейшего прохождения курса.." 
+# } +# } +# } +# ] +# }, +# "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" +# } +# ], +# "paging": {}, +# "linked": { +# "onDemandPeerSubmissionSchemas.v1": [ +# { +# "submissionSchema": { +# "parts": [ +# { +# "details": { +# "typeName": "fileUpload", +# "definition": { +# "required": false +# } +# }, +# "id": "_fcfP3bPT5W4pkfkshmUAQ", +# "prompt": { +# "typeName": "cml", +# "definition": { +# "dtdId": "assess/1", +# "value": "Загрузите скриншот №1." +# } +# } +# }, +# { +# "details": { +# "typeName": "fileUpload", +# "definition": { +# "required": false +# } +# }, +# "id": "92ea4b4e-3492-41eb-ee32-2624ee807bd3", +# "prompt": { +# "typeName": "cml", +# "definition": { +# "dtdId": "assess/1", +# "value": "Загрузите скриншот №2." +# } +# } +# } +# ] +# }, +# "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" +# } +# ], +# "onDemandPeerAssignmentGradingMetadata.v1": [ +# { +# "assignmentDetails": { +# "typeName": "phased", +# "definition": { +# "receivedReviewCutoffs": { +# "count": 3 +# }, +# "passingFraction": 0.8 +# } +# }, +# "requiredAuthoredReviewCount": 3, +# "isMentorGraded": false, +# "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" +# } +# ], +# "onDemandPeerReviewSchemas.v1": [] +# } +# } +# +# This URL is used to retrieve "phasedPeer" typename instructions' contents +OPENCOURSE_PEER_ASSIGNMENT_INSTRUCTIONS = ( + 'https://www.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?' 
+ 'q=latest&userId={user_id}&courseId={course_id}&itemId={element_id}&' + 'includes=gradingMetadata%2CreviewSchemas%2CsubmissionSchemas&' + 'fields=instructions%2ConDemandPeerAssignmentGradingMetadata.v1(requiredAuthoredReviewCount%2CisMentorGraded%2CassignmentDetails)%2ConDemandPeerReviewSchemas.v1(reviewSchema)%2ConDemandPeerSubmissionSchemas.v1(submissionSchema)') + #POST_OPENCOURSE_API_QUIZ_SESSION = 'https://www.coursera.org/api/opencourse.v1/user/4958/course/text-mining/item/7OQHc/quiz/session' # Sample response: # diff --git a/coursera/extractors.py b/coursera/extractors.py index 6e6d631..7d23b96 100644 --- a/coursera/extractors.py +++ b/coursera/extractors.py @@ -149,6 +149,9 @@ class CourseraExtractor(PlatformExtractor): elif typename == 'supplement': links = course.extract_links_from_supplement(lecture['id']) + elif typename == 'phasedPeer': + links = course.extract_links_from_peer_assignment(lecture['id']) + elif typename in ('gradedProgramming', 'ungradedProgramming'): links = course.extract_links_from_programming(lecture['id']) @@ -163,15 +166,17 @@ class CourseraExtractor(PlatformExtractor): elif typename == 'programming': if download_quizzes: links = course.extract_links_from_programming_immediate_instructions(lecture['id']) - + elif typename == 'notebook': if download_notebooks and self._notebook_downloaded == False: logging.warning('According to notebooks platform, content will be downloaded first') links = course.extract_links_from_notebook(lecture['id']) self._notebook_downloaded = True + else: - logging.info('Unsupported typename "%s" in lecture "%s"', - typename, lecture_slug) + logging.info( + 'Unsupported typename "%s" in lecture "%s" (lecture id "%s")', + typename, lecture_slug, lecture['id']) continue if links is None: From 1ed4490b5b0667d97e209ddd20631649d48637b7 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 25 Mar 2018 16:17:34 +0300 Subject: [PATCH 047/105] Add tests for "phasedPeer" typename (peer assignment instructions) --- 
coursera/test/test_api.py | 24 ++++++++++++++++++++++-- coursera/test/utils.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index fbd445d..063d274 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -10,7 +10,7 @@ from mock import patch, Mock from coursera import api from coursera import define -from coursera.test.utils import slurp_fixture +from coursera.test.utils import slurp_fixture, links_to_plain_text from coursera.utils import BeautifulSoup from requests.exceptions import HTTPError @@ -139,9 +139,28 @@ def test_ondemand_programming_supplement_no_instructions(get_page, course): assert {} == output +@patch('coursera.api.get_page') +@pytest.mark.parametrize( + "input_filename,expected_output", [ + ('peer-assignment-instructions-all.json', 'intro Review criteria section'), + ('peer-assignment-instructions-no-title.json', 'intro section'), + ('peer-assignment-instructions-only-introduction.json', 'intro'), + ('peer-assignment-instructions-only-sections.json', 'Review criteria section'), + ('peer-assignment-no-instructions.json', ''), + ] +) +def test_ondemand_from_peer_assgnment_instructions( + get_page, course, input_filename, expected_output): + instructions = slurp_fixture('json/%s' % input_filename) + get_page.return_value = json.loads(instructions) + + output = course.extract_links_from_peer_assignment('0') + assert expected_output == links_to_plain_text(output) + + @patch('coursera.api.get_page') def test_ondemand_from_programming_immediate_instructions_no_instructions( - get_page, course): + get_page, course): no_instructions = slurp_fixture( 'json/supplement-programming-immediate-instructions-no-instructions.json') get_page.return_value = json.loads(no_instructions) @@ -149,6 +168,7 @@ def test_ondemand_from_programming_immediate_instructions_no_instructions( output = 
course.extract_links_from_programming_immediate_instructions('0') assert {} == output + @patch('coursera.api.get_page') def test_ondemand_programming_supplement_empty_instructions(get_page, course): empty_instructions = slurp_fixture('json/supplement-programming-empty-instructions.json') diff --git a/coursera/test/utils.py b/coursera/test/utils.py index cc6805e..0e8e1a2 100644 --- a/coursera/test/utils.py +++ b/coursera/test/utils.py @@ -2,9 +2,43 @@ Helper functions that are only used in tests. """ import os +import re from io import open +from six import iteritems + +from coursera.define import IN_MEMORY_MARKER +from coursera.utils import BeautifulSoup + def slurp_fixture(path): return open(os.path.join(os.path.dirname(__file__), "fixtures", path), encoding='utf8').read() + + +def links_to_plain_text(links): + """ + Converts extracted links into text and cleans up extra whitespace. Only HTML + sections are converted. This is a helper to be used in tests. + + @param links: Links obtained from such methods as extract_links_from_peer_assignment. + @type links: @see CourseraOnDemand._extract_links_from_text + + @return: HTML converted to plain text with extra space removed. 
+ @rtype: str + """ + result = [] + for filetype, contents in iteritems(links): + if filetype != 'html': + continue + + for content, _prefix in contents: + if content.startswith(IN_MEMORY_MARKER): + content = content[len(IN_MEMORY_MARKER):] + + soup = BeautifulSoup(content) + [script.extract() for script in soup(["script", "style"])] + text = re.sub(r'[ \t\r\n]+', ' ', soup.get_text()).strip() + result.append(text) + + return ''.join(result) From fda7e337c3f0307b1c2fba628b9f51000585cd66 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 25 Mar 2018 16:18:01 +0300 Subject: [PATCH 048/105] Add fixtures to test "phasedPeer" --- .../peer-assignment-instructions-all.json | 29 +++++++++++++++++++ ...peer-assignment-instructions-no-title.json | 28 ++++++++++++++++++ ...gnment-instructions-only-introduction.json | 16 ++++++++++ ...assignment-instructions-only-sections.json | 22 ++++++++++++++ .../json/peer-assignment-no-instructions.json | 4 +++ 5 files changed, 99 insertions(+) create mode 100644 coursera/test/fixtures/json/peer-assignment-instructions-all.json create mode 100644 coursera/test/fixtures/json/peer-assignment-instructions-no-title.json create mode 100644 coursera/test/fixtures/json/peer-assignment-instructions-only-introduction.json create mode 100644 coursera/test/fixtures/json/peer-assignment-instructions-only-sections.json create mode 100644 coursera/test/fixtures/json/peer-assignment-no-instructions.json diff --git a/coursera/test/fixtures/json/peer-assignment-instructions-all.json b/coursera/test/fixtures/json/peer-assignment-instructions-all.json new file mode 100644 index 0000000..70d9f5c --- /dev/null +++ b/coursera/test/fixtures/json/peer-assignment-instructions-all.json @@ -0,0 +1,29 @@ +{ + "elements": [ + { + "instructions": { + "introduction": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "intro" + } + }, + "sections": [ + { + "typeId": "unknown", + "title": "Review criteria", + "content": { + "typeName": 
"cml", + "definition": { + "dtdId": "assess/1", + "value": "section" + } + } + } + ] + }, + "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" + } + ] +} diff --git a/coursera/test/fixtures/json/peer-assignment-instructions-no-title.json b/coursera/test/fixtures/json/peer-assignment-instructions-no-title.json new file mode 100644 index 0000000..f210263 --- /dev/null +++ b/coursera/test/fixtures/json/peer-assignment-instructions-no-title.json @@ -0,0 +1,28 @@ +{ + "elements": [ + { + "instructions": { + "introduction": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "intro" + } + }, + "sections": [ + { + "typeId": "unknown", + "content": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "section" + } + } + } + ] + }, + "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" + } + ] +} diff --git a/coursera/test/fixtures/json/peer-assignment-instructions-only-introduction.json b/coursera/test/fixtures/json/peer-assignment-instructions-only-introduction.json new file mode 100644 index 0000000..7a186c4 --- /dev/null +++ b/coursera/test/fixtures/json/peer-assignment-instructions-only-introduction.json @@ -0,0 +1,16 @@ +{ + "elements": [ + { + "instructions": { + "introduction": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "intro" + } + } + }, + "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" + } + ] +} diff --git a/coursera/test/fixtures/json/peer-assignment-instructions-only-sections.json b/coursera/test/fixtures/json/peer-assignment-instructions-only-sections.json new file mode 100644 index 0000000..7cd735c --- /dev/null +++ b/coursera/test/fixtures/json/peer-assignment-instructions-only-sections.json @@ -0,0 +1,22 @@ +{ + "elements": [ + { + "instructions": { + "sections": [ + { + "typeId": "unknown", + "title": "Review criteria", + "content": { + "typeName": "cml", + "definition": { + "dtdId": "assess/1", + "value": "section" + } + } + } 
+ ] + }, + "id": "4958~RcnRZHHtEeWxvQr3acyajw~2yTvX~8x7Qhs66EeW2Tw715xhIPQ@13" + } + ] +} diff --git a/coursera/test/fixtures/json/peer-assignment-no-instructions.json b/coursera/test/fixtures/json/peer-assignment-no-instructions.json new file mode 100644 index 0000000..9764791 --- /dev/null +++ b/coursera/test/fixtures/json/peer-assignment-no-instructions.json @@ -0,0 +1,4 @@ +{ + "elements": [ + ] +} From 7d6d0909abec759a1205250944df48c297aff346 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 8 Apr 2018 21:47:37 +0300 Subject: [PATCH 049/105] Mention how to configure timeouts for an external downloader (aria2c) ref #453 ref #626 --- README.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/README.md b/README.md index 96736a3..5236470 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ - [Resuming downloads](#resuming-downloads) - [Troubleshooting](#troubleshooting) - [China issues](#china-issues) + - [Download timeouts](#download-timeouts) - [Found 0 sections and 0 lectures on this page](#found-0-sections-and-0-lectures-on-this-page) - [Windows: Proxy support](#windows-proxy-support) - [Windows: Failed to create process](#windows-failed-to-create-process) @@ -424,6 +425,30 @@ Alternatively you may want to try this Chrome extension: https://chrome.google.c If none of the above works for you, there is nothing we can do. +## Download timeouts + +Coursera-dl supports external downloaders but note that they are only used to +download materials after the syllabus has been parsed, e.g. videos, PDFs, some +handouts and additional files (syllabus is always downloaded using the internal +downloader). If you experience problems with downloading such materials, you may +want to start using external downloader and configure its timeout values. For +example, you can use aria2c downloader by passing `--aria2` option: + +``` +coursera-dl -n --path .
--aria2 +``` + +And put this into aria2c's configuration file `~/.aria2/aria2.conf` to reduce +timeouts: + +``` +connect-timeout=2 +timeout=2 +bt-stop-timeout=1 +``` + +Timeout configuration for internal downloader is not supported. + ## Windows: proxy support If you're on Windows behind a proxy, set up the environment variables From 564c741755fd0f6880650caa88b9fbeb5845d6c6 Mon Sep 17 00:00:00 2001 From: NoUrEdDiN Date: Mon, 7 May 2018 10:11:48 +0200 Subject: [PATCH 050/105] improve and use clean_filename `clean_filename` wasn't used; it's now used before making directories or write files under the notebook folder. `clean_filename` is improved a little, to handle more Windows-specific edge cases. And the existing tests were updated accordingly. --- coursera/api.py | 2 ++ coursera/test/test_utils.py | 4 ++-- coursera/utils.py | 13 ++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index a8859e0..e9c5000 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -490,6 +490,8 @@ class CourseraOnDemand(object): filename, extension = os.path.splitext(clean_url(tmpUrl)) head, tail = os.path.split(content['path']) + head = '/'.join([clean_filename(dir, minimal_change=True) for dir in head.split('/')]) + tail = clean_filename(tail, minimal_change=True) if os.path.isdir(self._course_name + "/notebook/" + head + "/") == False: logging.info('Creating [{}] directories...'.format(head)) diff --git a/coursera/test/test_utils.py b/coursera/test/test_utils.py index ec198d5..8724519 100644 --- a/coursera/test/test_utils.py +++ b/coursera/test/test_utils.py @@ -34,7 +34,7 @@ from coursera.utils import total_seconds, is_course_complete ('Week 3: Data and Abstraction', 'Week_3-_Data_and_Abstraction'), ('  (Week 1) BRANDING: Marketing Strategy and Brand Positioning', 'Week_1_BRANDING-__Marketing_Strategy_and_Brand_Positioning'), - ('test & " adfas', 'test___adfas'), + ('test & " adfas', 'test__-_adfas'), # `"` were changed first 
to `-` (' ', ''), ('☂℮﹩т ω☤☂ℌ Ṳᾔ☤ḉ◎ⅾε', '__') ] @@ -54,7 +54,7 @@ def test_clean_filename(unclean, clean): 'Week 3- Data and Abstraction'), ('  (Week 1) BRANDING: Marketing Strategy and Brand Positioning', '  (Week 1) BRANDING- Marketing Strategy and Brand Positioning'), - ('test & " adfas', 'test & " adfas'), + ('test & " adfas', 'test & - adfas'), # `"` are forbidden on Windows (' ', u'\xa0'), ('☂℮﹩т ω☤☂ℌ Ṳᾔ☤ḉ◎ⅾε', '☂℮﹩т ω☤☂ℌ Ṳᾔ☤ḉ◎ⅾε') ] diff --git a/coursera/utils.py b/coursera/utils.py index 6fd4814..87c115e 100644 --- a/coursera/utils.py +++ b/coursera/utils.py @@ -106,13 +106,24 @@ def clean_filename(s, minimal_change=False): s = unquote_plus(s) # Strip forbidden characters + # https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx s = ( s.replace(':', '-') .replace('/', '-') + .replace('<', '-') + .replace('>', '-') + .replace('"', '-') + .replace('\\', '-') + .replace('|', '-') + .replace('?', '-') + .replace('*', '-') .replace('\x00', '-') - .replace('\n', '') + .replace('\n', ' ') ) + # Remove trailing dots and spaces; forbidden on Windows + s = s.rstrip(' .') + if minimal_change: return s From b01bde501e3e4a5159ea2f19d2118b50edca2bb6 Mon Sep 17 00:00:00 2001 From: Dharmanshu Saini <36137804+dharmanshu24@users.noreply.github.com> Date: Thu, 31 May 2018 21:00:17 +0530 Subject: [PATCH 051/105] Update Readme.md to run without -p field --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 5236470..1487b68 100644 --- a/README.md +++ b/README.md @@ -247,6 +247,10 @@ credentials (e.g. email address and password or a `~/.netrc` file), the class names, as well as any additional parameters: General: coursera-dl -u -p modelthinking-004 + +If you don't want to type your password in command line as plain text, you can use the script without `-p` option. In this case you will be prompted for password once the script is run. 
+ + Without -p field: coursera-dl -u modelthinking-004 Multiple classes: coursera-dl -u -p saas historyofrock1-001 algo-2012-002 Filter by section name: coursera-dl -u -p -sf "Chapter_Four" crypto-004 Filter by lecture name: coursera-dl -u -p -lf "3.1_" ml-2012-002 From 32e95d0d1cd9ccaf2b0e72de9fb9fc908ef7e02d Mon Sep 17 00:00:00 2001 From: TheGoddessInari Date: Fri, 1 Jun 2018 19:25:10 -0700 Subject: [PATCH 052/105] Switch to API subdomain for API URL defines. Started hitting errors today, switch from www.coursera.org/api/ to api.coursera.org/api/ to fix. --- coursera/define.py | 54 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/coursera/define.py b/coursera/define.py index ba9ff66..a50272a 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -13,11 +13,11 @@ HTTP_FORBIDDEN = 403 COURSERA_URL = 'https://www.coursera.org' AUTH_URL = 'https://accounts.coursera.org/api/v1/login' -AUTH_URL_V3 = 'https://www.coursera.org/api/login/v3' +AUTH_URL_V3 = 'https://api.coursera.org/api/login/v3' CLASS_URL = 'https://class.coursera.org/{class_name}' # The following link is left just for illustative purposes: -# https://www.coursera.org/api/courses.v1?fields=display%2CpartnerIds%2CphotoUrl%2CstartDate%2Cpartners.v1(homeLink%2Cname)&includes=partnerIds&q=watchlist&start=0 +# https://api.coursera.org/api/courses.v1?fields=display%2CpartnerIds%2CphotoUrl%2CstartDate%2Cpartners.v1(homeLink%2Cname)&includes=partnerIds&q=watchlist&start=0 # Reply is as follows: # { # "elements": [ @@ -34,10 +34,10 @@ CLASS_URL = 'https://class.coursera.org/{class_name}' # }, # "linked": {} # } -OPENCOURSE_LIST_COURSES = 'https://www.coursera.org/api/courses.v1?q=watchlist&start={start}' +OPENCOURSE_LIST_COURSES = 'https://api.coursera.org/api/courses.v1?q=watchlist&start={start}' # The following link is left just for illustative purposes: -# 
https://www.coursera.org/api/memberships.v1?fields=courseId,enrolledTimestamp,grade,id,lastAccessedTimestamp,onDemandSessionMembershipIds,onDemandSessionMemberships,role,v1SessionId,vc,vcMembershipId,courses.v1(courseStatus,display,partnerIds,photoUrl,specializations,startDate,v1Details,v2Details),partners.v1(homeLink,name),v1Details.v1(sessionIds),v1Sessions.v1(active,certificatesReleased,dbEndDate,durationString,hasSigTrack,startDay,startMonth,startYear),v2Details.v1(onDemandSessions,plannedLaunchDate,sessionsEnabledAt),specializations.v1(logo,name,partnerIds,shortName)&includes=courseId,onDemandSessionMemberships,vcMembershipId,courses.v1(partnerIds,specializations,v1Details,v2Details),v1Details.v1(sessionIds),v2Details.v1(onDemandSessions),specializations.v1(partnerIds)&q=me&showHidden=true&filter=current,preEnrolled +# https://api.coursera.org/api/memberships.v1?fields=courseId,enrolledTimestamp,grade,id,lastAccessedTimestamp,onDemandSessionMembershipIds,onDemandSessionMemberships,role,v1SessionId,vc,vcMembershipId,courses.v1(courseStatus,display,partnerIds,photoUrl,specializations,startDate,v1Details,v2Details),partners.v1(homeLink,name),v1Details.v1(sessionIds),v1Sessions.v1(active,certificatesReleased,dbEndDate,durationString,hasSigTrack,startDay,startMonth,startYear),v2Details.v1(onDemandSessions,plannedLaunchDate,sessionsEnabledAt),specializations.v1(logo,name,partnerIds,shortName)&includes=courseId,onDemandSessionMemberships,vcMembershipId,courses.v1(partnerIds,specializations,v1Details,v2Details),v1Details.v1(sessionIds),v2Details.v1(onDemandSessions),specializations.v1(partnerIds)&q=me&showHidden=true&filter=current,preEnrolled # Sample reply: # { # "elements": [ @@ -60,19 +60,19 @@ OPENCOURSE_LIST_COURSES = 'https://www.coursera.org/api/courses.v1?q=watchlist&s # ] # } # } -OPENCOURSE_MEMBERSHIPS = 'https://www.coursera.org/api/memberships.v1?includes=courseId,courses.v1&q=me&showHidden=true&filter=current,preEnrolled' -OPENCOURSE_CONTENT_URL = 
'https://www.coursera.org/api/opencourse.v1/course/{class_name}?showLockedItems=true' -OPENCOURSE_VIDEO_URL = 'https://www.coursera.org/api/opencourse.v1/video/{video_id}' -OPENCOURSE_SUPPLEMENT_URL = 'https://www.coursera.org/api/onDemandSupplements.v1/'\ +OPENCOURSE_MEMBERSHIPS = 'https://api.coursera.org/api/memberships.v1?includes=courseId,courses.v1&q=me&showHidden=true&filter=current,preEnrolled' +OPENCOURSE_CONTENT_URL = 'https://api.coursera.org/api/opencourse.v1/course/{class_name}?showLockedItems=true' +OPENCOURSE_VIDEO_URL = 'https://api.coursera.org/api/opencourse.v1/video/{video_id}' +OPENCOURSE_SUPPLEMENT_URL = 'https://api.coursera.org/api/onDemandSupplements.v1/'\ '{course_id}~{element_id}?includes=asset&fields=openCourseAssets.v1%28typeName%29,openCourseAssets.v1%28definition%29' OPENCOURSE_PROGRAMMING_ASSIGNMENTS_URL = \ - 'https://www.coursera.org/api/onDemandProgrammingLearnerAssignments.v1/{course_id}~{element_id}?fields=submissionLearnerSchema' + 'https://api.coursera.org/api/onDemandProgrammingLearnerAssignments.v1/{course_id}~{element_id}?fields=submissionLearnerSchema' OPENCOURSE_PROGRAMMING_IMMEDIATE_INSTRUCTIOINS_URL = \ - 'https://www.coursera.org/api/onDemandProgrammingImmediateInstructions.v1/{course_id}~{element_id}' + 'https://api.coursera.org/api/onDemandProgrammingImmediateInstructions.v1/{course_id}~{element_id}' OPENCOURSE_REFERENCES_POLL_URL = \ - "https://www.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=courseListed&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" + "https://api.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=courseListed&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" OPENCOURSE_REFERENCE_ITEM_URL = \ - "https://www.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=shortId&shortId={short_id}&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" + 
"https://api.coursera.org/api/onDemandReferences.v1/?courseId={course_id}&q=shortId&shortId={short_id}&fields=name%2CshortId%2Cslug%2Ccontent&includes=assets" # These are ids that are present in tag in assignment text: # @@ -95,7 +95,7 @@ OPENCOURSE_REFERENCE_ITEM_URL = \ # "linked": null # } OPENCOURSE_ASSET_URL = \ - 'https://www.coursera.org/api/assetUrls.v1?ids={ids}' + 'https://api.coursera.org/api/assetUrls.v1?ids={ids}' # These ids are provided in lecture json: # @@ -143,7 +143,7 @@ OPENCOURSE_ASSET_URL = \ # "linked": null # } OPENCOURSE_ASSETS_URL = \ - 'https://www.coursera.org/api/openCourseAssets.v1/{id}' + 'https://api.coursera.org/api/openCourseAssets.v1/{id}' # These asset ids are ids returned from OPENCOURSE_ASSETS_URL request: # See example above. @@ -166,10 +166,10 @@ OPENCOURSE_ASSETS_URL = \ # "linked": null # } OPENCOURSE_API_ASSETS_V1_URL = \ - 'https://www.coursera.org/api/assets.v1?ids={id}' + 'https://api.coursera.org/api/assets.v1?ids={id}' OPENCOURSE_ONDEMAND_COURSE_MATERIALS = \ - 'https://www.coursera.org/api/onDemandCourseMaterials.v1/?'\ + 'https://api.coursera.org/api/onDemandCourseMaterials.v1/?'\ 'q=slug&slug={class_name}&includes=moduleIds%2ClessonIds%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2CitemIds%2Ctracks'\ '&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight)%2ConDemandCourseMaterialItems.v1(name%2Cslug%2CtimeCommitment%2Ccontent%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId)%2ConDemandCourseMaterialTracks.v1(passablesCount)'\ '&showLockedItems=true' @@ -187,7 +187,7 
@@ AUTH_REDIRECT_URL = ('https://class.coursera.org/{class_name}' # Sample URL: # -# https://www.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?q=latest&userId=4958&courseId=RcnRZHHtEeWxvQr3acyajw&itemId=2yTvX&includes=gradingMetadata%2CreviewSchemas%2CsubmissionSchemas&fields=instructions%2ConDemandPeerAssignmentGradingMetadata.v1(requiredAuthoredReviewCount%2CisMentorGraded%2CassignmentDetails)%2ConDemandPeerReviewSchemas.v1(reviewSchema)%2ConDemandPeerSubmissionSchemas.v1(submissionSchema) +# https://api.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?q=latest&userId=4958&courseId=RcnRZHHtEeWxvQr3acyajw&itemId=2yTvX&includes=gradingMetadata%2CreviewSchemas%2CsubmissionSchemas&fields=instructions%2ConDemandPeerAssignmentGradingMetadata.v1(requiredAuthoredReviewCount%2CisMentorGraded%2CassignmentDetails)%2ConDemandPeerReviewSchemas.v1(reviewSchema)%2ConDemandPeerSubmissionSchemas.v1(submissionSchema) # # Sample response: # @@ -284,12 +284,12 @@ AUTH_REDIRECT_URL = ('https://class.coursera.org/{class_name}' # # This URL is used to retrieve "phasedPeer" typename instructions' contents OPENCOURSE_PEER_ASSIGNMENT_INSTRUCTIONS = ( - 'https://www.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?' + 'https://api.coursera.org/api/onDemandPeerAssignmentInstructions.v1/?' 
'q=latest&userId={user_id}&courseId={course_id}&itemId={element_id}&' 'includes=gradingMetadata%2CreviewSchemas%2CsubmissionSchemas&' 'fields=instructions%2ConDemandPeerAssignmentGradingMetadata.v1(requiredAuthoredReviewCount%2CisMentorGraded%2CassignmentDetails)%2ConDemandPeerReviewSchemas.v1(reviewSchema)%2ConDemandPeerSubmissionSchemas.v1(submissionSchema)') -#POST_OPENCOURSE_API_QUIZ_SESSION = 'https://www.coursera.org/api/opencourse.v1/user/4958/course/text-mining/item/7OQHc/quiz/session' +#POST_OPENCOURSE_API_QUIZ_SESSION = 'https://api.coursera.org/api/opencourse.v1/user/4958/course/text-mining/item/7OQHc/quiz/session' # Sample response: # # { @@ -305,9 +305,9 @@ OPENCOURSE_PEER_ASSIGNMENT_INSTRUCTIONS = ( # "progressState": "Started" # } # } -POST_OPENCOURSE_API_QUIZ_SESSION = 'https://www.coursera.org/api/opencourse.v1/user/{user_id}/course/{class_name}/item/{quiz_id}/quiz/session' +POST_OPENCOURSE_API_QUIZ_SESSION = 'https://api.coursera.org/api/opencourse.v1/user/{user_id}/course/{class_name}/item/{quiz_id}/quiz/session' -#POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE = 'https://www.coursera.org/api/opencourse.v1/user/4958/course/text-mining/item/7OQHc/quiz/session/opencourse~bVgqTevEEeWvGQrWsIkLlw:4958:BiNDdOvPEeWAkwqbKEEh3w@13:1468773901987@1/action/getState?autoEnroll=false' +#POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE = 'https://api.coursera.org/api/opencourse.v1/user/4958/course/text-mining/item/7OQHc/quiz/session/opencourse~bVgqTevEEeWvGQrWsIkLlw:4958:BiNDdOvPEeWAkwqbKEEh3w@13:1468773901987@1/action/getState?autoEnroll=false' # Sample response: # # { @@ -389,9 +389,9 @@ POST_OPENCOURSE_API_QUIZ_SESSION = 'https://www.coursera.org/api/opencourse.v1/u # } # } # -POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE = 'https://www.coursera.org/api/opencourse.v1/user/{user_id}/course/{class_name}/item/{quiz_id}/quiz/session/{session_id}/action/getState?autoEnroll=false' +POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE = 
'https://api.coursera.org/api/opencourse.v1/user/{user_id}/course/{class_name}/item/{quiz_id}/quiz/session/{session_id}/action/getState?autoEnroll=false' -#POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://www.coursera.org/api/onDemandExamSessions.v1/-N44X0IJEeWpogr5ZO8qxQ~YV0W4~10!~1467462079068/actions?includes=gradingAttempts' +#POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://api.coursera.org/api/onDemandExamSessions.v1/-N44X0IJEeWpogr5ZO8qxQ~YV0W4~10!~1467462079068/actions?includes=gradingAttempts' # Sample response: # # { @@ -532,14 +532,14 @@ POST_OPENCOURSE_API_QUIZ_SESSION_GET_STATE = 'https://www.coursera.org/api/openc # Request payload: # {"courseId":"-N44X0IJEeWpogr5ZO8qxQ","itemId":"YV0W4"} # -#POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://www.coursera.org/api/onDemandExamSessions.v1/-N44X0IJEeWpogr5ZO8qxQ~YV0W4~10!~1467462079068/actions?includes=gradingAttempts' +#POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://api.coursera.org/api/onDemandExamSessions.v1/-N44X0IJEeWpogr5ZO8qxQ~YV0W4~10!~1467462079068/actions?includes=gradingAttempts' # Response for this request is empty. Result (session_id) should be taken # either from Location header or from X-Coursera-Id header. 
# # Request payload: # {"courseId":"-N44X0IJEeWpogr5ZO8qxQ","itemId":"YV0W4"} -POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://www.coursera.org/api/onDemandExamSessions.v1' +POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://api.coursera.org/api/onDemandExamSessions.v1' # Sample response: # { @@ -851,7 +851,7 @@ POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS = 'https://www.coursera.org/api/onDemandE # # Request payload: # {"name":"getState","argument":[]} -POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS_GET_STATE = 'https://www.coursera.org/api/onDemandExamSessions.v1/{session_id}/actions?includes=gradingAttempts' +POST_OPENCOURSE_ONDEMAND_EXAM_SESSIONS_GET_STATE = 'https://api.coursera.org/api/onDemandExamSessions.v1/{session_id}/actions?includes=gradingAttempts' ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # define a per-user cache folder @@ -940,6 +940,6 @@ INSTRUCTIONS_HTML_INJECTION_AFTER ='''?config=TeX-AMS-MML_HTMLorMML"> # The following url is the root url (tree) for a Coursera Course OPENCOURSE_NOTEBOOK_DESCRIPTIONS = "https://hub.coursera-notebooks.org/hub/coursera_login?token={authId}&next=/" -OPENCOURSE_NOTEBOOK_LAUNCHES = "https://www.coursera.org/api/onDemandNotebookWorkspaceLaunches.v1/?fields=authorizationId%2CcontentPath%2CuseLegacySystem" +OPENCOURSE_NOTEBOOK_LAUNCHES = "https://api.coursera.org/api/onDemandNotebookWorkspaceLaunches.v1/?fields=authorizationId%2CcontentPath%2CuseLegacySystem" OPENCOURSE_NOTEBOOK_TREE = "https://hub.coursera-notebooks.org/user/{jupId}/api/contents/{path}?type=directory&_={timestamp}" OPENCOURSE_NOTEBOOK_DOWNLOAD = "https://hub.coursera-notebooks.org/user/{jupId}/files/{path}?download=1" From 362c21db55aeb8c2ae37c26c7b411f7699f03a07 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 2 Jun 2018 19:15:57 +0300 Subject: [PATCH 053/105] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8806246..3a0d8c0 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Change Log +## 0.11.0 (2018-06-02) + +Features: + - Add support for "peer assignment" section (#650) + +Bugfixes: + - Switched to api.coursera.org subdomain for API requests (#660) + + ## 0.10.0 (2018-02-19) Features: From dd983468c8308c504d254f2ed9e880eb732bd295 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 2 Jun 2018 19:16:54 +0300 Subject: [PATCH 054/105] Bump version (0.10.0 -> 0.11.0) Features: - Add support for "peer assignment" section (#650) Bugfixes: - Switched to api.coursera.org subdomain for API requests (#660) --- coursera/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/__init__.py b/coursera/__init__.py index 9d1bb72..f323a57 100644 --- a/coursera/__init__.py +++ b/coursera/__init__.py @@ -1 +1 @@ -__version__ = '0.10.0' +__version__ = '0.11.0' From 82722d80c62b346d901f07253f28cfa9d9cdc3cd Mon Sep 17 00:00:00 2001 From: TheGoddessInari Date: Sat, 2 Jun 2018 12:58:57 -0700 Subject: [PATCH 055/105] setup.py: Match file.open encoding with the source code encoding. Hit encoding errors on Windows after 0.11, easiest solution is to match encodings. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f489078..bdb6775 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ def read_file(filename, alt=None): lines = None try: - with open(filename) as f: + with open(filename, encoding='utf-8') as f: lines = f.read() except IOError: lines = [] if alt is None else alt From 699a9e03f322ff70d150908f04d1e4ef42eb6e7d Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 2 Jun 2018 23:14:47 +0300 Subject: [PATCH 056/105] Add more files to ignore to MANIFEST.in --- MANIFEST.in | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index a6c2ce8..c4b71c1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,33 @@ include requirements*.txt include CONTRIBUTING.md include LICENSE + +exclude .coveragerc +exclude .ctags +exclude .gitattributes +exclude .github/ISSUE_TEMPLATE.md +exclude .github/PULL_REQUEST_TEMPLATE.md +exclude .gitignore +exclude .travis.yml +exclude AUTHORS.md +exclude CHANGELOG.md +exclude README.md +exclude appveyor.yml +exclude appveyor/install.ps1 +exclude appveyor/run_with_env.cmd +exclude assets/hat-logo.svg +exclude coursera-dl +exclude coursera-dl.bat +exclude deploy/.netrc +exclude deploy/Dockerfile +exclude deploy/README.md +exclude deploy/build.sh +exclude deploy/download.sh +exclude fabfile.py +exclude tox.ini + +prune appveyor/ +prune assets/ +prune deploy/ +prune coursera/test/ +prune .github/ From 6dccacd464eb6d0965589fa156b8d83ea831228b Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 2 Jun 2018 23:16:29 +0300 Subject: [PATCH 057/105] Update CHANGELOG.md --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a0d8c0..3828bcc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 0.11.1 (2018-06-02) + +Bugfixes: + - Specify utf-8 encoding in setup.py to fix installation on Windows (#662) + 
## 0.11.0 (2018-06-02) Features: @@ -8,7 +13,6 @@ Features: Bugfixes: - Switched to api.coursera.org subdomain for API requests (#660) - ## 0.10.0 (2018-02-19) Features: From bb62038650a0b95fc09ac69d30197c15e6ff9d12 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 2 Jun 2018 23:17:18 +0300 Subject: [PATCH 058/105] Bump version (0.11.0 -> 0.11.1) Bugfixes: - Specify utf-8 encoding in setup.py to fix installation on Windows (#662) --- coursera/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/__init__.py b/coursera/__init__.py index f323a57..ae4865c 100644 --- a/coursera/__init__.py +++ b/coursera/__init__.py @@ -1 +1 @@ -__version__ = '0.11.0' +__version__ = '0.11.1' From c98e83702ee20827bb1f8f3f436c2eca59e99c7e Mon Sep 17 00:00:00 2001 From: TheGoddessInari Date: Sat, 2 Jun 2018 19:10:08 -0700 Subject: [PATCH 059/105] Followup, change to api.coursera.org in the definition and tests. I wish I had time to track down the real URL that's preventing TLS negotiation, but this at least gets coursera-dl working with its own downloader again. 
--- coursera/define.py | 2 +- .../fixtures/json/video-output-1-all.json | 28 +++++++++---------- .../test/fixtures/json/video-output-1-en.json | 4 +-- .../test/fixtures/json/video-output-1.json | 8 +++--- .../test/fixtures/json/video-output-2.json | 8 +++--- coursera/test/test_workflow.py | 6 ++-- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/coursera/define.py b/coursera/define.py index a50272a..e2ec105 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -11,7 +11,7 @@ import tempfile HTTP_FORBIDDEN = 403 -COURSERA_URL = 'https://www.coursera.org' +COURSERA_URL = 'https://api.coursera.org' AUTH_URL = 'https://accounts.coursera.org/api/v1/login' AUTH_URL_V3 = 'https://api.coursera.org/api/login/v3' CLASS_URL = 'https://class.coursera.org/{class_name}' diff --git a/coursera/test/fixtures/json/video-output-1-all.json b/coursera/test/fixtures/json/video-output-1-all.json index e8df433..1e04337 100644 --- a/coursera/test/fixtures/json/video-output-1-all.json +++ b/coursera/test/fixtures/json/video-output-1-all.json @@ -1,16 +1,16 @@ { - "zh-CN.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=eNyKwEu_aMQtn7bg0mUj6uIyVZvjahFSE5x2CrbOXOU&fileExtension=txt", - "en.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", - "zh-CN.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=nmGzGoF4oNLv28ZDLUtX5dF4xPXUABgym76XMs4UzDE&fileExtension=srt", - "en.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt", - "hi.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/v2OWSJUVSqCjlkiVFRqgng?expiry=1495238400000&hmac=qk--Ptsc4w3u6c-5BFPO9vhjyczMHzlSqUOQskjbfZ0&fileExtension=srt", - "es.srt": 
"https://www.coursera.org/api/subtitleAssetProxy.v1/jtYTHsSQToaWEx7EkJ6G4A?expiry=1495238400000&hmac=Ts5QKzu0jwhUafwsaHk7RKoQJK26d4_bzrX2M6iuRaQ&fileExtension=srt", - "pl.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/RGtowSWPQxSraMElj3MUbA?expiry=1495238400000&hmac=mcaMPGeK3J7Fn9RRwnuVFnHkyr1COFnLXYKVkUbyfSg&fileExtension=srt", - "ja.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/758f7ykrRcWfH-8pK3XFHw?expiry=1495238400000&hmac=huh5qtCJVj4rEJnsJ6D7MJdCcqN-s9cMd-M6xlSicLc&fileExtension=srt", - "pt-BR.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/1kRk9rXlSSeEZPa15aknhQ?expiry=1495238400000&hmac=XYyDJ71d9gl3HOqNplyJeEr7Wd2UhU3DhT-9w_Yudzs&fileExtension=srt", - "hi.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/v2OWSJUVSqCjlkiVFRqgng?expiry=1495238400000&hmac=earWLk_RUi3K5UpZfEVOlBgOcpSE9efXz2njRKu31rQ&fileExtension=txt", - "es.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/jtYTHsSQToaWEx7EkJ6G4A?expiry=1495238400000&hmac=sd6_C14J-qEkvvbqNTgI8W5eUCvOKwW6RzHcz8yF2Jk&fileExtension=txt", - "pl.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/RGtowSWPQxSraMElj3MUbA?expiry=1495238400000&hmac=sFwO_BWNlhZEDHsXYkFlnOEtHBIX8lSsVGIOLIHeZZ0&fileExtension=txt", - "ja.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/758f7ykrRcWfH-8pK3XFHw?expiry=1495238400000&hmac=WMhDBDbF6SiBuvRwg_QEkglLSK36bj8_5y6kZ9z94YY&fileExtension=txt", - "pt-BR.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/1kRk9rXlSSeEZPa15aknhQ?expiry=1495238400000&hmac=uQaL2V2AJ_Wp5dlCZH1HeyTU_AQo9VdJ2cphUhG8yxk&fileExtension=txt" + "zh-CN.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=eNyKwEu_aMQtn7bg0mUj6uIyVZvjahFSE5x2CrbOXOU&fileExtension=txt", + "en.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", + "zh-CN.srt": 
"https://api.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=nmGzGoF4oNLv28ZDLUtX5dF4xPXUABgym76XMs4UzDE&fileExtension=srt", + "en.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt", + "hi.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/v2OWSJUVSqCjlkiVFRqgng?expiry=1495238400000&hmac=qk--Ptsc4w3u6c-5BFPO9vhjyczMHzlSqUOQskjbfZ0&fileExtension=srt", + "es.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/jtYTHsSQToaWEx7EkJ6G4A?expiry=1495238400000&hmac=Ts5QKzu0jwhUafwsaHk7RKoQJK26d4_bzrX2M6iuRaQ&fileExtension=srt", + "pl.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/RGtowSWPQxSraMElj3MUbA?expiry=1495238400000&hmac=mcaMPGeK3J7Fn9RRwnuVFnHkyr1COFnLXYKVkUbyfSg&fileExtension=srt", + "ja.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/758f7ykrRcWfH-8pK3XFHw?expiry=1495238400000&hmac=huh5qtCJVj4rEJnsJ6D7MJdCcqN-s9cMd-M6xlSicLc&fileExtension=srt", + "pt-BR.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/1kRk9rXlSSeEZPa15aknhQ?expiry=1495238400000&hmac=XYyDJ71d9gl3HOqNplyJeEr7Wd2UhU3DhT-9w_Yudzs&fileExtension=srt", + "hi.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/v2OWSJUVSqCjlkiVFRqgng?expiry=1495238400000&hmac=earWLk_RUi3K5UpZfEVOlBgOcpSE9efXz2njRKu31rQ&fileExtension=txt", + "es.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/jtYTHsSQToaWEx7EkJ6G4A?expiry=1495238400000&hmac=sd6_C14J-qEkvvbqNTgI8W5eUCvOKwW6RzHcz8yF2Jk&fileExtension=txt", + "pl.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/RGtowSWPQxSraMElj3MUbA?expiry=1495238400000&hmac=sFwO_BWNlhZEDHsXYkFlnOEtHBIX8lSsVGIOLIHeZZ0&fileExtension=txt", + "ja.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/758f7ykrRcWfH-8pK3XFHw?expiry=1495238400000&hmac=WMhDBDbF6SiBuvRwg_QEkglLSK36bj8_5y6kZ9z94YY&fileExtension=txt", + "pt-BR.txt": 
"https://api.coursera.org/api/subtitleAssetProxy.v1/1kRk9rXlSSeEZPa15aknhQ?expiry=1495238400000&hmac=uQaL2V2AJ_Wp5dlCZH1HeyTU_AQo9VdJ2cphUhG8yxk&fileExtension=txt" } \ No newline at end of file diff --git a/coursera/test/fixtures/json/video-output-1-en.json b/coursera/test/fixtures/json/video-output-1-en.json index 76d8cca..e2cb7cb 100644 --- a/coursera/test/fixtures/json/video-output-1-en.json +++ b/coursera/test/fixtures/json/video-output-1-en.json @@ -1,4 +1,4 @@ { - "en.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", - "en.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt" + "en.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", + "en.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt" } \ No newline at end of file diff --git a/coursera/test/fixtures/json/video-output-1.json b/coursera/test/fixtures/json/video-output-1.json index 86b0591..315a1af 100644 --- a/coursera/test/fixtures/json/video-output-1.json +++ b/coursera/test/fixtures/json/video-output-1.json @@ -1,6 +1,6 @@ { - "zh-CN.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=eNyKwEu_aMQtn7bg0mUj6uIyVZvjahFSE5x2CrbOXOU&fileExtension=txt", - "en.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", - "zh-CN.srt": 
"https://www.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=nmGzGoF4oNLv28ZDLUtX5dF4xPXUABgym76XMs4UzDE&fileExtension=srt", - "en.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt" + "zh-CN.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=eNyKwEu_aMQtn7bg0mUj6uIyVZvjahFSE5x2CrbOXOU&fileExtension=txt", + "en.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=afqFhv9FWfxxEeSka8PCA4ihiyX3g2Z6K4jWJPFlcdo&fileExtension=srt", + "zh-CN.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/UKEuZoMQRcChLmaDEMXAsA?expiry=1495238400000&hmac=nmGzGoF4oNLv28ZDLUtX5dF4xPXUABgym76XMs4UzDE&fileExtension=srt", + "en.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/GgGZN65HQkyBmTeuR2JMsw?expiry=1495238400000&hmac=2Z37WW5Rc7GoT0eft1vdK0HX5imBqoZTKULMTiZ2EjM&fileExtension=txt" } \ No newline at end of file diff --git a/coursera/test/fixtures/json/video-output-2.json b/coursera/test/fixtures/json/video-output-2.json index f264b10..0b29d8c 100644 --- a/coursera/test/fixtures/json/video-output-2.json +++ b/coursera/test/fixtures/json/video-output-2.json @@ -1,6 +1,6 @@ { - "zh-TW.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/j8femXUVQaGH3pl1FYGh-Q?expiry=1495238400000&hmac=-sOeJbk_bICP9OMfbtkjLuwUAIZZcjGasIMk8JO6n0Q&fileExtension=srt", - "en.txt": "https://www.coursera.org/api/subtitleAssetProxy.v1/r3LdPY_CTUqy3T2Pwu1KVQ?expiry=1495238400000&hmac=xhMK0SSslbfwxl-vzjAXy-bd_iQQTY9iAIrNP4QHxq4&fileExtension=txt", - "en.srt": "https://www.coursera.org/api/subtitleAssetProxy.v1/r3LdPY_CTUqy3T2Pwu1KVQ?expiry=1495238400000&hmac=nO6NGCExQ5FO0aFFnr_YVXtd_lVW4JQaT34WS9tJi6c&fileExtension=srt", - "zh-TW.txt": 
"https://www.coursera.org/api/subtitleAssetProxy.v1/j8femXUVQaGH3pl1FYGh-Q?expiry=1495238400000&hmac=O9DKhZW6bOsI7ncNZIZPBMXmsreSrgulhGf3eyTCULo&fileExtension=txt" + "zh-TW.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/j8femXUVQaGH3pl1FYGh-Q?expiry=1495238400000&hmac=-sOeJbk_bICP9OMfbtkjLuwUAIZZcjGasIMk8JO6n0Q&fileExtension=srt", + "en.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/r3LdPY_CTUqy3T2Pwu1KVQ?expiry=1495238400000&hmac=xhMK0SSslbfwxl-vzjAXy-bd_iQQTY9iAIrNP4QHxq4&fileExtension=txt", + "en.srt": "https://api.coursera.org/api/subtitleAssetProxy.v1/r3LdPY_CTUqy3T2Pwu1KVQ?expiry=1495238400000&hmac=nO6NGCExQ5FO0aFFnr_YVXtd_lVW4JQaT34WS9tJi6c&fileExtension=srt", + "zh-TW.txt": "https://api.coursera.org/api/subtitleAssetProxy.v1/j8femXUVQaGH3pl1FYGh-Q?expiry=1495238400000&hmac=O9DKhZW6bOsI7ncNZIZPBMXmsreSrgulhGf3eyTCULo&fileExtension=txt" } \ No newline at end of file diff --git a/coursera/test/test_workflow.py b/coursera/test/test_workflow.py index 1c36224..99567b6 100644 --- a/coursera/test/test_workflow.py +++ b/coursera/test/test_workflow.py @@ -37,7 +37,7 @@ class MockedFailingDownloader(Downloader): raise self._exception_to_throw -TEST_URL = "https://www.coursera.org/api/test-url" +TEST_URL = "https://api.coursera.org/api/test-url" def make_test_modules(): @@ -110,7 +110,7 @@ def test_iter_modules(): (0, '01_section1'), (0, normpath('test_class/01_section1/01_module1')), (0, 'lecture1', 'en.txt', 'title'), - ('en.txt', 'https://www.coursera.org/api/test-url', 'title') + ('en.txt', 'https://api.coursera.org/api/test-url', 'title') ] collected_output = [] @@ -138,7 +138,7 @@ def test_walk_modules(): (0, '01_section1', 0, normpath('test_class/01_section1/01_module1'), 0, 'lecture1', normpath('test_class/01_section1/01_module1/01_lecture1_title.en.txt'), - 'https://www.coursera.org/api/test-url')] + 'https://api.coursera.org/api/test-url')] collected_output = [] for module, section, lecture, resource in _walk_modules( From 
bff4f4f9539c2c21189977d72d7ffb9085fb9588 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 3 Jun 2018 11:24:34 +0300 Subject: [PATCH 060/105] Use TLSv1.2 instead of v1.0 (fix #661, #663) --- coursera/cookies.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/coursera/cookies.py b/coursera/cookies.py index 36fa1df..6beac2a 100644 --- a/coursera/cookies.py +++ b/coursera/cookies.py @@ -53,6 +53,7 @@ def __fixed_init__(self, version, name, value, rest, rfc2109=False) + cookielib.Cookie.__init__ = __fixed_init__ @@ -170,7 +171,8 @@ def down_the_wabbit_hole(session, class_name): try: r.raise_for_status() except requests.exceptions.HTTPError as e: - raise AuthenticationFailed('Cannot login on class.coursera.org: %s' % e) + raise AuthenticationFailed( + 'Cannot login on class.coursera.org: %s' % e) logging.debug('Exiting "deep" authentication.') @@ -375,8 +377,9 @@ class TLSAdapter(HTTPAdapter): A customized HTTP Adapter which uses TLS v1.2 for encrypted connections. 
""" + def init_poolmanager(self, connections, maxsize, block=False): self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize, block=block, - ssl_version=ssl.PROTOCOL_TLSv1) + ssl_version=ssl.PROTOCOL_TLSv1_2) From 2d3191997e71b7177e7acbdd5b2e86d6055bcb16 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 3 Jun 2018 11:28:08 +0300 Subject: [PATCH 061/105] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3828bcc..83f6b90 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## 0.11.2 (2018-06-03) + +Bugfixes: + - Use TLS v1.2 instead of v1.0 + - Switched to api.coursera.org subdomain for subtitles requests (#664) + ## 0.11.1 (2018-06-02) Bugfixes: From 0ac9765f817653453498c08e932c206c8c917203 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 3 Jun 2018 11:29:40 +0300 Subject: [PATCH 062/105] Bump version (0.11.1 -> 0.11.2) Bugfixes: - Use TLS v1.2 instead of v1.0 - Switched to api.coursera.org subdomain for subtitles requests (#664) --- coursera/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/__init__.py b/coursera/__init__.py index ae4865c..2b3823f 100644 --- a/coursera/__init__.py +++ b/coursera/__init__.py @@ -1 +1 @@ -__version__ = '0.11.1' +__version__ = '0.11.2' From ca21f41582eaa6549ddf09c069ed54731644d056 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 21:15:19 +0300 Subject: [PATCH 063/105] Update appveyor.yml according to the error message recomendations They say it should be like this: ERROR: To modify pip, please run the following command: c:\python35-x64\python.exe -m pip install --disable-pip-version-check --user --upgrade pip --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 452468e..9b3a1d4 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -83,7 +83,7 @@ install: # Upgrade to the latest version of pip to 
avoid it displaying warnings # about it being out of date. - - "pip install --disable-pip-version-check --user --upgrade pip" + - "python -m pip install --disable-pip-version-check --user --upgrade pip" # Install requirements - "%CMD_IN_ENV% pip install -r requirements.txt" From 0667dd45da52debe1d6da39e42ddc4fa44082854 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 21:33:38 +0300 Subject: [PATCH 064/105] Use io.open in setup.py for compatibility with Python 2.7 --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bdb6775..f3e2ff6 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,8 @@ from __future__ import print_function import os.path import subprocess import sys +# For compatibility with Python2.7 +from io import open from setuptools import setup @@ -100,7 +102,8 @@ setup( description='Script for downloading Coursera.org videos and naming them.', long_description=long_description, - keywords=['coursera-dl', 'coursera', 'download', 'education', 'MOOCs', 'video'], + keywords=['coursera-dl', 'coursera', + 'download', 'education', 'MOOCs', 'video'], classifiers=trove_classifiers, packages=["coursera"], From c0ae84d12a70fa11f271ccd6f74735ea84645349 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 22:02:01 +0300 Subject: [PATCH 065/105] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 1726811..648ae3d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ venv3 .python-version .ipynb_checkpoints .ropeproject +.mypy_cache From ce6f94022f099a097aac8a2feb0ee5cf0c7e0d10 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 22:03:00 +0300 Subject: [PATCH 066/105] Add py36 to tox and a note to myself to remember to activate pyenv before using tox --- tox.ini | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index d2870c4..bb378d5 100644 --- a/tox.ini +++ 
b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py26,py27,py33,py34,py35 +envlist = py26,py27,py33,py34,py35,py36 [testenv] downloadcache = .tox/_download/ @@ -21,3 +21,12 @@ commands = py.test -v --junitxml={envlogdir}/result.xml coursera/test # {opts} is remove to prevent passing option "--download-cache" to pip # which is already gone. install_command = pip install {packages} + +# Notes for developers. Depending on your system configuration, +# you may find this bash function useful to run before running tox: +# +# activate_pyenv () { +# export PYENV_ROOT="$HOME/.pyenv" +# export PATH="$PYENV_ROOT/bin:$PATH" +# eval "$(pyenv init -)" +# } From fa8cb2fbbd76e628b506bc46470dff82ea9bdb5d Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 22:07:28 +0300 Subject: [PATCH 067/105] Use old pip upgrade command only for Python 2.6 in appveyor.yml --- appveyor.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 9b3a1d4..97ce6e7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,6 +16,8 @@ environment: # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" + # This command works for most versions of Python in AppVeyor except of Python 2.6 + PIP_UPGRADE_CMD: "python -m pip install --disable-pip-version-check --user --upgrade pip" #PANDOC_URL: "https://github.com/jgm/pandoc/releases/download/1.17.1/pandoc-1.17.1-1-windows.msi" #PANDOC_MSI: "C:\\pandoc.msi" @@ -27,6 +29,8 @@ environment: - PYTHON: "C:\\Python26" PYTHON_VERSION: "2.6.x" # currently 2.6.6 PYTHON_ARCH: "32" + # For Python 2.6 we are using old version of pip upgrade command + PIP_UPGRADE_CMD: "pip install --disable-pip-version-check --user --upgrade pip" - PYTHON: "C:\\Python26-x64" PYTHON_VERSION: "2.6.x" # currently 2.6.6 @@ -83,7 +87,7 @@ install: # Upgrade to the latest version of pip to avoid it displaying 
warnings # about it being out of date. - - "python -m pip install --disable-pip-version-check --user --upgrade pip" + - "%PIP_UPGRADE_CMD%" # Install requirements - "%CMD_IN_ENV% pip install -r requirements.txt" From 0327015be985d04cb244e14a8228119589d8bb6b Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 22:24:03 +0300 Subject: [PATCH 068/105] Drop Python 2.6 from matrix build in appveyor I think it's time we bury this stewardess already. Python 2.6 is not supported even by core Python team. --- appveyor.yml | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 97ce6e7..91a5d44 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,8 +16,6 @@ environment: # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" - # This command works for most versions of Python in AppVeyor except of Python 2.6 - PIP_UPGRADE_CMD: "python -m pip install --disable-pip-version-check --user --upgrade pip" #PANDOC_URL: "https://github.com/jgm/pandoc/releases/download/1.17.1/pandoc-1.17.1-1-windows.msi" #PANDOC_MSI: "C:\\pandoc.msi" @@ -26,16 +24,6 @@ environment: # a later point release. # See: http://www.appveyor.com/docs/installed-software#python - - PYTHON: "C:\\Python26" - PYTHON_VERSION: "2.6.x" # currently 2.6.6 - PYTHON_ARCH: "32" - # For Python 2.6 we are using old version of pip upgrade command - PIP_UPGRADE_CMD: "pip install --disable-pip-version-check --user --upgrade pip" - - - PYTHON: "C:\\Python26-x64" - PYTHON_VERSION: "2.6.x" # currently 2.6.6 - PYTHON_ARCH: "64" - - PYTHON: "C:\\Python27" PYTHON_VERSION: "2.7.x" # currently 2.7.11 PYTHON_ARCH: "32" @@ -87,7 +75,7 @@ install: # Upgrade to the latest version of pip to avoid it displaying warnings # about it being out of date. 
- - "%PIP_UPGRADE_CMD%" + - "python -m pip install --disable-pip-version-check --user --upgrade pip" # Install requirements - "%CMD_IN_ENV% pip install -r requirements.txt" From 88832628b76cd3109533762ad3e847ec0766762a Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 22:38:59 +0300 Subject: [PATCH 069/105] Add Python 3.6 to appveyor --- appveyor.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 91a5d44..f9ea741 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -56,6 +56,14 @@ environment: PYTHON_VERSION: "3.5.x" # currently 3.5.1 PYTHON_ARCH: "64" + - PYTHON: "C:\\Python36" + PYTHON_VERSION: "3.6.x" # currently 3.6.? + PYTHON_ARCH: "32" + + - PYTHON: "C:\\Python36-x64" + PYTHON_VERSION: "3.6.x" # currently 3.6.? + PYTHON_ARCH: "64" + init: - "ECHO %PYTHON%" - ps: "ls C:/Python*" From de2ba5bdce577823dc7ccb3bccdcc76d266e7f39 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 23:06:23 +0300 Subject: [PATCH 070/105] Add comment about split('/') in _get_notebook_folder --- coursera/api.py | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index e9c5000..330b560 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -482,22 +482,33 @@ class CourseraOnDemand(object): for content in reply['content']: if content['type'] == 'directory': - a = self._get_notebook_folder(OPENCOURSE_NOTEBOOK_TREE, jupyterId, jupId=jupyterId, path=content['path'], timestamp=int(time.time())) + a = self._get_notebook_folder( + OPENCOURSE_NOTEBOOK_TREE, jupyterId, jupId=jupyterId, path=content['path'], timestamp=int(time.time())) supplement_links.update(a) elif content['type'] == 'file': - tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format(path=content['path'], jupId=jupyterId, timestamp=int(time.time())) + tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format( + path=content['path'], jupId=jupyterId, timestamp=int(time.time())) filename, 
extension = os.path.splitext(clean_url(tmpUrl)) head, tail = os.path.split(content['path']) - head = '/'.join([clean_filename(dir, minimal_change=True) for dir in head.split('/')]) + # '/' in the following line is for a reason: + # @noureddin says: "I split head using split('/') not + # os.path.split() because it's seems to me that it comes from a + # web page, so the separator will always be /, so using the + # native path splitting function is not the most portable way to + # do it." + # Original pull request: https://github.com/coursera-dl/coursera-dl/pull/654 + head = '/'.join([clean_filename(dir, minimal_change=True) + for dir in head.split('/')]) tail = clean_filename(tail, minimal_change=True) if os.path.isdir(self._course_name + "/notebook/" + head + "/") == False: logging.info('Creating [{}] directories...'.format(head)) os.makedirs(self._course_name + "/notebook/" + head + "/") - r = requests.get(tmpUrl.replace(" ", "%20"), cookies=self._session.cookies) + r = requests.get(tmpUrl.replace(" ", "%20"), + cookies=self._session.cookies) if os.path.exists(self._course_name + "/notebook/" + head + "/" + tail) == False: logging.info('Downloading {} into {}'.format(tail, head)) with open(self._course_name + "/notebook/" + head + "/" + tail, 'wb+') as f: @@ -505,14 +516,15 @@ class CourseraOnDemand(object): else: logging.info('Skipping {}... 
(file exists)'.format(tail)) - if not str(extension[1:]) in supplement_links: supplement_links[str(extension[1:])] = [] - supplement_links[str(extension[1:])].append((tmpUrl.replace(" ", "%20"), filename)) + supplement_links[str(extension[1:])].append( + (tmpUrl.replace(" ", "%20"), filename)) elif content['type'] == 'notebook': - tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format(path=content['path'], jupId=jupyterId, timestamp=int(time.time())) + tmpUrl = OPENCOURSE_NOTEBOOK_DOWNLOAD.format( + path=content['path'], jupId=jupyterId, timestamp=int(time.time())) filename, extension = os.path.splitext(clean_url(tmpUrl)) head, tail = os.path.split(content['path']) @@ -521,9 +533,11 @@ class CourseraOnDemand(object): logging.info('Creating [{}] directories...'.format(head)) os.makedirs(self._course_name + "/notebook/" + head + "/") - r = requests.get(tmpUrl.replace(" ", "%20"), cookies=self._session.cookies) + r = requests.get(tmpUrl.replace(" ", "%20"), + cookies=self._session.cookies) if os.path.exists(self._course_name + "/notebook/" + head + "/" + tail) == False: - logging.info('Downloading Jupyter {} into {}'.format(tail, head)) + logging.info( + 'Downloading Jupyter {} into {}'.format(tail, head)) with open(self._course_name + "/notebook/" + head + "/" + tail, 'wb+') as f: f.write(r.content) else: @@ -532,10 +546,12 @@ class CourseraOnDemand(object): if not "ipynb" in supplement_links: supplement_links["ipynb"] = [] - supplement_links["ipynb"].append((tmpUrl.replace(" ", "%20"), filename)) + supplement_links["ipynb"].append( + (tmpUrl.replace(" ", "%20"), filename)) else: - logging.info('Unsupported typename {} in notebook'.format(content['type'])) + logging.info( + 'Unsupported typename {} in notebook'.format(content['type'])) return supplement_links From 45824ef4b84b20159c5ded0b6bfdf9fd1b96e15d Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sat, 9 Jun 2018 23:07:05 +0300 Subject: [PATCH 071/105] autopep8 --- coursera/api.py | 75 
++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 26 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index 330b560..657e4b5 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -127,7 +127,8 @@ class QuizExamToMarkupConverter(object): result = ['

'] for option in options: - option_text = unescape_html(option['display']['definition']['value']) + option_text = unescape_html( + option['display']['definition']['value']) # We need to replace with so that answer text # stays on the same line with checkbox/radio button @@ -238,7 +239,8 @@ class MarkupToHTMLConverter(object): asset = self._asset_retriever[image['assetid']] if asset.data is not None: encoded64 = base64.b64encode(asset.data).decode() - image['src'] = 'data:%s;base64,%s' % (asset.content_type, encoded64) + image['src'] = 'data:%s;base64,%s' % ( + asset.content_type, encoded64) def _convert_markup_audios(self, soup): """ @@ -264,9 +266,11 @@ class MarkupToHTMLConverter(object): asset = self._asset_retriever[audio['id']] if asset.data is not None: encoded64 = base64.b64encode(asset.data).decode() - data_string = 'data:%s;base64,%s' % (asset.content_type, encoded64) + data_string = 'data:%s;base64,%s' % ( + asset.content_type, encoded64) - source_tag = soup.new_tag('source', src=data_string, type=asset.content_type) + source_tag = soup.new_tag( + 'source', src=data_string, type=asset.content_type) controls_tag = soup.new_tag('audio', controls="") controls_tag.string = 'Your browser does not support the audio element.' @@ -278,6 +282,7 @@ class OnDemandCourseMaterialItems(object): """ Helper class that allows accessing lecture JSONs by lesson IDs. """ + def __init__(self, items): """ Initialization. Build a map from lessonId to Lecture (item) @@ -347,6 +352,7 @@ class Asset(namedtuple('Asset', 'id name type_name url content_type data')): This class contains information about an asset. """ __slots__ = () + def __repr__(self): return 'Asset(id="%s", name="%s", type_name="%s", url="%s", content_type="%s", data="<...>")' % ( self.id, self.name, self.type_name, self.url, self.content_type) @@ -356,6 +362,7 @@ class AssetRetriever(object): """ This class helps download assets by their ID. 
""" + def __init__(self, session): self._session = session self._asset_mapping = {} @@ -372,7 +379,8 @@ class AssetRetriever(object): id=','.join(asset_ids)) # Create a map "asset_id => asset" for easier access - asset_map = dict((asset['id'], asset) for asset in asset_list['elements']) + asset_map = dict((asset['id'], asset) + for asset in asset_list['elements']) for asset_id in asset_ids: # Download each asset @@ -434,7 +442,8 @@ class CourseraOnDemand(object): self._user_id = None self._quiz_to_markup = QuizExamToMarkupConverter(session) - self._markup_to_html = MarkupToHTMLConverter(session, mathjax_cdn_url=mathjax_cdn_url) + self._markup_to_html = MarkupToHTMLConverter( + session, mathjax_cdn_url=mathjax_cdn_url) self._asset_retriever = AssetRetriever(session) def obtain_user_id(self): @@ -463,7 +472,8 @@ class CourseraOnDemand(object): except requests.exceptions.HTTPError as exception: logging.error('Could not download exam %s: %s', exam_id, exception) if is_debug_run(): - logging.exception('Could not download exam %s: %s', exam_id, exception) + logging.exception( + 'Could not download exam %s: %s', exam_id, exception) return None def _get_notebook_folder(self, url, jupyterId, **kwargs): @@ -574,7 +584,8 @@ class CourseraOnDemand(object): jupyterId = jupyterId[0] newReq = requests.Session() - req = newReq.get(OPENCOURSE_NOTEBOOK_TREE.format(jupId=jupyterId, path="/", timestamp=int(time.time())), headers=headers) + req = newReq.get(OPENCOURSE_NOTEBOOK_TREE.format( + jupId=jupyterId, path="/", timestamp=int(time.time())), headers=headers) return self._get_notebook_folder(OPENCOURSE_NOTEBOOK_TREE, jupyterId, jupId=jupyterId, path="/", timestamp=int(time.time())) @@ -585,9 +596,11 @@ class CourseraOnDemand(object): ret = self._get_notebook_json(notebook_id, authorizationId) return ret except requests.exceptions.HTTPError as exception: - logging.error('Could not download notebook %s: %s', notebook_id, exception) + logging.error('Could not download notebook %s: 
%s', + notebook_id, exception) if is_debug_run(): - logging.exception('Could not download notebook %s: %s', notebook_id, exception) + logging.exception( + 'Could not download notebook %s: %s', notebook_id, exception) return None def extract_links_from_quiz(self, quiz_id): @@ -598,7 +611,8 @@ class CourseraOnDemand(object): except requests.exceptions.HTTPError as exception: logging.error('Could not download quiz %s: %s', quiz_id, exception) if is_debug_run(): - logging.exception('Could not download quiz %s: %s', quiz_id, exception) + logging.exception( + 'Could not download quiz %s: %s', quiz_id, exception) return None def _convert_quiz_json_to_links(self, quiz_json, filename_suffix): @@ -653,7 +667,7 @@ class CourseraOnDemand(object): def _get_quiz_session_id(self, quiz_id): headers = self._auth_headers_with_json() - data = {"contentRequestBody":[]} + data = {"contentRequestBody": []} reply = get_page(self._session, POST_OPENCOURSE_API_QUIZ_SESSION, json=True, @@ -706,9 +720,11 @@ class CourseraOnDemand(object): return links except requests.exceptions.HTTPError as exception: - logging.error('Could not download lecture %s: %s', video_id, exception) + logging.error('Could not download lecture %s: %s', + video_id, exception) if is_debug_run(): - logging.exception('Could not download lecture %s: %s', video_id, exception) + logging.exception( + 'Could not download lecture %s: %s', video_id, exception) return None def _normalize_assets(self, assets): @@ -871,7 +887,7 @@ class CourseraOnDemand(object): video_content['mp4'] = video_url subtitle_link = self._extract_subtitles_from_video_dom( - dom, subtitle_language, video_id) + dom, subtitle_language, video_id) for key, value in iteritems(subtitle_link): video_content[key] = value @@ -939,7 +955,8 @@ class CourseraOnDemand(object): if subtitle_url is not None: # some subtitle urls are relative! 
subtitle_links[ - "%s.%s" % (current_subtitle_language, subtitle_extension) + "%s.%s" % (current_subtitle_language, + subtitle_extension) ] = make_coursera_absolute_url(subtitle_url) return subtitle_links @@ -988,7 +1005,8 @@ class CourseraOnDemand(object): @return: @see CourseraOnDemand._extract_links_from_text """ - logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) + logging.debug( + 'Gathering supplement URLs for element_id <%s>.', element_id) try: # Assignment text (instructions) contains asset tags which describe @@ -1021,7 +1039,8 @@ class CourseraOnDemand(object): @return: @see CourseraOnDemand._extract_links_from_text """ - logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) + logging.debug( + 'Gathering supplement URLs for element_id <%s>.', element_id) try: # Assignment text (instructions) contains asset tags which describe @@ -1051,13 +1070,14 @@ class CourseraOnDemand(object): @return: @see CourseraOnDemand._extract_links_from_text """ - logging.debug('Gathering supplement URLs for element_id <%s>.', element_id) + logging.debug( + 'Gathering supplement URLs for element_id <%s>.', element_id) try: dom = get_page(self._session, OPENCOURSE_SUPPLEMENT_URL, - json=True, - course_id=self._course_id, - element_id=element_id) + json=True, + course_id=self._course_id, + element_id=element_id) supplement_content = {} @@ -1228,7 +1248,8 @@ class CourseraOnDemand(object): @rtype: [str] """ headers = self._auth_headers_with_json() - data = {'courseId': self._course_id, 'learnerId': self._user_id, 'itemId': element_id} + data = {'courseId': self._course_id, + 'learnerId': self._user_id, 'itemId': element_id} dom = get_page(self._session, OPENCOURSE_NOTEBOOK_LAUNCHES, post=True, json=True, @@ -1237,7 +1258,7 @@ class CourseraOnDemand(object): headers=headers, element_id=element_id, data=json.dumps(data) - ) + ) # Return authorization id. 
This id changes on each request return dom['elements'][0]['authorizationId'] @@ -1282,7 +1303,8 @@ class CourseraOnDemand(object): for element in dom['elements']: # There is only one section with Instructions if 'introduction' in element['instructions']: - result.append(element['instructions']['introduction']['definition']['value']) + result.append(element['instructions'] + ['introduction']['definition']['value']) # But there may be multiple sections in Sections for section in element['instructions'].get('sections', []): @@ -1291,7 +1313,8 @@ class CourseraOnDemand(object): if section_title is not None: # If section title is present, put it in the beginning of # section value as if it was there. - section_value = ('%s' % section_title) + section_value + section_value = ('%s' % + section_title) + section_value result.append(section_value) return result From ef3268677e4d66a779d3aece67a97fda803de5a2 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 10 Jun 2018 01:33:39 +0300 Subject: [PATCH 072/105] Update Dockerfile to use Python3.6 and install from PyPI package Also update README and CONTRIBUTING guides with the information on how to build and run Docker image. --- CONTRIBUTING.md | 18 ++++++++++++++++++ Dockerfile | 15 ++++----------- README.md | 25 ++++++++++++++++++------- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cca172d..004a1ee 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -241,3 +241,21 @@ DRAFT I think this is required for PyPI description to look nice. 7. `python setup.py sdist` to build the package 8. `twine upload dist/coursera-dl-0.6.1.tar.gz` to deploy the package. + +## Docker + +Build new Docker image from PyPI package: + +``` +docker build --tag courseradl/courseradl --build-arg VERSION=0.11.2 . 
+``` + +Run the image: +``` +docker run --rm -it -v "$(pwd):/courses" -v "$HOME/.netrc:/netrc" courseradl -n /netrc -- google-machine-learning +``` + +Publish the image: +``` +docker push courseradl/courseradl +``` diff --git a/Dockerfile b/Dockerfile index 65915b4..cef9e3c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,21 +1,14 @@ -FROM python:3.4-slim +FROM python:3.6-slim -LABEL maintainer "opsxcq@strm.sh" - -WORKDIR /src -COPY requirements.txt /src - -COPY requirements-dev.txt /src +LABEL maintainer "https://github.com/coursera-dl/" RUN apt-get update && \ apt-get install -y --no-install-recommends gcc g++ libssl-dev && \ rm -rf /var/lib/apt/lists/* && \ - pip install -r requirements.txt && \ - pip install -r requirements-dev.txt && \ apt-get purge -y --auto-remove gcc g++ libssl-dev -COPY . /src -RUN python setup.py install +ARG VERSION +RUN pip install coursera-dl==$VERSION WORKDIR /courses ENTRYPOINT ["coursera-dl"] diff --git a/README.md b/README.md index 349b8b8..4f2e55c 100644 --- a/README.md +++ b/README.md @@ -220,26 +220,37 @@ applicable). If you prefer you can run this software inside Docker: ``` -docker run --rm -it \ - -v "$(pwd):/courses" \ - strm/coursera-dl \ - -u -p +docker run --rm -it -v \ + "$(pwd):/courses" \ + courseradl/courseradl -u -p ``` -The actual working dir for coursera-dl is /courses, all courses will be downloaded there if you don't specify otherwise. +Or using netrc file: + +``` +docker run --rm -it \ + -v "$(pwd):/courses" -v "$HOME/.netrc:/netrc" \ + courseradl/courseradl -n /netrc +``` + +The actual working dir for coursera-dl is /courses, all courses will be +downloaded there if you don't specify otherwise. ## Windows `python -m pip install coursera-dl` -Be sure that the Python install path is added to the PATH system environment variables. This can be found in Control Panel > System > Advanced System Settings > Environment Variables. +Be sure that the Python install path is added to the PATH system environment +variables. 
This can be found in Control Panel > System > Advanced System +Settings > Environment Variables. ``` Example: C:\Python35\Scripts\;C:\Python35\; ``` -Or if you have restricted installation permissions and you've installed Python under AppData, add this to your PATH. +Or if you have restricted installation permissions and you've installed Python +under AppData, add this to your PATH. ``` Example: From 98e4c141065c67dd6d64041b14057e92f36cd496 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 11 Jun 2018 18:14:48 +0300 Subject: [PATCH 073/105] autopep8 + trailing spaces --- README.md | 6 +++--- coursera/commandline.py | 14 ++++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 4f2e55c..083b7a6 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ downloaded there if you don't specify otherwise. Be sure that the Python install path is added to the PATH system environment variables. This can be found in Control Panel > System > Advanced System -Settings > Environment Variables. +Settings > Environment Variables. ``` Example: @@ -409,7 +409,7 @@ one of the following actions solve your problem: * You get an error when using `-n` to specify that you want to use a `.netrc` file and, * You want the script to use your default netrc file and, - * You get a message saying `coursera-dl: error: too few arguments` + * You get a message saying `coursera-dl: error: too few arguments` Then you should specify `--` as an argument after `-n`, that is, `-n --` or change the order in which you pass the arguments to the script, so that @@ -539,7 +539,7 @@ https://urllib3.readthedocs.io/en/latest/security.html#insecureplatformwarning When saving a course page, we enabled `MathJax` rendering for math equations, by injecting `MathJax.js` in the header. The script is using a cdn service provided by [mathjax.org](https://cdn.mathjax.org/mathjax/latest/MathJax.js). 
However, that -url is not accessible in some countries/regions, you can provide a +url is not accessible in some countries/regions, you can provide a `--mathjax-cdn ` parameter to specify the `MathJax.js` file that is accessible in your region. diff --git a/coursera/commandline.py b/coursera/commandline.py index 2ea69b1..bb17504 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -112,7 +112,8 @@ def parse_args(args=None): ) # Selection of material to download - group_material = parser.add_argument_group('Selection of material to download') + group_material = parser.add_argument_group( + 'Selection of material to download') group_material.add_argument('--only-syllabus', dest='only_syllabus', @@ -289,7 +290,8 @@ def parse_args(args=None): help='Do not limit filenames to be ASCII-only') # Advanced authentication - group_adv_auth = parser.add_argument_group('Advanced authentication options') + group_adv_auth = parser.add_argument_group( + 'Advanced authentication options') group_adv_auth.add_argument('-c', '--cookies_file', @@ -323,7 +325,8 @@ def parse_args(args=None): help='clear cached cookies') # Advanced miscellaneous options - group_adv_misc = parser.add_argument_group('Advanced miscellaneous options') + group_adv_misc = parser.add_argument_group( + 'Advanced miscellaneous options') group_adv_misc.add_argument('--hook', dest='hooks', @@ -416,7 +419,8 @@ def parse_args(args=None): # check arguments if args.use_keyring and args.password: - logging.warning('--keyring and --password cannot be specified together') + logging.warning( + '--keyring and --password cannot be specified together') args.use_keyring = False if args.use_keyring and not keyring: @@ -437,5 +441,3 @@ def parse_args(args=None): sys.exit(1) return args - - From ad61d52a5b999b8b8d823119826a9502401c052e Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Mon, 11 Jun 2018 18:16:09 +0300 Subject: [PATCH 074/105] Mention that username is email address in command-line options fix #625 
--- coursera/commandline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coursera/commandline.py b/coursera/commandline.py index bb17504..4c5508f 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -57,7 +57,7 @@ def parse_args(args=None): dest='username', action='store', default=None, - help='coursera username') + help='username (email) that you use to login to Coursera') group_basic.add_argument('-p', '--password', From e788aed7982f1e80f69f304dc29f9a2886d8f371 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Tue, 12 Jun 2018 20:14:19 +0300 Subject: [PATCH 075/105] Update TOC in README.md --- README.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 083b7a6..8d72afc 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,9 @@ [![Latest version on PyPI](https://img.shields.io/pypi/v/coursera-dl.svg)](https://pypi.python.org/pypi/coursera-dl) [![Code Climate](https://codeclimate.com/github/coursera-dl/coursera-dl/badges/gpa.svg)](https://codeclimate.com/github/coursera-dl/coursera-dl) + + +- [Coursera Downloader](#coursera-downloader) - [Introduction](#introduction) - [Features](#features) - [Disclaimer](#disclaimer) @@ -13,23 +16,28 @@ - [Recommended installation method for all Operating Systems](#recommended-installation-method-for-all-operating-systems) - [Alternative ways of installing missing dependencies](#alternative-ways-of-installing-missing-dependencies) - [Alternative installation method for Unix systems](#alternative-installation-method-for-unix-systems) + - [ArchLinux](#archlinux) - [Installing dependencies on your own](#installing-dependencies-on-your-own) + - [Docker](#docker) - [Windows](#windows) - [Create an account with Coursera](#create-an-account-with-coursera) - [Running the script](#running-the-script) - [Resuming downloads](#resuming-downloads) - [Troubleshooting](#troubleshooting) - [China issues](#china-issues) - - [Download 
timeouts](#download-timeouts) - [Found 0 sections and 0 lectures on this page](#found-0-sections-and-0-lectures-on-this-page) - - [Windows: Proxy support](#windows-proxy-support) + - [Download timeouts](#download-timeouts) + - [Windows: proxy support](#windows-proxy-support) - [Windows: Failed to create process](#windows-failed-to-create-process) - - [SSLError: Errno 1 _ssl.c:504: error:14094410:SSL routines:SSL3_READ_BYTES:sslv3 alert handshake failure](#sslerror-errno-1-_sslc504-error14094410ssl-routinesssl3_read_bytessslv3-alert-handshake-failure) + - [SSLError: [Errno 1] _ssl.c:504: error:14094410:SSL routines:SSL3_READ_BYTES:sslv3 alert handshake failure](#sslerror-errno-1-_sslc504-error14094410ssl-routinesssl3_read_bytessslv3-alert-handshake-failure) + - [Alternative CDN for `MathJax.js`](#alternative-cdn-for-mathjaxjs) - [Reporting issues](#reporting-issues) - [Filing an issue/Reporting a bug](#filing-an-issuereporting-a-bug) - [Feedback](#feedback) - [Contact](#contact) + + # Introduction [Coursera][1] is arguably the leader in *massive open online courses* (MOOC) @@ -534,7 +542,7 @@ If you still have the problem, please read the following issues for more ideas o This is also worth reading: https://urllib3.readthedocs.io/en/latest/security.html#insecureplatformwarning -## Use an alternative cdn url for `MathJax.js` +## Alternative CDN for `MathJax.js` When saving a course page, we enabled `MathJax` rendering for math equations, by injecting `MathJax.js` in the header. 
The script is using a cdn service provided From 5bc5cd9c77e8f0572d5230f6ac9896c1172ef660 Mon Sep 17 00:00:00 2001 From: John Doe Date: Sat, 16 Jun 2018 23:42:36 -0700 Subject: [PATCH 076/105] Typo (#671) --- coursera/api.py | 6 +++--- coursera/commandline.py | 4 ++-- coursera/cookies.py | 10 +++++----- coursera/coursera_dl.py | 16 ++++++++-------- coursera/define.py | 4 ++-- coursera/extractors.py | 14 +++++++------- coursera/test/test_api.py | 12 ++++++------ coursera/test/test_parsing.py | 4 ++-- 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index 657e4b5..4fc6613 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -52,7 +52,7 @@ from .define import (OPENCOURSE_SUPPLEMENT_URL, IN_MEMORY_MARKER) -from .cookies import prepape_auth_headers +from .cookies import prepare_auth_headers class QuizExamToMarkupConverter(object): @@ -681,7 +681,7 @@ class CourseraOnDemand(object): return reply['contentResponseBody']['session']['id'] def _auth_headers_with_json(self): - headers = prepape_auth_headers(self._session, include_cauth=True) + headers = prepare_auth_headers(self._session, include_cauth=True) headers.update({ 'Content-Type': 'application/json; charset=UTF-8' }) @@ -943,7 +943,7 @@ class CourseraOnDemand(object): ", ".join(subtitle_set_nonexist), video_id, subtitle_description) if not subtitle_set_download: - logging.warning("%s all requested subtitles are unavaliable," + logging.warning("%s all requested subtitles are unavailable," "with video id: [%s], falling back to 'en' " "%s", subtitle_description.capitalize(), video_id, diff --git a/coursera/commandline.py b/coursera/commandline.py index 4c5508f..a09e77a 100644 --- a/coursera/commandline.py +++ b/coursera/commandline.py @@ -103,7 +103,7 @@ def parse_args(args=None): help='Choose language to download subtitles and transcripts. (Default: all)' 'Use special value "all" to download all available.' 
'To download subtitles and transcripts of multiple languages,' - 'use comma(s) (without spaces) to seperate the names of the languages, i.e., "en,zh-CN".' + 'use comma(s) (without spaces) to separate the names of the languages, i.e., "en,zh-CN".' 'To download subtitles and transcripts of alternative language(s) ' 'if only the current language is not available,' 'put an "|" for each of the alternative languages after ' @@ -132,7 +132,7 @@ def parse_args(args=None): dest='download_notebooks', action='store_true', default=False, - help='download Python Jupyther Notebooks. (Default: False)') + help='download Python Jupyter Notebooks. (Default: False)') group_material.add_argument('--about', # FIXME: should be --about-course dest='about', diff --git a/coursera/cookies.py b/coursera/cookies.py index 6beac2a..a66f126 100644 --- a/coursera/cookies.py +++ b/coursera/cookies.py @@ -69,15 +69,15 @@ class AuthenticationFailed(BaseException): """ -def prepape_auth_headers(session, include_cauth=False): +def prepare_auth_headers(session, include_cauth=False): """ - This function prepapes headers with CSRF/CAUTH tokens that can + This function prepares headers with CSRF/CAUTH tokens that can be used in POST requests such as login/get_quiz. @param session: Requests session. @type session: requests.Session - @param include_cauth: Flag that indicates whethe CAUTH cookies should be + @param include_cauth: Flag that indicates whether CAUTH cookies should be included as well. @type include_cauth: bool @@ -133,7 +133,7 @@ def login(session, username, password, class_name=None): logging.error(e) raise ClassNotFound(class_name) - headers = prepape_auth_headers(session, include_cauth=False) + headers = prepare_auth_headers(session, include_cauth=False) data = { 'email': username, @@ -355,7 +355,7 @@ def get_cookies_for_class(session, class_name, Get the cookies for the given class. 
We do not validate the cookies if they are loaded from a cookies file - because this is intented for debugging purposes or if the coursera + because this is intended for debugging purposes or if the coursera authentication process has changed. """ if cookies_file: diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index 4c20689..a883b0c 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -115,12 +115,12 @@ def download_on_demand_class(args, class_name): Download all requested resources from the on-demand class given in class_name. @return: Tuple of (bool, bool), where the first bool indicates whether - errors occured while parsing syllabus, the second bool indicaters + errors occurred while parsing syllabus, the second bool indicates whether the course appears to be completed. @rtype: (bool, bool) """ - error_occured = False + error_occurred = False session = get_session() extractor = CourseraExtractor(session, args.username, args.password) @@ -129,7 +129,7 @@ def download_on_demand_class(args, class_name): with open(cached_syllabus_filename) as syllabus_file: modules = json.load(syllabus_file) else: - error_occured, modules = extractor.get_modules( + error_occurred, modules = extractor.get_modules( class_name, args.reverse, args.unrestricted_filenames, @@ -145,7 +145,7 @@ def download_on_demand_class(args, class_name): json.dump(modules, file_object, indent=4) if args.only_syllabus: - return error_occured, False + return error_occurred, False downloader = get_downloader(session, class_name, args) downloader_wrapper = ParallelDownloader(downloader, args.jobs) \ @@ -177,7 +177,7 @@ def download_on_demand_class(args, class_name): if course_downloader.failed_urls: print_failed_urls(course_downloader.failed_urls) - return error_occured, completed + return error_occurred, completed def print_skipped_urls(skipped_urls): @@ -205,7 +205,7 @@ def download_class(args, class_name): Try to download on-demand class. 
@return: Tuple of (bool, bool), where the first bool indicates whether - errors occured while parsing syllabus, the second bool indicaters + errors occurred while parsing syllabus, the second bool indicaters whether the course appears to be completed. @rtype: (bool, bool) """ @@ -235,10 +235,10 @@ def main(): try: logging.info('Downloading class: %s (%d / %d)', class_name, class_index + 1, len(args.class_names)) - error_occured, completed = download_class(args, class_name) + error_occurred, completed = download_class(args, class_name) if completed: completed_classes.append(class_name) - if error_occured: + if error_occurred: classes_with_errors.append(class_name) except requests.exceptions.HTTPError as e: logging.error('HTTPError %s', e) diff --git a/coursera/define.py b/coursera/define.py index e2ec105..a586392 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -16,7 +16,7 @@ AUTH_URL = 'https://accounts.coursera.org/api/v1/login' AUTH_URL_V3 = 'https://api.coursera.org/api/login/v3' CLASS_URL = 'https://class.coursera.org/{class_name}' -# The following link is left just for illustative purposes: +# The following link is left just for illustrative purposes: # https://api.coursera.org/api/courses.v1?fields=display%2CpartnerIds%2CphotoUrl%2CstartDate%2Cpartners.v1(homeLink%2Cname)&includes=partnerIds&q=watchlist&start=0 # Reply is as follows: # { @@ -36,7 +36,7 @@ CLASS_URL = 'https://class.coursera.org/{class_name}' # } OPENCOURSE_LIST_COURSES = 'https://api.coursera.org/api/courses.v1?q=watchlist&start={start}' -# The following link is left just for illustative purposes: +# The following link is left just for illustrative purposes: # 
https://api.coursera.org/api/memberships.v1?fields=courseId,enrolledTimestamp,grade,id,lastAccessedTimestamp,onDemandSessionMembershipIds,onDemandSessionMemberships,role,v1SessionId,vc,vcMembershipId,courses.v1(courseStatus,display,partnerIds,photoUrl,specializations,startDate,v1Details,v2Details),partners.v1(homeLink,name),v1Details.v1(sessionIds),v1Sessions.v1(active,certificatesReleased,dbEndDate,durationString,hasSigTrack,startDay,startMonth,startYear),v2Details.v1(onDemandSessions,plannedLaunchDate,sessionsEnabledAt),specializations.v1(logo,name,partnerIds,shortName)&includes=courseId,onDemandSessionMemberships,vcMembershipId,courses.v1(partnerIds,specializations,v1Details,v2Details),v1Details.v1(sessionIds),v2Details.v1(onDemandSessions),specializations.v1(partnerIds)&q=me&showHidden=true&filter=current,preEnrolled # Sample reply: # { diff --git a/coursera/extractors.py b/coursera/extractors.py index 7d23b96..46f3683 100644 --- a/coursera/extractors.py +++ b/coursera/extractors.py @@ -51,11 +51,11 @@ class CourseraExtractor(PlatformExtractor): download_notebooks=False): page = self._get_on_demand_syllabus(class_name) - error_occured, modules = self._parse_on_demand_syllabus( + error_occurred, modules = self._parse_on_demand_syllabus( page, reverse, unrestricted_filenames, subtitle_language, video_resolution, download_quizzes, mathjax_cdn_url, download_notebooks) - return error_occured, modules + return error_occurred, modules def _get_on_demand_syllabus(self, class_name): """ @@ -107,7 +107,7 @@ class CourseraExtractor(PlatformExtractor): with open('%s-course-material-items.json' % course_name, 'w') as file_object: json.dump(ondemand_material_items._items, file_object, indent=4) - error_occured = False + error_occurred = False for module in json_modules: module_slug = module['slug'] @@ -135,7 +135,7 @@ class CourseraExtractor(PlatformExtractor): logging.info('Processing lecture %s (%s)', lecture_slug, typename) # Empty dictionary means there were no data - # 
None means an error occured + # None means an error occurred links = {} if typename == 'lecture': @@ -180,7 +180,7 @@ class CourseraExtractor(PlatformExtractor): continue if links is None: - error_occured = True + error_occurred = True elif links: lectures.append((lecture_slug, links)) @@ -206,7 +206,7 @@ class CourseraExtractor(PlatformExtractor): links = course.extract_links_from_reference(json_reference['shortId']) if links is None: - error_occured = True + error_occurred = True elif links: reference.append(('', links)) @@ -216,4 +216,4 @@ class CourseraExtractor(PlatformExtractor): if references: modules.append(("Resources", references)) - return error_occured, modules + return error_occurred, modules diff --git a/coursera/test/test_api.py b/coursera/test/test_api.py index 063d274..6f50723 100644 --- a/coursera/test/test_api.py +++ b/coursera/test/test_api.py @@ -149,7 +149,7 @@ def test_ondemand_programming_supplement_no_instructions(get_page, course): ('peer-assignment-no-instructions.json', ''), ] ) -def test_ondemand_from_peer_assgnment_instructions( +def test_ondemand_from_peer_assignment_instructions( get_page, course, input_filename, expected_output): instructions = slurp_fixture('json/%s' % input_filename) get_page.return_value = json.loads(instructions) @@ -176,7 +176,7 @@ def test_ondemand_programming_supplement_empty_instructions(get_page, course): output = course.extract_links_from_programming('0') # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. 
assert 'html' in output del output['html'] @@ -193,7 +193,7 @@ def test_ondemand_programming_immediate_instructions_empty_instructions( output = course.extract_links_from_programming_immediate_instructions('0') # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. assert 'html' in output del output['html'] @@ -214,7 +214,7 @@ def test_ondemand_programming_supplement_one_asset(get_page, course): output = course.extract_links_from_programming('0') # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. assert 'html' in output del output['html'] @@ -249,7 +249,7 @@ def test_ondemand_programming_immediate_instructions_one_asset(get_page, course) output = course.extract_links_from_programming_immediate_instructions('0') # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. assert 'html' in output del output['html'] @@ -269,7 +269,7 @@ def test_ondemand_programming_supplement_three_assets(get_page, course): output = json.loads(json.dumps(output)) # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. 
assert 'html' in output del output['html'] diff --git a/coursera/test/test_parsing.py b/coursera/test/test_parsing.py index 385e506..4eac879 100644 --- a/coursera/test/test_parsing.py +++ b/coursera/test/test_parsing.py @@ -65,7 +65,7 @@ def test_that_we_parse_and_write_json_correctly(get_page, json_path): def get_old_style_video(monkeypatch): pytest.skip() """ - Mock some methods that would, otherwise, create repeateadly many web + Mock some methods that would, otherwise, create repeatedly many web requests. More specifically, we mock: @@ -139,7 +139,7 @@ def test_get_on_demand_supplement_url_accumulates_assets(mocked): output = course.extract_links_from_supplement('element_id') # Make sure that SOME html content has been extracted, but remove - # it immeditely because it's a hassle to properly prepare test input + # it immediately because it's a hassle to properly prepare test input # for it. FIXME later. assert 'html' in output del output['html'] From d17027da41efb418384d70cc484390613ec5f850 Mon Sep 17 00:00:00 2001 From: John Doe Date: Sun, 17 Jun 2018 10:36:53 -0700 Subject: [PATCH 077/105] Typo (#672) --- README.md | 8 ++++---- coursera/cookies.py | 4 ++-- coursera/coursera_dl.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8d72afc..a0059f8 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ particular courses that you want to use with `coursera-dl`. 
## Recommended installation method for all Operating Systems -From a command line (preferrably, from a virtual environment), simply issue +From a command line (preferably, from a virtual environment), simply issue the command: pip install coursera-dl @@ -327,7 +327,7 @@ where the script is supposed to be executed, with the following format: #--mathjax-cdn https://cdn.bootcss.com/mathjax/2.7.1/MathJax.js # more other parameters -Parameter which is stored in the file will be overriden if it is again specifed +Parameter which is stored in the file will be overriden if it is again specified in your commandline script **Note:** In `coursera-dl.conf`, all the parameters should not be wrapped @@ -337,11 +337,11 @@ with quotes. In default mode when you interrupt the download process by pressing CTRL+C, partially downloaded files will be deleted from your disk and -you have to start the download process from the begining. If your +you have to start the download process from the beginning. If your download was interrupted by something other than KeyboardInterrupt (CTRL+C) like sudden system crash, partially downloaded files will remain on your disk and the next time you start the process again, -these files will be discraded from download list!, therefore it's your +these files will be discarded from download list!, therefore it's your job to delete them manually before next start. For this reason we added an option called `--resume` which continues your downloads from where they stopped: diff --git a/coursera/cookies.py b/coursera/cookies.py index a66f126..f12ed8f 100644 --- a/coursera/cookies.py +++ b/coursera/cookies.py @@ -25,7 +25,7 @@ from .utils import mkdir_p, random_string # Monkey patch cookielib.Cookie.__init__. # Reason: The expires value may be a decimal string, # but the Cookie class uses int() ... 
-__orginal_init__ = cookielib.Cookie.__init__ +__original_init__ = cookielib.Cookie.__init__ def __fixed_init__(self, version, name, value, @@ -41,7 +41,7 @@ def __fixed_init__(self, version, name, value, rfc2109=False): if expires is not None: expires = float(expires) - __orginal_init__(self, version, name, value, + __original_init__(self, version, name, value, port, port_specified, domain, domain_specified, domain_initial_dot, path, path_specified, diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index a883b0c..eb019ce 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -205,7 +205,7 @@ def download_class(args, class_name): Try to download on-demand class. @return: Tuple of (bool, bool), where the first bool indicates whether - errors occurred while parsing syllabus, the second bool indicaters + errors occurred while parsing syllabus, the second bool indicates whether the course appears to be completed. @rtype: (bool, bool) """ From b0d1cc0cffbfb5516c0febe8ed74e0c09278c3ca Mon Sep 17 00:00:00 2001 From: Richard Decal Date: Wed, 20 Jun 2018 11:27:17 -0700 Subject: [PATCH 078/105] better error msg for incorrect .netrc permissions (#674) --- coursera/credentials.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coursera/credentials.py b/coursera/credentials.py index d6a36cd..aacd1d1 100644 --- a/coursera/credentials.py +++ b/coursera/credentials.py @@ -134,7 +134,8 @@ def authenticate_through_netrc(path=None): error_messages = '\n'.join(str(e) for e in errors) raise CredentialsError( - 'Did not find valid netrc file:\n' + error_messages) + 'Did not find valid netrc file:\n' + error_messages + + '\nPlease run this command: chmod og-rw ~/.netrc') def get_credentials(username=None, password=None, netrc=None, use_keyring=False): From 3df019a6611d687842849546f6ed57fafd23bb28 Mon Sep 17 00:00:00 2001 From: Yuri Bochkarev Date: Sun, 24 Jun 2018 00:09:31 +0300 Subject: [PATCH 079/105] Move to newer API for syllabus and 
lecture retrieval ref #665 ref #673 ref #634 --- coursera/api.py | 208 +++++++++++++++++++++++++++++++------- coursera/coursera_dl.py | 9 +- coursera/define.py | 42 ++++++-- coursera/extractors.py | 136 ++++++++++++++----------- coursera/test/test_api.py | 2 +- coursera/utils.py | 15 ++- requirements.txt | 1 + 7 files changed, 305 insertions(+), 108 deletions(-) diff --git a/coursera/api.py b/coursera/api.py index 657e4b5..d1832ba 100644 --- a/coursera/api.py +++ b/coursera/api.py @@ -12,9 +12,11 @@ import logging import time import requests import urllib -from collections import namedtuple + +from collections import namedtuple, OrderedDict from six import iterkeys, iteritems from six.moves.urllib_parse import quote_plus +import attr from .utils import (BeautifulSoup, make_coursera_absolute_url, extend_supplement_links, clean_url, clean_filename, @@ -26,7 +28,10 @@ from .define import (OPENCOURSE_SUPPLEMENT_URL, OPENCOURSE_ASSETS_URL, OPENCOURSE_API_ASSETS_V1_URL, OPENCOURSE_ONDEMAND_COURSE_MATERIALS, - OPENCOURSE_VIDEO_URL, + OPENCOURSE_ONDEMAND_COURSE_MATERIALS_V2, + OPENCOURSE_ONDEMAND_COURSES_V1, + OPENCOURSE_ONDEMAND_LECTURE_VIDEOS_URL, + OPENCOURSE_ONDEMAND_LECTURE_ASSETS_URL, OPENCOURSE_MEMBERSHIPS, OPENCOURSE_REFERENCES_POLL_URL, OPENCOURSE_REFERENCE_ITEM_URL, @@ -278,7 +283,7 @@ class MarkupToHTMLConverter(object): audio.insert_after(controls_tag) -class OnDemandCourseMaterialItems(object): +class OnDemandCourseMaterialItemsV1(object): """ Helper class that allows accessing lecture JSONs by lesson IDs. 
""" @@ -312,7 +317,7 @@ class OnDemandCourseMaterialItems(object): dom = get_page(session, OPENCOURSE_ONDEMAND_COURSE_MATERIALS, json=True, class_name=course_name) - return OnDemandCourseMaterialItems( + return OnDemandCourseMaterialItemsV1( dom['linked']['onDemandCourseMaterialItems.v1']) def get(self, lesson_id): @@ -408,6 +413,134 @@ class AssetRetriever(object): return result +@attr.s +class ModuleV1(object): + name = attr.ib() + id = attr.ib() + slug = attr.ib() + child_ids = attr.ib() + + def children(self, all_children): + return [all_children[child] for child in self.child_ids] + + +@attr.s +class ModulesV1(object): + children = attr.ib() + + @staticmethod + def from_json(data): + return ModulesV1(OrderedDict( + (item['id'], + ModuleV1(item['name'], + item['id'], + item['slug'], + item['lessonIds'])) + for item in data + )) + + def __getitem__(self, key): + return self.children[key] + + def __iter__(self): + return iter(self.children.values()) + + +@attr.s +class LessonV1(object): + name = attr.ib() + id = attr.ib() + slug = attr.ib() + child_ids = attr.ib() + + def children(self, all_children): + return [all_children[child] for child in self.child_ids] + + +@attr.s +class LessonsV1(object): + children = attr.ib() + + @staticmethod + def from_json(data): + return LessonsV1(OrderedDict( + (item['id'], + LessonV1(item['name'], + item['id'], + item['slug'], + item['itemIds'])) + for item in data + )) + + def __getitem__(self, key): + return self.children[key] + + +@attr.s +class ItemV2(object): + name = attr.ib() + id = attr.ib() + slug = attr.ib() + type_name = attr.ib() + lesson_id = attr.ib() + module_id = attr.ib() + + +@attr.s +class ItemsV2(object): + children = attr.ib() + + @staticmethod + def from_json(data): + return ItemsV2({ + item['id']: + ItemV2(item['name'], + item['id'], + item['slug'], + item['contentSummary']['typeName'], + item['lessonId'], + item['moduleId']) + for item in data + }) + + def __getitem__(self, key): + return 
self.children[key] + + +@attr.s +class VideoV1(object): + resolution = attr.ib() + mp4_video_url = attr.ib() + + +@attr.s +class VideosV1(object): + children = attr.ib() + + @staticmethod + def from_json(data): + + videos = [VideoV1(resolution, links['mp4VideoUrl']) + for resolution, links + in data['sources']['byResolution'].items()] + videos.sort(key=lambda video: video.resolution, reverse=True) + + videos = OrderedDict( + (video.resolution, video) + for video in videos + ) + return VideosV1(videos) + + def __contains__(self, key): + return key in self.children + + def __getitem__(self, key): + return self.children[key] + + def get_best(self): + return next(iter(self.children.values())) + + class CourseraOnDemand(object): """ This is a class that provides a friendly interface to extract certain @@ -687,9 +820,9 @@ class CourseraOnDemand(object): }) return headers - def extract_links_from_lecture(self, + def extract_links_from_lecture(self, course_id, video_id, subtitle_language='en', - resolution='540p', assets=None): + resolution='540p'): """ Return the download URLs of on-demand course video. @@ -702,18 +835,13 @@ class CourseraOnDemand(object): @param resolution: Preferred video resolution. @type resolution: str - @param assets: List of assets that may present in the video. - @type assets: [str] - @return: @see CourseraOnDemand._extract_links_from_text """ - if assets is None: - assets = [] - try: links = self._extract_videos_and_subtitles_from_lecture( - video_id, subtitle_language, resolution) + course_id, video_id, subtitle_language, resolution) + assets = self._get_lecture_asset_ids(course_id, video_id) assets = self._normalize_assets(assets) extend_supplement_links( links, self._extract_links_from_lecture_assets(assets)) @@ -727,6 +855,17 @@ class CourseraOnDemand(object): 'Could not download lecture %s: %s', video_id, exception) return None + def _get_lecture_asset_ids(self, course_id, video_id): + """ + Obtain a list of asset ids from a lecture. 
+ """ + dom = get_page(self._session, OPENCOURSE_ONDEMAND_LECTURE_ASSETS_URL, + json=True, course_id=course_id, video_id=video_id) + # Note that we extract here "id", not definition -> assetId, as it + # be extracted later. + return [asset['id'] + for asset in dom['linked']['openCourseAssets.v1']] + def _normalize_assets(self, assets): """ Perform asset normalization. For some reason, assets that are sometimes @@ -850,41 +989,34 @@ class CourseraOnDemand(object): return urls def _extract_videos_and_subtitles_from_lecture(self, + course_id, video_id, subtitle_language='en', resolution='540p'): - dom = get_page(self._session, OPENCOURSE_VIDEO_URL, - json=True, - video_id=video_id) - logging.debug('Parsing JSON for video_id <%s>.', video_id) + + dom = get_page(self._session, OPENCOURSE_ONDEMAND_LECTURE_VIDEOS_URL, + json=True, + course_id=course_id, + video_id=video_id) + dom = dom['linked']['onDemandVideos.v1'][0] + + videos = VideosV1.from_json(dom) video_content = {} - # videos - logging.debug('Gathering video URLs for video_id <%s>.', video_id) - sources = dom['sources'] - sources.sort(key=lambda src: src['resolution']) - sources.reverse() - - # Try to select resolution requested by the user. - filtered_sources = [source - for source in sources - if source['resolution'] == resolution] - - if len(filtered_sources) == 0: - # We will just use the 'vanilla' version of sources here, instead of - # filtered_sources. - logging.warning('Requested resolution %s not available for <%s>. ' - 'Downloading highest resolution available instead.', - resolution, video_id) - else: + if resolution in videos: + source = videos[resolution] logging.debug('Proceeding with download of resolution %s of <%s>.', resolution, video_id) - sources = filtered_sources + else: + source = videos.get_best() + logging.warning( + 'Requested resolution %s not available for <%s>. 
' + 'Downloading highest resolution (%s) available instead.', + resolution, video_id, source.resolution) - video_url = sources[0]['formatSources']['video/mp4'] - video_content['mp4'] = video_url + video_content['mp4'] = source.mp4_video_url subtitle_link = self._extract_subtitles_from_video_dom( dom, subtitle_language, video_id) diff --git a/coursera/coursera_dl.py b/coursera/coursera_dl.py index 4c20689..a389106 100644 --- a/coursera/coursera_dl.py +++ b/coursera/coursera_dl.py @@ -67,7 +67,8 @@ from .workflow import CourseraDownloader from .parallel import ConsecutiveDownloader, ParallelDownloader from .utils import (clean_filename, get_anchor_format, mkdir_p, fix_url, print_ssl_error_message, - decode_input, BeautifulSoup, is_debug_run) + decode_input, BeautifulSoup, is_debug_run, + spit_json, slurp_json) from .network import get_page, get_page_and_url from .commandline import parse_args @@ -126,8 +127,7 @@ def download_on_demand_class(args, class_name): cached_syllabus_filename = '%s-syllabus-parsed.json' % class_name if args.cache_syllabus and os.path.isfile(cached_syllabus_filename): - with open(cached_syllabus_filename) as syllabus_file: - modules = json.load(syllabus_file) + modules = slurp_json(cached_syllabus_filename) else: error_occured, modules = extractor.get_modules( class_name, @@ -141,8 +141,7 @@ def download_on_demand_class(args, class_name): ) if is_debug_run or args.cache_syllabus(): - with open(cached_syllabus_filename, 'w') as file_object: - json.dump(modules, file_object, indent=4) + spit_json(modules, cached_syllabus_filename) if args.only_syllabus: return error_occured, False diff --git a/coursera/define.py b/coursera/define.py index e2ec105..ff8c51e 100644 --- a/coursera/define.py +++ b/coursera/define.py @@ -61,8 +61,10 @@ OPENCOURSE_LIST_COURSES = 'https://api.coursera.org/api/courses.v1?q=watchlist&s # } # } OPENCOURSE_MEMBERSHIPS = 
'https://api.coursera.org/api/memberships.v1?includes=courseId,courses.v1&q=me&showHidden=true&filter=current,preEnrolled' -OPENCOURSE_CONTENT_URL = 'https://api.coursera.org/api/opencourse.v1/course/{class_name}?showLockedItems=true' -OPENCOURSE_VIDEO_URL = 'https://api.coursera.org/api/opencourse.v1/video/{video_id}' +OPENCOURSE_ONDEMAND_LECTURE_VIDEOS_URL = \ + 'https://api.coursera.org/api/onDemandLectureVideos.v1/'\ + '{course_id}~{video_id}?includes=video&'\ + 'fields=onDemandVideos.v1(sources%2Csubtitles%2CsubtitlesVtt%2CsubtitlesTxt)' OPENCOURSE_SUPPLEMENT_URL = 'https://api.coursera.org/api/onDemandSupplements.v1/'\ '{course_id}~{element_id}?includes=asset&fields=openCourseAssets.v1%28typeName%29,openCourseAssets.v1%28definition%29' OPENCOURSE_PROGRAMMING_ASSIGNMENTS_URL = \ @@ -97,6 +99,23 @@ OPENCOURSE_REFERENCE_ITEM_URL = \ OPENCOURSE_ASSET_URL = \ 'https://api.coursera.org/api/assetUrls.v1?ids={ids}' +# Sample response: +# "linked": { +# "openCourseAssets.v1": [ +# { +# "typeName": "asset", +# "definition": { +# "assetId": "fytYX5rYEeedWRLokafKRg", +# "name": "Lecture slides" +# }, +# "id": "j6g7VZrYEeeUVgpv-dYMig" +# } +# ] +# } +OPENCOURSE_ONDEMAND_LECTURE_ASSETS_URL = \ + 'https://api.coursera.org/api/onDemandLectureAssets.v1/'\ + '{course_id}~{video_id}/?includes=openCourseAssets' + # These ids are provided in lecture json: # # { @@ -170,9 +189,20 @@ OPENCOURSE_API_ASSETS_V1_URL = \ OPENCOURSE_ONDEMAND_COURSE_MATERIALS = \ 'https://api.coursera.org/api/onDemandCourseMaterials.v1/?'\ - 'q=slug&slug={class_name}&includes=moduleIds%2ClessonIds%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2CitemIds%2Ctracks'\ - 
'&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight)%2ConDemandCourseMaterialItems.v1(name%2Cslug%2CtimeCommitment%2Ccontent%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId)%2ConDemandCourseMaterialTracks.v1(passablesCount)'\ - '&showLockedItems=true' + 'q=slug&slug={class_name}&includes=moduleIds%2ClessonIds%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2CitemIds%2Ctracks'\ + '&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight)%2ConDemandCourseMaterialItems.v1(name%2Cslug%2CtimeCommitment%2Ccontent%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId)%2ConDemandCourseMaterialTracks.v1(passablesCount)'\ + '&showLockedItems=true' + +OPENCOURSE_ONDEMAND_COURSE_MATERIALS_V2 = \ + 'https://api.coursera.org/api/onDemandCourseMaterials.v2/?q=slug&slug={class_name}'\ + '&includes=modules%2Clessons%2CpassableItemGroups%2CpassableItemGroupChoices%2CpassableLessonElements%2Citems%2Ctracks%2CgradePolicy&'\ + 
'&fields=moduleIds%2ConDemandCourseMaterialModules.v1(name%2Cslug%2Cdescription%2CtimeCommitment%2ClessonIds%2Coptional%2ClearningObjectives)%2ConDemandCourseMaterialLessons.v1(name%2Cslug%2CtimeCommitment%2CelementIds%2Coptional%2CtrackId)%2ConDemandCourseMaterialPassableItemGroups.v1(requiredPassedCount%2CpassableItemGroupChoiceIds%2CtrackId)%2ConDemandCourseMaterialPassableItemGroupChoices.v1(name%2Cdescription%2CitemIds)%2ConDemandCourseMaterialPassableLessonElements.v1(gradingWeight%2CisRequiredForPassing)%2ConDemandCourseMaterialItems.v2(name%2Cslug%2CtimeCommitment%2CcontentSummary%2CisLocked%2ClockableByItem%2CitemLockedReasonCode%2CtrackId%2ClockedStatus%2CitemLockSummary)%2ConDemandCourseMaterialTracks.v1(passablesCount)'\ + '&showLockedItems=true' + +OPENCOURSE_ONDEMAND_COURSES_V1 = \ + 'https://api.coursera.org/api/onDemandCourses.v1?q=slug&slug={class_name}&'\ + 'includes=instructorIds%2CpartnerIds%2C_links&'\ + 'fields=brandingImage%2CcertificatePurchaseEnabledAt%2Cpartners.v1(squareLogo%2CrectangularLogo)%2Cinstructors.v1(fullName)%2CoverridePartnerLogos%2CsessionsEnabledAt%2CdomainTypes%2CpremiumExperienceVariant%2CisRestrictedMembership' ABOUT_URL = ('https://api.coursera.org/api/catalog.v1/courses?' 'fields=largeIcon,photo,previewLink,shortDescription,smallIcon,' @@ -924,7 +954,7 @@ pre {