mirror of
https://github.com/coursera-dl/coursera-dl.git
synced 2026-01-23 02:35:37 +00:00
Now specialization names can be passed and they will be expanded: corresponding child classes will be downloaded.
108 lines
2.9 KiB
Python
108 lines
2.9 KiB
Python
"""
|
|
This module contains utility functions that operate on the network, download
|
|
some data and so on.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
|
|
import requests
|
|
|
|
|
|
def get_reply(session, url, post=False, data=None, headers=None, quiet=False):
    """
    Send a single GET or POST request through the given requests session
    and hand back the raw response object.

    The request is built explicitly, prepared by the session and then sent.
    An HTTP error raised by the status check is propagated to the caller.

    @param session: Requests session used to prepare and send the request.
    @type session: requests.Session

    @param url: URL to request. It is used as is; no formatting happens here.
    @type url: str

    @param post: When true a POST request is sent, otherwise a GET request.
    @type post: bool

    @param data: Payload handed to the request as its C{data} argument.
    @type data: object

    @param headers: Additional headers for this request. None means that
        no additional headers are passed.
    @type headers: dict

    @param quiet: When true, the error and the server reply are not logged
        before the HTTP error is re-raised.
    @type quiet: bool

    @return: Requests response.
    @rtype: requests.Response

    @raise requests.exceptions.HTTPError: Re-raised when
        C{reply.raise_for_status()} raises it.
    """
    if post:
        method = 'POST'
    else:
        method = 'GET'
    extra_headers = headers if headers is not None else {}

    # Build the request by hand and let the session prepare it before
    # sending.
    prepared = session.prepare_request(
        requests.Request(method, url, data=data, headers=extra_headers))
    reply = session.send(prepared)

    try:
        reply.raise_for_status()
    except requests.exceptions.HTTPError as error:
        if quiet:
            # The caller asked for silence: propagate without logging.
            raise
        logging.error("Error %s getting page %s", error, url)
        logging.error("The server replied: %s", reply.text)
        raise

    return reply
|
|
|
|
|
|
def get_page(session,
             url,
             json=False,
             post=False,
             data=None,
             headers=None,
             quiet=False,
             **kwargs):
    """
    Fill in the URL pattern, request it using the requests session and
    return the body of the response.

    @param session: Requests session.
    @type session: requests.Session

    @param url: URL pattern; C{kwargs} are substituted into it with
        C{str.format}.
    @type url: str

    @param json: When true the result of C{reply.json()} is returned,
        otherwise the response text is returned.
    @type json: bool

    @param post: Flag that indicates whether POST request should be sent.
    @type post: bool

    @param data: Payload data that is sent with request (in request body).
    @type data: object

    @param headers: Additional headers to send with request.
    @type headers: dict

    @param quiet: Passed through to C{get_reply}; controls whether an
        HTTP error is logged before it is re-raised.
    @type quiet: bool

    @param kwargs: Keyword values used to format the URL pattern.

    @return: Response body: the text, or the decoded JSON when C{json}
        is true.
    @rtype: str or object
    """
    page_url = url.format(**kwargs)
    reply = get_reply(session,
                      page_url,
                      post=post,
                      data=data,
                      headers=headers,
                      quiet=quiet)
    if json:
        return reply.json()
    return reply.text
|
|
|
|
|
|
def get_page_and_url(session, url):
    """
    Request a page with a GET request using the requests session and
    return its text together with the URL reported by the response
    (the final URL after following redirects).

    @param session: Requests session.
    @type session: requests.Session

    @param url: URL to request. It is passed on unchanged; no formatting
        is applied to it.
    @type url: str

    @return: Pair of response text and response URL.
    @rtype: (str, str)
    """
    response = get_reply(session, url)
    page_text = response.text
    final_url = response.url
    return page_text, final_url
|
|
|
|
|
|
def post_page_and_reply(session, url, data=None, headers=None, **kwargs):
    """
    Fill in the URL pattern, send a POST request using the requests
    session and return both the response text and the response itself.

    @param session: Requests session.
    @type session: requests.Session

    @param url: URL pattern; C{kwargs} are substituted into it with
        C{str.format}.
    @type url: str

    @param data: Payload data that is sent with request (in request body).
    @type data: object

    @param headers: Additional headers to send with request.
    @type headers: dict

    @param kwargs: Keyword values used to format the URL pattern.

    @return: Pair of response text and the response object.
    @rtype: (str, requests.Response)
    """
    target_url = url.format(**kwargs)
    response = get_reply(session,
                         target_url,
                         post=True,
                         data=data,
                         headers=headers)
    return response.text, response
|