"""
RTFM:
* http://docs.python.org/2/library/cookielib.html#cookie-objects
Some code was taken from
https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
"""
import json
from six.moves.http_cookiejar import CookieJar, Cookie
from six.moves.urllib.parse import urlparse, urlunparse
from grab.error import GrabMisuseError
# Names of the cookie attributes that `CookieManager.get_dict` reads off
# every cookie when building its serializable representation.
COOKIE_ATTRS = (
    # identity
    "name", "value", "version",
    # scope
    "port", "domain", "path", "secure",
    # lifetime
    "expires", "discard",
    # metadata
    "comment", "comment_url", "rfc2109",
)
# Source:
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockRequest(object):
    """Adapter exposing a request object through a `urllib2.Request` API.

    `cookielib.CookieJar` talks to requests through that interface when it
    applies its cookie policy, so this wrapper provides the methods and
    properties it asks for.

    The wrapped object must offer `url` and `headers` attributes and is
    never modified. Headers added through `add_unredirected_header()` are
    stored separately and can be fetched with `get_new_headers()`.
    """

    def __init__(self, request):
        self._req = request
        self._new_headers = {}
        # URL scheme of the wrapped request, computed once.
        self.type = urlparse(request.url).scheme

    # -- read-only views of the wrapped request ---------------------------

    def get_type(self):
        """Return the URL scheme of the wrapped request."""
        return self.type

    def get_host(self):
        """Return the network location part of the wrapped request's URL."""
        parsed_url = urlparse(self._req.url)
        return parsed_url.netloc

    def get_origin_req_host(self):
        """Return the same value as `get_host()`."""
        return self.get_host()

    def get_full_url(self):
        """Return the request URL, honouring an explicit `Host` header.

        Without a (non-empty) `Host` header the original URL is returned
        as is; otherwise the URL is rebuilt with the header value used as
        the network location.
        """
        host_header = self._req.headers.get("Host")
        if not host_header:
            return self._req.url
        scheme, _, path, params, query, fragment = urlparse(self._req.url)
        return urlunparse([scheme, host_header, path, params, query, fragment])

    def is_unverifiable(self):
        """Always report the request as unverifiable."""
        return True

    # -- header access ----------------------------------------------------

    def has_header(self, name):
        """Tell whether `name` is an original or a newly added header."""
        if name in self._req.headers:
            return True
        return name in self._new_headers

    def get_header(self, name, default=None):
        """Look `name` up in the original headers, then in the new ones."""
        fallback = self._new_headers.get(name, default)
        return self._req.headers.get(name, fallback)

    def add_header(self, key, val):
        """
        Not supported: cookielib has no legitimate use for this method;
        add it back if you find one.
        """
        raise NotImplementedError(
            "Cookie headers should be added with add_unredirected_header()"
        )

    def add_unredirected_header(self, name, value):
        """Record a new header without touching the wrapped request."""
        self._new_headers[name] = value

    def get_new_headers(self):
        """Return the dict of headers recorded so far."""
        return self._new_headers

    # -- attribute-style aliases of the getters above ----------------------

    @property
    def unverifiable(self):
        return self.is_unverifiable()

    @property
    def origin_req_host(self):
        return self.get_origin_req_host()

    @property
    def host(self):
        return self.get_host()
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockResponse(object):
    """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.

    Exposes the parsed HTTP headers of a server response the way
    `cookielib` expects to see them.
    """

    def __init__(self, headers):
        """Make a MockResponse for `cookielib` to read.

        :param headers: a httplib.HTTPMessage or analogous carrying the headers
        """
        self._headers = headers

    def info(self):
        """Return the wrapped headers object unchanged."""
        return self._headers

    def getheaders(self, name):
        """Return whatever the wrapped object's `getheaders(name)` yields.

        :param name: header name to look up
        """
        # The result must be returned: without `return` this method always
        # yields None regardless of the headers.
        return self._headers.getheaders(name)
def create_cookie(name, value, domain, httponly=None, **kwargs):
    """Creates `cookielib.Cookie` instance

    :param name: name of cookie
    :param value: value of cookie
    :param domain: domain of cookie; "localhost" is stored as empty domain
    :param httponly: value stored under the non-standard "HttpOnly" attribute
    :param kwargs: overrides for the default attributes: `version`, `port`,
        `path`, `secure`, `expires`, `discard`, `comment`, `comment_url`,
        `rfc2109`, `rest` (and also `name`, `value`, `domain`)
    :raises GrabMisuseError: if `kwargs` contains an unknown attribute
    :returns: the new `Cookie` object
    """
    if domain == "localhost":
        domain = ""
    config = dict(
        name=name,
        value=value,
        version=0,
        port=None,
        domain=domain,
        path="/",
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rfc2109=False,
        rest={},
    )
    for key in kwargs:
        if key not in config:
            raise GrabMisuseError(
                "Function `create_cookie` does not accept `%s` argument" % key
            )
    config.update(**kwargs)
    # Build a private copy of `rest` before adding "HttpOnly": the mapping
    # may have been supplied by the caller and must not be modified in
    # place. A `rest=None` override is treated as "no extra attributes".
    rest = dict(config["rest"] or {})
    rest["HttpOnly"] = httponly
    config["rest"] = rest
    # The *_specified flags are derived from the final attribute values.
    config["port_specified"] = bool(config["port"])
    config["domain_specified"] = bool(config["domain"])
    # pytype: disable=attribute-error
    config["domain_initial_dot"] = (config["domain"] or "").startswith(".")
    # pytype: enable=attribute-error
    config["path_specified"] = bool(config["path"])
    return Cookie(**config)
class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ("cookiejar",)

    def __init__(self, cookiejar=None):
        """
        :param cookiejar: existing `CookieJar` to wrap; a new empty jar is
            created when it is omitted
        """
        self.cookiejar = cookiejar if cookiejar is not None else CookieJar()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; "localhost" is stored as empty domain
        :param kwargs: extra attributes of cookie
        """
        if domain == "localhost":
            domain = ""
        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from `cookies` into this manager.

        :param cookies: a `CookieJar` or another `CookieManager`
        :raises GrabMisuseError: if `cookies` has any other type
        """
        if isinstance(cookies, CookieJar):
            source = cookies
        elif isinstance(cookies, CookieManager):
            source = cookies.cookiejar
        else:
            raise GrabMisuseError(
                "Unknown type of cookies argument: %s" % type(cookies)
            )
        for cookie in source:
            self.cookiejar.set_cookie(cookie)

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager around a new jar filled with cookies of `clist`."""
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with a new empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return the slot values, with the jar replaced by a cookie list.

        The jar is stored under the "_cookiejar_cookies" key as a plain list
        of its cookies; `__setstate__` rebuilds a jar from that list.
        """
        state = {}
        for cls in type(self).mro():
            for slot in getattr(cls, "__slots__", ()):
                if slot != "__weakref__" and hasattr(self, slot):
                    state[slot] = getattr(self, slot)
        state["_cookiejar_cookies"] = list(self.cookiejar)
        del state["cookiejar"]
        return state

    def __setstate__(self, state):
        """Restore slots from a dict produced by `__getstate__`."""
        # Work on a copy so the dict handed in by the caller stays intact.
        state = dict(state)
        jar = CookieJar()
        for cookie in state.pop("_cookiejar_cookies"):
            jar.set_cookie(cookie)
        state["cookiejar"] = jar
        for slot, value in state.items():
            setattr(self, slot, value)

    def __getitem__(self, key):
        """Return the value of the first cookie in the jar named `key`.

        :raises KeyError: if the jar has no cookie with that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        # Include the missing name so the error message is informative.
        raise KeyError(key)

    def items(self):
        """Return a list of `(name, value)` pairs for all cookies."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        An empty file is accepted and loads nothing.
        """
        with open(path, encoding="utf-8") as inf:
            data = inf.read()
        items = json.loads(data) if data else []
        for item in items:
            # Everything except the three positional fields is forwarded
            # as extra cookie attributes.
            extra = {
                key: val
                for key, val in item.items()
                if key not in ("name", "value", "domain")
            }
            self.set(item["name"], item["value"], item["domain"], **extra)

    def get_dict(self):
        """Return the cookies as a list of dicts (one dict per cookie).

        Each dict maps every name in `COOKIE_ATTRS` to the corresponding
        attribute of the cookie.
        """
        return [
            {attr: getattr(cookie, attr) for attr in COOKIE_ATTRS}
            for cookie in self.cookiejar
        ]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as JSON-serialized list of dicts, i.e. the
        result of `get_dict()`.
        """
        with open(path, "w", encoding="utf-8") as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """
        Return the value of the "Cookie" header the jar produces for `req`,
        or None if the jar did not add one.

        :param req: object with httplib.Request interface
            Actually, it have to have `url` and `headers` attributes
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get("Cookie")