# -*- test-case-name: twisted.python.test.test_urlpath -*- # Copyright (c) Twisted Matrix Laboratories. # See LICENSE for details. """ L{URLPath}, a representation of a URL. """ from __future__ import division, absolute_import from twisted.python.compat import ( nativeString, unicode, urllib_parse as urlparse, urlunquote, urlquote ) from hyperlink import URL as _URL _allascii = b"".join([chr(x).encode('ascii') for x in range(1, 128)]) def _rereconstituter(name): """ Attriute declaration to preserve mutability on L{URLPath}. @param name: a public attribute name @type name: native L{str} @return: a descriptor which retrieves the private version of the attribute on get and calls rerealize on set. """ privateName = nativeString("_") + name return property( lambda self: getattr(self, privateName), lambda self, value: (setattr(self, privateName, value if isinstance(value, bytes) else value.encode("charmap")) or self._reconstitute()) ) class URLPath(object): """ A representation of a URL. @ivar scheme: The scheme of the URL (e.g. 'http'). @type scheme: L{bytes} @ivar netloc: The network location ("host"). @type netloc: L{bytes} @ivar path: The path on the network location. @type path: L{bytes} @ivar query: The query argument (the portion after ? in the URL). @type query: L{bytes} @ivar fragment: The page fragment (the portion after # in the URL). @type fragment: L{bytes} """ def __init__(self, scheme=b'', netloc=b'localhost', path=b'', query=b'', fragment=b''): self._scheme = scheme or b'http' self._netloc = netloc self._path = path or b'/' self._query = query self._fragment = fragment self._reconstitute() def _reconstitute(self): """ Reconstitute this L{URLPath} from all its given attributes. """ urltext = urlquote( urlparse.urlunsplit((self._scheme, self._netloc, self._path, self._query, self._fragment)), safe=_allascii ) self._url = _URL.fromText(urltext.encode("ascii").decode("ascii")) scheme = _rereconstituter("scheme") netloc = _rereconstituter("netloc") path = _rereconstituter("path") query = _rereconstituter("query") fragment = _rereconstituter("fragment") @classmethod def _fromURL(cls, urlInstance): """ Reconstruct all the public instance variables of this L{URLPath} from its underlying L{_URL}. @param urlInstance: the object to base this L{URLPath} on. @type urlInstance: L{_URL} @return: a new L{URLPath} """ self = cls.__new__(cls) self._url = urlInstance.replace(path=urlInstance.path or [u""]) self._scheme = self._url.scheme.encode("ascii") self._netloc = self._url.authority().encode("ascii") self._path = (_URL(path=self._url.path, rooted=True).asURI().asText() .encode("ascii")) self._query = (_URL(query=self._url.query).asURI().asText() .encode("ascii"))[1:] self._fragment = self._url.fragment.encode("ascii") return self def pathList(self, unquote=False, copy=True): """ Split this URL's path into its components. @param unquote: whether to remove %-encoding from the returned strings. @param copy: (ignored, do not use) @return: The components of C{self.path} @rtype: L{list} of L{bytes} """ segments = self._url.path mapper = lambda x: x.encode("ascii") if unquote: mapper = (lambda x, m=mapper: m(urlunquote(x))) return [b''] + [mapper(segment) for segment in segments] @classmethod def fromString(klass, url): """ Make a L{URLPath} from a L{str} or L{unicode}. @param url: A L{str} representation of a URL. @type url: L{str} or L{unicode}. @return: a new L{URLPath} derived from the given string. @rtype: L{URLPath} """ if not isinstance(url, (str, unicode)): raise ValueError("'url' must be a str or unicode") if isinstance(url, bytes): # On Python 2, accepting 'str' (for compatibility) means we might # get 'bytes'. On py3, this will not work with bytes due to the # check above. return klass.fromBytes(url) return klass._fromURL(_URL.fromText(url)) @classmethod def fromBytes(klass, url): """ Make a L{URLPath} from a L{bytes}. @param url: A L{bytes} representation of a URL. @type url: L{bytes} @return: a new L{URLPath} derived from the given L{bytes}. @rtype: L{URLPath} @since: 15.4 """ if not isinstance(url, bytes): raise ValueError("'url' must be bytes") quoted = urlquote(url, safe=_allascii) if isinstance(quoted, bytes): # This will only be bytes on python 2, where we can transform it # into unicode. On python 3, urlquote always returns str. quoted = quoted.decode("ascii") return klass.fromString(quoted) @classmethod def fromRequest(klass, request): """ Make a L{URLPath} from a L{twisted.web.http.Request}. @param request: A L{twisted.web.http.Request} to make the L{URLPath} from. @return: a new L{URLPath} derived from the given request. @rtype: L{URLPath} """ return klass.fromBytes(request.prePathURL()) def _mod(self, newURL, keepQuery): """ Return a modified copy of C{self} using C{newURL}, keeping the query string if C{keepQuery} is C{True}. @param newURL: a L{URL} to derive a new L{URLPath} from @type newURL: L{URL} @param keepQuery: if C{True}, preserve the query parameters from C{self} on the new L{URLPath}; if C{False}, give the new L{URLPath} no query parameters. @type keepQuery: L{bool} @return: a new L{URLPath} """ return self._fromURL(newURL.replace( fragment=u'', query=self._url.query if keepQuery else () )) def sibling(self, path, keepQuery=False): """ Get the sibling of the current L{URLPath}. A sibling is a file which is in the same directory as the current file. @param path: The path of the sibling. @type path: L{bytes} @param keepQuery: Whether to keep the query parameters on the returned L{URLPath}. @type: keepQuery: L{bool} @return: a new L{URLPath} """ return self._mod(self._url.sibling(path.decode("ascii")), keepQuery) def child(self, path, keepQuery=False): """ Get the child of this L{URLPath}. @param path: The path of the child. @type path: L{bytes} @param keepQuery: Whether to keep the query parameters on the returned L{URLPath}. @type: keepQuery: L{bool} @return: a new L{URLPath} """ return self._mod(self._url.child(path.decode("ascii")), keepQuery) def parent(self, keepQuery=False): """ Get the parent directory of this L{URLPath}. @param keepQuery: Whether to keep the query parameters on the returned L{URLPath}. @type: keepQuery: L{bool} @return: a new L{URLPath} """ return self._mod(self._url.click(u".."), keepQuery) def here(self, keepQuery=False): """ Get the current directory of this L{URLPath}. @param keepQuery: Whether to keep the query parameters on the returned L{URLPath}. @type: keepQuery: L{bool} @return: a new L{URLPath} """ return self._mod(self._url.click(u"."), keepQuery) def click(self, st): """ Return a path which is the URL where a browser would presumably take you if you clicked on a link with an HREF as given. @param st: A relative URL, to be interpreted relative to C{self} as the base URL. @type st: L{bytes} @return: a new L{URLPath} """ return self._fromURL(self._url.click(st.decode("ascii"))) def __str__(self): """ The L{str} of a L{URLPath} is its URL text. """ return nativeString(self._url.asURI().asText()) def __repr__(self): """ The L{repr} of a L{URLPath} is an eval-able expression which will construct a similar L{URLPath}. """ return ('URLPath(scheme=%r, netloc=%r, path=%r, query=%r, fragment=%r)' % (self.scheme, self.netloc, self.path, self.query, self.fragment))