From e6233e1ff3f8c40311928d66b9243825d6368246 Mon Sep 17 00:00:00 2001 From: eggplants Date: Tue, 15 Mar 2022 03:55:51 +0900 Subject: [PATCH 1/3] fix: judge query type more exactly --- SPARQLWrapper/Wrapper.py | 87 ++++++++++------------------------------ 1 file changed, 22 insertions(+), 65 deletions(-) diff --git a/SPARQLWrapper/Wrapper.py b/SPARQLWrapper/Wrapper.py index 977380a..ff2f0be 100644 --- a/SPARQLWrapper/Wrapper.py +++ b/SPARQLWrapper/Wrapper.py @@ -30,27 +30,22 @@ import urllib.request import warnings from http.client import HTTPResponse -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, Union, cast -from urllib.request import ( - urlopen as urlopener, -) # don't change the name: tests override it +from typing import (TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, + Union, cast) +from urllib.request import \ + urlopen as urlopener # don't change the name: tests override it from xml.dom.minidom import Document, parse +from rdflib.plugins.sparql import parser + from SPARQLWrapper import __agent__ if TYPE_CHECKING: from rdflib import Graph - - from .KeyCaseInsensitiveDict import KeyCaseInsensitiveDict -from .SPARQLExceptions import ( - EndPointInternalError, - EndPointNotFound, - QueryBadFormed, - Unauthorized, - URITooLong, -) +from .SPARQLExceptions import (EndPointInternalError, EndPointNotFound, + QueryBadFormed, Unauthorized, URITooLong) # alias @@ -131,7 +126,6 @@ MOVE, ADD, ] - # Possible methods to perform requests URLENCODED = "urlencoded" """to be used to set **URL encode** as the encoding method for the request. @@ -248,28 +242,8 @@ class SPARQLWrapper(object): :ivar _defaultReturnFormat: The default return format. It is used in case the same class instance is reused for subsequent queries. :vartype _defaultReturnFormat: string - - :cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type. - :vartype prefix_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python - :cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type - :data:`CONSTRUCT`, :data:`SELECT`, :data:`ASK`, :data:`DESCRIBE`, :data:`INSERT`, :data:`DELETE`, :data:`CREATE`, - :data:`CLEAR`, :data:`DROP`, :data:`LOAD`, :data:`COPY`, :data:`MOVE` or :data:`ADD`. - :vartype pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of Python - :cvar comments_pattern: regular expression used to remove comments from a query. - :vartype comments_pattern: :class:`re.RegexObject`, a compiled regular expression. See the :mod:`re` module of - Python """ - prefix_pattern = re.compile( - r"((?P(\s*BASE\s*<.*?>)\s*)|(?P(\s*PREFIX\s+.+:\s*<.*?>)\s*))*" - ) - # Maybe the future name could be queryType_pattern - pattern = re.compile( - r"(?P(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))", - re.VERBOSE | re.IGNORECASE, - ) - comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") - def __init__( self, endpoint: str, @@ -594,7 +568,7 @@ def setQuery(self, query: Union[str, bytes]) -> None: self.queryString = query self.queryType = self._parseQueryType(query) - def _parseQueryType(self, query: str) -> Optional[str]: + def _parseQueryType(self, query: str) -> str: """ Internal method for parsing the SPARQL query and return its type (ie, :data:`SELECT`, :data:`ASK`, etc). @@ -609,28 +583,21 @@ def _parseQueryType(self, query: str) -> Optional[str]: :return: the type of SPARQL query (aka SPARQL query form). :rtype: string """ - try: - query = ( - query if (isinstance(query, str)) else query.encode("ascii", "ignore") - ) - query = self._cleanComments(query) - query_for_queryType = re.sub(self.prefix_pattern, "", query.strip()) - # type error: Item "None" of "Optional[Match[str]]" has no attribute "group" - r_queryType = ( - self.pattern.search(query_for_queryType).group("queryType").upper() # type: ignore[union-attr] - ) - except AttributeError: - warnings.warn( - "not detected query type for query '%r'" % query.replace("\n", " "), - RuntimeWarning, - ) - r_queryType = None + query = query if (isinstance(query, str)) else query.encode("ascii", "ignore") + tokens = parser.parseQuery(query) # type: ignore[no-untyped-call] + r_queryTypes: List[str] = [ + token.name.upper().replace("QUERY", "") + for token in tokens + if any([t in token.name for t in _allowedQueryTypes]) + ] - if r_queryType in _allowedQueryTypes: - return r_queryType + if len(r_queryTypes) > 0: + return r_queryTypes[0] else: # raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT") - warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning) + warnings.warn( + "query type is not allowed or cannot be detected", RuntimeWarning + ) return SELECT def setMethod(self, method: str) -> None: @@ -690,17 +657,6 @@ def isSparqlQueryRequest(self) -> bool: """ return not self.isSparqlUpdateRequest() - def _cleanComments(self, query: str) -> str: - """Internal method for returning the query after all occurrence of singleline comments are removed - (issues #32 and #77). - - :param query: The query. - :type query: string - :return: the query after all occurrence of singleline comments are removed. - :rtype: string - """ - return re.sub(self.comments_pattern, "\n\n", query) - def _getRequestEncodedParameters( self, query: Optional[Tuple[str, str]] = None ) -> str: @@ -1083,6 +1039,7 @@ def _convertRDF(self) -> "Graph": :rtype: :class:`rdflib.graph.Graph` """ from rdflib import ConjunctiveGraph + retval = ConjunctiveGraph() retval.parse(self.response, format="xml") # type: ignore[no-untyped-call] return retval From 9dedfbaf43b0c40601f1910fd0283a6cd0cff18d Mon Sep 17 00:00:00 2001 From: eggplants Date: Tue, 15 Mar 2022 05:54:41 +0900 Subject: [PATCH 2/3] fix --- SPARQLWrapper/Wrapper.py | 83 +++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 13 deletions(-) diff --git a/SPARQLWrapper/Wrapper.py b/SPARQLWrapper/Wrapper.py index ff2f0be..d97dd73 100644 --- a/SPARQLWrapper/Wrapper.py +++ b/SPARQLWrapper/Wrapper.py @@ -30,10 +30,20 @@ import urllib.request import warnings from http.client import HTTPResponse -from typing import (TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple, - Union, cast) -from urllib.request import \ - urlopen as urlopener # don't change the name: tests override it +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, + cast, +) +from urllib.request import ( + urlopen as urlopener, +) # don't change the name: tests override it from xml.dom.minidom import Document, parse from rdflib.plugins.sparql import parser @@ -44,8 +54,13 @@ from rdflib import Graph from .KeyCaseInsensitiveDict import KeyCaseInsensitiveDict -from .SPARQLExceptions import (EndPointInternalError, EndPointNotFound, - QueryBadFormed, Unauthorized, URITooLong) +from .SPARQLExceptions import ( + EndPointInternalError, + EndPointNotFound, + QueryBadFormed, + Unauthorized, + URITooLong, +) # alias @@ -244,6 +259,8 @@ class SPARQLWrapper(object): :vartype _defaultReturnFormat: string """ + comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") + def __init__( self, endpoint: str, @@ -584,15 +601,45 @@ def _parseQueryType(self, query: str) -> str: :rtype: string """ query = query if (isinstance(query, str)) else query.encode("ascii", "ignore") - tokens = parser.parseQuery(query) # type: ignore[no-untyped-call] - r_queryTypes: List[str] = [ - token.name.upper().replace("QUERY", "") - for token in tokens - if any([t in token.name for t in _allowedQueryTypes]) - ] + tokens = None + r_queryTypes = [] + try: + tokens = [ + token.name.upper().replace("QUERY", "") + for token in parser.parseQuery(query) + ] # type: ignore[no-untyped-call] + r_queryTypes = [ + token + for token in tokens + if any([t == token for t in _allowedQueryTypes]) + ] + except Exception: + try: + tokens = [ + token.name.upper() + .replace("DATA", "") + .replace("WHERE", "") + .replace("CLAUSE", "") + for token in parser.parseUpdate(query).get("request", []) + ] # type: ignore[no-untyped-call] + r_queryTypes = [ + token + for token in tokens + if any([t == token for t in _allowedQueryTypes]) + ] + + except Exception as e: + warnings.warn( + ( + "not detected query type for query '%r' " + % query.replace("\n", " ") + + "(%s)" % e + ), + RuntimeWarning, + ) if len(r_queryTypes) > 0: - return r_queryTypes[0] + return str(r_queryTypes[0]) else: # raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT") warnings.warn( @@ -657,6 +704,16 @@ def isSparqlQueryRequest(self) -> bool: """ return not self.isSparqlUpdateRequest() + def _cleanComments(self, query: str) -> str: + """Internal method for returning the query after all occurrence of singleline comments are removed + (issues #32 and #77). + :param query: The query. + :type query: string + :return: the query after all occurrence of singleline comments are removed. + :rtype: string + """ + return re.sub(self.comments_pattern, "\n\n", query) + def _getRequestEncodedParameters( self, query: Optional[Tuple[str, str]] = None ) -> str: From 956530236687208fce520554e3605769701017fe Mon Sep 17 00:00:00 2001 From: eggplants Date: Tue, 15 Mar 2022 16:38:42 +0900 Subject: [PATCH 3/3] fix: any([token == t for t in allowed]) -> token in allowed --- SPARQLWrapper/Wrapper.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/SPARQLWrapper/Wrapper.py b/SPARQLWrapper/Wrapper.py index d97dd73..862b82d 100644 --- a/SPARQLWrapper/Wrapper.py +++ b/SPARQLWrapper/Wrapper.py @@ -41,9 +41,11 @@ Union, cast, ) -from urllib.request import ( - urlopen as urlopener, -) # don't change the name: tests override it + +# -- don't change the name: tests override it +from urllib.request import urlopen as urlopener +# -- don't change the name: tests override it + from xml.dom.minidom import Document, parse from rdflib.plugins.sparql import parser @@ -608,11 +610,7 @@ def _parseQueryType(self, query: str) -> str: token.name.upper().replace("QUERY", "") for token in parser.parseQuery(query) ] # type: ignore[no-untyped-call] - r_queryTypes = [ - token - for token in tokens - if any([t == token for t in _allowedQueryTypes]) - ] + r_queryTypes = [token for token in tokens if token in _allowedQueryTypes] except Exception: try: tokens = [ @@ -623,9 +621,7 @@ def _parseQueryType(self, query: str) -> str: for token in parser.parseUpdate(query).get("request", []) ] # type: ignore[no-untyped-call] r_queryTypes = [ - token - for token in tokens - if any([t == token for t in _allowedQueryTypes]) + token for token in tokens if token in _allowedQueryTypes ] except Exception as e: