From dc0306bbe269950d01a1c2624ceb4dd47fcca22d Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 15:38:26 +0100 Subject: [PATCH 01/30] Python 3 compatibility --- btdht/__init__.py | 2 +- btdht/dht.pyx | 199 ++++++++++++++++++--------------- btdht/krcp.pxd | 3 +- btdht/krcp.pyx | 259 +++++++++++++++++++++++-------------------- btdht/utils.pyx | 40 ++++--- requirements-dev.txt | 1 + requirements.txt | 1 + setup.py | 2 +- 8 files changed, 281 insertions(+), 226 deletions(-) diff --git a/btdht/__init__.py b/btdht/__init__.py index 94eb142..dd7e057 100644 --- a/btdht/__init__.py +++ b/btdht/__init__.py @@ -10,4 +10,4 @@ # # (c) 2015 Valentin Samir -from .dht import DHT, DHT_BASE, ID, Node, Bucket, RoutingTable, NotFound, BucketFull, NoTokenError +from btdht.dht import DHT, DHT_BASE, ID, Node, Bucket, RoutingTable, NotFound, BucketFull, NoTokenError diff --git a/btdht/dht.pyx b/btdht/dht.pyx index f8cc0df..8b5ba11 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -17,6 +17,7 @@ import os import IN import sys import time +import six try: import Queue except ImportError: @@ -28,6 +29,7 @@ import socket import select import collections import netaddr +import binascii from functools import total_ordering, reduce from threading import Thread, Lock from random import shuffle @@ -149,7 +151,7 @@ cdef class DHT_BASE: """ nodes_nb = 0 if filename is None: - myid = self.myid.value.encode("hex") + myid = binascii.b2a_hex(self.myid.value) filename = "dht_%s.status" % myid with open(filename, 'wb') as f: for bucket in self.root.trie.values(): @@ -172,7 +174,7 @@ cdef class DHT_BASE: """ nodes_nb = 0 if filename is None: - myid = self.myid.value.encode("hex") + myid = binascii.b2a_hex(self.myid.value) filename = "dht_%s.status" % myid try: with open(filename, 'rb') as f: @@ -310,7 +312,7 @@ cdef class DHT_BASE: def init_socket(self): """Initialize the UDP socket of the DHT""" - self.debug(0, "init socket for %s" % self.myid.value.encode("hex")) + self.debug(0, "init 
socket for %s" % binascii.b2a_hex(self.myid.value)) if self.sock: try:self.sock.close() except: pass @@ -372,7 +374,7 @@ cdef class DHT_BASE: self.sleep(0.1) if not info_hash in self._get_closest_loop_lock: self._get_closest_loop_lock[info_hash]=time.time() - self.debug(2, "get closest hash %s" % info_hash.encode("hex")) + self.debug(2, "get closest hash %s" % binascii.b2a_hex(info_hash)) self.root.register_torrent(info_hash) tried_nodes = set() ts = time.time() + delay @@ -434,7 +436,7 @@ cdef class DHT_BASE: return peers else: self._get_peer_loop_lock[hash]=time.time() - self.debug(2, "get peers hash %s" % hash.encode("hex")) + self.debug(2, "get peers hash %s" % binascii.b2a_hex(hash)) self.root.register_torrent(hash) tried_nodes = set() ts = time.time() + delay @@ -491,13 +493,13 @@ cdef class DHT_BASE: ts = time.time() + 2 # we search peers and we found as least limit of them if (typ == "peers" and limit and hash in self._got_peers and self._got_peers[hash] and len(self._got_peers[hash])>=limit): - self.debug(2, "Hash %s find peers" % hash.encode("hex")) + self.debug(2, "Hash %s find peers" % binascii.b2a_hex(hash)) if callback: callback(self._get_peers(hash, compact=False)) on_stop(hash, typ) # we search closest node and we don't find any closest elif (typ == "closest" and closest == _closest): - self.debug(2, "Hash %s find nodes" % hash.encode("hex")) + self.debug(2, "Hash %s find nodes" % binascii.b2a_hex(hash)) if callback: callback(_closest) on_stop(hash, typ) @@ -509,13 +511,13 @@ cdef class DHT_BASE: else: # we search peers, and we found some if (typ == "peers" and hash in self._got_peers and self._got_peers[hash]): - self.debug(2, "Hash %s find peers" % hash.encode("hex")) + self.debug(2, "Hash %s find peers" % binascii.b2a_hex(hash)) if callback: callback(self._get_peers(hash, compact=False)) on_stop(hash, typ) # we did not found peers nor closest node althougth we ask every close nodes we know of else: - self.debug(2, "Hash %s not peers or nodes not 
found" % hash.encode("hex")) + self.debug(2, "Hash %s not peers or nodes not found" % binascii.b2a_hex(hash)) if callback: callback([]) on_stop(hash, typ) @@ -540,12 +542,12 @@ cdef class DHT_BASE: try: # In compact mode (to send over udp) return at most 70 peers to avoid udp fragmentation if compact: - peers = [(-t,ip,port) for ((ip, port), t) in self._peers[info_hash].items()] + peers = [(-t,ip,port) for ((ip, port), t) in six.iteritems(self._peers[info_hash])] # putting the more recent annonces in first peers.sort() return [struct.pack("!4sH", socket.inet_aton(ip), port) for (_, ip, port) in peers[0:70]] else: - peers = [(-t,ip,port) for ((ip, port), t) in self._got_peers[info_hash].items()] + peers = [(-t,ip,port) for ((ip, port), t) in six.iteritems(self._got_peers[info_hash])] # putting the more recent annonces in first peers.sort() return [(ip, port) for (_, ip, port) in peers] @@ -578,7 +580,7 @@ cdef class DHT_BASE: """ l = list(self.root.get_closest_nodes(id)) if compact: - return "".join(n.compact_info() for n in l) + return b"".join(n.compact_info() for n in l) else: return list(self.root.get_closest_nodes(id)) @@ -587,20 +589,20 @@ cdef class DHT_BASE: self.debug(0,"Bootstraping") for addr in [("router.utorrent.com", 6881), ("genua.fr", 6880), ("dht.transmissionbt.com", 6881)]: msg = BMessage() - msg.y = 'q' - msg.q = "find_node" + msg.y = b'q' + msg.q = b"find_node" self._set_transaction_id(msg) msg.set_a(True) - msg["id"] = self.myid.value - msg["target"] = self.myid.value - self.sendto(str(msg), addr) + msg[b"id"] = self.myid.value + msg[b"target"] = self.myid.value + self.sendto(msg.encode(), addr) def _update_node(self, obj): """update a node the in routing table on msg received""" - if obj.y == "q" or obj.y == "r": - id = obj.get("id") + if obj.y == b"q" or obj.y == b"r": + id = obj.get(b"id") if id: try: node = self.root.get_node(id) @@ -609,9 +611,9 @@ cdef class DHT_BASE: except NotFound: node = Node(id=id, ip=obj.addr[0], port=obj.addr[1]) 
self.root.add(self, node) - if obj.y == "q": + if obj.y == b"q": node.last_query = int(time.time()) - elif obj.y == "r": + elif obj.y == b"r": node.last_response = int(time.time()) node.failed = 0 else: @@ -686,7 +688,7 @@ cdef class DHT_BASE: print("TypeError: %r in _recv_loop" % obj) raise # On query - if obj.y == "q": + if obj.y == b"q": # process the query self._process_query(obj) # build the response object @@ -696,9 +698,9 @@ cdef class DHT_BASE: self.last_msg = time.time() # send it - self.sendto(str(reponse), addr) + self.sendto(reponse.encode(), addr) # on response - elif obj.y == "r": + elif obj.y == b"r": # process the response self._process_response(obj, obj_opt) @@ -706,7 +708,7 @@ cdef class DHT_BASE: self.last_msg = time.time() self.last_msg_rep = time.time() # on error - elif obj.y == "e": + elif obj.y == b"e": # process it self.on_error(obj, obj_opt) @@ -715,7 +717,7 @@ cdef class DHT_BASE: if self.debuglvl > 1: traceback.print_exc() self.debug(2, "error %r" % error) - self.sendto(str(error), addr) + self.sendto(error.encode(), addr) # socket unavailable ? 
except socket.error as e: if e.errno not in [11, 1]: # 11: Resource temporarily unavailable @@ -760,9 +762,12 @@ cdef class DHT_BASE: """Function cleaning datastructures of the DHT""" now = time.time() - for id in self.transaction_type.keys(): + to_delete = [] + for id in self.transaction_type: if now - self.transaction_type[id][1] > 30: - del self.transaction_type[id] + to_delete.append(id) + for key in to_delete: + del self.transaction_type[key] self._threads = [t for t in self._threads[:] if t.is_alive()] @@ -778,35 +783,49 @@ cdef class DHT_BASE: # Long cleaning if now - self.long_clean >= 15 * 60: # cleaning old tokens - for ip in self.token.keys(): + to_delete = [] + for ip in self.token: self.token[ip] = [t for t in self.token[ip] if (now - t[1]) < 600] if not self.token[ip]: - del self.token[ip] - for id in self.mytoken.keys(): + to_delete.append(ip) + for key in to_delete: + del self.token[key] + to_delete = [] + for id in self.mytoken: try: if now - self.mytoken[id][1] > 600: - del self.mytoken[id] + to_delete.append(id) except KeyError: pass + for key in to_delete: + del self.mytoken[id] # cleaning old peer for announce_peer - for hash, peers in self._peers.items(): - for peer in peers.keys(): + to_delete = collections.defaultdict(list) + for hash, peers in six.iteritems(self._peers): + for peer in peers: try: if now - self._peers[hash][peer] > 30 * 60: - del self._peers[hash][peer] + to_delete[hash].append(peer) except KeyError: pass + for hash in to_delete: + for peer in to_delete[hash]: + del self._peers[hash][peer] if not self._peers[hash]: del self._peers[hash] - for hash, peers in self._got_peers.items(): - for peer in peers.keys(): + to_delete = collections.defaultdict(list) + for hash, peers in six.iteritems(self._got_peers): + for peer in peers: try: if now - self._got_peers[hash][peer] > 15 * 60: - del self._got_peers[hash][peer] + to_delete[hash].append(peer) except KeyError: pass + for hash in to_delete: + for peer in to_delete[hash]: + del 
self._got_peers[hash][peer] if not self._got_peers[hash]: del self._got_peers[hash] @@ -934,7 +953,7 @@ cdef class DHT_BASE: def _on_ping_response(self, query, response): pass def _on_find_node_response(self, query, response): - nodes = Node.from_compact_infos(response.get("nodes", "")) + nodes = Node.from_compact_infos(response.get(b"nodes", b"")) for node in nodes: try: self.root.add(self, node) @@ -943,15 +962,15 @@ cdef class DHT_BASE: raise self.debug(2, "%s nodes added to routing table" % len(nodes)) def _on_get_peers_response(self, query, response): - token = response.get("token") + token = response.get(b"token") if token: - self.mytoken[response["id"]]=(token, time.time()) - for node in Node.from_compact_infos(response.get("nodes", "")): + self.mytoken[response[b"id"]]=(token, time.time()) + for node in Node.from_compact_infos(response.get(b"nodes", "")): self.root.add(self, node) - for ipport in response.get("values", []): + for ipport in response.get(b"values", []): (ip, port) = struct.unpack("!4sH", ipport) ip = socket.inet_ntoa(ip) - self._add_peer_queried(query["info_hash"], ip=ip, port=port) + self._add_peer_queried(query[b"info_hash"], ip=ip, port=port) def _on_announce_peer_response(self, query, response): pass @@ -963,23 +982,23 @@ cdef class DHT_BASE: pass def _on_announce_peer_query(self, query): try: - if query.get("implied_port", 0) != 0: + if query.get(b"implied_port", 0) != 0: if query.addr[1] > 0 and query.addr[1] < 65536: - self._add_peer(info_hash=query["info_hash"], ip=query.addr[0], port=query.addr[1]) + self._add_peer(info_hash=query[b"info_hash"], ip=query.addr[0], port=query.addr[1]) else: self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query.addr[1]) else: - if query["port"] > 0 and query["port"] < 65536: - self._add_peer(info_hash=query["info_hash"], ip=query.addr[0], port=query["port"]) + if query[b"port"] > 0 and query[b"port"] < 65536: + self._add_peer(info_hash=query[b"info_hash"], 
ip=query.addr[0], port=query[b"port"]) else: self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query["port"]) except KeyError as e: - raise ProtocolError(query.t, "Message malformed: %s key is missing" % e.message) + raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.message) def _process_response(self, obj, query): - if query.q in ["find_node", "ping", "get_peers", "announce_peer"]: - getattr(self, '_on_%s_response' % query.q)(query, obj) + if query.q in [b"find_node", b"ping", b"get_peers", b"announce_peer"]: + getattr(self, '_on_%s_response' % query.q.decode())(query, obj) if query.q in self._to_process_registered: try: self._to_process.put_nowait((query, obj)) @@ -988,8 +1007,8 @@ cdef class DHT_BASE: #getattr(self, 'on_%s_response' % query.q)(query, obj) def _process_query(self, obj): - if obj.q in ["find_node", "ping", "get_peers", "announce_peer"]: - getattr(self, '_on_%s_query' % obj.q)(obj) + if obj.q in [b"find_node", b"ping", b"get_peers", b"announce_peer"]: + getattr(self, '_on_%s_query' % obj.q.decode())(obj) if obj.q in self._to_process_registered: try: self._to_process.put_nowait((obj, None)) @@ -1005,9 +1024,9 @@ cdef class DHT_BASE: try: (query, response) = self._to_process.get(timeout=1) if response is None: - getattr(self, 'on_%s_query' % query.q)(query) + getattr(self, 'on_%s_query' % query.q.decode())(query) else: - getattr(self, 'on_%s_response' % query.q)(query, response) + getattr(self, 'on_%s_response' % query.q.decode())(query, response) except Queue.Empty: pass @@ -1020,24 +1039,24 @@ cdef class DHT_BASE: if self.debuglvl > 0: traceback.print_exc() self.debug(1, "%s for %r" % (e, addr)) - raise ProtocolError("") + raise ProtocolError(b"") try: - if msg.y == "q": + if msg.y == b"q": return msg, None - elif msg.y == "r": + elif msg.y == b"r": if msg.t in self.transaction_type: ttype = self.transaction_type[msg.t][0] query = self.transaction_type[msg.t][2] return msg, query else: - raise 
GenericError(msg.t, "transaction id unknown") - elif msg.y == "e": + raise GenericError(msg.t, b"transaction id unknown") + elif msg.y == b"e": query = self.transaction_type.get(msg.t, (None, None, None))[2] if msg.errno == 201: self.debug(2, "ERROR:201:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {}))) return GenericError(msg.t, msg.errmsg), query elif msg.errno == 202: - self.debug(2, "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {}))) + self.debug(2, "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {})[2].encode())) return ServerError(msg.t, msg.errmsg), query elif msg.errno == 203: t = self.transaction_type.get(msg.t) @@ -1049,14 +1068,14 @@ cdef class DHT_BASE: return MethodUnknownError(msg.t, msg.errmsg), query else: self.debug(3, "ERROR:%s:%s pour %r" % (msg.errno, msg.errmsg, self.transaction_type.get(msg.t, {}))) - raise MethodUnknownError(msg.t, "Error code %s unknown" % msg.errno) + raise MethodUnknownError(msg.t, b"Error code %s unknown" % msg.errno) else: self.debug(0, "UNKNOWN MSG: %s" % msg) raise ProtocolError(msg.t) except KeyError as e: - raise ProtocolError(msg.t, "Message malformed: %s key is missing" % e.message) + raise ProtocolError(msg.t, b"Message malformed: %s key is missing" % e.message) except IndexError: - raise ProtocolError(msg.t, "Message malformed") + raise ProtocolError(msg.t, b"Message malformed") class BucketFull(Exception): @@ -1097,7 +1116,7 @@ cdef class Node: cdef int _last_query cdef int _failed - def __init__(self, id,char* ip,int port, int last_response=0,int last_query=0,int failed=0): + def __init__(self, id,ip,int port, int last_response=0,int last_query=0,int failed=0): """ Args: id (str): A 160bits (20 Bytes) identifier @@ -1111,7 +1130,7 @@ cdef class Node: """ cdef char* cip cdef char* cid - if ip[0] == b'0': + if ip[0] == u'0': raise ValueError("IP start with 0 *_* %r %r" % (ip, self._ip[:4])) tip = socket.inet_aton(ip) cip = tip @@ -1187,9 +1206,9 @@ 
cdef class Node: if ip[0] == '0': raise ValueError("IP start with 0 *_* %r %r" % (ip, self._ip[:4])) return ip - def __set__(self, char *ip): + def __set__(self, ip): cdef char* cip - if ip[0] == b'0': + if ip[0] == u'0': raise ValueError("IP start with 0 *_* %r %r" % (ip, self._ip[:4])) tip = socket.inet_aton(ip) cip = tip @@ -1224,11 +1243,11 @@ cdef class Node: """ nodes = [] length = len(infos) - if length/26*26 != length: - raise ProtocolError("", "nodes length should be a multiple of 26") + if length//26*26 != length: + raise ProtocolError(b"", b"nodes length should be a multiple of 26") i=0 while i < length: - if infos[i+20:i+24] != '\0\0\0\0' and infos[i+24:i+26] != '\0\0': + if infos[i+20:i+24] != b'\0\0\0\0' and infos[i+24:i+26] != b'\0\0': #try: nodes.append(Node.from_compact_info(infos[i:i+26])) #except ValueError as e: @@ -1282,7 +1301,7 @@ cdef class Node: Args: dht (DHT_BASE): a dht instance """ - id = str(dht.myid) + id = dht.myid.value msg = BMessage() dht._set_transaction_id(msg) msg.set_y("q", 1) @@ -1290,7 +1309,7 @@ cdef class Node: msg.set_a(True) msg.set_id(id, len(dht.myid)) self._failed+=1 - dht.sendto(str(msg), (self.ip, self.port)) + dht.sendto(msg.encode(), (self.ip, self.port)) def find_node(self, DHT_BASE dht, target): """send a find_node query to the node @@ -1310,7 +1329,7 @@ cdef class Node: msg.set_id(id, len(dht.myid)) msg.set_target(target, tl) self._failed+=1 - dht.sendto(str(msg), (self.ip, self.port)) + dht.sendto(msg.encode(), (self.ip, self.port)) def get_peers(self, DHT_BASE dht, info_hash): """send a get_peers query to the node @@ -1330,7 +1349,7 @@ cdef class Node: msg.set_id(id, len(dht.myid)) msg.set_info_hash(info_hash, ihl) self._failed+=1 - dht.sendto(str(msg), (self.ip, self.port)) + dht.sendto(msg.encode(), (self.ip, self.port)) def announce_peer(self, DHT_BASE dht, info_hash, int port): """send a announce_peer query to the node @@ -1357,7 +1376,7 @@ cdef class Node: msg.set_port(port) msg.set_token(token, 
len(info_hash)) self._failed+=1 - dht.sendto(str(msg), (self.ip, self.port)) + dht.sendto(msg.encode(), (self.ip, self.port)) else: raise NoTokenError() @@ -1386,10 +1405,10 @@ class Bucket(list): """ if not self.id: return True - if id.startswith(self.id[:self.id_length/8]): + if id.startswith(self.id[:self.id_length//8]): i=-1 try: - for i in range(self.id_length/8*8, self.id_length): + for i in range(self.id_length//8*8, self.id_length): if nbit(self.id, i) != nbit(id, i): return False return True @@ -1399,7 +1418,7 @@ class Bucket(list): else: return False - def __init__(self, id="", id_length=0, init=None): + def __init__(self, id=b"", id_length=0, init=None): """ Args: id (str): prefix identifier for the bucket @@ -1418,23 +1437,23 @@ class Bucket(list): """return a random id handle by the bucket""" id = ID() id_length = self.id_length - id_end = id[id_length/8] + id_end = bytes(bytearray((id[id_length//8],))) tmp = '' if id_length>0: try: - id_start = self.id[id_length/8] + id_start = bytes(bytearray((self.id[id_length//8],))) except IndexError: - id_start = "\0" + id_start = b"\0" for i in range((id_length % 8)): - tmp +=str(nbit(id_start, i)) + tmp += '1' if nbit(id_start, i) == 1 else '0' for i in range((id_length % 8), 8): - tmp +=str(nbit(id_end, i)) + tmp += '1' if nbit(id_end, i) == 1 else '0' try: - char = chr(int(tmp, 2)) + char = bytes(bytearray((int(tmp, 2),))) except ValueError: print(tmp) raise - return ID(self.id[0:id_length/8] + char + id[id_length/8+1:]) + return ID(self.id[0:id_length//8] + char + id[id_length//8+1:]) def get_node(self, id): """return the node with id `id` or raise NotFound""" @@ -1495,7 +1514,7 @@ class Bucket(list): if self.id_length < 8*len(self.id): new_id = self.id else: - new_id = self.id + "\0" + new_id = self.id + b"\0" b1 = Bucket(id=new_id, id_length=self.id_length + 1) b2 = Bucket(id=nflip(new_id, self.id_length), id_length=self.id_length + 1) for node in self: @@ -1942,7 +1961,7 @@ class RoutingTable(object): # 
return #print prefix #print utils.id_to_longid(str(bucket.id))[:bucket.id_length] - prefix = utils.id_to_longid(str(bucket.id))[:bucket.id_length] + prefix = utils.id_to_longid(bucket.id)[:bucket.id_length] (zero_b, one_b) = self.trie[prefix].split(self, dht) (zero_b, one_b) = self.trie[prefix].split(self, dht) self.trie[prefix + u"1"] = one_b @@ -1982,7 +2001,7 @@ class RoutingTable(object): continue to_merge = True for id in self.split_ids | self.info_hash: - if utils.id_to_longid(str(id)).startswith(key[:-1]): + if utils.id_to_longid(id).startswith(key[:-1]): to_merge = False break if to_merge: @@ -1992,7 +2011,7 @@ class RoutingTable(object): self.debug(2, "%s gone away while merging" % key) continue prefix0 = key - prefix1 = key[:-1] + unicode(int(key[-1]) ^ 1) + prefix1 = key[:-1] + six.text_type(int(key[-1]) ^ 1) bucket0 = self.trie[prefix0] if prefix1 in self.trie: bucket1 = self.trie[prefix1] diff --git a/btdht/krcp.pxd b/btdht/krcp.pxd index 74fc295..c460c1a 100644 --- a/btdht/krcp.pxd +++ b/btdht/krcp.pxd @@ -42,7 +42,8 @@ cdef class BMessage: cdef int encoded_len cdef int encoded_uptodate cdef int debug - cdef char* addr_addr + cdef unicode addr_addr_3 + cdef bytes addr_addr_2 cdef int addr_port cdef int set_r(self, int value) nogil diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 7670ceb..a821394 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -16,7 +16,9 @@ from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy from libc.stdlib cimport atoi, atoll, malloc, free from cython.parallel import prange -import utils +import six + +from btdht import utils cdef int str_to_int(char* data, int len) nogil: cdef char* msg = NULL @@ -34,13 +36,13 @@ cdef int str_to_int(char* data, int len) nogil: cdef int str_to_long_long(char* data, int len) nogil: cdef char* msg = NULL cdef long long i - if data[0] == '-' and len > 16 or len > 17: + if data[0] == b'-' and len > 16 or len > 17: with gil: raise EnvironmentError("Trying to convert %s to long long 
but it's too big" % data[:len]) try: msg = malloc((len+1) * sizeof(char)) strncpy(msg, data, len) - msg[len]='\0' + msg[len]=b'\0' i = atoll(msg) finally: if msg != NULL: @@ -77,17 +79,17 @@ cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: if i[0] >= max + 1: with gil: raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) - if data[i[0]] != "l": + if data[i[0]] != b"l": return False i[0]+=1 - while data[i[0]] != 'e' and i[0] < max: + while data[i[0]] != b'e' and i[0] < max: if not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not _decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max): with gil: raise ValueError("Unable to parse one of the element of the list %d %r" % (i[0], data[:max])) if i[0] >= max: with gil: raise ValueError("list_pass: %s > %s : %r" % (i[0], max, data[:max])) - if data[i[0]] != 'e': + if data[i[0]] != b'e': return False i[0]+=1 return True @@ -98,37 +100,37 @@ cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: if i[0] >= max + 1: with gil: raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) - if data[i[0]] != "d": + if data[i[0]] != b"d": return False i[0]+=1 - while data[i[0]] != 'e' and i[0] < max: + while data[i[0]] != b'e' and i[0] < max: if not _decode_string(data, i, max, j) or (not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not _decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max)): with gil: raise ValueError("Unable to parse one of the element of the dict %d %r" % (i[0], data[:max])) if i[0] >= max: with gil: raise ValueError("dict_pass: %s > %s : %r" % (i[0], max, data[:max])) - if data[i[0]] != 'e': + if data[i[0]] != b'e': return False i[0]+=1 return True cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1: cdef int ret - if data[i[0]] == '0' \ - or data[i[0]] == '2' \ - or data[i[0]] == '3' \ - or data[i[0]] == '4' \ - or data[i[0]] == '5' \ - or data[i[0]] 
== '6' \ - or data[i[0]] == '7' \ - or data[i[0]] == '8' \ - or data[i[0]] == '9' \ - or data[i[0]] == '1': + if data[i[0]] == b'0' \ + or data[i[0]] == b'2' \ + or data[i[0]] == b'3' \ + or data[i[0]] == b'4' \ + or data[i[0]] == b'5' \ + or data[i[0]] == b'6' \ + or data[i[0]] == b'7' \ + or data[i[0]] == b'8' \ + or data[i[0]] == b'9' \ + or data[i[0]] == b'1': j[0]=i[0]+1 - while data[j[0]] != ':' and j[0] < max: + while data[j[0]] != b':' and j[0] < max: j[0]+=1 - if data[j[0]] == ':': + if data[j[0]] == b':': i[0] = j[0] + str_to_int(data + i[0], j[0]-i[0]) + 1 j[0]+=1 if i[0] <= max: @@ -148,12 +150,12 @@ cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil excep otherwise, use the function from utils that can decode arbitrary long integer """ cdef int j - if data[i[0]] == 'i': + if data[i[0]] == b'i': i[0]+=1 j = i[0] - while data[j]!='e' and j < max: + while data[j] != b'e' and j < max: j+=1 - if data[j] == 'e': + if data[j] == b'e': myint[0]=str_to_long_long(data + i[0], j-i[0]) i[0]=j+1 if i[0] <= max: @@ -171,11 +173,11 @@ cdef int _encode_int(char* data, int *i, int max, int j) nogil: cdef int l l = int_length(j) if max >= i[0] + l + 2: - data[i[0]]='i' + data[i[0]]=b'i' i[0]+=1 - sprintf(data + i[0], "%d", j) + sprintf(data + i[0], b"%d", j) i[0]+=l - data[i[0]]='e' + data[i[0]]=b'e' i[0]+=1 return True else: @@ -186,9 +188,9 @@ cdef int _encode_string(char* data, int* i, int max, char* str, int strlen) nogi cdef int l l = int_length(strlen) if max >= i[0] + l + 1 + strlen: # size as char + : + string - sprintf(data + i[0], "%d", strlen) + sprintf(data + i[0], b"%d", strlen) i[0]+=l - data[i[0]]=':' + data[i[0]]=b':' i[0]+=1 strncpy(data + i[0], str, strlen) i[0]+=strlen @@ -198,7 +200,7 @@ cdef int _encode_string(char* data, int* i, int max, char* str, int strlen) nogi return False class BError(Exception): - y = "e" + y = b"e" t = None # string value representing a transaction ID e = None # a list. 
The first element is an integer representing the error code. The second element is a string containing the error message def __init__(self, t, e, **kwargs): @@ -207,22 +209,27 @@ class BError(Exception): self.t = t self.e = e super(BError, self).__init__(*e, **kwargs) + + def encode(self): + return utils.bencode({b"y":self.y, b"t":self.t, b"e":self.e}) + def __str__(self): - return utils.bencode({"y":self.y, "t":self.t, "e":self.e}) + raise NotImplementedError() + def __repr__(self): - return "%s: %s" % (self.__class__.__name__, self.e) + return "%s: %r" % (self.__class__.__name__, self.e) class GenericError(BError): - def __init__(self, t, msg=""): + def __init__(self, t, msg=b""): super(GenericError, self).__init__(t=t, e=[201, msg]) class ServerError(BError): - def __init__(self, t, msg="Server Error"): + def __init__(self, t, msg=b"Server Error"): super(ServerError, self).__init__(t=t, e=[202, msg]) class ProtocolError(BError): - def __init__(self, t, msg="Protocol Error"): + def __init__(self, t, msg=b"Protocol Error"): super(ProtocolError, self).__init__(t=t, e=[203, msg]) class MethodUnknownError(BError): - def __init__(self, t, msg="Method Unknow"): + def __init__(self, t, msg=b"Method Unknow"): super(MethodUnknownError, self).__init__(t=t, e=[204, msg]) @@ -484,24 +491,24 @@ cdef class BMessage: cdef char* nodes = NULL cdef char* token = NULL cdef char** values = NULL - s = str(dht.myid) + s = dht.myid.value id = s with nogil: - if self.has_y and self.y_len == 1 and strncmp(self._y, "q", 1) == 0: + if self.has_y and self.y_len == 1 and strncmp(self._y, b"q", 1) == 0: if self.has_q: - if self.q_len == 4 and strncmp(self._q, "ping", 4) == 0: - rep.set_y("r", 1) + if self.q_len == 4 and strncmp(self._q, b"ping", 4) == 0: + rep.set_y(b"r", 1) rep.set_t(self._t, self.t_len) rep.set_r(True) rep.set_id(id, 20) self._encode() with gil: return rep - elif self.q_len == 9 and strncmp(self._q, "find_nodes", 9) == 0: + elif self.q_len == 9 and strncmp(self._q, 
b"find_nodes", 9) == 0: if not self.has_target: with gil: - raise ProtocolError(self.t, "target missing") - rep.set_y("r", 1) + raise ProtocolError(self.t, b"target missing") + rep.set_y(b"r", 1) rep.set_t(self._t, self.t_len) rep.set_r(True) rep.set_id(id, 20) @@ -513,11 +520,11 @@ cdef class BMessage: self._encode() with gil: return rep - elif self.q_len == 9 and strncmp(self._q, "get_peers", 9) == 0: + elif self.q_len == 9 and strncmp(self._q, b"get_peers", 9) == 0: if not self.has_info_hash: with gil: - raise ProtocolError(self.t, "info_hash missing") - rep.set_y("r", 1) + raise ProtocolError(self.t, b"info_hash missing") + rep.set_y(b"r", 1) rep.set_t(self._t, self.t_len) rep.set_r(True) rep.set_id(id, 20) @@ -540,21 +547,21 @@ cdef class BMessage: self._encode() with gil: return rep - elif self.q_len == 13 and strncmp(self._q, "announce_peer", 13) == 0: + elif self.q_len == 13 and strncmp(self._q, b"announce_peer", 13) == 0: if not self.has_info_hash: with gil: - raise ProtocolError(self.t, "info_hash missing") + raise ProtocolError(self.t, b"info_hash missing") if not self.has_port: with gil: - raise ProtocolError(self.t, "port missing") + raise ProtocolError(self.t, b"port missing") if not self.has_token: with gil: - raise ProtocolError(self.t, "token missing") + raise ProtocolError(self.t, b"token missing") with gil: s = dht._get_valid_token(self.addr[0]) - if not self["token"] in s: - raise ProtocolError(self.t, "bad token") - rep.set_y("r", 1) + if not self[b"token"] in s: + raise ProtocolError(self.t, b"bad token") + rep.set_y(b"r", 1) rep.set_t(self._t, self.t_len) rep.set_r(True) rep.set_id(id, 20) @@ -563,7 +570,7 @@ cdef class BMessage: return rep else: with gil: - raise MethodUnknownError(self.t, "Method %s Unknown" % self.q) + raise MethodUnknownError(self.t, b"Method %s Unknown" % self.q) else: printf("not ping %d\n", 0) else: @@ -574,7 +581,7 @@ cdef class BMessage: if i[0] + self.values_nb * 8 + 2 > max: printf("encode_values: %d < %d\n", max, 
i[0] + self.values_nb * 8 + 2) return False - data[i[0]]='l' + data[i[0]]=b'l' i[0]+=1 for j in prange(self.values_nb): #printf("encode value %d in encode_values\n", j) @@ -582,7 +589,7 @@ cdef class BMessage: i[0]+=2 strncpy(data + i[0], self.values[j], 6) i[0]+=6 - data[i[0]]='e' + data[i[0]]=b'e' i[0]+=1 return True @@ -590,13 +597,13 @@ cdef class BMessage: if i[0] + 1 > max: printf("encode_secondary:%d\n", 0) return False - data[i[0]] = 'd' + data[i[0]] = b'd' i[0]+=1 if self.has_id: if i[0] + 4 > max: printf("encode_secondary:%d\n", 1) return False - strncpy(data + i[0], "2:id", 4) + strncpy(data + i[0], b"2:id", 4) i[0]+=4 if not _encode_string(data, i, max, self.id, 20): return False @@ -604,7 +611,7 @@ cdef class BMessage: if i[0] + 15 > max: printf("encode_secondary:%d\n", 2) return False - strncpy(data + i[0], "12:implied_port", 15) + strncpy(data + i[0], b"12:implied_port", 15) i[0]+=15 if not _encode_int(data, i, max, self.implied_port): return False @@ -612,7 +619,7 @@ cdef class BMessage: if i[0] + 11 > max: printf("encode_secondary:%d\n", 3) return False - strncpy(data + i[0], "9:info_hash", 11) + strncpy(data + i[0], b"9:info_hash", 11) i[0]+=11 if not _encode_string(data, i, max, self.info_hash, 20): return False @@ -620,7 +627,7 @@ cdef class BMessage: if i[0] + 7 > max: printf("encode_secondary:%d\n", 4) return False - strncpy(data + i[0], "5:nodes", 7) + strncpy(data + i[0], b"5:nodes", 7) i[0]+=7 if not _encode_string(data, i, max, self.nodes, self.nodes_len): return False @@ -628,7 +635,7 @@ cdef class BMessage: if i[0] + 6 > max: printf("encode_secondary:%d\n", 5) return False - strncpy(data + i[0], "4:port", 6) + strncpy(data + i[0], b"4:port", 6) i[0]+=6 if not _encode_int(data, i, max, self.port): return False @@ -636,7 +643,7 @@ cdef class BMessage: if i[0] + 8 > max: printf("encode_secondary:%d\n", 6) return False - strncpy(data + i[0], "6:target", 8) + strncpy(data + i[0], b"6:target", 8) i[0]+=8 if not _encode_string(data, i, max, 
self.target, 20): return False @@ -644,7 +651,7 @@ cdef class BMessage: if i[0] + 7 > max: printf("encode_secondary:%d\n", 7) return False - strncpy(data + i[0], "5:token", 7) + strncpy(data + i[0], b"5:token", 7) i[0]+=7 if not _encode_string(data, i, max, self.token, self.token_len): return False @@ -652,14 +659,14 @@ cdef class BMessage: if i[0] + 8 > max: printf("encode_secondary:%d\n", 8) return False - strncpy(data + i[0], "6:values", 8) + strncpy(data + i[0], b"6:values", 8) i[0]+=8 if not self._encode_values(data, i, max): return False if i[0] + 1 > max: printf("encode_secondary:%d\n", 9) return False - data[i[0]] = 'e' + data[i[0]] = b'e' i[0]+=1 return True @@ -667,7 +674,7 @@ cdef class BMessage: if i[0] + 2 > max: printf("encode_error: %d", 0) return False - data[i[0]] = 'l' + data[i[0]] = b'l' i[0]+=1 if not _encode_int(data, i, max, self._errno): return False @@ -676,7 +683,7 @@ cdef class BMessage: if i[0] >= max: printf("encode_error: %d", 1) return False - data[i[0]] = 'e' + data[i[0]] = b'e' i[0]+=1 return True @@ -684,13 +691,13 @@ cdef class BMessage: if i[0] + 1 > max: printf("encode_main: %d\n", 0) return False - data[i[0]] = 'd' + data[i[0]] = b'd' i[0]+=1 if self.a: if i[0] + 3 > max: printf("encode_main: %d\n", 1) return False - strncpy(data + i[0], "1:a", 3) + strncpy(data + i[0], b"1:a", 3) i[0]+=3 if not self._encode_secondary_dict(data, i, max): return False @@ -698,7 +705,7 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d\n", 8) return False - strncpy(data + i[0], "1:e", 3) + strncpy(data + i[0], b"1:e", 3) i[0]+=3 if not self._encode_error(data, i, max): return False @@ -706,7 +713,7 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d\n", 2) return False - strncpy(data + i[0], "1:q", 3) + strncpy(data + i[0], b"1:q", 3) i[0]+=3 if not _encode_string(data, i, max, self._q, self.q_len): return False @@ -714,7 +721,7 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d\n", 3) return False - 
strncpy(data + i[0], "1:r", 3) + strncpy(data + i[0], b"1:r", 3) i[0]+=3 if not self._encode_secondary_dict(data, i, max): return False @@ -722,7 +729,7 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d\n", 4) return False - strncpy(data + i[0], "1:t", 3) + strncpy(data + i[0], b"1:t", 3) i[0]+=3 if not _encode_string(data, i, max, self._t, self.t_len): return False @@ -730,7 +737,7 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d\n", 5) return False - strncpy(data + i[0], "1:v", 3) + strncpy(data + i[0], b"1:v", 3) i[0]+=3 if not _encode_string(data, i, max, self._v, self.v_len): return False @@ -738,14 +745,14 @@ cdef class BMessage: if i[0] + 3 > max: printf("encode_main: %d %d\n", 6, i[0]) return False - strncpy(data + i[0], "1:y", 3) + strncpy(data + i[0], b"1:y", 3) i[0]+=3 if not _encode_string(data, i, max, self._y, self.y_len): return False if i[0] + 1 > max: printf("encode_main: %d\n", 7) return False - data[i[0]] = 'e' + data[i[0]] = b'e' i[0]+=1 return True @@ -812,8 +819,11 @@ cdef class BMessage: else: raise EnvironmentError("Unable to encode BMessage") + def __repr__(self): + return "%r" % self.encode() + def __str__(self): - return self.encode() + raise NotImplementedError() property errno: def __get__(self): @@ -837,13 +847,22 @@ cdef class BMessage: property addr: def __get__(self): - if self.addr_addr and self.addr_port: - return (self.addr_addr, self.addr_port) + if six.PY3: + if self.addr_addr_3 and self.addr_port: + return (self.addr_addr_3, self.addr_port) + else: + return None else: - return None + if self.addr_addr_2 and self.addr_port: + return (self.addr_addr_2, self.addr_port) + else: + return None def __set__(self, addr): if addr is not None: - self.addr_addr = addr[0] + if six.PY3: + self.addr_addr_3 = addr[0] + else: + self.addr_addr_2 = addr[0] self.addr_port = addr[1] def __del__(self): self.addr_addr = None @@ -928,21 +947,21 @@ cdef class BMessage: def __delitem__(self, char* key): with nogil: - 
if self.has_id and strcmp(key, "id") == 0: + if self.has_id and strcmp(key, b"id") == 0: self.del_id() - elif self.has_target and strcmp(key, "target") == 0: + elif self.has_target and strcmp(key, b"target") == 0: self.del_target() - elif self.has_info_hash and strcmp(key, "info_hash") == 0: + elif self.has_info_hash and strcmp(key, b"info_hash") == 0: self.del_info_hash() - elif self.has_token and strcmp(key, "token") == 0: + elif self.has_token and strcmp(key, b"token") == 0: self.del_token() - elif self.has_nodes and strcmp(key, "nodes") == 0: + elif self.has_nodes and strcmp(key, b"nodes") == 0: self.del_nodes() - elif self.has_implied_port and strcmp(key, "implied_port") == 0: + elif self.has_implied_port and strcmp(key, b"implied_port") == 0: self.del_implied_port() - elif self.has_port and strcmp(key, "port") == 0: + elif self.has_port and strcmp(key, b"port") == 0: self.del_port() - elif self.has_values and strcmp(key, "values") == 0: + elif self.has_values and strcmp(key, b"values") == 0: self.del_values() else: with gil: @@ -954,14 +973,14 @@ cdef class BMessage: cdef char** v cdef int l = 0 with nogil: - if strcmp(key, "id") == 0: + if strcmp(key, b"id") == 0: with gil: if len(value) != 20: raise ValueError("Can only set strings of length 20B") j = value self.set_id(j, 20) return - elif strcmp(key, "target") == 0: + elif strcmp(key, b"target") == 0: self.encoded_uptodate = False with gil: if len(value) != 20: @@ -969,36 +988,36 @@ cdef class BMessage: j = value self.set_target(j, 20) return - elif strcmp(key, "info_hash") == 0: + elif strcmp(key, b"info_hash") == 0: with gil: if len(value) != 20: raise ValueError("Can only set strings of length 20B") j = value self.set_info_hash(j, 20) return - elif strcmp(key, "token") == 0: + elif strcmp(key, b"token") == 0: with gil: l = len(value) j = value self.set_token(j, l) return - elif strcmp(key, "nodes") == 0: + elif strcmp(key, b"nodes") == 0: with gil: l = len(value) j = value self.set_nodes(j, l) return - 
elif strcmp(key, "implied_port") == 0: + elif strcmp(key, b"implied_port") == 0: with gil: i = value self.set_implied_port(i) return - elif strcmp(key, "port") == 0: + elif strcmp(key, b"port") == 0: with gil: i = value self.set_port(i) return - elif strcmp(key, "values") == 0: + elif strcmp(key, b"values") == 0: with gil: v = vlist_to_array(value) i = len(value) @@ -1036,7 +1055,7 @@ cdef class BMessage: if i[0] > max: with gil: raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) - if data[i[0]] != 'l': + if data[i[0]] != b'l': return False i[0]+=1 if not _decode_int(data, i, max, ll): @@ -1045,7 +1064,7 @@ cdef class BMessage: if not _decode_string(data, i, max, j): return False self.set_errmsg(data + j[0], i[0]-j[0]) - if data[i[0]] != 'e': + if data[i[0]] != b'e': return False i[0]+=1 return True @@ -1059,35 +1078,35 @@ cdef class BMessage: with gil: raise ValueError("Fail to decode dict key %d %s" % (i[0], data[:max])) - if (i[0]-j[0]) == 1 and strncmp(data + j[0], "a", i[0]-j[0]) == 0: + if (i[0]-j[0]) == 1 and strncmp(data + j[0], b"a", i[0]-j[0]) == 0: return self._decode_dict(data, i, max) and self.set_a(True) - elif (i[0]-j[0]) == 1 and strncmp(data + j[0], "r", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"r", i[0]-j[0]) == 0: return self._decode_dict(data, i, max) and self.set_r(True) - elif (i[0]-j[0]) == 1 and strncmp(data + j[0], "e", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"e", i[0]-j[0]) == 0: return self._decode_error(data, i, max) and self.set_e(True) - elif (i[0]-j[0]) == 1 and strncmp(data + j[0], "t", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"t", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_t(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 1 and strncmp(data + j[0], "v", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"v", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_v(data + j[0], i[0]-j[0]) - elif 
(i[0]-j[0]) == 1 and strncmp(data + j[0], "y", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"y", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_y(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 1 and strncmp(data + j[0], "q", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"q", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_q(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 2 and strncmp(data + j[0], "id", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 2 and strncmp(data + j[0], b"id", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_id(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 6 and strncmp(data + j[0], "target", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 6 and strncmp(data + j[0], b"target", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_target(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 9 and strncmp(data + j[0], "info_hash", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 9 and strncmp(data + j[0], b"info_hash", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_info_hash(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 12 and strncmp(data + j[0], "implied_port", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 12 and strncmp(data + j[0], b"implied_port", i[0]-j[0]) == 0: return _decode_int(data, i, max, ll) and self.set_implied_port(ll[0]) - elif (i[0]-j[0]) == 4 and strncmp(data + j[0], "port", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 4 and strncmp(data + j[0], b"port", i[0]-j[0]) == 0: return _decode_int(data, i, max, ll) and self.set_port(ll[0]) - elif (i[0]-j[0]) == 5 and strncmp(data + j[0], "token", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 5 and strncmp(data + j[0], b"token", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_token(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 5 and strncmp(data + j[0], "nodes", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 5 and strncmp(data + j[0], b"nodes", i[0]-j[0]) == 0: return _decode_string(data, 
i, max, j) and self.set_nodes(data + j[0], i[0]-j[0]) - elif (i[0]-j[0]) == 6 and strncmp(data + j[0], "values", i[0]-j[0]) == 0: + elif (i[0]-j[0]) == 6 and strncmp(data + j[0], b"values", i[0]-j[0]) == 0: if self._decode_values(data, i, max): return True else: @@ -1120,7 +1139,7 @@ cdef class BMessage: if i[0] >= max: with gil: raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) - if not data[i[0]] == 'l': + if not data[i[0]] == b'l': return False i[0]+=1 while _decode_string(data, i, max, j): @@ -1128,7 +1147,7 @@ cdef class BMessage: with gil: raise ValueError("element of values are expected to be of length 6 and not %s" % (i[0]-j[0])) c+=1 - if i[0] >= max or data[i[0]] != 'e': + if i[0] >= max or data[i[0]] != b'e': with gil: raise ValueError("End of values list not found %s >= %s found %s elements" % (i[0], max, c)) i[0] = k @@ -1144,14 +1163,14 @@ cdef class BMessage: cdef int _decode_dict(self, char* data, int *i, int max) nogil except -1: cdef int k - if data[i[0]] == 'd': + if data[i[0]] == b'd': i[0]+=1 - while data[i[0]] != 'e' and i[0] < max: + while data[i[0]] != b'e' and i[0] < max: k = i[0] if not self._decode_dict_elm(data, i, max): with gil: raise ValueError("fail to decode dict element %d %r" % (k, data[:max])) - if data[i[0]] != 'e': + if data[i[0]] != b'e': with gil: raise ValueError("End of dict not found %s>=%d %r" % (i[0], max, data[:max])) else: diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 3c037b0..fd755ca 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -13,6 +13,8 @@ import os import sys import netaddr +import binascii +import six from functools import total_ordering from libc.stdlib cimport atoi, malloc, free @@ -54,13 +56,13 @@ cdef char _longid_to_char(char* id) nogil: cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: cdef int i cdef char* id - if size/8*8 != size: + if size//8*8 != size: with gil: raise ValueError("size must be a multiple of 8") - id = malloc((size / 8) * sizeof(char)) + id = 
malloc((size // 8) * sizeof(char)) i=0 while i < size: - id[i/8] = _longid_to_char(longid + i) + id[i//8] = _longid_to_char(longid + i) i+=8 return id @@ -87,21 +89,27 @@ def id_to_longid(char* id, int l=20): def nbit(s, n): """Renvois la valeur du nième bit de la chaine s""" - c=str(s)[n/8] - return int(format(ord(c), '08b')[n % 8]) + if six.PY3: + c = s[n//8] + else: + c = ord(s[n//8]) + return int(format(c, '08b')[n % 8]) def nflip(s, n): """Renvois la chaine s dont la valeur du nième bit a été retourné""" bit = [0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001] - return s[:n/8] + chr(ord(s[n/8]) ^ bit[n % 8]) + s[n/8+1:] + if six.PY2: + return s[:n//8] + chr(ord(s[n//8]) ^ bit[n % 8]) + s[n//8+1:] + else: + return s[:n//8] + bytes([s[n//8] ^ bit[n % 8]]) + s[n//8+1:] def nset(s, n , i): bit1 = [0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001] bit0 = [0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111, 0b11111011, 0b11111101, 0b11111110] if i == 1: - return s[:n/8] + chr(ord(s[n/8]) | bit1[n % 8]) + s[n/8+1:] + return s[:n//8] + chr(ord(s[n//8]) | bit1[n % 8]) + s[n//8+1:] elif i == 0: - return s[:n/8] + chr(ord(s[n/8]) & bit0[n % 8]) + s[n/8+1:] + return s[:n//8] + chr(ord(s[n//8]) & bit0[n % 8]) + s[n//8+1:] else: raise ValueError("i doit être 0 ou 1") @@ -150,10 +158,10 @@ class ID(object): return self.value[i] def __str__(self): - return self.value + raise NotImplementedError() def __repr__(self): - return self.value.encode("hex") + return binascii.b2a_hex(self.value).decode() def __eq__(self, other): if isinstance(other, ID): @@ -176,9 +184,15 @@ class ID(object): def __xor__(self, other): if isinstance(other, ID): - return ''.join(chr(ord(a) ^ ord(b)) for a,b in zip(self.value, other.value)) - elif isinstance(other, str): - return ''.join(chr(ord(a) ^ ord(b)) for a,b in zip(self.value, other)) + if six.PY2: + return ''.join(chr(ord(a) ^ ord(b)) for a,b in 
zip(self.value, other.value)) + else: + return bytes([a ^ b for a,b in zip(self.value, other.value)]) + elif isinstance(other, bytes): + if six.PY2: + return ''.join(chr(ord(a) ^ ord(b)) for a,b in zip(self.value, other)) + else: + return bytes([a ^ b for a,b in zip(self.value, other)]) else: raise TypeError("unsupported operand type(s) for ^: 'ID' and '%s'" % type(other).__name__) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7386339..55276b6 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,5 @@ datrie >= 0.7 netaddr >= 0.7.12 +six >= 1.8 Cython >= 0.21 wheel diff --git a/requirements.txt b/requirements.txt index 6347a2d..50607eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ datrie >= 0.7 netaddr >= 0.7.12 +six >= 1.8 diff --git a/setup.py b/setup.py index 7e430d3..a31af95 100755 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Communications :: File Sharing' ], - install_requires=["datrie >= 0.7", "netaddr >= 0.7.12"], + install_requires=["datrie >= 0.7", "netaddr >= 0.7.12", "six >= 1.8"], url='https://github.com/nitmir/btdht/', download_url="https://github.com/nitmir/btdht/releases/latest", zip_safe=False, From 7e3d87205da63c2e14a198396a7c77b1b37b5a3c Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 15:40:02 +0100 Subject: [PATCH 02/30] Remove trailing space --- btdht/dht.pyx | 32 ++++++++++++++++---------------- btdht/krcp.pxd | 8 ++++---- btdht/krcp.pyx | 20 ++++++++++---------- btdht/utils.pyx | 12 ++++++------ 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 8b5ba11..596b6f4 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -141,7 +141,7 @@ cdef class DHT_BASE: def save(self, filename=None, max_node=None): - """save the current list of nodes to `filename`. + """save the current list of nodes to `filename`. 
Args: filename (str, optional): filename where the list of known node is saved. @@ -218,11 +218,11 @@ cdef class DHT_BASE: self.debug(0, "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads)) self._threads_zombie.extend(self._threads) self._threads = [] - + if self.sock: try:self.sock.close() except: pass - + @property def zombie(self): return bool(self.stoped and [t for t in self._threads if t.is_alive()]) @@ -275,7 +275,7 @@ cdef class DHT_BASE: self.debug(0, "One thread died, stopping dht") self.stop_bg() return False - + def debug(self, lvl, msg): """to print `msg` if `lvl` > `debuglvl` @@ -482,7 +482,7 @@ cdef class DHT_BASE: # get hash k closest node that have not been tried _closest = self.get_closest_nodes(hash) __closest = [node for node in _closest if node not in tried_nodes] - + if __closest: # alpha = 3 from the kademlia paper nodes = __closest[0:3] @@ -583,7 +583,7 @@ cdef class DHT_BASE: return b"".join(n.compact_info() for n in l) else: return list(self.root.get_closest_nodes(id)) - + def bootstarp(self): """boostrap the DHT to some wellknown nodes""" self.debug(0,"Bootstraping") @@ -724,7 +724,7 @@ cdef class DHT_BASE: self.debug(0, "send:%r : (%r, %r)" % (e, data, addr)) raise - + cdef void _set_transaction_id(self, BMessage query, int id_len=6): """Set the transaction id (key t of the dictionnary) on a query""" id = os.urandom(id_len) @@ -993,7 +993,7 @@ cdef class DHT_BASE: else: self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query["port"]) except KeyError as e: - raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.message) + raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.message) def _process_response(self, obj, query): @@ -1151,7 +1151,7 @@ cdef class Node: def __richcmp__(self, Node other, int op): - if op == 2: # == + if op == 2: # == return other.id == self.id elif op == 3: # != return other.id != self.id @@ -1642,13 +1642,13 @@ 
class RoutingTable(object): if self._threads: self.debug(0, "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads)) self.zombie = True - self._threads_zombie.extend(self._threads) + self._threads_zombie.extend(self._threads) self._threads = [] @property def zombie(self): return self.stoped and [t for t in self._threads if t.is_alive()] - + def start(self): """start the routing table""" with self.lock: @@ -1760,7 +1760,7 @@ class RoutingTable(object): """ try: self._dhts.remove(dht) except KeyError:pass - try: + try: self.split_ids.remove(dht.myid) if not self.need_merge: self.debug(1, "Programming merge") @@ -1814,7 +1814,7 @@ class RoutingTable(object): del nodes # If questionnable nodes, ping one of them questionable = [node for node in bucket if not node.good and not node.bad] - + for dht in dhts: if not questionable: break @@ -1870,7 +1870,7 @@ class RoutingTable(object): else: others+=1 except (TypeError, AttributeError): - pass + pass return (nodes, goods, bads) def __iter__(self): @@ -1991,7 +1991,7 @@ class RoutingTable(object): nodes_before = self.stats()[0] if nodes_before < 1000: self.debug(1, "Less than 1000 nodes, no merge") - return + return started = time.time() while stack: if self.stoped: @@ -2028,6 +2028,6 @@ class RoutingTable(object): if full_merge: self._heigth = max(len(k) for k in self.trie.keys()) + 1 self.debug(1, "%s nodes merged in %ss" % (nodes_before - self.stats()[0], int(time.time() - started))) - + diff --git a/btdht/krcp.pxd b/btdht/krcp.pxd index c460c1a..aa36234 100644 --- a/btdht/krcp.pxd +++ b/btdht/krcp.pxd @@ -79,7 +79,7 @@ cdef class BMessage: cdef void del_encoded(self) nogil cdef int _encode_values(self, char* data, int* i, int max) nogil - + cdef int _encode_secondary_dict(self, char* data, int* i, int max) nogil cdef int _encode_error(self, char* data, int* i, int max) nogil @@ -89,13 +89,13 @@ cdef class BMessage: cdef int _encode(self) nogil cdef int _encode_len(self) nogil - + cdef int 
_decode_error(self, char* data, int* i, int max) nogil except -1 - + cdef int _decode_dict_elm(self, char* data, int* i, int max) nogil except -1 cdef int _decode_values(self, char* data, int *i, int max) nogil except -1 - + cdef int _decode_dict(self, char* data, int *i, int max) nogil except -1 cdef int _decode(self, char* data, int *i, int max) nogil except -1 diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index a821394..54e6cc3 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -114,7 +114,7 @@ cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: return False i[0]+=1 return True - + cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1: cdef int ret if data[i[0]] == b'0' \ @@ -168,7 +168,7 @@ cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil excep raise ValueError("%s != e at %s %r" % (data[j], j, data[:max])) else: return False - + cdef int _encode_int(char* data, int *i, int max, int j) nogil: cdef int l l = int_length(j) @@ -592,7 +592,7 @@ cdef class BMessage: data[i[0]]=b'e' i[0]+=1 return True - + cdef int _encode_secondary_dict(self, char* data, int* i, int max) nogil: if i[0] + 1 > max: printf("encode_secondary:%d\n", 0) @@ -807,13 +807,13 @@ cdef class BMessage: estimated_len+= 8 * self.values_nb + 2 + 8 # l + nb * IPPORT + e #printf("estimated_len: %d\n" , estimated_len) return estimated_len - + def encode(self): if self.encoded_uptodate: return self.encoded[:self.encoded_len] else: with nogil: - self._encode() + self._encode() if self.encoded_uptodate: return self.encoded[:self.encoded_len] else: @@ -1068,7 +1068,7 @@ cdef class BMessage: return False i[0]+=1 return True - + cdef int _decode_dict_elm(self, char* data, int* i, int max) nogil except -1: cdef char* error cdef int j[1] @@ -1077,7 +1077,7 @@ cdef class BMessage: if not _decode_string(data, i, max, j): with gil: raise ValueError("Fail to decode dict key %d %s" % (i[0], data[:max])) - + if (i[0]-j[0]) == 1 and strncmp(data + 
j[0], b"a", i[0]-j[0]) == 0: return self._decode_dict(data, i, max) and self.set_a(True) elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"r", i[0]-j[0]) == 0: @@ -1160,7 +1160,7 @@ cdef class BMessage: self.set_values(values, c) i[0]+=1 return True - + cdef int _decode_dict(self, char* data, int *i, int max) nogil except -1: cdef int k if data[i[0]] == b'd': @@ -1176,7 +1176,7 @@ cdef class BMessage: else: i[0]+=1 return True - + cdef int _decode(self, char* data, int *i, int max) nogil except -1: return self._decode_dict(data, i, max) @@ -1203,7 +1203,7 @@ cdef class BMessage: self.has_nodes = False self.has_values = False self.encoded_uptodate = False - + def decode(self, char* data, int datalen): cdef int i = 0 cdef int valid = False diff --git a/btdht/utils.pyx b/btdht/utils.pyx index fd755ca..23ad46e 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -69,7 +69,7 @@ cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: cdef char* _id_to_longid(char* id, int size=20) nogil: global BYTE_TO_BIT cdef char* ret = malloc((size * 8) * sizeof(char)) - cdef int i = 0 + cdef int i = 0 while i < size: strncpy(ret + (i*8), BYTE_TO_BIT[id[i]], 8) i+=1 @@ -112,7 +112,7 @@ def nset(s, n , i): return s[:n//8] + chr(ord(s[n//8]) & bit0[n % 8]) + s[n//8+1:] else: raise ValueError("i doit être 0 ou 1") - + class BcodeError(Exception): pass @@ -163,7 +163,7 @@ class ID(object): def __repr__(self): return binascii.b2a_hex(self.value).decode() - def __eq__(self, other): + def __eq__(self, other): if isinstance(other, ID): return self.value == other.value elif isinstance(other, str): @@ -178,7 +178,7 @@ class ID(object): return self.value < other else: raise TypeError("unsupported operand type(s) for <: 'ID' and '%s'" % type(other).__name__) - + def __len__(self): return len(self.value) @@ -209,7 +209,7 @@ def bencode(obj): print("%r" % obj) raise def _bencode(obj): - + if isinstance(obj, int) or isinstance(obj, float): return b"i" + str(obj).encode() + b"e" elif 
isinstance(obj, bytes): @@ -337,7 +337,7 @@ def _bdecode(s): return _decode(s, len(s)) #cdef _bdecode2(char* s, int* ii): # if ii[0] > 2000 and (ii[0] % 100) == 0: - + def _bdecode2(s, ii=None): if ii is None: ii = [0] From dffdabbd11f526da72367e9fbca2e0e0909f3c7d Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 15:56:35 +0100 Subject: [PATCH 03/30] Then called _add_peer_queried, check that port number is > 0 --- btdht/dht.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 596b6f4..0ac2bbd 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -395,7 +395,11 @@ cdef class DHT_BASE: def _add_peer_queried(self, info_hash, ip, port): """Store a peer after a announce_peer query""" - if ip not in self.ignored_ip and not utils.ip_in_nets(ip, self.ignored_net): + if ( + port > 0 and + ip not in self.ignored_ip and + not utils.ip_in_nets(ip, self.ignored_net) + ): self._got_peers[info_hash][(ip,port)]=time.time() # we only keep at most 1000 peers per hash if len(self._got_peers[info_hash]) > 1000: From 647f3f072b796d90f8d6cd62ea35f01705fd59a3 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 17:40:19 +0100 Subject: [PATCH 04/30] in python3 KeyError dot not have a message attribute, we use args[0] instead --- btdht/dht.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 0ac2bbd..17149bf 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -997,7 +997,7 @@ cdef class DHT_BASE: else: self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query["port"]) except KeyError as e: - raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.message) + raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.args[0]) def _process_response(self, obj, query): @@ -1060,7 +1060,7 @@ cdef class DHT_BASE: self.debug(2, "ERROR:201:%s pour %r" % (msg.errmsg, 
self.transaction_type.get(msg.t, {}))) return GenericError(msg.t, msg.errmsg), query elif msg.errno == 202: - self.debug(2, "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {})[2].encode())) + self.debug(2, "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {}))) return ServerError(msg.t, msg.errmsg), query elif msg.errno == 203: t = self.transaction_type.get(msg.t) @@ -1077,7 +1077,7 @@ cdef class DHT_BASE: self.debug(0, "UNKNOWN MSG: %s" % msg) raise ProtocolError(msg.t) except KeyError as e: - raise ProtocolError(msg.t, b"Message malformed: %s key is missing" % e.message) + raise ProtocolError(msg.t, b"Message malformed: %s key is missing" % e.args[0]) except IndexError: raise ProtocolError(msg.t, b"Message malformed") From abf569d15d66136652f3950e0a04bdc8c413e5d2 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 17:40:51 +0100 Subject: [PATCH 05/30] Catch KeyError on dict key deletion --- btdht/dht.pyx | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 17149bf..f8f4bc6 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -802,7 +802,10 @@ cdef class DHT_BASE: except KeyError: pass for key in to_delete: - del self.mytoken[id] + try: + del self.mytoken[id] + except KeyError: + pass # cleaning old peer for announce_peer to_delete = collections.defaultdict(list) @@ -815,7 +818,10 @@ cdef class DHT_BASE: pass for hash in to_delete: for peer in to_delete[hash]: - del self._peers[hash][peer] + try: + del self._peers[hash][peer] + except KeyError: + pass if not self._peers[hash]: del self._peers[hash] @@ -829,7 +835,10 @@ cdef class DHT_BASE: pass for hash in to_delete: for peer in to_delete[hash]: - del self._got_peers[hash][peer] + try: + del self._got_peers[hash][peer] + except KeyError: + pass if not self._got_peers[hash]: del self._got_peers[hash] From f71b1096dd8ff0aeb29eef30487f523a602a0a36 Mon Sep 17 00:00:00 2001 From: Valentin Samir 
Date: Sun, 20 Nov 2016 17:41:12 +0100 Subject: [PATCH 06/30] Disable pip cache then installing using make install --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 44c0d5b..e0739d1 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ dist: install: dist pip -V - pip install --no-deps --upgrade --force-reinstall --find-links ./dist/btdht-${VERSION}.tar.gz btdht + pip install --no-cache-dir --no-deps --upgrade --force-reinstall --find-links ./dist/btdht-${VERSION}.tar.gz btdht uninstall: pip uninstall btdht || true From 9d6d2163e37ec447770fd5443c0c21969a315424 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 20 Nov 2016 19:58:27 +0100 Subject: [PATCH 07/30] Update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index ffaa5f4..696173a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ build/* *.c +*~ + +btdht.egg-info From c564929eb5bee269dda5a1441496ce0d2e976aa7 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 23 Nov 2016 16:31:39 +0100 Subject: [PATCH 08/30] Begin to document using sphinx --- .gitignore | 6 +- Makefile | 15 ++ btdht/dht.pyx | 498 ++++++++++++++++++++++------------- docs/Makefile | 225 ++++++++++++++++ docs/README.rst | 1 + docs/conf.py | 363 +++++++++++++++++++++++++ docs/index.rst | 27 ++ docs/make.bat | 281 ++++++++++++++++++++ docs/package/btdht.dht.rst | 109 ++++++++ docs/package/btdht.krcp.rst | 7 + docs/package/btdht.rst | 19 ++ docs/package/btdht.utils.rst | 7 + setup.py | 75 +++--- 13 files changed, 1412 insertions(+), 221 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/README.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/package/btdht.dht.rst create mode 100644 docs/package/btdht.krcp.rst create mode 100644 docs/package/btdht.rst create mode 100644 docs/package/btdht.utils.rst diff --git a/.gitignore 
b/.gitignore index 696173a..582fa32 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,9 @@ -build/* +build/ +dist/ +test_venv/ +docs/_build/ *.c *~ +*.pyc btdht.egg-info diff --git a/Makefile b/Makefile index e0739d1..e3ec37e 100644 --- a/Makefile +++ b/Makefile @@ -23,3 +23,18 @@ clean: publish_pypi_release: python setup.py sdist upload --sign + +test_venv: test_venv/bin/python + +test_venv/bin/python: + virtualenv test_venv + test_venv/bin/pip install -U --requirement requirements-dev.txt + +test_venv/bin/sphinx-build: test_venv + test_venv/bin/pip install Sphinx sphinx_rtd_theme + +docs: test_venv/bin/sphinx-build + bash -c "source test_venv/bin/activate; cd docs; make html" + +clean_docs: + cd docs; make clean diff --git a/btdht/dht.pyx b/btdht/dht.pyx index f8f4bc6..ad1244d 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -42,51 +42,120 @@ from utils import ID, nbit, nflip, nset from .krcp cimport BMessage from .krcp import BError, ProtocolError, GenericError, ServerError, MethodUnknownError + cdef class DHT_BASE: """ - Attributes: - root (RoutingTable): the dht instance routing table - bind_port (int): udp port to which this dht instance is binded - bind_ip (str): ip addresse to which this dht instance is binded - myid (str): 160bits long (20 Bytes) id of the node running this - instance of the dht. - debuglvl (int): Level of verbosity - master (bool): A boolean value to disting a particular dht instance - threads (list of Thread): list of the threads of the dht instance - zombie (bool): True if dht is stopped but one thread or more remains - alive + The DHT base class + + :param RoutingTable routing_table: An optional routing table, possibly shared between several + dht instances. If not specified, a new routing table is instanciated. + :param int bind_port: And optional udp port to use for the dht instance. If not specified, the + hosting system will choose an available port. + :param str bind_ip: The interface to listen to. The default is ``"0.0.0.0"``. 
+ :param bytes id: An optional 160 bits long (20 Bytes) id. If not specified, a random one is + generated. + :param set ignored_ip: A set of ip address in dotted (``"1.2.3.4"``) notation to ignore. + The default is the empty set. + :param int debuglvl: Level of verbosity, default to ``0``. + :param str prefix: A prefix to use in logged messages. The default is ``""``. + :param int process_queue_size: Size of the queue of messages waiting to be processed by user + defines functions (on_`msg`_(query|response)). see the :meth:`register_message` method. + The default to ``500``. + :param list ignored_net: An list of ip networks in cidr notation (``"1.2.3.4/5"``) to ignore. + The default is the value of the attribute :attr:`ignored_net`. + + Note: + try to use same ``id`` and ``bind_port`` over dht restart to increase + the probability to remain in other nodes routing table + """ cdef char _myid[20] + #: :class:`list` of default ignored ip networks + ignored_net = [ + '0.0.0.0/8', '10.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '169.254.0.0/16', + '172.16.0.0/12', '192.0.0.0/24', '192.0.2.0/24', '192.168.0.0/16', '198.18.0.0/15', + '198.51.100.0/24', '203.0.113.0/24', '224.0.0.0/4', '240.0.0.0/4', '255.255.255.255/32' + ] + #: :class:`RoutingTable` the used instance of the routing table + root = None + #: :class:`int` port the dht is binded to + bind_port = None + #: :class:`str` interface the dht is binded to + bind_ip = "0.0.0.0" + #: :class:`utils.ID` the dht instance id, 160bits long (20 Bytes) + myid = None + #: :class:`int` the dht instance verbosity level + debuglvl = 0 + #: :class:`list` of the :class:`Thread` of the dht instance + threads = [] + #: Map beetween transaction id and messages type (to be able to match responses) + transaction_type = {} + #: Token send with get_peers response. Map between ip addresses and a list of random token. + #: A new token by ip is genereted at most every 5 min, a single token is valid 10 min. 
+ #: On reception of a announce_peer query from ip, the query is only accepted if we have a + #: valid token (generated less than 10min ago). + token = collections.defaultdict(list) + #: Tokens received on get_peers response. Map between ip addresses and received token from ip. + #: Needed to send announce_peer to that particular ip. + mytoken = {} + #: The current dht :class:`socket.Socket` + sock = None + #: the state (stoped ?) of the dht + stoped = True + #: last time we received any message + last_msg = 0 + #: last time we receive a response to one of our messages + last_msg_rep = 0 + + + + #: Map torrent hash -> peer ip and port -> received time. hash, ip and port are from + #: announce_peer query messages. time is the time of the received message. We only keep the + #: 100 most recent (ip, port). A (ip, port) couple is kept max 30min + _peers=collections.defaultdict(collections.OrderedDict) + #: Map torrent hash -> peer ip and port -> received time. hash, ip and port are from get_peers + #: response messages. time is the time of the received message. We keep the 1000 most recent + #: (ip, port). A (ip, port) couple is kept max 15min + _got_peers=collections.defaultdict(collections.OrderedDict) + #: internal heap structure used to find the K closed nodes in the DHT from one id + _get_peer_loop_list = [] + #: Map hash -> time. Pseudo lock structure to ensure we only run background process for + #: :meth:`get_peers` only once by hash + _get_peer_loop_lock = {} + #: same as previous but for :meth:`announce_peer` + _get_closest_loop_lock = {} + #: A queue of DHT messages to send to user defined function (on_`msg`_(query|response)). + #: See the :meth:`register_message` method. + _to_process = None + #: A set of messages name (e.g. ``b"find_node"``, ``b"ping"``, ``b"get_peers"``, + #: ``b"announce_peer"``) for which we call user defined functions. + #: See the :meth:`register_message` method. 
+ _to_process_registered = set() + #: internal list of supposed alive threads + _threads = [] + #: internal list of supposed zombie (asked to stop but still running) threads + _threads_zombie = [] + #: last debug message, use to prevent duplicate messages over 5 seconds + _last_debug = "" + #: time of the lat debug message, use to prevent duplicate messages over 5 seconds + _last_debug_time = 0 + #: number of received messages since the last time :meth:`socket_stats` was called + _socket_in = 0 + #: number of sended messages since the last time :meth:`socket_stats` was called + _socket_out = 0 + #: last time :meth:`socket_stats` was called + _last_socket_stats = 0 + #: last time the long background cleaning was run + _long_clean = 0 + #: heigth of the routing table (a binary tree) during the last run of :meth:`_routine` + _root_heigth = 0 + + def __init__(self, routing_table=None, bind_port=None, bind_ip="0.0.0.0", - id=None, ignored_ip=[], debuglvl=0, prefix="", master=False, process_queue_size=500, + id=None, ignored_ip=[], debuglvl=0, prefix="", process_queue_size=500, ignored_net=None ): - """ - Note: - try to use same `id` and `bind_port` over dht restart to increase - the probability to remain in other nodes buckets - - Args: - routing_table (RoutingTable, optional): A routing table possibly - shared between several dht instance. By default a new one is - instanciated. - bind_port (int, optional): udp port to which bind this dht instance - default is to let the system choose an available port. - bind_ip (str, optional): default to "0.0.0.0". - id (str, optional): 160bits long (20 Bytes) id of the node running - this instance of the dht. Default is to choose a random id - ignored_ip (list of str, optional): a list of ip to ignore message from - debuglvl (int, optional): Level of verbosity, default to 0 - master (bool, optional): A boolean value to disting a particular dht - instance among several other then subclassing. Unused. 
default to False - process_queue_size(int, optional): Size of the queue of messages waiting - to be processed by user function (on_`msg`_(query|response)). see - the `register_message` method. default to 500. - ignored_net (list of str, optional): a list of ip network in CIDR notation - to ignore. By default, the list contains all private ip networks. - """ - # checking the provided id or picking a random one if id is not None: if len(id) != 20: @@ -98,56 +167,27 @@ cdef class DHT_BASE: # initialising the routing table self.root = RoutingTable() if routing_table is None else routing_table - # Map beetween transaction id and messages type (to be able to match responses) - self.transaction_type={} - # Token send on get_peers query reception - self.token=collections.defaultdict(list) - # Token received on get_peers response reception - self.mytoken={} - # Map between torrent hash on list of peers - self._peers=collections.defaultdict(collections.OrderedDict) - self._got_peers=collections.defaultdict(collections.OrderedDict) - self._get_peer_loop_list = [] - self._get_peer_loop_lock = {} - self._get_closest_loop_lock = {} self._to_process = Queue.Queue(maxsize=process_queue_size) - self._to_process_registered = set() self.bind_port = bind_port self.bind_ip = bind_ip - self.sock = None - - if ignored_net is None: - ignored_net = [ - '10.0.0.0/8', '172.16.0.0/12','198.18.0.0/15', - '169.254.0.0/16', '192.168.0.0/16', '224.0.0.0/4', '100.64.0.0/10', - '0.0.0.0/8','127.0.0.0/8','192.0.2.0/24','198.51.100.0/24','203.0.113.0/24', - '192.0.0.0/29', '240.0.0.0/4', '255.255.255.255/32', - ] self.ignored_ip = ignored_ip - self.ignored_net = [netaddr.IPNetwork(net) for net in ignored_net] + if ignored_net is not None: + self.ignored_net = [netaddr.IPNetwork(net) for net in ignored_net] + else: + self.ignored_net = [netaddr.IPNetwork(net) for net in self.ignored_net] self.debuglvl = debuglvl self.prefix = prefix - self._threads=[] - self.threads = [] - - self.master = master - 
self.stoped = True - self._threads_zombie = [] - self._last_debug = "" - self._last_debug_time = 0 - def save(self, filename=None, max_node=None): - """save the current list of nodes to `filename`. + """save the current list of nodes to ``filename``. - Args: - filename (str, optional): filename where the list of known node is saved. - default to dht_`id`.status - max_node (int, optional): maximun number of nodes to save. default is all - the routing table + :param str filename: An optional filename where to save the current list of nodes. + If not provided, the file ``"dht_`myid`.status`` is used. + :param int max_node: An optional integer to limit the number of saved nodes. + If not provided, all of the routing table nodes are saved. """ nodes_nb = 0 if filename is None: @@ -164,13 +204,12 @@ cdef class DHT_BASE: return def load(self, filename=None, max_node=None): - """load a list of nodes from `filename`. + """load a list of nodes from ``filename``. - Args: - filename (str, optional): filename where the list of known node is load from. - default to dht_`id`.status - max_node (int, optional): maximun number of nodes to save. default is all - nodes in the file + :param str filename: An optional filename where to load the list of nodes. + If not provided, the file ``"dht_`myid`.status`` is used. + :param int max_node: An optional integer to limit the number of loaded nodes. + If not provided, all of the file nodes are loaded. """ nodes_nb = 0 if filename is None: @@ -198,7 +237,15 @@ cdef class DHT_BASE: t.start() def stop(self): - """Stop the dht""" + """ + Stop the dht: + + * Set the attribute :attr:`stoped` to ``True`` and wait for threads to terminate + * Close the dht socket + + :raises FailToStop: if there is still some alive threads after 30 secondes, with the + list of still alive threads as parameter. 
+ """ if self.stoped: self.debug(0, "Already stoped or soping in progress") return @@ -215,7 +262,10 @@ cdef class DHT_BASE: else: break if self._threads: - self.debug(0, "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads)) + self.debug( + 0, + "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads) + ) self._threads_zombie.extend(self._threads) self._threads = [] @@ -223,37 +273,58 @@ cdef class DHT_BASE: try:self.sock.close() except: pass + if self._threads_zombie: + raise FailToStop(self._threads_zombie) + @property def zombie(self): + """``True`` if dht is stopped but one thread or more remains alive, ``False`` otherwise""" return bool(self.stoped and [t for t in self._threads if t.is_alive()]) def start(self): - """Start the threads of the dht""" + """ + Start the dht: + * initialize some attributes + * register this instance of the dht in the routing table + (see :meth:`RoutingTable.register_dht`) + * initialize the dht socket (see :meth:`init_socket`) + * start the routing table if needed + * start 5 threads: + * for receiving messages (see :meth:`_recv_loop`) + * for sending messages (see :meth:`_send_loop`) + * for doing some routines (bootstrapping, cleaning, see :meth:`_routine`) + * for finding the closest peer from some ids (see :meth:`_get_peers_closest_loop`) + * for processing messages to send to user defined functions + (see :meth:`_process_loop`) + """ if not self.stoped: self.debug(0, "Already started") return if self.zombie: self.debug(0, "Zombie threads, unable de start") return self._threads_zombie - self.root.register_dht(self) + self.root.register_dht(self) - if self.root.stoped: - self.root.start() - self.root_heigth = 0 self.stoped = False - self.root.last_merge = 0 - self.socket_in = 0 - self.socket_out = 0 - self.last_socket_stats = time.time() + self._root_heigth = 0 + self._socket_in = 0 + self._socket_out = 0 + self._last_socket_stats = time.time() self.last_msg = time.time()
self.last_msg_rep = time.time() - self.long_clean = time.time() + self._long_clean = time.time() + self.init_socket() + if self.root.stoped: + self.root.start() + self.threads = [] - for f, name in [(self._recv_loop, 'recv'), (self._send_loop, 'send'), (self._routine, 'routine'), - (self._get_peers_closest_loop, 'get_peers_closest'), (self._process_loop, 'process_msg')]: + for f, name in [ + (self._recv_loop, 'recv'), (self._send_loop, 'send'), (self._routine, 'routine'), + (self._get_peers_closest_loop, 'get_peers_closest'), (self._process_loop, 'process_msg') + ]: t = Thread(target=f) t.setName("%s:%s" % (self.prefix, name)) t.daemon = True @@ -264,8 +335,9 @@ cdef class DHT_BASE: def is_alive(self): """Test if all threads of the dht are alive, stop the dht if one of the thread is dead - Returns: - True if all dht threads are alive, False otherwise and stop all threads + :return: ``True`` if all dht threads are alive, ``False`` otherwise and stop all remaining + threads. + :rtype: bool """ if self.threads and reduce(lambda x,y: x and y, [t.is_alive() for t in self.threads]): return True @@ -276,38 +348,41 @@ cdef class DHT_BASE: self.stop_bg() return False - def debug(self, lvl, msg): - """to print `msg` if `lvl` > `debuglvl` + """ + Print ``msg`` prefixed with :attr:`prefix` if ``lvl`` <= :attr:`debuglvl` - Note: - duplicate messages are removed + :param int lvl: The debug level of the message to print + :param str msg: The debug message to print - Args: - lvl (int): minimal level for `debuglvl` to print `msg` - msg (str): message to print + Note: + duplicate messages are removed: """ - if lvl <= self.debuglvl and (self._last_debug != msg or (time.time() - self._last_debug_time)>5): + if ( + lvl <= self.debuglvl and + (self._last_debug != msg or (time.time() - self._last_debug_time) > 5) + ): print(self.prefix + msg) self._last_debug = msg self._last_debug_time = time.time() - def socket_stats(self): - """Statistic on send/received messages + def 
_socket_stats(self): + """ + Display some statistic on send/received messages + + :return: A tuple (number of received messages, number of sended messages, periode of time) + :rtype: tuple Note: The counter are reset to 0 on each call - - Returns: - The couple (number a received, number of sent) messages """ now = time.time() - in_s = self.socket_in - self.socket_in = 0 - out_s = self.socket_out - self.socket_out = 0 - delta = now - self.last_socket_stats - self.last_socket_stats = now + in_s = self._socket_in + self._socket_in = 0 + out_s = self._socket_out + self._socket_out = 0 + delta = now - self._last_socket_stats + self._last_socket_stats = now return (in_s, out_s, delta) def init_socket(self): @@ -316,6 +391,7 @@ cdef class DHT_BASE: if self.sock: try:self.sock.close() except: pass + # initialize the sending queue self._to_send = Queue.Queue() self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) #self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) @@ -325,18 +401,20 @@ cdef class DHT_BASE: self.sock.bind((self.bind_ip, self.bind_port)) else: self.sock.bind((self.bind_ip, 0)) + # set :attr:`bind_port` to the port choosen by the system self.bind_port = self.sock.getsockname()[1] - def sleep(self, t, fstop=None): - """Sleep for t seconds. If the dht is requested to be stop, run `fstop` and exit + """ + Sleep for t seconds. If the dht is requested to be stop, run ``fstop()`` and exit - Note: - Dont use it in the main thread otherwise it can exit before child threads + :param float t: A time to sleep, in seconds + :param fstop: A callable with no arguments, called before exiting + + Note: + Dont use it in the main thread otherwise it can exit before child threads. 
+ Only use it in child threads - Args: - fstop (callable, optional): A callable object taking no argument called on dht stop - during the sleep """ if t>0: t_int = int(t) @@ -351,16 +429,16 @@ cdef class DHT_BASE: def announce_peer(self, info_hash, port, delay=0, block=True): - """Announce `info_hash` available on `port` to the K closest nodes from - `info_hash` found in the dht - - Args: - info_hash (str): A 160bits (20 Bytes) long identifier to announce - port (int): tcp port on which `info_hash` if avaible on the current node - delay (int, optional): delay in second to wait before starting to look for - the K closest nodes into the dht. default ot 0 - block (bool, optional): wait until the announce in done if True, return immediately - otherwise. default ot True. + """ + Announce that the ``info_hash`` is available on ``port`` to the K closest nodes from + ``info_hash`` found in the whole dht. + + :param bytes info_hash: A 160 bits (20 Bytes) long identifier to announce + :param int port: The tcp port on which ``info_hash`` is available + :param int delay: An optional delay in seconds to wait before starting to look for the K + closest nodes into the dht. The default is ``0``. + :param bool block: If ``True`` (the default) wait until the announce is done to the K + closest nodes. Otherwise, return immediately.
""" def callback(nodes): for node in nodes: @@ -380,13 +458,25 @@ cdef class DHT_BASE: ts = time.time() + delay closest = self.get_closest_nodes(info_hash) typ = "closest" - heapq.heappush(self._get_peer_loop_list, (ts, info_hash, tried_nodes, closest, typ, callback, None)) + heapq.heappush( + self._get_peer_loop_list, + (ts, info_hash, tried_nodes, closest, typ, callback, None) + ) if block: while info_hash in self._get_closest_loop_lock and not self.stoped: self.sleep(0.1) def _add_peer(self, info_hash, ip, port): - """Store a peer after a announce_peer query""" + """ + Store a peer after a announce_peer query + + :param bytes info_hash: A 160 bits (20 Bytes) long identifier the peer is offering + :param str ip: The ip address of the peer in dotted notation (``"1.2.3.4"``) + :param int port: The tcp port of the peer + + Note: + The peer address is store 30 minutes + """ if ip not in self.ignored_ip and not utils.ip_in_nets(ip, self.ignored_net): self._peers[info_hash][(ip,port)]=time.time() # we only keep at most 100 peers per hash @@ -394,7 +484,16 @@ cdef class DHT_BASE: self._peers[info_hash].popitem(False) def _add_peer_queried(self, info_hash, ip, port): - """Store a peer after a announce_peer query""" + """ + Store a peer after a get_peer response + + :param bytes info_hash: A 160 bits (20 Bytes) long identifier the peer is offering + :param str ip: The ip address of the peer in dotted notation (``"1.2.3.4"``) + :param int port: The tcp port of the peer + + Note: + The peer address is store 15 minutes + """ if ( port > 0 and ip not in self.ignored_ip and @@ -406,23 +505,24 @@ cdef class DHT_BASE: self._got_peers[info_hash].popitem(False) def get_peers(self, hash, delay=0, block=True, callback=None, limit=10): - """Return a list of at most 1000 (ip, port) downloading `hash` or pass-it to `callback` - - Note: - if `block` is False, the returned list will be most likely empty on the first call - - Args: - hash (str): A 160bits (20 Bytes) long identifier to 
look for peers - delay (int, optional): delay in second to wait before starting to look for - the K closest nodes into the dht. default ot 0 - block (bool, optional): wait until the announce in done if True, return immediately - otherwise. default ot True. - callback (callable, optional): A callable accepting a argument of type list of (str, int) - called then peers have been found. - limit (int, optional): max number of peer to look for before returning. default to 10. - - Returns: - a list of (str, int) peers downloading `hash` + """ + Return a list of at most 1000 (ip, port) downloading ``hash`` or pass-it to ``callback`` + + :param bytes hash: A 160bits (20 Bytes) long identifier to look for peers + :param float delay: A delay in second to wait before starting to look for the K closest + nodes into the dht. The default is ``0`` + :param bool block: If ``True`` (the default) block until we get at least one peer, + otherwise, return immediately (with or without peers). + :param callback: An optional callable taking as argument a list of peers (ip, port). + Called once we found most of the peers store in the DHT. + :param int limit: The maximum number of peer to look for before stoping the search. + The default is 10, the max is 1000. + :return: A list of peers (ip, port) with the ip in dotted notation (``"1.2.3.4"``) + :rtype: list + + Note: + if ``block`` is False, the returned list will be most likely empty on the first call + subsequent call will return peers found so far. 
""" peers = None if hash in self._got_peers and self._got_peers[hash] and len(self._got_peers[hash])>=limit: @@ -537,7 +637,27 @@ cdef class DHT_BASE: self.sleep(tosleep, stop) def _get_peers(self, info_hash, compact=True, errno=0): - """Return peers store locally by remote announce_peer""" + """ + Return peers store locally by remote announce_peer queries + + :param bytes info_hash: A 160 bits (20 Bytes) long identifier for which we want to get + peers + :param bool compact: If ``True`` the peers addresses are returned in compact format + Otherwise, the peers addresses are tuple (ip, port) with ip in dotted notation + (``"1.2.3.4"``) and port an integer. The default is ``True``. + :return: A list of peers addresses + :rtype: list + :raises KeyError: if no peers for ``info_hash`` are store locally + + Note: + If not peer are found for ``info_hash``, the function will retry for 2s before + raising a KeyError exception. + + Contact information in for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end. + """ if not info_hash in self._peers and compact: return None elif not info_hash in self._got_peers and not compact: @@ -562,25 +682,26 @@ cdef class DHT_BASE: return self._get_peers(info_hash, compact, errno=errno+1) def get_closest_nodes(self, id, compact=False): - """return the current K closest nodes from `id` - - Note: - Contact information for peers is encoded as a 6-byte string. - Also known as "Compact IP-address/port info" the 4-byte IP address - is in network byte order with the 2 byte port in network byte order - concatenated onto the end. - Contact information for nodes is encoded as a 26-byte string. - Also known as "Compact node info" the 20-byte Node ID in network byte - order has the compact IP-address/port info concatenated to the end. 
+ """ + return the current K closest nodes from ``id`` present in the routing table (K = 8) - Args: - id (str): A 160bits (20 Bytes) long identifier to look for closest nodes - in the routing table - compact (bool, optional): default to False + :param bytes id: A 160bits (20 Bytes) long identifier for which we want the closest nodes + in the routing table. + :param bool compact: If ``True`` the nodes infos are returned in compact format. + Otherwise, intances of :class:`Node` are returned. The default is ``False``. + :return: A list of :class:`Node` if ``compact`` is ``False``, a :class:`bytes` of size + multiple of 26 if ``compact`` is ``True``. + :rtype: :class:`list` if ``compact`` is ``False``, a :class:`bytes` otherwise. - Returns: - A list of Compact node info if `compact` is True, a list of - `Node` instances otherwise. + Note: + Contact information for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end. + + Contact information for nodes is encoded as a 26-byte string. + Also known as "Compact node info" the 20-byte Node ID in network byte + order and the compact IP-address/port info concatenated to the end. 
""" l = list(self.root.get_closest_nodes(id)) if compact: @@ -639,13 +760,20 @@ cdef class DHT_BASE: (_,sockets,_) = select.select([], [self.sock], [], 1) if sockets: self.sock.sendto(msg, addr) - self.socket_out+=1 + self._socket_out+=1 break + except socket.gaierror: + self.debug(0, "send:%r %r %r" % (e, addr, msg)) except socket.error as e: - if e.errno in [90, 13]: # Message too long + # 90: Message too long + # 13: Permission denied + if e.errno in [90, 13]: self.debug(0, "send:%r %r %r" % (e, addr, msg)) - break - if e.errno not in [11, 1]: # 11: Resource temporarily unavailable + # 11: Resource temporarily unavailable, try again + # 1: Operation not permitted + elif e.errno in [11, 1]: + pass + else: self.debug(0, "send:%r %r" % (e, addr) ) raise except Queue.Empty: @@ -698,7 +826,7 @@ cdef class DHT_BASE: # build the response object reponse = obj.response(self) - self.socket_in+=1 + self._socket_in+=1 self.last_msg = time.time() # send it @@ -708,7 +836,7 @@ cdef class DHT_BASE: # process the response self._process_response(obj, obj_opt) - self.socket_in+=1 + self._socket_in+=1 self.last_msg = time.time() self.last_msg_rep = time.time() # on error @@ -785,7 +913,7 @@ cdef class DHT_BASE: self.clean() # Long cleaning - if now - self.long_clean >= 15 * 60: + if now - self._long_clean >= 15 * 60: # cleaning old tokens to_delete = [] for ip in self.token: @@ -844,7 +972,7 @@ cdef class DHT_BASE: self.clean_long() - self.long_clean = now + self._long_clean = now def build_table(self): """Build the routing table by querying find_nodes on his own id""" @@ -867,13 +995,13 @@ cdef class DHT_BASE: self._clean() # Searching its own id while the Routing table is growing - if self.root_heigth != self.root.heigth(): + if self._root_heigth != self.root.heigth(): self.debug(1, "Fetching my own id") if self.build_table(): - self.root_heigth += 1 + self._root_heigth += 1 # displaying some stats - (in_s, out_s, delta) = self.socket_stats() + (in_s, out_s, delta) = 
self._socket_stats() if in_s <= 0 or self.debuglvl > 0: (nodes, goods, bads) = self.root.stats() if goods <= 0: @@ -1100,6 +1228,9 @@ class BucketNotFull(Exception): class NoTokenError(Exception): pass +class FailToStop(Exception): + pass + cdef class Node: """A node of the dht in the routing table @@ -1606,7 +1737,7 @@ class RoutingTable(object): zombie (bool): True if dht is stopped but one thread or more remains alive """ - #__slot__ = ("trie", "_heigth", "split_ids", "info_hash", "last_merge", "lock", "_dhts", "stoped") + #__slot__ = ("trie", "_heigth", "split_ids", "info_hash", "lock", "_dhts", "stoped") def __init__(self, debuglvl=0): """ Args: @@ -1618,7 +1749,6 @@ class RoutingTable(object): self._heigth=1 self.split_ids = set() self.info_hash = set() - #self.last_merge = 0 self.lock = Lock() self._to_split = SplitQueue() self._dhts = set() diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d61fc55 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,225 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
+ +.PHONY: help +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " epub3 to make an epub3" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + @echo " dummy to check syntax errors of document sources" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 
+ +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/btdht.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/btdht.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/btdht" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/btdht" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
+ +.PHONY: epub3 +epub3: + $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 + @echo + @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +.PHONY: dummy +dummy: + $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy + @echo + @echo "Build finished. Dummy builder generates no files." diff --git a/docs/README.rst b/docs/README.rst new file mode 100644 index 0000000..72a3355 --- /dev/null +++ b/docs/README.rst @@ -0,0 +1 @@ +.. include:: ../README.rst diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..59df1c5 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# +# btdht documentation build configuration file, created by +# sphinx-quickstart on Mon Nov 21 13:42:01 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. 
+# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +import os +import sys +#import pyximport +#pyximport.install() + +sys.path.append(os.path.abspath('..')) + +from setup import VERSION + +sys.path.pop() + + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +# +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'btdht' +copyright = u'2016, Valentin Samir' +author = u'Valentin Samir' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = VERSION +# The full version, including alpha/beta/rc tags. +release = version + +# The language for content autogenerated by Sphinx. 
Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# +# today = '' +# +# Else, today_fmt is used as the format for a strftime call. +# +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# These patterns also affect html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further.
For a list of options available for each theme, see the +# documentation. +# +# html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. +# "<project> v<release> documentation" by default. +# +# html_title = u'btdht v0.2.0' + +# A shorter title for the navigation bar. Default is the same as html_title. +# +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# +# html_logo = None + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. +# +# html_last_updated_fmt = None + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# +# html_additional_pages = {} + +# If false, no module index is generated. +# +# html_domain_indices = True + +# If false, no index is generated.
+# +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' +# +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. +# +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'btdhtdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files.
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'btdht.tex', u'btdht Documentation', + u'Valentin Samir', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# +# latex_use_parts = False + +# If true, show page references after internal links. +# +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# +# latex_appendices = [] + +# If false, will not define \strong, \code, \titleref, \crossref ... but only +# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added +# packages. +# +# latex_keep_old_macro_names = True + +# If false, no module index is generated. +# +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'btdht', u'btdht Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +# +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'btdht', u'btdht Documentation', + author, 'btdht', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +# +# texinfo_appendices = [] + +# If false, no module index is generated.
+# +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# +# texinfo_no_detailmenu = False + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {'https://docs.python.org/': None} + +autodoc_member_order = 'bysource' diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..6d8c0b4 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,27 @@ +.. btdht documentation master file, created by + sphinx-quickstart on Mon Nov 21 13:42:01 2016. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to btdht's documentation! +================================= + +Contents: + +.. toctree:: + :maxdepth: 3 + + README + package/btdht + +.. toctree:: + :maxdepth: 2 + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..d224147 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,281 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. 
qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. epub3 to make an epub3 + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. coverage to run coverage check of the documentation if enabled + echo. dummy to check syntax errors of document sources + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 1>NUL 2>NUL +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\btdht.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\btdht.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "epub3" ( + %SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3 + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub3 file is in %BUILDDIR%/epub3. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. 
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. 
+ goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. + echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +if "%1" == "dummy" ( + %SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. Dummy builder generates no files. + goto end +) + +:end diff --git a/docs/package/btdht.dht.rst b/docs/package/btdht.dht.rst new file mode 100644 index 0000000..be09a32 --- /dev/null +++ b/docs/package/btdht.dht.rst @@ -0,0 +1,109 @@ +btdht.dht module +================ + +.. automodule:: btdht.dht + :show-inheritance: + +.. autoclass:: BucketFull + :show-inheritance: + :members: + :undoc-members: +.. autoclass:: BucketNotFull + :show-inheritance: + :members: + :undoc-members: +.. autoclass:: NoTokenError + :show-inheritance: + :members: + :undoc-members: +.. autoclass:: NotFound + :show-inheritance: + :members: + :undoc-members: +.. autoclass:: FailToStop + :show-inheritance: + :members: + :undoc-members: + + +.. autoclass:: DHT_BASE + :show-inheritance: + :members: + + .. autoattribute:: ignored_net + + :class:`list` of default ignored ip networks + + .. autoattribute:: root + + :class:`RoutingTable` the used instance of the routing table + + .. autoattribute:: bind_port + + :class:`int` port the dht is binded to + + .. autoattribute:: bind_ip + + :class:`str` interface the dht is binded to + + .. 
autoattribute:: myid + + :class:`utils.ID` the dht instance id, 160bits long (20 Bytes) + + .. autoattribute:: debuglvl + + :class:`int` the dht instance verbosity level + + .. autoattribute:: threads + + :class:`list` of the :class:`Thread` of the dht instance + + .. autoattribute:: transaction_type + + Map between transaction id and message type (to be able to match responses) + + .. autoattribute:: token + + Token sent with get_peers response. Map between ip addresses and a list of random tokens. + A new token by ip is generated at most every 5 min, a single token is valid 10 min. + On reception of an announce_peer query from ip, the query is only accepted if we have a + valid token (generated less than 10min ago). + + .. autoattribute:: mytoken + + Tokens received on get_peers response. Map between ip addresses and received token from ip. + Needed to send announce_peer to that particular ip. + + .. autoattribute:: sock + + The current dht :class:`socket.Socket` + + .. autoattribute:: stoped + + the state (stoped ?) of the dht + + +.. autoclass:: DHT + :show-inheritance: + :undoc-members: + +.. autoclass:: Node + :show-inheritance: + :members: + :undoc-members: + +.. autoclass:: Bucket + :show-inheritance: + :members: + :undoc-members: + +.. autoclass:: SplitQueue + :show-inheritance: + :members: + :undoc-members: + +.. autoclass:: RoutingTable + :show-inheritance: + :members: + :undoc-members: + diff --git a/docs/package/btdht.krcp.rst b/docs/package/btdht.krcp.rst new file mode 100644 index 0000000..3bc6e5b --- /dev/null +++ b/docs/package/btdht.krcp.rst @@ -0,0 +1,7 @@ +btdht.krcp module +================= + +.. automodule:: btdht.krcp + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/package/btdht.rst b/docs/package/btdht.rst new file mode 100644 index 0000000..64eaea9 --- /dev/null +++ b/docs/package/btdht.rst @@ -0,0 +1,19 @@ +btdht package +============= + +Submodules +---------- + +..
toctree:: + + btdht.dht + btdht.utils + btdht.krcp + +Module contents +--------------- + +.. automodule:: btdht + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/package/btdht.utils.rst b/docs/package/btdht.utils.rst new file mode 100644 index 0000000..1a0a550 --- /dev/null +++ b/docs/package/btdht.utils.rst @@ -0,0 +1,7 @@ +btdht.utils module +================== + +.. automodule:: btdht.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/setup.py b/setup.py index a31af95..1630604 100755 --- a/setup.py +++ b/setup.py @@ -9,41 +9,44 @@ except ImportError: has_cython = False -c_extensions = [ - Extension("btdht.dht", ["btdht/dht.c"]), - Extension("btdht.krcp", ["btdht/krcp.c"]), - Extension("btdht.utils", ["btdht/utils.c"]), -] +VERSION = "0.2.0" -with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as readme: - README = readme.read() +if __name__ == "__main__": + c_extensions = [ + Extension("btdht.dht", ["btdht/dht.c"]), + Extension("btdht.krcp", ["btdht/krcp.c"]), + Extension("btdht.utils", ["btdht/utils.c"]), + ] -setup( - name="btdht", - version="0.2.0", - packages = ['btdht'], - ext_modules = cythonize("btdht/*.pyx") if has_cython else c_extensions, - include_package_data=True, - license='GPLv3', - description="efficent full implementation of the bittorent mainline dht", - long_description=README, - author='Valentin Samir', - author_email='valentin.samir@crans.org', - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', - 'Programming Language :: C', - 'Programming Language :: Cython', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Communications :: File Sharing' - ], - install_requires=["datrie >= 0.7", 
"netaddr >= 0.7.12", "six >= 1.8"], - url='https://github.com/nitmir/btdht/', - download_url="https://github.com/nitmir/btdht/releases/latest", - zip_safe=False, -) + with open(os.path.join(os.path.dirname(__file__), 'README.rst')) as readme: + README = readme.read() + + setup( + name="btdht", + version=VERSION, + packages = ['btdht'], + ext_modules = cythonize("btdht/*.pyx") if has_cython else c_extensions, + include_package_data=True, + license='GPLv3', + description="efficent full implementation of the bittorent mainline dht", + long_description=README, + author='Valentin Samir', + author_email='valentin.samir@crans.org', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', + 'Programming Language :: C', + 'Programming Language :: Cython', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Communications :: File Sharing' + ], + install_requires=["datrie >= 0.7", "netaddr >= 0.7.12", "six >= 1.8"], + url='https://github.com/nitmir/btdht/', + download_url="https://github.com/nitmir/btdht/releases/latest", + zip_safe=False, + ) From e46f104adbb24e3b44c0f757a8f27341bdd02c86 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Fri, 25 Nov 2016 23:04:44 +0100 Subject: [PATCH 09/30] Start using "light" threads --- btdht/dht.pyx | 503 +++++++++++++++++++++++++++++------------------- btdht/krcp.pyx | 2 +- btdht/utils.pyx | 118 ++++++++++++ 3 files changed, 426 insertions(+), 197 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index ad1244d..cc131bb 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -18,10 +18,6 @@ import IN import sys import time import six -try: - import Queue -except ImportError: - import queue as Queue import heapq import traceback import struct 
@@ -30,6 +26,10 @@ import select import collections import netaddr import binascii +try: + import Queue +except ImportError: + import queue as Queue from functools import total_ordering, reduce from threading import Thread, Lock from random import shuffle @@ -37,7 +37,7 @@ from random import shuffle import datrie import utils -from utils import ID, nbit, nflip, nset +from utils import ID, nbit, nflip, nset, SplitQueue, PollableQueue from .krcp cimport BMessage from .krcp import BError, ProtocolError, GenericError, ServerError, MethodUnknownError @@ -77,6 +77,10 @@ cdef class DHT_BASE: '172.16.0.0/12', '192.0.0.0/24', '192.0.2.0/24', '192.168.0.0/16', '198.18.0.0/15', '198.51.100.0/24', '203.0.113.0/24', '224.0.0.0/4', '240.0.0.0/4', '255.255.255.255/32' ] + #: :class:`str` prefixing all debug message + prefix = "" + #: :class:`set` of ignored ip in dotted notation + ignored_ip = [] #: :class:`RoutingTable` the used instance of the routing table root = None #: :class:`int` port the dht is binded to @@ -101,12 +105,17 @@ cdef class DHT_BASE: mytoken = {} #: The current dht :class:`socket.Socket` sock = None + #: A :class:`PollableQueue` of messages (data, (ip, port)) to send + to_send = PollableQueue() #: the state (stoped ?) of the dht stoped = True #: last time we received any message last_msg = 0 #: last time we receive a response to one of our messages last_msg_rep = 0 + #: A list of looping iterator to schedule. 
Calling :meth:`schedule` will do a scheduling for + #: 1 DHT instance + to_schedule = [] @@ -167,7 +176,7 @@ cdef class DHT_BASE: # initialising the routing table self.root = RoutingTable() if routing_table is None else routing_table - self._to_process = Queue.Queue(maxsize=process_queue_size) + self._to_process = PollableQueue(maxsize=process_queue_size) self.bind_port = bind_port self.bind_ip = bind_ip @@ -180,6 +189,37 @@ cdef class DHT_BASE: self.debuglvl = debuglvl self.prefix = prefix + self.threads = [] + self.transaction_type = {} + self.token = collections.defaultdict(list) + self.mytoken = {} + self.stoped = True + self.last_msg = 0 + self.last_msg_rep = 0 + + self._peers=collections.defaultdict(collections.OrderedDict) + self._got_peers=collections.defaultdict(collections.OrderedDict) + self._get_peer_loop_list = [] + self._get_peer_loop_lock = {} + self._get_closest_loop_lock = {} + self._to_process_registered = set() + self._threads = [] + self._threads_zombie = [] + self._last_debug = "" + self._last_debug_time = 0 + self._socket_in = 0 + self._socket_out = 0 + self._last_socket_stats = 0 + self._long_clean = 0 + self._root_heigth = 0 + + self.to_schedule = [ + ("%sroutine" % self.prefix, self._routine), + ("%sget_peers_closest_loop" % self.prefix, self._get_peers_closest_loop), + ("%sprocess_loop" % self.prefix, self._process_loop) + ] + + self.root.register_dht(self) def save(self, filename=None, max_node=None): """save the current list of nodes to ``filename``. 
@@ -281,21 +321,23 @@ cdef class DHT_BASE: """``True`` if dht is stopped but one thread or more remains alive, ``False`` otherwise""" return bool(self.stoped and [t for t in self._threads if t.is_alive()]) - def start(self): + def start(self, start_routing_table=True): """ Start the dht: - * initialize some attributes - * register this instance of the dht in the routing table - (see :meth:`RoutingTable.register_dht`) - * initialize the dht socket (see :meth:init_socket) - * start the routing table if needed - * start 5 threads: - * for receiving messages (see :meth:`_recv_loop`) - * for sending messages (see :meth:`_send_loop`) - * for doing some routines (boostraping, cleaning, see :meth:`_routine`) - * for finding the closest peer from some ids (see :meth:`_get_peers_closest_loop`) - * for processing messages to send to user defined functions - (see :meth:`_process_loop`) + * initialize some attributes + * register this instance of the dht in the routing table + (see :meth:`RoutingTable.register_dht`) + * initialize the dht socket (see :meth:init_socket) + * start the routing table if needed and ``start_routing_table` is ``True`` + + :param bool start_routing_table: If ``True`` (the default) also start the routing table + if needed + + Notes: + The routing table needs to be started in last after all possible DHT using it. + As per default only one DHT use the routing table, the default is to start it + automatically immediatly after the DHT is started. ``start_routing_table`` allow + to diable this automatic start. 
""" if not self.stoped: self.debug(0, "Already started") @@ -304,8 +346,6 @@ cdef class DHT_BASE: self.debug(0, "Zombie threads, unable de start") return self._threads_zombie - self.root.register_dht(self) - self.stoped = False self._root_heigth = 0 self._socket_in = 0 @@ -317,21 +357,9 @@ cdef class DHT_BASE: self.init_socket() - if self.root.stoped: + if start_routing_table and self.root.stoped: self.root.start() - self.threads = [] - for f, name in [ - (self._recv_loop, 'recv'), (self._send_loop, 'send'), (self._routine, 'routine'), - (self._get_peers_closest_loop, 'get_peers_closest'), (self._process_loop, 'process_msg') - ]: - t = Thread(target=f) - t.setName("%s:%s" % (self.prefix, name)) - t.daemon = True - t.start() - self._threads.append(t) - self.threads.append(t) - def is_alive(self): """Test if all threads of the dht are alive, stop the dht if one of the thread is dead @@ -392,7 +420,7 @@ cdef class DHT_BASE: try:self.sock.close() except: pass # initialize the sending queue - self._to_send = Queue.Queue() + self.to_send = PollableQueue() self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) #self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self.sock.setsockopt(socket.IPPROTO_IP, IN.IP_MTU_DISCOVER, IN.IP_PMTUDISC_DO) @@ -556,6 +584,7 @@ cdef class DHT_BASE: return peers def _get_peers_closest_loop(self): + yield 0 """Function run by the thread exploring the DHT""" def on_stop(hash, typ): self.root.release_torrent(hash) @@ -634,7 +663,8 @@ cdef class DHT_BASE: break del tried_nodes del closest - self.sleep(tosleep, stop) + yield (time.time() + tosleep) + #self.sleep(tosleep, stop) def _get_peers(self, info_hash, compact=True, errno=0): """ @@ -746,39 +776,6 @@ cdef class DHT_BASE: else: self.debug(2, "obj of type %r" % obj.y) - def _send_loop(self): - """function lauch by the thread sending the udp msg""" - while True: - if self.stoped: - return - try: - (msg, addr) = self._to_send.get(timeout=1) - while True: - if self.stoped: - 
return - try: - (_,sockets,_) = select.select([], [self.sock], [], 1) - if sockets: - self.sock.sendto(msg, addr) - self._socket_out+=1 - break - except socket.gaierror: - self.debug(0, "send:%r %r %r" % (e, addr, msg)) - except socket.error as e: - # 90: Message too long - # 13: Permission denied - if e.errno in [90, 13]: - self.debug(0, "send:%r %r %r" % (e, addr, msg)) - # 11: Resource temporarily unavailable, try again - # 1: Operation not permitted - elif e.errno in [11, 1]: - pass - else: - self.debug(0, "send:%r %r" % (e, addr) ) - raise - except Queue.Empty: - pass - def sendto(self, msg, addr): """program a msg to be send over the network @@ -786,75 +783,130 @@ cdef class DHT_BASE: msg (str): message to be send to addr (tuple of str, port): address to send to """ - self._to_send.put((msg, addr)) + self.to_send.put((msg, addr)) - def _recv_loop(self): + def _socket_loop(self): """function lauch by the thread receiving the udp messages from the DHT""" while True: if self.stoped: return try: - (sockets,_,_) = select.select([self.sock], [], [], 1) + (sockets_read, sockets_write, _) = select.select( + [self.sock, self.to_send.sock], + [] if self.to_send.empty() else [self.sock], + [], + 0.1 + ) except socket.error as e: self.debug(0, "recv:%r" %e ) raise - if sockets: + for sock in sockets_read: + if sock == self.sock: + self._process_incoming_message() + elif sock == self.to_send.sock and sockets_write: + self._process_outgoing_message() + + + def _process_outgoing_message(self): + """ + Process a new outgoing message. The message is retrieved from the queue :attr:`to_send` + and send to :attr:`sock`. So the method should only be called then there is a message + in the send queue and then :attr:`sock` is ready for a write. 
+ """ + try: + (msg, addr) = self.to_send.get_nowait() + try: + self.sock.sendto(msg, addr) + self._socket_out+=1 + except socket.gaierror as e: + self.debug(0, "send:%r %r %r" % (e, addr, msg)) + except socket.error as e: + # 90: Message too long + # 13: Permission denied + if e.errno in [90, 13]: + self.debug(0, "send:%r %r %r" % (e, addr, msg)) + # 11: Resource temporarily unavailable, try again + # 1: Operation not permitted + elif e.errno in [11, 1]: + pass + else: + self.debug(0, "send:%r %r" % (e, addr) ) + raise + except Queue.Empty: + pass + + def _process_incoming_message(self): + """ + Process a new incoming message. The message is read from :attr:`sock`, so this + method should only be called when :attr:`sock` is ready for a read. + + """ + try: + data, addr = self.sock.recvfrom(4048) + if addr[0] in self.ignored_ip: + return + elif utils.ip_in_nets(addr[0], self.ignored_net): + return + elif addr[1] < 1 or addr[1] > 65535: + self.debug(1, "Port should be whithin 1 and 65535, not %s" % addr[1]) + return + elif len(data) < 20: + return + else: + # Building python object from bencoded data + obj, obj_opt = self._decode(data, addr) + # Update sender node in routing table try: - data, addr = self.sock.recvfrom(4048) - if addr[0] in self.ignored_ip: - continue - if utils.ip_in_nets(addr[0], self.ignored_net): - continue - if addr[1] < 1 or addr[1] > 65535: - self.debug(1, "Port should be whithin 1 and 65535, not %s" % addr[1]) - continue - if len(data) < 20: - continue - # Building python object from bencoded data - obj, obj_opt = self._decode(data, addr) - # Update sender node in routing table + self._update_node(obj) + except TypeError: + print("TypeError: %r in _recv_loop" % obj) + raise + # On query + if obj.y == b"q": + # process the query + self._process_query(obj) + # build the response object + reponse = obj.response(self) + + self._socket_in+=1 + self.last_msg = time.time() + + # send it + self.sendto(reponse.encode(), addr) + # on response + elif 
obj.y == b"r": + # process the response try: - self._update_node(obj) - except TypeError: - print("TypeError: %r in _recv_loop" % obj) - raise - # On query - if obj.y == b"q": - # process the query - self._process_query(obj) - # build the response object - reponse = obj.response(self) - - self._socket_in+=1 - self.last_msg = time.time() - - # send it - self.sendto(reponse.encode(), addr) - # on response - elif obj.y == b"r": - # process the response self._process_response(obj, obj_opt) + except ValueError as error: + raise ProtocolError(obj.t, error.args[0]) + + self._socket_in+=1 + self.last_msg = time.time() + self.last_msg_rep = time.time() + # on error + elif obj.y == b"e": + # process it + self._process_error(obj, obj_opt) + # if we raised a BError, send it + except (BError,) as error: + if self.debuglvl > 1: + traceback.print_exc() + self.debug(2, "error %r" % error) + self.sendto(error.encode(), addr) + # socket unavailable ? + except socket.error as e: + if e.errno not in [11, 1]: # 11: Resource temporarily unavailable + self.debug(0, "send:%r : (%r, %r)" % (e, data, addr)) + raise + except ValueError as e: + #if self.debuglvl > 0: + # traceback.print_exc() + # self.debug(1, "%s for %r" % (e, addr)) + traceback.print_exc() + #self.debug(-100, e.args[0]) - self._socket_in+=1 - self.last_msg = time.time() - self.last_msg_rep = time.time() - # on error - elif obj.y == b"e": - # process it - self.on_error(obj, obj_opt) - - # if we raised a BError, send it - except (BError,) as error: - if self.debuglvl > 1: - traceback.print_exc() - self.debug(2, "error %r" % error) - self.sendto(error.encode(), addr) - # socket unavailable ? 
- except socket.error as e: - if e.errno not in [11, 1]: # 11: Resource temporarily unavailable - self.debug(0, "send:%r : (%r, %r)" % (e, data, addr)) - raise cdef void _set_transaction_id(self, BMessage query, int id_len=6): @@ -983,11 +1035,13 @@ cdef class DHT_BASE: def _routine(self): """function lauch by the thread performing some routine (boostraping, building the routing table, cleaning) on the DHT""" + yield 0 next_routine = time.time() + 15 while True: if self.stoped: return - self.sleep(next_routine - time.time()) + #self.sleep(next_routine - time.time()) + yield next_routine now = time.time() next_routine = now + 15 @@ -1136,6 +1190,12 @@ cdef class DHT_BASE: except KeyError as e: raise ProtocolError(query.t, b"Message malformed: %s key is missing" % e.args[0]) + def _process_error(self, obj, query): + if "error" in self._to_process_registered: + try: + self._to_process.put_nowait((query, obj)) + except Queue.Full: + self.debug(0, "Unable to queue msg to be processed, QueueFull") def _process_response(self, obj, query): if query.q in [b"find_node", b"ping", b"get_peers", b"announce_peer"]: @@ -1159,17 +1219,22 @@ cdef class DHT_BASE: def _process_loop(self): """function lauch by the thread processing messages""" + yield 1 + yield self._to_process while True: if self.stoped: return try: - (query, response) = self._to_process.get(timeout=1) + (query, response) = self._to_process.get_nowait() if response is None: getattr(self, 'on_%s_query' % query.q.decode())(query) + elif response.y == b"e": + self.on_error(response, query) else: getattr(self, 'on_%s_response' % query.q.decode())(query, response) except Queue.Empty: pass + yield def _decode(self, s, addr): """decode a message""" @@ -1718,17 +1783,6 @@ class DHT(DHT_BASE): class NotFound(Exception): pass -class SplitQueue(Queue.Queue): - def _init(self, maxsize): - self.queue = collections.OrderedDict() - def _put(self, item): - if not item[0] in self.queue: - self.queue[item[0]] = item[1:-1] + (set(),) - 
self.queue[item[0]][-1].add(item[-1]) - def _get(self): - (key, value) = self.queue.popitem(False) - return (key, ) + value - class RoutingTable(object): """ Attributs: @@ -1750,7 +1804,6 @@ class RoutingTable(object): self.split_ids = set() self.info_hash = set() self.lock = Lock() - self._to_split = SplitQueue() self._dhts = set() self.stoped = True self.need_merge = False @@ -1760,6 +1813,10 @@ class RoutingTable(object): self._threads_zombie= [] self._last_debug = "" self._last_debug_time = 0 + self.to_schedule = [ + ("RT:merge_loop", self._merge_loop), + ("RT:routine", self._routine), + ] def stop_bg(self): """stop the routing table and return immediately""" @@ -1792,7 +1849,7 @@ class RoutingTable(object): def zombie(self): return self.stoped and [t for t in self._threads if t.is_alive()] - def start(self): + def start(self, **kwargs): """start the routing table""" with self.lock: if not self.stoped: @@ -1803,14 +1860,29 @@ class RoutingTable(object): return self._threads_zombie self.stoped = False + # Le the routing table schedule the DHT iterators + to_schedule = [] + to_schedule.extend(self.to_schedule) + for dht in self._dhts: + if dht.stoped is True: + raise RuntimeError( + "Try to start the routing table before once of its DHT instances" + ) + to_schedule.extend(dht.to_schedule) + self.threads = [] - for f in [self._merge_loop, self._routine, self._split_loop]: - t = Thread(target=f) - t.setName("RT:%s" % f.__func__.__name__) - t.daemon = True - t.start() - self._threads.append(t) - self.threads.append(t) + t = Thread(target=utils.schedule, args=(to_schedule,)) + t.setName("RT:scheduler") + t.daemon = True + t.start() + self._threads.append(t) + self.threads.append(t) + t = Thread(target=self._dhts_send_loop) + t.setName("RT:dhts_send_loop") + t.daemon = True + t.start() + self._threads.append(t) + self.threads.append(t) def is_alive(self): """return True if all routing table threads are alive. 
Otherwire return False @@ -1850,24 +1922,32 @@ class RoutingTable(object): pass def _merge_loop(self): + yield 0 next_merge = 0 # at most one full merge every 10 minutes next_full_merge = time.time() + 10 * 60 while True: - self.sleep(max(next_merge - time.time(), 1)) + #self.sleep(max(next_merge - time.time(), 1)) + if self.stoped: + return + yield max(next_merge, time.time() + 1) if self._to_merge: stack = [] while self._to_merge: stack.append(self._to_merge.pop()) next_merge = time.time() + 60 self.debug(1, "Merging %s buckets" % (len(stack),)) - self._merge(stack) + # execute merge partially and return regulary hand to the scheduler + for i in self._merge(stack): + yield i if self.need_merge and time.time() > next_full_merge: self.need_merge = False next_merge = time.time() + 60 next_full_merge = time.time() + 10 * 60 - self._merge() + # execute merge partially and return regulary hand to the scheduler + for i in self._merge(): + yield i def register_torrent_longterm(self, id): """Same as register_torrent but garanty that the torrent wont @@ -1891,8 +1971,13 @@ class RoutingTable(object): on start, dht automaticaly register itself to its routing table """ + if dht.stoped is False: + RuntimeError( + "DHT instance must be registered on the routing table before the start " + "of the routing table" + ) self._dhts.add(dht) - self.split_ids.add(dht.myid) + self.split_ids.add(dht.myid.value) def release_dht(self, dht): """release a `dht` instance to the routing table @@ -1901,8 +1986,10 @@ class RoutingTable(object): on stop, dht automatially release itself from the routing table """ - try: self._dhts.remove(dht) - except KeyError:pass + try: + self._dhts.remove(dht) + except KeyError: + pass try: self.split_ids.remove(dht.myid) if not self.need_merge: @@ -1934,14 +2021,19 @@ class RoutingTable(object): self._last_debug_time = time.time() def _routine(self): - last_explore_tree = 0 + yield 0 + last_explore_tree = time.time() while True: #self.clean() # exploring the 
routing table - self.sleep(60 - (time.time() - last_explore_tree)) + if self.stoped: + return + yield (last_explore_tree + 60) + #self.sleep(60 - (time.time() - last_explore_tree)) dhts = list(self._dhts) shuffle(dhts) now = time.time() + i = 0 for key, bucket in self.trie.items(): if self.stoped: return @@ -1954,6 +2046,7 @@ class RoutingTable(object): nodes = self.get_closest_nodes(id) if nodes and dhts: nodes[0].find_node(dhts[0], id) + i += 1 del nodes # If questionnable nodes, ping one of them questionable = [node for node in bucket if not node.good and not node.bad] @@ -1962,34 +2055,14 @@ class RoutingTable(object): if not questionable: break questionable.pop().ping(dht) + i+=1 del questionable - last_explore_tree = time.time() - - def _split_loop(self): - while True: - if self.stoped: - return - try: - (bucket, dht, callbacks) = self._to_split.get(timeout=1) - self._split(dht, bucket, callbacks) - except Queue.Empty: - pass - - def split(self, dht, bucket, callback=None): - """request for a bucket identified by `id` to be split - - Notes: - the routing table cover the entire 160bits space - - Args: - dht (DHT_BASE): a dht instance - bucket (Bucket): a bucket in the routing table to split - callback (tuple): first element must be callable and further element - arguments to pass to the callable. 
- """ - self._to_split.put((bucket, dht, callback)) + # give back the main in case of very big routing table to the scheduler + if i > 1000: + yield 0 + last_explore_tree = time.time() def empty(self): """Remove all subtree""" @@ -2084,7 +2157,8 @@ class RoutingTable(object): if b.id_length < 160: for id in self.split_ids | self.info_hash: if b.own(id): - self.split(dht, b, callback=(self.add, (dht, node))) + self.split(dht, b) + self.add(dht, node) return else: print("%r" % b) @@ -2093,17 +2167,17 @@ class RoutingTable(object): """height of the tree of the routing table""" return self._heigth - def _split(self, dht, bucket, callbacks=None): + def split(self, dht, bucket): + """request for a bucket identified by `id` to be split + + Notes: + the routing table cover the entire 160bits space + + Args: + dht (DHT_BASE): a dht instance + bucket (Bucket): a bucket in the routing table to split + """ try: - #try: - # prefix = self.trie.longest_prefix(utils.id_to_longid(str(bucket.id))) - #except KeyError: - # if u"" in self.trie: - # prefix = u"" - # else: - # return - #print prefix - #print utils.id_to_longid(str(bucket.id))[:bucket.id_length] prefix = utils.id_to_longid(bucket.id)[:bucket.id_length] (zero_b, one_b) = self.trie[prefix].split(self, dht) (zero_b, one_b) = self.trie[prefix].split(self, dht) @@ -2115,10 +2189,6 @@ class RoutingTable(object): self.debug(2, "trie changed while splitting") except BucketNotFull as e: self.debug(1, "%r" % e) - if callbacks: - for callback in callbacks: - callback[0](*callback[1]) - def merge(self): """Request a merge to be perform""" @@ -2136,6 +2206,8 @@ class RoutingTable(object): self.debug(1, "Less than 1000 nodes, no merge") return started = time.time() + i = 0 + j = 0 while stack: if self.stoped: return @@ -2147,8 +2219,11 @@ class RoutingTable(object): if utils.id_to_longid(id).startswith(key[:-1]): to_merge = False break + j += 1 + # give back control to the scheduler every 100,000 keys + if j >= 100000: + yield 0 if 
to_merge: - #with self.lock: try: if key not in self.trie: self.debug(2, "%s gone away while merging" % key) @@ -2168,9 +2243,45 @@ class RoutingTable(object): except KeyError: self.debug(0, "trie changed while merging") + i += 1 + # give back control to the scheduler every 1000 buckets merged + if i >= 1000: + yield 0 + if full_merge: self._heigth = max(len(k) for k in self.trie.keys()) + 1 self.debug(1, "%s nodes merged in %ss" % (nodes_before - self.stats()[0], int(time.time() - started))) - + def _dhts_send_loop(self): + sockets = {} + to_send_sockets = {} + for dht in self._dhts: + sockets[dht.sock] = dht + to_send_sockets[dht.to_send.sock] = dht + read_sockets = [s for s in sockets] + [s for s in to_send_sockets] + def write_sockets(): + return [s for (s, dht) in six.iteritems(sockets) if not dht.to_send.empty()] + while True: + if self.stoped: + return + try: + (sockets_read, sockets_write, _) = select.select( + read_sockets, write_sockets(), [], 0.1 + ) + except socket.error as e: + self.debug(0, "recv:%r" %e ) + raise + sockets_write = set(sockets_write) + for sock in sockets_read: + if sock in sockets: + dht = sockets[sock] + if dht.stoped: + return + dht._process_incoming_message() + else: + dht = to_send_sockets[sock] + if dht.stoped: + return + if dht.sock in sockets_write: + dht._process_outgoing_message() diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 54e6cc3..5a1c38b 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -572,7 +572,7 @@ cdef class BMessage: with gil: raise MethodUnknownError(self.t, b"Method %s Unknown" % self.q) else: - printf("not ping %d\n", 0) + printf("no rpc method name %d\n", 0) else: printf("not query %d\n", 1) diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 23ad46e..b8bbf2a 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -15,6 +15,14 @@ import sys import netaddr import binascii import six +import socket +import collections +import time +import select +try: + import Queue +except ImportError: + import queue as 
Queue from functools import total_ordering from libc.stdlib cimport atoi, malloc, free @@ -420,3 +428,113 @@ def ip_in_nets(ip, nets): if ip in net: return True return False + + +class PollableQueue(Queue.Queue): + def __init__(self, *args, **kwargs): + Queue.Queue.__init__(self, *args, **kwargs) + # Create a pair of connected sockets + if os.name == 'posix': + self._putsocket, self._getsocket = socket.socketpair() + else: + # Compatibility on non-POSIX systems + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.bind(('127.0.0.1', 0)) + server.listen(1) + self._putsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._putsocket.connect(server.getsockname()) + self._getsocket, _ = server.accept() + server.close() + self._getsocket.setblocking(0) + self._putsocket.setblocking(0) + self.sock = self._getsocket + + def _put(self, *args, **kwargs): + Queue.Queue._put(self, *args, **kwargs) + self._signal_put() + + def _signal_put(self): + try: + self._putsocket.send(b'x') + except socket.error as error: + if error.errno != 11: # Resource temporarily unavailable + raise + + def _comsume_get(self): + try: + self._getsocket.recv(1) + except socket.error as error: + if error.errno != 11: # Resource temporarily unavailable + raise + + def _get(self, *args, **kwargs): + self._comsume_get() + return Queue.Queue._get(self, *args, **kwargs) + + +class SplitQueue(PollableQueue): + def _init(self, maxsize): + self.queue = collections.OrderedDict() + + def _put(self, item): + if not item[0] in self.queue: + self.queue[item[0]] = item[1:-1] + (set(),) + self._signal_put() + self.queue[item[0]][-1].add(item[-1]) + + def _get(self): + self._comsume_get() + (key, value) = self.queue.popitem(False) + return (key, ) + value + + +def schedule(to_schedule): + """ + Schedule the call of predefined iterator functions. + + :param list to_schedule: A list of callable returning an iterator + + Notes: + Iterators must behave as describe next. 
The first returned value must be an integer + describing the type of the iterator. 0 mean time bases and all subsequent yield must + return the next timestamp at which the iterator want to be called. 1 mean queue based. + The next call to the iterator must return an instance of :class:`PollableQueue`. All + subsequent yield value are then ignored. The queue based iterator will be called when + something is put on its queue. + """ + time_based = {} + queue_based = {} + timers = {} + names = {} + for i, (name, function) in enumerate(to_schedule): + iterator = function() + names[iterator] = name + typ = iterator.next() + if typ == 0: + time_based[i] = iterator + timers[i] = 0 + elif typ == 1: + queue = iterator.next() + queue_based[queue] = iterator + else: + raise RuntimeError("Unknown iterator type %s" % typ) + next_time = 0 + queue_base_socket_map = dict((q.sock, i) for (q, i) in six.iteritems(queue_based)) + queue_base_sockets = [q.sock for q in queue_based.keys()] + try: + while True: + now = time.time() + wait = max(0, next_time - now) + (sockets, _, _) = select.select(queue_base_sockets, [], [], wait) + now = time.time() + if now >= next_time: + for i, iterator in six.iteritems(time_based): + if now >= timers[i]: + timers[i] = iterator.next() + next_time = min(timers.values()) + for sock in sockets: + iterator = queue_base_socket_map[sock] + iterator.next() + except StopIteration as error: + print("Iterator %s stopped" % names[iterator]) + raise From e70bbf528bc1d442ebe85d3d13e4cd46fbdaff10 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sat, 26 Nov 2016 12:33:34 +0100 Subject: [PATCH 10/30] Only send error message upon bad query. 
Always put a valid t in error message --- btdht/dht.pyx | 29 +++++----- btdht/krcp.pxd | 2 + btdht/krcp.pyx | 146 +++++++++++++++++++++++++++++-------------------- 3 files changed, 104 insertions(+), 73 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index cc131bb..17529f2 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -40,7 +40,8 @@ import utils from utils import ID, nbit, nflip, nset, SplitQueue, PollableQueue from .krcp cimport BMessage -from .krcp import BError, ProtocolError, GenericError, ServerError, MethodUnknownError +from .krcp import BError, ProtocolError, GenericError, ServerError, MethodUnknownError, MissingT +from .krcp import DecodeError cdef class DHT_BASE: @@ -900,6 +901,12 @@ cdef class DHT_BASE: if e.errno not in [11, 1]: # 11: Resource temporarily unavailable self.debug(0, "send:%r : (%r, %r)" % (e, data, addr)) raise + except MissingT: + pass + except DecodeError: + pass + except TransactionIdUnknown: + pass except ValueError as e: #if self.debuglvl > 0: # traceback.print_exc() @@ -1238,14 +1245,8 @@ cdef class DHT_BASE: def _decode(self, s, addr): """decode a message""" - try: - msg = BMessage(addr=addr, debug=self.debuglvl) - msg.decode(s, len(s)) - except ValueError as e: - if self.debuglvl > 0: - traceback.print_exc() - self.debug(1, "%s for %r" % (e, addr)) - raise ProtocolError(b"") + msg = BMessage(addr=addr, debug=self.debuglvl) + msg.decode(s, len(s)) try: if msg.y == b"q": return msg, None @@ -1255,7 +1256,7 @@ cdef class DHT_BASE: query = self.transaction_type[msg.t][2] return msg, query else: - raise GenericError(msg.t, b"transaction id unknown") + raise TransactionIdUnknown(msg.t) elif msg.y == b"e": query = self.transaction_type.get(msg.t, (None, None, None))[2] if msg.errno == 201: @@ -1276,8 +1277,7 @@ cdef class DHT_BASE: self.debug(3, "ERROR:%s:%s pour %r" % (msg.errno, msg.errmsg, self.transaction_type.get(msg.t, {}))) raise MethodUnknownError(msg.t, b"Error code %s unknown" % msg.errno) else: - self.debug(0, 
"UNKNOWN MSG: %s" % msg) - raise ProtocolError(msg.t) + raise ValueError("UNKNOWN MSG: %r decoded as %r from %r" % (s, msg, addr)) except KeyError as e: raise ProtocolError(msg.t, b"Message malformed: %s key is missing" % e.args[0]) except IndexError: @@ -1296,6 +1296,9 @@ class NoTokenError(Exception): class FailToStop(Exception): pass +class TransactionIdUnknown(Exception): + pass + cdef class Node: """A node of the dht in the routing table @@ -1453,7 +1456,7 @@ cdef class Node: nodes = [] length = len(infos) if length//26*26 != length: - raise ProtocolError(b"", b"nodes length should be a multiple of 26") + raise ValueError(b"nodes length should be a multiple of 26") i=0 while i < length: if infos[i+20:i+24] != b'\0\0\0\0' and infos[i+24:i+26] != b'\0\0': diff --git a/btdht/krcp.pxd b/btdht/krcp.pxd index aa36234..51add1b 100644 --- a/btdht/krcp.pxd +++ b/btdht/krcp.pxd @@ -45,6 +45,8 @@ cdef class BMessage: cdef unicode addr_addr_3 cdef bytes addr_addr_2 cdef int addr_port + cdef int failed + cdef char* failed_msg cdef int set_r(self, int value) nogil cdef int set_a(self, int value) nogil diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 5a1c38b..fbc6128 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -78,17 +78,17 @@ cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: cdef long long ll[0] if i[0] >= max + 1: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) if data[i[0]] != b"l": return False i[0]+=1 while data[i[0]] != b'e' and i[0] < max: if not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not _decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max): with gil: - raise ValueError("Unable to parse one of the element of the list %d %r" % (i[0], data[:max])) + raise DecodeError("Unable to parse one of the element of the list %d %r" % (i[0], data[:max])) if i[0] >= max: with gil: - raise ValueError("list_pass: %s 
> %s : %r" % (i[0], max, data[:max])) + raise DecodeError("list_pass: %s > %s : %r" % (i[0], max, data[:max])) if data[i[0]] != b'e': return False i[0]+=1 @@ -99,17 +99,17 @@ cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: cdef long long ll[0] if i[0] >= max + 1: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) if data[i[0]] != b"d": return False i[0]+=1 while data[i[0]] != b'e' and i[0] < max: if not _decode_string(data, i, max, j) or (not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not _decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max)): with gil: - raise ValueError("Unable to parse one of the element of the dict %d %r" % (i[0], data[:max])) + raise DecodeError("Unable to parse one of the element of the dict %d %r" % (i[0], data[:max])) if i[0] >= max: with gil: - raise ValueError("dict_pass: %s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("dict_pass: %s > %s : %r" % (i[0], max, data[:max])) if data[i[0]] != b'e': return False i[0]+=1 @@ -137,10 +137,10 @@ cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1: return True else: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) else: with gil: - raise ValueError("%s != : at %s %r" % (data[j[0]], j[0], data[:max])) + raise DecodeError("%s != : at %s %r" % (data[j[0]], j[0], data[:max])) else: return False @@ -162,10 +162,10 @@ cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil excep return True else: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) else: with gil: - raise ValueError("%s != e at %s %r" % (data[j], j, data[:max])) + raise DecodeError("%s != e at %s %r" % (data[j], j, data[:max])) else: return False @@ -232,6 +232,10 @@ 
class MethodUnknownError(BError): def __init__(self, t, msg=b"Method Unknow"): super(MethodUnknownError, self).__init__(t=t, e=[204, msg]) +class MissingT(ValueError): + pass +class DecodeError(ValueError): + pass cdef class BMessage: cdef int set_r(self, int value) nogil: @@ -322,9 +326,6 @@ cdef class BMessage: free(self._q) cdef int set_id(self, char* value, int size) nogil except -1: - if size != 20: - with gil: - raise ValueError("id must be 20B long") self.encoded_uptodate = False if self.has_id: free(self.id) @@ -341,9 +342,6 @@ cdef class BMessage: free(self.id) cdef int set_target(self, char* value, int size) nogil except -1: - if size != 20: - with gil: - raise ValueError("id must be 20B long") self.encoded_uptodate = False if self.has_target: free(self.target) @@ -360,9 +358,6 @@ cdef class BMessage: free(self.target) cdef int set_info_hash(self, char* value, int size) nogil except -1: - if size != 20: - with gil: - raise ValueError("id must be 20B long") self.encoded_uptodate = False if self.has_info_hash: free(self.info_hash) @@ -1054,7 +1049,7 @@ cdef class BMessage: cdef long long ll[1] if i[0] > max: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) if data[i[0]] != b'l': return False i[0]+=1 @@ -1076,7 +1071,7 @@ cdef class BMessage: j[0]=0 if not _decode_string(data, i, max, j): with gil: - raise ValueError("Fail to decode dict key %d %s" % (i[0], data[:max])) + raise DecodeError("Fail to decode dict key %d %s" % (i[0], data[:max])) if (i[0]-j[0]) == 1 and strncmp(data + j[0], b"a", i[0]-j[0]) == 0: return self._decode_dict(data, i, max) and self.set_a(True) @@ -1093,11 +1088,29 @@ cdef class BMessage: elif (i[0]-j[0]) == 1 and strncmp(data + j[0], b"q", i[0]-j[0]) == 0: return _decode_string(data, i, max, j) and self.set_q(data + j[0], i[0]-j[0]) elif (i[0]-j[0]) == 2 and strncmp(data + j[0], b"id", i[0]-j[0]) == 0: - return _decode_string(data, i, max, j) and 
self.set_id(data + j[0], i[0]-j[0]) + if _decode_string(data, i, max, j): + if i[0]-j[0] != 20: + self.failed = True + self.failed_msg = b"id should be of length 20" + return self.set_id(data + j[0], i[0]-j[0]) + else: + return False elif (i[0]-j[0]) == 6 and strncmp(data + j[0], b"target", i[0]-j[0]) == 0: - return _decode_string(data, i, max, j) and self.set_target(data + j[0], i[0]-j[0]) + if _decode_string(data, i, max, j): + if i[0]-j[0] != 20: + self.failed = True + self.failed_msg = b"target should be of length 20" + return self.set_target(data + j[0], i[0]-j[0]) + else: + return False elif (i[0]-j[0]) == 9 and strncmp(data + j[0], b"info_hash", i[0]-j[0]) == 0: - return _decode_string(data, i, max, j) and self.set_info_hash(data + j[0], i[0]-j[0]) + if _decode_string(data, i, max, j): + if i[0]-j[0] != 20: + self.failed = True + self.failed_msg = b"info_hash should be of length 20" + return self.set_info_hash(data + j[0], i[0]-j[0]) + else: + return False elif (i[0]-j[0]) == 12 and strncmp(data + j[0], b"implied_port", i[0]-j[0]) == 0: return _decode_int(data, i, max, ll) and self.set_implied_port(ll[0]) elif (i[0]-j[0]) == 4 and strncmp(data + j[0], b"port", i[0]-j[0]) == 0: @@ -1110,26 +1123,25 @@ cdef class BMessage: if self._decode_values(data, i, max): return True else: - with gil: - raise ProtocolError("", "values items should be a list") - else: - #if self.debug: - # error = malloc((i[0] + 1 - j[0]) * sizeof(char)) - # error[i[0]-j[0]]='\0' - # strncpy(error, data + j[0], i[0] - j[0]) - # printf("error %s\n", error) - # free(error) - if _decode_string(data, i, max, j): - return True - if _decode_int(data, i, max, ll): - return True - if _decode_pass_list(data, i, max): - return True - if _decode_pass_dict(data, i, max): - return True + self.failed = True + self.failed_msg = b"values items should be a list" + #if self.debug: + # error = malloc((i[0] + 1 - j[0]) * sizeof(char)) + # error[i[0]-j[0]]='\0' + # strncpy(error, data + j[0], i[0] - j[0]) + # 
printf("error %s\n", error) + # free(error) + if _decode_string(data, i, max, j): + return True + if _decode_int(data, i, max, ll): + return True + if _decode_pass_list(data, i, max): + return True + if _decode_pass_dict(data, i, max): + return True with gil: - raise ValueError("Unable to decode element of dict at %d %r" % (j[0], data[:max])) + raise DecodeError("Unable to decode element of dict at %d %r" % (j[0], data[:max])) cdef int _decode_values(self, char* data, int *i, int max) nogil except -1: cdef int j[1] @@ -1138,18 +1150,18 @@ cdef class BMessage: cdef char** values if i[0] >= max: with gil: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) if not data[i[0]] == b'l': return False i[0]+=1 while _decode_string(data, i, max, j): if (i[0]-j[0]) != 6: - with gil: - raise ValueError("element of values are expected to be of length 6 and not %s" % (i[0]-j[0])) + self.failed = True + self.failed_msg = b"element of values are expected to be of length 6" c+=1 if i[0] >= max or data[i[0]] != b'e': with gil: - raise ValueError("End of values list not found %s >= %s found %s elements" % (i[0], max, c)) + raise DecodeError("End of values list not found %s >= %s found %s elements" % (i[0], max, c)) i[0] = k values = malloc(c * sizeof(char*)) c=0 @@ -1169,10 +1181,10 @@ cdef class BMessage: k = i[0] if not self._decode_dict_elm(data, i, max): with gil: - raise ValueError("fail to decode dict element %d %r" % (k, data[:max])) + raise DecodeError("fail to decode dict element %d %r" % (k, data[:max])) if data[i[0]] != b'e': with gil: - raise ValueError("End of dict not found %s>=%d %r" % (i[0], max, data[:max])) + raise DecodeError("End of dict not found %s>=%d %r" % (i[0], max, data[:max])) else: i[0]+=1 return True @@ -1192,6 +1204,7 @@ cdef class BMessage: self.r = False self.a = False self.e = False + self.failed = False self.has_y = False self.has_t = False self.has_q = False @@ -1210,17 +1223,30 
@@ cdef class BMessage: with nogil: if datalen > 0: valid = self._decode(data, &i, datalen) - if valid: - self.encoded_len = self._encode_len() - self.encoded = malloc(self.encoded_len * sizeof(char)) - strncpy(self.encoded, data, self.encoded_len) - self.encoded_uptodate = True - if not valid or not self.has_t or not self.has_y: + if not self.has_t: with gil: - if self.debug: - print("%r" % data) - if self.has_t: - raise ProtocolError(self._t[:self.t_len]) - else: - raise ProtocolError("") + raise MissingT() + if self.failed: + if self.has_y and strncmp(self._y, b"q", 1): + with gil: + raise ProtocolError(self.t, self.failed_msg) + else: + with gil: + raise DecodeError(self.failed_msg) + #if valid: + # self.encoded_len = self._encode_len() + # self.encoded = malloc(self.encoded_len * sizeof(char)) + # strncpy(self.encoded, data, self.encoded_len) + # self.encoded_uptodate = True + # every message MUST have a y field, every SHOULD have t, but we receive some + # error messages without them, so lets accepted them as en empty t + #if valid and not self.has_t and self.has_y and strncmp(self._y, b"e", 1): + # self.set_t("", 0) + if not valid or not self.has_y: + if self.has_y and strncmp(self._y, b"q", 1): + with gil: + raise ProtocolError(self.t) + else: + with gil: + raise DecodeError() return data[i:] From 4b407ecaad514b5504e8d61507ccf28544396e18 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Tue, 29 Nov 2016 18:50:56 +0100 Subject: [PATCH 11/30] A lots of comments, and a custom Scheduler class The scheduler class schedules all of the DHT and RoutingTable thread, weightless threads and io network event. 
--- MANIFEST.in | 2 +- btdht/dht.pyx | 1092 +++++++++++++++++++++++++++++------------------ btdht/utils.pyx | 454 ++++++++++++++++++-- 3 files changed, 1096 insertions(+), 452 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 70eefff..20b4e07 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,4 @@ -recursive-include btdht * +recursive-include btdht *.c *.py *.pyx *.pxd include setup.cfg include Makefile include LICENSE diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 17529f2..dc16571 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -32,7 +32,7 @@ except ImportError: import queue as Queue from functools import total_ordering, reduce from threading import Thread, Lock -from random import shuffle +from random import shuffle, randint import datrie @@ -160,12 +160,19 @@ cdef class DHT_BASE: _long_clean = 0 #: heigth of the routing table (a binary tree) during the last run of :meth:`_routine` _root_heigth = 0 + #: a :class:`utils.Scheduler` instance + _scheduler = None def __init__(self, routing_table=None, bind_port=None, bind_ip="0.0.0.0", id=None, ignored_ip=[], debuglvl=0, prefix="", process_queue_size=500, - ignored_net=None + ignored_net=None, scheduler=None ): + if self.__class__ == DHT_BASE: + raise RuntimeError( + "DHT_BASE cannot be directly instantiated, use DHT instead or any subclass that" + " may have be defined" + ) # checking the provided id or picking a random one if id is not None: if len(id) != 20: @@ -175,9 +182,11 @@ cdef class DHT_BASE: id = ID().value self.myid = ID(id) + # initialize the scheduler + self._scheduler = utils.Scheduler() if scheduler is None else scheduler + # initialising the routing table - self.root = RoutingTable() if routing_table is None else routing_table - self._to_process = PollableQueue(maxsize=process_queue_size) + self.root = RoutingTable(scheduler=self._scheduler, prefix=prefix) if routing_table is None else routing_table self.bind_port = bind_port self.bind_ip = bind_ip @@ -190,6 +199,7 @@ cdef class 
DHT_BASE: self.debuglvl = debuglvl self.prefix = prefix + # initialize public attributes self.threads = [] self.transaction_type = {} self.token = collections.defaultdict(list) @@ -198,6 +208,7 @@ cdef class DHT_BASE: self.last_msg = 0 self.last_msg_rep = 0 + # initialize private attributes self._peers=collections.defaultdict(collections.OrderedDict) self._got_peers=collections.defaultdict(collections.OrderedDict) self._get_peer_loop_list = [] @@ -213,15 +224,14 @@ cdef class DHT_BASE: self._last_socket_stats = 0 self._long_clean = 0 self._root_heigth = 0 + self._to_process = PollableQueue(maxsize=process_queue_size) self.to_schedule = [ - ("%sroutine" % self.prefix, self._routine), - ("%sget_peers_closest_loop" % self.prefix, self._get_peers_closest_loop), - ("%sprocess_loop" % self.prefix, self._process_loop) + ("%sroutine" % self.prefix, self._routine, False), + ("%sget_peers_closest_loop" % self.prefix, self._get_peers_closest_loop, False), + ("%sprocess_loop" % self.prefix, self._process_loop, True) ] - self.root.register_dht(self) - def save(self, filename=None, max_node=None): """save the current list of nodes to ``filename``. @@ -288,10 +298,11 @@ cdef class DHT_BASE: list of still alive threads as parameter. 
""" if self.stoped: - self.debug(0, "Already stoped or soping in progress") + self.debug(0, "Already stoped or stoping in progress") return - self.stoped = True + self._scheduler.del_dht(self) self.root.release_dht(self) + self.stoped = True self._threads = [t for t in self._threads[:] if t.is_alive()] #self.debug(0, "Trying to terminate thread for 1 minutes") for i in range(0, 30): @@ -322,7 +333,7 @@ cdef class DHT_BASE: """``True`` if dht is stopped but one thread or more remains alive, ``False`` otherwise""" return bool(self.stoped and [t for t in self._threads if t.is_alive()]) - def start(self, start_routing_table=True): + def start(self, start_routing_table=True, start_scheduler=True): """ Start the dht: * initialize some attributes @@ -330,15 +341,11 @@ cdef class DHT_BASE: (see :meth:`RoutingTable.register_dht`) * initialize the dht socket (see :meth:init_socket) * start the routing table if needed and ``start_routing_table` is ``True`` + * start the scheduler if needed and ``start_scheduler`` is ``True`` :param bool start_routing_table: If ``True`` (the default) also start the routing table if needed - - Notes: - The routing table needs to be started in last after all possible DHT using it. - As per default only one DHT use the routing table, the default is to start it - automatically immediatly after the DHT is started. ``start_routing_table`` allow - to diable this automatic start. + :param bool start_scheduler: If ``True``(the default) alsp start the scheduler """ if not self.stoped: self.debug(0, "Already started") @@ -358,8 +365,13 @@ cdef class DHT_BASE: self.init_socket() + self.root.register_dht(self) + self._scheduler.add_dht(self) + if start_routing_table and self.root.stoped: self.root.start() + if start_scheduler and self._scheduler._stoped: + self._scheduler.start() def is_alive(self): """Test if all threads of the dht are alive, stop the dht if one of the thread is dead @@ -368,9 +380,16 @@ cdef class DHT_BASE: threads. 
:rtype: bool """ - if self.threads and reduce(lambda x,y: x and y, [t.is_alive() for t in self.threads]): + weigthless_threads_satus = [ + self._scheduler.thread_alive(s[0]) for s in self.to_schedule + ] + if ( + self.threads is not None and + all([t.is_alive() for t in self.threads]) + and all(weigthless_threads_satus) + ): return True - elif not self._threads and self.stoped: + elif not self._threads and self.stoped and not any(weigthless_threads_satus): return False else: self.debug(0, "One thread died, stopping dht") @@ -427,7 +446,11 @@ cdef class DHT_BASE: self.sock.setsockopt(socket.IPPROTO_IP, IN.IP_MTU_DISCOVER, IN.IP_PMTUDISC_DO) self.sock.setblocking(0) if self.bind_port: - self.sock.bind((self.bind_ip, self.bind_port)) + try: + self.sock.bind((self.bind_ip, self.bind_port)) + except socket.error: + self.debug(-10, "fail to bind to port %s" % self.bind_port) + raise else: self.sock.bind((self.bind_ip, 0)) # set :attr:`bind_port` to the port choosen by the system @@ -585,8 +608,11 @@ cdef class DHT_BASE: return peers def _get_peers_closest_loop(self): + """ + Weigthless thread dealing we the :attr:`_get_peer_loop_list` heapq. Its execution + is handled by :attr:`_scheduler` of type :class:`utils.Scheduler` + """ yield 0 - """Function run by the thread exploring the DHT""" def on_stop(hash, typ): self.root.release_torrent(hash) if typ == "peers": @@ -665,7 +691,6 @@ cdef class DHT_BASE: del tried_nodes del closest yield (time.time() + tosleep) - #self.sleep(tosleep, stop) def _get_peers(self, info_hash, compact=True, errno=0): """ @@ -740,10 +765,23 @@ cdef class DHT_BASE: else: return list(self.root.get_closest_nodes(id)) - def bootstarp(self): - """boostrap the DHT to some wellknown nodes""" + def bootstarp( + self, + addresses=[ + ("router.utorrent.com", 6881), ("grenade.genua.fr", 6880), ("dht.transmissionbt.com", 6881) + ] + ): + """ + Boostrap the DHT to some wellknown nodes + + :param list addresses: A list of couple (node addresse, node ip). 
The default the list + of the following nodes + * router.utorrent.com:6881 + * dht.transmissionbt.com:6881 + * grenade.genua.fr:6880 + """ self.debug(0,"Bootstraping") - for addr in [("router.utorrent.com", 6881), ("genua.fr", 6880), ("dht.transmissionbt.com", 6881)]: + for addr in addresses: msg = BMessage() msg.y = b'q' msg.q = b"find_node" @@ -756,7 +794,12 @@ cdef class DHT_BASE: def _update_node(self, obj): - """update a node the in routing table on msg received""" + """ + Update a node the in routing table on msg receival, especially its + :attr:`Node.last_query` :attr:`Node.last_response` and :attr:`Node.failed` attributes + + :param brcp.BMessage obj: A reived message + """ if obj.y == b"q" or obj.y == b"r": id = obj.get(b"id") if id: @@ -778,36 +821,16 @@ cdef class DHT_BASE: self.debug(2, "obj of type %r" % obj.y) def sendto(self, msg, addr): - """program a msg to be send over the network - - Args: - msg (str): message to be send to - addr (tuple of str, port): address to send to """ - self.to_send.put((msg, addr)) - - def _socket_loop(self): - """function lauch by the thread receiving the udp messages from the DHT""" - while True: - if self.stoped: - return - try: - (sockets_read, sockets_write, _) = select.select( - [self.sock, self.to_send.sock], - [] if self.to_send.empty() else [self.sock], - [], - 0.1 - ) - except socket.error as e: - self.debug(0, "recv:%r" %e ) - raise + Program a msg to be send over the network - for sock in sockets_read: - if sock == self.sock: - self._process_incoming_message() - elif sock == self.to_send.sock and sockets_write: - self._process_outgoing_message() + :param bytes msg: The message to send + :param tuple addr: A couple (ip, port) to send the message to. ip is in dotted notation + Notes: + The message is push to the :attr:`to_send` queue. 
+ """ + self.to_send.put((msg, addr)) def _process_outgoing_message(self): """ @@ -917,7 +940,16 @@ cdef class DHT_BASE: cdef void _set_transaction_id(self, BMessage query, int id_len=6): - """Set the transaction id (key t of the dictionnary) on a query""" + """ + Set the transaction id (key t of the dictionnary) on a query + + :param krcp.BMessage query: A query message + :param int id_len: The len of the generated transaction id. The default is 6. + + Notes: + In case of collision with a already generated id, ``_set_transaction_id`` is + called again, incrementing ``id_len``. + """ id = os.urandom(id_len) if id in self.transaction_type: self._set_transaction_id(query, id_len=id_len+1) @@ -925,9 +957,19 @@ cdef class DHT_BASE: query.set_t(id, id_len) def _get_token(self, ip): - """Generate a token for `ip`""" + """ + Return a token for ``ip`` + + :param str ip: A ip address in dotted notation + :return: A random id of lendth 4 + :rtype: bytes + + Notes: + Generate at most 1 new token by ip every 5 min. A token is considered valid + until 10 min after it has been generated. 
+ + """ if ip in self.token and self.token[ip][-1][1] < 300: - #self.token[ip] = (self.token[ip][0], time.time()) return self.token[ip][-1][0] else: id = os.urandom(4) @@ -935,7 +977,13 @@ cdef class DHT_BASE: return id def _get_valid_token(self, ip): - """Return a list of valid tokens for `ip`""" + """ + Return a list of valid tokens for ``ip`` + + :param str ip: A ip address in dotted notation + :return: A list of valid tokens for ``ip`` + :rtype: list + """ if ip in self.token: now = time.time() return [t[0] for t in self.token[ip] if (now - t[1]) < 600] @@ -950,7 +998,25 @@ cdef class DHT_BASE: pass def _clean(self): - """Function cleaning datastructures of the DHT""" + """ + Function cleaning datastructures of the DHT + + The following cleaning is done every 15 seconds + * delete entries from :attr:`transaction_type` (query without response) older than + 30 seconds + * Remove dead threads from :attr:`_threads` + * If no message has been received since more than 2 minutes, stop the DHT + * If no response to our query has been received since more than 5 minutes, + stop the DHT + * call the :meth:`clean` method + + The following cleaning is done every 15 minutes + * delete expired tokens (older than 10 min) from :attr:`token` + * delete received token older than 10 min from :attr:`mytoken` + * delete peers not annonced since more than 30min from :attr:`_peers` + * delete peers from get_peer response older than 15min from :attr:`_got_peers` + * call the :meth:`clean_long` method + """ now = time.time() to_delete = [] @@ -963,10 +1029,13 @@ cdef class DHT_BASE: self._threads = [t for t in self._threads[:] if t.is_alive()] if now - self.last_msg > 2 * 60: - self.debug(0, "No msg since more then 2 minutes") + self.debug(-10, "No msg since more than 2 minutes on udp port %d" % self.bind_port) self.stop() elif now - self.last_msg_rep > 5 * 60: - self.debug(0, "No msg response since more then 5 minutes") + self.debug( + -10, + "No msg response since more than 5 minutes 
on udp port %d" % self.bind_port + ) self.stop() self.clean() @@ -1034,20 +1103,22 @@ cdef class DHT_BASE: self._long_clean = now def build_table(self): - """Build the routing table by querying find_nodes on his own id""" + """Build the routing table by querying find_nodes on the dht own id :attr:`myid`""" nodes = self.get_closest_nodes(self.myid) for node in nodes: node.find_node(self, self.myid) return bool(nodes) def _routine(self): - """function lauch by the thread performing some routine (boostraping, building the routing table, cleaning) on the DHT""" + """ + Weigthless thread performing some routine (boostraping, building the routing table, + cleaning) on the DHT + """ yield 0 next_routine = time.time() + 15 while True: if self.stoped: return - #self.sleep(next_routine - time.time()) yield next_routine now = time.time() next_routine = now + 15 @@ -1059,7 +1130,7 @@ cdef class DHT_BASE: if self._root_heigth != self.root.heigth(): self.debug(1, "Fetching my own id") if self.build_table(): - self._root_heigth += 1 + self._root_heigth = self.root.heigth() # displaying some stats (in_s, out_s, delta) = self._socket_stats() @@ -1067,17 +1138,40 @@ cdef class DHT_BASE: (nodes, goods, bads) = self.root.stats() if goods <= 0: self.bootstarp() - self.debug(0 if in_s <= 0 and out_s > 0 and goods < 20 else 1, "%d nodes, %d goods, %d bads | in: %s, out: %s en %ss" % (nodes, goods, bads, in_s, out_s, int(delta))) + next_routine = now + 1 + self.debug( + 0 if in_s <= 0 and out_s > 0 and goods < 20 else 1, + "%d nodes, %d goods, %d bads | in: %s, out: %s en %ss" % ( + nodes, goods, bads, in_s, out_s, int(delta) + ) + ) def register_message(self, msg): - """register a dht message to be processed + """ + Register a dht message to be processed by the following user defined functions + * :meth:`on_error` + * :meth:`on_ping_query` + * :meth:`on_ping_response` + * :meth:`on_find_node_query` + * :meth:`on_find_node_response` + * :meth:`on_get_peers_query` + * 
:meth:`on_get_peers_response` + * :meth:`on_announce_peer_query` + * :meth:`on_announce_peer_response` + * ... + + :param bytes msg: A dht message to register like ``b'error'``, ``b'ping'``, + ``b'find_node'``, ``b'get_peers'`` or ``b'announce_peer'`` - Note: - on query receival, the function on_`msg`_query will be call with the - query as parameter - on response receival, the function on_`msg`_response will be called with - the query and the response as parameters + Note: + * on query reception, the function on_``msg``_query will be call with the + query as parameter + * on response reception, the function on_``msg``_response will be called with + the query and the response as parameters + * on error reception, the function ``on_error`` will be called with the error and + the query as parameter + * The message kind is in the ``q`` key of any dht query message Args: msg (str): a dht message type like ping, find_node, get_peers or announce_peer @@ -1085,76 +1179,127 @@ cdef class DHT_BASE: self._to_process_registered.add(msg) def on_error(self, error, query=None): - """function called then a query has be responded by an error message. Can safely the overloaded + """ + Function called then a query has be responded by an error message. + Can safely the overloaded. - Args: - error (BError): An error instance - query (BMessage, optional): query that was reply by an error + :param krcp.Berror error: An error instance + :param krcp.BMessage query: An optional query raising the error message + + Notes: + For this function to be called on error reception, you need to call + :meth:`register_message` with the parameter ``b'error'`` """ pass def on_ping_response(self, query, response): - """function called on a ping response reception. Can safely the overloaded + """ + Function called on a ping response reception. 
Can safely the overloaded - Args: - query (BMessage): the sent query object - response (BMessage): the received response object + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + + Notes: + For this function to be called on ping response reception, you need to call + :meth:`register_message` with the parameter ``b'ping'`` """ pass def on_find_node_response(self, query, response): - """function called on a find_node response reception. Can safely the overloaded + """ + Function called on a find_node response reception. Can safely the overloaded - Args: - query (BMessage): the sent query object - response (BMessage): the received response object + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + + Notes: + For this function to be called on find_node response reception, you need to call + :meth:`register_message` with the parameter ``b'find_node'`` """ pass def on_get_peers_response(self, query, response): - """function called on a get_peers response reception. Can safely the overloaded + """ + Function called on a get_peers response reception. Can safely the overloaded - Args: - query (BMessage): the sent query object - response (BMessage): the received response object + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + + Notes: + For this function to be called on get_peers response reception, you need to call + :meth:`register_message` with the parameter ``b'get_peers'`` """ pass def on_announce_peer_response(self, query, response): - """function called on a announce_peer response reception. Can safely the overloaded + """ + Function called on a announce_peer response reception. 
Can safely the overloaded - Args: - query (BMessage): the sent query object - response (BMessage): the received response object + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + + Notes: + For this function to be called on announce_peer response reception, you need to call + :meth:`register_message` with the parameter ``b'announce_peer'`` """ pass def on_ping_query(self, query): - """function called on a ping query reception. Can safely the overloaded + """ + Function called on a ping query reception. Can safely the overloaded - Args: - query (BMessage): the received query object + :param krcp.BMessage query: the received query object + + Notes: + For this function to be called on ping query reception, you need to call + :meth:`register_message` with the parameter ``b'ping'`` """ pass def on_find_node_query(self, query): - """function called on a find_node query reception. Can safely the overloaded + """ + Function called on a find_node query reception. Can safely the overloaded - Args: - query (BMessage): the received query object + :param krcp.BMessage query: the received query object + + Notes: + For this function to be called on find_node query reception, you need to call + :meth:`register_message` with the parameter ``b'find_node'`` """ pass def on_get_peers_query(self, query): - """function called on a get_peers query reception. Can safely the overloaded + """ + Function called on a get_peers query reception. Can safely the overloaded - Args: - query (BMessage): the received query object + :param krcp.BMessage query: the received query object + + Notes: + For this function to be called on get_peers query reception, you need to call + :meth:`register_message` with the parameter ``b'get_peers'`` """ pass def on_announce_peer_query(self, query): - """function called on a announce query reception. Can safely the overloaded + """ + Function called on a announce query reception. 
Can safely the overloaded - Args: - query (BMessage): the received query object + :param krcp.BMessage query: the received query object + + Notes: + For this function to be called on announce_peer query reception, you need to call + :meth:`register_message` with the parameter ``b'announce_peer'`` """ pass def _on_ping_response(self, query, response): + """ + Function called on a ping response reception, do not overload, use + :meth:`on_ping_response` instead. + + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + """ pass def _on_find_node_response(self, query, response): + """ + Function called on a find_node response reception, do not overload, use + :meth:`find_node_response` instead. + + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + """ nodes = Node.from_compact_infos(response.get(b"nodes", b"")) for node in nodes: try: @@ -1164,6 +1309,13 @@ cdef class DHT_BASE: raise self.debug(2, "%s nodes added to routing table" % len(nodes)) def _on_get_peers_response(self, query, response): + """ + Function called on a get_peers response reception, do not overload, use + :meth:`on_get_peers_response` instead. + + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + """ token = response.get(b"token") if token: self.mytoken[response[b"id"]]=(token, time.time()) @@ -1174,26 +1326,75 @@ cdef class DHT_BASE: ip = socket.inet_ntoa(ip) self._add_peer_queried(query[b"info_hash"], ip=ip, port=port) def _on_announce_peer_response(self, query, response): + """ + Function called on a announce_peer response reception, do not overload, use + :meth:`on_announce_peer_response` instead. 
+ + :param krcp.BMessage query: the sent query object + :param krcp.BMessage response: the received response object + """ pass def _on_ping_query(self, query): + """ + Function called on a ping query reception, do not overload, use + :meth:`on_ping_query` instead. + + :param krcp.BMessage query: the received query object + """ pass def _on_find_node_query(self, query): + """ + Function called on a find_node query reception, do not overload, use + :meth:`on_find_node_query` instead. + + :param krcp.BMessage query: the received query object + """ pass def _on_get_peers_query(self, query): + """ + Function called on a get_peers query reception, do not overload, use + :meth:`on_get_peers_query` instead. + + :param krcp.BMessage query: the received query object + """ pass def _on_announce_peer_query(self, query): + """ + Function called on a announce_peer query reception, do not overload, use + :meth:`on_announce_peer_query` instead. + + :param krcp.BMessage query: the received query object + """ try: if query.get(b"implied_port", 0) != 0: if query.addr[1] > 0 and query.addr[1] < 65536: - self._add_peer(info_hash=query[b"info_hash"], ip=query.addr[0], port=query.addr[1]) + self._add_peer( + info_hash=query[b"info_hash"], + ip=query.addr[0], + port=query.addr[1] + ) else: - self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query.addr[1]) + self.debug( + 1, + "Invalid port number on announce %s, sould be within 1 and 65535" % ( + query.addr[1], + ) + ) else: if query[b"port"] > 0 and query[b"port"] < 65536: - self._add_peer(info_hash=query[b"info_hash"], ip=query.addr[0], port=query[b"port"]) + self._add_peer( + info_hash=query[b"info_hash"], + ip=query.addr[0], + port=query[b"port"] + ) else: - self.debug(1, "Invalid port number on announce %s, sould be within 1 and 65535" % query["port"]) + self.debug( + 1, + "Invalid port number on announce %s, sould be within 1 and 65535" % ( + query["port"], + ) + ) except KeyError as e: raise 
ProtocolError(query.t, b"Message malformed: %s key is missing" % e.args[0]) @@ -1212,7 +1413,6 @@ cdef class DHT_BASE: self._to_process.put_nowait((query, obj)) except Queue.Full: self.debug(0, "Unable to queue msg to be processed, QueueFull") - #getattr(self, 'on_%s_response' % query.q)(query, obj) def _process_query(self, obj): if obj.q in [b"find_node", b"ping", b"get_peers", b"announce_peer"]: @@ -1222,7 +1422,6 @@ cdef class DHT_BASE: self._to_process.put_nowait((obj, None)) except Queue.Full: self.debug(0, "Unable to queue msg to be processed, QueueFull") - #getattr(self, 'on_%s_query' % obj.q)(obj) def _process_loop(self): """function lauch by the thread processing messages""" @@ -1244,7 +1443,15 @@ cdef class DHT_BASE: yield def _decode(self, s, addr): - """decode a message""" + """ + decode a message + + :param bytes s: A newly received message + :param tuple addr: A couple (ip, port) with ip in dotted notation + :return A couple (decoded message, query) if the message is a response or an error, + (decoded message, None) otherwise + :rtype: tuple + """ msg = BMessage(addr=addr, debug=self.debuglvl) msg.decode(s, len(s)) try: @@ -1300,46 +1507,38 @@ class TransactionIdUnknown(Exception): pass cdef class Node: - """A node of the dht in the routing table + """ + A node of the dht in the routing table - Note: - A good node is a node has responded to one of our queries within the last - 15 minutes. A node is also good if it has ever responded to one of our - queries and has sent us a query within the last 15 minutes. After 15 minutes - of inactivity, a node becomes questionable. Nodes become bad when they fail - to respond to multiple queries in a row. 
- - Attributes: - id (str): 160bits (20 Bytes) identifier of the node - ip (str): ip address of the node in doted notation - port (int): port of the node - good (bool): True if the node is good - bad (bool): True if the node is bad - last_response (bool): last response date in secondes since epoch - last_query (bool): last query date in secondes since epoch - failed (int): number of reponse pending (increse on sending query to the - node, set to 0 on reception from the node) + :param bytes id: The 160 bits (20 Bytes) long identifier of the node + :param str ip: The ip, in dotted notation of the node + :param int port: The udp dht port of the node + :param int last_response: Unix timestamp of the last received response from this node + :param int last_query: Unix timestamp of the last received query from this node + :param int failed: Number of consecutive queries sended to the node without responses + Note: + A good node is a node has responded to one of our queries within the last + 15 minutes. A node is also good if it has ever responded to one of our + queries and has sent us a query within the last 15 minutes. After 15 minutes + of inactivity, a node becomes questionable. Nodes become bad when they fail + to respond to multiple queries in a row (3 query in a row in this implementation). 
""" + #: 160bits (20 Bytes) identifier of the node cdef char _id[20] + #: ip address of the node, encoded on 4 bytes cdef char _ip[4] + #: The udp port of the node cdef int _port + #: Unix timestamp of the last received response from this node cdef int _last_response + #: Unix timestamp of the last received query from this node cdef int _last_query + #: number of reponse pending (increase on sending query to the node, set to 0 on reception from + #: the node) cdef int _failed - def __init__(self, id,ip,int port, int last_response=0,int last_query=0,int failed=0): - """ - Args: - id (str): A 160bits (20 Bytes) identifier - ip (str): ip address of the node in doted notation - port (int): port of the node - last_response (int, optional): last response (secondes since epoch) - from the node to one of our query. default is 0 - last_query (int, optional): last query (secondes since epoch) from - the node. default is 0 - failed (int, optional): number of pending response from the node. default is 0 - """ + def __init__(self, bytes id, ip, int port, int last_response=0, int last_query=0, int failed=0): cdef char* cip cdef char* cid if ip[0] == u'0': @@ -1378,28 +1577,30 @@ cdef class Node: else: return False - - def __dealloc__(self): - with nogil: - #free(self._id) - #free(self._ip) - pass - + #: udp port of the node property port: def __get__(self):return self._port def __set__(self, int i):self._port = i + #: Unix timestamp of the last received response from this node property last_response: def __get__(self):return self._last_response def __set__(self, int i):self._last_response = i + #: Unix timestamp of the last received query from this node property last_query: def __get__(self):return self._last_query def __set__(self, int i):self._last_query = i + #: number of reponse pending (increase on sending query to the node, set to 0 on reception from + #: the node) property failed: def __get__(self):return self._failed def __set__(self, int i):self._failed = i + #: 160bits 
(20 Bytes) identifier of the node property id: def __get__(self): return self._id[:20] + #: ``True`` if the node is a good node. A good node is a node has responded to one of our + #: queries within the last 15 minutes. A node is also good if it has ever responded to one of + #: our queries and has sent us a query within the last 15 minutes. property good: def __get__(self): now = time.time() @@ -1407,11 +1608,14 @@ cdef class Node: # A node is also good if it has ever responded to one of our queries and has sent us a query within the last 15 minutes. return ((now - self.last_response) < 15 * 60) or (self.last_response > 0 and (now - self.last_query) < 15 * 60) + #: ``True`` if the node is a bad node (communication with the node is not possible). Nodes + #: become bad when they fail to respond to 3 queries in a row. property bad: def __get__(self): # Nodes become bad when they fail to respond to multiple queries in a row. return not self.good and self.failed > 3 + #: ip address of the node in dotted notation property ip: def __get__(self): ip = socket.inet_ntoa(self._ip[:4]) @@ -1431,27 +1635,37 @@ cdef class Node: return "Node: %s:%s" % (self.ip, self.port) def compact_info(self): + """ + Return the compact contact information of the node + + Notes: + Contact information for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end. + Contact information for nodes is encoded as a 26-byte string. + Also known as "Compact node info" the 20-byte Node ID in network byte + order has the compact IP-address/port info concatenated to the end. + """ return struct.pack("!20s4sH", self.id, self._ip, self.port) @classmethod - def from_compact_infos(cls, infos, v=""): - """Instancy nodes from multiple compact node info string - - Note: - Contact information for peers is encoded as a 6-byte string. 
- Also known as "Compact IP-address/port info" the 4-byte IP address - is in network byte order with the 2 byte port in network byte order - concatenated onto the end. - Contact information for nodes is encoded as a 26-byte string. - Also known as "Compact node info" the 20-byte Node ID in network byte - order has the compact IP-address/port info concatenated to the end. + def from_compact_infos(cls, infos): + """ + Instancy nodes from multiple compact node information string - Args: - infos (str): a string contening multiple compact node info - so its length should be a multiple of 26 + :param bytes infos: A string of size multiple of 26 + :return: A list of :class:`Node` instances + :rtype: list - Returns: - a list of Node instance + Notes: + Contact information for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end. + Contact information for nodes is encoded as a 26-byte string. + Also known as "Compact node info" the 20-byte Node ID in network byte + order has the compact IP-address/port info concatenated to the end. """ nodes = [] length = len(infos) @@ -1469,29 +1683,27 @@ cdef class Node: @classmethod def from_compact_info(cls, info): - """Instancy nodes from multiple compact node info string - - Note: - Contact information for peers is encoded as a 6-byte string. - Also known as "Compact IP-address/port info" the 4-byte IP address - is in network byte order with the 2 byte port in network byte order - concatenated onto the end. - Contact information for nodes is encoded as a 26-byte string. - Also known as "Compact node info" the 20-byte Node ID in network byte - order has the compact IP-address/port info concatenated to the end. 
+ """ + Instancy a node from its compact node infoformation string - Args: - infos (str): a string contening one compact node info - so its length should be exactly 26 + :param bytes info: A string of length 26 + :return: A node instance + :rtype: Node - Returns: - a Node instance + Notes: + Contact information for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end. + Contact information for nodes is encoded as a 26-byte string. + Also known as "Compact node info" the 20-byte Node ID in network byte + order has the compact IP-address/port info concatenated to the end. """ if len(info) != 26: raise EnvironmentError("compact node info should be 26 chars long") (id, ip, port) = struct.unpack("!20s4sH", info) ip = socket.inet_ntoa(ip) - id = ID(id) + #id = ID(id) return cls(id, ip, port) @@ -1508,10 +1720,10 @@ cdef class Node: return hash(self.id) def ping(self, DHT_BASE dht): - """send a ping query to the node + """ + Send a ping query to the node - Args: - dht (DHT_BASE): a dht instance + :param DHT_BASE dht: The dht instance to use to send the message """ id = dht.myid.value msg = BMessage() @@ -1524,11 +1736,11 @@ cdef class Node: dht.sendto(msg.encode(), (self.ip, self.port)) def find_node(self, DHT_BASE dht, target): - """send a find_node query to the node + """ + Send a find_node query to the node - Args: - dht (DHT_BASE): a dht instance - target (str): the 160bits (20 bytes) target node id + :param DHT_BASE dht: The dht instance to use to send the message + :param bytes target: the 160bits (20 bytes) target node id """ id = dht.myid.value target = ID.to_bytes(target) @@ -1544,11 +1756,11 @@ cdef class Node: dht.sendto(msg.encode(), (self.ip, self.port)) def get_peers(self, DHT_BASE dht, info_hash): - """send a get_peers query to the node + """ + Send a get_peers query to the node - Args: - dht (DHT_BASE): a 
dht instance - info_hash (str): a 160bits (20 bytes) to get downloading peers + :param DHT_BASE dht: The dht instance to use to send the message + :param bytes info_hash: a 160bits (20 bytes) torrent id """ id = dht.myid.value info_hash = ID.to_bytes(info_hash) @@ -1564,12 +1776,12 @@ cdef class Node: dht.sendto(msg.encode(), (self.ip, self.port)) def announce_peer(self, DHT_BASE dht, info_hash, int port): - """send a announce_peer query to the node + """ + Send an announce_peer query to the node - Args: - dht (DHT_BASE): a dht instance - info_hash (str): a 160bits (20 bytes) hash to announce download - port (int): port where data for `info_hash` is avaible + :param DHT_BASE dht: The dht instance to use to send the message + :param bytes info_hash: A 160bits (20 bytes) torrent id to announce + :param int port: The tcp port where data for ``info_hash`` is available """ cdef char* tk @@ -1595,25 +1807,30 @@ cdef class Node: @total_ordering class Bucket(list): - """A bucket of nodes in the routing table + """ + A bucket of nodes in the routing table - Attributes: - to_refresh (bool): True if the bucket need to be refresh - max_size (int): maximun number of element in the bucket - last_changed (int): last time the bucket had been updated un secodes - since epoch + :param bytes id: A prefix identifier from 0 to 160 bits for the bucket + :param int id_length: number of significant bits in ``id`` (can also be seen as the length + between the root and the bucket in the routing table) + :param iterable init: some values to store initially in the bucket """ + #: maximum number of elements in the bucket max_size = 8 + #: Unix timestamp, last time the bucket had been updated last_changed = 0 + #: A prefix identifier from 0 to 160 bits for the bucket + id = None + #: number of significant bits in :attr:`id` + id_length = 0 __slot__ = ("id", "id_length") def own(self, id): - """Args: - id (str): a 160bit (20 Bytes) identifier - - Returns: - True if `id` is handle by this bucket + """ +
:param bytes id: A 160bit (20 Bytes) identifier + :return: ``True`` if ``id`` is handled by this bucket + :rtype: bool """ if not self.id: return True @@ -1631,22 +1848,18 @@ class Bucket(list): return False def __init__(self, id=b"", id_length=0, init=None): - """ - Args: - id (str): prefix identifier for the bucket - id_length (int): number of signifiant bit in `id` - (can also be seen as the length between the root - and the bucket in the routing table) - init (iterable, optional): some values to store - initialy in the bucket - """ self.id = id self.id_length = id_length # en bit if init: super(Bucket, self).__init__(init) def random_id(self): - """return a random id handle by the bucket""" + """ + :return: A random id handled by the bucket + :rtype: bytes + + This is used to send find_nodes for random ids in a bucket + """ id = ID() id_length = self.id_length id_end = bytes(bytearray((id[id_length//8],))) @@ -1668,18 +1881,33 @@ class Bucket(list): return ID(self.id[0:id_length//8] + char + id[id_length//8+1:]) def get_node(self, id): - """return the node with id `id` or raise NotFound""" + """ + :return: A :class:`Node` with :attr:`Node.id` equal to ``id`` + :rtype: Node + :raises: :class:`NotFound` if no node is found within this bucket + """ for n in self: if n.id == id: return n raise NotFound() def add(self, dht, node): - """Try to add a node to the bucket + """ + Try to add a node to the bucket.
- Args: - dht (DHT_BASE): a dht instance - node (Node): a node instance + :param DHT_BASE dht: The dht instance the node to add is from + :param Node node: A node to add to the bucket + :raises: :class:`BucketFull` if the bucket is full + + Notes: + The addition of a node to a bucket is done as follow: + * if the bucket is not full, just add the node + * if the bucket is full + * if there is some bad nodes in the bucket, remove a bad node and add the + node + * if there is some questionnable nodes (neither good not bad), send a ping + request to the oldest one, discard the node + * if all nodes are good in the bucket, discard the node """ if not self.own(node.id): raise ValueError("Wrong Bucket") @@ -1712,14 +1940,14 @@ class Bucket(list): raise BucketFull() def split(self, rt, dht): - """Split the bucket into two buckets - - Args: - rt (RoutingTable): a routing table instance - dht (DHT_BASE): a dht instance + """ + Split the bucket into two buckets - Returns: - a tuple of two buckets + :param RoutingTable rt: The routing table handling the bucket + :param DHT_BASE dht: A dht using ``rt`` as routing table + :return: A couple of two bucket, the first one this the last significant bit of its id + equal to 0, the second, equal to 1 + :rtype: tuple """ if len(self) < self.max_size: raise BucketNotFull("Bucket not Full %r" % self) @@ -1746,13 +1974,12 @@ class Bucket(list): return (b2, b1) def merge(self, bucket): - """Merge the bucket with `bucket` - - Args: - bucket (Bucket): bucket to be merged with + """ + Merge the bucket with ``bucket`` - Returns - A merged bucket + :param Bucket bucket: a bucket to be merged with + :return: The merged bucket + :rtype: Bucket """ l = [n for l in zip(self, bucket) for n in l if n.good][:self.max_size] return Bucket(id=self.id, id_length=self.id_length - 1, init=l) @@ -1781,32 +2008,65 @@ class Bucket(list): except AttributeError: raise ValueError("%s not comparable with %s" % (other.__class__.__name__, self.__class__.__name__)) 
-class DHT(DHT_BASE): - pass + +DHT = type("DHT", (DHT_BASE,), {'__doc__': DHT_BASE.__doc__}) + + class NotFound(Exception): pass class RoutingTable(object): """ - Attributs: - trie (datrie.Trie): the routing table storage data structure - threads (list of Thread): threads of the routing table - zombie (bool): True if dht is stopped but one thread or more remains - alive + A routing table for one or more :class:`DHT_BASE` instances + + :param utils.Scheduler scheduler: A scheduler instance + :param int debuglvl: Level of verbosity, default to ``0``. """ - #__slot__ = ("trie", "_heigth", "split_ids", "info_hash", "lock", "_dhts", "stoped") - def __init__(self, debuglvl=0): - """ - Args: - debuglvl (int, optional): level of verbosity. default is 0 - """ + #: :class:`int` the routing table instance verbosity level + debuglvl = 0 + #: the routing table storage data structure, an instance of :class:`datrie.Trie` + trie = None + #: the state (stoped ?) of the routing table + stoped = True + #: Is a merge sheduled ? 
+ need_merge = False + #: :class:`list` of the :class:`Thread` of the routing table instance + threads = [] + #: A :class:`list` of couples (weightless thread name, weightless thread function) + to_schedule = [] + #: prefix in logs and threads name + prefix = "" + #: current height of the tree :attr:`trie` structure of the routing table + _heigth = 1 + #: A set of registered dht instance with this routing table + _dhts = set() + #: A set of torrent id + _info_hash = set() + #: a set of dht id + _split_ids = set() + #: internal list of supposed alive threads + _threads = [] + #: a set of bucket id to merge (keys of :class:`datrie.Trie`) + _to_merge = set() + #: internal list of supposed zombie (asked to stop but still running) threads + _threads_zombie= [] + #: last debug message, used to prevent duplicate messages over 5 seconds + _last_debug = "" + #: time of the last debug message, used to prevent duplicate messages over 5 seconds + _last_debug_time = 0 + #: a :class:`utils.Scheduler` instance + _scheduler = None + #: A :class:`threading.Lock` instance preventing concurrent starts + _lock = None + + def __init__(self, scheduler, debuglvl=0, prefix=""): self.debuglvl = debuglvl self.trie = datrie.Trie(u"01") self.trie[u""]=Bucket() self._heigth=1 - self.split_ids = set() - self.info_hash = set() - self.lock = Lock() + self._split_ids = set() + self._info_hash = set() + self._lock = Lock() self._dhts = set() self.stoped = True self.need_merge = False @@ -1816,9 +2076,11 @@ class RoutingTable(object): self._threads_zombie= [] self._last_debug = "" self._last_debug_time = 0 + self._scheduler = scheduler + self.prefix = prefix self.to_schedule = [ - ("RT:merge_loop", self._merge_loop), - ("RT:routine", self._routine), + ("RT%s:merge_loop" % prefix, self._merge_loop), + ("RT%s:routine" % prefix, self._routine), ] def stop_bg(self): @@ -1829,8 +2091,10 @@ class RoutingTable(object): def stop(self): """stop the routing table and wait for all threads to terminate"""
if self.stoped: - self.debug(0, "Already stoped or soping in progress") + self.debug(0, "Already stoped or stoping in progress") return + for s in self.to_schedule: + self._scheduler.del_thread(s[0]) self.stoped = True self._threads = [t for t in self._threads[:] if t.is_alive()] #self.debug(0, "Trying to terminate thread for 1 minutes") @@ -1854,7 +2118,7 @@ class RoutingTable(object): def start(self, **kwargs): """start the routing table""" - with self.lock: + with self._lock: if not self.stoped: self.debug(0, "Already started") return @@ -1863,36 +2127,27 @@ class RoutingTable(object): return self._threads_zombie self.stoped = False - # Le the routing table schedule the DHT iterators - to_schedule = [] - to_schedule.extend(self.to_schedule) - for dht in self._dhts: - if dht.stoped is True: - raise RuntimeError( - "Try to start the routing table before once of its DHT instances" - ) - to_schedule.extend(dht.to_schedule) - - self.threads = [] - t = Thread(target=utils.schedule, args=(to_schedule,)) - t.setName("RT:scheduler") - t.daemon = True - t.start() - self._threads.append(t) - self.threads.append(t) - t = Thread(target=self._dhts_send_loop) - t.setName("RT:dhts_send_loop") - t.daemon = True - t.start() - self._threads.append(t) - self.threads.append(t) + for (name, function) in self.to_schedule: + self._scheduler.add_thread(name, function) def is_alive(self): - """return True if all routing table threads are alive. Otherwire return False - and stop the routing table""" - if self.threads and reduce(lambda x,y: x and y, [t.is_alive() for t in self.threads]): + """ + Test if all routing table threads are alive. 
If a thread is found dead, stop the + routingtable + + :return: ``True`` if all routing table threads are alive, ``False`` otherwise + :rtype: bool + """ + weigthless_threads_satus = [ + self._scheduler.thread_alive(s[0]) for s in self.to_schedule + ] + if ( + self.threads is not None and + all([t.is_alive() for t in self.threads]) and + all(weigthless_threads_satus) + ): return True - elif not self._threads and self.stoped: + elif not self._threads and self.stoped and not any(weigthless_threads_satus): return False else: self.debug(0, "One thread died, stopping dht") @@ -1900,19 +2155,28 @@ class RoutingTable(object): return True def register_torrent(self, id): - """register a torrent `id` (info_hash) for spliting bucket containing this `id` + """ + Register a torrent ``id`` (info_hash) for being tracked by the routing table. + This means that if a node need to be added to the bucket handling ``id``and the + bucket is full, then, this bucket will be split into 2 buckets - Note: - torrent can automaticaly be release by a dht instance after a get_peers. - For keeping a torrent registered, use the method `register_torrent_longterm` + :param bytes id: A 160 bits (20 Bytes) torrent identifier + + Note: + torrent ids can automaticaly be release by a dht instance after a get_peers. 
+ For keeping a torrent registered, use the method :meth:`register_torrent_longterm` """ - self.info_hash.add(id) + self._info_hash.add(id) def release_torrent(self, id): - """release a torrent `id` (info_hash) and program the routing table to be merged""" + """ + Release a torrent ``id`` (info_hash) and program the routing table to be merged + + :param bytes id: A 160 bits (20 Bytes) torrent identifier + """ try: - self.info_hash.remove(id) - if not id in self.split_ids: + self._info_hash.remove(id) + if not id in self._split_ids: try: key = self.trie.longest_prefix(utils.id_to_longid(ID.to_bytes(id))) #self._to_merge.add(key) @@ -1925,12 +2189,12 @@ class RoutingTable(object): pass def _merge_loop(self): + """Weigthless thread handling the merge of the routing table""" yield 0 next_merge = 0 # at most one full merge every 10 minutes next_full_merge = time.time() + 10 * 60 while True: - #self.sleep(max(next_merge - time.time(), 1)) if self.stoped: return yield max(next_merge, time.time() + 1) @@ -1953,14 +2217,23 @@ class RoutingTable(object): yield i def register_torrent_longterm(self, id): - """Same as register_torrent but garanty that the torrent wont - be released automaticaly by the dht """ - self.split_ids.add(id) + Same as :meth:`register_torrent` but garanty that the torrent wont be released + automaticaly by the dht. 
+ + :param bytes id: A 160 bits (20 Bytes) torrent identifier + """ + self._split_ids.add(id) + def release_torrent_longterm(self, id): - """for releasing torrent registered with the `register_torrent_longterm` method""" + """ + For releasing torrent registered with the :meth:`register_torrent_longterm` method + + :param bytes id: A 160 bits (20 Bytes) torrent identifier + + """ try: - self.split_ids.remove(id) + self._split_ids.remove(id) if not self.need_merge: self.debug(1, "Programming merge") self.need_merge = True @@ -1968,33 +2241,31 @@ class RoutingTable(object): pass def register_dht(self, dht): - """Register a `dht` instance to the routing table + """ + Register a ``dht`` instance to the routing table - Note: - on start, dht automaticaly register itself to its - routing table + :param DHT_BASE dht: A dht instance + + Notes: + on start, all dht instances automatically register themselves to their routing tables """ - if dht.stoped is False: - RuntimeError( - "DHT instance must be registered on the routing table before the start " - "of the routing table" - ) self._dhts.add(dht) - self.split_ids.add(dht.myid.value) + self._split_ids.add(dht.myid.value) def release_dht(self, dht): - """release a `dht` instance to the routing table + """ + Release a ``dht`` instance from the routing table, and schedule the routing table for a + merge.
- Note: - on stop, dht automatially release itself from the - routing table + Notes: + on stop, dht automatially release itself from the routing table """ try: self._dhts.remove(dht) except KeyError: pass try: - self.split_ids.remove(dht.myid) + self._split_ids.remove(dht.myid) if not self.need_merge: self.debug(1, "Programming merge") self.need_merge = True @@ -2003,27 +2274,21 @@ class RoutingTable(object): if not self._dhts: self.stop() - def sleep(self, t, fstop=None): - """same as sleep on DHT_BASE""" - if t > 0: - t_int = int(t) - t_dec = t - t_int - for i in range(0, t_int): - time.sleep(1) - if self.stoped: - if fstop: - fstop() - sys.exit(0) - time.sleep(t_dec) - def debug(self, lvl, msg): """same as debug on DHT_BASE""" - if lvl <= self.debuglvl and (msg != self._last_debug or (time.time() - self._last_debug_time) > 5): - print("RT:%s" % msg) + if ( + lvl <= self.debuglvl and + (msg != self._last_debug or (time.time() - self._last_debug_time) > 5) + ): + print("RT%s:%s" % (self.prefix, msg)) self._last_debug = msg self._last_debug_time = time.time() def _routine(self): + """ + Weigthless thread perfoming routine on the routing table like performing quering to + bucket with no activity and pinging questionnable (neither good nor bad) nodes. 
+ """ yield 0 last_explore_tree = time.time() while True: @@ -2032,48 +2297,52 @@ class RoutingTable(object): if self.stoped: return yield (last_explore_tree + 60) - #self.sleep(60 - (time.time() - last_explore_tree)) - dhts = list(self._dhts) - shuffle(dhts) - now = time.time() - i = 0 - for key, bucket in self.trie.items(): - if self.stoped: - return - # if trie modifies while looping - if not key in self.trie: - continue - # If bucket inactif for more than 15min, find_node on a random id in it - if now - bucket.last_changed > 15 * 60: - id = bucket.random_id() - nodes = self.get_closest_nodes(id) - if nodes and dhts: - nodes[0].find_node(dhts[0], id) - i += 1 - del nodes - # If questionnable nodes, ping one of them - questionable = [node for node in bucket if not node.good and not node.bad] - - for dht in dhts: - if not questionable: - break - questionable.pop().ping(dht) - i+=1 - del questionable - - # give back the main in case of very big routing table to the scheduler - if i > 1000: - yield 0 + if self._dhts: + dhts = list(self._dhts) + dhts_last_elt = len(dhts) - 1 + shuffle(dhts) + now = time.time() + i = 0 + for key, bucket in self.trie.items(): + if self.stoped: + return + # if trie modifies while looping + if not key in self.trie: + continue + # If bucket inactif for more than 15min, find_node on a random id in it + if now - bucket.last_changed > 15 * 60: + id = bucket.random_id() + nodes = self.get_closest_nodes(id) + if nodes: + nodes[0].find_node(dhts[randint(0, dhts_last_elt)], id) + i += 1 + del nodes + # If questionnable nodes, ping one of them + questionable = [node for node in bucket if not node.good and not node.bad] + + for dht in dhts: + if not questionable: + break + questionable.pop().ping(dht) + i+=1 + del questionable + + # give back the main in case of very big routing table to the scheduler + if i > 1000: + yield 0 last_explore_tree = time.time() def empty(self): - """Remove all subtree""" + """Empty the routing table, deleting all 
buckets""" self.trie = datrie.Trie("".join(chr(i) for i in range(256))) self.trie[u""]=Bucket() def stats(self): - """return the number of nodes, good nodes, bad nodes""" + """ + :return: A triple (number of nodes, number of good nodes, number of bad nodes) + :rtype: tuple + """ nodes = 0 goods = 0 bads = 0 @@ -2096,12 +2365,29 @@ class RoutingTable(object): return iter(self.trie.values()) def get_node(self, id): - """return the node with id `id` or raise `NotFound`""" + """ + :param bytes id: A 160 bits (20 Bytes) identifier + :return: A node with id ``id`` + :rtype: Node + :raises: :class:`NotFound` if no node is found + """ b = self.find(id) return b.get_node(id) def find(self, id, errno=0): - """retourn the bucket containing `id`""" + """ + :param bytes id: A 160 bits (20 Bytes) identifier + :return: The bucket handling ``id`` + :rtype: Bucket + :raises KeyError: when a race condition with merging and/or splitting a bucket is met. + This should not happen + + Notes: + During a split or merge of a bucket it is possible that the bucket handling ``id`` + is not found. :meth:`find` will retry at most 20 times to get the bucket. + In most cases, during those retries, the split and/or merge will end and the bucket + handling ``id`` will be returned. + """ try: return self.trie.longest_prefix_value(utils.id_to_longid(ID.to_bytes(id))) except KeyError as e: @@ -2116,7 +2402,18 @@ class RoutingTable(object): raise def get_closest_nodes(self, id, bad=False, errno=0): - """return the K closest nodes from `id` in the routing table""" + """ + Return the K closest nodes from ``id`` in the routing table + + :param bytes id: A 160 bits (20 Bytes) identifier + :param bool bad: Should we return bad nodes ? The default is ``False`` + + Notes: + If less than K (=8) good nodes are found, bad nodes will be included to solve + the case where the connection was temporarily lost and all nodes in the routing + table
+ In normal operation, we should always find K (=8) good nodes in the routing table. + """ try: id = ID(id) nodes = set(n for n in self.find(id) if not n.bad) @@ -2142,11 +2439,11 @@ class RoutingTable(object): return self.get_closest_nodes(id, bad=bad, errno=errno+1) def add(self, dht, node): - """Add a node the the routing table + """ + Add a node the the routing table - Args: - dht (DHT_BASE): a dht instance - node (Node): a node instance to be added + :param DHT_BASE dht: The dht instance ``node``is from + :param Node node: The node to add to the routing table """ if node.ip in dht.ignored_ip: return @@ -2158,7 +2455,7 @@ class RoutingTable(object): except BucketFull: # If bucket is full, try to split if b.id_length < 160: - for id in self.split_ids | self.info_hash: + for id in self._split_ids | self._info_hash: if b.own(id): self.split(dht, b) self.add(dht, node) @@ -2167,18 +2464,21 @@ class RoutingTable(object): print("%r" % b) def heigth(self): - """height of the tree of the routing table""" + """ + :return: the height of the tree of the routing table + :rtype: int + """ return self._heigth def split(self, dht, bucket): - """request for a bucket identified by `id` to be split + """ + Split ``bucket`` in two - Notes: - the routing table cover the entire 160bits space + :param DHT_BASE dht: A dht instance + :param Bucket bucket: A bucket from the routing table to split - Args: - dht (DHT_BASE): a dht instance - bucket (Bucket): a bucket in the routing table to split + Notes: + the routing table cover the entire 160bits space """ try: prefix = utils.id_to_longid(bucket.id)[:bucket.id_length] @@ -2198,6 +2498,13 @@ class RoutingTable(object): self.need_merge = True def _merge(self, stack=None): + """ + Perform a merge of the routing table. If ``stack`` is provided, only a partial merge + on buckets identified by ``stack`` is perform. Otherwise, a full merge of the + routing table is done. 
+ + :param list stack: An optional list of keys of :attr:`trie` to merge. + """ if stack is None: stack = self.trie.keys() full_merge = True @@ -2218,7 +2525,7 @@ class RoutingTable(object): if not key: continue to_merge = True - for id in self.split_ids | self.info_hash: + for id in self._split_ids | self._info_hash: if utils.id_to_longid(id).startswith(key[:-1]): to_merge = False break @@ -2255,36 +2562,3 @@ class RoutingTable(object): self._heigth = max(len(k) for k in self.trie.keys()) + 1 self.debug(1, "%s nodes merged in %ss" % (nodes_before - self.stats()[0], int(time.time() - started))) - def _dhts_send_loop(self): - sockets = {} - to_send_sockets = {} - for dht in self._dhts: - sockets[dht.sock] = dht - to_send_sockets[dht.to_send.sock] = dht - read_sockets = [s for s in sockets] + [s for s in to_send_sockets] - def write_sockets(): - return [s for (s, dht) in six.iteritems(sockets) if not dht.to_send.empty()] - while True: - if self.stoped: - return - try: - (sockets_read, sockets_write, _) = select.select( - read_sockets, write_sockets(), [], 0.1 - ) - except socket.error as e: - self.debug(0, "recv:%r" %e ) - raise - sockets_write = set(sockets_write) - for sock in sockets_read: - if sock in sockets: - dht = sockets[sock] - if dht.stoped: - return - dht._process_incoming_message() - else: - dht = to_send_sockets[sock] - if dht.stoped: - return - if dht.sock in sockets_write: - dht._process_outgoing_message() - diff --git a/btdht/utils.pyx b/btdht/utils.pyx index b8bbf2a..cd5621f 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -24,6 +24,7 @@ try: except ImportError: import queue as Queue from functools import total_ordering +from threading import Thread, Lock from libc.stdlib cimport atoi, malloc, free from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy @@ -32,6 +33,7 @@ from .krcp cimport _decode_string, _decode_int as _decode_long cdef extern from "ctype.h": int isdigit(int c) +#: an array mapping and int ([0-256]) to the 
corresponging byte (like the function :func:`chr`) cdef char BYTE_TO_BIT[256][8] # fill BYTE_TO_BIT array def __init(): @@ -42,6 +44,14 @@ __init() del __init cdef char _longid_to_char(char* id) nogil: + """ + Transform a 8 long string of 0 and 1 like "10110110" in base 2 to the corresponding char + in base 256. + + :param str id: A 8 Bytes long string with only 0 and 1 as characters + :return: A single char where the nth bit correspond to the nth bytes of ``id`` + :rtype: str + """ cdef unsigned char i = 0 if id[0] == 1: i = i | (1 << 7) @@ -62,6 +72,14 @@ cdef char _longid_to_char(char* id) nogil: return i cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: + """ + Transform a base 2, 160 Bytes long id like "101...001" to its 20 Bytes base 256 form + + :param str longid: A string, of length multiple of 8 contening only 0 and 1 chars + :param int size: The length of ``longid``, the default is 160. + :return: A ``size``/8 corresponding base 256 string + :rtype: str + """ cdef int i cdef char* id if size//8*8 != size: @@ -75,6 +93,15 @@ cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: return id cdef char* _id_to_longid(char* id, int size=20) nogil: + """ + Convert a random string ``id`` of length ``size`` to its base 2 equivalent. 
+ For example, "\0\xFF" is converted to "0000000011111111" + + :param bytes id: A random string + :param int size: The length of ``id`` + :return: The corresponding base 2 string + :rtype: bytes + """ global BYTE_TO_BIT cdef char* ret = malloc((size * 8) * sizeof(char)) cdef int i = 0 @@ -85,8 +112,13 @@ cdef char* _id_to_longid(char* id, int size=20) nogil: def id_to_longid(char* id, int l=20): """ - convert a random char* to a unicode string of 1 and 0 - example : "\0" -> "00000000" + convert a random bytes to a unicode string of 1 and 0 + example : "\0" -> "00000000" + + :param bytes id: A random string + :param int size: The length of ``id`` + :return: The corresponding base 2 unicode string + :rtype: unicode """ #cdef int l = len(id) with nogil: @@ -449,6 +481,10 @@ class PollableQueue(Queue.Queue): self._putsocket.setblocking(0) self.sock = self._getsocket + def __del__(self): + self._putsocket.close() + self._getsocket.close() + def _put(self, *args, **kwargs): Queue.Queue._put(self, *args, **kwargs) self._signal_put() @@ -488,53 +524,387 @@ class SplitQueue(PollableQueue): return (key, ) + value -def schedule(to_schedule): +class Scheduler(object): """ - Schedule the call of predefined iterator functions. - - :param list to_schedule: A list of callable returning an iterator - - Notes: - Iterators must behave as describe next. The first returned value must be an integer - describing the type of the iterator. 0 mean time bases and all subsequent yield must - return the next timestamp at which the iterator want to be called. 1 mean queue based. - The next call to the iterator must return an instance of :class:`PollableQueue`. All - subsequent yield value are then ignored. The queue based iterator will be called when - something is put on its queue. + Schedule weightless threads and DHTs io + + A weightless threads is a python callable returning an iterator that behave as describe + next. 
The first returned value must be an integer describing the type of the iterator. + 0 means time based and all subsequent yield must return the next timestamp at which the + iterator want to be called. 1 means queue based. The next call to the iterator must return + an instance of :class:`PollableQueue`. All subsequent yield value are then ignored. + The queue based iterator will be called when something is put on its queue. """ - time_based = {} - queue_based = {} - timers = {} - names = {} - for i, (name, function) in enumerate(to_schedule): + + #: map between an iterator and a unix timestamp representing the next time the iterator want to + #: to be executed + _time_based = {} + #: map between an iterator and a queue processed by this iterator, processed by the main thread + _queue_based = {} + #: map between an iterator and a queue processed by this iterator, processed by the secondary + #: thread + _user_queue = {} + #: A map between an iterator and its name + _names = {} + #: A map between its name and an iterator + _iterators = {} + + #: A map between a :class:`PollableQueue` socket :attr:`PollableQueue.sock` and an iterator + _queue_base_socket_map = {} + #: A list of :attr:`PollableQueue.sock` to be processed on the main thread + _queue_base_sockets = [] + #: A list of :attr:`PollableQueue.sock` to be processed on the secondary thread + _user_queue_sockets = [] + + #: A map between a :class:`dht.DHT_BASE.sock` and a :class:`dht.DHT_BASE` instance + _dht_sockets = {} + #: A map between the :attr:`PollableQueue.sock` socket of the :class:`dht.DHT_BASE.to_send` + #: queue and a :class:`dht.DHT_BASE` instance + _dht_to_send_sockets = {} + #: A list of all keys of :attr`_dht_to_send_sockets` and :attr:`_dht_sockets` + _dht_read_sockets = [] + + def _dht_write_sockets(self): + """ + Compute dynamically the list of socket we need to write to. 
+ All :class:`dht.DHT_BASE.sock` where :class:`dht.DHT_BASE.to_send` is not empty + + :return: A list of socket we want write to + :rtype: list + """ + try: + return [s for (s, dht) in six.iteritems(self._dht_sockets) if not dht.to_send.empty()] + except RuntimeError: + return [] + + _start_lock = None + _threads = None + _stoped = True + + def __init__(self): + self._start_lock = Lock() + self._init_attrs() + self._threads = [] + + def _init_attrs(self): + """Ititialize the instance attributes""" + self._time_based = {} + self._queue_based = {} + self._user_queue = {} + self._names = {} + self._queue_base_socket_map = {} + self._queue_base_sockets = [] + self._user_queue_sockets = [] + self._iterators = {} + + self._dht_sockets = {} + self._dht_to_send_sockets = {} + self._dht_read_sockets = [] + + + def add_thread(self, name, function, user=False): + """ + Schedule the call of weightless threads + + :param str name: The name of the thread to add. Must be unique in the :class:`Scheduler` + instance + :param function: A weightless threads, i.e a callable returning an iterator + :param bool user: If ``True`` the weightless threads is schedule in a secondary thread. + The default is ``False`` and the weightless threads is processed in the main + scheduler thread. This is usefull to put controled weightless threads and the main + thread, and all the other (like the user defined on_``msg``_(query|response)) + function to the secondary one. 
+ + """ + if name in self._iterators: + raise ValueError("name already used") iterator = function() - names[iterator] = name + self._names[iterator] = name + self._iterators[name] = iterator typ = iterator.next() if typ == 0: - time_based[i] = iterator - timers[i] = 0 + if user == True: + raise ValueError("Only queue based threads can be put in the user loop") + self._time_based[iterator] = 0 elif typ == 1: queue = iterator.next() - queue_based[queue] = iterator + if user == True: + self._user_queue[iterator] = queue + self._user_queue_sockets.append(queue.sock) + else: + self._queue_based[iterator] = queue + self._queue_base_sockets.append(queue.sock) + self._queue_base_socket_map[queue.sock] = iterator else: raise RuntimeError("Unknown iterator type %s" % typ) - next_time = 0 - queue_base_socket_map = dict((q.sock, i) for (q, i) in six.iteritems(queue_based)) - queue_base_sockets = [q.sock for q in queue_based.keys()] - try: + + def del_thread(self, name, stop_if_empty=True): + """ + Remove the weightless threads named ``name`` + + :param str name: The name of a thread + :param bool stop_if_empty: If ``True`` (the default) and the scheduler has nothing to + schedules, the scheduler will be stopped. 
+ """ + if name in self._iterators: + iterator = self._iterators[name] + try: + del self._iterators[name] + except KeyError: + pass + try: + del self._names[iterator] + except KeyError: + pass + try: + del self._time_based[iterator] + except KeyError: + pass + try: + queue = self._queue_based[iterator] + try: + del self._queue_base_socket_map[queue.sock] + except KeyError: + pass + try: + del self._queue_based[iterator] + self._queue_base_sockets.remove(queue.sock) + except KeyError: + pass + try: + del self._user_queue[iterator] + self._user_queue_sockets.remove(queue.sock) + except KeyError: + pass + except KeyError: + pass + if stop_if_empty and not self._dht_sockets and not self._iterators: + self.stop_bg() + + def add_dht(self, dht): + """ + Add a dht instance to be schedule by the scheduler + + :param dht.DHT_BASE dht: A dht instance + """ + self._dht_sockets[dht.sock] = dht + self._dht_to_send_sockets[dht.to_send.sock] = dht + self._dht_read_sockets.append(dht.sock) + self._dht_read_sockets.append(dht.to_send.sock) + for (name, function, user) in dht.to_schedule: + self.add_thread(name, function, user=user) + + def del_dht(self, dht): + """ + Remove a dht instance from the scheduler + + :param dht.DHT_BASE dht: A dht instance + """ + try: + del self._dht_sockets[dht.sock] + except KeyError: + pass + try: + del self._dht_to_send_sockets[dht.to_send.sock] + except KeyError: + pass + try: + self._dht_read_sockets.remove(dht.sock) + except ValueError: + pass + try: + self._dht_read_sockets.remove(dht.to_send.sock) + except ValueError: + pass + for (name, _, _) in dht.to_schedule: + self.del_thread(name) + + def thread_alive(self, name): + """ + Test is a weightless threads named ``name`` is currently schedule + + :param str name: The name of a thread + :return: ``True`` if a thread of name ``name`` if found + :rtype: bool + """ + return self.is_alive() and name in self._iterators + + def is_alive(self): + """Test if the scheduler main thread is alive + + 
:return: ``True`` the scheduler main thread is alive, ``False`` otherwise + :rtype: bool + """ + if self._threads and all([t.is_alive() for t in self._threads]): + return True + elif not self._threads and self._stoped: + return False + else: + print("One thread died, stopping scheduler") + self.stop(wait=False) + return False + + def start(self, name_prefix="scheduler"): + """ + start the scheduler + + :param str name_prefix: Prefix to the scheduler threads names + """ + with self._start_lock: + if not self._stoped: + print("Already started") + return + if self.zombie: + print("Zombie thread, unable de start") + return self._threads + self._stoped = False + t = Thread(target=self._schedule_loop) + t.setName("%s:schedule_loop" % name_prefix) + t.daemon = True + t.start() + self._threads.append(t) + t = Thread(target=self._schedule_user_loop) + t.setName("%s:schedule_user_loop" % name_prefix) + t.daemon = True + t.start() + self._threads.append(t) + t = Thread(target=self._io_loop) + t.setName("%s:io_loop" % name_prefix) + t.daemon = True + t.start() + self._threads.append(t) + + def stop(self, wait=True): + """stop the scheduler""" + if self._stoped: + print("Already stoped or stoping in progress") + return + self._stoped = True + self._init_attrs() + if wait: + self._threads = [t for t in self._threads[:] if t.is_alive()] + for i in range(0, 30): + if self._threads: + if i > 5: + print("Waiting for %s threads to terminate" % len(self._threads)) + time.sleep(1) + self._threads = [t for t in self._threads[:] if t.is_alive()] + else: + break + else: + print("Unable to stop the scheduler threads, giving up") + + def stop_bg(self): + """Lauch the stop process of the dht and return immediately""" + if not self._stoped: + t=Thread(target=self.stop) + t.daemon = True + t.start() + + @property + def zombie(self): + """ + :return: ``True`` if the scheduler is stoped but its threads are still running + :rtype: bool + """ + return bool(self._stoped and [t for t in 
self._threads if t.is_alive()]) + + def _schedule_loop(self): + """The schedule loop calling weightless threads iterators then needed""" + next_time = 0 + try: + while True: + + if self._stoped: + return + + wait = max(0, next_time - time.time()) if self._time_based else 1 + + (sockets, _, _) = select.select(self._queue_base_sockets, [], [], wait) + + # processing time based threads + if self._time_based: + now = time.time() + if now >= next_time: + to_set = [] + try: + for iterator, t in six.iteritems(self._time_based): + if now >= t: + to_set.append((iterator, iterator.next())) + for iterator, t in to_set: + self._time_based[iterator] = t + except RuntimeError: + pass + next_time = min(self._time_based.values()) + + # processing queue based threads + for sock in sockets: + try: + iterator = self._queue_base_socket_map[sock] + iterator.next() + except KeyError: + pass + except StopIteration as error: + try: + print("Iterator %s stoped" % self._names[iterator]) + self.del_thread(self._names[iterator]) + except KeyError: + pass + + def _schedule_user_loop(self): + """ + A second schedule loop calling weightless threads iterators then needed + + These second loop is here to handle user defined function (on_``msg``_query and + on_``msg``_response) than we do not known how long they can take, so they won't block + the main loop :meth:`_schedule_loop`. 
+ """ + next_time = 0 + try: + while True: + + if self._stoped: + return + (sockets, _, _) = select.select(self._user_queue_sockets, [], [], 1) + # processing queue based threads + for sock in sockets: + try: + iterator = self._queue_base_socket_map[sock] + iterator.next() + except KeyError: + pass + except StopIteration as error: + try: + print("Iterator %s stoped" % self._names[iterator]) + self.del_thread(self._names[iterator]) + except KeyError: + pass + + def _io_loop(self): while True: - now = time.time() - wait = max(0, next_time - now) - (sockets, _, _) = select.select(queue_base_sockets, [], [], wait) - now = time.time() - if now >= next_time: - for i, iterator in six.iteritems(time_based): - if now >= timers[i]: - timers[i] = iterator.next() - next_time = min(timers.values()) - for sock in sockets: - iterator = queue_base_socket_map[sock] - iterator.next() - except StopIteration as error: - print("Iterator %s stopped" % names[iterator]) - raise + if self._stoped: + return + try: + (sockets_read, sockets_write, _) = select.select( + self._dht_read_sockets, self._dht_write_sockets(), [], 0.1 + ) + except socket.error as e: + self.debug(0, "recv:%r" %e ) + raise + sockets_write = set(sockets_write) + for sock in sockets_read: + try: + if sock in self._dht_sockets: + dht = self._dht_sockets[sock] + if dht.stoped: + self.del_dht(dht) + else: + dht._process_incoming_message() + else: + dht = self._dht_to_send_sockets[sock] + if dht.stoped: + self.del_dht(dht) + elif dht.sock in sockets_write: + dht._process_outgoing_message() + except KeyError: + pass From 14a31a9e3cc9a5128a2cdbd05d4af5e3d5a555d5 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Tue, 29 Nov 2016 18:53:10 +0100 Subject: [PATCH 12/30] Fix bad array declaration (size 0 instead of size 1) --- btdht/krcp.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index fbc6128..95ea90e 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -74,8 +74,8 
@@ cdef char** vlist_to_array(l, int size=6): cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: - cdef int j[0] - cdef long long ll[0] + cdef int j[1] + cdef long long ll[1] if i[0] >= max + 1: with gil: raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) @@ -95,8 +95,8 @@ cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: return True cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: - cdef int j[0] - cdef long long ll[0] + cdef int j[1] + cdef long long ll[1] if i[0] >= max + 1: with gil: raise DecodeError("%s > %s : %r" % (i[0], max, data[:max])) From f3e8da151442ae647e7704b2820d077a1f1c3969 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Tue, 29 Nov 2016 19:13:31 +0100 Subject: [PATCH 13/30] atoll is not available on windows (7), use atol instead --- btdht/krcp.pyx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 95ea90e..4e50dc1 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -9,11 +9,18 @@ # Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
# # (c) 2015 Valentin Samir +import os from libc cimport math from libc.stdio cimport printf, sprintf from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy from libc.stdlib cimport atoi, atoll, malloc, free + +if os.name == 'posix': + from libc.stdlib cimport atoll +else: + from libc.stdlib cimport atol as atoll + from cython.parallel import prange import six @@ -33,12 +40,14 @@ cdef int str_to_int(char* data, int len) nogil: free(msg) return i -cdef int str_to_long_long(char* data, int len) nogil: +cdef long str_to_long_long(char* data, int len) nogil: cdef char* msg = NULL cdef long long i if data[0] == b'-' and len > 16 or len > 17: with gil: - raise EnvironmentError("Trying to convert %s to long long but it's too big" % data[:len]) + raise EnvironmentError( + "Trying to convert %s to long long but it's too big" % data[:len] + ) try: msg = malloc((len+1) * sizeof(char)) strncpy(msg, data, len) From e2317213c406ede27f65d590bccc6ac768c02e8a Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 30 Nov 2016 13:07:40 +0100 Subject: [PATCH 14/30] Windows do not handle empty selects --- btdht/utils.pyx | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/btdht/utils.pyx b/btdht/utils.pyx index cd5621f..317db95 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -500,7 +500,9 @@ class PollableQueue(Queue.Queue): try: self._getsocket.recv(1) except socket.error as error: - if error.errno != 11: # Resource temporarily unavailable + # 11: Resource temporarily unavailable raised on unix system then nothing to read + # 10035: raised on windows systems hen nothing to read + if error.errno not in [11, 10035]: raise def _get(self, *args, **kwargs): @@ -820,7 +822,12 @@ class Scheduler(object): wait = max(0, next_time - time.time()) if self._time_based else 1 - (sockets, _, _) = select.select(self._queue_base_sockets, [], [], wait) + # windows systems do not handle empty select + if self._queue_base_sockets: + 
(sockets, _, _) = select.select(self._queue_base_sockets, [], [], wait) + else: + sockets = [] + time.sleep(wait) # processing time based threads if self._time_based: @@ -865,7 +872,12 @@ class Scheduler(object): if self._stoped: return - (sockets, _, _) = select.select(self._user_queue_sockets, [], [], 1) + # windows systems do not handle empty select + if self._user_queue_sockets: + (sockets, _, _) = select.select(self._user_queue_sockets, [], [], 1) + else: + sockets = [] + time.sleep(1) # processing queue based threads for sock in sockets: try: @@ -885,9 +897,15 @@ class Scheduler(object): if self._stoped: return try: - (sockets_read, sockets_write, _) = select.select( - self._dht_read_sockets, self._dht_write_sockets(), [], 0.1 - ) + # windows systems do not handle empty select + if self._dht_read_sockets: + (sockets_read, sockets_write, _) = select.select( + self._dht_read_sockets, self._dht_write_sockets(), [], 0.1 + ) + else: + sockets_read = [] + sockets_write = [] + time.sleep(0.1) except socket.error as e: self.debug(0, "recv:%r" %e ) raise From be5cfdd91439f07b490cafe311433bdf5faa870c Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 30 Nov 2016 13:12:25 +0100 Subject: [PATCH 15/30] IN is not available on windows --- btdht/dht.pyx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index dc16571..7c08ec7 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -14,7 +14,6 @@ from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy from libc.stdlib cimport atoi, malloc, free import os -import IN import sys import time import six @@ -34,6 +33,11 @@ from functools import total_ordering, reduce from threading import Thread, Lock from random import shuffle, randint +try: + import IN +except ImportError: + IN = None + import datrie import utils @@ -443,7 +447,11 @@ cdef class DHT_BASE: self.to_send = PollableQueue() self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 
#self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.sock.setsockopt(socket.IPPROTO_IP, IN.IP_MTU_DISCOVER, IN.IP_PMTUDISC_DO) + if IN is not None: + try: + self.sock.setsockopt(socket.IPPROTO_IP, IN.IP_MTU_DISCOVER, IN.IP_PMTUDISC_DO) + except AttributeError: + pass self.sock.setblocking(0) if self.bind_port: try: From 57e582d93e4cfe31c8f44d6427464a0bb96fec1c Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 30 Nov 2016 22:27:13 +0100 Subject: [PATCH 16/30] Check windows socket error codes --- btdht/dht.pyx | 13 ++++++++++--- btdht/utils.pyx | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 7c08ec7..3712485 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -856,11 +856,14 @@ cdef class DHT_BASE: except socket.error as e: # 90: Message too long # 13: Permission denied - if e.errno in [90, 13]: + #: 10013: same as 13 but on windows + #: 10040: same as 90 but on windows + if e.errno in [90, 13, 10013, 10040]: self.debug(0, "send:%r %r %r" % (e, addr, msg)) # 11: Resource temporarily unavailable, try again # 1: Operation not permitted - elif e.errno in [11, 1]: + # 10035: same as 11 but on windows + elif e.errno in [11, 1, 10035]: pass else: self.debug(0, "send:%r %r" % (e, addr) ) @@ -929,7 +932,11 @@ cdef class DHT_BASE: self.sendto(error.encode(), addr) # socket unavailable ? 
except socket.error as e: - if e.errno not in [11, 1]: # 11: Resource temporarily unavailable + # 11: Resource temporarily unavailable + # 10035: same as 11 but on windows + # 1: premission denied + # 10013: same as 1 but on windows + if e.errno not in [11, 1, 10035, 10013]: self.debug(0, "send:%r : (%r, %r)" % (e, data, addr)) raise except MissingT: diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 317db95..8bbbbe5 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -493,7 +493,7 @@ class PollableQueue(Queue.Queue): try: self._putsocket.send(b'x') except socket.error as error: - if error.errno != 11: # Resource temporarily unavailable + if error.errno not in [11, 10035]: # Resource temporarily unavailable raise def _comsume_get(self): @@ -501,8 +501,8 @@ class PollableQueue(Queue.Queue): self._getsocket.recv(1) except socket.error as error: # 11: Resource temporarily unavailable raised on unix system then nothing to read - # 10035: raised on windows systems hen nothing to read - if error.errno not in [11, 10035]: + # 10035: raised on windows systems then nothing to read + if error.errno not in [11, 10035]: raise def _get(self, *args, **kwargs): From c2d15c57e080b725535dbb2a5e1bcd2eec10cff6 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Wed, 30 Nov 2016 22:29:20 +0100 Subject: [PATCH 17/30] Add a script to compile wheel for windows --- make_wheel.bat | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 make_wheel.bat diff --git a/make_wheel.bat b/make_wheel.bat new file mode 100755 index 0000000..03185b8 --- /dev/null +++ b/make_wheel.bat @@ -0,0 +1,8 @@ +C:/Python27-32/Scripts/pip install wheel +C:/Python27-32/python setup.py bdist_wheel +C:/Python27/Scripts/pip install wheel +C:/Python27/python setup.py bdist_wheel +C:/Python35-32/python -m pip install wheel +C:/Python35-32/python setup.py bdist_wheel +C:/Python35/python -m pip install wheel +C:/Python35/python setup.py bdist_wheel From 7e2db74e6b61d19303cd313ed03526d08224d728 Mon Sep 17 
00:00:00 2001 From: Valentin Samir Date: Thu, 1 Dec 2016 16:10:19 +0100 Subject: [PATCH 18/30] Regroup non protocol exceptions to a file --- btdht/dht.pyx | 22 +++------------------- btdht/exceptions.py | 27 +++++++++++++++++++++++++++ btdht/krcp.pyx | 7 ++----- 3 files changed, 32 insertions(+), 24 deletions(-) create mode 100644 btdht/exceptions.py diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 3712485..0fb7f4a 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -41,7 +41,9 @@ except ImportError: import datrie import utils -from utils import ID, nbit, nflip, nset, SplitQueue, PollableQueue +from .utils import ID, nbit, nflip, nset, PollableQueue +from .exceptions import BucketFull, BucketNotFull, NoTokenError, FailToStop, TransactionIdUnknown +from .exceptions import NotFound from .krcp cimport BMessage from .krcp import BError, ProtocolError, GenericError, ServerError, MethodUnknownError, MissingT @@ -1506,21 +1508,6 @@ cdef class DHT_BASE: raise ProtocolError(msg.t, b"Message malformed") -class BucketFull(Exception): - pass - -class BucketNotFull(Exception): - pass - -class NoTokenError(Exception): - pass - -class FailToStop(Exception): - pass - -class TransactionIdUnknown(Exception): - pass - cdef class Node: """ A node of the dht in the routing table @@ -2027,9 +2014,6 @@ class Bucket(list): DHT = type("DHT", (DHT_BASE,), {'__doc__': DHT_BASE.__doc__}) -class NotFound(Exception): - pass - class RoutingTable(object): """ A routing table for one or more :class:`DHT_BASE` instances diff --git a/btdht/exceptions.py b/btdht/exceptions.py new file mode 100644 index 0000000..99b3664 --- /dev/null +++ b/btdht/exceptions.py @@ -0,0 +1,27 @@ +class BucketFull(Exception): + pass + +class BucketNotFull(Exception): + pass + +class NoTokenError(Exception): + pass + +class FailToStop(Exception): + pass + +class TransactionIdUnknown(Exception): + pass + +class MissingT(ValueError): + pass + +class DecodeError(ValueError): + pass + +class BcodeError(Exception): + pass + 
+class NotFound(Exception): + pass + diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 4e50dc1..5edc068 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -25,7 +25,8 @@ from cython.parallel import prange import six -from btdht import utils +import utils +from .exceptions import MissingT, DecodeError cdef int str_to_int(char* data, int len) nogil: cdef char* msg = NULL @@ -241,10 +242,6 @@ class MethodUnknownError(BError): def __init__(self, t, msg=b"Method Unknow"): super(MethodUnknownError, self).__init__(t=t, e=[204, msg]) -class MissingT(ValueError): - pass -class DecodeError(ValueError): - pass cdef class BMessage: cdef int set_r(self, int value) nogil: From 3dcf1d0f9d0a27401ee105d77c9acf71e822bc32 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Thu, 1 Dec 2016 16:14:05 +0100 Subject: [PATCH 19/30] Continue to document --- btdht/dht.pyx | 177 ++++++++++---------- btdht/utils.pyx | 260 ++++++++++++++++++++++++++---- docs/package/btdht.dht.rst | 169 +++++++++++++------ docs/package/btdht.exceptions.rst | 7 + docs/package/btdht.rst | 1 + 5 files changed, 448 insertions(+), 166 deletions(-) create mode 100644 docs/package/btdht.exceptions.rst diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 0fb7f4a..571db52 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -78,30 +78,36 @@ cdef class DHT_BASE: """ cdef char _myid[20] + #: :class:`str` interface the dht is binded to + bind_ip = "0.0.0.0" + #: :class:`int` port the dht is binded to + bind_port = None + #: :class:`int` the dht instance verbosity level + debuglvl = 0 + #: last time we received any message + last_msg = 0 + #: last time we receive a response to one of our messages + last_msg_rep = 0 + #: :class:`set` of ignored ip in dotted notation + ignored_ip = [] #: :class:`list` of default ignored ip networks ignored_net = [ '0.0.0.0/8', '10.0.0.0/8', '100.64.0.0/10', '127.0.0.0/8', '169.254.0.0/16', '172.16.0.0/12', '192.0.0.0/24', '192.0.2.0/24', '192.168.0.0/16', '198.18.0.0/15', '198.51.100.0/24', 
'203.0.113.0/24', '224.0.0.0/4', '240.0.0.0/4', '255.255.255.255/32' ] + #: :class:`utils.ID` the dht instance id, 160bits long (20 Bytes) + myid = None #: :class:`str` prefixing all debug message prefix = "" - #: :class:`set` of ignored ip in dotted notation - ignored_ip = [] - #: :class:`RoutingTable` the used instance of the routing table + #: :class:`RoutingTable` the used instance of the routing table root = None - #: :class:`int` port the dht is binded to - bind_port = None - #: :class:`str` interface the dht is binded to - bind_ip = "0.0.0.0" - #: :class:`utils.ID` the dht instance id, 160bits long (20 Bytes) - myid = None - #: :class:`int` the dht instance verbosity level - debuglvl = 0 + #: The current dht :class:`socket.Socket` + sock = None + #: the state (stoped ?) of the dht + stoped = True #: :class:`list` of the :class:`Thread` of the dht instance threads = [] - #: Map beetween transaction id and messages type (to be able to match responses) - transaction_type = {} #: Token send with get_peers response. Map between ip addresses and a list of random token. #: A new token by ip is genereted at most every 5 min, a single token is valid 10 min. #: On reception of a announce_peer query from ip, the query is only accepted if we have a @@ -110,29 +116,21 @@ cdef class DHT_BASE: #: Tokens received on get_peers response. Map between ip addresses and received token from ip. #: Needed to send announce_peer to that particular ip. mytoken = {} - #: The current dht :class:`socket.Socket` - sock = None + #: Map beetween transaction id and messages type (to be able to match responses) + transaction_type = {} #: A :class:`PollableQueue` of messages (data, (ip, port)) to send to_send = PollableQueue() - #: the state (stoped ?) of the dht - stoped = True - #: last time we received any message - last_msg = 0 - #: last time we receive a response to one of our messages - last_msg_rep = 0 #: A list of looping iterator to schedule. 
Calling :meth:`schedule` will do a scheduling for #: 1 DHT instance to_schedule = [] - - #: Map torrent hash -> peer ip and port -> received time. hash, ip and port are from #: announce_peer query messages. time is the time of the received message. We only keep the - #: 100 most recent (ip, port). A (ip, port) couple is kept max 30min + #: 100 most recent (ip, port). A (ip, port) couple is kept max 30min _peers=collections.defaultdict(collections.OrderedDict) #: Map torrent hash -> peer ip and port -> received time. hash, ip and port are from get_peers #: response messages. time is the time of the received message. We keep the 1000 most recent - #: (ip, port). A (ip, port) couple is kept max 15min + #: (ip, port). A (ip, port) couple is kept max 15min _got_peers=collections.defaultdict(collections.OrderedDict) #: internal heap structure used to find the K closed nodes in the DHT from one id _get_peer_loop_list = [] @@ -410,7 +408,7 @@ cdef class DHT_BASE: :param str msg: The debug message to print Note: - duplicate messages are removed: + duplicate messages are removed """ if ( lvl <= self.debuglvl and @@ -1540,46 +1538,7 @@ cdef class Node: #: the node) cdef int _failed - def __init__(self, bytes id, ip, int port, int last_response=0, int last_query=0, int failed=0): - cdef char* cip - cdef char* cid - if ip[0] == u'0': - raise ValueError("IP start with 0 *_* %r %r" % (ip, self._ip[:4])) - tip = socket.inet_aton(ip) - cip = tip - id = ID.to_bytes(id) - cid = id - with nogil: - if not port > 0 and port < 65536: - with gil: - raise ValueError("Invalid port number %s, sould be within 1 and 65535 for %s" % (port, ip)) - #self._id = malloc(20 * sizeof(char)) - strncpy(self._id, cid, 20) - #self._ip = malloc(4 * sizeof(char)) - strncpy(self._ip, cip, 4) - self._port = port - self._last_response = last_response - self._last_query = last_query - self._failed = failed - - - def __richcmp__(self, Node other, int op): - if op == 2: # == - return other.id == self.id - elif op 
== 3: # != - return other.id != self.id - elif op == 0: # < - return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) - elif op == 4: # > - return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) and not (other.id == self.id) - elif op == 1: # <= - return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) or (other.id == self.id) - elif op == 5: # >= - return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) - else: - return False - - #: udp port of the node + #: UDP port of the node property port: def __get__(self):return self._port def __set__(self, int i):self._port = i @@ -1591,7 +1550,7 @@ cdef class Node: property last_query: def __get__(self):return self._last_query def __set__(self, int i):self._last_query = i - #: number of reponse pending (increase on sending query to the node, set to 0 on reception from + #: Number of reponse pending (increase on sending query to the node, set to 0 on reception from #: the node) property failed: def __get__(self):return self._failed @@ -1617,7 +1576,7 @@ cdef class Node: # Nodes become bad when they fail to respond to multiple queries in a row. 
return not self.good and self.failed > 3 - #: ip address of the node in dotted notation + #: IP address of the node in dotted notation property ip: def __get__(self): ip = socket.inet_ntoa(self._ip[:4]) @@ -1633,6 +1592,46 @@ cdef class Node: with nogil: strncmp(self._ip, cip, 4) + def __init__(self, bytes id, ip, int port, int last_response=0, int last_query=0, int failed=0): + cdef char* cip + cdef char* cid + if ip[0] == u'0': + raise ValueError("IP start with 0 *_* %r %r" % (ip, self._ip[:4])) + tip = socket.inet_aton(ip) + cip = tip + id = ID.to_bytes(id) + cid = id + with nogil: + if not port > 0 and port < 65536: + with gil: + raise ValueError("Invalid port number %s, sould be within 1 and 65535 for %s" % (port, ip)) + #self._id = malloc(20 * sizeof(char)) + strncpy(self._id, cid, 20) + #self._ip = malloc(4 * sizeof(char)) + strncpy(self._ip, cip, 4) + self._port = port + self._last_response = last_response + self._last_query = last_query + self._failed = failed + + + def __richcmp__(self, Node other, int op): + if op == 2: # == + return other.id == self.id + elif op == 3: # != + return other.id != self.id + elif op == 0: # < + return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) + elif op == 4: # > + return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) and not (other.id == self.id) + elif op == 1: # <= + return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) or (other.id == self.id) + elif op == 5: # >= + return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) + else: + return False + + def __repr__(self): return "Node: %s:%s" % (self.ip, self.port) @@ -1817,15 +1816,20 @@ class Bucket(list): between the root and the bucket in the routing table) :param iterable init: some values to store initialy in the bucket """ - #: maximun number of element in the bucket + #: Maximun number of element in 
the bucket max_size = 8 - #: Unix timestamp, ast time the bucket had been updated + #: Unix timestamp, last time the bucket had been updated last_changed = 0 - #: A prefix identifier from 0 to 169 bits for the bucket + #: A prefix identifier from 0 to 160 bits for the bucket id = None - #: number of signifiant bit in :attr:`id` + #: Number of signifiant bit in :attr:`id` id_length = 0 + @property + def to_refresh(self): + """``True`` if the bucket need refreshing""" + return time.time() - self.last_changed > 15 * 60 + __slot__ = ("id", "id_length") def own(self, id): @@ -1986,11 +1990,6 @@ class Bucket(list): l = [n for l in zip(self, bucket) for n in l if n.good][:self.max_size] return Bucket(id=self.id, id_length=self.id_length - 1, init=l) - @property - def to_refresh(self): - return time.time() - self.last_changed > 15 * 60 - - def __hash__(self): return hash(utils.id_to_longid(ID.to_bytes(self.id))[:self.id_length]) @@ -2023,9 +2022,9 @@ class RoutingTable(object): """ #: :class:`int` the routing table instance verbosity level debuglvl = 0 - #: the routing table storage data structure, an instance of :class:`datrie.Trie` + #: The routing table storage data structure, an instance of :class:`datrie.Trie` trie = None - #: the state (stoped ?) of the routing table + #: The state (stoped ?) of the routing table stoped = True #: Is a merge sheduled ? 
need_merge = False @@ -2033,27 +2032,28 @@ class RoutingTable(object): threads = [] #: A class:`list` of couple (weightless thread name, weightless thread function) to_schedule = [] - #: prefix in logs and threads name + #: Prefix in logs and threads name prefix = "" - #: current height of the tree :attr:`trie` structure of the routing table + + #: Current height of the tree :attr:`trie` structure of the routing table _heigth = 1 #: A set of registered dht instance with this routing table _dhts = set() #: A set of torrent id _info_hash = set() - #: a set of dht id + #: A set of dht id _split_ids = set() - #: internal list of supposed alive threads + #: Internal list of supposed alive threads _threads = [] - #: a set of bucket id to merge (keys of :class:`datrie.Trie`) + #: A set of bucket id to merge (keys of :class:`datrie.Trie`) _to_merge = set() - #: internal list of supposed zombie (asked to stop but still running) threads + #: Internal list of supposed zombie (asked to stop but still running) threads _threads_zombie= [] - #: last debug message, use to prevent duplicate messages over 5 seconds + #: Last debug message, use to prevent duplicate messages over 5 seconds _last_debug = "" - #: time of the lat debug message, use to prevent duplicate messages over 5 seconds + #: Time of the last debug message, use to prevent duplicate messages over 5 seconds _last_debug_time = 0 - #: a :class:`utils.Scheduler` instance + #: A :class:`utils.Scheduler` instance _scheduler = None #: A :class:`threading.Lock` instance to prevent concurrent start to happend _lock = None @@ -2113,6 +2113,7 @@ class RoutingTable(object): @property def zombie(self): + """``True`` if dht is stopped but one thread or more remains alive, ``False`` otherwise""" return self.stoped and [t for t in self._threads if t.is_alive()] def start(self, **kwargs): diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 8bbbbe5..9ef3c7a 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -29,17 +29,20 @@ from 
threading import Thread, Lock from libc.stdlib cimport atoi, malloc, free from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy from cython.parallel import prange + from .krcp cimport _decode_string, _decode_int as _decode_long +from .exceptions import BcodeError + cdef extern from "ctype.h": int isdigit(int c) #: an array mapping and int ([0-256]) to the corresponging byte (like the function :func:`chr`) -cdef char BYTE_TO_BIT[256][8] -# fill BYTE_TO_BIT array +cdef char _BYTE_TO_BIT[256][8] +# fill _BYTE_TO_BIT array def __init(): for i in range(256): s = "{0:08b}".format(i).encode("ascii") - strncpy(BYTE_TO_BIT[i], s, 8) + strncpy(_BYTE_TO_BIT[i], s, 8) __init() del __init @@ -102,11 +105,11 @@ cdef char* _id_to_longid(char* id, int size=20) nogil: :return: The corresponding base 2 string :rtype: bytes """ - global BYTE_TO_BIT + global _BYTE_TO_BIT cdef char* ret = malloc((size * 8) * sizeof(char)) cdef int i = 0 while i < size: - strncpy(ret + (i*8), BYTE_TO_BIT[id[i]], 8) + strncpy(ret + (i*8), _BYTE_TO_BIT[id[i]], 8) i+=1 return ret @@ -128,35 +131,89 @@ def id_to_longid(char* id, int l=20): return u def nbit(s, n): - """Renvois la valeur du nième bit de la chaine s""" + """ + :param bytes s: A byte string + :param int n: A bit number (n must be smaller than 8 times the length of ``s``) + :return: The value of the nth bit of ``s`` (``0`` or ``1``) + :rtype: int + """ if six.PY3: c = s[n//8] else: c = ord(s[n//8]) return int(format(c, '08b')[n % 8]) + +_NFLIP_BITS = [ + 0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001 +] def nflip(s, n): - """Renvois la chaine s dont la valeur du nième bit a été retourné""" - bit = [0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001] + """ + :param bytes s: A byte string + :param int n: A bit number (n must be smaller than 8 times the length of ``s``) + :return: The same string except for the nth bit was flip + :rtype: bytes + 
""" + global _NFLIP_BIT if six.PY2: - return s[:n//8] + chr(ord(s[n//8]) ^ bit[n % 8]) + s[n//8+1:] + return s[:n//8] + chr(ord(s[n//8]) ^ _NFLIP_BITS[n % 8]) + s[n//8+1:] else: - return s[:n//8] + bytes([s[n//8] ^ bit[n % 8]]) + s[n//8+1:] + return s[:n//8] + bytes([s[n//8] ^ _NFLIP_BITS[n % 8]]) + s[n//8+1:] + +_NSET_BIT1 = [ + 0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001 +] +_NSET_BIT0 = [ + 0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111, 0b11111011, 0b11111101, 0b11111110 +] def nset(s, n , i): - bit1 = [0b10000000, 0b01000000, 0b00100000, 0b00010000, 0b00001000, 0b00000100, 0b00000010, 0b00000001] - bit0 = [0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111, 0b11111011, 0b11111101, 0b11111110] + """ + :param bytes s: A byte string + :param int n: A bit number (n must be smaller than 8 times the length of ``s``) + :param int i: A bit value (``0`` or ``1``) + :return: ``s`` where the nth bit was set to ``i`` + :rtype: bytes + """ + global _NSET_BIT0, _NSET_BIT1 if i == 1: - return s[:n//8] + chr(ord(s[n//8]) | bit1[n % 8]) + s[n//8+1:] + return s[:n//8] + chr(ord(s[n//8]) | _NSET_BIT1[n % 8]) + s[n//8+1:] elif i == 0: - return s[:n//8] + chr(ord(s[n//8]) & bit0[n % 8]) + s[n//8+1:] + return s[:n//8] + chr(ord(s[n//8]) & _NSET_BIT0[n % 8]) + s[n//8+1:] else: - raise ValueError("i doit être 0 ou 1") - -class BcodeError(Exception): - pass + raise ValueError("i must be 0 or 1") def enumerate_ids(size, id): + """ + :param int size: A number of bit to flip in id + :param bytes id: A 160 bit (20 Bytes) long id + :return: A list of + ``id`` and 2 to the power of ``size`` (minus one) ids the furthest from each other + :rtype: list + + For instance: if id=("\0" * 20) (~0 * 160), ``enumerate_ids(4, id)`` will return a list with + * '\x00\x00\x00\x00\x00...' (~00000000...) + * '\x80\x00\x00\x00\x00...' (~10000000...) + * '@\x00\x00\x00\x00...' (~0100000000...) + * '\xc0\x00\x00\x00\x00...' (~11000000...) 
+ + The can be see as the tree:: + + \x00 + / \ + 1/ \0 + / \ + \xc0 \x00 + 1/ \0 1/ \0 + / \ / \ + \xc0 \x80 @ \x00 + + The root is ``id``, at each level n, we set the nth bit of of 1 left and 0 right, ``size`` + if the level we return. + + This function may be usefull to lanch multiple DHT instance with ids the most distributed + on the 160 bit space. + """ def aux(lvl, ids): if lvl >= 0: l = [] @@ -168,11 +225,34 @@ def enumerate_ids(size, id): return ids return aux(size - 1, [id]) + +def copy_doc(f1): + """ + A decorator coping docstring from another function + + :param f1: An object with a docstring (functions, methods, classes, ...) + :return: A decorator that copy the docstring of ``f1`` + """ + def wrap(f2): + f2.__doc__ = f1.__doc__ + return f2 + return wrap + @total_ordering class ID(object): + """ + A 160 bit (20 Bytes) string implementing the XOR distance + :param id: An optional initial value (:class:`bytes` or :class:`ID`). If not specified, + a random 160 bit value is generated. 
+ """ @classmethod def to_bytes(cls, id): + """ + :param id: A :class:`bytes` or :class:`ID` + :return: The value of the ``id`` + :rtype; bytes + """ try: return id.value except AttributeError: @@ -180,6 +260,9 @@ class ID(object): @staticmethod def __generate(): + """ + :return: A 20 Bytes (160 bit) random string (using ``os.urandom``) + """ return os.urandom(20) def __init__(self, id=None): @@ -188,12 +271,15 @@ class ID(object): else: self.value = self.to_bytes(id) + @copy_doc(u"".encode) def encode(self, c): return self.value.encode(c) + @copy_doc(b"".startswith) def startswith(self, s): return self.value.startswith(s) + @copy_doc(b"".__getitem__) def __getitem__(self, i): return self.value[i] @@ -203,6 +289,7 @@ class ID(object): def __repr__(self): return binascii.b2a_hex(self.value).decode() + @copy_doc(b"".__eq__) def __eq__(self, other): if isinstance(other, ID): return self.value == other.value @@ -211,6 +298,7 @@ class ID(object): else: return False + @copy_doc(b"".__lt__) def __lt__(self, other): if isinstance(other, ID): return self.value < other.value @@ -219,10 +307,19 @@ class ID(object): else: raise TypeError("unsupported operand type(s) for <: 'ID' and '%s'" % type(other).__name__) + @copy_doc(b"".__len__) def __len__(self): return len(self.value) + def __xor__(self, other): + """ + Permor a XOR bit by bit between the current id and ``other`` + + :param other: A :class:`bytes` or :class:`ID` + :return: The resulted XORed bit by bit string + :rtype: bytes + """ if isinstance(other, ID): if six.PY2: return ''.join(chr(ord(a) ^ ord(b)) for a,b in zip(self.value, other.value)) @@ -237,19 +334,46 @@ class ID(object): raise TypeError("unsupported operand type(s) for ^: 'ID' and '%s'" % type(other).__name__) def __rxor__(self, other): + """ + Permor a XOR bit by bit between the current id and ``other`` + + :param other: A :class:`bytes` or :class:`ID` + :return: The resulted XORed bit by bit string + :rtype: bytes + """ return self.__xor__(other) + 
@copy_doc(b"".__hash__) def __hash__(self): return hash(self.value) def bencode(obj): + """ + bencode an arbitrary object + + :param obj: A combination of dict, list, bytes or int + :return: Its bencoded representation + :rtype: bytes + + Notes: + This method is just a wrapper around :func:`_bencode` + """ try: return _bencode(obj) except: print("%r" % obj) raise + def _bencode(obj): + """ + bencode an arbitrary object + :param obj: A combination of :class:`dict`, :class:`list`, :class:`bytes` or :class:`int` + :return: Its bencoded representation + :rtype: bytes + :raises EnvironmentError: if ``obj`` is not a combination of :class:`dict`, :class:`list`, + :class:`bytes` or :class:`int` + """ if isinstance(obj, int) or isinstance(obj, float): return b"i" + str(obj).encode() + b"e" elif isinstance(obj, bytes): @@ -270,10 +394,31 @@ def _bencode(obj): raise EnvironmentError("Can only encode int, str, list or dict, not %s" % type(obj).__name__) def bdecode(s): - return _bdecode(s)[0] + """ + bdecode an bytes string + + :param s: A bencoded bytes string + :return: Its bencoded representation + :rtype: A combination of :class:`dict`, :class:`list`, :class:`bytes` or :class:`int` + :raises BcodeError: If failing to decode ``s`` + + Notes: + This method is just a wrapper around :func:`_bdecode` + """ + return _bdecode(s, len(s))[0] cdef _decode_int(char* data, int *i, int max): - """decode arbitrary long integer""" + """ + decode an arbitrary long integer + + :param bytes data: The data to decode + :param int i: An index of ``data`` to start decoding from + :param int max: the length of ``data`` + :return: A decoded integer if ``data[i]`` is ``b'i'``else False + :rtype: :class:`int` or :class:`bool:class:` + :raises BcodeError: if reach end of data before ending decoding or if the value to + decode is not of the forme ``iNNNNe`` with N a digit + """ cdef int j #cdef long long ll[1] #_decode_long(data, i, max, ll) @@ -290,15 +435,29 @@ cdef _decode_int(char* data, int *i, 
int max): if i[0] <= max: return myint else: - raise ValueError("%s > %s : %r" % (i[0], max, data[:max])) + raise BcodeError( + "Reach end of data before end of decoding %s > %s : %r" % ( + i[0], max, data[:max] + ) + ) else: with gil: - raise ValueError("%s != e at %s %r" % (data[j], j, data[:max])) + raise BcodeError("%s != e at %s %r" % (data[j], j, data[:max])) else: with gil: return False cdef _decode_list(char* data, int* i, int max): + """ + decode a list + + :param bytes data: The data to decode + :param int i: An index of ``data`` to start decoding from + :param int max: the length of ``data`` + :return: A decoded list of decoded elements + :rtype: list + :raises BcodeError: if reach end of data before ending decoding or failing to decode a value + """ cdef int j[1] i[0]+=1 l = [] @@ -316,11 +475,22 @@ cdef _decode_list(char* data, int* i, int max): _decode_string(data, i, max, j) l.append(data[j[0]:i[0]]) else: - raise ValueError("??? %s" % data[i[0]]) + raise BcodeError("Unknown type, starting with %r" % data[i[0]]) i[0]+=1 return l cdef _decode_dict(char* data, int* i, int max): + """ + decode a dict + + :param bytes data: The data to decode + :param int i: An index of ``data`` to start decoding from + :param int max: the length of ``data`` + :return: A decoded dict of decoded elements + :rtype: dict + :raises BcodeError: if reach end of data before ending decoding or failing to decode a value + or one of the dict key is not of type bytes + """ cdef int j[1] i[0]+=1 d = {} @@ -332,9 +502,9 @@ cdef _decode_dict(char* data, int* i, int max): _decode_string(data, i, max, j) key = data[j[0]:i[0]] else: - raise ValueError("??? key must by string") + raise BcodeError("dict key must be string, and thus start with a digit") if data[i[0]] == b'e': - raise ValueError("??? 
key without value") + raise BcodeError("dict key without value") if data[i[0]] == b'i': d[key]=_decode_int(data, i, max) elif data[i[0]] == b'l': @@ -346,11 +516,23 @@ cdef _decode_dict(char* data, int* i, int max): _decode_string(data, i, max, j) d[key]=data[j[0]:i[0]] else: - raise ValueError("??? dict value%s" % data[i[0]]) + raise BcodeError("Unknown type of dict value starting with %r" % data[i[0]]) i[0]+=1 return d -cdef _decode(char* data, int max): +cdef _bdecode(char* data, int max): + """ + bdecode an bytes string + + :param s: A bencoded bytes string + :return: A couple: (bdecoded representation, rest of the string). If only one bencoded + object is given as argument, then the 'rest of the string' will be empty + :rtype: :class:`tuple` ( + combination of :class:`dict`, :class:`list`, :class:`bytes` or :class:`int`, + bytes + ) + :raises BcodeError: If failing to decode ``s`` + """ cdef int i[1] cdef int j[1] i[0]=0 @@ -369,12 +551,10 @@ cdef _decode(char* data, int max): _decode_string(data, i, max, j) return data[j[0]:i[0]], data[i[0]:max] else: - raise ValueError("??? dict value%s" % data[i[0]]) + raise BcodeError("Unknown type, starting with %r" % data[i[0]]) except ValueError as e: raise BcodeError(str(e)) -def _bdecode(s): - return _decode(s, len(s)) #cdef _bdecode2(char* s, int* ii): # if ii[0] > 2000 and (ii[0] % 100) == 0: @@ -463,8 +643,24 @@ def ip_in_nets(ip, nets): class PollableQueue(Queue.Queue): - def __init__(self, *args, **kwargs): - Queue.Queue.__init__(self, *args, **kwargs) + """ + A queue that can be watch using :func:`select.select` + + :param int maxsize: The maximum size on the queue. If maxsize is <= 0, the queue size is + infinite. + """ + + #: A :class:`socket.socket` object ready for read then here is something to pull from the queue + sock = None + + #: Internal socket that is written to then something is put on the queue + _putsocket = None + #: Alias of :attr:`sock`. 
Internal socket that is read from then something is pull from + #: the queue + _getsocket = None + + def __init__(self, maxsize=0): + Queue.Queue.__init__(self, maxsize=maxsize) # Create a pair of connected sockets if os.name == 'posix': self._putsocket, self._getsocket = socket.socketpair() diff --git a/docs/package/btdht.dht.rst b/docs/package/btdht.dht.rst index be09a32..962b6e4 100644 --- a/docs/package/btdht.dht.rst +++ b/docs/package/btdht.dht.rst @@ -4,63 +4,62 @@ btdht.dht module .. automodule:: btdht.dht :show-inheritance: -.. autoclass:: BucketFull - :show-inheritance: - :members: - :undoc-members: -.. autoclass:: BucketNotFull - :show-inheritance: - :members: - :undoc-members: -.. autoclass:: NoTokenError - :show-inheritance: - :members: - :undoc-members: -.. autoclass:: NotFound - :show-inheritance: - :members: - :undoc-members: -.. autoclass:: FailToStop - :show-inheritance: - :members: - :undoc-members: - .. autoclass:: DHT_BASE :show-inheritance: :members: - .. autoattribute:: ignored_net - - :class:`list` of default ignored ip networks - - .. autoattribute:: root + .. autoattribute:: bind_ip - :class:`RoutingTable` the used instance of the routing table + :class:`str` interface the dht is binded to .. autoattribute:: bind_port :class:`int` port the dht is binded to - .. autoattribute:: bind_ip + .. autoattribute:: debuglvl - :class:`str` interface the dht is binded to + :class:`int` the dht instance verbosity level + + .. autoattribute:: last_msg + + last time we received any message + + .. autoattribute:: last_msg_rep + + last time we receive a response to one of our messages + + .. autoattribute:: ignored_ip + + :class:`set` of ignored ip in dotted notation + + .. autoattribute:: ignored_net + + :class:`list` of default ignored ip networks .. autoattribute:: myid :class:`utils.ID` the dht instance id, 160bits long (20 Bytes) - .. autoattribute:: debuglvl + .. 
autoattribute:: prefix - :class:`int` the dht instance verbosity level + :class:`str` prefixing all debug message - .. autoattribute:: threads + .. autoattribute:: root - :class:`list` of the :class:`Thread` of the dht instance + :class:`RoutingTable` the used instance of the routing table - .. autoattribute:: transaction_type + .. autoattribute:: sock - Map beetween transaction id and messages type (to be able to match responses) + The current dht :class:`socket.socket` + + .. autoattribute:: stoped + + the state (stoped ?) of the dht + + .. autoattribute:: threads + + :class:`list` of the :class:`Thread` of the dht instance .. autoattribute:: token @@ -74,36 +73,114 @@ btdht.dht module Tokens received on get_peers response. Map between ip addresses and received token from ip. Needed to send announce_peer to that particular ip. - .. autoattribute:: sock + .. autoattribute:: transaction_type - The current dht :class:`socket.Socket` + Map beetween transaction id and messages type (to be able to match responses) - .. autoattribute:: stoped + .. autoattribute:: to_send - the state (stoped ?) of the dht + A :class:`PollableQueue` of messages (data, (ip, port)) to send + + .. autoattribute:: to_schedule + + A list of looping iterator to schedule, passed to :attr:`_scheduler` .. autoclass:: DHT :show-inheritance: + :members: :undoc-members: .. autoclass:: Node :show-inheritance: :members: - :undoc-members: + + .. autoattribute:: port + + UDP port of the node + + .. autoattribute:: last_response + + Unix timestamp of the last received response from this node + + .. autoattribute:: last_query + + Unix timestamp of the last received query from this node + + .. autoattribute:: failed + + Number of reponse pending (increase on sending query to the node, set to 0 on reception from + the node) + + .. autoattribute:: id + + 160bits (20 Bytes) identifier of the node + + .. autoattribute:: good + + ``True`` if the node is a good node. 
A good node is a node has responded to one of our + queries within the last 15 minutes. A node is also good if it has ever responded to one of + our queries and has sent us a query within the last 15 minutes. + + .. autoattribute:: bad + + ``True`` if the node is a bad node (communication with the node is not possible). Nodes + become bad when they fail to respond to 3 queries in a row. + + .. autoattribute:: ip + + IP address of the node in dotted notation + .. autoclass:: Bucket :show-inheritance: :members: - :undoc-members: -.. autoclass:: SplitQueue - :show-inheritance: - :members: - :undoc-members: + .. autoattribute:: max_size + + Maximun number of element in the bucket + + .. autoattribute:: last_changed + + Unix timestamp, last time the bucket had been updated + + .. autoattribute:: id + + A prefix identifier from 0 to 160 bits for the bucket + + .. autoattribute:: id_length + + Number of signifiant bit in :attr:`id` .. autoclass:: RoutingTable :show-inheritance: :members: - :undoc-members: + + .. autoattribute:: debuglvl + + :class:`int` the routing table instance verbosity level + + .. autoattribute:: trie + + The routing table storage data structure, an instance of :class:`datrie.Trie` + + .. autoattribute:: stoped + + The state (stoped ?) of the routing table + + .. autoattribute:: need_merge + + Is a merge sheduled ? + + .. autoattribute:: threads + + :class:`list` of the :class:`Thread` of the routing table instance + + .. autoattribute:: to_schedule + + A class:`list` of couple (weightless thread name, weightless thread function) + + .. autoattribute:: prefix + + Prefix in logs and threads name diff --git a/docs/package/btdht.exceptions.rst b/docs/package/btdht.exceptions.rst new file mode 100644 index 0000000..9ecd56b --- /dev/null +++ b/docs/package/btdht.exceptions.rst @@ -0,0 +1,7 @@ +btdht.exceptions module +======================= + +.. 
automodule:: btdht.exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/package/btdht.rst b/docs/package/btdht.rst index 64eaea9..7beab7f 100644 --- a/docs/package/btdht.rst +++ b/docs/package/btdht.rst @@ -9,6 +9,7 @@ Submodules btdht.dht btdht.utils btdht.krcp + btdht.exceptions Module contents --------------- From 39d64e8499ae03039d1fe6c448cafb8f4517725b Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Thu, 1 Dec 2016 19:20:05 +0100 Subject: [PATCH 20/30] Wrap long lines --- btdht/__init__.py | 4 +- btdht/dht.pyx | 181 +++++++++++++++++++++++++++++++++++----------- btdht/krcp.pyx | 38 ++++++++-- btdht/utils.pyx | 12 ++- 4 files changed, 183 insertions(+), 52 deletions(-) diff --git a/btdht/__init__.py b/btdht/__init__.py index dd7e057..faf1fd8 100644 --- a/btdht/__init__.py +++ b/btdht/__init__.py @@ -10,4 +10,6 @@ # # (c) 2015 Valentin Samir -from btdht.dht import DHT, DHT_BASE, ID, Node, Bucket, RoutingTable, NotFound, BucketFull, NoTokenError +from btdht.dht import DHT, DHT_BASE, Node, Bucket, RoutingTable +from btdht.utils import ID +from btdht.exceptions import NotFound, BucketFull, NoTokenError diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 571db52..4b03fdb 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -190,7 +190,10 @@ cdef class DHT_BASE: self._scheduler = utils.Scheduler() if scheduler is None else scheduler # initialising the routing table - self.root = RoutingTable(scheduler=self._scheduler, prefix=prefix) if routing_table is None else routing_table + if routing_table is None: + self.root = RoutingTable(scheduler=self._scheduler, prefix=prefix) + else: + self.root = routing_table self.bind_port = bind_port self.bind_ip = bind_ip @@ -606,7 +609,10 @@ cdef class DHT_BASE: ts = time.time() + delay closest = self.get_closest_nodes(hash) typ = "peers" - heapq.heappush(self._get_peer_loop_list, (ts, hash, tried_nodes, closest, typ, callback, limit)) + heapq.heappush( + self._get_peer_loop_list, + (ts, hash, tried_nodes, 
closest, typ, callback, limit) + ) if block: while hash in self._get_peer_loop_lock and not self.stoped: peers = self._get_peers(hash, compact=False) @@ -642,7 +648,9 @@ cdef class DHT_BASE: stop() return # fetch next hash to process - (ts, hash, tried_nodes, closest, typ, callback, limit) = heapq.heappop(self._get_peer_loop_list) + ( + ts, hash, tried_nodes, closest, typ, callback, limit + ) = heapq.heappop(self._get_peer_loop_list) if typ not in ["peers", "closest"]: raise ValueError("typ should not be %s" % typ) # if process time is in the past process it @@ -660,7 +668,13 @@ cdef class DHT_BASE: tried_nodes.add(node) ts = time.time() + 2 # we search peers and we found as least limit of them - if (typ == "peers" and limit and hash in self._got_peers and self._got_peers[hash] and len(self._got_peers[hash])>=limit): + if ( + typ == "peers" and + limit and + hash in self._got_peers and + self._got_peers[hash] and + len(self._got_peers[hash])>=limit + ): self.debug(2, "Hash %s find peers" % binascii.b2a_hex(hash)) if callback: callback(self._get_peers(hash, compact=False)) @@ -673,7 +687,10 @@ cdef class DHT_BASE: on_stop(hash, typ) # Else had it the the heap to be processed later else: - heapq.heappush(self._get_peer_loop_list, (ts, hash, tried_nodes, _closest, typ, callback, limit)) + heapq.heappush( + self._get_peer_loop_list, + (ts, hash, tried_nodes, _closest, typ, callback, limit) + ) del node del ts else: @@ -683,9 +700,13 @@ cdef class DHT_BASE: if callback: callback(self._get_peers(hash, compact=False)) on_stop(hash, typ) - # we did not found peers nor closest node althougth we ask every close nodes we know of + # we did not found peers nor closest node althougth we ask every close nodes + # we know of else: - self.debug(2, "Hash %s not peers or nodes not found" % binascii.b2a_hex(hash)) + self.debug( + 2, + "Hash %s not peers or nodes not found" % binascii.b2a_hex(hash) + ) if callback: callback([]) on_stop(hash, typ) @@ -694,7 +715,10 @@ cdef class 
DHT_BASE: else: # if fetch time in the future, sleep until that date tosleep = max(1, ts - time.time()) - heapq.heappush(self._get_peer_loop_list, (ts, hash, tried_nodes, closest, typ, callback, limit)) + heapq.heappush( + self._get_peer_loop_list, + (ts, hash, tried_nodes, closest, typ, callback, limit) + ) break del tried_nodes del closest @@ -727,23 +751,33 @@ cdef class DHT_BASE: elif not info_hash in self._got_peers and not compact: return None else: - try: - # In compact mode (to send over udp) return at most 70 peers to avoid udp fragmentation - if compact: - peers = [(-t,ip,port) for ((ip, port), t) in six.iteritems(self._peers[info_hash])] - # putting the more recent annonces in first - peers.sort() - return [struct.pack("!4sH", socket.inet_aton(ip), port) for (_, ip, port) in peers[0:70]] - else: - peers = [(-t,ip,port) for ((ip, port), t) in six.iteritems(self._got_peers[info_hash])] - # putting the more recent annonces in first - peers.sort() - return [(ip, port) for (_, ip, port) in peers] - except KeyError: - if errno > 20: - raise - time.sleep(0.1) - return self._get_peers(info_hash, compact, errno=errno+1) + try: + # In compact mode (to send over udp) return max 70 peers to avoid udp fragmentation + if compact: + peers = [ + (-t,ip,port) for ((ip, port), t) in six.iteritems(self._peers[info_hash]) + ] + # putting the more recent annonces in first + peers.sort() + return [ + struct.pack("!4sH", socket.inet_aton(ip), port) + for (_, ip, port) + in peers[0:70] + ] + else: + peers = [ + (-t,ip,port) + for ((ip, port), t) + in six.iteritems(self._got_peers[info_hash]) + ] + # putting the more recent annonces in first + peers.sort() + return [(ip, port) for (_, ip, port) in peers] + except KeyError: + if errno > 20: + raise + time.sleep(0.1) + return self._get_peers(info_hash, compact, errno=errno+1) def get_closest_nodes(self, id, compact=False): """ @@ -776,7 +810,9 @@ cdef class DHT_BASE: def bootstarp( self, addresses=[ - ("router.utorrent.com", 
6881), ("grenade.genua.fr", 6880), ("dht.transmissionbt.com", 6881) + ("router.utorrent.com", 6881), + ("grenade.genua.fr", 6880), + ("dht.transmissionbt.com", 6881) ] ): """ @@ -1482,10 +1518,16 @@ cdef class DHT_BASE: elif msg.y == b"e": query = self.transaction_type.get(msg.t, (None, None, None))[2] if msg.errno == 201: - self.debug(2, "ERROR:201:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {}))) + self.debug( + 2, + "ERROR:201:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {})) + ) return GenericError(msg.t, msg.errmsg), query elif msg.errno == 202: - self.debug(2, "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {}))) + self.debug( + 2, + "ERROR:202:%s pour %r" % (msg.errmsg, self.transaction_type.get(msg.t, {})) + ) return ServerError(msg.t, msg.errmsg), query elif msg.errno == 203: t = self.transaction_type.get(msg.t) @@ -1496,7 +1538,12 @@ cdef class DHT_BASE: self.debug(0 if t else 1, "ERROR:204:%s pour %r" % (msg.errmsg, t)) return MethodUnknownError(msg.t, msg.errmsg), query else: - self.debug(3, "ERROR:%s:%s pour %r" % (msg.errno, msg.errmsg, self.transaction_type.get(msg.t, {}))) + self.debug( + 3, + "ERROR:%s:%s pour %r" % ( + msg.errno, msg.errmsg, self.transaction_type.get(msg.t, {}) + ) + ) raise MethodUnknownError(msg.t, b"Error code %s unknown" % msg.errno) else: raise ValueError("UNKNOWN MSG: %r decoded as %r from %r" % (s, msg, addr)) @@ -1566,8 +1613,12 @@ cdef class Node: def __get__(self): now = time.time() # A good node is a node has responded to one of our queries within the last 15 minutes. - # A node is also good if it has ever responded to one of our queries and has sent us a query within the last 15 minutes. - return ((now - self.last_response) < 15 * 60) or (self.last_response > 0 and (now - self.last_query) < 15 * 60) + # A node is also good if it has ever responded to one of our queries and has sent us + # a query within the last 15 minutes. 
+ return ( + ((now - self.last_response) < 15 * 60) or + (self.last_response > 0 and (now - self.last_query) < 15 * 60) + ) #: ``True`` if the node is a bad node (communication with the node is not possible). Nodes #: become bad when they fail to respond to 3 queries in a row. @@ -1604,7 +1655,9 @@ cdef class Node: with nogil: if not port > 0 and port < 65536: with gil: - raise ValueError("Invalid port number %s, sould be within 1 and 65535 for %s" % (port, ip)) + raise ValueError( + "Invalid port number %s, sould be within 1 and 65535 for %s" % (port, ip) + ) #self._id = malloc(20 * sizeof(char)) strncpy(self._id, cid, 20) #self._ip = malloc(4 * sizeof(char)) @@ -1621,13 +1674,41 @@ cdef class Node: elif op == 3: # != return other.id != self.id elif op == 0: # < - return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) + return max( + self.last_response, + self.last_query + ) < max( + other.last_response, + other.last_query + ) elif op == 4: # > - return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) and not (other.id == self.id) + return not ( + max( + self.last_response, + self.last_query + ) < max( + other.last_response, + other.last_query + ) + ) and not (other.id == self.id) elif op == 1: # <= - return max(self.last_response, self.last_query) < max(other.last_response, other.last_query) or (other.id == self.id) + return max( + self.last_response, + self.last_query + ) < max( + other.last_response, + other.last_query + ) or (other.id == self.id) elif op == 5: # >= - return not (max(self.last_response, self.last_query) < max(other.last_response, other.last_query)) + return not ( + max( + self.last_response, + self.last_query + ) < max( + other.last_response, + other.last_query + ) + ) else: return False @@ -1848,7 +1929,11 @@ class Bucket(list): return False return True except IndexError as e: - print("%r i:%s selfid:%s:%s:%r nodeid:%d:%r %r" % (e, i, len(self.id), self.id_length, 
self.id, len(id), id, self)) + print( + "%r i:%s selfid:%s:%s:%r nodeid:%d:%r %r" % ( + e, i, len(self.id), self.id_length, self.id, len(id), id, self + ) + ) return False else: return False @@ -2007,7 +2092,9 @@ class Bucket(list): else: return self.id_length < other.id_length except AttributeError: - raise ValueError("%s not comparable with %s" % (other.__class__.__name__, self.__class__.__name__)) + raise ValueError( + "%s not comparable with %s" % (other.__class__.__name__, self.__class__.__name__) + ) DHT = type("DHT", (DHT_BASE,), {'__doc__': DHT_BASE.__doc__}) @@ -2106,7 +2193,10 @@ class RoutingTable(object): else: break if self._threads: - self.debug(0, "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads)) + self.debug( + 0, + "Unable to stop %s threads, giving up:\n%r" % (len(self._threads), self._threads) + ) self.zombie = True self._threads_zombie.extend(self._threads) self._threads = [] @@ -2547,7 +2637,11 @@ class RoutingTable(object): self.trie[key[:-1]] = bucket del self.trie[prefix1] else: - self.trie[key[:-1]] = Bucket(id=bucket0.id, id_length=len(key[:-1]), init=bucket0) + self.trie[key[:-1]] = Bucket( + id=bucket0.id, + id_length=len(key[:-1]), + init=bucket0 + ) del self.trie[prefix0] stack.append(key[:-1]) except KeyError: @@ -2560,5 +2654,10 @@ class RoutingTable(object): if full_merge: self._heigth = max(len(k) for k in self.trie.keys()) + 1 - self.debug(1, "%s nodes merged in %ss" % (nodes_before - self.stats()[0], int(time.time() - started))) + self.debug( + 1, + "%s nodes merged in %ss" % ( + nodes_before - self.stats()[0], int(time.time() - started) + ) + ) diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index 5edc068..fcbb1c3 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -93,9 +93,16 @@ cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: return False i[0]+=1 while data[i[0]] != b'e' and i[0] < max: - if not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not 
_decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max): + if ( + not _decode_string(data, i, max, j) and + not _decode_int(data, i, max, ll) and + not _decode_pass_list(data, i, max) and + not _decode_pass_dict(data, i, max) + ): with gil: - raise DecodeError("Unable to parse one of the element of the list %d %r" % (i[0], data[:max])) + raise DecodeError( + "Unable to parse one of the element of the list %d %r" % (i[0], data[:max]) + ) if i[0] >= max: with gil: raise DecodeError("list_pass: %s > %s : %r" % (i[0], max, data[:max])) @@ -114,9 +121,19 @@ cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: return False i[0]+=1 while data[i[0]] != b'e' and i[0] < max: - if not _decode_string(data, i, max, j) or (not _decode_string(data, i, max, j) and not _decode_int(data, i, max, ll) and not _decode_pass_list(data, i, max) and not _decode_pass_dict(data, i, max)): + if ( + not _decode_string(data, i, max, j) or + ( + not _decode_string(data, i, max, j) and + not _decode_int(data, i, max, ll) and + not _decode_pass_list(data, i, max) and + not _decode_pass_dict(data, i, max) + ) + ): with gil: - raise DecodeError("Unable to parse one of the element of the dict %d %r" % (i[0], data[:max])) + raise DecodeError( + "Unable to parse one of the element of the dict %d %r" % (i[0], data[:max]) + ) if i[0] >= max: with gil: raise DecodeError("dict_pass: %s > %s : %r" % (i[0], max, data[:max])) @@ -212,7 +229,9 @@ cdef int _encode_string(char* data, int* i, int max, char* str, int strlen) nogi class BError(Exception): y = b"e" t = None # string value representing a transaction ID - e = None # a list. The first element is an integer representing the error code. The second element is a string containing the error message + # a list. The first element is an integer representing the error code. 
+ # The second element is a string containing the error message + e = None def __init__(self, t, e, **kwargs): if t is None: raise ValueError("t should not be None") @@ -788,7 +807,10 @@ cdef class BMessage: if self.r or self.a or self.e: # only one can be True estimated_len+=2 + 3 # the d and e of the a ou r dict if self.e: - estimated_len+=int_length(self._errno) + 2 + self.errmsg_len + 1 + int_length(self.errmsg_len) + estimated_len+=( + int_length(self._errno) + 2 + + self.errmsg_len + 1 + int_length(self.errmsg_len) + ) if self.r or self.a: if self.has_id: estimated_len+=23 + 4 @@ -1167,7 +1189,9 @@ cdef class BMessage: c+=1 if i[0] >= max or data[i[0]] != b'e': with gil: - raise DecodeError("End of values list not found %s >= %s found %s elements" % (i[0], max, c)) + raise DecodeError( + "End of values list not found %s >= %s found %s elements" % (i[0], max, c) + ) i[0] = k values = malloc(c * sizeof(char*)) c=0 diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 9ef3c7a..4e67702 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -305,7 +305,9 @@ class ID(object): elif isinstance(other, str): return self.value < other else: - raise TypeError("unsupported operand type(s) for <: 'ID' and '%s'" % type(other).__name__) + raise TypeError( + "unsupported operand type(s) for <: 'ID' and '%s'" % type(other).__name__ + ) @copy_doc(b"".__len__) def __len__(self): @@ -331,7 +333,9 @@ class ID(object): else: return bytes([a ^ b for a,b in zip(self.value, other)]) else: - raise TypeError("unsupported operand type(s) for ^: 'ID' and '%s'" % type(other).__name__) + raise TypeError( + "unsupported operand type(s) for ^: 'ID' and '%s'" % type(other).__name__ + ) def __rxor__(self, other): """ @@ -391,7 +395,9 @@ def _bencode(obj): d.append(v) return b"d" + b"".join(_bencode(o) for o in d) + b"e" else: - raise EnvironmentError("Can only encode int, str, list or dict, not %s" % type(obj).__name__) + raise EnvironmentError( + "Can only encode int, str, list or dict, not %s" 
% type(obj).__name__ + ) def bdecode(s): """ From e01be8a22ef9b619cb18caf155cbfc62752595b3 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 15:03:50 +0100 Subject: [PATCH 21/30] Update doc --- btdht/dht.pyx | 11 +- btdht/krcp.pxd | 2 +- btdht/krcp.pyx | 762 ++++++++++++++++++++++++++++++++++-- btdht/utils.pyx | 25 +- docs/package/btdht.dht.rst | 106 +++++ docs/package/btdht.krcp.rst | 90 ++++- 6 files changed, 959 insertions(+), 37 deletions(-) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 4b03fdb..f12af4c 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -344,9 +344,10 @@ cdef class DHT_BASE: """ Start the dht: * initialize some attributes + * initialize the dht socket (see :meth:init_socket) * register this instance of the dht in the routing table (see :meth:`RoutingTable.register_dht`) - * initialize the dht socket (see :meth:init_socket) + * register this instance of the dht in the scheduler * start the routing table if needed and ``start_routing_table` is ``True`` * start the scheduler if needed and ``start_scheduler`` is ``True`` @@ -1734,6 +1735,8 @@ cdef class Node: @classmethod def from_compact_infos(cls, infos): """ + This is a classmethod + Instancy nodes from multiple compact node information string :param bytes infos: A string of size multiple of 26 @@ -1766,6 +1769,8 @@ cdef class Node: @classmethod def from_compact_info(cls, info): """ + This is a classmethod + Instancy a node from its compact node infoformation string :param bytes info: A string of length 26 @@ -2206,7 +2211,7 @@ class RoutingTable(object): """``True`` if dht is stopped but one thread or more remains alive, ``False`` otherwise""" return self.stoped and [t for t in self._threads if t.is_alive()] - def start(self, **kwargs): + def start(self): """start the routing table""" with self._lock: if not self.stoped: @@ -2365,7 +2370,7 @@ class RoutingTable(object): self.stop() def debug(self, lvl, msg): - """same as debug on DHT_BASE""" + """same as 
:meth:`DHT_BASE.debug`""" if ( lvl <= self.debuglvl and (msg != self._last_debug or (time.time() - self._last_debug_time) > 5) diff --git a/btdht/krcp.pxd b/btdht/krcp.pxd index 51add1b..c3e9d30 100644 --- a/btdht/krcp.pxd +++ b/btdht/krcp.pxd @@ -1,4 +1,4 @@ -cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil except -1 +cdef long _decode_int(char* data, int *i, int max, long long *myint) nogil except -1 cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1 cdef class BMessage: cdef char* _y diff --git a/btdht/krcp.pyx b/btdht/krcp.pyx index fcbb1c3..44c8ee2 100644 --- a/btdht/krcp.pyx +++ b/btdht/krcp.pyx @@ -14,13 +14,7 @@ import os from libc cimport math from libc.stdio cimport printf, sprintf from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy -from libc.stdlib cimport atoi, atoll, malloc, free - -if os.name == 'posix': - from libc.stdlib cimport atoll -else: - from libc.stdlib cimport atol as atoll - +from libc.stdlib cimport atoi, atol, malloc, free from cython.parallel import prange import six @@ -29,6 +23,18 @@ import utils from .exceptions import MissingT, DecodeError cdef int str_to_int(char* data, int len) nogil: + """ + Transform a string of 1-9 to an int + + :param bytes data: A string with only character from 1 to 9 + :param int len: The length of data + :return: The integer represented by ``data`` + :rtype: int + + Notes: + We use atoi for the conversion so the integer must be a 32 bits signed integer.
+ This function can be called without the python GIL + """ cdef char* msg = NULL cdef int i try: @@ -41,7 +47,19 @@ cdef int str_to_int(char* data, int len) nogil: free(msg) return i -cdef long str_to_long_long(char* data, int len) nogil: +cdef long str_to_long(char* data, int len) nogil: + """ + Transform a string of 1-9 to an int + + :param bytes data: A string with only character from 1 to 9 + :param int len: The length of data + :return: The integer represented by ``data`` + :rtype: int + + Notes: + We use atol for the conversion so the integer must be a 64 bits signed integer. + This function can be called without the python GIL + """ cdef char* msg = NULL cdef long long i if data[0] == b'-' and len > 16 or len > 17: @@ -53,13 +71,21 @@ cdef long str_to_long_long(char* data, int len) nogil: msg = malloc((len+1) * sizeof(char)) strncpy(msg, data, len) msg[len]=b'\0' - i = atoll(msg) + i = atol(msg) finally: if msg != NULL: free(msg) return i cdef int int_length(int i) nogil: + """ + :param int i: An integer + :return: The size of the string necessary to write an integer in decimal notation + :rtype: int + + Notes: + This function can be called without the python GIL + """ if i == 0: return 1 elif i < 0: @@ -68,12 +94,37 @@ cdef int int_length(int i) nogil: return ( math.log10(i)) + 1 cdef varray_to_list(char ** data, size): + """ + Transform a C array of compact peers information to a python list + + :param data: A C array of 6 length strings, each one representing a compact peers + information + :return: A python list of each elements of ``data`` + :rtype: list + + Note: + Contact information for peers is encoded as a 6-byte string. + Also known as "Compact IP-address/port info" the 4-byte IP address + is in network byte order with the 2 byte port in network byte order + concatenated onto the end.
+ """ l=[] for i in range(size): l.append(data[i][:6]) return l cdef char** vlist_to_array(l, int size=6): + """ + Transform a python list of compact peers information to a C array + + :param list l: A list of string of equals length + :param int size: The length of the strings of ``l`` + :return: A C array of all of the elements of ``l`` + + Notes: + This function allocate a dynamic memory range (using malloc) for the returned array. + You need to manually free the returned value to free the memory. + """ cdef char ** data = malloc(len(l) * sizeof(char*)) for i in range(len(l)): if len(l[i]) != size: @@ -84,6 +135,20 @@ cdef char** vlist_to_array(l, int size=6): cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: + """ + Pass a bencoded list in a bencoded string + + :param bytes data: A bencoded string + :param int[1] i: reference to the index on ``data`` we start reading + :param int max: The length of ``data`` + :return bool: ``True`` if a bencoded list is successfully passed, then ``i`` is set + to the index of the next bencoded object in ``data``, ``False`` otherwise. + :rtype: bool + :raises DecodeError: if failed to decode ``data`` + + Notes: + This function can be called without the python GIL + """ cdef int j[1] cdef long long ll[1] if i[0] >= max + 1: @@ -112,6 +177,20 @@ cdef int _decode_pass_list(char* data, int *i, int max) nogil except -1: return True cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: + """ + Pass a bencoded dict in a bencoded string + + :param bytes data: A bencoded string + :param int[1] i: reference to the index on ``data`` we start reading + :param int max: The length of ``data`` + :return bool: ``True`` if a bencoded dict is successfully passed, then ``i`` is set + to the index of the next bencoded object in ``data``, ``False`` otherwise. 
+ :rtype: bool + :raises DecodeError: if failed to decode ``data`` + + Notes: + This function can be called without the python GIL + """ cdef int j[1] cdef long long ll[1] if i[0] >= max + 1: @@ -143,6 +222,20 @@ cdef int _decode_pass_dict(char* data, int *i, int max) nogil except -1: return True cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1: + """ + Pass a bencoded string in a bencoded string + + :param bytes data: A bencoded string + :param int[1] i: reference to the index on ``data`` we start reading + :param int max: The length of ``data`` + :return bool: ``True`` if a bencoded string is successfully passed, then ``i`` is set + to the index of the next bencoded object in ``data``, ``False`` otherwise. + :rtype: bool + :raises DecodeError: if failed to decode ``data`` + + Notes: + This function can be called without the python GIL + """ cdef int ret if data[i[0]] == b'0' \ or data[i[0]] == b'2' \ @@ -171,10 +264,26 @@ cdef int _decode_string(char* data, int* i, int max, int* j) nogil except -1: else: return False -cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil except -1: +cdef long _decode_int(char* data, int *i, int max, long long *myint) nogil except -1: """ - warning ! use only if you are sure that int to decode fetch in a signed 64bit integer - otherwise, use the function from utils that can decode arbitrary long integer + Decode a bencoded int and write it to ``myint`` + + :param bytes data: A bencoded string + :param int[1] i: reference to the index on ``data`` we start reading + :param int max: The length of ``data`` + :param int[1] myint: Where to write the decode dencoded int + :return bool: ``True`` if a bencoded int is successfully decoded, then ``i`` is set + to the index of the next bencoded object in ``data``, ``myint`` is set to the value of + the decoded int. ``False`` otherwise. 
+ :rtype: bool + :raises DecodeError: if failed to decode ``data`` + + Warning: + Use only if you are sure that int to decode is a signed 64bit integer + otherwise, use the function from utils that can decode arbitrary long integer + + Notes: + This function can be called without the python GIL """ cdef int j if data[i[0]] == b'i': @@ -183,7 +292,7 @@ cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil excep while data[j] != b'e' and j < max: j+=1 if data[j] == b'e': - myint[0]=str_to_long_long(data + i[0], j-i[0]) + myint[0]=str_to_long(data + i[0], j-i[0]) i[0]=j+1 if i[0] <= max: return True @@ -197,6 +306,21 @@ cdef int _decode_int(char* data, int *i, int max, long long *myint) nogil excep return False cdef int _encode_int(char* data, int *i, int max, int j) nogil: + """ + Bencode the integer ``j``and write it in ``data`` at offset ``i`` + + :param bytes data: A bencoded string + :param int[1] i: The index of ``data`` we start writting to + :param int max: The length of ``data`` + :param int j: The integer to bencode and write to data + :return: ``True`` if ``j`` is successfully bencoded and written to ``data``, then ``i`` is + set to the offset of ``data`` immediately after the bencoded int ``j``. ``False`` + otherwise. 
+ :rtype: bool + + Notes: + This function can be called without the python GIL + """ cdef int l l = int_length(j) if max >= i[0] + l + 2: @@ -212,6 +336,22 @@ cdef int _encode_int(char* data, int *i, int max, int j) nogil: return False cdef int _encode_string(char* data, int* i, int max, char* str, int strlen) nogil: + """ + Bencode the string ``str`` of length ``strlen`` and write it in ``data`` at offset ``i`` + + :param bytes data: A bencoded string + :param int[1] i: The index of ``data`` we start writing to + :param int max: The length of ``data`` + :param bytes str: The string to bencode and write to data + :param int strlen: The length of ``str`` + :return: ``True`` if ``str`` is successfully bencoded and written to ``data``, then ``i`` is + set to the offset of ``data`` immediately after the bencoded string ``str``. ``False`` + otherwise. + :rtype: bool + + Notes: + This function can be called without the python GIL + """ cdef int l l = int_length(strlen) if max >= i[0] + l + 1 + strlen: # size as char + : + string @@ -227,9 +367,18 @@ cdef int _encode_string(char* data, int* i, int max, char* str, int strlen) nogi return False class BError(Exception): + """ + A base class exception for all bittorrent DHT protocol error exceptions + + :param bytes t: The value of the key t of the query for which the error is returned + :param list e: A couple [error code, error message] + """ + #: The ``y`` key of the error message. For an error message, it is always ``b"e"`` y = b"e" - t = None # string value representing a transaction ID - # a list. The first element is an integer representing the error code. + #: string value representing a transaction ID, must be set to the query transaction ID + #: for which an error is raised. + t = None + # A list. The first element is an :class:`int` representing the error code.
# The second element is a string containing the error message e = None def __init__(self, t, e, **kwargs): @@ -240,6 +389,12 @@ class BError(Exception): super(BError, self).__init__(*e, **kwargs) def encode(self): + """ + Bencode the error message + + :return: The bencoded error message ready to be send + :rtype: bytes + """ return utils.bencode({b"y":self.y, b"t":self.t, b"e":self.e}) def __str__(self): @@ -249,36 +404,142 @@ class BError(Exception): return "%s: %r" % (self.__class__.__name__, self.e) class GenericError(BError): + """ + A Generic Error, error code 201 + + :param bytes t: The value of the key t of the query for with the error is returned + :param bytes msg: An optionnal error message + """ def __init__(self, t, msg=b""): super(GenericError, self).__init__(t=t, e=[201, msg]) class ServerError(BError): + """ + A Server Error, error code 202 + + :param bytes t: The value of the key t of the query for with the error is returned + :param bytes msg: An optionnal error message + """ def __init__(self, t, msg=b"Server Error"): super(ServerError, self).__init__(t=t, e=[202, msg]) class ProtocolError(BError): + """ + A Protocol Error, such as a malformed packet, invalid arguments, or bad token, + error code 203 + + :param bytes t: The value of the key t of the query for with the error is returned + :param bytes msg: An optionnal error message + """ def __init__(self, t, msg=b"Protocol Error"): super(ProtocolError, self).__init__(t=t, e=[203, msg]) class MethodUnknownError(BError): + """ + Method Unknown, error code 204 + + :param bytes t: The value of the key t of the query for with the error is returned + :param bytes msg: An optionnal error message + """ def __init__(self, t, msg=b"Method Unknow"): super(MethodUnknownError, self).__init__(t=t, e=[204, msg]) cdef class BMessage: + """ + A bittorrent DHT message. This class is able to bdecode a bittorrent DHT message. 
It + expose then the messages keys ``t``, ``y``, ``q``, ``errno``, ``errmsg`` and ``v`` as + attributes, and behave itself as a dictionnary for the ``a`` or ``r`` keys that contains + a secondary dictionnary (see Notes). + + :param tuple addr: An optionnal coupe (ip, port) of the sender of the message + :param bool debug: ``True`` for enabling debug message. The default is ``False`` + + Notes: + A query message is always of the following form with ``y == b'q'``:: + + { + "t": t, + "y": y, + "q": q, + "a": {...} + } + + A response message is always of the following form with ``y == b'r'``:: + + { + "t": t, + "y": y, + "r": {...} + } + + An error message is always in response of a query message and of the following form + with ``y == b'e'``:: + + { + "t": t, + "y": y, + "e":[errno, errmsg] + } + + The ``t`` key is a random string generated with every query. It is used to match + a response to a particular query. + + The ``y`` key is used to differenciate the type of the message. Its value is ``b'q'`` + for a query, ``b'r'`` for a response, and ``b'e'`` for and error message. + + The ``q`` is only present on query message and contain the name of the query (ping, + get_peers, announce_peer, find_node) + + ``errno`` and ``errmsg`` are only defined if the message is an error message. They are + respectively the error number (:class:`int`) and the error describing message of the error. + + The ``v`` key is set by some DHT clients to the name and version of the client and + is totally optionnal in the protocol. 
+ """ cdef int set_r(self, int value) nogil: + """ + :param bool value: If ``True`` mark the current :class:`BMessage` as having a ``r`` dict + (response dictionnary) + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self.r = value return True cdef int set_a(self, int value) nogil: + """ + :param bool value: If ``True`` mark the current :class:`BMessage` as a ``a`` dict + (query dictionnary) + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self.a = value return True cdef int set_e(self, int value) nogil: + """ + :param bool value: If ``True`` mark the current :class:`BMessage` as an error, + having a ``e`` list + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self.e = value return True cdef int set_t(self, char* value, int size) nogil: + """ + Set the ``t`` current message key to ``value`` + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_t: free(self._t) @@ -290,6 +551,12 @@ cdef class BMessage: return True cdef void del_t(self) nogil: + """ + Unset the ``t`` current message key + + Notes: + This method can be called without the python GIL + """ if self.has_t: self.encoded_uptodate = False self.has_t = False @@ -297,6 +564,15 @@ cdef class BMessage: free(self._t) cdef int set_v(self, char* value, int size) nogil: + """ + Set the ``v`` current message key to ``value`` + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_v: free(self._v) @@ -308,6 +584,12 @@ cdef class BMessage: return True cdef void del_v(self) nogil: + """ + Unset the ``v`` current message key + + Notes: + This method can be called without the python GIL + """ if self.has_v: 
self.encoded_uptodate = False self.has_v = False @@ -315,6 +597,15 @@ cdef class BMessage: free(self._v) cdef int set_y(self, char* value, int size) nogil: + """ + Set the ``y`` current message key to ``value`` + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_y: free(self._y) @@ -326,6 +617,12 @@ cdef class BMessage: return True cdef void del_y(self) nogil: + """ + Unset the ``y`` current message key + + Notes: + This method can be called without the python GIL + """ if self.has_y: self.encoded_uptodate = False self.has_y = False @@ -333,6 +630,15 @@ cdef class BMessage: free(self._y) cdef int set_q(self, char* value, int size) nogil: + """ + Set the ``q`` current message key to ``value`` + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_q: free(self._q) @@ -344,6 +650,12 @@ cdef class BMessage: return True cdef void del_q(self) nogil: + """ + Unset the ``q`` current message key + + Notes: + This method can be called without the python GIL + """ if self.has_q: self.encoded_uptodate = False self.has_q = False @@ -351,6 +663,15 @@ cdef class BMessage: free(self._q) cdef int set_id(self, char* value, int size) nogil except -1: + """ + Set the id of the message + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_id: free(self.id) @@ -361,12 +682,27 @@ cdef class BMessage: return True cdef void del_id(self) nogil: + """ + Unset the id of the message + + Notes: + This method can be called without the python GIL + """ if self.has_id: self.encoded_uptodate = False self.has_id = False free(self.id) cdef int set_target(self, char* value, int size) nogil except -1: + """ + Set 
the target of the message + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_target: free(self.target) @@ -377,12 +713,27 @@ cdef class BMessage: return True cdef void del_target(self) nogil: + """ + Unset the target of the message + + Notes: + This method can be called without the python GIL + """ if self.has_target: self.has_target = False self.encoded_uptodate = False free(self.target) cdef int set_info_hash(self, char* value, int size) nogil except -1: + """ + Set the info_hash of the message + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_info_hash: free(self.info_hash) @@ -393,32 +744,75 @@ cdef class BMessage: return True cdef void del_info_hash(self) nogil: + """ + Unset the info_hash of the message + + Notes: + This method can be called without the python GIL + """ if self.has_info_hash: self.has_info_hash = False self.encoded_uptodate = False free(self.info_hash) cdef void del_implied_port(self) nogil: + """ + Unset implied_port + + Notes: + This method can be called without the python GIL + """ self.has_implied_port = False self.encoded_uptodate = False cdef int set_implied_port(self, int value) nogil: + """ + Set implied_port to ``value`` + + :param bool value: A boolean + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self.implied_port = value self.has_implied_port = True return True cdef int set_port(self, int value) nogil: + """ + Set port to ``value`` + + :param int port: An integer + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self.port = value self.has_port = True return True cdef void del_port(self) nogil: + """ + Unset port attribut of th emessage + + Notes: + This method can be 
called without the python GIL + """ self.has_port = False self.encoded_uptodate = False cdef int set_token(self, char* value, int size) nogil: + """ + Set the token of the message + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_token: free(self.token) @@ -430,6 +824,12 @@ cdef class BMessage: return True cdef void del_token(self) nogil: + """ + Unset the token of the message + + Notes: + This method can be called without the python GIL + """ if self.has_token: self.has_token = False self.encoded_uptodate = False @@ -437,6 +837,15 @@ cdef class BMessage: free(self.token) cdef int set_nodes(self, char* value, int size) nogil: + """ + Set the nodes attribute of the message + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.has_nodes: free(self.nodes) @@ -448,6 +857,12 @@ cdef class BMessage: return True cdef int del_nodes(self) nogil: + """ + Unset the nodes attribute of the message + + Notes: + This method can be called without the python GIL + """ if self.has_nodes: self.has_nodes = False self.encoded_uptodate = False @@ -455,6 +870,15 @@ cdef class BMessage: free(self.nodes) cdef int set_values(self, char** values, int nb) nogil: + """ + Set the values of the message + + :param array value: An array of size ``nb`` of string of length 6 + :param int nb: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ cdef int i self.encoded_uptodate = False if self.has_values: @@ -468,6 +892,12 @@ cdef class BMessage: return True cdef void del_values(self) nogil: + """ + Unset the values of the messages + + Notes: + This method can be called without the python GIL + """ cdef int i = 0 if self.has_values: self.has_values = False @@ -478,6 +908,15 @@ cdef class BMessage: 
free(self.values) cdef int set_errmsg(self, char* value, int size) nogil: + """ + Set the errmsg attribute + + :param bytes value: A string + :param int size: The length of ``value`` + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False if self.errmsg_len > 0: free(self._errmsg) @@ -487,23 +926,52 @@ cdef class BMessage: return True cdef void del_errmsg(self) nogil: + """ + Unset the errmsg attribute + + Notes: + This method can be called without the python GIL + """ if self.errmsg_len > 0: self.errmsg_len = 0 self.encoded_uptodate = False free(self._errmsg) cdef int set_errno(self, int value) nogil: + """ + Set the errno attribute + + :param int value: The error number to set + + Notes: + This method can be called without the python GIL + """ self.encoded_uptodate = False self._errno = value return True cdef void del_encoded(self) nogil: + """ + Mark the message as not encoded (invalided the cache of bencoded string of the message) + + Notes: + This method can be called without the python GIL + """ if self.encoded_len > 0: self.encoded_len = 0 self.encoded_uptodate = False free(self.encoded) def response(self, dht): + """ + If the message is a query, return the response message to send + + :param dht.DHT_BASE dht: The dht instance from which the message is originated + :return: A :class:`BMessage` to send as response to the query + :raises ProtocolError: if the query is malformated. To send as response to the querier + :raises MethodUnknownError: If the RPC DHT method asked in the query is unknown. 
+ To send as response to the querier + """ cdef BMessage rep = BMessage() cdef char* id = NULL cdef int l1 = 0 @@ -597,6 +1065,18 @@ cdef class BMessage: printf("not query %d\n", 1) cdef int _encode_values(self, char* data, int* i, int max) nogil: + """ + If the values attribute of the message is set, bencode it in ``data`` + + :param bytes data: A buffer string where we write to + :param int[1] i: The index of ``data`` to start writting to + :param int max: The length of ``data`` + :return: ``True`` if :attr:`values` is successfully bencoded and written to data, then + ``i`` is set the the next free byte of ``data``. ``False`` otherwise. + + Notes: + This method can be called without the python GIL + """ cdef int j if i[0] + self.values_nb * 8 + 2 > max: printf("encode_values: %d < %d\n", max, i[0] + self.values_nb * 8 + 2) @@ -614,6 +1094,38 @@ cdef class BMessage: return True cdef int _encode_secondary_dict(self, char* data, int* i, int max) nogil: + """ + Bencode the secondary dictionnary of the message and write it to ``data`` + + :param bytes data: A buffer string where we write to + :param int[1] i: The index of ``data`` to start writting to + :param int max: The length of ``data`` + :return: ``True`` if the secondary dictionnary is successfully bencoded and written + to data, then ``i`` is set the the next free byte of ``data``. ``False`` otherwise. + + Notes: + This method can be called without the python GIL + + A dht message is a dictionnary that always contain exactly a second dictionnary + (except error messages). The method bencode this second dictionary. + This second dictionnary is in the key ``"a"`` in a query message and in the key + ``"r"`` of a response message. + + All attributes of the current message that should be in the secondary dictionnary + are encoded in it if set. 
+ The following attributes are set to this dictionnary: + * id + * implied_port + * info_hash + * nodes + * port + * target + * token + * values + + Note that all of this attributes should never all bet set in the same BMessage + although no mecanism is preventing you to do it. + """ if i[0] + 1 > max: printf("encode_secondary:%d\n", 0) return False @@ -691,6 +1203,33 @@ cdef class BMessage: return True cdef int _encode_error(self, char* data, int* i, int max) nogil: + """ + Bencode the error list of an error message + + :param bytes data: A buffer string where we write to + :param int[1] i: The index of ``data`` to start writting to + :param int max: The length of ``data`` + :return: ``True`` if the error list is successfully bencoded and written + to data, then ``i`` is set the the next free byte of ``data``. ``False`` otherwise. + + Notes: + This method can be called without the python GIL + + All attributes of the current message that should be in the primary dictionnary + are encoded in it if set. + The following attributes are set to this dictionnary: + * q + * t + * v + * y + + Moreover if ``a`` or ``r`` are set, the secondary dictionnary is bencoded + and added to the corresponding key. If ``e`` is set, the error list is bencoded + and added to the ``e`` key of the main dictionnary. + + Note than ``a``, ``r`` and ``e`` are mutually exclusive and should not be set + together, although no mecanism is preventing you to do it. 
+ """ if i[0] + 2 > max: printf("encode_error: %d", 0) return False @@ -708,6 +1247,18 @@ cdef class BMessage: return True cdef int _encode_main_dict(self, char* data, int* i, int max) nogil: + """ + Bencode the message primary dictionnary + + :param bytes data: A buffer string where we write to + :param int[1] i: The index of ``data`` to start writting to + :param int max: The length of ``data`` + :return: ``True`` if the primary dictionnary is successfully bencoded and written + to data, then ``i`` is set the the next free byte of ``data``. ``False`` otherwise. + + Notes: + This method can be called without the python GIL + """ if i[0] + 1 > max: printf("encode_main: %d\n", 0) return False @@ -778,6 +1329,14 @@ cdef class BMessage: cdef int _encode(self) nogil: + """ + Bencode the current message + + :return: ``True`` if the message is successfully bencoded, ``False`` otherwise + + Notes: + This method can be called without the python GIL + """ cdef int i=0 if self.encoded_len > 0: free(self.encoded) @@ -795,6 +1354,18 @@ cdef class BMessage: return False cdef int _encode_len(self) nogil: + """ + Compute the length of the message once bencoded + + :return: The length of the message once bencoded + :rtype: int + + Notes: + This method can be called without the python GIL + + This method is used to allocate the string buffer where the bencoded message + will be written to. 
+ """ cdef int estimated_len = 2 # the d and e of the global dict if self.has_y: estimated_len+=int_length(self.y_len) + 1 + self.y_len + 3# len + : + str @@ -832,6 +1403,12 @@ cdef class BMessage: return estimated_len def encode(self): + """ + Bencoded the current message if necessary + + :return: The bencoded message + :rtype: bytes + """ if self.encoded_uptodate: return self.encoded[:self.encoded_len] else: @@ -848,6 +1425,7 @@ cdef class BMessage: def __str__(self): raise NotImplementedError() + #: The error number of the message if the message is and erro message property errno: def __get__(self): if self.e: @@ -857,6 +1435,7 @@ cdef class BMessage: def __set__(self, int value): self.set_errno(value) + #: The error message of the message if the message is and erro message property errmsg: def __get__(self): if self.e: @@ -868,6 +1447,7 @@ cdef class BMessage: with nogil: self.set_errmsg(msg, l) + #: The couple (ip, port) source of the message property addr: def __get__(self): if six.PY3: @@ -891,6 +1471,8 @@ cdef class BMessage: self.addr_addr = None self.addr_port = None + #: The ``y` key of the message. Possible value are ``"q"`` for a query, `"r"` for a response + #: and ``"e"`` for an error. property y: def __get__(self): if self.has_y: @@ -905,6 +1487,7 @@ cdef class BMessage: with nogil: self.del_y() + #: The ``t`` key, a random string, transaction id used to match queries and responses together. property t: def __get__(self): if self.has_t: @@ -919,6 +1502,9 @@ cdef class BMessage: with nogil: self.del_t() + #: The ``q`` key of the message, should only be define if the message is a query (:attr:`y` is + #: ``"q"``). It countains the name of the RPC method the query is asking for. Can be + #: ``b'ping'``, ``b'find_node'``, ``b'get_peers'``, ``b'announce_peer'``, ... property q: def __get__(self): if self.has_q: @@ -933,6 +1519,9 @@ cdef class BMessage: with nogil: self.del_q() + #: The ``v`` key of the message. 
This attribute is not describe in the BEP5 that describe the + #: bittorent DHT protocol. It it use as a version flag. Many bittorent client set it to + #: the name and version of the client. property v: def __get__(self): if self.has_v: @@ -949,6 +1538,26 @@ cdef class BMessage: self.del_v() def __getitem__(self, char* key): + """ + Allow to fetch infos from the secondary dictionnary:: + + self[b"id"] -> b"..." + + :param bytes key: The name of an attribute of the secondary dictionnary to retreive. + :return: The value store for ``key`` if found + :raises KeyError: if ``key`` is not found + + Notes: + Possible keys are: + * id + * target + * info_hash + * token + * nodes + * implied_port + * port + * values + """ if key == b"id" and self.has_id: return self.id[:20] elif key == b"target" and self.has_target: @@ -969,6 +1578,15 @@ cdef class BMessage: raise KeyError(key) def __delitem__(self, char* key): + """ + Allow to unset attributes from the secondary dictionnary:: + + del self[b'id'] + + :param :param bytes key: The name of an attribute of the secondary dictionnary to unset + :return: ``True`` if ``key`` is found and successfully unset + :raise KeyError: if ``key`` is not found + """ with nogil: if self.has_id and strcmp(key, b"id") == 0: self.del_id() @@ -991,6 +1609,17 @@ cdef class BMessage: raise KeyError(key) def __setitem__(self, char* key, value): + """ + Allow to set attributes from the secondary dictionnary:: + + self[b'id'] = b"..." + + :param bytes key: The name of an attribute of the secondary dictionnary to set + :param value: The value to set + :raises KeyError: if ``key`` is not one of id, target, info_hash, token, nodes, + implied_port, port, values. + :raises ValueError: if ``value`` is not well formated (length, type, ...) 
+ """ cdef int i = 0 cdef char * j cdef char** v @@ -1049,12 +1678,21 @@ cdef class BMessage: raise KeyError(key) def get(self, char* key, default=None): + """ + :param bytes key: The name of an attribute of the secondary dictionnary to retreive. + :param default: Value to return in case ``key`` is not found. The default is ``None`` + :return: The value of ``key`` if found, else the value of ``default``. + """ try: return self[key] except KeyError as e: return default def __dealloc__(self): + """ + Called before removal of the object. + Used to free manually allocated memory + """ cdef int i with nogil: free(self._y) @@ -1073,6 +1711,18 @@ cdef class BMessage: free(self._errmsg) cdef int _decode_error(self, char* data, int* i, int max) nogil except -1: + """ + Decode and error bencoded list from ``data[i:]`` and set the message attributes errorno + and errormsg. + + :param bytes data: The bencoded string to decode + :param int[1] i: The offset of ``data`` to start decoding from + :param int max: The length of data + :return: ``True`` if the error list is successfully decoded, ``False`` otherwise + + Notes: + This method can be called without the python GIL + """ cdef int j[1] cdef long long ll[1] if i[0] > max: @@ -1093,6 +1743,26 @@ cdef class BMessage: return True cdef int _decode_dict_elm(self, char* data, int* i, int max) nogil except -1: + """ + Decode a dictionnary element: a key and a value. Set the corresponding attributes + on the message. + + :param bytes data: The bencoded string to decode + :param int[1] i: The offset of ``data`` to start decoding from + :param int max: The length of data + :return: ``False`` if the decoding failed. ``True`` otherwise. + + Notes: + This method can be called without the python GIL + + If one decoded element is successfully decoded but has a bad value or format or type + the :attr:`failed` attribute is set to ``True`` and :attr:`failed_msg` is set to + and error message. An error is then raises later. 
This is usefull for trying to + decode the ``"t"`` key in the dictionnary so we can send an error message to + the source of the errored message. As keys in bencoded dictionnary is alphanumeri- + cally sorted, t is often at the end of the message and thus, even is a bad value is + found, we must keep decoding the message at lest until we found its ``"t"``. + """ cdef char* error cdef int j[1] cdef long long ll[1] @@ -1172,6 +1842,22 @@ cdef class BMessage: raise DecodeError("Unable to decode element of dict at %d %r" % (j[0], data[:max])) cdef int _decode_values(self, char* data, int *i, int max) nogil except -1: + """ + Bdecode a values list of peers in compact forms (6 Bytes, 4 for ip and 2 for port) + + :param bytes data: The bencoded string to decode + :param int[1] i: The offset of ``data`` to start decoding from + :param int max: The length of data + :return: ``False`` if ``data[i]`` do not point a list. ``True`` otherwise. + :raises DecodeError: if we reach the end of ``data`` before the end of the list + + Notes: + This method can be called without the python GIL + + If one decoded element is successfully decoded but has a bad value or format or type + the :attr:`failed` attribute is set to ``True`` and :attr:`failed_msg` is set to + and error message. See :meth:`_decode_dict_elm` for more details why. + """ cdef int j[1] cdef int c = 0 cdef int k = i[0] + 1 @@ -1204,6 +1890,18 @@ cdef class BMessage: return True cdef int _decode_dict(self, char* data, int *i, int max) nogil except -1: + """ + Bdecode a dictionnary, element by element. + + :param bytes data: The bencoded string to decode + :param int[1] i: The offset of ``data`` to start decoding from + :param int max: The length of data + :raises DecodeError: if we reach the end of ``data`` before the end of the list or + fail to decode one of the dict elements. 
+ + Notes: + This method can be called without the python GIL + """ cdef int k if data[i[0]] == b'd': i[0]+=1 @@ -1220,6 +1918,16 @@ cdef class BMessage: return True cdef int _decode(self, char* data, int *i, int max) nogil except -1: + """ + Bdecode a bencoded message and set the current :class:`BMessage` attributes accordingly + + :param bytes data: The bencoded string to decode + :param int[1] i: The offset of ``data`` to start decoding from + :param int max: The length of data + + Notes: + This method can be called without the python GIL + """ return self._decode_dict(data, i, max) def __init__(self, addr=None, debug=False): @@ -1248,6 +1956,21 @@ cdef class BMessage: self.encoded_uptodate = False def decode(self, char* data, int datalen): + """ + Bdecode a bencoded message and set the current :class:`BMessage` attributes accordingly + + :param bytes data: The bencoded message + :param int datalen: The length of ``data`` + :return: The remaining of ``data`` after the first bencoded message of ``data`` has been + bdecoded (it may be the empty string if ``data`` contains exactly one bencoded + message with no garbade at the end). + :raises DecodeError: If we fail to decode the message + :raises ProtocolError: If the message is decoded but some attributes are missing of + badly formated (length, type, ...). + :raises MissingT: If the message do not have a ``b"t"`` key. Indeed, + accordingly to the BEP5, every message (queries, responses, errors) should have + a ``b"t"`` key. 
+ """ cdef int i = 0 cdef int valid = False with nogil: @@ -1263,15 +1986,6 @@ cdef class BMessage: else: with gil: raise DecodeError(self.failed_msg) - #if valid: - # self.encoded_len = self._encode_len() - # self.encoded = malloc(self.encoded_len * sizeof(char)) - # strncpy(self.encoded, data, self.encoded_len) - # self.encoded_uptodate = True - # every message MUST have a y field, every SHOULD have t, but we receive some - # error messages without them, so lets accepted them as en empty t - #if valid and not self.has_t and self.has_y and strncmp(self._y, b"e", 1): - # self.set_t("", 0) if not valid or not self.has_y: if self.has_y and strncmp(self._y, b"q", 1): with gil: diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 4e67702..6cfdc81 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -54,6 +54,9 @@ cdef char _longid_to_char(char* id) nogil: :param str id: A 8 Bytes long string with only 0 and 1 as characters :return: A single char where the nth bit correspond to the nth bytes of ``id`` :rtype: str + + Notes: + This function can be called without the python GIL """ cdef unsigned char i = 0 if id[0] == 1: @@ -82,6 +85,9 @@ cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: :param int size: The length of ``longid``, the default is 160. 
:return: A ``size``/8 corresponding base 256 string :rtype: str + + Notes: + This function can be called without the python GIL """ cdef int i cdef char* id @@ -104,6 +110,9 @@ cdef char* _id_to_longid(char* id, int size=20) nogil: :param int size: The length of ``id`` :return: The corresponding base 2 string :rtype: bytes + + Notes: + This function can be called without the python GIL """ global _BYTE_TO_BIT cdef char* ret = malloc((size * 8) * sizeof(char)) @@ -226,7 +235,7 @@ def enumerate_ids(size, id): return aux(size - 1, [id]) -def copy_doc(f1): +def _copy_doc(f1): """ A decorator coping docstring from another function @@ -271,15 +280,15 @@ class ID(object): else: self.value = self.to_bytes(id) - @copy_doc(u"".encode) + @_copy_doc(u"".encode) def encode(self, c): return self.value.encode(c) - @copy_doc(b"".startswith) + @_copy_doc(b"".startswith) def startswith(self, s): return self.value.startswith(s) - @copy_doc(b"".__getitem__) + @_copy_doc(b"".__getitem__) def __getitem__(self, i): return self.value[i] @@ -289,7 +298,7 @@ class ID(object): def __repr__(self): return binascii.b2a_hex(self.value).decode() - @copy_doc(b"".__eq__) + @_copy_doc(b"".__eq__) def __eq__(self, other): if isinstance(other, ID): return self.value == other.value @@ -298,7 +307,7 @@ class ID(object): else: return False - @copy_doc(b"".__lt__) + @_copy_doc(b"".__lt__) def __lt__(self, other): if isinstance(other, ID): return self.value < other.value @@ -309,7 +318,7 @@ class ID(object): "unsupported operand type(s) for <: 'ID' and '%s'" % type(other).__name__ ) - @copy_doc(b"".__len__) + @_copy_doc(b"".__len__) def __len__(self): return len(self.value) @@ -347,7 +356,7 @@ class ID(object): """ return self.__xor__(other) - @copy_doc(b"".__hash__) + @_copy_doc(b"".__hash__) def __hash__(self): return hash(self.value) diff --git a/docs/package/btdht.dht.rst b/docs/package/btdht.dht.rst index 962b6e4..bf4e59b 100644 --- a/docs/package/btdht.dht.rst +++ b/docs/package/btdht.dht.rst @@ -8,6 
+8,15 @@ btdht.dht module .. autoclass:: DHT_BASE :show-inheritance: :members: + :undoc-members: + :exclude-members: bind_ip, bind_port, debuglvl, last_msg, last_msg_rep, ignored_ip, ignored_net, + myid, prefix, prefix, prefix, stoped, threads, token, mytoken, transaction_type, to_send, + to_schedule, zombie, root, sock, + save, load, start, stop, stop_bg, init_socket, is_alive, debug, sleep, bootstarp, + build_table, announce_peer, get_peers, get_closest_nodes, sendto, clean, clean_long, + register_message, on_announce_peer_response, on_announce_peer_query, on_find_node_query, + on_find_node_response, on_get_peers_query, on_get_peers_response, on_ping_query, + on_ping_response, on_error .. autoattribute:: bind_ip @@ -85,6 +94,49 @@ btdht.dht module A list of looping iterator to schedule, passed to :attr:`_scheduler` + .. autoattribute:: zombie + + + .. automethod:: save(filename=None, max_node=None) + .. automethod:: load(filename=None, max_node=None) + .. automethod:: start(start_routing_table=True, start_scheduler=True) + .. automethod:: stop + .. automethod:: stop_bg + .. automethod:: init_socket + .. automethod:: is_alive + + .. automethod:: debug(lvl, msg) + .. automethod:: sleep(t, fstop=None) + + .. automethod:: bootstarp( + addresses=[ + ("router.utorrent.com", 6881), + ("grenade.genua.fr", 6880), + ("dht.transmissionbt.com", 6881) + ] + ) + .. automethod:: build_table + .. automethod:: announce_peer(info_hash, port, delay=0, block=True) + .. automethod:: get_peers(hash, delay=0, block=True, callback=None, limit=10) + + .. automethod:: get_closest_nodes(id, compact=False) + .. automethod:: sendto(msg, addr) + + .. automethod:: clean + .. automethod:: clean_long + + .. automethod:: register_message(msg) + + .. automethod:: on_announce_peer_response(query, response) + .. automethod:: on_announce_peer_query(query) + .. automethod:: on_find_node_query(query) + .. automethod:: on_find_node_response(query, response) + .. 
automethod:: on_get_peers_query(query) + .. automethod:: on_get_peers_response(query, response) + .. automethod:: on_ping_query(query) + .. automethod:: on_ping_response(query, response) + .. automethod:: on_error(error, query=None) + .. autoclass:: DHT :show-inheritance: @@ -94,6 +146,10 @@ btdht.dht module .. autoclass:: Node :show-inheritance: :members: + :undoc-members: + :exclude-members: port, last_response, last_query, failed, id, good, bad, ip, + compact_info, from_compact_infos, from_compact_info, announce_peer, find_node, get_peers, + ping .. autoattribute:: port @@ -131,10 +187,20 @@ btdht.dht module IP address of the node in dotted notation + .. automethod:: compact_info + .. automethod:: from_compact_infos(infos) + .. automethod:: from_compact_info(info) + .. automethod:: announce_peer(dht, info_hash, port) + .. automethod:: find_node(dht, target) + .. automethod:: get_peers(dht, info_hash) + .. automethod:: ping(dht) .. autoclass:: Bucket :show-inheritance: :members: + :undoc-members: + :exclude-members: max_size, last_changed, id, id_length, own, random_id, get_node, add, split, + merge, to_refresh .. autoattribute:: max_size @@ -152,9 +218,23 @@ btdht.dht module Number of signifiant bit in :attr:`id` + .. autoattribute:: to_refresh + + .. automethod:: random_id + .. automethod:: add(dht, node) + .. automethod:: get_node(id) + .. automethod:: own(id) + .. automethod:: split(rt, dht) + .. automethod:: merge(bucket) + .. autoclass:: RoutingTable :show-inheritance: :members: + :undoc-members: + :exclude-members: debuglvl, trie, stoped, need_merge, threads, to_schedule, prefix, zombie, + stop_bg, stop, start, is_alive, register_torrent, release_torrent, register_torrent_longterm, + release_torrent_longterm, register_dht, release_dht, empty, debug, stats, heigth, get_node, + find, get_closest_nodes, add, split, merge .. autoattribute:: debuglvl @@ -184,3 +264,29 @@ btdht.dht module Prefix in logs and threads name + .. autoattribute:: zombie + + + .. 
automethod:: start + .. automethod:: stop + .. automethod:: stop_bg + .. automethod:: is_alive + + .. automethod:: register_torrent(id) + .. automethod:: release_torrent(id) + .. automethod:: register_torrent_longterm(id) + .. automethod:: release_torrent_longterm(id) + .. automethod:: register_dht(dht) + .. automethod:: release_dht(dht) + .. automethod:: empty + + .. automethod:: debug(lvl, msg) + .. automethod:: stats() + .. automethod:: heigth + + .. automethod:: find(id) + .. automethod:: get_node(id) + .. automethod:: get_closest_nodes(id, bad=False) + .. automethod:: add(dht, node) + .. automethod:: split(dht, bucket) + .. automethod:: merge diff --git a/docs/package/btdht.krcp.rst b/docs/package/btdht.krcp.rst index 3bc6e5b..6fa198f 100644 --- a/docs/package/btdht.krcp.rst +++ b/docs/package/btdht.krcp.rst @@ -1,7 +1,95 @@ btdht.krcp module -================= +================== .. automodule:: btdht.krcp + :show-inheritance: + +.. autoclass:: BError + :show-inheritance: + :undoc-members: + :members: + :exclude-members: e, t, y + + .. autoattribute:: e + + A list. The first element is an :class:`int` representing the error code. + The second element is a string containing the error message + + .. autoattribute:: t + + string value representing a transaction ID, must be set to the query transaction ID + for which an error is raises. + + .. autoattribute:: y + + The ``y`` key of the error message. For an error message, it is always ``b"e"`` + + +.. autoclass:: GenericError + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: MethodUnknownError + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: ProtocolError + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: ServerError + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: BMessage :members: :undoc-members: :show-inheritance: + :exclude-members: addr, errmsg, errno, q, t, v, y, decode, encode, get, response + + + .. 
autoattribute:: addr + + The couple (ip, port) source of the message + + .. autoattribute:: errmsg + + The error message of the message if the message is an error message + + .. autoattribute:: errno + + The error number of the message if the message is an error message + + .. autoattribute:: q + + The ``q`` key of the message, should only be defined if the message is a query (:attr:`y` is + ``"q"``). It contains the name of the RPC method the query is asking for. Can be + ``b'ping'``, ``b'find_node'``, ``b'get_peers'``, ``b'announce_peer'``, ... + + .. autoattribute:: t + + The ``t`` key, a random string, transaction id used to match queries and responses together. + + .. autoattribute:: v + + The ``v`` key of the message. This attribute is not described in the BEP5 that describes the + bittorrent DHT protocol. It is used as a version flag. Many bittorrent clients set it to + the name and version of the client. + + .. autoattribute:: y + + The ``y`` key of the message. Possible values are ``"q"`` for a query, ``"r"`` for a response + and ``"e"`` for an error. + + + .. automethod:: __getitem__(key) + .. automethod:: __delitem__(key) + .. automethod:: __setitem__(key, value) + .. automethod:: decode(data, datalen) + .. automethod:: encode + .. automethod:: get(key, default=None) + ..
automethod:: response(dht) From d6c4b58e221f3b630dc543918b05ff3329eecfda Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 15:04:26 +0100 Subject: [PATCH 22/30] Use twine to upload builds to pypi --- Makefile | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index e3ec37e..6bf676b 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,11 @@ .PHONY: build dist docs VERSION=`python setup.py -V` +WHL_FILES := $(wildcard dist/*.whl) +WHL_ASC := $(WHL_FILES:=.asc) +DIST_FILE := $(wildcard dist/*.tar.gz) +DIST_ASC := $(DIST_FILE:=.asc) + build: python setup.py build @@ -15,14 +20,16 @@ uninstall: pip uninstall btdht || true -clean: - rm -rf build dist btdht.egg-info - find ./btdht/ -name '*.c' -delete - find ./ -name '*~' -delete +dist/%.asc: + gpg --detach-sign -a $(@:.asc=) + +publish_pypi_release: test_venv test_venv/bin/twine dist sign_release + test_venv/bin/twine upload --sign dist/* -publish_pypi_release: - python setup.py sdist upload --sign +sign_release: $(WHL_ASC) $(DIST_ASC) +test_venv/bin/twine: + test_venv/bin/pip install twine test_venv: test_venv/bin/python @@ -36,5 +43,16 @@ test_venv/bin/sphinx-build: test_venv docs: test_venv/bin/sphinx-build bash -c "source test_venv/bin/activate; cd docs; make html" +clean: + rm -rf build dist btdht.egg-info + find ./btdht/ -name '*.c' -delete + find ./ -name '*.pyc' -delete + find ./ -name '*~' -delete + clean_docs: cd docs; make clean + +clean_test_venv: + rm -rf test_venv + +clean_all: clean clean_test_venv clean_docs From 80316e38f31b57e01c82db3c7df86cc7dcba8e08 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 15:11:29 +0100 Subject: [PATCH 23/30] If btdht cannot be imported in docs/conf.py, enable cython dynamic import with pyximport --- docs/conf.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 59df1c5..6e07197 100644 --- a/docs/conf.py +++ 
b/docs/conf.py @@ -28,8 +28,6 @@ import os import sys -#import pyximport -#pyximport.install() sys.path.append(os.path.abspath('..')) @@ -37,6 +35,12 @@ sys.path.pop() +try: + import btdht +except ImportError: + import pyximport + pyximport.install() + sys.path.append(os.path.abspath('..')) # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom From 9141a3f31b7b27236c2c9895b6808d7087ac741d Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 17:36:40 +0100 Subject: [PATCH 24/30] Update README --- README.rst | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 799bbb6..b526c28 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,7 @@ -btdht: An event based implementation of the Bittorrent distributed hash table -============================================================================= +btdht: An python implementation of the Bittorrent distributed hash table +======================================================================== +|github_version| |pypi_version| |licence| |doc| The aim of btdht is to provide a powerful implementation of the Bittorrent mainline DHT easily extended to build application over the DHT. @@ -11,15 +12,103 @@ The implementation is fully compliant with the `BEP5 `_ + * python 2.7 or 3.4 or above * `datrie `_ * `netaddr `_ +Build dependencies +------------------ + * A C compiler + * `cython `_ + * python header files + + +Installation +------------ + +The recommended installation mode is to use a `virtualenv `__. 
+ +To install ``btdht`` using the latest published release, run:: + + $ pip install btdht + +Alternatively if you want to use the version of the git repository, you can clone it:: + + $ git clone https://github.com/nitmir/btdht + $ cd btdht + $ pip install -r requirements-dev.txt + +Then, run ``make install`` to compile the sources and create a python package and install it with pip. + +For installing or building on linux and unix systems, you will need a C compiler and the python +headers (installing the packages ``build-essential`` and ``python-dev`` should be enough on debian +like systems, you will probably need ``make``, ``gcc``, ``python2-devel`` and ``redhat-rpm-config`` +on centos like systems). + +On windows systems, we provide pre-built releases so just running ``pip install btdht`` should be fine. +If you want to build from the sources of the repository, you will also need a `C compiler `__. + + +Usage examples +-------------- + +Search for the peers announcing the torrent ``0403fb4728bd788fbcb67e87d6feb241ef38c75a`` +(`Ubuntu 16.10 Desktop (64-bit) `__) + +.. code-block:: python + + >>> import btdht + >>> import binascii + >>> dht = btdht.DHT() + >>> dht.start() # now wait at least 15s for the dht to bootstrap + init socket for 4c323257aa6c4c5c6ccae118db93ccce5bb05d92 + Bootstraping + >>> dht.get_peers(binascii.a2b_hex("0403fb4728bd788fbcb67e87d6feb241ef38c75a")) + [ + ('81.171.107.75', 17744), + ('94.242.250.86', 3813), + ('88.175.164.228', 32428), + ('82.224.107.213', 61667), + ('85.56.118.178', 6881), + ('78.196.28.4', 38379), + ('82.251.140.70', 32529), + ('78.198.108.3', 10088), + ('78.235.153.136', 10619), + ('88.189.113.32', 33192), + ('81.57.9.183', 5514), + ('82.251.17.155', 14721), + ('88.168.207.178', 31466), + ('82.238.89.236', 32970), + ('78.226.209.88', 2881), + ('5.164.219.48', 6881), + ('78.225.252.39', 31002) + ] + +Subsequent calls to get_peers may return more peers.
+ +You may also inherit ``htdht.DHT_BASE`` and overload some of the ``on_`msg`_(query|response)`` +functions. See the `doc `_ for a full overview of the ``btdht`` API. .. [#] Maymounkov, P., & Mazieres, D. (2002, March). Kademlia: A peer-to-peer information system based on the xor metric. In International Workshop on Peer-to-Peer Systems (pp. 53-65). Springer Berlin Heidelberg. + + +.. |pypi_version| image:: https://badges.genua.fr/pypi/v/btdht.svg + :target: https://pypi.python.org/pypi/btdht + +.. |github_version| image:: https://badges.genua.fr/github/tag/nitmir/btdht.svg?label=github + :target: https://github.com/nitmir/btdht/releases/latest + +.. |licence| image:: https://badges.genua.fr/pypi/l/btdht.svg + :target: https://www.gnu.org/licenses/gpl-3.0.html + +.. |doc| image:: https://badges.genua.fr/local/readthedocs/?version=latest + :target: http://btdht.readthedocs.io From 9180aebcfe7a5e1372fc8afd1d4a67834afcad0c Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 17:38:42 +0100 Subject: [PATCH 25/30] README typos --- README.rst | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index b526c28..249a9f0 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -btdht: An python implementation of the Bittorrent distributed hash table -======================================================================== +btdht: A python implementation of the Bittorrent distributed hash table +======================================================================= |github_version| |pypi_version| |licence| |doc| @@ -17,16 +17,18 @@ For example, this implementation uses a bucket-based approach for the routing ta Dependencies ------------ - * python 2.7 or 3.4 or above - * `datrie `_ - * `netaddr `_ + +* python 2.7 or 3.4 or above +* `datrie `_ +* `netaddr `_ Build dependencies ------------------ - * A C compiler - * `cython `_ - * python header files + +* A C compiler +* `cython `_ +* python header files 
Installation From bff223d9e51136e2365e4524813bb06774b63900 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 19:30:10 +0100 Subject: [PATCH 26/30] btdht is fully documented --- README.rst | 2 +- btdht/dht.pyx | 47 ++++++++++------ btdht/exceptions.py | 21 +++++++ btdht/utils.pyx | 105 ++++++++++++++++++----------------- docs/conf.py | 1 + docs/package/btdht.dht.rst | 22 ++++++-- docs/package/btdht.krcp.rst | 4 ++ docs/package/btdht.utils.rst | 71 +++++++++++++++++++++++ 8 files changed, 198 insertions(+), 75 deletions(-) diff --git a/README.rst b/README.rst index 249a9f0..424d4ca 100644 --- a/README.rst +++ b/README.rst @@ -94,7 +94,7 @@ Search for the peers announcing the torrent ``0403fb4728bd788fbcb67e87d6feb241ef Subsequent calls to get_peers may return more peers. -You may also inherit ``htdht.DHT_BASE`` and overload some of the ``on_`msg`_(query|response)`` +You may also inherit ``btdht.DHT_BASE`` and overload some of the ``on_`msg`_(query|response)`` functions. See the `doc `_ for a full overview of the ``btdht`` API. diff --git a/btdht/dht.pyx b/btdht/dht.pyx index f12af4c..341d4d7 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -66,8 +66,8 @@ cdef class DHT_BASE: :param int debuglvl: Level of verbosity, default to ``0``. :param str prefix: A prefix to use in logged messages. The default is ``""``. :param int process_queue_size: Size of the queue of messages waiting to be processed by user - defines functions (on_`msg`_(query|response)). see the :meth:`register_message` method. - The default to ``500``. + defines functions (on_`msg`_(query|response)). see the + :meth:`register_message` method. The default to ``500``. :param list ignored_net: An list of ip networks in cidr notation (``"1.2.3.4/5"``) to ignore. The default is the value of the attribute :attr:`ignored_net`. 
@@ -1224,9 +1224,6 @@ cdef class DHT_BASE: * on error reception, the function ``on_error`` will be called with the error and the query as parameter * The message kind is in the ``q`` key of any dht query message - - Args: - msg (str): a dht message type like ping, find_node, get_peers or announce_peer """ self._to_process_registered.add(msg) @@ -1503,6 +1500,10 @@ cdef class DHT_BASE: :return A couple (decoded message, query) if the message is a response or an error, (decoded message, None) otherwise :rtype: tuple + :raises TransactionIdUnknown: If the decoded message has no ``t`` key + :raises MethodUnknownError: If the queried method is unknown + :raises ValueError: If the message is neither a query, a response or an error + :raises ProtocolError: If the message is malformed """ msg = BMessage(addr=addr, debug=self.debuglvl) msg.decode(s, len(s)) @@ -1869,6 +1870,8 @@ cdef class Node: :param DHT_BASE dht: The dht instance to use to send the message :param bytes info_hash: A 160bits (20 bytes) torrent id to announce :param int port: The tcp port where data for ``info_hash`` is available + :raises NoTokenError: if we have no valid token for ``info_hash``. Try to call + :meth:`get_peers` on this ``info_hash`` first. 
""" cdef char* tk @@ -1978,9 +1981,9 @@ class Bucket(list): def get_node(self, id): """ - :return: A :class:`Node` with :attr:`Node.id`` equal to ``id`` + :return: A :class:`Node` with :attr:`Node.id` equal to ``id`` :rtype: Node - :raises: :class:`NotFound` if no node is found within this bucket + :raises NotFound: if no node is found within this bucket """ for n in self: if n.id == id: @@ -1993,17 +1996,17 @@ class Bucket(list): :param DHT_BASE dht: The dht instance the node to add is from :param Node node: A node to add to the bucket - :raises: :class:`BucketFull` if the bucket is full + :raises BucketFull: if the bucket is full Notes: The addition of a node to a bucket is done as follow: - * if the bucket is not full, just add the node - * if the bucket is full - * if there is some bad nodes in the bucket, remove a bad node and add the - node - * if there is some questionnable nodes (neither good not bad), send a ping - request to the oldest one, discard the node - * if all nodes are good in the bucket, discard the node + * if the bucket is not full, just add the node + * if the bucket is full + * if there is some bad nodes in the bucket, remove a bad node and add the + node + * if there is some questionnable nodes (neither good not bad), send a ping + request to the oldest one, discard the node + * if all nodes are good in the bucket, discard the node """ if not self.own(node.id): raise ValueError("Wrong Bucket") @@ -2044,6 +2047,8 @@ class Bucket(list): :return: A couple of two bucket, the first one this the last significant bit of its id equal to 0, the second, equal to 1 :rtype: tuple + :raises BucketNotFull: If the bucket has not :attr:`max_size` elements (and so the split + is not needed) """ if len(self) < self.max_size: raise BucketNotFull("Bucket not Full %r" % self) @@ -2102,7 +2107,15 @@ class Bucket(list): ) -DHT = type("DHT", (DHT_BASE,), {'__doc__': DHT_BASE.__doc__}) +DHT = type( + "DHT", + (DHT_BASE,), + { + '__doc__': "\n A DHT class ready for 
instanciation\n%s" % ( + "\n".join(DHT_BASE.__doc__.split('\n')[2:]), + ) + } +) class RoutingTable(object): @@ -2464,7 +2477,7 @@ class RoutingTable(object): :param bytes id: A 160 bits (20 Bytes) identifier :return: A node with id ``id`` :rtype: Node - :raises: :class:`NotFound` if no nodes is found + :raises NotFound: if no nodes is found """ b = self.find(id) return b.get_node(id) diff --git a/btdht/exceptions.py b/btdht/exceptions.py index 99b3664..8002c98 100644 --- a/btdht/exceptions.py +++ b/btdht/exceptions.py @@ -1,27 +1,48 @@ class BucketFull(Exception): + """ + Raised then trying to add a node to a :class:`Bucket` that + already contains :class:`Bucket.max_size` elements. + """ pass class BucketNotFull(Exception): + """ + Raises then trying to split a split a :class:`Bucket` that + contains less than :class:`Bucket.max_size` elements. + """ pass class NoTokenError(Exception): + """ + Raised then trying to annonce to a node we download an info_hash + using :meth:`Node.announce_peer` but we do not known any valid + token. The error should always be catch and never seen by btdht users. 
+ """ pass class FailToStop(Exception): + """Raises then we are tying to stop threads but failing at it""" pass class TransactionIdUnknown(Exception): + """Raised then receiving a response with an unknown ``t`` key""" pass class MissingT(ValueError): + """Raised while decoding of a dht message if that message of no key ``t``""" pass class DecodeError(ValueError): + """Raised while decoding a dht message""" pass class BcodeError(Exception): + """Raised by :func:`btdht.utils.bdecode` and :func:`btdht.utils.bencode` functions""" pass class NotFound(Exception): + """ + Raised when trying to get a node that do not exists from a :class:`Bucket` + """ pass diff --git a/btdht/utils.pyx b/btdht/utils.pyx index 6cfdc81..7039856 100644 --- a/btdht/utils.pyx +++ b/btdht/utils.pyx @@ -31,7 +31,7 @@ from libc.string cimport strlen, strncmp, strcmp, strncpy, strcpy from cython.parallel import prange from .krcp cimport _decode_string, _decode_int as _decode_long -from .exceptions import BcodeError +from .exceptions import BcodeError, FailToStop cdef extern from "ctype.h": int isdigit(int c) @@ -104,7 +104,7 @@ cdef char* _longid_to_id(char* longid, int size=160) nogil except NULL: cdef char* _id_to_longid(char* id, int size=20) nogil: """ Convert a random string ``id`` of length ``size`` to its base 2 equivalent. 
- For example, "\0\xFF" is converted to "0000000011111111" + For example, "\\0\\xFF" is converted to "0000000011111111" :param bytes id: A random string :param int size: The length of ``id`` @@ -125,7 +125,8 @@ cdef char* _id_to_longid(char* id, int size=20) nogil: def id_to_longid(char* id, int l=20): """ convert a random bytes to a unicode string of 1 and 0 - example : "\0" -> "00000000" + + For instance: ``"\\0"`` -> ``"00000000"`` :param bytes id: A random string :param int size: The length of ``id`` @@ -141,6 +142,8 @@ def id_to_longid(char* id, int l=20): def nbit(s, n): """ + Allow to retrieve the value of the nth bit of ``s`` + :param bytes s: A byte string :param int n: A bit number (n must be smaller than 8 times the length of ``s``) :return: The value of the nth bit of ``s`` (``0`` or ``1``) @@ -158,6 +161,8 @@ _NFLIP_BITS = [ ] def nflip(s, n): """ + Allow to flip the nth bit of ``s`` + :param bytes s: A byte string :param int n: A bit number (n must be smaller than 8 times the length of ``s``) :return: The same string except for the nth bit was flip @@ -178,6 +183,8 @@ _NSET_BIT0 = [ ] def nset(s, n , i): """ + Allow to set the value of the nth bit of ``s`` + :param bytes s: A byte string :param int n: A bit number (n must be smaller than 8 times the length of ``s``) :param int i: A bit value (``0`` or ``1``) @@ -194,30 +201,33 @@ def nset(s, n , i): def enumerate_ids(size, id): """ + Enumerate 2 to the power of ``size`` ids from ``id`` + :param int size: A number of bit to flip in id :param bytes id: A 160 bit (20 Bytes) long id :return: A list of ``id`` and 2 to the power of ``size`` (minus one) ids the furthest from each other :rtype: list - For instance: if id=("\0" * 20) (~0 * 160), ``enumerate_ids(4, id)`` will return a list with - * '\x00\x00\x00\x00\x00...' (~00000000...) - * '\x80\x00\x00\x00\x00...' (~10000000...) - * '@\x00\x00\x00\x00...' (~0100000000...) - * '\xc0\x00\x00\x00\x00...' (~11000000...) 
+ For instance: if ``id=("\\0" * 20)`` (~0 * 160), ``enumerate_ids(4, id)`` will + return a list with + * ``'\\x00\\x00\\x00\\x00\\x00...'`` (~00000000...) + * ``'\\x80\\x00\\x00\\x00\\x00...'`` (~10000000...) + * ``'@\\x00\\x00\\x00\\x00.......'`` (~0100000000...) + * ``'\\xc0\\x00\\x00\\x00\\x00...'`` (~11000000...) The can be see as the tree:: - \x00 - / \ - 1/ \0 - / \ - \xc0 \x00 - 1/ \0 1/ \0 - / \ / \ - \xc0 \x80 @ \x00 + \\x00 + / \\ + 1/ \\0 + / \\ + \\xc0 \\x00 + 1/ \\0 1/ \\0 + / \\ / \\ + \\xc0 \\x80 @ \\x00 - The root is ``id``, at each level n, we set the nth bit of of 1 left and 0 right, ``size`` + The root is ``id``, at each level n, we set the nth bit to 1 left and 0 right, ``size`` if the level we return. This function may be usefull to lanch multiple DHT instance with ids the most distributed @@ -255,12 +265,16 @@ class ID(object): :param id: An optional initial value (:class:`bytes` or :class:`ID`). If not specified, a random 160 bit value is generated. """ + + #: :class:`bytes`, Actual value of the :class:`ID` + value = None + @classmethod def to_bytes(cls, id): """ :param id: A :class:`bytes` or :class:`ID` :return: The value of the ``id`` - :rtype; bytes + :rtype: bytes """ try: return id.value @@ -280,10 +294,6 @@ class ID(object): else: self.value = self.to_bytes(id) - @_copy_doc(u"".encode) - def encode(self, c): - return self.value.encode(c) - @_copy_doc(b"".startswith) def startswith(self, s): return self.value.startswith(s) @@ -325,7 +335,7 @@ class ID(object): def __xor__(self, other): """ - Permor a XOR bit by bit between the current id and ``other`` + Perform a XOR bit by bit between the current id and ``other`` :param other: A :class:`bytes` or :class:`ID` :return: The resulted XORed bit by bit string @@ -645,6 +655,8 @@ def _bdecode2(s, ii=None): def ip_in_nets(ip, nets): """ + Test if ``ip`` is in one of the networks of ``nets`` + :param str ip: An ip, in dotted notation :param list nets: A list of :obj:`netaddr.IPNetwork` 
:return: ``True`` if ip is in one of the listed networks, ``False`` otherwise @@ -721,22 +733,6 @@ class PollableQueue(Queue.Queue): return Queue.Queue._get(self, *args, **kwargs) -class SplitQueue(PollableQueue): - def _init(self, maxsize): - self.queue = collections.OrderedDict() - - def _put(self, item): - if not item[0] in self.queue: - self.queue[item[0]] = item[1:-1] + (set(),) - self._signal_put() - self.queue[item[0]][-1].add(item[-1]) - - def _get(self): - self._comsume_get() - (key, value) = self.queue.popitem(False) - return (key, ) + value - - class Scheduler(object): """ Schedule weightless threads and DHTs io @@ -987,25 +983,30 @@ class Scheduler(object): t.start() self._threads.append(t) - def stop(self, wait=True): - """stop the scheduler""" + def stop(self): + """ + stop the scheduler + + :raises FailToStop: if we fail to stop one of the scheduler threads after 30 seconds + """ if self._stoped: print("Already stoped or stoping in progress") return self._stoped = True self._init_attrs() - if wait: - self._threads = [t for t in self._threads[:] if t.is_alive()] - for i in range(0, 30): - if self._threads: - if i > 5: - print("Waiting for %s threads to terminate" % len(self._threads)) - time.sleep(1) - self._threads = [t for t in self._threads[:] if t.is_alive()] - else: - break + self._threads = [t for t in self._threads[:] if t.is_alive()] + for i in range(0, 30): + if self._threads: + if i > 5: + print("Waiting for %s threads to terminate" % len(self._threads)) + time.sleep(1) + self._threads = [t for t in self._threads[:] if t.is_alive()] else: - print("Unable to stop the scheduler threads, giving up") + break + else: + print("Unable to stop the scheduler threads, giving up") + if self._threads: + raise FailToStop(self._threads) def stop_bg(self): """Lauch the stop process of the dht and return immediately""" diff --git a/docs/conf.py b/docs/conf.py index 6e07197..c9f1ea1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,6 +52,7 @@ 
'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/package/btdht.dht.rst b/docs/package/btdht.dht.rst index bf4e59b..49a7b0e 100644 --- a/docs/package/btdht.dht.rst +++ b/docs/package/btdht.dht.rst @@ -2,7 +2,24 @@ btdht.dht module ================ .. automodule:: btdht.dht + :members: + :undoc-members: :show-inheritance: + :exclude-members: DHT_BASE, DHT, Node, Bucket, RoutingTable + + +.. autosummary:: + + DHT + DHT_BASE + Node + Bucket + RoutingTable + +.. autoclass:: DHT + :show-inheritance: + :members: + :undoc-members: .. autoclass:: DHT_BASE @@ -138,11 +155,6 @@ btdht.dht module .. automethod:: on_error(error, query=None) -.. autoclass:: DHT - :show-inheritance: - :members: - :undoc-members: - .. autoclass:: Node :show-inheritance: :members: diff --git a/docs/package/btdht.krcp.rst b/docs/package/btdht.krcp.rst index 6fa198f..fcc9203 100644 --- a/docs/package/btdht.krcp.rst +++ b/docs/package/btdht.krcp.rst @@ -2,7 +2,11 @@ btdht.krcp module ================== .. automodule:: btdht.krcp + :members: + :undoc-members: :show-inheritance: + :exclude-members: BError, GenericError, MethodUnknownError, ProtocolError, ServerError, + BMessage .. autoclass:: BError :show-inheritance: diff --git a/docs/package/btdht.utils.rst b/docs/package/btdht.utils.rst index 1a0a550..fbb9fdf 100644 --- a/docs/package/btdht.utils.rst +++ b/docs/package/btdht.utils.rst @@ -5,3 +5,74 @@ btdht.utils module :members: :undoc-members: :show-inheritance: + :exclude-members: ID, PollableQueue, Scheduler, id_to_longid, nbit, nflip, nset, enumerate_ids, + bencode, bdecode, ip_in_nets + +.. autosummary:: + bencode + bdecode + enumerate_ids + id_to_longid + ip_in_nets + nbit + nflip + nset + ID + PollableQueue + Scheduler + +.. autofunction:: bencode(obj) +.. autofunction:: bdecode(s) +.. autofunction:: enumerate_ids(size, id) +.. 
autofunction:: id_to_longid(id, l=20) +.. autofunction:: ip_in_nets(ip, nets) +.. autofunction:: nbit(s, n) +.. autofunction:: nflip(s, n) +.. autofunction:: nset(s, n , i) + +.. autoclass:: ID + :show-inheritance: + :members: + :undoc-members: + :exclude-members: value, to_bytes, startswith + + .. autoattribute:: value + + :class:`bytes`, Actual value of the :class:`ID` + + .. automethod:: to_bytes(id) + .. automethod:: startswith(s) + .. automethod:: __getitem__(i) + .. automethod:: __xor__(other) + +.. autoclass:: PollableQueue + :show-inheritance: + :members: + :undoc-members: + :inherited-members: + :exclude-members: sock + + .. autoattribute:: sock + + A :class:`socket.socket` object ready for read when there is something to pull from the queue + + + +.. autoclass:: Scheduler + :show-inheritance: + :members: + :undoc-members: + :exclude-members: zombie, start, stop, stop_bg, is_alive, thread_alive, add_dht, del_dht, + add_thread, del_thread + + .. autoattribute:: zombie + + .. automethod:: start(name_prefix="scheduler") + .. automethod:: stop + .. automethod:: stop_bg + .. automethod:: is_alive + .. automethod:: thread_alive(name) + .. automethod:: add_dht(dht) + .. automethod:: del_dht(dht) + .. automethod:: add_thread(name, function, user=False) + ..
automethod:: del_thread(name, stop_if_empty=True) From 86fdc953d0da7795db856bf07240e1b29a7575e5 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Sun, 4 Dec 2016 19:30:51 +0100 Subject: [PATCH 27/30] Compile btdht at least once before trying to build the doc --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6bf676b..6287600 100644 --- a/Makefile +++ b/Makefile @@ -37,10 +37,14 @@ test_venv/bin/python: virtualenv test_venv test_venv/bin/pip install -U --requirement requirements-dev.txt +test_venv/lib/python2.7/site-packages/btdht: + python setup.py sdist + test_venv/bin/pip install --no-cache-dir --no-deps --upgrade --force-reinstall --find-links ./dist/btdht-${VERSION}.tar.gz btdht + test_venv/bin/sphinx-build: test_venv test_venv/bin/pip install Sphinx sphinx_rtd_theme -docs: test_venv/bin/sphinx-build +docs: test_venv/bin/sphinx-build test_venv/lib/python2.7/site-packages/btdht bash -c "source test_venv/bin/activate; cd docs; make html" clean: From 4f0b4c99c606ed6002fe25948b6b3548d4a396f2 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Mon, 5 Dec 2016 00:46:34 +0100 Subject: [PATCH 28/30] Add doc for the scheduler param on DHT_BASE --- btdht/dht.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/btdht/dht.pyx b/btdht/dht.pyx index 341d4d7..801f3e8 100644 --- a/btdht/dht.pyx +++ b/btdht/dht.pyx @@ -70,6 +70,8 @@ cdef class DHT_BASE: :meth:`register_message` method. The default to ``500``. :param list ignored_net: An list of ip networks in cidr notation (``"1.2.3.4/5"``) to ignore. The default is the value of the attribute :attr:`ignored_net`. + :param btdht.utils.Scheduler scheduler: An optional :class:`Scheduler` + instance. If not specified, a new :class:`Scheduler` is instantiated.
Note: try to use same ``id`` and ``bind_port`` over dht restart to increase From fffa367fba31bb1e59cd09c77374c61f88951e91 Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Tue, 6 Dec 2016 10:04:42 +0100 Subject: [PATCH 29/30] Update README: only prebuild for python 2.7 and 3.5 on windows --- README.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 424d4ca..cdb94e6 100644 --- a/README.rst +++ b/README.rst @@ -53,8 +53,9 @@ headers (installing the packages ``build-essential`` and ``python-dev`` should b like systems, you'll probably gonna need ``make``, ``gcc``, ``python2-devel`` and ``redhat-rpm-config`` on centos like systems). -On windows systems, we provide pre-builded releases so just running ``pip install btdht`` should be fine. -If you want to build from the sources of the repository, you will also need a `C compiler `__. +On Windows systems, we provide pre-built releases for Python 2.7 and 3.5, so just running +``pip install btdht`` should be fine. If you want to build from the sources of the repository, or +for another Python version, you will also need a `C compiler `__. Usage examples From fd46726f0ddaa4e7172740997f30b67f10392b0f Mon Sep 17 00:00:00 2001 From: Valentin Samir Date: Tue, 6 Dec 2016 10:09:55 +0100 Subject: [PATCH 30/30] Update to version 0.3.0 * Python 3 compatibility * Windows compatibility * Document using sphinx * Refactor to use weightless thread, introduce a Scheduler class * Only send error message upon bad query. Always put a valid t in error message * When _add_peer_queried is called, check that port number is > 0 * Style --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 1630604..659fc20 100755 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ except ImportError: has_cython = False -VERSION = "0.2.0" +VERSION = "0.3.0" if __name__ == "__main__": c_extensions = [