@@ -62,7 +62,7 @@ def sanitize_domain_type(domain_type):
6262 return 'regular'
6363
6464######## DOMAINS ########
def get_all_domains_up(domain_type, r_list=True):
    '''
    Get all domains up (at least one time)

    :param domain_type: domain type
    :type domain_type: str
    :param r_list: if True, convert the result to a list (empty list when
                   nothing is found); if False, return the smembers() result
                   unchanged
    :type r_list: bool

    :return: members of the "full_<domain_type>_up" set
    :rtype: list when r_list is True, otherwise whatever smembers() returned
    '''
    domains = r_serv_onion.smembers("full_{}_up".format(domain_type))
    if r_list:
        # list() builds a new object: the result must be bound back to the
        # name, otherwise the caller still receives the raw smembers() value.
        domains = list(domains) if domains else []
    return domains
7682
7783def get_domains_up_by_month (date_year_month , domain_type , rlist = False ):
7884 '''
@@ -128,6 +134,64 @@ def get_domains_up_by_daterange(date_from, date_to, domain_type):
128134 domains_up = []
129135 return domains_up
130136
def paginate_iterator(iter_elems, nb_obj=50, page=1):
    '''
    Cut a collection into pages of nb_obj elements and return one page.

    :param iter_elems: elements to paginate, in iteration order; unsized
                       iterables (e.g. generators) are materialized first
    :param nb_obj: maximum number of elements per page (must be >= 1)
    :type nb_obj: int
    :param page: requested page number, starting at 1; a value outside
                 [1, nb_pages] is clamped to the nearest valid page
    :type page: int

    :return: dict with the keys
             'nb_all_elem'   total number of elements,
             'list_elem'     elements of the selected page,
             'page'          page actually returned (0 if there is no element),
             'nb_pages'      total number of pages,
             'nb_first_elem' 1-based position of the first element of the page,
             'nb_last_elem'  1-based position of the last element of the page
    :rtype: dict
    :raises ValueError: if nb_obj is lower than 1
    '''
    from itertools import islice

    if nb_obj < 1:
        raise ValueError('nb_obj must be a positive integer')

    # len() is needed below: turn generators/iterators into a list once.
    if not hasattr(iter_elems, '__len__'):
        iter_elems = list(iter_elems)

    nb_all_elem = len(iter_elems)
    # Integer ceiling division: no float rounding, no is_integer() check.
    nb_pages = -(-nb_all_elem // nb_obj)

    if nb_pages == 0:
        # Nothing to show: keep the historical "page 0 of 0" answer.
        page = 0
    else:
        # Clamp on both sides; page < 1 used to produce negative indexes.
        page = max(1, min(page, nb_pages))

    # 0-based, half-open [start, stop) window of the selected page.
    start = nb_obj * max(page - 1, 0)
    stop = min(start + nb_obj, nb_all_elem)

    dict_page = {}
    dict_page['nb_all_elem'] = nb_all_elem
    # islice works on any iterable (sets included), unlike slicing.
    dict_page['list_elem'] = list(islice(iter_elems, start, stop))
    dict_page['page'] = page
    dict_page['nb_pages'] = nb_pages
    # UI
    dict_page['nb_first_elem'] = start + 1
    dict_page['nb_last_elem'] = stop
    return dict_page
174+
def domains_up_by_page(domain_type, nb_obj=28, page=1):
    '''
    Get one page of domains up (alpha sorted), with their metadata

    :param domain_type: domain type
    :type domain_type: str
    :param nb_obj: page size, forwarded to paginate_iterator
    :type nb_obj: int
    :param page: page number, forwarded to paginate_iterator
    :type page: int

    :return: the dict built by paginate_iterator, whose 'list_elem' entry is
             replaced by the get_domain_metadata() result of each domain
    :rtype: dict
    '''
    sorted_domains = sorted(get_all_domains_up(domain_type, r_list=False))
    page_content = paginate_iterator(sorted_domains, nb_obj=nb_obj, page=page)

    # # TODO: get tags + root_screenshot + metadata
    page_content['list_elem'] = [
        get_domain_metadata(
            domain_id, domain_type,
            first_seen=True, last_ckeck=True, status=True,
            ports=True, tags=True, screenshot=True
        )
        for domain_id in page_content['list_elem']
    ]
    return page_content
194+
131195######## DOMAIN ########
132196
133197def get_domain_type (domain ):
@@ -367,7 +431,15 @@ def get_domain_tags(domain):
367431 '''
368432 return Tag .get_obj_tag (domain )
369433
370- def get_domain_metadata (domain , domain_type , first_seen = True , last_ckeck = True , status = True , ports = True , tags = False ):
def get_domain_random_screenshot(domain):
    '''
    Return the result of Screenshot.get_randon_domain_screenshot() for a domain.

    :param domain: crawled domain

    :return: whatever the Screenshot helper returns for this domain
             (presumably one screenshot picked at random - confirm in Screenshot)
    '''
    screenshot = Screenshot.get_randon_domain_screenshot(domain)
    return screenshot
441+
442+ def get_domain_metadata (domain , domain_type , first_seen = True , last_ckeck = True , status = True , ports = True , tags = False , screenshot = False ):
371443 '''
372444 Get Domain basic metadata
373445
@@ -384,6 +456,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
384456 :rtype: dict
385457 '''
386458 dict_metadata = {}
459+ dict_metadata ['id' ] = domain
387460 if first_seen :
388461 res = get_domain_first_seen (domain , domain_type = domain_type )
389462 if res is not None :
@@ -398,6 +471,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
398471 dict_metadata ['ports' ] = get_domain_all_ports (domain , domain_type )
399472 if tags :
400473 dict_metadata ['tags' ] = get_domain_tags (domain )
474+ if screenshot :
475+ dict_metadata ['screenshot' ] = get_domain_random_screenshot (domain )
401476 return dict_metadata
402477
403478def get_domain_metadata_basic (domain , domain_type = None ):
0 commit comments