#! venv/bin/python
"""Collect GBIF still-image occurrences for the species of a TAXREF family.

Usage: pass a family name (or a fragment of one) as the first command-line
argument.  The script

1. reads the TAXREF export and lists the valid species names whose family
   column contains that argument,
2. resolves the selected names against the GBIF backbone and counts their
   occurrences that carry a still image,
3. pages through those occurrences and dumps them to ``gbif.txt`` as JSON,
4. downloads every referenced media file into ``StillImage/``.
"""
import json
import os
import sys
import urllib.error
import urllib.request
from datetime import datetime

from pygbif import occurrences as occ
from pygbif import species as species

TAXREF = "TaxRef/TAXREFv17.txt"

# Zero-based column indexes into a tab-separated TAXREF row.
# NOTE(review): the meaning of each column is inferred from its name only;
# confirm against the documentation shipped with the TAXREF export.
FAMILLE = 4
RANG = 14
LB_NOM = 15
NOM_COMPLET = 17
NOM_VALIDE = 19
FR = 23

# Value of the RANG column that selects a row, and accepted FR column values.
SPECIES_RANK = "ES"
FR_STATUSES = ("P", "E", "I")

# Number of species (taken from the head of the TAXREF list) that are
# actually queried on GBIF.  The original script hard-coded ``[:1]``.
MAX_SPECIES = 1

# Page size used when walking through occurrence search results.
PAGE_SIZE = 100

# NOTE(review): GBIF's occurrence search is believed to refuse paging past
# 100,000 records; larger result sets need the download API -- confirm.
GBIF_MAX_OFFSET = 100000

IMAGE_DIR = "StillImage"


def download_image(url, save_as):
    """Download *url* to the local path *save_as*, reporting failures.

    Failures are printed rather than raised so that one bad media record
    does not abort a whole batch of downloads.
    """
    try:
        urllib.request.urlretrieve(url, save_as)
    except urllib.error.HTTPError as e:
        print("Error : {} {} {}".format(e.code, e.reason, e.url))
    except urllib.error.URLError as e:
        # Network-level failure (DNS, refused connection, truncated body...).
        print("Error : {} {}".format(e.reason, url))
    except ValueError as e:
        # urllib raises ValueError for malformed or scheme-less URLs.
        print("Error : {} {}".format(e, url))


def taxref_species(p):
    """Return the distinct NOM_VALIDE values of the TAXREF rows matching *p*.

    A row is kept when its RANG column equals ``"ES"``, its FR column is one
    of ``"P"``, ``"E"`` or ``"I"``, and *p* is a substring of its FAMILLE
    column.  Names are returned in order of first appearance and are also
    written, one per line, to ``TAXREF_<p>.txt``.
    """
    print(p)
    # Rows shorter than this cannot be indexed safely (e.g. blank lines).
    min_fields = max(FAMILLE, RANG, NOM_VALIDE, FR) + 1
    splist = []
    seen = set()  # O(1) duplicate check; splist keeps the order
    # NOTE(review): assumes the TAXREF export is UTF-8 encoded -- confirm.
    with open(TAXREF, encoding="utf-8") as f:
        for line in f:
            fields = [x.strip('"') for x in line.rstrip("\n").split("\t")]
            if len(fields) < min_fields:
                continue
            if fields[RANG] != SPECIES_RANK:
                continue
            if fields[FR] not in FR_STATUSES:
                continue
            if p not in fields[FAMILLE]:
                continue
            name = fields[NOM_VALIDE]
            if name not in seen:
                seen.add(name)
                splist.append(name)

    with open("TAXREF_" + p + ".txt", "w", encoding="utf-8") as f:
        f.writelines(s + "\n" for s in splist)
    return splist


def write_gbif(fname, mode, data):
    """Write *data* to *fname*, opened with *mode*.

    *data* may be a single ``str``/``bytes`` object, written in one call, or
    an iterable of such chunks, written with ``writelines``.
    """
    with open(fname, mode) as f:
        if isinstance(data, (str, bytes)):
            f.write(data)
        else:
            f.writelines(data)


def main(argv):
    """Run the whole pipeline for the family named in ``argv[1]``.

    Returns ``1`` when the family argument is missing, ``None`` otherwise.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "script"
        print("Usage: {} FAMILY".format(prog), file=sys.stderr)
        return 1

    splist = taxref_species(argv[1])
    print("NB species " + str(len(splist)))

    # Resolve each selected name to its GBIF key and count its occurrences
    # that have a still image.  The key is stored next to the name so later
    # requests always use the key belonging to that species.
    keys = {}
    counts = {}
    for name in splist[:MAX_SPECIES]:
        match = species.name_backbone(name, verbose=True)
        key = match.get('usageKey')
        if key is None:
            print("No GBIF match for " + name)
            continue
        keys[name] = key
        counts[name] = occ.search(
            taxonKey=key, limit=0, mediaType='StillImage'
        )['count']

    # Process the species with the most occurrences first.
    x = dict(sorted(counts.items(), key=lambda z: z[1], reverse=True))
    print(x)

    data_gbif = {}
    data_gbif["last_update"] = format(datetime.now())
    data_gbif["results"] = []

    for name, total_occurences in x.items():
        print(name, '->', total_occurences, ' occurences')
        last = min(total_occurences, GBIF_MAX_OFFSET)
        for offset in range(0, last, PAGE_SIZE):
            page = occ.search(
                taxonKey=keys[name],
                limit=PAGE_SIZE,
                offset=offset,
                mediaType='StillImage',
            )
            data_gbif["results"] += page['results']

    # NOTE(review): the original layout was lost; the snapshot write and the
    # download loop are assumed to run once, after all pages are collected.
    write_gbif("gbif.txt", "w", json.dumps(data_gbif, indent=2))

    # urlretrieve does not create missing directories.
    os.makedirs(IMAGE_DIR, exist_ok=True)
    for r in data_gbif['results']:
        for i, m in enumerate(r.get('media', []), start=1):
            url = m.get("identifier")
            if not url:
                # Media record without a downloadable location.
                continue
            # The fallback spelling is kept as-is so file names stay
            # consistent with files produced by earlier runs.
            ext = "jpeg" if m.get('format') == "image/jpeg" else "unkown"
            save_as = "{}_{:03d}.{}".format(r['key'], i, ext)
            save_as = IMAGE_DIR + "/" + save_as
            print(save_as)
            download_image(url, save_as)


if __name__ == "__main__":
    sys.exit(main(sys.argv))