diff --git a/search.py b/search.py
new file mode 100755
index 0000000..e5f00db
--- /dev/null
+++ b/search.py
@@ -0,0 +1,103 @@
+#! venv/bin/python
+import sys
+import json
+
+from datetime import datetime
+from pygbif import species
+from pygbif import occurrences as occ
+import urllib.request
+import urllib.error
+
+TAXREF = "TaxRef/TAXREFv17.txt"
+
+
+def download_image(url, save_as):
+    try:
+        urllib.request.urlretrieve(url, save_as)
+    except urllib.error.HTTPError as e:
+        print("Error : {} {} {}".format(e.code, e.reason, e.url))
+
+
+#o = occ.get(key = 4507978353)
+#print(o)
+
+# Column indices in the tab-separated TAXREF v17 export
+FAMILLE = 4
+RANG = 14
+LB_NOM = 15
+NOM_COMPLET = 17
+NOM_VALIDE = 19
+FR = 23
+
+
+def taxref_species(p):
+    """Return the valid names (NOM_VALIDE) of species (rank "ES") whose
+    family contains p and whose FR status is P, E or I."""
+    splist = []
+    print(p)
+    with open(TAXREF) as f:
+        for line in f:
+            l = [x.strip("\"") for x in line.split("\t")]
+            #print (l[14], "ES", sep="#")
+            if l[RANG] == "ES" and l[FR] in ("P", "E", "I") and p in l[FAMILLE]:
+                #print("{} / {} / {} / {}".format(l[FAMILLE], l[LB_NOM], l[NOM_VALIDE], l[RANG]))
+                #print("{} / {}".format(l[22], l[23]))
+                if l[NOM_VALIDE] not in splist:
+                    splist.append(l[NOM_VALIDE])
+
+    with open("TAXREF_" + p + ".txt", "w") as f:
+        for s in splist:
+            f.write(s + "\n")
+    return splist
+
+
+def write_gbif(fname, mode, data):
+    with open(fname, mode) as f:
+        f.writelines(data)
+
+
+def main(argv):
+    splist = taxref_species(argv[1])
+    #print (splist)
+    print("NB species " + str(len(splist)))
+
+    # Resolve the first species against the GBIF backbone and count its
+    # occurrences that carry a still image
+    keys = [species.name_backbone(x, verbose=True)['usageKey'] for x in splist[:1]]
+    out = [occ.search(taxonKey=k, limit=0, mediaType='StillImage')['count'] for k in keys]
+
+    x = dict(zip(splist, out))
+    x = dict(sorted(x.items(), key=lambda z: z[1], reverse=True))
+    print(x)
+
+    data_gbif = {}
+    data_gbif["last_update"] = format(datetime.now())
+    data_gbif["results"] = []
+
+    for k in x:
+        print(k, '->', x[k], ' occurrences')
+        total_occurrences = x[k]
+        limit = 100
+        # Page through the occurrences, `limit` records at a time
+        for counter in range(0, total_occurrences, limit):
+            out = [occ.search(taxonKey=tk, limit=limit, offset=counter, mediaType='StillImage') for tk in keys]
+            #print (out)
+            #print( out[0]['results'])
+            data_gbif["results"] += out[0]['results']
+
+    write_gbif("gbif.txt", "w", json.dumps(data_gbif, indent=2))
+
+    # Download every JPEG attached to the collected occurrences
+    for r in data_gbif['results']:
+        for i, m in enumerate(r['media']):
+            ext = "unknown"
+            if m['format'] == "image/jpeg":
+                ext = "jpeg"
+            save_as = "{}_{:03d}.{}".format(r['key'], i + 1, ext)
+            save_as = "StillImage/" + save_as
+            print(save_as)
+            download_image(m["identifier"], save_as)
+
+
+if __name__ == "__main__":
+    main(sys.argv)