111 lines
2.8 KiB
Python
Executable File
111 lines
2.8 KiB
Python
Executable File
#! venv/bin/python
|
|
import sys
|
|
import json
|
|
|
|
from datetime import datetime
|
|
from pygbif import species as species
|
|
from pygbif import occurrences as occ
|
|
import urllib.request
|
|
|
|
# Relative path of the tab-separated TAXREF dump that taxref_species() opens.
TAXREF = "TaxRef/TAXREFv17.txt"
|
|
|
|
|
|
def download_image(url, save_as):
    """Download *url* to the local path *save_as*, best effort.

    Failures to fetch the resource are reported on stdout and swallowed so
    that one bad media link does not abort a whole batch of downloads.

    Args:
        url: Location of the resource to fetch.
        save_as: Destination file path handed to ``urlretrieve``.

    Returns:
        None, whether or not the download succeeded.
    """
    try:
        urllib.request.urlretrieve(url, save_as)
    except urllib.error.HTTPError as e:
        # The server answered, but with an error status.
        print("Error : {} {} {}".format(e.code, e.reason, e.url))
    except urllib.error.URLError as e:
        # No usable answer at all (DNS failure, refused connection, missing
        # file:// target, truncated transfer). HTTPError is a subclass of
        # URLError, hence the more specific handler above comes first.
        print("Error : {} {}".format(e.reason, url))
    except ValueError as e:
        # urllib raises ValueError for strings it cannot parse as a URL.
        print("Error : {} {}".format(e, url))
|
|
|
|
|
|
|
|
#o = occ.get(key = 4507978353)
|
|
#print(o)
|
|
|
|
# Zero-based column positions within one tab-split line of the TAXREF file.
# taxref_species() reads FAMILLE, RANG, NOM_VALIDE and FR; LB_NOM and
# NOM_COMPLET are only referenced from commented-out debug output.
# NOTE(review): the positions presumably match the TAXREF v17 column layout;
# confirm against the file header when switching TAXREF versions.
FAMILLE = 4
RANG = 14
LB_NOM = 15
NOM_COMPLET = 17
NOM_VALIDE = 19
FR = 23
|
|
|
|
def taxref_species(p):
    """Collect the distinct valid names of TAXREF rows matching family *p*.

    A row of the TAXREF file is kept when its RANG column equals "ES", its
    FR column is one of "P", "E" or "I", and *p* occurs as a substring of
    its FAMILLE column. The NOM_VALIDE value of each kept row is collected
    once, in order of first appearance. The resulting list is also written,
    one name per line, to ``TAXREF_<p>.txt`` in the current directory.

    NOTE(review): "ES" is presumably the species rank and P/E/I presumably
    presence statuses for metropolitan France; confirm against the TAXREF
    documentation.

    Args:
        p: Text to look for inside the family column; also used to build
            the output file name.

    Returns:
        List of distinct NOM_VALIDE strings, in file order.
    """
    print(p)

    # Rows shorter than the highest column read (blank lines, truncated
    # records) are skipped instead of raising IndexError.
    min_fields = max(FAMILLE, RANG, NOM_VALIDE, FR) + 1
    wanted_status = ("P", "E", "I")

    splist = []
    seen = set()  # O(1) duplicate detection; splist keeps first-seen order
    with open(TAXREF) as f:
        for line in f:
            # Drop the line terminator first so the last column compares
            # cleanly, then remove the optional surrounding double quotes.
            fields = [x.strip('"') for x in line.rstrip("\r\n").split("\t")]
            if len(fields) < min_fields:
                continue
            if fields[RANG] != "ES" or fields[FR] not in wanted_status:
                continue
            if p not in fields[FAMILLE]:
                continue
            name = fields[NOM_VALIDE]
            if name not in seen:
                seen.add(name)
                splist.append(name)

    with open("TAXREF_" + p + ".txt", "w") as f:
        f.writelines(name + "\n" for name in splist)

    return splist
|
|
|
|
def write_gbif(fname, mode, data):
    """Open *fname* with *mode* and write every item of *data* to it.

    Args:
        fname: Path of the file to open.
        mode: Mode string passed straight to ``open`` (e.g. "w" or "a").
        data: Iterable handed to ``writelines``; a plain string is
            accepted too and is written character by character.
    """
    handle = open(fname, mode)
    try:
        handle.writelines(data)
    finally:
        # Always release the file, even when writing fails.
        handle.close()
|
|
|
|
|
|
|
|
|
|
def main(argv):
    """Fetch GBIF still-image occurrences for a TAXREF family and download them.

    Steps:
      1. Build the species list for the family named in ``argv[1]`` with
         ``taxref_species``.
      2. Resolve the selected species to GBIF backbone usage keys and count
         their occurrences that carry still images.
      3. Page through those occurrences, dump everything collected to
         ``gbif.txt`` as JSON, then download each media item into the
         ``StillImage/`` directory.

    Args:
        argv: Command-line vector; ``argv[1]`` is the family text passed to
            ``taxref_species``.

    Raises:
        SystemExit: With a usage message when ``argv[1]`` is missing.
    """
    import os  # local import: only needed to create the image directory

    if len(argv) < 2:
        sys.exit("usage: {} FAMILY".format(argv[0] if argv else "script"))

    splist = taxref_species(argv[1])
    print("NB species " + str(len(splist)))

    # Only the first species of the list is queried, exactly as before.
    selected = splist[:1]

    # Keep an explicit species -> usage key mapping so that each species is
    # always paired with its own key, even when another one has no match.
    taxon_keys = {}
    for name in selected:
        match = species.name_backbone(name, verbose=True)
        usage_key = match.get('usageKey')
        if usage_key is None:
            print("No GBIF backbone match for {}".format(name))
            continue
        taxon_keys[name] = usage_key

    counts = {
        name: occ.search(taxonKey=key, limit=0, mediaType='StillImage')['count']
        for name, key in taxon_keys.items()
    }
    # Process the species with the most images first. The sorted result has
    # to be stored: sorted() returns a new list and leaves its input as is.
    counts = dict(sorted(counts.items(), key=lambda item: item[1], reverse=True))
    print(counts)

    data_gbif = {
        "last_update": format(datetime.now()),
        "results": [],
    }

    limit = 100  # page size requested from the occurrence search
    for name, total_occurences in counts.items():
        print(name, '->', total_occurences, ' occurences')
        for offset in range(0, total_occurences, limit):
            page = occ.search(
                taxonKey=taxon_keys[name],
                limit=limit,
                offset=offset,
                mediaType='StillImage',
            )
            data_gbif["results"] += page['results']

    write_gbif("gbif.txt", "w", json.dumps(data_gbif, indent=2))

    # urlretrieve does not create missing directories.
    os.makedirs("StillImage", exist_ok=True)

    for r in data_gbif['results']:
        # Numbering starts at 1 and follows the position in the media list,
        # so skipped entries do not shift the file names of later ones.
        for i, m in enumerate(r.get('media', []), start=1):
            url = m.get("identifier")
            if not url:
                # Nothing to download for a media entry without a location.
                continue
            ext = "jpeg" if m.get('format') == "image/jpeg" else "unkown"
            save_as = "StillImage/" + "{}_{:03d}.{}".format(r['key'], i, ext)
            print(save_as)
            download_image(url, save_as)
|
|
|
|
|
|
|
|
# Script entry point: run main() with the full command-line vector only when
# this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(sys.argv)
|