Add 1st version of search

This commit is contained in:
Nicolas Bertrand 2024-05-28 08:53:26 +02:00
parent 8c193f1e7f
commit 803b07a4f4
1 changed files with 110 additions and 0 deletions

110
search.py Executable file
View File

@ -0,0 +1,110 @@
#! venv/bin/python
import sys
import json
from datetime import datetime
from pygbif import species as species
from pygbif import occurrences as occ
import urllib.request
# Path of the tab-separated TAXREF table opened by taxref_species();
# it is relative, so it is resolved against the current working directory.
TAXREF = "TaxRef/TAXREFv17.txt"
def download_image(url, save_as):
    """Download *url* and store it at the local path *save_as*.

    This is a best-effort helper: a failure to fetch the resource is
    reported on stdout and swallowed, so that one bad link does not abort a
    whole batch of downloads.

    Args:
        url: Address of the resource to fetch.
        save_as: Destination file path; its directory must already exist.

    Returns:
        None, whether or not the download succeeded.
    """
    try:
        urllib.request.urlretrieve(url, save_as)
    except urllib.error.HTTPError as e:
        # The server answered, but with an error status.
        print("Error : {} {} {}".format(e.code, e.reason, e.url))
    except urllib.error.URLError as e:
        # No HTTP status is available here (unreachable host, missing
        # file:// target, ...). Must come after HTTPError, its subclass.
        print("Error : {} {}".format(e.reason, url))
    except ValueError as e:
        # urllib raises ValueError for a malformed URL, e.g. a missing scheme.
        print("Error : {} {}".format(e, url))
#o = occ.get(key = 4507978353)
#print(o)
# Zero-based column positions inside one tab-separated row of the TAXREF file.
FAMILLE = 4       # compared against the family name requested by the caller
RANG = 14         # rank code; taxref_species() keeps rows whose value is "ES"
LB_NOM = 15
NOM_COMPLET = 17
NOM_VALIDE = 19   # value that taxref_species() collects into its result
FR = 23           # status code; taxref_species() keeps "P", "E" and "I"
def taxref_species(p):
    """Return the valid names of the TAXREF species belonging to family *p*.

    The TAXREF file is read as tab-separated text. A row is kept when its
    rank column equals "ES", its FR status column is "P", "E" or "I", and
    *p* occurs as a substring of its family column. The NOM_VALIDE value of
    every kept row is collected once, in order of first appearance.

    As a side effect the names are also written, one per line, to
    ``TAXREF_<p>.txt`` in the current working directory.

    Args:
        p: Family name (or fragment of one) to look for.

    Returns:
        List of distinct NOM_VALIDE strings, in file order.
    """
    kept_status = ("P", "E", "I")
    # Rows shorter than this cannot be indexed safely (blank or truncated).
    min_cols = max(FAMILLE, RANG, NOM_VALIDE, FR) + 1

    splist = []
    seen = set()  # O(1) duplicate check; splist keeps the order
    print(p)
    with open(TAXREF) as f:
        for line in f:
            # Drop the line terminator so it cannot stick to the last column,
            # then remove the double quotes surrounding each field.
            cols = [x.strip("\"") for x in line.rstrip("\r\n").split("\t")]
            if len(cols) < min_cols:
                continue
            if cols[RANG] != "ES" or cols[FR] not in kept_status:
                continue
            if p not in cols[FAMILLE]:
                continue
            name = cols[NOM_VALIDE]
            if name not in seen:
                seen.add(name)
                splist.append(name)

    with open("TAXREF_" + p + ".txt", "w") as f:
        f.writelines(s + "\n" for s in splist)
    return splist
def write_gbif(fname, mode, data):
    """Write *data* to the file *fname*, opened with *mode*.

    Args:
        fname: Path of the file to write.
        mode: Mode string handed to ``open`` (for example "w" or "a").
        data: Either a single ``str``/``bytes`` payload, written in one call,
            or an iterable of chunks, written with ``writelines``.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(fname, mode) as f:
        if isinstance(data, (str, bytes)):
            # One write instead of iterating the payload element by element.
            f.write(data)
        else:
            f.writelines(data)
def _media_extension(media_format):
    """Return the file extension used for a media entry's format string."""
    # Only JPEG is recognised. The fallback spelling is kept exactly as the
    # script has always written it, so existing file names stay stable.
    return "jpeg" if media_format == "image/jpeg" else "unkown"


def main(argv, max_species=1):
    """Search GBIF for still images of the species of one TAXREF family.

    Steps:
      1. Build the species list for the family named in ``argv[1]``.
      2. Resolve the first *max_species* names to GBIF backbone keys and
         count their occurrences that carry a still image.
      3. Page through those occurrences and dump them as JSON to
         ``gbif.txt``.
      4. Download every media entry of every occurrence into
         ``StillImage/``.

    Args:
        argv: Command-line vector; ``argv[1]`` is the family name handed to
            ``taxref_species``.
        max_species: How many species from the head of the list are queried.
            The default of 1 matches the first version of the script; pass
            ``None`` to query all of them.

    Raises:
        SystemExit: If no family name was given on the command line.
    """
    import os  # local import: only needed to create the output directory

    if len(argv) < 2:
        raise SystemExit(
            "usage: {} FAMILY".format(argv[0] if argv else "search.py")
        )

    splist = taxref_species(argv[1])
    print("NB species " + str(len(splist)))

    # Resolve each species name to its backbone key. Names without a match
    # carry no 'usageKey' and are skipped instead of raising KeyError.
    keys = {}
    for name in splist[:max_species]:
        match = species.name_backbone(name, verbose=True)
        usage_key = match.get('usageKey')
        if usage_key is None:
            print("No GBIF match for {}".format(name))
            continue
        keys[name] = usage_key

    # limit=0 asks only for the total count, not for the records themselves.
    counts = {
        name: occ.search(taxonKey=key, limit=0, mediaType='StillImage')['count']
        for name, key in keys.items()
    }
    # Most documented species first (dicts preserve insertion order).
    counts = dict(sorted(counts.items(), key=lambda z: z[1], reverse=True))
    print(counts)

    data_gbif = {"last_update": format(datetime.now()), "results": []}
    limit = 100
    for name, total_occurences in counts.items():
        print(name, '->', total_occurences, ' occurences')
        # NOTE(review): the GBIF API is believed to cap how deep offset
        # paging may go - confirm before raising max_species.
        for offset in range(0, total_occurences, limit):
            # Query with this species' own key, one page at a time.
            page = occ.search(
                taxonKey=keys[name],
                limit=limit,
                offset=offset,
                mediaType='StillImage',
            )
            data_gbif["results"] += page['results']
    write_gbif("gbif.txt", "w", json.dumps(data_gbif, indent=2))

    # urlretrieve cannot create the target directory by itself.
    os.makedirs("StillImage", exist_ok=True)
    for r in data_gbif['results']:
        for i, m in enumerate(r.get('media', [])):
            url = m.get("identifier")
            if not url:
                # Nothing to download for this media entry.
                continue
            ext = _media_extension(m.get('format'))
            save_as = "StillImage/" + "{}_{:03d}.{}".format(r['key'], i + 1, ext)
            print(save_as)
            download_image(url, save_as)
# Script entry point: hand the raw command line to main() when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    main(sys.argv)