Add 1st version of search

2024-05-28 08:53:26 +02:00 · 2024-05-28 08:53:26 +02:00 · 803b07a4f4
parent 8c193f1e7f
commit 803b07a4f4
1 changed files with 110 additions and 0 deletions
--- a/search.py
+++ b/search.py
@ -0,0 +1,110 @@
+#! venv/bin/python
+import sys
+import json
+
+from datetime import datetime
+from pygbif import species as species
+from pygbif import occurrences as occ
+import urllib.request
+
+# Relative path of the TAXREF text file that taxref_species() opens and
+# parses as tab-separated rows.
+TAXREF="TaxRef/TAXREFv17.txt"
+
+
+def download_image(url, save_as):
+    """Download ``url`` to the local path ``save_as`` on a best-effort basis.
+
+    Network failures are reported on stdout and swallowed so that one bad
+    URL does not abort a batch of downloads.
+
+    Args:
+        url: Address of the resource to fetch.
+        save_as: Destination file path passed to ``urlretrieve``.
+
+    Returns:
+        None.
+    """
+    # Imported explicitly: the handlers below must not depend on
+    # ``urllib.request`` pulling ``urllib.error`` in as a side effect.
+    from urllib.error import HTTPError, URLError
+
+    try:
+        urllib.request.urlretrieve(url, save_as)
+    except HTTPError as e:
+        # The server answered, but with an error status.
+        print("Error : {} {} {}".format(e.code, e.reason, e.url))
+    except URLError as e:
+        # No usable HTTP response (DNS failure, refused connection,
+        # truncated download, ...). HTTPError is a subclass of URLError, so
+        # this clause must come second.
+        print("Error : {} {}".format(e.reason, url))
+
+
+
+#o = occ.get(key = 4507978353)
+#print(o)
+
+# Zero-based column indices into a TAXREF row once it has been split on
+# tabs (see taxref_species).
+FAMILLE=4       # column searched for the family name given by the caller
+RANG=14         # column compared with "ES" (presumably the species rank code - TODO confirm)
+LB_NOM=15       # not read by the active code in this file
+NOM_COMPLET=17  # not read by the active code in this file
+NOM_VALIDE=19   # column whose value taxref_species() collects and returns
+FR=23           # column that must be "P", "E" or "I" (presumably a status code for France - TODO confirm)
+
+def taxref_species(p):
+    """Collect the valid names of the TAXREF rows that match family ``p``.
+
+    A row of the tab-separated ``TAXREF`` file is kept when its ``RANG``
+    column equals ``"ES"``, its ``FR`` column is ``"P"``, ``"E"`` or ``"I"``
+    and ``p`` occurs as a substring of its ``FAMILLE`` column. The
+    ``NOM_VALIDE`` value of every kept row is collected once, in file order.
+
+    The resulting names are also written, one per line, to
+    ``TAXREF_<p>.txt`` in the current directory.
+
+    Args:
+        p: Text searched for inside the ``FAMILLE`` column.
+
+    Returns:
+        list[str]: Distinct ``NOM_VALIDE`` values in order of first appearance.
+    """
+    accepted_status = ("P", "E", "I")
+    # Rows shorter than this (blank lines, truncated records) cannot be
+    # indexed safely and are skipped instead of raising IndexError.
+    min_columns = max(FAMILLE, RANG, NOM_VALIDE, FR) + 1
+
+    splist = []
+    seen = set()  # O(1) duplicate check; splist keeps the file order
+    print(p)
+    with open(TAXREF) as f:
+        for line in f:
+            cols = [c.strip("\"") for c in line.rstrip("\n").split("\t")]
+            if len(cols) < min_columns:
+                continue
+            if cols[RANG] != "ES" or cols[FR] not in accepted_status:
+                continue
+            if p not in cols[FAMILLE]:
+                continue
+            name = cols[NOM_VALIDE]
+            if name not in seen:
+                seen.add(name)
+                splist.append(name)
+
+    with open("TAXREF_" + p + ".txt", "w") as f:
+        f.writelines(s + "\n" for s in splist)
+    return splist
+
+def write_gbif(fname,mode,data):
+    """Write ``data`` to the file ``fname`` opened with ``mode``.
+
+    Args:
+        fname: Path of the file to write.
+        mode: Mode string handed to ``open`` (for example ``"w"`` or ``"a"``).
+        data: Either one string / bytes object, written in a single call, or
+            an iterable of strings written with ``writelines``.
+
+    Returns:
+        None.
+    """
+    # The ``with`` block closes the file; no explicit close() is needed.
+    with open(fname, mode) as f:
+        if isinstance(data, (str, bytes, bytearray)):
+            # writelines() on a single string would emit it character by
+            # character (and fail on bytes); write it in one go instead.
+            f.write(data)
+        else:
+            f.writelines(data)
+
+
+
+
+def main(argv):
+    """Search GBIF for pictures of the species of one TAXREF family.
+
+    Steps:
+      1. Read the species names of family ``argv[1]`` with taxref_species().
+      2. Resolve names to GBIF taxon keys and count their occurrences that
+         carry a still image.
+      3. Fetch those occurrences page by page and dump them, together with a
+         timestamp, to ``gbif.txt`` as JSON.
+      4. Download every media item of every occurrence into ``StillImage/``.
+
+    Args:
+        argv: Command-line vector; ``argv[1]`` is the family name.
+
+    Returns:
+        None. Exits with a usage message when the family name is missing.
+    """
+    import os
+
+    if len(argv) < 2:
+        sys.exit("Usage: {} FAMILY".format(argv[0] if argv else "search.py"))
+
+    splist = taxref_species(argv[1])
+    print ("NB species " + str(len(splist)))
+
+    # NOTE(review): only the first species is resolved, exactly as before -
+    # presumably a deliberate cap for this first version; confirm before
+    # widening the slice.
+    keys = {}
+    for name in splist[:1]:
+        match = species.name_backbone(name, verbose=True)
+        key = match.get('usageKey')
+        if key is None:
+            # No usageKey in the answer: nothing to search for.
+            print("No GBIF match for " + name)
+            continue
+        keys[name] = key
+
+    counts = {
+        name: occ.search(taxonKey=key, limit=0, mediaType='StillImage')['count']
+        for name, key in keys.items()
+    }
+
+    # sorted() returns a new list; rebuild the dict from it so that the
+    # descending-count order is actually kept.
+    x = dict(sorted(counts.items(), key=lambda z: z[1], reverse=True))
+    print(x)
+
+    data_gbif = {}
+    data_gbif["last_update"] = format(datetime.now())
+    data_gbif["results"] = []
+
+    limit = 100  # page size requested from occ.search
+    for name, total_occurences in x.items():
+        print (name , '->', total_occurences , ' occurences')
+        for offset in range(0, total_occurences, limit):
+            # Query the taxon key that belongs to this species.
+            page = occ.search(taxonKey=keys[name], limit=limit, offset=offset,
+                              mediaType='StillImage')
+            data_gbif["results"] += page['results']
+
+    write_gbif("gbif.txt","w",json.dumps(data_gbif, indent=2))
+
+    # urlretrieve does not create missing directories.
+    os.makedirs("StillImage", exist_ok=True)
+    for r in data_gbif['results']:
+        for i, m in enumerate(r.get('media', [])):
+            url = m.get("identifier")
+            if not url:
+                # Media entry without a download address: nothing to fetch.
+                continue
+            ext = "jpeg" if m.get('format') == "image/jpeg" else "unkown"
+            save_as = "StillImage/" + "{}_{:03d}.{}".format(r['key'], i+1, ext)
+            print(save_as)
+            download_image(url, save_as)
+
+
+
+# Run the search only when this file is executed as a script; main() gets
+# the raw command-line vector.
+if __name__ == "__main__":
+    main(sys.argv)