You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
4.3 KiB
Python
118 lines
4.3 KiB
Python
import json
|
|
import time
|
|
|
|
import pandas as pd
|
|
|
|
from cozo import *
|
|
|
|
|
|
def remove_nan(d: dict) -> dict:
    """Return a copy of ``d`` without entries whose value is ``None`` or NaN.

    Args:
        d: Mapping to filter; it is not modified.

    Returns:
        A new dict holding every ``(key, value)`` pair of ``d`` whose value is
        neither ``None`` nor unequal to itself.
    """
    # ``v == v`` is False only for NaN-like values, and unlike ``math.isnan``
    # it also accepts non-numeric values such as strings.
    return {k: v for k, v in d.items() if v is not None and v == v}
|
|
|
|
|
|
def insert_data(destroy_on_exit):
    """Open the ``db_test_flights`` database and load the air-routes data into it.

    The function first submits the attribute definitions for ``country``,
    ``continent``, ``airport``, ``route`` and ``geo``, then reads
    ``air-routes-latest-nodes.csv`` and ``air-routes-latest-edges.csv``
    (relative paths) and submits one ``Put`` per continent, country, airport,
    route edge and ``contains`` edge in a single ``db.tx`` call. Timings for
    both transactions are printed.

    Loading is best-effort: any exception raised while defining attributes or
    inserting data is printed and swallowed, and the database handle is
    returned regardless.

    Args:
        destroy_on_exit: Forwarded unchanged to ``CozoDb`` as its
            ``destroy_on_exit`` keyword argument.

    Returns:
        The ``CozoDb`` instance that was opened.
    """
    db = CozoDb('db_test_flights', destroy_on_exit=destroy_on_exit)
    try:
        attr_defs = [
            *DefAttrs('country')
            .code(Typing.string, index=Indexing.identity)
            .desc(Typing.string)(),

            *DefAttrs('continent')
            .code(Typing.string, index=Indexing.identity)
            .desc(Typing.string)(),

            *DefAttrs('airport')
            .iata(Typing.string, index=Indexing.identity)
            .icao(Typing.string, index=Indexing.indexed)
            .city(Typing.string, index=Indexing.indexed)
            .desc(Typing.string)
            .region(Typing.string, index=Indexing.indexed)
            .country(Typing.ref)
            .runways(Typing.int)
            .longest(Typing.int)
            .altitude(Typing.int)
            .lat(Typing.float)
            .lon(Typing.float)(),

            *DefAttrs('route')
            .src(Typing.ref)
            .dst(Typing.ref)
            .distance(Typing.int)(),

            *DefAttrs('geo')
            .contains(Typing.ref)(),
        ]
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by wall-clock adjustments.
        start_time = time.perf_counter()
        tx_res = db.tx_attr(attr_defs)['results']
        end_time = time.perf_counter()

        print(f'{len(tx_res)} attributes added in {(end_time - start_time) * 1000:.3f}ms')

        insertions = []
        nodes = pd.read_csv('air-routes-latest-nodes.csv', index_col=0)

        # itertuples() exposes columns whose names are not valid identifiers
        # positionally as ``_N``. Presumably ``_3`` is the code column and
        # ``_5`` the description -- TODO confirm against the CSV header.
        continents = nodes[nodes['~label'] == 'continent']
        for row in continents.itertuples():
            put_payload = remove_nan(
                {'_temp_id': str(row.Index), 'continent.code': row._3, 'continent.desc': row._5})
            insertions.append(Put(put_payload))

        # Maps country code -> temp id so airports can reference their country.
        country_idx = {}
        countries = nodes[nodes['~label'] == 'country']
        for row in countries.itertuples():
            put_payload = remove_nan({'_temp_id': str(row.Index), 'country.code': row._3, 'country.desc': row._5})
            country_idx[row._3] = str(row.Index)
            insertions.append(Put(put_payload))

        airports = nodes[nodes['~label'] == 'airport']
        for row in airports.itertuples():
            # NOTE(review): int() raises on NaN before remove_nan can drop the
            # entry; this assumes the three integer columns are always
            # populated for airport rows -- confirm against the data.
            put_payload = remove_nan({
                '_temp_id': str(row.Index),
                'airport.iata': row._3,
                # The literal string 'none' is treated as a missing ICAO code.
                'airport.icao': None if row._4 == 'none' else row._4,
                'airport.desc': row._5,
                'airport.region': row._6,
                'airport.runways': int(row._7),
                'airport.longest': int(row._8),
                'airport.altitude': int(row._9),
                'airport.country': country_idx[row._10],
                'airport.city': row._11,
                'airport.lat': row._12,
                'airport.lon': row._13,
            })
            insertions.append(Put(put_payload))

        edges = pd.read_csv('air-routes-latest-edges.csv', index_col=0)

        # Edge endpoints are stringified the same way as the node temp ids
        # above; presumably ``_1``/``_2`` are the from/to columns and ``_4``
        # the distance -- TODO confirm against the CSV header.
        for row in edges[edges['~label'] == 'route'].itertuples():
            route_payload = remove_nan(
                {'route.src': str(row._1), 'route.dst': str(row._2), 'route.distance': int(row._4)})
            insertions.append(Put(route_payload))

        for row in edges[edges['~label'] == 'contains'].itertuples():
            contains_payload = remove_nan({'_temp_id': str(row._1), 'geo.contains': str(row._2)})
            insertions.append(Put(contains_payload))

        start_time = time.perf_counter()
        d_res = db.tx(insertions)['results']
        end_time = time.perf_counter()
        elapsed = end_time - start_time
        print(f'{len(d_res)} node data added in {elapsed * 1000:.3f}ms')
        # Guard the rate so a zero-length interval cannot raise
        # ZeroDivisionError and be misreported by the handler below.
        if elapsed > 0:
            print(f'{len(d_res) / elapsed:.0f} insertions per second')
    except Exception as e:
        # Deliberately best-effort: the failure is reported and the handle is
        # still returned so the caller can query whatever the database holds.
        print(f'data already exists? {e}')
    return db
|
|
|
|
|
|
if __name__ == '__main__':
    # Load (or reuse) the flights database, passing destroy_on_exit=False.
    db = insert_data(False)

    # perf_counter() is monotonic, so the interval below is not affected by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    # Fetch the code and description of the country whose code is 'VN'.
    # NOTE(review): the Unify clause pins ?c to a hard-coded entity id;
    # presumably this is the id VN received in an earlier load -- confirm.
    res = db.run([Q(['?c', '?code', '?desc'],
                    T.country.code('?c', 'VN'),
                    Unify('?c', 10000239),
                    T.country.code('?c', '?code'),
                    T.country.desc('?c', '?desc'))])
    end_time = time.perf_counter()

    print(json.dumps(res, indent=2))
    print(f'{len(res)} results fetched in {(end_time - start_time) * 1000:.3f}ms')
|