-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutilities.py
More file actions
106 lines (90 loc) · 3.63 KB
/
utilities.py
File metadata and controls
106 lines (90 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import json
from multiprocessing.sharedctypes import Value
from turtle import pos
from typing import Set
import requests
from sklearn import datasets
import os
from rdflib import URIRef
from io import StringIO
import pandas as pd
def fetch_overview_data(endpoint, timeout=None):
    """Fetch an overview of the class hierarchy of a triple store as CSV text.

    A SPARQL query is sent to ``endpoint`` via HTTP GET with an
    ``Accept: text/csv`` header. The query selects, for every class that has
    instances, the class, the minimum of its labels, a superclass with no
    intermediate class in between, and the instance count.

    ``endpoint`` is used twice: as the URL the request is sent to and,
    interpolated verbatim, as the target of the ``SERVICE`` clause inside the
    query. It is not escaped, so it must be a trusted, well-formed IRI.

    Args:
        endpoint: URL of the SPARQL endpoint.
        timeout: Optional timeout passed straight to ``requests.get``. The
            default ``None`` keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status. The
            original exception is propagated, so ``err.response`` is available
            to the caller.
    """
    query = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?c (MIN(?label) AS ?label1) ?superclass (count(?x) as ?count) WHERE {
SERVICE <""" + endpoint + """> {
?x a ?c.
OPTIONAL {?c rdfs:label ?label} .
?c rdfs:subClassOf ?superclass.
filter (?c != ?superclass &&
!exists {?c rdfs:subClassOf ?othersuper. ?othersuper rdfs:subClassOf ?superclass.
filter(?c != ?othersuper && ?othersuper != ?superclass)})
}
} group by ?c ?label1 ?superclass HAVING(?count > 1) order by desc(?count)
"""
    # NOTE: no default/fallback dataset is implemented here; if the endpoint
    # is unreachable or returns an error status, the exception reaches the
    # caller.
    response = requests.get(
        endpoint,
        params={'query': query},
        headers={'Accept': 'text/csv'},
        timeout=timeout,
    )
    # Propagate the original HTTPError untouched: re-wrapping it in a new
    # HTTPError would drop the attached response (status code, body).
    response.raise_for_status()
    return response.text
def parse_sunburst(csv: str) -> str:
    """Convert a class-hierarchy CSV dataset into a hierarchical JSON string.

    The first line of ``csv`` is treated as a header and skipped. Every other
    non-blank row must hold exactly four fields, in this order: class IRI,
    label, parent (superclass) IRI and count. Fields are stripped of
    surrounding whitespace, and a row with an empty label uses its IRI as the
    label. Rows are parsed with the standard CSV rules, so quoted fields may
    contain commas, and the last row is kept whether or not the text ends with
    a newline.

    The tree is rooted at a parent IRI that never occurs as a class IRI. When
    several such candidates exist, one containing ``'Thing'`` is preferred;
    remaining ties are broken by sort order so the result is deterministic.
    Classes reachable only from the other candidates are not part of the
    output.

    Args:
        csv: CSV text with one header line followed by the data rows.

    Returns:
        A JSON string. ``'{}'`` when there are no data rows; otherwise nested
        objects with ``name`` and ``iri`` plus either ``children`` (inner
        nodes) or ``size`` (leaves). ``size`` is the count field exactly as it
        appeared in the CSV, i.e. a string.

    Raises:
        RuntimeError: if a data row does not have exactly four fields, or if
            every parent also occurs as a class so no root can be chosen.
    """
    # The parameter name shadows the stdlib module, hence the aliased import.
    import csv as csv_module

    rows = csv_module.reader(StringIO(csv, newline=""))
    next(rows, None)  # skip the header line

    children_by_parent = {}
    parents = set()
    iris = set()
    for row in rows:
        if not row:
            # blank line (e.g. a stray empty line in the payload)
            continue
        if len(row) != 4:
            raise RuntimeError("No 4 elements in csv line!")
        iri, label, parent, count = (field.strip() for field in row)
        if label == "":
            label = iri
        parents.add(parent)
        iris.add(iri)
        children_by_parent.setdefault(parent, []).append(
            {'iri': iri, 'label': label, 'count': count}
        )

    # no data was returned
    if not children_by_parent:
        return json.dumps({})

    # Roots are parents that are never listed as a class themselves. Sorting
    # makes the choice independent of set iteration order.
    root_candidates = sorted(parents - iris)
    if not root_candidates:
        raise RuntimeError("No root class found in csv data!")
    thing_candidates = [iri for iri in root_candidates if 'Thing' in iri]
    root_iri = (thing_candidates or root_candidates)[0]

    base_node = {'iri': root_iri, 'label': root_iri, 'count': 1}
    return json.dumps(__make_children(base_node, children_by_parent))


def __make_children(node, reverse_dict):
    """Recursively build the nested output object for ``node``.

    Args:
        node: dict with the keys ``'iri'``, ``'label'`` and ``'count'``.
        reverse_dict: maps a parent IRI to the list of its child node dicts.

    Returns:
        A dict with ``'name'`` and ``'iri'``, plus ``'children'`` when the
        node's IRI is a key of ``reverse_dict``, or ``'size'`` (the node's
        count) when it is not.
    """
    obj = {'name': node['label'], 'iri': node['iri']}
    if node['iri'] in reverse_dict:
        obj['children'] = [
            __make_children(child, reverse_dict)
            for child in reverse_dict[node['iri']]
        ]
    else:
        # only leaves carry a size; inner nodes are described by their children
        obj['size'] = node['count']
    return obj
def parse_json_string_to_df(json_data):
    """Build a pandas DataFrame from a JSON string with columns and rows.

    The decoded JSON object must provide ``'columns'`` (a list of column
    names) and ``'rows'`` (a list of rows, each a list of cell values). The
    data is serialised to CSV text and read back with ``pandas.read_csv`` so
    that pandas infers the column types (e.g. the string ``"1"`` becomes an
    integer).

    The CSV text is produced with ``csv.writer``, so cells containing commas,
    quotes or newlines are quoted correctly and non-string cells (numbers,
    ``None``) are accepted.

    Args:
        json_data: JSON text of an object with ``'columns'`` and ``'rows'``.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    import csv  # local import: only needed for the CSV serialisation below

    json_obj = json.loads(json_data)

    buffer = StringIO()
    writer = csv.writer(buffer, lineterminator='\n')
    writer.writerow(json_obj['columns'])
    writer.writerows(json_obj['rows'])

    buffer.seek(0)  # rewind so read_csv starts at the header line
    return pd.read_csv(buffer)