Filling the data warehouse of the semantic network.
Create a database.
tables = [
(drop = true, create = true, file = "sql/create_V.sql", name = "V"),
(drop = true, create = true, file = "sql/create_C.sql", name = "C"),
(drop = true, create = true, file = "sql/create_R.sql", name = "R"),
(drop = true, create = true, file = "sql/create_A.sql", name = "A"),
(drop = true, create = true, file = "sql/create_RC.sql", name = "RC"),
(drop = true, create = true, file = "sql/create_AC.sql", name = "AC"),
(drop = true, create = true, file = "sql/create_AR.sql", name = "AR"),
(drop = true, create = true, file = "sql/create_ARC.sql", name = "ARC"),
(drop = true, create = true, file = "sql/create_O.sql", name = "O"),
(drop = true, create = true, file = "sql/create_CO.sql", name = "CO"),
(drop = true, create = true, file = "sql/create_ACO.sql", name = "ACO"),
(drop = true, create = true, file = "sql/create_RCO.sql", name = "RCO"),
(drop = true, create = true, file = "sql/create_ARCO.sql", name = "ARCO"),
]
file = "semantic.duckdb"
function create_table(db, t)
o = ""
if t.drop
sql = "DROP TABLE IF EXISTS $(t.name);"
r = DuckDB.execute(db, sql)
o = o * "executed:\n$sql\nresult: $r\n"
end
if t.create
sql = read(t.file, String)
r = DuckDB.execute(db, sql)
o = o * "executed:\n$sql\nresult: $r\n"
end
o == "" ? "table: $t.name - nothing executed\n" : o
end
function drop_kb(db)
ts = ["ARCO","RCO","ACO","CO","O","ARC","AR","AC","RC","V","A","R","C"]
for t in ts
DuckDB.execute(db, "DROP TABLE IF EXISTS $t;")
end
for t in ts
DuckDB.execute(db, "DROP SEQUENCE IF EXISTS SEQ_$t;")
end
end
function create_seq(db)
ts = ["ARCO","RCO","ACO","CO","O","ARC","AR","AC","RC","V","A","R","C"]
for t in ts
DuckDB.execute(db, "CREATE SEQUENCE IF NOT EXISTS SEQ_$t;")
end
end
function create_kb(ts, file)
db = DuckDB.open(file)
out = ""
drop_kb(db)
create_seq(db)
for t in ts
out = out * create_table(db, t)
end
(db, Text(out))
endUpload the document and preprocess the text.
function proc_document(file)
l = StringDocument(text(FileDocument(file)))
TextAnalysis.remove_whitespace!(l)
sents = TextAnalysis.sentence_tokenize(Languages.Ukrainian(), TextAnalysis.text(l))
crps = Corpus([StringDocument(String(s)) for s in sents])
languages!(crps, Languages.Ukrainian())
remove_case!(crps)
sent_lcase = [TextAnalysis.text(d) for d in crps]
prepare!(crps, strip_punctuation | strip_numbers)
#remove_words!(crps, ["на","і","що","в","до","не","для"])
update_lexicon!(crps)
#lex = lexicon(crps)
update_inverse_index!(crps)
inverse_index(crps)
(sent_lcase, crps)
end
l1_sent_lcase, l1_crps = proc_document("Лекція 1.txt")Database query dictionaries.
sql_inserts = Dict(
:A => "INSERT INTO A (id, v) VALUES(nextval('SEQ_A'), ?) RETURNING id",
:C => "INSERT INTO C (id, v) VALUES(nextval('SEQ_C'), ?) RETURNING id",
:V => "INSERT INTO V (id, value) VALUES(nextval('SEQ_V'), ?) RETURNING id",
:AC => "INSERT INTO AC (id, c, a, v) VALUES(nextval('SEQ_AC'), ?, ?, ?) RETURNING id",
:R => "INSERT INTO R (id, v) VALUES(nextval('SEQ_R'), ?) RETURNING id",
:AR => "INSERT INTO AR (id, r, a, v) VALUES(nextval('SEQ_AR'), ?, ?, ?) RETURNING id",
:RC => "INSERT INTO RC (id, cf, r, ct) VALUES(nextval('SEQ_RC'), ?, ?, ?) RETURNING id",
:ARC => "INSERT INTO ARC (id, rc, a, v) VALUES(nextval('SEQ_ARC'), ?, ?, ?) RETURNING id",
:O => "INSERT INTO O (id, v) VALUES(nextval('SEQ_O'), ?) RETURNING id",
:CO => "INSERT INTO CO (id, c, o) VALUES(nextval('SEQ_CO'), ?, ?) RETURNING id",
:ACO => "INSERT INTO ACO (id, co, a, v) VALUES(nextval('SEQ_ACO'), ?, ?, ?) RETURNING id",
:RCO => "INSERT INTO RCO (id, rc, of, ot) VALUES(nextval('SEQ_RCO'), ?, ?, ?) RETURNING id",
:ARCO => "INSERT INTO ARCO (id, rco, a, v) VALUES(nextval('SEQ_ARCO'), ?, ?, ?) RETURNING id",
)
sql_selects = Dict(
:A => "SELECT id, v, (SELECT V.value FROM V WHERE V.id = A.v) as name FROM A",
:C => "SELECT id, v, (SELECT V.value FROM V WHERE V.id = C.v) as name FROM C",
:V => "SELECT * FROM V",
:AC => """SELECT AC.id, AC.c, AC.a, AC.v,
(SELECT V.value FROM C, V WHERE C.id = AC.c AND V.id = C.v) as c_name,
(SELECT V.value FROM A, V WHERE A.id = AC.a AND V.id = A.v) as a_name,
(SELECT value FROM V WHERE V.id = AC.v) as value
FROM AC
""",
:R => "SELECT id, v, (SELECT V.value FROM V WHERE V.id = R.v) as name FROM R",
:AR => """SELECT AR.id, AR.r, AR.a, AR.v,
(SELECT V.value FROM R, V WHERE R.id = AR.r AND V.id = R.v) as r_name,
(SELECT V.value FROM A, V WHERE A.id = AR.a AND V.id = A.v) as a_name,
(SELECT value FROM V WHERE V.id = AR.v) as value
FROM AR
""",
:RC => """SELECT RC.id, RC.r as r, RC.cf as cf, RC.ct as ct,
(SELECT V.value FROM C, V WHERE C.id = RC.cf AND V.id = C.v) as cf_name,
(SELECT V.value FROM R, V WHERE R.id = RC.r AND V.id = R.v) as r_name,
(SELECT V.value FROM C, V WHERE C.id = RC.ct AND V.id = C.v) as ct_name
FROM RC
""",
:ARC => """SELECT ARC.id, ARC.rc as rc, ARC.a as a, ARC.v as v,
(SELECT V.value FROM R, RC, V WHERE RC.id = ARC.rc AND R.id = RC.r AND V.id = R.v) as r_name,
(SELECT V.value FROM C, RC, V WHERE RC.id = ARC.rc AND C.id = RC.cf AND V.id = C.v) as cf_name,
(SELECT V.value FROM C, RC, V WHERE RC.id = ARC.rc AND C.id = RC.ct AND V.id = C.v) as ct_name,
(SELECT V.value FROM A, V WHERE A.id = ARC.a AND V.id = A.v) as a_name,
(SELECT value FROM V WHERE V.id = ARC.v) as value
FROM ARC
""",
:O => "SELECT id, flag, v, (SELECT V.value FROM V WHERE V.id = O.v) as name FROM O",
:CO => """SELECT CO.id, CO.c as c, CO.o as o,
(SELECT V.value FROM C, V WHERE C.id = CO.c AND V.id = C.v) as c_name,
(SELECT V.value FROM O, V WHERE O.id = CO.o AND V.id = O.v) as o_name
FROM CO
""",
:ACO => """SELECT ACO.id, ACO.co, ACO.a, ACO.v,
(SELECT V.value FROM C, CO, V WHERE C.id = CO.c AND CO.id = ACO.co AND V.id = C.v) as c_name,
(SELECT V.value FROM O, CO, V WHERE O.id = CO.o AND CO.id = ACO.co AND V.id = O.v) as o_name,
(SELECT V.value FROM A, V WHERE A.id = ACO.a AND V.id = A.v) as a_name,
(SELECT value FROM V WHERE V.id = ACO.v) as value
FROM ACO
""",
:RCO => """SELECT RCO.id, RCO.rc as rc, RCO.of as of, RCO.ot as ot,
(SELECT V.value FROM R, V WHERE R.id = RC.r AND V.id = R.v) as r_name,
(SELECT V.value FROM O, V WHERE O.id = RCO.of AND V.id = O.v) as of_name,
(SELECT V.value FROM O, V WHERE O.id = RCO.ot AND V.id = O.v) as ot_name
FROM RCO, RC
WHERE RC.id = RCO.rc
""",
:ARCO => """SELECT ARCO.id, ARCO.rco, ARCO.a, ARCO.v,
(SELECT V.value FROM R, V WHERE R.id = RC.r AND V.id = R.v) as r_name,
(SELECT V.value FROM O, V WHERE O.id = RCO.of AND V.id = O.v) as of_name,
(SELECT V.value FROM O, V WHERE O.id = RCO.ot AND V.id = O.v) as ot_name,
(SELECT V.value FROM A, V WHERE A.id = ARCO.a AND V.id = A.v) as a_name,
(SELECT value FROM V WHERE V.id = ARCO.v) as value
FROM ARCO, RCO, RC
WHERE RC.id = RCO.rc AND RCO.id = ARCO.rco
""",
)
sql_select_ids = Dict(
:A => "SELECT id FROM A WHERE v = ?",
:C => "SELECT id FROM C WHERE v = ?",
:V => "SELECT id FROM V WHERE value = ?",
:AC => "SELECT id FROM AC WHERE c = ? AND a = ?",
:R => "SELECT id FROM R WHERE v = ?",
:AR => "SELECT id FROM AR WHERE r = ? AND a = ?",
:RC => "SELECT id FROM RC WHERE cf = ? AND r = ? AND ct = ?",
:ARC => "SELECT id FROM ARC WHERE rc = ? AND a = ?",
:O => "SELECT id FROM O WHERE v = ?",
:CO => "SELECT id FROM CO WHERE c = ? AND o = ?",
:ACO => "SELECT id FROM ACO WHERE co = ? AND a = ?",
:RCO => "SELECT id FROM RCO WHERE of = ? AND rc = ? AND ot = ?",
:ARCO => "SELECT id FROM ARCO WHERE rco = ? AND a = ?",
)Auxiliary functions for working with the database.
function select_all(db, table)
#db = DuckDB.DB(file)
ret = DataFrame(DuckDB.execute(db, sql_selects[table]))
#DBInterface.close!(db)
ret
end
function id(kb, table, params)
sql_id = sql_select_ids[table]
df_id = DataFrame(DuckDB.execute(kb, sql_id,
table in [:AC, :AR, :ARC, :ACO, :ARCO] ? params[1:end-1] : params))
ret = nrow(df_id) == 0 ? -1 : df_id[1, :id]
end
function value(kb, id)
sql = "SELECT value FROM V WHERE id = ?"
df = DataFrame(DuckDB.execute(kb, sql, [id]))
v = nrow(df) == 0 ? "not found" : df[1, :value]
end
function value_count!(db, param)
sql_upd = "UPDATE V SET count = count + 1 WHERE id = ?"
df = DataFrame(DuckDB.execute(db, sql_upd, [param]))
param
end
function id(df, s)
df[only(findall(==(s), df.name)), :][:id]
end
function insert(db, table, params)
sql_id = sql_select_ids[table]
ret = id(db, table, params)
if ret == -1
sql_ins = sql_inserts[table]
df = DataFrame(DuckDB.execute(db, sql_ins, params))
if table in [:A, :C, :R, :O, :AC, :AR, :ARC, :ACO, :ARCO]
value_count!(db, params[end])
end
ret = nrow(df) == 0 ? -1 : df[1, :id]
end
ret
endLoading data into the semantic network with setting of attributes and their values.
kb = let
kb, out = create_kb(tables, file)
v_nothing = insert(kb, :V, ["nothing"])
v_author1 = insert(kb, :V, ["Голуб Б.Л."])
v_doc1 = insert(kb, :V, ["Лекція 1"])
v_c_doc = insert(kb, :V, ["Документ"])
v_c_author = insert(kb, :V, ["Автор"])
v_c_sentence = insert(kb, :V, ["Речення"])
v_c_word = insert(kb, :V, ["Слово"])
v_a_title = insert(kb, :V, ["Назва"])
v_a_name = insert(kb, :V, ["Ім'я"])
v_a_number = insert(kb, :V, ["Номер"])
v_r_is_author = insert(kb, :V, ["є автором"])
v_r_has_parts = insert(kb, :V, ["складається з"])
# select_all(kb, :V)
insert(kb, :C, [v_c_doc])
insert(kb, :C, [v_c_author])
insert(kb, :C, [v_c_sentence])
insert(kb, :C, [v_c_word])
# select_all(kb, :C)
insert(kb, :A, [v_a_title])
insert(kb, :A, [v_a_name])
insert(kb, :A, [v_a_number])
# select_all(kb, :A)
insert(kb, :AC, [id(kb, :C, [v_c_author]), id(kb, :A, [v_a_name]), v_nothing])
insert(kb, :AC, [id(kb, :C, [v_c_doc]), id(kb, :A, [v_a_title]), v_nothing])
# select_all(kb, :AC)
insert(kb, :R, [v_r_is_author])
insert(kb, :R, [v_r_has_parts])
# select_all(kb, :R)
insert(kb, :AR, [id(kb, :R, [v_r_has_parts]), id(kb, :A, [v_a_number]), v_nothing])
# select_all(kb, :AR)
insert(kb, :RC, [id(kb, :C, [v_c_author]), id(kb, :R, [v_r_is_author]), id(kb, :C, [v_c_doc])])
rc1 = insert(kb, :RC, [id(kb, :C, [v_c_doc]), id(kb, :R, [v_r_has_parts]), id(kb, :C, [v_c_sentence])])
rc2 = insert(kb, :RC, [id(kb, :C, [v_c_sentence]), id(kb, :R, [v_r_has_parts]), id(kb, :C, [v_c_word])])
# select_all(kb, :RC)
insert(kb, :ARC, [rc1, id(kb, :A, [v_a_number]), v_nothing])
insert(kb, :ARC, [rc2, id(kb, :A, [v_a_number]), v_nothing])
# select_all(kb, :ARC)
insert(kb, :O, [v_author1])
insert(kb, :O, [v_doc1])
co1 = insert(kb, :CO, [id(kb, :C, [v_c_author]), id(kb, :O, [v_author1])])
co2 = insert(kb, :CO, [id(kb, :C, [v_c_doc]), id(kb, :O, [v_doc1])])
# filter(r -> r.c in [id(kb, :C, ["Автор"]), id(kb, :C, ["Документ"])], select_all(kb, :CO))
insert(kb, :ACO, [co1, id(kb, :A, [v_a_name]), v_author1])
insert(kb, :ACO, [co2, id(kb, :A, [v_a_title]), v_doc1])
c = id(kb, :C, [v_c_sentence])
for s in l1_sent_lcase
o = insert(kb, :O, [insert(kb, :V, [s])])
if o > 0
insert(kb, :CO, [c, o])
end
end
# filter(r -> r.c == c, select_all(kb, :CO))
c = id(kb, :C, [v_c_word])
for s in keys(lexicon(l1_crps))
o = insert(kb, :O, [insert(kb, :V, [s])])
if o > 0
insert(kb, :CO, [c, o])
end
end
# filter(r -> r.c == c, select_all(kb, :CO))
r = id(kb, :R, [v_r_has_parts])
cf = id(kb, :C, [v_c_doc])
ct = id(kb, :C, [v_c_sentence])
rc = id(kb, :RC, [cf, r, ct])
of = id(kb, :O, [v_doc1])
a = id(kb, :A, [v_a_number])
for i in eachindex(l1_sent_lcase)
ot = id(kb, :O, [insert(kb, :V, [l1_sent_lcase[i]])])
rco = insert(kb, :RCO, [rc, of, ot])
insert(kb, :ARCO, [rco, a, insert(kb, :V, ["$i"])])
end
# filter(r -> r.rc == rc, select_all(kb, :RCO))
# select_all(kb, :ARCO)
# DuckDB.execute(kb, "EXPORT DATABASE 'semantic.duckdb'")
kb
endQueries to the logic programming knowledge base.
function select(db, table)
DataFrame(DuckDB.execute(db, "SELECT * FROM $table"))
end
function table2clauses(db, t)
df = select(db, t)
clauses = Vector{Julog.Clause}(undef, nrow(df))
i = 0
for r in eachrow(df)
i+=1
clauses[i] = Clause(
Compound(t, [Const(x) for x in r]), [])
end
clauses
end
Turn the database into a logic programming knowledge base.
clauses = let
cl = Clause[]
for t in tables
cl = vcat(cl, table2clauses(db, t))
end
cl
end Combine facts and rules (chapter 5) into a single knowledge base.
kb = vcat(clauses, rules)Perform test queries to the knowledge base using the created rules
resolve(@julog([Attr(ID, Name)]), kb)
resolve(@julog([Cat(ID, Name)]), kb)
resolve(@julog([Rel(ID, Name)]), kb)
resolve(@julog([Obj(ID, Name)]), kb)
resolve(@julog([CatObj(ID, CatID, ObjID, CatName, ObjName)]), kb)
resolve(@julog([RelCat(ID, RelID, CatFromID, CatToID, RelName, CatFromName, CatToName)]), kb)
resolve(@julog([RelCatObj(ID, RelCatID, CatFromID, ObjFromID, CatToID, ObjToID, RelName, CatFromName, ObjFromName, CatToName, ObjToName)]), kb)
resolve(@julog([AttrCat(ID, CatID, AttrID, ValueID, CatName, AttrName, Value)]), kb)
resolve(@julog([AttrCatObj(ID, CatObjID, CatID, ObjID, AttrID, ValueID, CatName, ObjName, AttrName, Value)]), kb)
resolve(@julog([AttrRel(ID, RelID, AttrID, ValueID, RelName, AttrName, Value)]), kb)
resolve(@julog([AttrRelCat(ID, RelCatID, RelID, CatFromID, CatToID, AttrID, ValueID, RelName, CatFromName, CatToName, AttrName, Value)]), kb)
resolve(@julog([AttrRelCatObj(ID, RelCatObjID, RelID, CatFromID, ObjFromID, CatToID, ObjToID, AttrID, ValueID, RelName, CatFromName, ObjFromName, CatToName, ObjToName, AttrName, Value)]), kb)