commit: dd272a5a06458f857565431c094ead95a726f9c9
parent: 61de3ad4cde3d0abd49bd1d617189057fabe6d63
author: Chris Noxz <chris@noxz.tech>
date: Mon, 4 Dec 2023 13:52:38 +0100
redesign db relations
1 file changed, 33 insertions(+), 21 deletions(-)
diff --git a/extract.sh b/extract.sh
@@ -8,27 +8,38 @@ cachedir="${workingdir}/cache"
# create cache directory if not existing
[ ! -d "${cachedir}" ] && mkdir -p "${cachedir}"
-# remove database
+# remove old database
rm -f "${dbpath}"
+# reset id
+taskid=-1
+tagid=-1
+imageid=-1
+
# create database
sqlite3 "${dbpath}" '
CREATE TABLE task(
- id MD5 PRIMARY KEY,
+ id INTEGER PRIMARY KEY,
course TEXT,
semester TEXT,
num INTEGER
);
CREATE TABLE tag(
- id MD5 PRIMARY KEY,
- taskid MD5,
+ id integer PRIMARY KEY,
tagname TEXT,
- FOREIGN KEY(taskid) REFERENCES task(id)
+ UNIQUE (tagname)
+);
+CREATE TABLE tasktag(
+ taskid INTEGER,
+ tagid INTEGER,
+
+ FOREIGN KEY(taskid) REFERENCES task(id),
+ FOREIGN KEY(tagid) REFERENCES tag(id)
);
CREATE TABLE image(
- id MD5 PRIMARY KEY,
- taskid MD5,
+ id integer PRIMARY KEY,
+ taskid INTEGER,
idx INTEGER,
image BLOB,
@@ -37,6 +48,7 @@ CREATE TABLE image(
# loop through data lines
while read -r line; do
+ taskid=$((taskid+1))
# extract data from line
url="${line%%|*}"; line="${line#*|}"
course="${line%%|*}"; line="${line#*|}"
@@ -51,9 +63,7 @@ while read -r line; do
# download pdf if not existing
[ ! -f "${pdfpath}" ] && curl -s "${url}" -o "${pdfpath}"
- i=0
- taskid=""
- while true; do
+ i=-1; while true; do
i=$((i+1))
# get cropbox
@@ -64,8 +74,7 @@ while read -r line; do
pngpath="${cachedir}"/"${pngname}"
# insert task into database
- if [ "${i}" -eq 1 ]; then
- taskid="${pngname%%.*}"
+ if [ "${i}" -eq 0 ]; then
sqlite3 "${dbpath}" "
INSERT INTO task VALUES(
'${taskid}',
@@ -75,16 +84,20 @@ while read -r line; do
);"
# insert tags into database
- j=0
while true; do
- j=$((j+1))
+ tagid=$((tagid+1))
tag="${tags%%,*}";
sqlite3 "${dbpath}" "
- INSERT INTO tag VALUES(
- '$(echo "${taskid}.${j}" | md5sum | cut -d' ' -f1)',
- '${taskid}',
+ INSERT OR IGNORE INTO tag VALUES(
+ '${tagid}',
'${tag}'
- );"
+ );
+ INSERT INTO tasktag(taskid, tagid)
+ SELECT ${taskid}, id
+ FROM tag
+ WHERE tagname='${tag}'
+ ;
+ "
[ "${tags}" = "${tags#*,}" ] && break
tags="${tags#*,}"
done
@@ -104,9 +117,10 @@ while read -r line; do
# insert images into database
if [ "${taskid}" != "" ]; then
+ imageid=$((imageid+1))
sqlite3 "${dbpath}" "
INSERT INTO image VALUES(
- '${pngname%%.*}',
+ '${imageid}',
'${taskid}',
'${i}',
readfile('${pngpath}')
@@ -119,5 +133,3 @@ while read -r line; do
done
done < "${datpath}"
-
-