snto

Swedish National Test Organizer
git clone https://noxz.tech/git/snto.git
snto

commit: dd272a5a06458f857565431c094ead95a726f9c9
parent: 61de3ad4cde3d0abd49bd1d617189057fabe6d63
author: Chris Noxz <chris@noxz.tech>
date:   Mon, 4 Dec 2023 13:52:38 +0100
redesign db relations
Mextract.sh54++++++++++++--------
1 file changed, 33 insertions(+), 21 deletions(-)
diff --git a/extract.sh b/extract.sh
@@ -8,27 +8,38 @@ cachedir="${workingdir}/cache"
 # create cache directory if not existing
 [ ! -d "${cachedir}" ] && mkdir -p "${cachedir}"
 
-# remove database
+# remove old database
 rm -f "${dbpath}"
 
+# reset id
+taskid=-1
+tagid=-1
+imageid=-1
+
 # create database
 sqlite3 "${dbpath}" '
 CREATE TABLE task(
-	id          MD5 PRIMARY KEY,
+	id          INTEGER PRIMARY KEY,
 	course      TEXT,
 	semester    TEXT,
 	num         INTEGER
 );
 CREATE TABLE tag(
-	id          MD5 PRIMARY KEY,
-	taskid      MD5,
+	id          integer PRIMARY KEY,
 	tagname     TEXT,
 
-	FOREIGN KEY(taskid) REFERENCES task(id)
+	UNIQUE (tagname)
+);
+CREATE TABLE tasktag(
+	taskid      INTEGER,
+	tagid       INTEGER,
+
+	FOREIGN KEY(taskid) REFERENCES task(id),
+	FOREIGN KEY(tagid) REFERENCES tag(id)
 );
 CREATE TABLE image(
-	id          MD5 PRIMARY KEY,
-	taskid      MD5,
+	id          integer PRIMARY KEY,
+	taskid      INTEGER,
 	idx         INTEGER,
 	image       BLOB,
 
@@ -37,6 +48,7 @@ CREATE TABLE image(
 
 # loop through data lines
 while read -r line; do
+	taskid=$((taskid+1))
 	# extract data from line
 	url="${line%%|*}"; line="${line#*|}"
 	course="${line%%|*}"; line="${line#*|}"
@@ -51,9 +63,7 @@ while read -r line; do
 	# download pdf if not existing
 	[ ! -f "${pdfpath}" ] && curl -s "${url}" -o "${pdfpath}"
 
-	i=0
-	taskid=""
-	while true; do
+	i=-1; while true; do
 		i=$((i+1))
 
 		# get cropbox
@@ -64,8 +74,7 @@ while read -r line; do
 		pngpath="${cachedir}"/"${pngname}"
 
 		# insert task into database
-		if [ "${i}" -eq 1 ]; then
-			taskid="${pngname%%.*}"
+		if [ "${i}" -eq 0 ]; then
 			sqlite3 "${dbpath}" "
 			INSERT INTO task VALUES(
 				'${taskid}',
@@ -75,16 +84,20 @@ while read -r line; do
 			);"
 
 			# insert tags into database
-			j=0
 			while true; do
-				j=$((j+1))
+				tagid=$((tagid+1))
 				tag="${tags%%,*}";
 				sqlite3 "${dbpath}" "
-				INSERT INTO tag VALUES(
-					'$(echo "${taskid}.${j}" | md5sum | cut -d' ' -f1)',
-					'${taskid}',
+				INSERT OR IGNORE INTO tag VALUES(
+					'${tagid}',
 					'${tag}'
-				);"
+				);
+				INSERT INTO tasktag(taskid, tagid)
+					SELECT ${taskid}, id
+					FROM tag
+					WHERE tagname='${tag}'
+				;
+				"
 				[ "${tags}" = "${tags#*,}" ] && break
 				tags="${tags#*,}"
 			done
@@ -104,9 +117,10 @@ while read -r line; do
 
 		# insert images into database
 		if [ "${taskid}" != "" ]; then
+			imageid=$((imageid+1))
 			sqlite3 "${dbpath}" "
 			INSERT INTO image VALUES(
-				'${pngname%%.*}',
+				'${imageid}',
 				'${taskid}',
 				'${i}',
 				readfile('${pngpath}')
@@ -119,5 +133,3 @@ while read -r line; do
 	done
 
 done < "${datpath}"
-
-