Skip to content

Commit 18071ac

Browse files
committed
Refactored CSV/RDF import scripts
1 parent c609fe6 commit 18071ac

File tree

12 files changed

+90
-146
lines changed

12 files changed

+90
-146
lines changed

bin/add-file.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ print_usage()
1717
printf " --description DESCRIPTION Description of the file (optional)\n"
1818
printf "\n"
1919
printf " --file ABS_PATH Absolute path to the file\n"
20-
printf " --file-content-type MEDIA_TYPE Media type of the file (optional)\n"
20+
printf " --content-type MEDIA_TYPE Media type of the file (optional)\n"
2121
}
2222

2323
hash curl 2>/dev/null || { echo >&2 "curl not on \$PATH. Aborting."; exit 1; }
@@ -63,8 +63,8 @@ do
6363
shift # past argument
6464
shift # past value
6565
;;
66-
--file-content-type)
67-
file_content_type="$2"
66+
--content-type)
67+
content_type="$2"
6868
shift # past argument
6969
shift # past value
7070
;;
@@ -98,17 +98,17 @@ if [ -z "$file" ] ; then
9898
print_usage
9999
exit 1
100100
fi
101-
if [ -z "$file_content_type" ] ; then
101+
if [ -z "$content_type" ] ; then
102102
# determine content-type if not provided
103-
file_content_type=$(file -b --mime-type "$file")
103+
content_type=$(file -b --mime-type "$file")
104104
fi
105105

106106
# https://stackoverflow.com/questions/19116016/what-is-the-right-way-to-post-multipart-form-data-using-curl
107107

108108
rdf_post+="-F \"rdf=\"\n"
109109
rdf_post+="-F \"sb=file\"\n"
110110
rdf_post+="-F \"pu=http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#fileName\"\n"
111-
rdf_post+="-F \"ol=@${file};type=${file_content_type}\"\n"
111+
rdf_post+="-F \"ol=@${file};type=${content_type}\"\n"
112112
rdf_post+="-F \"pu=http://purl.org/dc/terms/title\"\n"
113113
rdf_post+="-F \"ol=${title}\"\n"
114114
rdf_post+="-F \"pu=http://www.w3.org/1999/02/22-rdf-syntax-ns#type\"\n"

bin/imports/import-csv.sh

Lines changed: 14 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ print_usage()
1212
{
1313
printf "Transforms CSV data into RDF using a SPARQL query and imports it.\n"
1414
printf "\n"
15-
printf "Usage: %s options\n" "$0"
15+
printf "Usage: %s options TARGET_URI\n" "$0"
1616
printf "\n"
1717
printf "Options:\n"
1818
printf " -f, --cert-pem-file CERT_FILE .pem file with the WebID certificate of the agent\n"
@@ -25,12 +25,8 @@ print_usage()
2525
printf " --slug STRING String that will be used as URI path segment (optional)\n"
2626
printf "\n"
2727
printf " --query-file ABS_PATH Absolute path to the text file with the SPARQL query string\n"
28-
printf " --query-doc-slug STRING String that will be used as the query's URI path segment (optional)\n"
29-
printf " --file ABS_PATH Absolute path to the CSV file\n"
30-
printf " --file-slug STRING String that will be used as the file's URI path segment (optional)\n"
31-
printf " --file-doc-slug STRING String that will be used as the file document's URI path segment (optional)\n"
28+
printf " --csv-file ABS_PATH Absolute path to the CSV file\n"
3229
printf " --delimiter CHAR CSV delimiter char (default: ',')\n"
33-
printf " --import-slug STRING String that will be used as the import's URI path segment (optional)\n"
3430
}
3531

3632
args=()
@@ -69,23 +65,8 @@ do
6965
shift # past argument
7066
shift # past value
7167
;;
72-
--query-doc-slug)
73-
query_doc_slug="$2"
74-
shift # past argument
75-
shift # past value
76-
;;
77-
--file)
78-
file="$2"
79-
shift # past argument
80-
shift # past value
81-
;;
82-
--file-slug)
83-
file_slug="$2"
84-
shift # past argument
85-
shift # past value
86-
;;
87-
--file-doc-slug)
88-
file_doc_slug="$2"
68+
--csv-file)
69+
csv_file="$2"
8970
shift # past argument
9071
shift # past value
9172
;;
@@ -94,11 +75,6 @@ do
9475
shift # past argument
9576
shift # past value
9677
;;
97-
--import-slug)
98-
import_slug="$2"
99-
shift # past argument
100-
shift # past value
101-
;;
10278
*) # unknown arguments
10379
args+=("$1") # save it in an array for later
10480
shift # past argument
@@ -107,6 +83,8 @@ do
10783
done
10884
set -- "${args[@]}" # restore args
10985

86+
target="$1"
87+
11088
if [ -z "$cert_pem_file" ] ; then
11189
print_usage
11290
exit 1
@@ -127,7 +105,7 @@ if [ -z "$query_file" ] ; then
127105
print_usage
128106
exit 1
129107
fi
130-
if [ -z "$file" ] ; then
108+
if [ -z "$csv_file" ] ; then
131109
print_usage
132110
exit 1
133111
fi
@@ -142,27 +120,6 @@ fi
142120
# Generate query ID for fragment identifier
143121
query_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
144122

145-
# Create the imports/ container first (ignore error if it already exists)
146-
create-container.sh \
147-
-b "$base" \
148-
-f "$cert_pem_file" \
149-
-p "$cert_password" \
150-
--proxy "$proxy" \
151-
--title "Imports" \
152-
--parent "$base" \
153-
--slug "imports" 2>/dev/null || true
154-
155-
# Create the import item document
156-
import_doc=$(create-item.sh \
157-
-b "$base" \
158-
-f "$cert_pem_file" \
159-
-p "$cert_password" \
160-
--proxy "$proxy" \
161-
--title "$title" \
162-
--container "${base}imports/" \
163-
--slug "$query_doc_slug"
164-
)
165-
166123
# Add the CONSTRUCT query to the item using fragment identifier
167124
# TO-DO: fix ambiguous add-construct.sh script names
168125
"$(dirname "$0")/../add-construct.sh" \
@@ -173,10 +130,10 @@ import_doc=$(create-item.sh \
173130
--title "$title" \
174131
--uri "#${query_id}" \
175132
--query-file "$query_file" \
176-
"$import_doc"
133+
"$target"
177134

178135
# The query URI is the document with fragment
179-
query="${import_doc}#${query_id}"
136+
query="${target}#${query_id}"
180137

181138
# Add the file to the import item
182139
add-file.sh \
@@ -185,12 +142,12 @@ add-file.sh \
185142
-p "$cert_password" \
186143
--proxy "$proxy" \
187144
--title "$title" \
188-
--file "$file" \
189-
--file-content-type "text/csv" \
190-
"$import_doc"
145+
--file "$csv_file" \
146+
--content-type "text/csv" \
147+
"$target"
191148

192149
# Calculate file URI from SHA1 hash
193-
sha1sum=$(shasum -a 1 "$file" | awk '{print $1}')
150+
sha1sum=$(shasum -a 1 "$csv_file" | awk '{print $1}')
194151
file_uri="${base}uploads/${sha1sum}"
195152

196153
# Generate import ID for fragment identifier
@@ -207,5 +164,5 @@ add-csv-import.sh \
207164
--query "$query" \
208165
--file "$file_uri" \
209166
--delimiter "$delimiter" \
210-
"$import_doc"
167+
"$target"
211168

bin/imports/import-rdf.sh

Lines changed: 23 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ function onexit() {
1010

1111
print_usage()
1212
{
13-
printf "Transforms CSV data into RDF using a SPARQL query and imports it.\n"
13+
printf "Transforms RDF data using a SPARQL query and imports it.\n"
1414
printf "\n"
15-
printf "Usage: %s options\n" "$0"
15+
printf "Usage: %s options TARGET_URI\n" "$0"
1616
printf "\n"
1717
printf "Options:\n"
1818
printf " -f, --cert-pem-file CERT_FILE .pem file with the WebID certificate of the agent\n"
@@ -25,13 +25,9 @@ print_usage()
2525
printf " --slug STRING String that will be used as URI path segment (optional)\n"
2626
printf "\n"
2727
printf " --query-file ABS_PATH Absolute path to the text file with the SPARQL query string (optional)\n"
28-
printf " --query-doc-slug STRING String that will be used as the query's URI path segment (optional)\n"
2928
printf " --graph GRAPH_URI URI of the graph (optional)\n"
30-
printf " --file ABS_PATH Absolute path to the CSV file (optional)\n"
31-
printf " --file-slug STRING String that will be used as the file's URI path segment (optional)\n"
32-
printf " --file-doc-slug STRING String that will be used as the file document's URI path segment (optional)\n"
33-
printf " --file-content-type MEDIA_TYPE Media type of the file\n"
34-
printf " --import-slug STRING String that will be used as the import's URI path segment (optional)\n"
29+
printf " --rdf-file ABS_PATH Absolute path to the RDF file (optional)\n"
30+
printf " --content-type MEDIA_TYPE Media type of the file\n"
3531
}
3632

3733
args=()
@@ -75,33 +71,13 @@ do
7571
shift # past argument
7672
shift # past value
7773
;;
78-
--query-doc-slug)
79-
query_doc_slug="$2"
74+
--rdf-file)
75+
rdf_file="$2"
8076
shift # past argument
8177
shift # past value
8278
;;
83-
--file)
84-
file="$2"
85-
shift # past argument
86-
shift # past value
87-
;;
88-
--file-slug)
89-
file_slug="$2"
90-
shift # past argument
91-
shift # past value
92-
;;
93-
--file-doc-slug)
94-
file_doc_slug="$2"
95-
shift # past argument
96-
shift # past value
97-
;;
98-
--file-content-type)
99-
file_content_type="$2"
100-
shift # past argument
101-
shift # past value
102-
;;
103-
--import-slug)
104-
import_slug="$2"
79+
--content-type)
80+
content_type="$2"
10581
shift # past argument
10682
shift # past value
10783
;;
@@ -113,6 +89,8 @@ do
11389
done
11490
set -- "${args[@]}" # restore args
11591

92+
target="$1"
93+
11694
if [ -z "$cert_pem_file" ] ; then
11795
print_usage
11896
exit 1
@@ -129,11 +107,11 @@ if [ -z "$title" ] ; then
129107
print_usage
130108
exit 1
131109
fi
132-
if [ -z "$file" ] ; then
110+
if [ -z "$rdf_file" ] ; then
133111
print_usage
134112
exit 1
135113
fi
136-
if [ -z "$file_content_type" ] ; then
114+
if [ -z "$content_type" ] ; then
137115
print_usage
138116
exit 1
139117
fi
@@ -142,27 +120,6 @@ if [ -z "$proxy" ] ; then
142120
proxy="$base"
143121
fi
144122

145-
# Create the imports/ container first
146-
create-container.sh \
147-
-b "$base" \
148-
-f "$cert_pem_file" \
149-
-p "$cert_password" \
150-
--proxy "$proxy" \
151-
--title "Imports" \
152-
--parent "$base" \
153-
--slug "imports"
154-
155-
# Create the import item document
156-
import_doc=$(create-item.sh \
157-
-b "$base" \
158-
-f "$cert_pem_file" \
159-
-p "$cert_password" \
160-
--proxy "$proxy" \
161-
--title "$title" \
162-
--container "${base}imports/" \
163-
--slug "$query_doc_slug"
164-
)
165-
166123
if [ -n "$query_file" ] ; then
167124
# Generate query ID for fragment identifier
168125
query_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
@@ -177,10 +134,10 @@ if [ -n "$query_file" ] ; then
177134
--title "$title" \
178135
--uri "#${query_id}" \
179136
--query-file "$query_file" \
180-
"$import_doc"
137+
"$target"
181138

182139
# The query URI is the document with fragment
183-
query="${import_doc}#${query_id}"
140+
query="${target}#${query_id}"
184141
fi
185142

186143
# Add the file to the import item
@@ -190,13 +147,13 @@ add-file.sh \
190147
-p "$cert_password" \
191148
--proxy "$proxy" \
192149
--title "$title" \
193-
--file "$file" \
194-
--file-content-type "$file_content_type" \
195-
"$import_doc"
150+
--file "$rdf_file" \
151+
--content-type "$content_type" \
152+
"$target"
196153

197154
# Calculate file URI from SHA1 hash
198-
sha1sum=$(shasum -a 1 "$file" | awk '{print $1}')
199-
file_uri="${base}uploads/${sha1sum}"
155+
sha1sum=$(shasum -a 1 "$rdf_file" | awk '{print $1}')
156+
rdf_file_uri="${base}uploads/${sha1sum}"
200157

201158
# Generate import ID for fragment identifier
202159
import_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
@@ -211,8 +168,8 @@ if [ -n "$query" ] ; then
211168
--title "$title" \
212169
--uri "#${import_id}" \
213170
--query "$query" \
214-
--file "$file_uri" \
215-
"$import_doc"
171+
--file "$rdf_file_uri" \
172+
"$target"
216173
else
217174
add-rdf-import.sh \
218175
-b "$base" \
@@ -222,6 +179,6 @@ else
222179
--title "$title" \
223180
--uri "#${import_id}" \
224181
--graph "$graph" \
225-
--file "$file_uri" \
226-
"$import_doc"
182+
--file "$rdf_file_uri" \
183+
"$target"
227184
fi

http-tests/admin/model/ontology-import-upload-no-deadlock.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ add-file.sh \
4646
-b "$END_USER_BASE_URL" \
4747
--title "Test ontology for upload import" \
4848
--file "$pwd/test-ontology-import.ttl" \
49-
--file-content-type "${file_content_type}" \
49+
--content-type "${file_content_type}" \
5050
"$file_doc"
5151

5252
# Step 2: Extract the uploaded file URI (content-addressed)

http-tests/imports/GET-file-range.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ add-file.sh \
4040
-b "$END_USER_BASE_URL" \
4141
--title "Random file" \
4242
--file "$filename" \
43-
--file-content-type "${file_content_type}" \
43+
--content-type "${file_content_type}" \
4444
"$file_doc"
4545

4646
# Calculate file URI from SHA1 hash

http-tests/imports/GET-file-sha1sum.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ add-file.sh \
4747
-b "$END_USER_BASE_URL" \
4848
--title "Random file" \
4949
--file "$filename" \
50-
--file-content-type "${file_content_type}" \
50+
--content-type "${file_content_type}" \
5151
"$file_doc"
5252

5353
# Calculate file URI from SHA1 hash

0 commit comments

Comments
 (0)