csvtable.c
Go to the documentation of this file.
1
19#ifdef STANDALONE
20#include <sqlite3.h>
21#else
22#include <sqlite3ext.h>
23static SQLITE_EXTENSION_INIT1
24#endif
25
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29#include <limits.h>
30
31#ifdef _WIN32
32#include <windows.h>
33#define strcasecmp _stricmp
34#define strncasecmp _strnicmp
35#endif
36
43typedef struct csv_file {
44 FILE *f;
45 char *sep;
46 char *quot;
47 int isdos;
48 int maxl;
49 char *line;
50 long pos0;
51 int maxc;
52 int ncols;
53 char **cols;
55
62typedef struct csv_guess_fmt {
63 int nlines;
64 int hist[256];
66
73typedef struct csv_vtab {
74 sqlite3_vtab vtab;
76 int convert;
77 char coltypes[1];
79
86typedef struct {
87 sqlite3_vtab_cursor cursor;
88 long pos;
90
96static void
97append_free(char **in)
98{
99 long *p = (long *) *in;
100
101 if (p) {
102 p -= 2;
103 sqlite3_free(p);
104 *in = 0;
105 }
106}
107
117static char *
118append(char **in, char const *append, char quote)
119{
120 long *p = (long *) *in;
121 long len, maxlen, actlen;
122 int i;
123 char *pp;
124 int nappend = append ? strlen(append) : 0;
125
126 if (p) {
127 p -= 2;
128 maxlen = p[0];
129 actlen = p[1];
130 } else {
131 maxlen = actlen = 0;
132 }
133 len = nappend + actlen;
134 if (quote) {
135 len += 2;
136 for (i = 0; i < nappend; i++) {
137 if (append[i] == quote) {
138 len++;
139 }
140 }
141 } else if (!nappend) {
142 return *in;
143 }
144 if (len >= maxlen - 1) {
145 long *q;
146
147 maxlen = (len + 0x03ff) & (~0x3ff);
148 q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
149 if (!q) {
150 return 0;
151 }
152 if (!p) {
153 q[1] = 0;
154 }
155 p = q;
156 p[0] = maxlen;
157 *in = (char *) (p + 2);
158 }
159 pp = *in + actlen;
160 if (quote) {
161 *pp++ = quote;
162 for (i = 0; i < nappend; i++) {
163 *pp++ = append[i];
164 if (append[i] == quote) {
165 *pp++ = quote;
166 }
167 }
168 *pp++ = quote;
169 *pp = '\0';
170 } else {
171 if (nappend) {
172 memcpy(pp, append, nappend);
173 pp += nappend;
174 *pp = '\0';
175 }
176 }
177 p[1] = pp - *in;
178 return *in;
179}
180
187static char *
188unquote(char const *in)
189{
190 char c, *ret;
191 int i;
192
193 ret = sqlite3_malloc(strlen(in) + 1);
194 if (ret) {
195 c = in[0];
196 if ((c == '"') || (c == '\'')) {
197 i = strlen(in + 1);
198 if ((i > 0) && (in[i] == c)) {
199 strcpy(ret, in + 1);
200 ret[i - 1] = '\0';
201 return ret;
202 }
203 }
204 strcpy(ret, in);
205 }
206 return ret;
207}
208
215static int
216maptype(char const *type)
217{
218 int typelen = type ? strlen(type) : 0;
219
220 if ((typelen >= 3) &&
221 (strncasecmp(type, "integer", 7) == 0)) {
222 return SQLITE_INTEGER;
223 }
224 if ((typelen >= 6) &&
225 (strncasecmp(type, "double", 6) == 0)) {
226 return SQLITE_FLOAT;
227 }
228 if ((typelen >= 5) &&
229 (strncasecmp(type, "float", 5) == 0)) {
230 return SQLITE_FLOAT;
231 }
232 if ((typelen >= 4) &&
233 (strncasecmp(type, "real", 4) == 0)) {
234 return SQLITE_FLOAT;
235 }
236 return SQLITE_TEXT;
237}
238
245static void
246conv_names(char **names, int ncols)
247{
248 int i;
249 char *p, *q;
250 static const char ws[] = "\n\t\r\b\v ";
251
252 if (!names || ncols <= 0) {
253 return;
254 }
255 for (i = 0; i < ncols; i++) {
256 p = names[i];
257
258 while (*p) {
259 if (strchr(ws, *p)) {
260 *p++ = '_';
261 q = p;
262 while (*q && strchr(ws, *q)) {
263 ++q;
264 }
265 if (*q && q > p) {
266 strcpy(p, q);
267 }
268 continue;
269 }
270 ++p;
271 }
272 }
273}
274
285static void
286result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
287 char *data, int len, int type)
288{
289 char *endp;
290
291 if (!data) {
292 if (ctx) {
293 sqlite3_result_null(ctx);
294 } else {
295 sqlite3_bind_null(stmt, idx);
296 }
297 return;
298 }
299 if (type == SQLITE_INTEGER) {
300 sqlite_int64 val;
301#if defined(_WIN32) || defined(_WIN64)
302 char endc;
303
304 if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
305 if (ctx) {
306 sqlite3_result_int64(ctx, val);
307 } else {
308 sqlite3_bind_int64(stmt, idx, val);
309 }
310 return;
311 }
312#else
313 endp = 0;
314#ifdef __osf__
315 val = strtol(data, &endp, 0);
316#else
317 val = strtoll(data, &endp, 0);
318#endif
319 if (endp && (endp != data) && !*endp) {
320 if (ctx) {
321 sqlite3_result_int64(ctx, val);
322 } else {
323 sqlite3_bind_int64(stmt, idx, val);
324 }
325 return;
326 }
327#endif
328 } else if (type == SQLITE_FLOAT) {
329 double val;
330
331 endp = 0;
332 val = strtod(data, &endp);
333 if (endp && (endp != data) && !*endp) {
334 if (ctx) {
335 sqlite3_result_double(ctx, val);
336 } else {
337 sqlite3_bind_double(stmt, idx, val);
338 }
339 return;
340 }
341 }
342 if (ctx) {
343 sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
344 } else {
345 sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
346 }
347}
348
359static int
360process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
361 char *data, int type, int conv)
362{
363 char c, *p;
364 const char flchars[] = "Ee+-.,0123456789";
365
366 if (!data) {
367 goto putdata;
368 }
369
370 /*
371 * Floating point number test,
372 * converts single comma to dot.
373 */
374 c = data[0];
375 if ((c != '\0') && strchr(flchars + 2, c)) {
376 p = data + 1;
377 while (*p && strchr(flchars, *p)) {
378 ++p;
379 }
380 if (*p == '\0') {
381 char *first = 0;
382 int n = 0;
383
384 p = data;
385 while (p) {
386 p = strchr(p, ',');
387 if (!p) {
388 break;
389 }
390 if (++n == 1) {
391 first = p;
392 }
393 ++p;
394 }
395 if (first) {
396 *first = '.';
397 goto putdata;
398 }
399 }
400 }
401 if (conv) {
402 char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
403
404 if (utf) {
405 p = utf;
406 while ((c = *data) != '\0') {
407 if (((conv & 10) == 10) && (c == '\\')) {
408 if (data[1] == 'q') {
409 *p++ = '\'';
410 data += 2;
411 continue;
412 }
413 }
414 if ((conv & 2) && (c == '\\')) {
415 char c2 = data[1];
416
417 switch (c2) {
418 case '\0':
419 goto convdone;
420 case 'n':
421 *p = '\n';
422 break;
423 case 't':
424 *p = '\t';
425 break;
426 case 'r':
427 *p = '\r';
428 break;
429 case 'f':
430 *p = '\f';
431 break;
432 case 'v':
433 *p = '\v';
434 break;
435 case 'b':
436 *p = '\b';
437 break;
438 case 'a':
439 *p = '\a';
440 break;
441 case '?':
442 *p = '\?';
443 break;
444 case '\'':
445 *p = '\'';
446 break;
447 case '"':
448 *p = '\"';
449 break;
450 case '\\':
451 *p = '\\';
452 break;
453 default:
454 *p++ = c;
455 *p = c2;
456 break;
457 }
458 p++;
459 data += 2;
460 continue;
461 }
462 if ((conv & 1) && (c & 0x80)) {
463 *p++ = 0xc0 | ((c >> 6) & 0x1f);
464 *p++ = 0x80 | (c & 0x3f);
465 } else {
466 *p++ = c;
467 }
468 data++;
469 }
470convdone:
471 *p = '\0';
472 result_or_bind(ctx, stmt, idx, utf, p - utf, type);
473 sqlite3_free(utf);
474 return SQLITE_OK;
475 } else {
476 if (ctx) {
477 sqlite3_result_error(ctx, "out of memory", -1);
478 }
479 return SQLITE_NOMEM;
480 }
481 }
482putdata:
483 result_or_bind(ctx, stmt, idx, data, -1, type);
484 return SQLITE_OK;
485}
486
495static csv_file *
496csv_open(const char *filename, const char *sep, const char *quot)
497{
498 FILE *f;
499 csv_file *csv;
500
501#ifdef _WIN32
502 f = fopen(filename, "rb");
503#else
504 f = fopen(filename, "r");
505#endif
506 if (!f) {
507 return 0;
508 }
509 csv = sqlite3_malloc(sizeof (csv_file));
510 if (!csv) {
511error0:
512 fclose(f);
513 return 0;
514 }
515 csv->f = f;
516 if (sep && sep[0]) {
517 csv->sep = sqlite3_malloc(strlen(sep) + 1);
518 if (!csv->sep) {
519error1:
520 sqlite3_free(csv);
521 goto error0;
522 }
523 strcpy(csv->sep, sep);
524 } else {
525 csv->sep = 0;
526 }
527 if (quot && quot[0]) {
528 csv->quot = sqlite3_malloc(strlen(quot) + 1);
529 if (!csv->quot) {
530 if (csv->sep) {
531 sqlite3_free(csv->sep);
532 }
533 goto error1;
534 }
535 strcpy(csv->quot, quot);
536 } else {
537 csv->quot = 0;
538 }
539 csv->isdos = 0;
540 csv->maxl = 0;
541 csv->line = 0;
542 csv->pos0 = 0;
543 csv->maxc = 0;
544 csv->ncols = 0;
545 csv->cols = 0;
546 return csv;
547}
548
554static void
556{
557 if (csv) {
558 if (csv->sep) {
559 sqlite3_free(csv->sep);
560 }
561 if (csv->quot) {
562 sqlite3_free(csv->quot);
563 }
564 if (csv->line) {
565 sqlite3_free(csv->line);
566 }
567 if (csv->cols) {
568 sqlite3_free(csv->cols);
569 }
570 if (csv->f) {
571 fclose(csv->f);
572 }
573 sqlite3_free(csv);
574 }
575}
576
583static int
585{
586 if (csv && csv->f) {
587 return feof(csv->f);
588 }
589 return 1;
590}
591
599static long
600csv_seek(csv_file *csv, long pos)
601{
602 if (csv && csv->f) {
603 return fseek(csv->f, pos, SEEK_SET);
604 }
605 return EOF;
606}
607
613static void
615{
616 if (csv && csv->f) {
617 csv_seek(csv, csv->pos0);
618 }
619}
620
627static long
629{
630 if (csv && csv->f) {
631 return ftell(csv->f);
632 }
633 return EOF;
634}
635
643static int
645{
646 int i, index = 0, inq = 0, c, col;
647 char *p, *sep;
648
649 if (!csv || !csv->f) {
650 return EOF;
651 }
652 while (1) {
653 c = fgetc(csv->f);
654 if (c == EOF) {
655 if (index > 0) {
656 break;
657 }
658 return EOF;
659 }
660 if (c == '\0') {
661 continue;
662 }
663 if (c == '\r') {
664 int c2 = fgetc(csv->f);
665 c = '\n';
666
667 if (c2 == '\n') {
668 csv->isdos = 1;
669 } else if (c2 != EOF) {
670 ungetc(c2, csv->f);
671 }
672 }
673 /* check for DOS EOF (Ctrl-Z) */
674 if (csv->isdos && (c == '\032')) {
675 int c2 = fgetc(csv->f);
676
677 if (c2 == EOF) {
678 if (index > 0) {
679 break;
680 }
681 return EOF;
682 }
683 ungetc(c2, csv->f);
684 }
685 if (index >= csv->maxl - 1) {
686 int n = csv->maxl * 2;
687 char *line;
688
689 if (n <= 0) {
690 n = 4096;
691 }
692 line = sqlite3_malloc(n);
693 if (!line) {
694 return EOF;
695 }
696 if (csv->line) {
697 memcpy(line, csv->line, index);
698 sqlite3_free(csv->line);
699 }
700 csv->maxl = n;
701 csv->line = line;
702 }
703 csv->line[index++] = c;
704 if (csv->quot && (p = strchr(csv->quot, c))) {
705 if (inq) {
706 if (*p == inq) {
707 inq = 0;
708 }
709 } else {
710 inq = *p;
711 }
712 }
713 if (!inq && (c == '\n')) {
714 break;
715 }
716 }
717 if (guess) {
718 for (i = 0; i < index; i++) {
719 guess->hist[csv->line[i] & 0xFF] += 1;
720 }
721 guess->nlines += 1;
722 csv->ncols = 0;
723 return 0;
724 }
725
726 for (i = index - 1; i >= 0; i--) {
727 if (csv->line[i] != '\n') {
728 break;
729 }
730 }
731 index = i + 1;
732 csv->line[index] = '\0';
733 i = inq = col = 0;
734 sep = csv->sep ? csv->sep : ";";
735 if (!csv->cols) {
736 int n = 128;
737
738 csv->cols = sqlite3_malloc(sizeof (char *) * n);
739 if (!csv->cols) {
740 return EOF;
741 }
742 csv->maxc = n;
743 }
744 csv->cols[col++] = csv->line;
745 while (i < index) {
746 if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
747 if (inq) {
748 if (*p == inq) {
749 inq = 0;
750 }
751 } else {
752 inq = *p;
753 }
754 }
755 if (!inq && (p = strchr(sep, csv->line[i]))) {
756 p = csv->line + i;
757 *p = '\0';
758 if (col >= csv->maxc) {
759 int n = csv->maxc * 2;
760 char **cols;
761
762 cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
763 if (!cols) {
764 return EOF;
765 }
766 csv->cols = cols;
767 csv->maxc = n;
768 }
769 csv->cols[col++] = p + 1;
770 }
771 ++i;
772 }
773 csv->ncols = col;
774
775 /* strip off quotes */
776 if (csv->quot) {
777 for (i = 0; i < col; i++) {
778 if (*csv->cols[i]) {
779 p = strchr(csv->quot, *csv->cols[i]);
780 if (p) {
781 char *src, *dst;
782
783 c = *p;
784 csv->cols[i] += 1;
785 sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
786 if ((sep >= csv->cols[i]) && (*sep == c)) {
787 *sep = '\0';
788 }
789 /* collapse quote escape sequences */
790 src = csv->cols[i];
791 dst = 0;
792 while (*src) {
793 if ((*src == c) && (src[1] == c)) {
794 if (!dst) {
795 dst = src;
796 }
797 src++;
798 while (*src) {
799 *dst++ = *src++;
800 if (*src == c) {
801 --src;
802 break;
803 }
804 }
805 }
806 ++src;
807 }
808 if (dst) {
809 *dst++ = '\0';
810 }
811 }
812 }
813 }
814 }
815 return col;
816}
817
824static int
826{
827 if (csv && csv->cols) {
828 return csv->ncols;
829 }
830 return 0;
831}
832
840static char *
842{
843 if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
844 return csv->cols[n];
845 }
846 return 0;
847}
848
855static int
857{
858 csv_guess_fmt guess;
859 int i, n;
860 char *p, sep[32], quot[4];
861 const struct {
862 int c;
863 int min;
864 } sep_test[] = {
865 { ',', 2 },
866 { ';', 2 },
867 { '\t', 2 },
868 { ' ', 4 },
869 { '|', 2 }
870 };
871
872 if (!csv) {
873 return EOF;
874 }
875 memset(&guess, 0, sizeof (guess));
876 csv->pos0 = 0;
877 csv_rewind(csv);
878 for (i = n = 0; i < 10; i++) {
879 n = csv_getline(csv, &guess);
880 if (n == EOF) {
881 break;
882 }
883 }
884 csv_rewind(csv);
885 if (n && !i) {
886 return EOF;
887 }
888 p = quot;
889 n = '"';
890 if (guess.hist[n] > 1) {
891 *p++ = n;
892 }
893 n = '\'';
894 if (guess.hist[n] > 1) {
895 *p++ = n;
896 }
897 *p = '\0';
898 p = sep;
899 for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
900 if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
901 *p++ = sep_test[i].c;
902 }
903 }
904 *p = '\0';
905 if (quot[0]) {
906 p = sqlite3_malloc(strlen(quot) + 1);
907 if (p) {
908 strcpy(p, quot);
909 if (csv->quot) {
910 sqlite3_free(csv->quot);
911 }
912 csv->quot = p;
913 } else {
914 return EOF;
915 }
916 }
917 if (sep[0]) {
918 p = sqlite3_malloc(strlen(sep) + 1);
919 if (p) {
920 strcpy(p, sep);
921 if (csv->sep) {
922 sqlite3_free(csv->sep);
923 }
924 csv->sep = p;
925 } else {
926 return EOF;
927 }
928 }
929 return 0;
930}
931
965static int
966csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
967 sqlite3_vtab **vtabp, char **errp)
968{
969 csv_file *csv;
970 int rc = SQLITE_ERROR, i, k, ncnames, row1, *colmap = 0;
971 char **cnames, *schema = 0, **nargv;
972 csv_vtab *vtab = 0;
973
974 if (argc < 4) {
975 *errp = sqlite3_mprintf("input file name missing");
976 return SQLITE_ERROR;
977 }
978 nargv = sqlite3_malloc(sizeof (char *) * argc);
979 memset(nargv, 0, sizeof (char *) * argc);
980 for (i = 3; i < argc; i++) {
981 nargv[i] = unquote(argv[i]);
982 }
983 csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
984 (argc > 7) ? nargv[7] : 0);
985 if (!csv) {
986 *errp = sqlite3_mprintf("unable to open input file");
987cleanup:
988 append_free(&schema);
989 for (i = 3; i < argc; i++) {
990 if (nargv[i]) {
991 sqlite3_free(nargv[i]);
992 }
993 }
994 if (vtab) {
995 sqlite3_free(vtab);
996 }
997 if (colmap) {
998 sqlite3_free(colmap);
999 }
1000 return rc;
1001 }
1002 if (!csv->sep && !csv->quot) {
1003 csv_guess(csv);
1004 }
1005 csv->pos0 = 0;
1006 row1 = 0;
1007 if (argc > 4) {
1008 row1 = strtol(nargv[4], 0, 10);
1009 }
1010 if (row1) {
1011 /* use column names from 1st row */
1012 csv_getline(csv, 0);
1013 if (csv->ncols < 1) {
1014 csv_close(csv);
1015 *errp = sqlite3_mprintf("unable to get column names");
1016 goto cleanup;
1017 }
1018 csv->pos0 = csv_tell(csv);
1019 csv_rewind(csv);
1020 ncnames = csv_ncols(csv);
1021 cnames = csv->cols;
1022 } else if (argc > 8) {
1023 ncnames = argc - 8;
1024 cnames = (char **) nargv + 8;
1025 } else {
1026 /* use number of columns from 1st row */
1027 csv_getline(csv, 0);
1028 if (csv->ncols < 1) {
1029 csv_close(csv);
1030 *errp = sqlite3_mprintf("unable to get column names");
1031 goto cleanup;
1032 }
1033 csv_rewind(csv);
1034 ncnames = csv_ncols(csv);
1035 cnames = 0;
1036 }
1037 colmap = sqlite3_malloc(sizeof (int) * ncnames);
1038 if (!colmap) {
1039 csv_close(csv);
1040 *errp = sqlite3_mprintf("out of memory");
1041 goto cleanup;
1042 }
1043 memset(colmap, 0, sizeof (int) * ncnames);
1044 vtab = sqlite3_malloc(sizeof (csv_vtab) + ncnames);
1045 if (!vtab) {
1046 csv_close(csv);
1047 *errp = sqlite3_mprintf("out of memory");
1048 goto cleanup;
1049 }
1050 memset(vtab, 0, sizeof (*vtab));
1051 vtab->convert = 0;
1052 if (argc > 5) {
1053 vtab->convert = strtol(nargv[5], 0, 10);
1054 if (row1 && (vtab->convert & 4)) {
1055 conv_names(cnames, ncnames);
1056 }
1057 }
1058 vtab->csv = csv;
1059 append(&schema, "CREATE TABLE x(", 0);
1060 for (i = 0; cnames && (i < ncnames); i++) {
1061 if (!cnames[i] || (cnames[i][0] == '\0')) {
1062 continue;
1063 }
1064 k = strlen(cnames[i]);
1065 if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
1066 char c;
1067
1068 if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
1069 colmap[i] = k;
1070 }
1071 }
1072 }
1073 for (i = 0; i < ncnames; i++) {
1074 vtab->coltypes[i] = SQLITE_TEXT;
1075 if (!cnames || !cnames[i] || (cnames[i][0] == '\0')) {
1076 int want = i + 1;
1077 char colname[64];
1078
1079 while (1) {
1080 for (k = 0; k < ncnames; k++) {
1081 if ((k != i) && (colmap[k] == want)) {
1082 want++;
1083 break;
1084 }
1085 }
1086 if (k >= ncnames) {
1087 colmap[i] = want;
1088 break;
1089 }
1090 }
1091 sprintf(colname, "column_%d", colmap[i]);
1092 append(&schema, colname, '"');
1093 } else if (row1 > 0) {
1094 append(&schema, cnames[i], '"');
1095 } else if (row1 < 0) {
1096 append(&schema, cnames[i], '"');
1097 if (i + 8 < argc) {
1098 char *type = nargv[i + 8];
1099
1100 append(&schema, " ", 0);
1101 append(&schema, type, 0);
1102 vtab->coltypes[i] = maptype(type);
1103 }
1104 } else {
1105 char *type = cnames[i];
1106
1107 append(&schema, cnames[i], 0);
1108 while (*type && !strchr(" \t", *type)) {
1109 type++;
1110 }
1111 while (*type && strchr(" \t", *type)) {
1112 type++;
1113 }
1114 vtab->coltypes[i] = maptype(type);
1115 }
1116 if (i < ncnames - 1) {
1117 append(&schema, ",", 0);
1118 }
1119 }
1120 append(&schema, ")", 0);
1121 rc = sqlite3_declare_vtab(db, schema);
1122 if (rc != SQLITE_OK) {
1123 csv_close(csv);
1124 *errp = sqlite3_mprintf("table definition failed, error %d, "
1125 "schema '%s'", rc, schema);
1126 goto cleanup;
1127 }
1128 *vtabp = &vtab->vtab;
1129 *errp = 0;
1130 vtab = 0;
1131 goto cleanup;
1132}
1133
1145static int
1146csv_vtab_create(sqlite3* db, void *aux, int argc,
1147 const char *const *argv,
1148 sqlite3_vtab **vtabp, char **errp)
1149{
1150 return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
1151}
1152
1159static int
1160csv_vtab_disconnect(sqlite3_vtab *vtab)
1161{
1162 csv_vtab *tab = (csv_vtab *) vtab;
1163
1164 csv_close(tab->csv);
1165 sqlite3_free(tab);
1166 return SQLITE_OK;
1167}
1168
1175static int
1176csv_vtab_destroy(sqlite3_vtab *vtab)
1177{
1178 return csv_vtab_disconnect(vtab);
1179}
1180
1188static int
1189csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
1190{
1191 return SQLITE_OK;
1192}
1193
1201static int
1202csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
1203{
1204 csv_cursor *cur = sqlite3_malloc(sizeof (*cur));
1205 csv_vtab *tab = (csv_vtab *) vtab;
1206
1207 if (!cur) {
1208 return SQLITE_ERROR;
1209 }
1210 cur->cursor.pVtab = vtab;
1211 csv_rewind(tab->csv);
1212 cur->pos = csv_tell(tab->csv);
1213 *cursorp = &cur->cursor;
1214 return SQLITE_OK;
1215}
1216
1223static int
1224csv_vtab_close(sqlite3_vtab_cursor *cursor)
1225{
1226 sqlite3_free(cursor);
1227 return SQLITE_OK;
1228}
1229
1236static int
1237csv_vtab_next(sqlite3_vtab_cursor *cursor)
1238{
1239 csv_cursor *cur = (csv_cursor *) cursor;
1240 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1241
1242 cur->pos = csv_tell(tab->csv);
1243 csv_getline(tab->csv, 0);
1244 return SQLITE_OK;
1245}
1246
1257static int
1258csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
1259 const char *idxStr, int argc, sqlite3_value **argv)
1260{
1261 csv_cursor *cur = (csv_cursor *) cursor;
1262 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1263
1264 csv_rewind(tab->csv);
1265 return csv_vtab_next(cursor);
1266}
1267
1274static int
1275csv_vtab_eof(sqlite3_vtab_cursor *cursor)
1276{
1277 csv_cursor *cur = (csv_cursor *) cursor;
1278 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1279
1280 return csv_eof(tab->csv);
1281}
1282
1291static int
1292csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
1293{
1294 csv_cursor *cur = (csv_cursor *) cursor;
1295 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1296 char *data = csv_coldata(tab->csv, n);
1297
1298 return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
1299}
1300
1308static int
1309csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
1310{
1311 csv_cursor *cur = (csv_cursor *) cursor;
1312
1313 *rowidp = cur->pos;
1314 return SQLITE_OK;
1315}
1316
1317#if (SQLITE_VERSION_NUMBER > 3004000)
1318
1325static int
1326csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
1327{
1328 return SQLITE_OK;
1329}
1330
1331#endif
1332
1337static const sqlite3_module csv_vtab_mod = {
1338 1, /* iVersion */
1339 csv_vtab_create, /* xCreate */
1340 csv_vtab_connect, /* xConnect */
1341 csv_vtab_bestindex, /* xBestIndex */
1342 csv_vtab_disconnect, /* xDisconnect */
1343 csv_vtab_destroy, /* xDestroy */
1344 csv_vtab_open, /* xOpen */
1345 csv_vtab_close, /* xClose */
1346 csv_vtab_filter, /* xFilter */
1347 csv_vtab_next, /* xNext */
1348 csv_vtab_eof, /* xEof */
1349 csv_vtab_column, /* xColumn */
1350 csv_vtab_rowid, /* xRowid */
1351 0, /* xUpdate */
1352 0, /* xBegin */
1353 0, /* xSync */
1354 0, /* xCommit */
1355 0, /* xRollback */
1356 0, /* xFindFunction */
1357#if (SQLITE_VERSION_NUMBER > 3004000)
1358 csv_vtab_rename, /* xRename */
1359#endif
1360};
1361
1389static void
1390csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
1391{
1392 csv_file *csv;
1393 int rc, i, k, ncnames, row1, convert = 0, useargs = 0, *colmap = 0;
1394 char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
1395 sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
1396 sqlite3_stmt *stmt = 0;
1397
1398 if (argc < 2) {
1399 sqlite3_result_error(ctx, "need at least 2 arguments", -1);
1400 return;
1401 }
1402 tname = (char *) sqlite3_value_text(argv[0]);
1403 if (!tname) {
1404 sqlite3_result_error(ctx, "table name is NULL", -1);
1405 return;
1406 }
1407 fname = (char *) sqlite3_value_text(argv[1]);
1408 if (!fname) {
1409 sqlite3_result_error(ctx, "file name is NULL", -1);
1410 return;
1411 }
1412 csv = csv_open(fname,
1413 (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
1414 (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
1415 if (!csv) {
1416 sqlite3_result_error(ctx, "unable to open input file", -1);
1417cleanup:
1418 if (stmt) {
1419 sqlite3_finalize(stmt);
1420 }
1421 append_free(&sql);
1422 if (colmap) {
1423 sqlite3_free(colmap);
1424 }
1425 if (csv) {
1426 csv_close(csv);
1427 }
1428 return;
1429 }
1430 if (!csv->sep && !csv->quot) {
1431 csv_guess(csv);
1432 }
1433 csv->pos0 = 0;
1434 row1 = 0;
1435 if (argc > 2) {
1436 row1 = sqlite3_value_int(argv[2]);
1437 }
1438 if (row1) {
1439 /* use column names from 1st row */
1440 csv_getline(csv, 0);
1441 if (csv->ncols < 1) {
1442 sqlite3_result_error(ctx, "unable to get column names", -1);
1443 goto cleanup;
1444 }
1445 csv->pos0 = csv_tell(csv);
1446 csv_rewind(csv);
1447 ncnames = csv_ncols(csv);
1448 cnames = csv->cols;
1449 } else if (argc > 6) {
1450 ncnames = argc - 6;
1451 cnames = 0;
1452 useargs = 1;
1453 } else {
1454 /* use number of columns from 1st row */
1455 csv_getline(csv, 0);
1456 if (csv->ncols < 1) {
1457 sqlite3_result_error(ctx, "unable to get column names", -1);
1458 goto cleanup;
1459 }
1460 csv_rewind(csv);
1461 ncnames = csv_ncols(csv);
1462 cnames = 0;
1463 }
1464 convert = 0;
1465 if (argc > 3) {
1466 convert = sqlite3_value_int(argv[3]);
1467 if (row1 && (convert & 4)) {
1468 conv_names(cnames, ncnames);
1469 }
1470 }
1471 /* test if table exists */
1472 append(&sql, "PRAGMA table_info(", 0);
1473 append(&sql, tname, '"');
1474 append(&sql, ")", 0);
1475 if (!sql) {
1476oom:
1477 sqlite3_result_error(ctx, "out of memory", -1);
1478 goto cleanup;
1479 }
1480 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1481 append_free(&sql);
1482 if (rc != SQLITE_OK) {
1483prepfail:
1484 sqlite3_result_error(ctx, "prepare failed", -1);
1485 goto cleanup;
1486 }
1487 /* find number of colums */
1488 i = 0;
1489 rc = sqlite3_step(stmt);
1490 while (rc == SQLITE_ROW) {
1491 i++;
1492 rc = sqlite3_step(stmt);
1493 }
1494 if (rc != SQLITE_DONE) {
1495selfail:
1496 sqlite3_result_error(ctx, "select failed", -1);
1497 goto cleanup;
1498 }
1499 if (i > 0) {
1500 /* get column types */
1501 sqlite3_reset(stmt);
1502 ncnames = i;
1503 coltypes = sqlite3_malloc(ncnames);
1504 if (!coltypes) {
1505 goto oom;
1506 }
1507 rc = sqlite3_step(stmt);
1508 i = 0;
1509 while (rc == SQLITE_ROW) {
1510 coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
1511 rc = sqlite3_step(stmt);
1512 }
1513 if (rc != SQLITE_DONE) {
1514 goto selfail;
1515 }
1516 } else {
1517 /* create new table */
1518 sqlite3_finalize(stmt);
1519 stmt = 0;
1520 colmap = (int *) sqlite3_malloc(ncnames + sizeof (int) * ncnames);
1521 if (!colmap) {
1522 goto oom;
1523 }
1524 memset(colmap, 0, sizeof (int) * ncnames);
1525 coltypes = (char *) (colmap + ncnames);
1526 append(&sql, "CREATE TABLE ", 0);
1527 append(&sql, tname, '"');
1528 append(&sql, "(", 0);
1529 for (i = 0; cnames && (i < ncnames); i++) {
1530 if (!cnames[i] || (cnames[i][0] == '\0')) {
1531 continue;
1532 }
1533 k = strlen(cnames[i]);
1534 if ((k > 7) && (strncasecmp("column_", cnames[i], 7) == 0)) {
1535 char c;
1536
1537 if (sscanf(cnames[i] + 7, "%d%c", &k, &c) == 1) {
1538 colmap[i] = k;
1539 }
1540 }
1541 }
1542 for (i = 0; i < ncnames; i++) {
1543 int want = i + 1;
1544 char colname[64];
1545
1546 coltypes[i] = SQLITE_TEXT;
1547 if (useargs) {
1548 char *type = (char *) sqlite3_value_text(argv[i + 6]);
1549
1550 if (!type) {
1551 goto defcol;
1552 }
1553 append(&sql, type, 0);
1554 while (*type && !strchr(" \t", *type)) {
1555 type++;
1556 }
1557 while (*type && strchr(" \t", *type)) {
1558 type++;
1559 }
1560 coltypes[i] = maptype(type);
1561 } else if (!cnames || !cnames[i]) {
1562defcol:
1563 while (1) {
1564 for (k = 0; k < ncnames; k++) {
1565 if ((k != i) && (colmap[k] == want)) {
1566 want++;
1567 break;
1568 }
1569 }
1570 if (k >= ncnames) {
1571 colmap[i] = want;
1572 break;
1573 }
1574 }
1575 sprintf(colname, "column_%d", colmap[i]);
1576 append(&sql, colname, '"');
1577 } else if (row1 > 0) {
1578 append(&sql, cnames[i], '"');
1579 } else if (row1 < 0) {
1580 append(&sql, cnames[i], '"');
1581 if (i + 6 < argc) {
1582 char *type = (char *) sqlite3_value_text(argv[i + 6]);
1583
1584 if (type) {
1585 append(&sql, " ", 0);
1586 append(&sql, type, 0);
1587 coltypes[i] = maptype(type);
1588 }
1589 }
1590 }
1591 if (i < ncnames - 1) {
1592 append(&sql, ",", 0);
1593 }
1594 }
1595 append(&sql, ")", 0);
1596 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1597 if (rc != SQLITE_OK) {
1598 goto prepfail;
1599 }
1600 rc = sqlite3_step(stmt);
1601 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1602 sqlite3_result_error(ctx, "create table failed", -1);
1603 goto cleanup;
1604 }
1605 append_free(&sql);
1606 }
1607 sqlite3_finalize(stmt);
1608 stmt = 0;
1609 /* make INSERT statement */
1610 append(&sql, "INSERT INTO ", 0);
1611 append(&sql, tname, '"');
1612 append(&sql, " VALUES(", 0);
1613 for (i = 0; i < ncnames; i++) {
1614 append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
1615 }
1616 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1617 if (rc != SQLITE_OK) {
1618 goto prepfail;
1619 }
1620 append_free(&sql);
1621 /* import the CSV file */
1622 row1 = 0;
1623 while (csv_getline(csv, 0) != EOF) {
1624 for (i = 0; i < ncnames; i++) {
1625 char *data = csv_coldata(csv, i);
1626
1627 rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
1628 if (rc != SQLITE_OK) {
1629 goto inserr;
1630 }
1631 }
1632 rc = sqlite3_step(stmt);
1633 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1634 if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
1635inserr:
1636 sqlite3_result_error(ctx, "insert failed", -1);
1637 goto cleanup;
1638 }
1639 } else {
1640 row1++;
1641 }
1642 sqlite3_reset(stmt);
1643 }
1644 sqlite3_result_int(ctx, row1);
1645 goto cleanup;
1646}
1647
1654#ifndef STANDALONE
1655static
1656#endif
1657int
1658csv_vtab_init(sqlite3 *db)
1659{
1660 sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
1661 (void *) db, csv_import_func, 0, 0);
1662 return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
1663}
1664
1665#ifndef STANDALONE
1666
1675int
1676sqlite3_extension_init(sqlite3 *db, char **errmsg,
1677 const sqlite3_api_routines *api)
1678{
1679 SQLITE_EXTENSION_INIT2(api);
1680 return csv_vtab_init(db);
1681}
1682
1683#endif
static int csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
Return current rowid of virtual table cursor.
Definition csvtable.c:1309
static char * csv_coldata(csv_file *csv, int n)
Return nth column of current row in CSV file.
Definition csvtable.c:841
struct csv_guess_fmt csv_guess_fmt
static int csv_vtab_init(sqlite3 *db)
Module initializer creating SQLite functions and modules.
Definition csvtable.c:1658
struct csv_file csv_file
static void csv_close(csv_file *csv)
Close CSV file handle.
Definition csvtable.c:555
static int csv_guess(csv_file *csv)
Guess CSV layout of CSV file handle.
Definition csvtable.c:856
static int csv_ncols(csv_file *csv)
Return number of columns of current row in CSV file.
Definition csvtable.c:825
static void conv_names(char **names, int ncols)
Convert and collapse white space in column names to underscore.
Definition csvtable.c:246
static char * append(char **in, char const *append, char quote)
Append a string to dynamically allocated string buffer with optional quoting.
Definition csvtable.c:118
struct csv_vtab csv_vtab
static const sqlite3_module csv_vtab_mod
SQLite module descriptor.
Definition csvtable.c:1337
static int csv_getline(csv_file *csv, csv_guess_fmt *guess)
Read and process one line of CSV file handle.
Definition csvtable.c:644
static int csv_vtab_disconnect(sqlite3_vtab *vtab)
Disconnect virtual table.
Definition csvtable.c:1160
static void append_free(char **in)
Free dynamically allocated string buffer.
Definition csvtable.c:97
static int csv_vtab_close(sqlite3_vtab_cursor *cursor)
Close virtual table cursor.
Definition csvtable.c:1224
static void result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int len, int type)
Make result data or parameter binding accoring to type.
Definition csvtable.c:286
int sqlite3_extension_init(sqlite3 *db, char **errmsg, const sqlite3_api_routines *api)
Initializer for SQLite extension load mechanism.
Definition csvtable.c:1676
static int csv_vtab_destroy(sqlite3_vtab *vtab)
Destroy virtual table.
Definition csvtable.c:1176
static int csv_vtab_connect(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Connect to virtual table.
Definition csvtable.c:966
static int csv_eof(csv_file *csv)
Test EOF on CSV file handle.
Definition csvtable.c:584
static csv_file * csv_open(const char *filename, const char *sep, const char *quot)
Open CSV file for reading and return handle to it.
Definition csvtable.c:496
static char * unquote(char const *in)
Strip off quotes given string.
Definition csvtable.c:188
static void csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
Import CSV file as table into database.
Definition csvtable.c:1390
static int csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
Open virtual table and return cursor.
Definition csvtable.c:1202
static int csv_vtab_next(sqlite3_vtab_cursor *cursor)
Retrieve next row from virtual table cursor.
Definition csvtable.c:1237
static int csv_vtab_eof(sqlite3_vtab_cursor *cursor)
Return end of table state of virtual table cursor.
Definition csvtable.c:1275
static int maptype(char const *type)
Map string to SQLite data type.
Definition csvtable.c:216
static long csv_tell(csv_file *csv)
Return current position of CSV file handle.
Definition csvtable.c:628
static long csv_seek(csv_file *csv, long pos)
Position CSV file handle.
Definition csvtable.c:600
static int csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
Return column data of virtual table.
Definition csvtable.c:1292
static int csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv)
Filter function for virtual table.
Definition csvtable.c:1258
static void csv_rewind(csv_file *csv)
Rewind CSV file handle.
Definition csvtable.c:614
static int csv_vtab_create(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Create virtual table.
Definition csvtable.c:1146
static int process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int type, int conv)
Process one column of the current row.
Definition csvtable.c:360
static int csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
Determines information for filter function according to constraints.
Definition csvtable.c:1189
#define min(a, b)
Structure to describe CSV virtual table cursor.
Definition csvtable.c:86
sqlite3_vtab_cursor cursor
SQLite virtual table cursor.
Definition csvtable.c:87
long pos
CSV file position.
Definition csvtable.c:88
Structure to implement CSV file handle.
Definition csvtable.c:43
int isdos
true, when DOS format detected
Definition csvtable.c:47
char * line
line buffer
Definition csvtable.c:49
int ncols
number of columns
Definition csvtable.c:52
char * sep
column separator characters
Definition csvtable.c:45
int maxc
max.
Definition csvtable.c:51
char * quot
text quoting characters
Definition csvtable.c:46
long pos0
file position for rewind
Definition csvtable.c:50
int maxl
max.
Definition csvtable.c:48
char ** cols
column buffer
Definition csvtable.c:53
FILE * f
CSV file.
Definition csvtable.c:44
Info to guess CSV layout.
Definition csvtable.c:62
int hist[256]
Definition csvtable.c:64
Structure to describe a CSV virtual table.
Definition csvtable.c:73
int convert
convert flags
Definition csvtable.c:76
sqlite3_vtab vtab
SQLite virtual table.
Definition csvtable.c:74
csv_file * csv
CSV file handle.
Definition csvtable.c:75
char coltypes[1]
column types
Definition csvtable.c:77
Driver internal structure representing SQL statement (HSTMT).

Generated on Sun Nov 26 2023 by doxygen.
Contact: chw@ch-werner.de