GnuCash 2.4.99
gnc-csv-model.c
00001 #include "gnc-csv-model.h"
00002 
00003 
00004 #include <glib/gi18n.h>
00005 
00006 #include <goffice/goffice-features.h>
00007 #if (GO_VERSION_EPOCH == 0) && (GO_VERSION_MAJOR == 7) && (GO_VERSION_MINOR == 8)
00008 /* For libgoffice-0.7.8, disable its internal inclusion of <regutf8.h>
00009    so to avoid clashing symbol definitions with <regex.h> */
00010 # define GO_REGUTF8_H
00011 #endif
00012 #include <goffice/utils/go-glib-extras.h>
00013 
00014 #include "gnc-ui-util.h"
00015 
00016 #include <string.h>
00017 #include <sys/time.h>
00018 
00019 #include <sys/types.h>
00020 #include <sys/stat.h>
00021 #include <regex.h>
00022 #include <unistd.h>
00023 #include <fcntl.h>
00024 #include <stdlib.h>
00025 #include <math.h>
00026 #include <time.h>
00027 #ifndef HAVE_LOCALTIME_R
00028 #include "localtime_r.h"
00029 #endif
00030 
00031 static QofLogModule log_module = GNC_MOD_IMPORT;
00032 
00033 const int num_date_formats = 5;
00034 const gchar* date_format_user[] = {N_("y-m-d"),
00035                                    N_("d-m-y"),
00036                                    N_("m-d-y"),
00037                                    N_("d-m"),
00038                                    N_("m-d")
00039                                   };
00040 
00041 const int num_currency_formats = 3;
00042 const gchar* currency_format_user[] = {N_("Locale"),
00043                                        N_("Period: 123,456.78"),
00044                                        N_("Comma: 123.456,78")
00045                                       };
00046 
00047 /* This array contains all of the different strings for different column types. */
00048 gchar* gnc_csv_column_type_strs[GNC_CSV_NUM_COL_TYPES] = {N_("None"),
00049         N_("Date"),
00050         N_("Description"),
00051         N_("Account"),
00052         N_("Balance"),
00053         N_("Deposit"),
00054         N_("Withdrawal"),
00055         N_("Num")
00056                                                          };
00057 
00061 static StfParseOptions_t* default_parse_options(void)
00062 {
00063     StfParseOptions_t* options = stf_parse_options_new();
00064     stf_parse_options_set_type(options, PARSE_TYPE_CSV);
00065     stf_parse_options_csv_set_separators(options, ",", NULL);
00066     return options;
00067 }
00068 
00076 static time_t parse_date_with_year(const char* date_str, int format)
00077 {
00078     time_t rawtime; /* The integer time */
00079     struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
00080 
00081     int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
00082 
00083     /* Buffer for containing individual parts (e.g. year, month, day) of a date */
00084     char date_segment[5];
00085 
00086     /* The compiled regular expression */
00087     regex_t preg = {0};
00088 
00089     /* An array containing indices specifying the matched substrings in date_str */
00090     regmatch_t pmatch[4] = { {0}, {0}, {0}, {0} };
00091 
00092     /* The regular expression for parsing dates */
00093     const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+) *[-/.'] *([0-9]+).*$|^ *([0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]).*$";
00094 
00095     /* We get our matches using the regular expression. */
00096     regcomp(&preg, regex, REG_EXTENDED);
00097     regexec(&preg, date_str, 4, pmatch, 0);
00098     regfree(&preg);
00099 
00100     /* If there wasn't a match, there was an error. */
00101     if (pmatch[0].rm_eo == 0)
00102         return -1;
00103 
00104     /* If this is a string without separators ... */
00105     if (pmatch[1].rm_so == -1)
00106     {
00107         /* ... we will fill in the indices based on the user's selection. */
00108         int k = 0; /* k traverses date_str by keeping track of where separators "should" be. */
00109         j = 1; /* j traverses pmatch. */
00110         for (i = 0; date_format_user[format][i]; i++)
00111         {
00112             char segment_type = date_format_user[format][i];
00113             /* Only do something if this is a meaningful character */
00114             if (segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
00115             {
00116                 pmatch[j].rm_so = k;
00117                 switch (segment_type)
00118                 {
00119                 case 'm':
00120                 case 'd':
00121                     k += 2;
00122                     break;
00123 
00124                 case 'y':
00125                     k += 4;
00126                     break;
00127                 }
00128 
00129                 pmatch[j].rm_eo = k;
00130                 j++;
00131             }
00132         }
00133     }
00134 
00135     /* Put some sane values in retvalue by using the current time for
00136      * the non-year-month-day parts of the date. */
00137     time(&rawtime);
00138     localtime_r(&rawtime, &retvalue);
00139 
00140     /* j traverses pmatch (index 0 contains the entire string, so we
00141      * start at index 1 for the first meaningful match). */
00142     j = 1;
00143     /* Go through the date format and interpret the matches in order of
00144      * the sections in the date format. */
00145     for (i = 0; date_format_user[format][i]; i++)
00146     {
00147         char segment_type = date_format_user[format][i];
00148         /* Only do something if this is a meaningful character */
00149         if (segment_type == 'y' || segment_type == 'm' || segment_type == 'd')
00150         {
00151             /* Copy the matching substring into date_segment so that we can
00152              * convert it into an integer. */
00153             mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
00154             memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
00155             date_segment[mem_length] = '\0';
00156 
00157             /* Set the appropriate member of retvalue. Save the original
00158              * values so that we can check if the change when we use mktime
00159              * below. */
00160             switch (segment_type)
00161             {
00162             case 'y':
00163                 retvalue.tm_year = atoi(date_segment);
00164 
00165                 /* Handle two-digit years. */
00166                 if (retvalue.tm_year < 100)
00167                 {
00168                     /* We allow two-digit years in the range 1969 - 2068. */
00169                     if (retvalue.tm_year < 69)
00170                         retvalue.tm_year += 100;
00171                 }
00172                 else
00173                     retvalue.tm_year -= 1900;
00174                 orig_year = retvalue.tm_year;
00175                 break;
00176 
00177             case 'm':
00178                 orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
00179                 break;
00180 
00181             case 'd':
00182                 orig_day = retvalue.tm_mday = atoi(date_segment);
00183                 break;
00184             }
00185             j++;
00186         }
00187     }
00188     /* Convert back to an integer. If mktime leaves retvalue unchanged,
00189      * everything is okay; otherwise, an error has occurred. */
00190     /* We have to use a "test" date value to account for changes in
00191      * daylight savings time, which can cause a date change with mktime
00192      * near midnight, causing the code to incorrectly think a date is
00193      * incorrect. */
00194     test_retvalue = retvalue;
00195     mktime(&test_retvalue);
00196     retvalue.tm_isdst = test_retvalue.tm_isdst;
00197     rawtime = mktime(&retvalue);
00198     if (retvalue.tm_mday == orig_day &&
00199     retvalue.tm_mon == orig_month &&
00200     retvalue.tm_year == orig_year)
00201     {
00202         return rawtime;
00203     }
00204     else
00205     {
00206         return -1;
00207     }
00208 }
00209 
00217 static time_t parse_date_without_year(const char* date_str, int format)
00218 {
00219     time_t rawtime; /* The integer time */
00220     struct tm retvalue, test_retvalue; /* The time in a broken-down structure */
00221 
00222     int i, j, mem_length, orig_year = -1, orig_month = -1, orig_day = -1;
00223 
00224     /* Buffer for containing individual parts (e.g. year, month, day) of a date */
00225     gchar* date_segment;
00226 
00227     /* The compiled regular expression */
00228     regex_t preg = {0};
00229 
00230     /* An array containing indices specifying the matched substrings in date_str */
00231     regmatch_t pmatch[3] = { {0}, {0}, {0} };
00232 
00233     /* The regular expression for parsing dates */
00234     const char* regex = "^ *([0-9]+) *[-/.'] *([0-9]+).*$";
00235 
00236     /* We get our matches using the regular expression. */
00237     regcomp(&preg, regex, REG_EXTENDED);
00238     regexec(&preg, date_str, 3, pmatch, 0);
00239     regfree(&preg);
00240 
00241     /* If there wasn't a match, there was an error. */
00242     if (pmatch[0].rm_eo == 0)
00243         return -1;
00244 
00245     /* Put some sane values in retvalue by using the current time for
00246      * the non-year-month-day parts of the date. */
00247     time(&rawtime);
00248     localtime_r(&rawtime, &retvalue);
00249     orig_year = retvalue.tm_year;
00250 
00251     /* j traverses pmatch (index 0 contains the entire string, so we
00252      * start at index 1 for the first meaningful match). */
00253     j = 1;
00254     /* Go through the date format and interpret the matches in order of
00255      * the sections in the date format. */
00256     for (i = 0; date_format_user[format][i]; i++)
00257     {
00258         char segment_type = date_format_user[format][i];
00259         /* Only do something if this is a meaningful character */
00260         if (segment_type == 'm' || segment_type == 'd')
00261         {
00262             /* Copy the matching substring into date_segment so that we can
00263              * convert it into an integer. */
00264             mem_length = pmatch[j].rm_eo - pmatch[j].rm_so;
00265             date_segment = g_new(gchar, mem_length);
00266             memcpy(date_segment, date_str + pmatch[j].rm_so, mem_length);
00267             date_segment[mem_length] = '\0';
00268 
00269             /* Set the appropriate member of retvalue. Save the original
00270              * values so that we can check if the change when we use mktime
00271              * below. */
00272             switch (segment_type)
00273             {
00274             case 'm':
00275                 orig_month = retvalue.tm_mon = atoi(date_segment) - 1;
00276                 break;
00277 
00278             case 'd':
00279                 orig_day = retvalue.tm_mday = atoi(date_segment);
00280                 break;
00281             }
00282             g_free(date_segment);
00283             j++;
00284         }
00285     }
00286     /* Convert back to an integer. If mktime leaves retvalue unchanged,
00287      * everything is okay; otherwise, an error has occurred. */
00288     /* We have to use a "test" date value to account for changes in
00289      * daylight savings time, which can cause a date change with mktime
00290      * near midnight, causing the code to incorrectly think a date is
00291      * incorrect. */
00292     test_retvalue = retvalue;
00293     mktime(&test_retvalue);
00294     retvalue.tm_isdst = test_retvalue.tm_isdst;
00295     rawtime = mktime(&retvalue);
00296     if (retvalue.tm_mday == orig_day &&
00297     retvalue.tm_mon == orig_month &&
00298     retvalue.tm_year == orig_year)
00299     {
00300         return rawtime;
00301     }
00302     else
00303     {
00304         return -1;
00305     }
00306 }
00307 
00316 time_t parse_date(const char* date_str, int format)
00317 {
00318     if (strchr(date_format_user[format], 'y'))
00319         return parse_date_with_year(date_str, format);
00320     else
00321         return parse_date_without_year(date_str, format);
00322 }
00323 
00327 GncCsvParseData* gnc_csv_new_parse_data(void)
00328 {
00329     GncCsvParseData* parse_data = g_new(GncCsvParseData, 1);
00330     parse_data->encoding = "UTF-8";
00331     /* All of the data pointers are initially NULL. This is so that, if
00332      * gnc_csv_parse_data_free is called before all of the data is
00333      * initialized, only the data that needs to be freed is freed. */
00334     parse_data->raw_str.begin = parse_data->raw_str.end
00335     = parse_data->file_str.begin = parse_data->file_str.end = NULL;
00336     parse_data->orig_lines = NULL;
00337     parse_data->orig_row_lengths = NULL;
00338     parse_data->column_types = NULL;
00339     parse_data->error_lines = parse_data->transactions = NULL;
00340     parse_data->options = default_parse_options();
00341     parse_data->date_format = -1;
00342     parse_data->currency_format = 0;
00343     parse_data->chunk = g_string_chunk_new(100 * 1024);
00344     parse_data->start_row = 0;
00345     parse_data->end_row = 1000;
00346     return parse_data;
00347 }
00348 
00352 void gnc_csv_parse_data_free(GncCsvParseData* parse_data)
00353 {
00354     /* All non-NULL pointers have been initialized and must be freed. */
00355 
00356     if (parse_data->raw_mapping != NULL)
00357     {
00358         g_mapped_file_unref(parse_data->raw_mapping);
00359     }
00360 
00361     if (parse_data->file_str.begin != NULL)
00362         g_free(parse_data->file_str.begin);
00363 
00364     if (parse_data->orig_lines != NULL)
00365         stf_parse_general_free(parse_data->orig_lines);
00366 
00367     if (parse_data->orig_row_lengths != NULL)
00368         g_array_free(parse_data->orig_row_lengths, FALSE);
00369 
00370     if (parse_data->options != NULL)
00371         stf_parse_options_free(parse_data->options);
00372 
00373     if (parse_data->column_types != NULL)
00374         g_array_free(parse_data->column_types, TRUE);
00375 
00376     if (parse_data->error_lines != NULL)
00377         g_list_free(parse_data->error_lines);
00378 
00379     if (parse_data->transactions != NULL)
00380     {
00381         GList* transactions = parse_data->transactions;
00382         /* We have to free the GncCsvTransLine's that are at each node in
00383          * the list before freeing the entire list. */
00384         do
00385         {
00386             g_free(transactions->data);
00387             transactions = g_list_next(transactions);
00388         }
00389         while (transactions != NULL);
00390         g_list_free(parse_data->transactions);
00391     }
00392 
00393     g_free(parse_data->chunk);
00394     g_free(parse_data);
00395 }
00396 
00405 int gnc_csv_convert_encoding(GncCsvParseData* parse_data, const char* encoding,
00406 GError** error)
00407 {
00408     gsize bytes_read, bytes_written;
00409 
00410     /* If parse_data->file_str has already been initialized it must be
00411      * freed first. (This should always be the case, since
00412      * gnc_csv_load_file should always be called before this
00413      * function.) */
00414     if (parse_data->file_str.begin != NULL)
00415         g_free(parse_data->file_str.begin);
00416 
00417     /* Do the actual translation to UTF-8. */
00418     parse_data->file_str.begin = g_convert(parse_data->raw_str.begin,
00419     parse_data->raw_str.end - parse_data->raw_str.begin,
00420     "UTF-8", encoding, &bytes_read, &bytes_written,
00421     error);
00422     /* Handle errors that occur. */
00423     if (parse_data->file_str.begin == NULL)
00424         return 1;
00425 
00426     /* On success, save the ending pointer of the translated data and
00427      * the encoding type and return 0. */
00428     parse_data->file_str.end = parse_data->file_str.begin + bytes_written;
00429     parse_data->encoding = (gchar*)encoding;
00430     return 0;
00431 }
00432 
00445 int gnc_csv_load_file(GncCsvParseData* parse_data, const char* filename,
00446 GError** error)
00447 {
00448     const char* guess_enc = NULL;
00449 
00450     /* Get the raw data first and handle an error if one occurs. */
00451     parse_data->raw_mapping = g_mapped_file_new(filename, FALSE, error);
00452     if (parse_data->raw_mapping == NULL)
00453     {
00454         /* TODO Handle file opening errors more specifically,
00455          * e.g. inexistent file versus no read permission. */
00456         parse_data->raw_str.begin = NULL;
00457         g_clear_error (error);
00458         g_set_error(error, 0, GNC_CSV_FILE_OPEN_ERR, "%s", _("File opening failed."));
00459         return 1;
00460     }
00461 
00462     /* Copy the mapping's contents into parse-data->raw_str. */
00463     parse_data->raw_str.begin = g_mapped_file_get_contents(parse_data->raw_mapping);
00464     parse_data->raw_str.end = parse_data->raw_str.begin + g_mapped_file_get_length(parse_data->raw_mapping);
00465 
00466     /* Make a guess at the encoding of the data. */
00467     if (!g_mapped_file_get_length(parse_data->raw_mapping) == 0)
00468         guess_enc = go_guess_encoding((const char*)(parse_data->raw_str.begin),
00469         (size_t)(parse_data->raw_str.end - parse_data->raw_str.begin),
00470         "UTF-8", NULL);
00471     if (guess_enc == NULL)
00472     {
00473         g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
00474         return 1;
00475     }
00476     /* Convert using the guessed encoding into parse_data->file_str and
00477      * handle any errors that occur. */
00478     gnc_csv_convert_encoding(parse_data, guess_enc, error);
00479     if (parse_data->file_str.begin == NULL)
00480     {
00481         g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
00482         return 1;
00483     }
00484     else
00485         return 0;
00486 }
00487 
00500 int gnc_csv_parse(GncCsvParseData* parse_data, gboolean guessColTypes, GError** error)
00501 {
00502     /* max_cols is the number of columns in the row with the most columns. */
00503     int i, max_cols = 0;
00504 
00505     if (parse_data->orig_lines != NULL)
00506     {
00507         stf_parse_general_free(parse_data->orig_lines);
00508     }
00509 
00510     /* If everything is fine ... */
00511     if (parse_data->file_str.begin != NULL)
00512     {
00513         /* Do the actual parsing. */
00514         parse_data->orig_lines = stf_parse_general(parse_data->options, parse_data->chunk,
00515         parse_data->file_str.begin,
00516         parse_data->file_str.end);
00517     }
00518     /* If we couldn't get the encoding right, we just want an empty array. */
00519     else
00520     {
00521         parse_data->orig_lines = g_ptr_array_new();
00522     }
00523 
00524     /* Record the original row lengths of parse_data->orig_lines. */
00525     if (parse_data->orig_row_lengths != NULL)
00526         g_array_free(parse_data->orig_row_lengths, FALSE);
00527 
00528     parse_data->orig_row_lengths =
00529     g_array_sized_new(FALSE, FALSE, sizeof(int), parse_data->orig_lines->len);
00530 
00531     g_array_set_size(parse_data->orig_row_lengths, parse_data->orig_lines->len);
00532     parse_data->orig_max_row = 0;
00533     for (i = 0; i < parse_data->orig_lines->len; i++)
00534     {
00535         int length = ((GPtrArray*)parse_data->orig_lines->pdata[i])->len;
00536         parse_data->orig_row_lengths->data[i] = length;
00537         if (length > parse_data->orig_max_row)
00538             parse_data->orig_max_row = length;
00539     }
00540 
00541     /* If it failed, generate an error. */
00542     if (parse_data->orig_lines == NULL)
00543     {
00544         g_set_error(error, 0, 0, "Parsing failed.");
00545         return 1;
00546     }
00547 
00548     /* Now that we have data, let's set max_cols. */
00549     for (i = 0; i < parse_data->orig_lines->len; i++)
00550     {
00551         if (max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
00552             max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
00553     }
00554 
00555     if (guessColTypes)
00556     {
00557         /* Free parse_data->column_types if it's already been created. */
00558         if (parse_data->column_types != NULL)
00559             g_array_free(parse_data->column_types, TRUE);
00560 
00561         /* Create parse_data->column_types and fill it with guesses based
00562          * on the contents of each column. */
00563         parse_data->column_types = g_array_sized_new(FALSE, FALSE, sizeof(int),
00564         max_cols);
00565         g_array_set_size(parse_data->column_types, max_cols);
00566         /* TODO Make it actually guess. */
00567         for (i = 0; i < parse_data->column_types->len; i++)
00568         {
00569             parse_data->column_types->data[i] = GNC_CSV_NONE;
00570         }
00571     }
00572     else
00573     {
00574         /* If we don't need to guess column types, we will simply set any
00575          * new columns that are created that didn't exist before to "None"
00576          * since we don't want gibberish to appear. Note:
00577          * parse_data->column_types should have already been
00578          * initialized, so we don't check for it being NULL. */
00579         int i = parse_data->column_types->len;
00580         g_array_set_size(parse_data->column_types, max_cols);
00581         for (; i < parse_data->column_types->len; i++)
00582         {
00583             parse_data->column_types->data[i] = GNC_CSV_NONE;
00584         }
00585     }
00586 
00587     return 0;
00588 }
00589 
00591 typedef struct
00592 {
00593     int date_format; 
00594     int currency_format; 
00595     Account* account; 
00596     GList* properties; 
00597 } TransPropertyList;
00598 
00600 typedef struct
00601 {
00602     int type; 
00604     void* value; 
00605     TransPropertyList* list; 
00606 } TransProperty;
00607 
00611 static TransProperty* trans_property_new(int type, TransPropertyList* list)
00612 {
00613     TransProperty* prop = g_new(TransProperty, 1);
00614     prop->type = type;
00615     prop->list = list;
00616     prop->value = NULL;
00617     return prop;
00618 }
00619 
00623 static void trans_property_free(TransProperty* prop)
00624 {
00625     switch (prop->type)
00626     {
00627         /* The types for "Date" and "Balance" (time_t and gnc_numeric,
00628          * respectively) are typically not pointed to, we have to free
00629          * them, unlike types like char* ("Description"). */
00630     case GNC_CSV_DATE:
00631     case GNC_CSV_BALANCE:
00632     case GNC_CSV_DEPOSIT:
00633     case GNC_CSV_WITHDRAWAL:
00634         if (prop->value != NULL)
00635             g_free(prop->value);
00636         break;
00637     }
00638     g_free(prop);
00639 }
00640 
00648 static gboolean trans_property_set(TransProperty* prop, char* str)
00649 {
00650     char *endptr, *possible_currency_symbol, *str_dupe;
00651     gnc_numeric val;
00652     switch (prop->type)
00653     {
00654     case GNC_CSV_DATE:
00655         prop->value = g_new(time_t, 1);
00656         *((time_t*)(prop->value)) = parse_date(str, prop->list->date_format);
00657         return *((time_t*)(prop->value)) != -1;
00658 
00659     case GNC_CSV_DESCRIPTION:
00660     case GNC_CSV_NUM:
00661         prop->value = g_strdup(str);
00662         return TRUE;
00663 
00664     case GNC_CSV_BALANCE:
00665     case GNC_CSV_DEPOSIT:
00666     case GNC_CSV_WITHDRAWAL:
00667         str_dupe = g_strdup(str); /* First, we make a copy so we can't mess up real data. */
00668 
00669         /* Go through str_dupe looking for currency symbols. */
00670         for (possible_currency_symbol = str_dupe; *possible_currency_symbol;
00671         possible_currency_symbol = g_utf8_next_char(possible_currency_symbol))
00672         {
00673             if (g_unichar_type(g_utf8_get_char(possible_currency_symbol)) == G_UNICODE_CURRENCY_SYMBOL)
00674             {
00675                 /* If we find a currency symbol, save the position just ahead
00676                  * of the currency symbol (next_symbol), and find the null
00677                  * terminator of the string (last_symbol). */
00678                 char *next_symbol = g_utf8_next_char(possible_currency_symbol), *last_symbol = next_symbol;
00679                 while (*last_symbol)
00680                     last_symbol = g_utf8_next_char(last_symbol);
00681 
00682                 /* Move all of the string (including the null byte, which is
00683                  * why we have +1 in the size parameter) following the
00684                  * currency symbol back one character, thereby overwriting the
00685                  * currency symbol. */
00686                 memmove(possible_currency_symbol, next_symbol, last_symbol - next_symbol + 1);
00687                 break;
00688             }
00689         }
00690 
00691         /* Currency format */
00692         switch (prop->list->currency_format)
00693         {
00694         case 0:
00695             /* Currancy locale */
00696             if (!(xaccParseAmount(str_dupe, TRUE, &val, &endptr)))
00697             {
00698                 g_free(str_dupe);
00699                 return FALSE;
00700             }
00701             break;
00702         case 1:
00703             /* Currancy decimal period */
00704             if (!(xaccParseAmountExtended(str_dupe, TRUE, '-', '.', ',', "\003\003", "$+", &val, &endptr)))
00705             {
00706                 g_free(str_dupe);
00707                 return FALSE;
00708             }
00709             break;
00710         case 2:
00711             /* Currancy decimal comma */
00712             if (!(xaccParseAmountExtended(str_dupe, TRUE, '-', ',', '.', "\003\003", "$+", &val, &endptr)))
00713             {
00714                 g_free(str_dupe);
00715                 return FALSE;
00716             }
00717             break;
00718         }
00719 
00720         prop->value = g_new(gnc_numeric, 1);
00721         *((gnc_numeric*)(prop->value)) = val;
00722         g_free(str_dupe);
00723         return TRUE;
00724 
00725     }
00726     return FALSE; /* We should never actually get here. */
00727 }
00728 
00734 static TransPropertyList* trans_property_list_new(Account* account, int date_format, int currency_format)
00735 {
00736     TransPropertyList* list = g_new(TransPropertyList, 1);
00737     list->account = account;
00738     list->date_format = date_format;
00739     list->currency_format = currency_format;
00740     list->properties = NULL;
00741     return list;
00742 }
00743 
00747 static void trans_property_list_free(TransPropertyList* list)
00748 {
00749     /* Free all of the properties in this list before freeeing the list itself. */
00750     GList* properties_begin = list->properties;
00751     while (list->properties != NULL)
00752     {
00753         trans_property_free((TransProperty*)(list->properties->data));
00754         list->properties = g_list_next(list->properties);
00755     }
00756     g_list_free(properties_begin);
00757     g_free(list);
00758 }
00759 
00765 static void trans_property_list_add(TransProperty* property)
00766 {
00767     property->list->properties = g_list_append(property->list->properties, property);
00768 }
00769 
00776 static void trans_add_split(Transaction* trans, Account* account, QofBook* book,
00777 gnc_numeric amount)
00778 {
00779     Split* split = xaccMallocSplit(book);
00780     xaccSplitSetAccount(split, account);
00781     xaccSplitSetParent(split, trans);
00782     xaccSplitSetAmount(split, amount);
00783     xaccSplitSetValue(split, amount);
00784     //xaccSplitSetAction(split, "Deposit");
00785 }
00786 
00794 static gboolean trans_property_list_verify_essentials(TransPropertyList* list, gchar** error)
00795 {
00796     int i;
00797     /* possible_errors lists the ways in which a list can fail this test. */
00798     enum PossibleErrorTypes {NO_DATE, NO_AMOUNT, NUM_OF_POSSIBLE_ERRORS};
00799     gchar* possible_errors[NUM_OF_POSSIBLE_ERRORS] =
00800     {
00801         N_("No date column."),
00802         N_("No balance, deposit, or withdrawal column.")
00803     };
00804     int possible_error_lengths[NUM_OF_POSSIBLE_ERRORS] = {0};
00805     GList *properties_begin = list->properties, *errors_list = NULL;
00806 
00807     /* Go through each of the properties and erase possible errors. */
00808     while (list->properties)
00809     {
00810         switch (((TransProperty*)(list->properties->data))->type)
00811         {
00812         case GNC_CSV_DATE:
00813             possible_errors[NO_DATE] = NULL;
00814             break;
00815 
00816         case GNC_CSV_BALANCE:
00817         case GNC_CSV_DEPOSIT:
00818         case GNC_CSV_WITHDRAWAL:
00819             possible_errors[NO_AMOUNT] = NULL;
00820             break;
00821         }
00822         list->properties = g_list_next(list->properties);
00823     }
00824     list->properties = properties_begin;
00825 
00826     /* Accumulate a list of the actual errors. */
00827     for (i = 0; i < NUM_OF_POSSIBLE_ERRORS; i++)
00828     {
00829         if (possible_errors[i] != NULL)
00830         {
00831             errors_list = g_list_append(errors_list, GINT_TO_POINTER(i));
00832             /* Since we added an error, we want to also store its length for
00833              * when we construct the full error string. */
00834             possible_error_lengths[i] = strlen(_(possible_errors[i]));
00835         }
00836     }
00837 
00838     /* If there are no errors, we can quit now. */
00839     if (errors_list == NULL)
00840         return TRUE;
00841     else
00842     {
00843         /* full_error_size is the full length of the error message. */
00844         int full_error_size = 0, string_length = 0;
00845         GList* errors_list_begin = errors_list;
00846         gchar *error_message, *error_message_begin;
00847 
00848         /* Find the value for full_error_size. */
00849         while (errors_list)
00850         {
00851             /* We add an extra 1 to account for spaces in between messages. */
00852             full_error_size += possible_error_lengths[GPOINTER_TO_INT(errors_list->data)] + 1;
00853             errors_list = g_list_next(errors_list);
00854         }
00855         errors_list = errors_list_begin;
00856 
00857         /* Append the error messages one after another. */
00858         error_message = error_message_begin = g_new(gchar, full_error_size);
00859         while (errors_list)
00860         {
00861             i = GPOINTER_TO_INT(errors_list->data);
00862             string_length = possible_error_lengths[i];
00863 
00864             /* Copy the error message and put a space after it. */
00865             strncpy(error_message, _(possible_errors[i]), string_length);
00866             error_message += string_length;
00867             *error_message = ' ';
00868             error_message++;
00869 
00870             errors_list = g_list_next(errors_list);
00871         }
00872         *error_message = '\0'; /* Replace the last space with the null byte. */
00873         g_list_free(errors_list_begin);
00874 
00875         *error = error_message_begin;
00876         return FALSE;
00877     }
00878 }
00879 
00885 static GncCsvTransLine* trans_property_list_to_trans(TransPropertyList* list, gchar** error)
00886 {
00887     GncCsvTransLine* trans_line = g_new(GncCsvTransLine, 1);
00888     GList* properties_begin = list->properties;
00889     QofBook* book = gnc_account_get_book(list->account);
00890     gnc_commodity* currency = xaccAccountGetCommodity(list->account);
00891     gnc_numeric amount = double_to_gnc_numeric(0.0, xaccAccountGetCommoditySCU(list->account),
00892                          GNC_HOW_RND_ROUND_HALF_UP);
00893 
00894     /* This flag is set to TRUE if we can use the "Deposit" or "Withdrawal" column. */
00895     gboolean amount_set = FALSE;
00896 
00897     /* The balance is 0 by default. */
00898     trans_line->balance_set = FALSE;
00899     trans_line->balance = amount;
00900 
00901     /* We make the line_no -1 just to mark that it hasn't been set. We
00902      * may get rid of line_no soon anyway, so it's not particularly
00903      * important. */
00904     trans_line->line_no = -1;
00905 
00906     /* Make sure this is a transaction with all the columns we need. */
00907     if (!trans_property_list_verify_essentials(list, error))
00908     {
00909         g_free(trans_line);
00910         return NULL;
00911     }
00912 
00913     trans_line->trans = xaccMallocTransaction(book);
00914     xaccTransBeginEdit(trans_line->trans);
00915     xaccTransSetCurrency(trans_line->trans, currency);
00916 
00917     /* Go through each of the properties and edit the transaction accordingly. */
00918     list->properties = properties_begin;
00919     while (list->properties != NULL)
00920     {
00921         TransProperty* prop = (TransProperty*)(list->properties->data);
00922         switch (prop->type)
00923         {
00924         case GNC_CSV_DATE:
00925             xaccTransSetDatePostedSecs(trans_line->trans, *((time_t*)(prop->value)));
00926             break;
00927 
00928         case GNC_CSV_DESCRIPTION:
00929             xaccTransSetDescription(trans_line->trans, (char*)(prop->value));
00930             break;
00931 
00932         case GNC_CSV_NUM:
00933             xaccTransSetNum(trans_line->trans, (char*)(prop->value));
00934             break;
00935 
00936         case GNC_CSV_DEPOSIT: /* Add deposits to the existing amount. */
00937             if (prop->value != NULL)
00938             {
00939                 amount = gnc_numeric_add(*((gnc_numeric*)(prop->value)),
00940                                          amount,
00941                                          xaccAccountGetCommoditySCU(list->account),
00942                                          GNC_HOW_RND_ROUND_HALF_UP);
00943                 amount_set = TRUE;
00944                 /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00945                 trans_line->balance_set = FALSE;
00946             }
00947             break;
00948 
00949         case GNC_CSV_WITHDRAWAL: /* Withdrawals are just negative deposits. */
00950             if (prop->value != NULL)
00951             {
00952                 amount = gnc_numeric_add(gnc_numeric_neg(*((gnc_numeric*)(prop->value))),
00953                                          amount,
00954                                          xaccAccountGetCommoditySCU(list->account),
00955                                          GNC_HOW_RND_ROUND_HALF_UP);
00956                 amount_set = TRUE;
00957                 /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00958                 trans_line->balance_set = FALSE;
00959             }
00960             break;
00961 
00962         case GNC_CSV_BALANCE: /* The balance gets stored in a separate field in trans_line. */
00963             /* We will use the "Deposit" and "Withdrawal" columns in preference to "Balance". */
00964             if (!amount_set && prop->value != NULL)
00965             {
00966                 /* This gets put into the actual transaction at the end of gnc_csv_parse_to_trans. */
00967                 trans_line->balance = *((gnc_numeric*)(prop->value));
00968                 trans_line->balance_set = TRUE;
00969             }
00970             break;
00971         }
00972         list->properties = g_list_next(list->properties);
00973     }
00974 
00975     /* Add a split with the cumulative amount value. */
00976     trans_add_split(trans_line->trans, list->account, book, amount);
00977 
00978     return trans_line;
00979 }
00980 
00991 int gnc_csv_parse_to_trans(GncCsvParseData* parse_data, Account* account,
00992                            gboolean redo_errors)
00993 {
00994     gboolean hasBalanceColumn;
00995     int i, j, max_cols = 0;
00996     GArray* column_types = parse_data->column_types;
00997     GList *error_lines = NULL, *begin_error_lines = NULL;
00998 
00999     /* last_transaction points to the last element in
01000      * parse_data->transactions, or NULL if it's empty. */
01001     GList* last_transaction = NULL;
01002 
01003     /* Free parse_data->error_lines and parse_data->transactions if they
01004      * already exist. */
01005     if (redo_errors) /* If we're redoing errors, we save freeing until the end. */
01006     {
01007         begin_error_lines = error_lines = parse_data->error_lines;
01008     }
01009     else
01010     {
01011         if (parse_data->error_lines != NULL)
01012         {
01013             g_list_free(parse_data->error_lines);
01014         }
01015         if (parse_data->transactions != NULL)
01016         {
01017             g_list_free(parse_data->transactions);
01018         }
01019     }
01020     parse_data->error_lines = NULL;
01021 
01022     if (redo_errors) /* If we're looking only at error data ... */
01023     {
01024         if (parse_data->transactions == NULL)
01025         {
01026             last_transaction = NULL;
01027         }
01028         else
01029         {
01030             /* Move last_transaction to the end. */
01031             last_transaction = parse_data->transactions;
01032             while (g_list_next(last_transaction) != NULL)
01033             {
01034                 last_transaction = g_list_next(last_transaction);
01035             }
01036         }
01037         /* ... we use only the lines in error_lines. */
01038         if (error_lines == NULL)
01039             i = parse_data->orig_lines->len; /* Don't go into the for loop. */
01040         else
01041             i = GPOINTER_TO_INT(error_lines->data);
01042     }
01043     else /* Otherwise, we look at all the data. */
01044     {
01045         /* The following while-loop effectively behaves like the following for-loop:
01046          * for(i = 0; i < parse_data->orig_lines->len; i++). */
01047         i = parse_data->start_row;
01048         last_transaction = NULL;
01049     }
01050 
01051     /* set parse_data->end_row to number of lines */
01052     if (parse_data->end_row > parse_data->orig_lines->len)
01053         parse_data->end_row = parse_data->orig_lines->len;
01054 
01055     while (i < parse_data->end_row)
01056     {
01057         GPtrArray* line = parse_data->orig_lines->pdata[i];
01058         /* This flag is TRUE if there are any errors in this row. */
01059         gboolean errors = FALSE;
01060         gchar* error_message = NULL;
01061         TransPropertyList* list = trans_property_list_new(account, parse_data->date_format, parse_data->currency_format );
01062         GncCsvTransLine* trans_line = NULL;
01063 
01064         for (j = 0; j < line->len; j++)
01065         {
01066             /* We do nothing in "None" or "Account" columns. */
01067             if ((column_types->data[j] != GNC_CSV_NONE) && (column_types->data[j] != GNC_CSV_ACCOUNT))
01068             {
01069                 /* Affect the transaction appropriately. */
01070                 TransProperty* property = trans_property_new(column_types->data[j], list);
01071                 gboolean succeeded = trans_property_set(property, line->pdata[j]);
01072 
01073                 /* TODO Maybe move error handling to within TransPropertyList functions? */
01074                 if (succeeded)
01075                 {
01076                     trans_property_list_add(property);
01077                 }
01078                 else
01079                 {
01080                     errors = TRUE;
01081                     error_message = g_strdup_printf(_("%s column could not be understood."),
01082                                                     _(gnc_csv_column_type_strs[property->type]));
01083                     trans_property_free(property);
01084                     break;
01085                 }
01086             }
01087         }
01088 
01089         /* If we had success, add the transaction to parse_data->transaction. */
01090         if (!errors)
01091         {
01092             trans_line = trans_property_list_to_trans(list, &error_message);
01093             errors = trans_line == NULL;
01094         }
01095 
01096         trans_property_list_free(list);
01097 
01098         /* If there were errors, add this line to parse_data->error_lines. */
01099         if (errors)
01100         {
01101             parse_data->error_lines = g_list_append(parse_data->error_lines,
01102                                                     GINT_TO_POINTER(i));
01103             /* If there's already an error message, we need to replace it. */
01104             if (line->len > (int)(parse_data->orig_row_lengths->data[i]))
01105             {
01106                 g_free(line->pdata[line->len - 1]);
01107                 line->pdata[line->len - 1] = error_message;
01108             }
01109             else
01110             {
01111                 /* Put the error message at the end of the line. */
01112                 g_ptr_array_add(line, error_message);
01113             }
01114         }
01115         else
01116         {
01117             /* If all went well, add this transaction to the list. */
01118             trans_line->line_no = i;
01119 
01120             /* We keep the transactions sorted by date. We start at the end
01121              * of the list and go backward, simply because the file itself
01122              * is probably also sorted by date (but we need to handle the
01123              * exception anyway). */
01124 
01125             /* If we can just put it at the end, do so and increment last_transaction. */
01126             if (last_transaction == NULL ||
01127                     xaccTransGetDate(((GncCsvTransLine*)(last_transaction->data))->trans) <= xaccTransGetDate(trans_line->trans))
01128             {
01129                 parse_data->transactions = g_list_append(parse_data->transactions, trans_line);
01130                 /* If this is the first transaction, we need to get last_transaction on track. */
01131                 if (last_transaction == NULL)
01132                     last_transaction = parse_data->transactions;
01133                 else /* Otherwise, we can just continue. */
01134                     last_transaction = g_list_next(last_transaction);
01135             }
01136             /* Otherwise, search backward for the correct spot. */
01137             else
01138             {
01139                 GList* insertion_spot = last_transaction;
01140                 while (insertion_spot != NULL &&
01141                         xaccTransGetDate(((GncCsvTransLine*)(insertion_spot->data))->trans) > xaccTransGetDate(trans_line->trans))
01142                 {
01143                     insertion_spot = g_list_previous(insertion_spot);
01144                 }
01145                 /* Move insertion_spot one location forward since we have to
01146                  * use the g_list_insert_before function. */
01147                 if (insertion_spot == NULL) /* We need to handle the case of inserting at the beginning of the list. */
01148                     insertion_spot = parse_data->transactions;
01149                 else
01150                     insertion_spot = g_list_next(insertion_spot);
01151 
01152                 parse_data->transactions = g_list_insert_before(parse_data->transactions, insertion_spot, trans_line);
01153             }
01154         }
01155 
01156         /* Increment to the next row. */
01157         if (redo_errors)
01158         {
01159             /* Move to the next error line in the list. */
01160             error_lines = g_list_next(error_lines);
01161             if (error_lines == NULL)
01162                 i = parse_data->orig_lines->len; /* Don't continue the for loop. */
01163             else
01164                 i = GPOINTER_TO_INT(error_lines->data);
01165         }
01166         else
01167         {
01168             i++;
01169         }
01170     }
01171 
01172     /* If we have a balance column, set the appropriate amounts on the transactions. */
01173     hasBalanceColumn = FALSE;
01174     for (i = 0; i < parse_data->column_types->len; i++)
01175     {
01176         if (parse_data->column_types->data[i] == GNC_CSV_BALANCE)
01177         {
01178             hasBalanceColumn = TRUE;
01179             break;
01180         }
01181     }
01182 
01183     if (hasBalanceColumn)
01184     {
01185         GList* transactions = parse_data->transactions;
01186 
01187         /* balance_offset is how much the balance currently in the account
01188          * differs from what it will be after the transactions are
01189          * imported. This will be sum of all the previous transactions for
01190          * any given transaction. */
01191         gnc_numeric balance_offset = double_to_gnc_numeric(0.0,
01192                                      xaccAccountGetCommoditySCU(account),
01193                                      GNC_HOW_RND_ROUND_HALF_UP);
01194         while (transactions != NULL)
01195         {
01196             GncCsvTransLine* trans_line = (GncCsvTransLine*)transactions->data;
01197             if (trans_line->balance_set)
01198             {
01199                 time_t date = xaccTransGetDate(trans_line->trans);
01200                 /* Find what the balance should be by adding the offset to the actual balance. */
01201                 gnc_numeric existing_balance = gnc_numeric_add(balance_offset,
01202                                                xaccAccountGetBalanceAsOfDate(account, date),
01203                                                xaccAccountGetCommoditySCU(account),
01204                                                GNC_HOW_RND_ROUND_HALF_UP);
01205 
01206                 /* The amount of the transaction is the difference between the new and existing balance. */
01207                 gnc_numeric amount = gnc_numeric_sub(trans_line->balance,
01208                                                      existing_balance,
01209                                                      xaccAccountGetCommoditySCU(account),
01210                                                      GNC_HOW_RND_ROUND_HALF_UP);
01211 
01212                 SplitList* splits = xaccTransGetSplitList(trans_line->trans);
01213                 while (splits)
01214                 {
01215                     SplitList* next_splits = g_list_next(splits);
01216                     xaccSplitDestroy((Split*)splits->data);
01217                     splits = next_splits;
01218                 }
01219 
01220                 trans_add_split(trans_line->trans, account, gnc_account_get_book(account), amount);
01221 
01222                 /* This new transaction needs to be added to the balance offset. */
01223                 balance_offset = gnc_numeric_add(balance_offset,
01224                                                  amount,
01225                                                  xaccAccountGetCommoditySCU(account),
01226                                                  GNC_HOW_RND_ROUND_HALF_UP);
01227             }
01228             transactions = g_list_next(transactions);
01229         }
01230     }
01231 
01232     if (redo_errors) /* Now that we're at the end, we do the freeing. */
01233     {
01234         g_list_free(begin_error_lines);
01235     }
01236 
01237     /* We need to resize parse_data->column_types since errors may have added columns. */
01238     for (i = 0; i < parse_data->orig_lines->len; i++)
01239     {
01240         if (max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
01241             max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
01242     }
01243     i = parse_data->column_types->len;
01244     parse_data->column_types = g_array_set_size(parse_data->column_types, max_cols);
01245     for (; i < max_cols; i++)
01246     {
01247         parse_data->column_types->data[i] = GNC_CSV_NONE;
01248     }
01249 
01250     return 0;
01251 }
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines