GnuCash 2.4.99
Data Structures | Enumerations | Functions | Variables
gnc-csv-model.h File Reference

CSV import GUI. More...

#include "config.h"
#include "Account.h"
#include "Transaction.h"
#include "stf/stf-parse.h"

Go to the source code of this file.

Data Structures

struct  GncCsvStr
struct  GncCsvTransLine
struct  GncCsvParseData

Enumerations

enum  GncCsvColumnType {
  GNC_CSV_NONE, GNC_CSV_DATE, GNC_CSV_DESCRIPTION, GNC_CSV_ACCOUNT,
  GNC_CSV_BALANCE, GNC_CSV_DEPOSIT, GNC_CSV_WITHDRAWAL, GNC_CSV_NUM,
  GNC_CSV_NUM_COL_TYPES
}
enum  GncCsvErrorType { GNC_CSV_FILE_OPEN_ERR, GNC_CSV_ENCODING_ERR }

Functions

GncCsvParseData * gnc_csv_new_parse_data (void)
void gnc_csv_parse_data_free (GncCsvParseData *parse_data)
int gnc_csv_load_file (GncCsvParseData *parse_data, const char *filename, GError **error)
int gnc_csv_convert_encoding (GncCsvParseData *parse_data, const char *encoding, GError **error)
int gnc_csv_parse (GncCsvParseData *parse_data, gboolean guessColTypes, GError **error)
int gnc_csv_parse_to_trans (GncCsvParseData *parse_data, Account *account, gboolean redo_errors)
time_t parse_date (const char *date_str, int format)

Variables

const int num_currency_formats
const gchar * currency_format_user []
const int num_date_formats
const gchar * date_format_user []
gchar * gnc_csv_column_type_strs []

Detailed Description

CSV import GUI.

gnc-csv-model.h

Author:
Copyright (c) 2007 Benny Sperisen <lasindi@gmail.com>

Definition in file gnc-csv-model.h.


Enumeration Type Documentation

Enumeration for column types. These are the different types of columns that can exist in a CSV/Fixed-Width file. There should be no two columns with the same type except for the GNC_CSV_NONE type.

Definition at line 41 of file gnc-csv-model.h.

                      {GNC_CSV_NONE,
                       GNC_CSV_DATE,
                       GNC_CSV_DESCRIPTION,
                       GNC_CSV_ACCOUNT,
                       GNC_CSV_BALANCE,
                       GNC_CSV_DEPOSIT,
                       GNC_CSV_WITHDRAWAL,
                       GNC_CSV_NUM,
                       GNC_CSV_NUM_COL_TYPES
                      };

Enumeration for error types. These are the different types of errors that various functions used for the CSV/Fixed-Width importer can have.

Definition at line 55 of file gnc-csv-model.h.

                     {GNC_CSV_FILE_OPEN_ERR,
                      GNC_CSV_ENCODING_ERR
                     };

Function Documentation

int gnc_csv_convert_encoding ( GncCsvParseData *  parse_data,
const char *  encoding,
GError **  error 
)

Converts raw file data using a new encoding. This function must be called after gnc_csv_load_file only if gnc_csv_load_file guessed the wrong encoding.

Parameters:
parse_data  Data that is being parsed
encoding  Encoding that data should be translated using
error  Will point to an error on failure
Returns:
0 on success, 1 on failure

Definition at line 405 of file gnc-csv-model.c.

{
    /* Re-translates parse_data->raw_str into UTF-8 using the given source
     * encoding and stores the result in parse_data->file_str.
     * Returns 0 on success and 1 if g_convert produced no output. */
    gsize consumed, produced;

    /* Release whatever translation was stored previously; g_free
     * quietly ignores NULL, so no guard is required. */
    g_free(parse_data->file_str.begin);

    /* Translate the raw bytes to UTF-8. */
    parse_data->file_str.begin =
        g_convert(parse_data->raw_str.begin,
                  parse_data->raw_str.end - parse_data->raw_str.begin,
                  "UTF-8", encoding, &consumed, &produced,
                  error);

    if (parse_data->file_str.begin != NULL)
    {
        /* Success: remember where the translated data ends and which
         * encoding produced it. */
        parse_data->file_str.end = parse_data->file_str.begin + produced;
        parse_data->encoding = (gchar*)encoding;
        return 0;
    }

    /* Conversion failed; any error detail was stored by g_convert. */
    return 1;
}
int gnc_csv_load_file ( GncCsvParseData *  parse_data,
const char *  filename,
GError **  error 
)

Loads a file into a GncCsvParseData. This is the first function that must be called after creating a new GncCsvParseData. If this fails because the file couldn't be opened, no more functions can be called on the parse data until this succeeds (or until it fails because of an encoding guess error). If it fails because the encoding could not be guessed, gnc_csv_convert_encoding must be called until it succeeds.

Parameters:
parse_data  Data that is being parsed
filename  Name of the file that should be opened
error  Will contain an error if there is a failure
Returns:
0 on success, 1 on failure

Definition at line 445 of file gnc-csv-model.c.

{
    /* Maps the file, records its raw bytes in parse_data->raw_str,
     * guesses the encoding and converts the contents into
     * parse_data->file_str. Returns 0 on success, 1 on failure with
     * *error set to GNC_CSV_FILE_OPEN_ERR or GNC_CSV_ENCODING_ERR. */
    const char* guess_enc = NULL;

    /* Get the raw data first and handle an error if one occurs. */
    parse_data->raw_mapping = g_mapped_file_new(filename, FALSE, error);
    if (parse_data->raw_mapping == NULL)
    {
        /* TODO Handle file opening errors more specifically,
         * e.g. nonexistent file versus no read permission. */
        parse_data->raw_str.begin = NULL;
        /* Replace GLib's error with our own; it must be cleared first
         * because a GError may not be set over an existing one. */
        g_clear_error (error);
        g_set_error(error, 0, GNC_CSV_FILE_OPEN_ERR, "%s", _("File opening failed."));
        return 1;
    }

    /* Point parse_data->raw_str at the mapping's contents. */
    parse_data->raw_str.begin = g_mapped_file_get_contents(parse_data->raw_mapping);
    parse_data->raw_str.end = parse_data->raw_str.begin + g_mapped_file_get_length(parse_data->raw_mapping);

    /* Make a guess at the encoding of the data; an empty file is never
     * guessed and falls through to the "Unknown encoding." error. */
    if (g_mapped_file_get_length(parse_data->raw_mapping) != 0)
        guess_enc = go_guess_encoding((const char*)(parse_data->raw_str.begin),
                                      (size_t)(parse_data->raw_str.end - parse_data->raw_str.begin),
                                      "UTF-8", NULL);
    if (guess_enc == NULL)
    {
        g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
        return 1;
    }

    /* Convert using the guessed encoding into parse_data->file_str and
     * handle any errors that occur. */
    if (gnc_csv_convert_encoding(parse_data, guess_enc, error) != 0)
    {
        /* The conversion may already have stored an error; drop it so
         * that g_set_error does not write over a set GError. */
        g_clear_error(error);
        g_set_error(error, 0, GNC_CSV_ENCODING_ERR, "%s", _("Unknown encoding."));
        return 1;
    }

    return 0;
}
GncCsvParseData* gnc_csv_new_parse_data ( void  )

Constructor for GncCsvParseData.

Returns:
Pointer to a new GncCsvParseData

Definition at line 327 of file gnc-csv-model.c.

{
    /* Allocates a GncCsvParseData and puts every field into a known
     * starting state. g_new does not zero the allocation, so each
     * pointer the destructor inspects must be set explicitly here. */
    GncCsvParseData* parse_data = g_new(GncCsvParseData, 1);
    parse_data->encoding = "UTF-8";
    /* All of the data pointers are initially NULL. This is so that, if
     * gnc_csv_parse_data_free is called before all of the data is
     * initialized, only the data that needs to be freed is freed. */
    parse_data->raw_mapping = NULL;
    parse_data->raw_str.begin = parse_data->raw_str.end
                                = parse_data->file_str.begin = parse_data->file_str.end = NULL;
    parse_data->orig_lines = NULL;
    parse_data->orig_row_lengths = NULL;
    parse_data->orig_max_row = 0;
    parse_data->column_types = NULL;
    parse_data->error_lines = parse_data->transactions = NULL;
    parse_data->options = default_parse_options();
    /* -1 means no date format has been chosen yet. */
    parse_data->date_format = -1;
    parse_data->currency_format = 0;
    parse_data->chunk = g_string_chunk_new(100 * 1024);
    parse_data->start_row = 0;
    parse_data->end_row = 1000;
    return parse_data;
}
int gnc_csv_parse ( GncCsvParseData *  parse_data,
gboolean  guessColTypes,
GError **  error 
)

Parses a file into cells. This requires having an encoding that works (see gnc_csv_convert_encoding). parse_data->options should be set according to how the user wants before calling this function. (Note: this function must be called with guessColTypes as TRUE before it is ever called with it as FALSE.) (Note: if guessColTypes is TRUE, all the column types will be GNC_CSV_NONE right now.)

Parameters:
parse_data  Data that is being parsed
guessColTypes  TRUE to guess what the types of columns are based on the cell contents
error  Will contain an error if there is a failure
Returns:
0 on success, 1 on failure

Definition at line 500 of file gnc-csv-model.c.

{
    /* Splits parse_data->file_str into rows of cells
     * (parse_data->orig_lines), records each row's original length and
     * sizes parse_data->column_types to the widest row.
     * Returns 0 on success, 1 if the parser produced no result. */

    /* max_cols is the number of columns in the row with the most columns. */
    int i, max_cols = 0;

    /* Discard the result of any earlier parse. */
    if (parse_data->orig_lines != NULL)
    {
        stf_parse_general_free(parse_data->orig_lines);
    }

    /* If everything is fine ... */
    if (parse_data->file_str.begin != NULL)
    {
        /* Do the actual parsing. */
        parse_data->orig_lines = stf_parse_general(parse_data->options, parse_data->chunk,
                                 parse_data->file_str.begin,
                                 parse_data->file_str.end);
    }
    /* If we couldn't get the encoding right, we just want an empty array. */
    else
    {
        parse_data->orig_lines = g_ptr_array_new();
    }

    /* If it failed, generate an error. This has to happen before
     * orig_lines is dereferenced below. */
    if (parse_data->orig_lines == NULL)
    {
        g_set_error(error, 0, 0, "Parsing failed.");
        return 1;
    }

    /* Record the original row lengths of parse_data->orig_lines. The
     * old array is released together with its element buffer (TRUE);
     * passing FALSE would hand the buffer back to us and leak it. */
    if (parse_data->orig_row_lengths != NULL)
        g_array_free(parse_data->orig_row_lengths, TRUE);

    parse_data->orig_row_lengths =
        g_array_sized_new(FALSE, FALSE, sizeof(int), parse_data->orig_lines->len);

    g_array_set_size(parse_data->orig_row_lengths, parse_data->orig_lines->len);
    parse_data->orig_max_row = 0;
    for (i = 0; i < parse_data->orig_lines->len; i++)
    {
        int length = ((GPtrArray*)parse_data->orig_lines->pdata[i])->len;
        /* NOTE(review): ->data is byte-addressed, so this stores one byte
         * at offset i rather than an int at index i. The reader in
         * gnc_csv_parse_to_trans indexes the same way, so both sides
         * agree; switching to g_array_index must be done in both. */
        parse_data->orig_row_lengths->data[i] = length;
        if (length > parse_data->orig_max_row)
            parse_data->orig_max_row = length;
        /* Track the widest row for sizing column_types. */
        if (length > max_cols)
            max_cols = length;
    }

    if (guessColTypes)
    {
        /* Free parse_data->column_types if it's already been created. */
        if (parse_data->column_types != NULL)
            g_array_free(parse_data->column_types, TRUE);

        /* Create parse_data->column_types and fill it with guesses based
         * on the contents of each column. */
        parse_data->column_types = g_array_sized_new(FALSE, FALSE, sizeof(int),
                                   max_cols);
        g_array_set_size(parse_data->column_types, max_cols);
        /* TODO Make it actually guess. */
        for (i = 0; i < parse_data->column_types->len; i++)
        {
            parse_data->column_types->data[i] = GNC_CSV_NONE;
        }
    }
    else
    {
        /* If we don't need to guess column types, we will simply set any
         * new columns that are created that didn't exist before to "None"
         * since we don't want gibberish to appear. Note:
         * parse_data->column_types should have already been
         * initialized, so we don't check for it being NULL. */
        i = parse_data->column_types->len;
        g_array_set_size(parse_data->column_types, max_cols);
        for (; i < parse_data->column_types->len; i++)
        {
            parse_data->column_types->data[i] = GNC_CSV_NONE;
        }
    }

    return 0;
}
void gnc_csv_parse_data_free ( GncCsvParseData *  parse_data )

Destructor for GncCsvParseData.

Parameters:
parse_data  Parse data whose memory will be freed

Definition at line 352 of file gnc-csv-model.c.

{
    /* Releases everything owned by parse_data and then parse_data
     * itself. All non-NULL pointers have been initialized and must be
     * freed. */

    if (parse_data->raw_mapping != NULL)
    {
        g_mapped_file_unref(parse_data->raw_mapping);
    }

    /* g_free ignores NULL, so no guard is needed. */
    g_free(parse_data->file_str.begin);

    if (parse_data->orig_lines != NULL)
        stf_parse_general_free(parse_data->orig_lines);

    /* TRUE also releases the element buffer; with FALSE the buffer is
     * returned to the caller and would be leaked here. */
    if (parse_data->orig_row_lengths != NULL)
        g_array_free(parse_data->orig_row_lengths, TRUE);

    if (parse_data->options != NULL)
        stf_parse_options_free(parse_data->options);

    if (parse_data->column_types != NULL)
        g_array_free(parse_data->column_types, TRUE);

    if (parse_data->error_lines != NULL)
        g_list_free(parse_data->error_lines);

    if (parse_data->transactions != NULL)
    {
        GList* node;
        /* We have to free the GncCsvTransLine's that are at each node in
         * the list before freeing the entire list. */
        for (node = parse_data->transactions; node != NULL; node = g_list_next(node))
        {
            g_free(node->data);
        }
        g_list_free(parse_data->transactions);
    }

    /* The chunk was created with g_string_chunk_new, so it must be
     * destroyed with g_string_chunk_free rather than plain g_free. */
    if (parse_data->chunk != NULL)
        g_string_chunk_free(parse_data->chunk);

    g_free(parse_data);
}
int gnc_csv_parse_to_trans ( GncCsvParseData *  parse_data,
Account *  account,
gboolean  redo_errors 
)

Creates a list of transactions from parsed data. Transactions that could be created from rows are placed in parse_data->transactions; rows that fail are placed in parse_data->error_lines. (Note: there is no way for this function to "fail," i.e. it only returns 0, so it may be changed to a void function in the future.)

Parameters:
parse_data  Data that is being parsed
account  Account with which transactions are created
redo_errors  TRUE to convert only error data, FALSE for all data
Returns:
0 on success, 1 on failure

Definition at line 991 of file gnc-csv-model.c.

{
    /* Converts parsed rows into GncCsvTransLine entries. Rows that
     * convert are inserted into parse_data->transactions in date order;
     * rows that fail are recorded in parse_data->error_lines and get an
     * error message appended as an extra cell. When redo_errors is TRUE
     * only the rows listed in the previous error_lines are retried.
     * Always returns 0. */
    gboolean hasBalanceColumn;
    int i, j, max_cols = 0;
    GArray* column_types = parse_data->column_types;
    GList *error_lines = NULL, *begin_error_lines = NULL;

    /* last_transaction points to the last element in
     * parse_data->transactions, or NULL if it's empty. */
    GList* last_transaction = NULL;

    /* Free parse_data->error_lines and parse_data->transactions if they
     * already exist. */
    if (redo_errors) /* If we're redoing errors, we save freeing until the end. */
    {
        begin_error_lines = error_lines = parse_data->error_lines;
    }
    else
    {
        if (parse_data->error_lines != NULL)
        {
            g_list_free(parse_data->error_lines);
        }
        if (parse_data->transactions != NULL)
        {
            g_list_free(parse_data->transactions);
            /* Reset the head so the g_list_append calls below start a
             * fresh list instead of walking the nodes just freed. */
            /* NOTE(review): the GncCsvTransLine elements held by the old
             * list are not freed here - confirm who owns them. */
            parse_data->transactions = NULL;
        }
    }
    parse_data->error_lines = NULL;

    if (redo_errors) /* If we're looking only at error data ... */
    {
        if (parse_data->transactions == NULL)
        {
            last_transaction = NULL;
        }
        else
        {
            /* Move last_transaction to the end. */
            last_transaction = parse_data->transactions;
            while (g_list_next(last_transaction) != NULL)
            {
                last_transaction = g_list_next(last_transaction);
            }
        }
        /* ... we use only the lines in error_lines. */
        if (error_lines == NULL)
            i = parse_data->orig_lines->len; /* Don't go into the loop. */
        else
            i = GPOINTER_TO_INT(error_lines->data);
    }
    else /* Otherwise, we look at all the data. */
    {
        /* The following while-loop effectively behaves like the following for-loop:
         * for(i = start_row; i < end_row; i++). */
        i = parse_data->start_row;
        last_transaction = NULL;
    }

    /* Clamp parse_data->end_row to the number of lines. */
    if (parse_data->end_row > parse_data->orig_lines->len)
        parse_data->end_row = parse_data->orig_lines->len;

    while (i < parse_data->end_row)
    {
        GPtrArray* line = parse_data->orig_lines->pdata[i];
        /* This flag is TRUE if there are any errors in this row. */
        gboolean errors = FALSE;
        gchar* error_message = NULL;
        TransPropertyList* list = trans_property_list_new(account, parse_data->date_format, parse_data->currency_format );
        GncCsvTransLine* trans_line = NULL;

        for (j = 0; j < line->len; j++)
        {
            /* We do nothing in "None" or "Account" columns. */
            if ((column_types->data[j] != GNC_CSV_NONE) && (column_types->data[j] != GNC_CSV_ACCOUNT))
            {
                /* Affect the transaction appropriately. */
                TransProperty* property = trans_property_new(column_types->data[j], list);
                gboolean succeeded = trans_property_set(property, line->pdata[j]);

                /* TODO Maybe move error handling to within TransPropertyList functions? */
                if (succeeded)
                {
                    trans_property_list_add(property);
                }
                else
                {
                    errors = TRUE;
                    error_message = g_strdup_printf(_("%s column could not be understood."),
                                                    _(gnc_csv_column_type_strs[property->type]));
                    trans_property_free(property);
                    break;
                }
            }
        }

        /* If we had success, add the transaction to parse_data->transaction. */
        if (!errors)
        {
            trans_line = trans_property_list_to_trans(list, &error_message);
            errors = trans_line == NULL;
        }

        trans_property_list_free(list);

        /* If there were errors, add this line to parse_data->error_lines. */
        if (errors)
        {
            parse_data->error_lines = g_list_append(parse_data->error_lines,
                                                    GINT_TO_POINTER(i));
            /* If there's already an error message, we need to replace it.
             * A row longer than its recorded original length carries a
             * message from an earlier pass in its last cell. */
            if (line->len > (int)(parse_data->orig_row_lengths->data[i]))
            {
                g_free(line->pdata[line->len - 1]);
                line->pdata[line->len - 1] = error_message;
            }
            else
            {
                /* Put the error message at the end of the line. */
                g_ptr_array_add(line, error_message);
            }
        }
        else
        {
            /* If all went well, add this transaction to the list. */
            trans_line->line_no = i;

            /* We keep the transactions sorted by date. We start at the end
             * of the list and go backward, simply because the file itself
             * is probably also sorted by date (but we need to handle the
             * exception anyway). */

            /* If we can just put it at the end, do so and increment last_transaction. */
            if (last_transaction == NULL ||
                    xaccTransGetDate(((GncCsvTransLine*)(last_transaction->data))->trans) <= xaccTransGetDate(trans_line->trans))
            {
                parse_data->transactions = g_list_append(parse_data->transactions, trans_line);
                /* If this is the first transaction, we need to get last_transaction on track. */
                if (last_transaction == NULL)
                    last_transaction = parse_data->transactions;
                else /* Otherwise, we can just continue. */
                    last_transaction = g_list_next(last_transaction);
            }
            /* Otherwise, search backward for the correct spot. */
            else
            {
                GList* insertion_spot = last_transaction;
                while (insertion_spot != NULL &&
                        xaccTransGetDate(((GncCsvTransLine*)(insertion_spot->data))->trans) > xaccTransGetDate(trans_line->trans))
                {
                    insertion_spot = g_list_previous(insertion_spot);
                }
                /* Move insertion_spot one location forward since we have to
                 * use the g_list_insert_before function. */
                if (insertion_spot == NULL) /* We need to handle the case of inserting at the beginning of the list. */
                    insertion_spot = parse_data->transactions;
                else
                    insertion_spot = g_list_next(insertion_spot);

                parse_data->transactions = g_list_insert_before(parse_data->transactions, insertion_spot, trans_line);
            }
        }

        /* Increment to the next row. */
        if (redo_errors)
        {
            /* Move to the next error line in the list. */
            error_lines = g_list_next(error_lines);
            if (error_lines == NULL)
                i = parse_data->orig_lines->len; /* Don't continue the loop. */
            else
                i = GPOINTER_TO_INT(error_lines->data);
        }
        else
        {
            i++;
        }
    }

    /* If we have a balance column, set the appropriate amounts on the transactions. */
    hasBalanceColumn = FALSE;
    for (i = 0; i < parse_data->column_types->len; i++)
    {
        if (parse_data->column_types->data[i] == GNC_CSV_BALANCE)
        {
            hasBalanceColumn = TRUE;
            break;
        }
    }

    if (hasBalanceColumn)
    {
        GList* transactions = parse_data->transactions;

        /* balance_offset is how much the balance currently in the account
         * differs from what it will be after the transactions are
         * imported. This will be sum of all the previous transactions for
         * any given transaction. */
        gnc_numeric balance_offset = double_to_gnc_numeric(0.0,
                                     xaccAccountGetCommoditySCU(account),
                                     GNC_HOW_RND_ROUND_HALF_UP);
        while (transactions != NULL)
        {
            GncCsvTransLine* trans_line = (GncCsvTransLine*)transactions->data;
            if (trans_line->balance_set)
            {
                time_t date = xaccTransGetDate(trans_line->trans);
                /* Find what the balance should be by adding the offset to the actual balance. */
                gnc_numeric existing_balance = gnc_numeric_add(balance_offset,
                                               xaccAccountGetBalanceAsOfDate(account, date),
                                               xaccAccountGetCommoditySCU(account),
                                               GNC_HOW_RND_ROUND_HALF_UP);

                /* The amount of the transaction is the difference between the new and existing balance. */
                gnc_numeric amount = gnc_numeric_sub(trans_line->balance,
                                                     existing_balance,
                                                     xaccAccountGetCommoditySCU(account),
                                                     GNC_HOW_RND_ROUND_HALF_UP);

                /* Remove every existing split before adding the one
                 * computed from the balance. The next pointer is read
                 * before each split is destroyed. */
                SplitList* splits = xaccTransGetSplitList(trans_line->trans);
                while (splits)
                {
                    SplitList* next_splits = g_list_next(splits);
                    xaccSplitDestroy((Split*)splits->data);
                    splits = next_splits;
                }

                trans_add_split(trans_line->trans, account, gnc_account_get_book(account), amount);

                /* This new transaction needs to be added to the balance offset. */
                balance_offset = gnc_numeric_add(balance_offset,
                                                 amount,
                                                 xaccAccountGetCommoditySCU(account),
                                                 GNC_HOW_RND_ROUND_HALF_UP);
            }
            transactions = g_list_next(transactions);
        }
    }

    if (redo_errors) /* Now that we're at the end, we do the freeing. */
    {
        g_list_free(begin_error_lines);
    }

    /* We need to resize parse_data->column_types since errors may have added columns. */
    for (i = 0; i < parse_data->orig_lines->len; i++)
    {
        if (max_cols < ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len)
            max_cols = ((GPtrArray*)(parse_data->orig_lines->pdata[i]))->len;
    }
    i = parse_data->column_types->len;
    parse_data->column_types = g_array_set_size(parse_data->column_types, max_cols);
    for (; i < max_cols; i++)
    {
        parse_data->column_types->data[i] = GNC_CSV_NONE;
    }

    return 0;
}
time_t parse_date ( const char *  date_str,
int  format 
)

Parses a string into a date, given a format. This function requires only knowing the order in which the year, month and day appear. For example, 01-02-2003 will be parsed the same way as 01/02/2003.

Parameters:
date_str  The string containing a date being parsed
format  An index specifying a format in date_format_user
Returns:
The parsed value of date_str on success or -1 on failure

Definition at line 316 of file gnc-csv-model.c.

{
    /* Dispatches to the year-aware or year-less parser depending on
     * whether the selected format string contains a 'y'. */

    /* Reject an index outside the format table instead of reading past
     * it; the parse data's date_format starts out as -1. */
    /* NOTE(review): assumes date_format_user has exactly
     * num_date_formats entries - confirm against the definitions. */
    if (format < 0 || format >= num_date_formats)
        return -1;

    if (strchr(date_format_user[format], 'y'))
        return parse_date_with_year(date_str, format);
    else
        return parse_date_without_year(date_str, format);
}
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines