GnuCash 2.4.99
import-backend.c
00001 /********************************************************************\
00002  * This program is free software; you can redistribute it and/or    *
00003  * modify it under the terms of the GNU General Public License as   *
00004  * published by the Free Software Foundation; either version 2 of   *
00005  * the License, or (at your option) any later version.              *
00006  *                                                                  *
00007  * This program is distributed in the hope that it will be useful,  *
00008  * but WITHOUT ANY WARRANTY; without even the implied warranty of   *
00009  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    *
00010  * GNU General Public License for more details.                     *
00011  *                                                                  *
00012  * You should have received a copy of the GNU General Public License*
00013  * along with this program; if not, contact:                        *
00014  *                                                                  *
00015  * Free Software Foundation           Voice:  +1-617-542-5942       *
00016  * 51 Franklin Street, Fifth Floor    Fax:    +1-617-542-2652       *
00017  * Boston, MA  02110-1301,  USA       gnu@gnu.org                   *
00018 \********************************************************************/
00029 #include "config.h"
00030 
00031 #include <gtk/gtk.h>
00032 #include <glib/gi18n.h>
00033 #include <stdlib.h>
00034 #include <math.h>
00035 
00036 #include <errno.h>
00037 
00038 #include "gnc-gconf-utils.h"
00039 #include "import-backend.h"
00040 #include "import-utilities.h"
00041 #include "Account.h"
00042 #include "Query.h"
00043 #include "gnc-engine.h"
00044 #include "gnc-ui-util.h"
00045 
00046 #define GCONF_SECTION "dialogs/import/generic_matcher"
00047 #define BAYES_OPTION  "use_bayes"
00048 
00049 /********************************************************************\
00050  *   Constants                                                      *
00051 \********************************************************************/
00052 
/* Log module identifier for this file, set to the import module id.
 * NOTE(review): presumably picked up implicitly by the PERR() logging
 * macro used further down -- confirm against the qof logging header. */
static QofLogModule log_module = GNC_MOD_IMPORT;
00054 
00055 /********************************************************************\
00056  * Constants, should ideally be defined in a user preference dialog *
00057 \********************************************************************/
00058 
/* Date-distance limits, in days, used by the date heuristic in
 * split_find_match(): a candidate at most MATCH_DATE_THRESHOLD days away
 * (but not on the same day) scores +2, while a candidate more than
 * MATCH_DATE_NOT_THRESHOLD days away is penalised by -5. */
static const int MATCH_DATE_THRESHOLD = 4; /*within 4 days*/
static const int MATCH_DATE_NOT_THRESHOLD = 14;
00061 
00062 /********************************************************************\
00063  *   Forward declared prototypes                                    *
00064 \********************************************************************/
00065 
/* Declared ahead of its definition because
 * gnc_import_TransInfo_set_destacc() calls it before the body appears
 * further down in this file. */
static void
matchmap_store_destination (GncImportMatchMap *matchmap,
                            GNCImportTransInfo *trans_info,
                            gboolean use_match);
00070 
00071 
00072 /********************************************************************\
00073  *               Structures passed between the functions            *
00074 \********************************************************************/
00075 
/* State kept for one imported transaction while it is being matched
 * against existing splits and assigned a destination account. */
struct _transactioninfo
{
    /* The imported transaction.  If it is still open when this record is
       deleted, gnc_import_TransInfo_delete() destroys it. */
    Transaction * trans;
    /* Split of the imported transaction whose account, amount and memo
       are used by the matching code in this file. */
    Split * first_split;

    /* GList of GNCImportMatchInfo's, one for each possible duplicate match. */
    GList * match_list;
    /* Currently chosen entry and whether it was chosen manually; both are
       written together by gnc_import_TransInfo_set_selected_match(). */
    GNCImportMatchInfo * selected_match_info;
    gboolean match_selected_manually;

    /* Current action, plus the action that was in effect before the most
       recent change made through gnc_import_TransInfo_set_action(). */
    GNCImportAction action;
    GNCImportAction previous_action;

    /* A list of tokenized strings to use for bayesian matching purposes */
    /* Built lazily and cached by TransactionGetTokens(); the strings are
       freed in gnc_import_TransInfo_delete(). */
    GList * match_tokens;

    /* In case of a single destination account it is stored here. */
    Account *dest_acc;
    gboolean dest_acc_selected_manually;

    /* Reference id to link gnc transaction to external object. E.g. aqbanking job id. */
    guint32 ref_id;
};
00099 
/* One candidate match for an imported transaction, as produced by
 * split_find_match(). */
struct _matchinfo
{
    /* Parent transaction of the candidate split. */
    Transaction * trans;
    /* The existing split that might be a duplicate of the import. */
    Split * split;
    /*GNC_match_probability probability;*/
    /* Heuristic score accumulated by split_find_match(). */
    gint probability;
    /* TRUE when the score after the amount and date heuristics alone was
       below 6, i.e. amount and date were not both exact matches. */
    gboolean update_proposed;
};
00108 
00109 /* Some simple getters and setters for the above data types. */
00110 
00111 GList *
00112 gnc_import_TransInfo_get_match_list (const GNCImportTransInfo *info)
00113 {
00114     g_assert (info);
00115     return info->match_list;
00116 }
00117 
00118 Transaction *
00119 gnc_import_TransInfo_get_trans (const GNCImportTransInfo *info)
00120 {
00121     g_assert (info);
00122     return info->trans;
00123 }
00124 
00125 gboolean
00126 gnc_import_TransInfo_is_balanced (const GNCImportTransInfo *info)
00127 {
00128     g_assert (info);
00129     /* Assume that the importer won't create a transaction that involves two or more
00130        currencies and no non-currency commodity.  In that case can use the simpler
00131        value imbalance check. */
00132     if (gnc_numeric_zero_p(xaccTransGetImbalanceValue(gnc_import_TransInfo_get_trans(info))))
00133     {
00134         return TRUE;
00135     }
00136     else
00137     {
00138         return FALSE;
00139     }
00140 }
00141 
00142 Split *
00143 gnc_import_TransInfo_get_fsplit (const GNCImportTransInfo *info)
00144 {
00145     g_assert (info);
00146     return info->first_split;
00147 }
00148 
00149 GNCImportMatchInfo *
00150 gnc_import_TransInfo_get_selected_match (const GNCImportTransInfo *info)
00151 {
00152     g_assert (info);
00153     return info->selected_match_info;
00154 }
00155 
00156 void
00157 gnc_import_TransInfo_set_selected_match (GNCImportTransInfo *info,
00158         GNCImportMatchInfo *match,
00159         gboolean selected_manually)
00160 {
00161     g_assert (info);
00162     info->selected_match_info = match;
00163     info->match_selected_manually = selected_manually;
00164 }
00165 
00166 gboolean
00167 gnc_import_TransInfo_get_match_selected_manually (const GNCImportTransInfo *info)
00168 {
00169     g_assert (info);
00170     return info->match_selected_manually;
00171 }
00172 
00173 GNCImportAction
00174 gnc_import_TransInfo_get_action (const GNCImportTransInfo *info)
00175 {
00176     g_assert (info);
00177     return info->action;
00178 }
00179 
00180 void
00181 gnc_import_TransInfo_set_action (GNCImportTransInfo *info,
00182                                  GNCImportAction action)
00183 {
00184     g_assert (info);
00185     if (action != info->action)
00186     {
00187         info->previous_action = info->action;
00188         info->action = action;
00189     }
00190 }
00191 
00192 Account *
00193 gnc_import_TransInfo_get_destacc (const GNCImportTransInfo *info)
00194 {
00195     g_assert (info);
00196     return info->dest_acc;
00197 }
00198 void gnc_import_TransInfo_set_destacc (GNCImportTransInfo *info,
00199                                        Account *acc,
00200                                        gboolean selected_manually)
00201 {
00202     g_assert (info);
00203     info->dest_acc = acc;
00204     info->dest_acc_selected_manually = selected_manually;
00205 
00206     /* Store the mapping to the other account in the MatchMap. */
00207     if (selected_manually)
00208     {
00209         matchmap_store_destination (NULL, info, FALSE);
00210     }
00211 }
00212 
00213 gboolean
00214 gnc_import_TransInfo_get_destacc_selected_manually (const GNCImportTransInfo *info)
00215 {
00216     g_assert (info);
00217     return info->dest_acc_selected_manually;
00218 }
00219 
00220 guint32
00221 gnc_import_TransInfo_get_ref_id (const GNCImportTransInfo *info)
00222 {
00223     g_assert (info);
00224     return info->ref_id;
00225 }
00226 
00227 void
00228 gnc_import_TransInfo_set_ref_id (GNCImportTransInfo *info,
00229                                  guint32 ref_id)
00230 {
00231     g_assert (info);
00232     info->ref_id = ref_id;
00233 }
00234 
00235 
00236 Split *
00237 gnc_import_MatchInfo_get_split (const GNCImportMatchInfo * info)
00238 {
00239     g_assert (info);
00240     return info->split;
00241 }
00242 
00243 gint
00244 gnc_import_MatchInfo_get_probability (const GNCImportMatchInfo * info)
00245 {
00246     if (info)
00247     {
00248         return info->probability;
00249     }
00250     else
00251     {
00252         return 0;
00253     }
00254 }
00255 
00256 void gnc_import_TransInfo_delete (GNCImportTransInfo *info)
00257 {
00258     if (info)
00259     {
00260         g_list_free (info->match_list);
00261         /*If the transaction exists and is still open, it must be destroyed*/
00262         if (info->trans && xaccTransIsOpen(info->trans))
00263         {
00264             xaccTransDestroy(info->trans);
00265             xaccTransCommitEdit(info->trans);
00266         }
00267         if (info->match_tokens)
00268         {
00269             GList *node;
00270 
00271             for (node = info->match_tokens; node; node = node->next)
00272                 g_free (node->data);
00273 
00274             g_list_free (info->match_tokens);
00275         }
00276         g_free(info);
00277     }
00278 }
00279 
00280 GdkPixbuf* gen_probability_pixbuf(gint score_original, GNCImportSettings *settings, GtkWidget * widget)
00281 {
00282     GdkPixbuf* retval = NULL;
00283     gint i, j;
00284     gint score;
00285     const gint height = 15;
00286     const gint width_each_bar = 7;
00287     gchar * green_bar = ("bggggb ");
00288     gchar * yellow_bar = ("byyyyb ");
00289     gchar * red_bar = ("brrrrb ");
00290     gchar * black_bar = ("bbbbbb ");
00291     const gint width_first_bar = 1;
00292     gchar * black_first_bar = ("b");
00293     const gint num_colors = 5;
00294     gchar * size_str;
00295     gchar * none_color_str = g_strdup_printf("  c None");
00296     gchar * green_color_str = g_strdup_printf("g c green");
00297     gchar * yellow_color_str = g_strdup_printf("y c yellow");
00298     gchar * red_color_str = g_strdup_printf("r c red");
00299     gchar * black_color_str = g_strdup_printf("b c black");
00300     gchar * xpm[2+num_colors+height];
00301     gint add_threshold, clear_threshold;
00302 
00303     g_assert(settings);
00304     g_assert(widget);
00305     if (score_original < 0)
00306     {
00307         score = 0;
00308     }
00309     else
00310     {
00311         score = score_original;
00312     }
00313     size_str = g_strdup_printf("%d%s%d%s%d%s", (width_each_bar * score) + width_first_bar/*width*/, " ", height, " ", num_colors, " 1"/*characters per pixel*/);
00314 
00315     /*DEBUG("Begin");*/
00316     xpm[0] = size_str;
00317     xpm[1] = none_color_str;
00318     xpm[2] = green_color_str;
00319     xpm[3] = yellow_color_str;
00320     xpm[4] = red_color_str;
00321     xpm[5] = black_color_str;
00322     add_threshold = gnc_import_Settings_get_add_threshold(settings);
00323     clear_threshold = gnc_import_Settings_get_clear_threshold(settings);
00324 
00325     for (i = 0; i < height; i++)
00326     {
00327         xpm[num_colors+1+i] = g_new0(char, (width_each_bar * score) + width_first_bar + 1);
00328         for (j = 0; j <= score; j++)
00329         {
00330             if (i == 0 || i == height - 1)
00331             {
00332                 if (j == 0)
00333                 {
00334                     strcat(xpm[num_colors+1+i], black_first_bar);
00335                 }
00336                 else
00337                 {
00338                     strcat(xpm[num_colors+1+i], black_bar);
00339                 }
00340             }
00341             else
00342             {
00343                 if (j == 0)
00344                 {
00345                     strcat(xpm[num_colors+1+i], black_first_bar);
00346                 }
00347                 else if (j <= add_threshold)
00348                 {
00349                     strcat(xpm[num_colors+1+i], red_bar);
00350                 }
00351                 else if (j >= clear_threshold)
00352                 {
00353                     strcat(xpm[num_colors+1+i], green_bar);
00354                 }
00355                 else
00356                 {
00357                     strcat(xpm[num_colors+1+i], yellow_bar);
00358                 }
00359             }
00360         }
00361     }
00362 
00363     retval =  gdk_pixbuf_new_from_xpm_data((const gchar **)xpm);
00364     for (i = 0; i <= num_colors + height; i++)
00365     {
00366         /*DEBUG("free_loop i=%d%s%s",i,": ",xpm[i]);*/
00367         g_free(xpm[i]);
00368     }
00369 
00370     return retval;
00371 }
00372 
00373 /*************************************************************************
00374  * MatchMap- related functions (storing and retrieving)
00375  */
00376 
00377 /* Tokenize a string and append to an existing GList(or an empty GList)
00378  * the tokens
00379  */
00380 static GList*
00381 tokenize_string(GList* existing_tokens, const char *string)
00382 {
00383     char **tokenized_strings; /* array of strings returned by g_strsplit() */
00384     char **stringpos;
00385 
00386     tokenized_strings = g_strsplit(string, " ", 0);
00387     stringpos = tokenized_strings;
00388 
00389     /* add each token to the token GList */
00390     while (stringpos && *stringpos)
00391     {
00392         /* prepend the char* to the token GList */
00393         existing_tokens = g_list_prepend(existing_tokens, g_strdup(*stringpos));
00394 
00395         /* then move to the next string */
00396         stringpos++;
00397     }
00398 
00399     /* free up the strings that g_strsplit() created */
00400     g_strfreev(tokenized_strings);
00401 
00402     return existing_tokens;
00403 }
00404 
00405 /* create and return a list of tokens for a given transaction info. */
00406 static GList*
00407 TransactionGetTokens(GNCImportTransInfo *info)
00408 {
00409     Transaction* transaction;
00410     GList* tokens;
00411     const char* text;
00412     time_t transtime;
00413     struct tm *tm_struct;
00414     char local_day_of_week[16];
00415     Split* split;
00416     int split_index;
00417 
00418     g_return_val_if_fail (info, NULL);
00419     if (info->match_tokens) return info->match_tokens;
00420 
00421     transaction = gnc_import_TransInfo_get_trans(info);
00422     g_assert(transaction);
00423 
00424     tokens = 0; /* start off with an empty list */
00425 
00426     /* make tokens from the transaction description */
00427     text = xaccTransGetDescription(transaction);
00428     tokens = tokenize_string(tokens, text);
00429 
00430     /* The day of week the transaction occured is a good indicator of
00431      * what account this transaction belongs in.  Get the date and covert
00432      * it to day of week as a token
00433      */
00434     transtime = xaccTransGetDate(transaction);
00435     tm_struct = gmtime(&transtime);
00436     if (!qof_strftime(local_day_of_week, sizeof(local_day_of_week), "%A", tm_struct))
00437     {
00438         PERR("TransactionGetTokens: error, strftime failed\n");
00439     }
00440 
00441     /* we cannot add a locally allocated string to this array, dup it so
00442      * it frees the same way the rest do
00443      */
00444     tokens = g_list_prepend(tokens, g_strdup(local_day_of_week));
00445 
00446     /* make tokens from the memo of each split of this transaction */
00447     split_index = 0;
00448     while ((split = xaccTransGetSplit(transaction, split_index)))
00449     {
00450         text = xaccSplitGetMemo(split);
00451         tokens = tokenize_string(tokens, text);
00452         split_index++; /* next split */
00453     }
00454 
00455     /* remember the list of tokens for later.. */
00456     info->match_tokens = tokens;
00457 
00458     /* return the pointer to the GList */
00459     return tokens;
00460 }
00461 
00462 /* searches using the GNCImportTransInfo through all existing transactions
00463  * if there is an exact match of the description and memo
00464  */
00465 static Account *
00466 matchmap_find_destination (GncImportMatchMap *matchmap, GNCImportTransInfo *info)
00467 {
00468     GncImportMatchMap *tmp_map;
00469     Account *result;
00470     GList* tokens;
00471     gboolean useBayes;
00472 
00473     g_assert (info);
00474     tmp_map = ((matchmap != NULL) ? matchmap :
00475                gnc_imap_create_from_account
00476                (xaccSplitGetAccount
00477                 (gnc_import_TransInfo_get_fsplit (info))));
00478 
00479     useBayes = gnc_gconf_get_bool(GCONF_SECTION, BAYES_OPTION, NULL);
00480     if (useBayes)
00481     {
00482         /* get the tokens for this transaction* */
00483         tokens = TransactionGetTokens(info);
00484 
00485         /* try to find the destination account for this transaction from its tokens */
00486         result = gnc_imap_find_account_bayes(tmp_map, tokens);
00487 
00488     }
00489     else
00490     {
00491         /* old system of transaction to account matching */
00492         result = gnc_imap_find_account
00493                  (tmp_map, GNCIMPORT_DESC,
00494                   xaccTransGetDescription (gnc_import_TransInfo_get_trans (info)));
00495     }
00496 
00497     /* Disable matching by memo, until bayesian filtering is implemented.
00498      * It's currently unlikely to help, and has adverse effects,
00499      * causing false positives, since very often the type of the
00500      * transaction is stored there.
00501 
00502        if (result == NULL)
00503        result = gnc_imap_find_account
00504        (tmp_map, GNCIMPORT_MEMO,
00505        xaccSplitGetMemo (gnc_import_TransInfo_get_fsplit (info)));
00506     */
00507 
00508     if (matchmap == NULL)
00509         gnc_imap_destroy (tmp_map);
00510 
00511     return result;
00512 }
00513 
00518 static void
00519 matchmap_store_destination (GncImportMatchMap *matchmap,
00520                             GNCImportTransInfo *trans_info,
00521                             gboolean use_match)
00522 {
00523     GncImportMatchMap *tmp_matchmap = NULL;
00524     Account *dest;
00525     const char *descr, *memo;
00526     GList *tokens;
00527     gboolean useBayes;
00528 
00529     g_assert (trans_info);
00530 
00531     /* This will store the destination account of the selected match if
00532        the reconcile match selected has only two splits.  Good idea
00533        Christian! */
00534     dest = ((use_match) ?
00535             xaccSplitGetAccount
00536             (xaccSplitGetOtherSplit
00537              (gnc_import_MatchInfo_get_split
00538               (gnc_import_TransInfo_get_selected_match (trans_info)))) :
00539             gnc_import_TransInfo_get_destacc (trans_info));
00540     if (dest == NULL)
00541         return;
00542 
00543     tmp_matchmap = ((matchmap != NULL) ?
00544                     matchmap :
00545                     gnc_imap_create_from_account
00546                     (xaccSplitGetAccount
00547                      (gnc_import_TransInfo_get_fsplit (trans_info))));
00548 
00549     /* see what matching system we are currently using */
00550     useBayes = gnc_gconf_get_bool(GCONF_SECTION, BAYES_OPTION, NULL);
00551     if (useBayes)
00552     {
00553         /* tokenize this transaction */
00554         tokens = TransactionGetTokens(trans_info);
00555 
00556         /* add the tokens to the imap with the given destination account */
00557         gnc_imap_add_account_bayes(tmp_matchmap, tokens, dest);
00558 
00559     }
00560     else
00561     {
00562         /* old matching system */
00563         descr = xaccTransGetDescription
00564                 (gnc_import_TransInfo_get_trans (trans_info));
00565         if (descr && (strlen (descr) > 0))
00566             gnc_imap_add_account (tmp_matchmap,
00567                                   GNCIMPORT_DESC,
00568                                   descr,
00569                                   dest);
00570         memo = xaccSplitGetMemo
00571                (gnc_import_TransInfo_get_fsplit (trans_info));
00572         if (memo && (strlen (memo) > 0))
00573             gnc_imap_add_account (tmp_matchmap,
00574                                   GNCIMPORT_MEMO,
00575                                   memo,
00576                                   dest);
00577     } /* if(useBayes) */
00578 
00579     if (matchmap == NULL)
00580         gnc_imap_destroy (tmp_matchmap);
00581 }
00582 
00583 
00584 
00587 static void split_find_match (GNCImportTransInfo * trans_info,
00588                               Split * split,
00589                               gint display_threshold,
00590                               double fuzzy_amount_difference)
00591 {
00592     /* DEBUG("Begin"); */
00593 
00594     /*Ignore the split if the transaction is open for edit, meaning it
00595       was just downloaded. */
00596     if (xaccTransIsOpen(xaccSplitGetParent(split)) == FALSE)
00597     {
00598         GNCImportMatchInfo * match_info;
00599         gint prob = 0;
00600         gboolean update_proposed;
00601         double downloaded_split_amount, match_split_amount;
00602         time_t match_time, download_time;
00603         int datediff_day;
00604         Transaction *new_trans = gnc_import_TransInfo_get_trans (trans_info);
00605         Split *new_trans_fsplit = gnc_import_TransInfo_get_fsplit (trans_info);
00606 
00607         /* Matching heuristics */
00608 
00609         /* Amount heuristics */
00610         downloaded_split_amount =
00611             gnc_numeric_to_double (xaccSplitGetAmount(new_trans_fsplit));
00612         /*DEBUG(" downloaded_split_amount=%f", downloaded_split_amount);*/
00613         match_split_amount = gnc_numeric_to_double(xaccSplitGetAmount(split));
00614         /*DEBUG(" match_split_amount=%f", match_split_amount);*/
00615         if (fabs(downloaded_split_amount - match_split_amount) < 1e-6)
00616             /* bug#347791: Double type shouldn't be compared for exact
00617                equality, so we're using fabs() instead. */
00618             /*if (gnc_numeric_equal(xaccSplitGetAmount
00619               (new_trans_fsplit),
00620               xaccSplitGetAmount(split)))
00621               -- gnc_numeric_equal is an expensive function call */
00622         {
00623             prob = prob + 3;
00624             /*DEBUG("heuristics:  probability + 3 (amount)");*/
00625         }
00626         else if (fabs (downloaded_split_amount - match_split_amount) <=
00627                  fuzzy_amount_difference)
00628         {
00629             /* ATM fees are sometimes added directly in the transaction.
00630                So you withdraw 100$ and get charged 101,25$ in the same
00631                transaction */
00632             prob = prob + 2;
00633             /*DEBUG("heuristics:  probability + 2 (amount)");*/
00634         }
00635         else
00636         {
00637             /* If a transaction's amount doesn't match within the
00638                threshold, it's very unlikely to be the same transaction
00639                so we give it an extra -5 penality */
00640             prob = prob - 5;
00641             /* DEBUG("heuristics:  probability - 1 (amount)"); */
00642         }
00643 
00644         /* Date heuristics */
00645         match_time = xaccTransGetDate (xaccSplitGetParent (split));
00646         download_time = xaccTransGetDate (new_trans);
00647         datediff_day = abs(match_time - download_time) / 86400;
00648         /* Sorry, there are not really functions around at all that
00649                  provide for less hacky calculation of days of date
00650                  differences. Whatever. On the other hand, the difference
00651                  calculation itself will work regardless of month/year
00652                  turnarounds. */
00653         /*DEBUG("diff day %d", datediff_day);*/
00654         if (datediff_day == 0)
00655         {
00656             prob = prob + 3;
00657             /*DEBUG("heuristics:  probability + 3 (date)");*/
00658         }
00659         else if (datediff_day <= MATCH_DATE_THRESHOLD)
00660         {
00661             prob = prob + 2;
00662             /*DEBUG("heuristics:  probability + 2 (date)");*/
00663         }
00664         else if (datediff_day > MATCH_DATE_NOT_THRESHOLD)
00665         {
00666             /* Extra penalty if that split lies awfully far away from
00667                the given one. */
00668             prob = prob - 5;
00669             /*DEBUG("heuristics:  probability - 5 (date)"); */
00670             /* Changed 2005-02-21: Revert the hard-limiting behaviour
00671                back to the previous large penalty. (Changed 2004-11-27:
00672                The penalty is so high that we can forget about this
00673                split anyway and skip the rest of the tests.) */
00674         }
00675 
00676         /* Check if date and amount are identical */
00677         update_proposed = (prob < 6);
00678 
00679         /* Check number heuristics */
00680         {
00681             const char *new_trans_str = xaccTransGetNum(new_trans);
00682             if (new_trans_str && strlen(new_trans_str) != 0)
00683             {
00684                 long new_trans_number, split_number;
00685                 const gchar *split_str;
00686                 char *endptr;
00687                 gboolean conversion_ok = TRUE;
00688 
00689                 /* To distinguish success/failure after strtol call */
00690                 errno = 0;
00691                 new_trans_number = strtol(new_trans_str, &endptr, 10);
00692                 /* Possible addressed problems: over/underflow, only non
00693                              numbers on string and string empty */
00694                 if (errno || endptr == new_trans_str)
00695                     conversion_ok = FALSE;
00696 
00697                 split_str = xaccTransGetNum (xaccSplitGetParent (split));
00698                 errno = 0;
00699                 split_number = strtol(split_str, &endptr, 10);
00700                 if (errno || endptr == split_str)
00701                     conversion_ok = FALSE;
00702 
00703                 if ( (conversion_ok && (split_number == new_trans_number)) ||
00704                         (safe_strcmp(new_trans_str, split_str) == 0) )
00705                 {
00706                     /* An exact match of the Check number gives a +4 */
00707                     prob += 4;
00708                     /*DEBUG("heuristics:  probability + 4 (Check number)");*/
00709                 }
00710                 else if (strlen(new_trans_str) > 0 && strlen(split_str) > 0)
00711                 {
00712                     /* If both number are not empty yet do not match, add a
00713                                  little extra penality */
00714                     prob -= 2;
00715                 }
00716             }
00717         }
00718 
00719         /* Memo heuristics */
00720         {
00721             const char *memo = xaccSplitGetMemo(new_trans_fsplit);
00722             if (memo && strlen(memo) != 0)
00723             {
00724                 if (safe_strcasecmp(memo, xaccSplitGetMemo(split)) == 0)
00725                 {
00726                     /* An exact match of memo gives a +2 */
00727                     prob = prob + 2;
00728                     /* DEBUG("heuristics:  probability + 2 (memo)"); */
00729                 }
00730                 else if ((strncasecmp(memo, xaccSplitGetMemo(split),
00731                                       strlen(xaccSplitGetMemo(split)) / 2)
00732                           == 0))
00733                 {
00734                     /* Very primitive fuzzy match worth +1.  This matches the
00735                                  first 50% of the strings to skip annoying transaction
00736                                  number some banks seem to include in the memo but someone
00737                                  should write something more sophisticated */
00738                     prob = prob + 1;
00739                     /*DEBUG("heuristics:  probability + 1 (memo)");     */
00740                 }
00741             }
00742         }
00743 
00744         /* Description heuristics */
00745         {
00746             const char *descr = xaccTransGetDescription(new_trans);
00747             if (descr && strlen(descr) != 0)
00748             {
00749                 if (safe_strcasecmp(descr,
00750                                     xaccTransGetDescription(xaccSplitGetParent(split)))
00751                         == 0)
00752                 {
00753                     /*An exact match of Description gives a +2 */
00754                     prob = prob + 2;
00755                     /*DEBUG("heuristics:  probability + 2 (description)");*/
00756                 }
00757                 else if ((strncasecmp(descr,
00758                                       xaccTransGetDescription (xaccSplitGetParent(split)),
00759                                       strlen(xaccTransGetDescription (new_trans)) / 2)
00760                           == 0))
00761                 {
00762                     /* Very primitive fuzzy match worth +1.  This matches the
00763                                  first 50% of the strings to skip annoying transaction
00764                                  number some banks seem to include in the memo but someone
00765                                  should write something more sophisticated */
00766                     prob = prob + 1;
00767                     /*DEBUG("heuristics:  probability + 1 (description)");      */
00768                 }
00769             }
00770         }
00771 
00772         /* Is the probability high enough? Otherwise do nothing and return. */
00773         if (prob < display_threshold)
00774         {
00775             return;
00776         }
00777 
00778         /* The probability is high enough, so allocate an object
00779                  here. Allocating it only when it's actually being used is
00780                  probably quite some performance gain. */
00781         match_info = g_new0(GNCImportMatchInfo, 1);
00782 
00783         match_info->probability = prob;
00784         match_info->update_proposed = update_proposed;
00785         match_info->split = split;
00786         match_info->trans = xaccSplitGetParent(split);
00787 
00788 
00789         /* Append that to the list. Do not use g_list_append because
00790                    it is slow. The list is sorted afterwards anyway. */
00791         trans_info->match_list =
00792             g_list_prepend(trans_info->match_list,
00793                            match_info);
00794     }
00795 }/* end split_find_match */
00796 
00797 
00800 void gnc_import_find_split_matches(GNCImportTransInfo *trans_info,
00801                                    gint process_threshold,
00802                                    double fuzzy_amount_difference,
00803                                    gint match_date_hardlimit)
00804 {
00805     GList * list_element;
00806     Query *query = qof_query_create_for(GNC_ID_SPLIT);
00807     g_assert (trans_info);
00808 
00809     /* Get list of splits of the originating account. */
00810     {
00811         /* We used to traverse *all* splits of the account by using
00812            xaccAccountGetSplitList, which is a bad idea because 90% of these
00813            splits are outside the date range that is interesting. We should
00814            rather use a query according to the date region, which is
00815            implemented here.
00816         */
00817         Account *importaccount =
00818             xaccSplitGetAccount (gnc_import_TransInfo_get_fsplit (trans_info));
00819         time_t download_time = xaccTransGetDate (gnc_import_TransInfo_get_trans (trans_info));
00820 
00821         qof_query_set_book (query, gnc_get_current_book());
00822         xaccQueryAddSingleAccountMatch (query, importaccount,
00823                                         QOF_QUERY_AND);
00824         xaccQueryAddDateMatchTT (query,
00825                                  TRUE, download_time - match_date_hardlimit * 86400,
00826                                  TRUE, download_time + match_date_hardlimit * 86400,
00827                                  QOF_QUERY_AND);
00828         list_element = qof_query_run (query);
00829         /* Sigh. Doesnt help too much. We still create and run one query
00830            for each imported transaction. Maybe it would improve
00831            performance further if there is one single (master-)query at
00832            the beginning, matching the full date range and all accounts in
00833            question. However, this doesnt quite work because this function
00834            here is called from each gnc_gen_trans_list_add_trans(), which
00835            is called one at a time. Therefore the whole importer would
00836            have to change its behaviour: Accept the imported txns via
00837            gnc_gen_trans_list_add_trans(), and only when
00838            gnc_gen_trans_list_run() is called, then calculate all the
00839            different match candidates. That's too much work for now.
00840         */
00841     }
00842 
00843     /* Traverse that list, calling split_find_match on each one. Note
00844        that xaccAccountForEachSplit is declared in Account.h but
00845        implemented nowhere :-( */
00846     while (list_element != NULL)
00847     {
00848         split_find_match (trans_info, list_element->data,
00849                           process_threshold, fuzzy_amount_difference);
00850         list_element = g_list_next (list_element);
00851     }
00852 
00853     qof_query_destroy (query);
00854 }
00855 
00856 
00857 /***********************************************************************
00858  */
00859 
00862 gboolean
00863 gnc_import_process_trans_item (GncImportMatchMap *matchmap,
00864                                GNCImportTransInfo *trans_info)
00865 {
00866     Split * other_split;
00867     gnc_numeric imbalance_value;
00868 
00869     /* DEBUG("Begin"); */
00870 
00871     g_assert (trans_info);
00872     /*DEBUG("Iteration %d, action %d, split %s", i,
00873         trans_info->action,
00874         xaccTransGetDescription (gnc_import_TransInfo_get_trans
00875         (trans_info)))*/
00876     switch (gnc_import_TransInfo_get_action (trans_info))
00877     {
00878     case GNCImport_SKIP:
00879         return FALSE;
00880     case GNCImport_ADD:
00881         /* Transaction gets imported. */
00882 
00883         /* Is the transaction not balanced and there is a non-NULL destination account? */
00884         if (gnc_import_TransInfo_is_balanced(trans_info) == FALSE
00885                 && gnc_import_TransInfo_get_destacc(trans_info) != NULL)
00886         {
00887             /* Create the 'other' split. */
00888             Split *split =
00889                 xaccMallocSplit
00890                 (gnc_account_get_book
00891                  (gnc_import_TransInfo_get_destacc (trans_info)));
00892             xaccTransAppendSplit
00893             (gnc_import_TransInfo_get_trans (trans_info), split);
00894             xaccAccountInsertSplit
00895             (gnc_import_TransInfo_get_destacc (trans_info), split);
00896             /*xaccSplitSetBaseValue
00897               (split,
00898                gnc_numeric_neg(xaccTransGetImbalance
00899                        (gnc_import_TransInfo_get_trans (trans_info))),
00900                xaccTransGetCurrency
00901                (gnc_import_TransInfo_get_trans (trans_info)));*/
00902             {
00903                 /* This is a quick workaround for the bug described in
00904                                  http://gnucash.org/pipermail/gnucash-devel/2003-August/009982.html
00905                        Assume that importers won't create transactions involving two or more
00906                        currencies so we can use xaccTransGetImbalanceValue. */
00907                 imbalance_value =
00908                     gnc_numeric_neg (xaccTransGetImbalanceValue
00909                                      (gnc_import_TransInfo_get_trans (trans_info)));
00910                 xaccSplitSetValue (split, imbalance_value);
00911                 xaccSplitSetAmount (split, imbalance_value);
00912             }
00913             /*xaccSplitSetMemo (split, _("Auto-Balance split"));
00914               -- disabled due to popular request */
00915         }
00916 
00917         xaccSplitSetReconcile(gnc_import_TransInfo_get_fsplit (trans_info), CREC);
00918         /*Set reconcile date to today*/
00919         xaccSplitSetDateReconciledSecs(gnc_import_TransInfo_get_fsplit (trans_info),
00920                                        time(NULL));
00921         /* Done editing. */
00922         xaccTransCommitEdit(gnc_import_TransInfo_get_trans (trans_info));
00923         return TRUE;
00924     case GNCImport_UPDATE:
00925     {
00926         GNCImportMatchInfo *selected_match =
00927             gnc_import_TransInfo_get_selected_match(trans_info);
00928 
00929         /* If there is no selection, ignore this transaction. */
00930         if (!selected_match)
00931         {
00932             PWARN("No matching translaction to be cleared was chosen. Imported transaction will be ignored.");
00933             break;
00934         }
00935 
00936         /* Transaction gets not imported but the matching one gets
00937            updated and reconciled. */
00938         if (gnc_import_MatchInfo_get_split(selected_match) == NULL)
00939         {
00940             PERR("The split I am trying to update and reconcile is NULL, shouldn't happen!");
00941         }
00942         else
00943         {
00944             /* Update and reconcile the matching transaction */
00945             /*DEBUG("BeginEdit selected_match")*/
00946             xaccTransBeginEdit(selected_match->trans);
00947 
00948             xaccTransSetDatePostedSecs(selected_match->trans,
00949                                        xaccTransGetDate(xaccSplitGetParent(
00950                                                    gnc_import_TransInfo_get_fsplit(trans_info))));
00951 
00952             xaccSplitSetAmount(selected_match->split,
00953                                xaccSplitGetAmount(
00954                                    gnc_import_TransInfo_get_fsplit(trans_info)));
00955             xaccSplitSetValue(selected_match->split,
00956                               xaccSplitGetValue(
00957                                   gnc_import_TransInfo_get_fsplit(trans_info)));
00958 
00959             imbalance_value = xaccTransGetImbalanceValue(
00960                                   gnc_import_TransInfo_get_trans(trans_info));
00961             other_split = xaccSplitGetOtherSplit(selected_match->split);
00962             if (!gnc_numeric_zero_p(imbalance_value) && other_split)
00963             {
00964                 if (xaccSplitGetReconcile(other_split) == NREC)
00965                 {
00966                     imbalance_value = gnc_numeric_neg(imbalance_value);
00967                     xaccSplitSetValue(other_split, imbalance_value);
00968                     xaccSplitSetAmount(other_split, imbalance_value);
00969                 }
00970                 /* else GC will automatically insert a split to equity
00971                    to balance the transaction */
00972             }
00973 
00974             xaccTransSetDescription(selected_match->trans,
00975                                     xaccTransGetDescription(
00976                                         gnc_import_TransInfo_get_trans(trans_info)));
00977 
00978             if (xaccSplitGetReconcile(selected_match->split) == NREC)
00979             {
00980                 xaccSplitSetReconcile(selected_match->split, CREC);
00981             }
00982 
00983             /* Set reconcile date to today */
00984             xaccSplitSetDateReconciledSecs(selected_match->split, time(NULL));
00985 
00986             /* Copy the online id to the reconciled transaction, so
00987                the match will be remembered */
00988             if (gnc_import_split_has_online_id(trans_info->first_split))
00989             {
00990                 gnc_import_set_split_online_id(selected_match->split,
00991                                                gnc_import_get_split_online_id(trans_info->first_split));
00992             }
00993 
00994             /* Done editing. */
00995             /*DEBUG("CommitEdit selected_match")*/
00996             xaccTransCommitEdit(selected_match->trans);
00997 
00998             /* Store the mapping to the other account in the MatchMap. */
00999             matchmap_store_destination(matchmap, trans_info, TRUE);
01000 
01001             /* Erase the downloaded transaction */
01002             xaccTransDestroy(trans_info->trans);
01003             /*DEBUG("CommitEdit trans")*/
01004             xaccTransCommitEdit(trans_info->trans);
01005             /* Very important: Make sure the freed transaction is not freed again! */
01006             trans_info->trans = NULL;
01007         }
01008     }
01009     return TRUE;
01010     case GNCImport_CLEAR:
01011     {
01012         GNCImportMatchInfo *selected_match =
01013             gnc_import_TransInfo_get_selected_match (trans_info);
01014 
01015         /* If there is no selection, ignore this transaction. */
01016         if (!selected_match)
01017         {
01018             PWARN("No matching translaction to be cleared was chosen. Imported transaction will be ignored.");
01019             break;
01020         }
01021 
01022         /* Transaction gets not imported but the matching one gets
01023            reconciled. */
01024         if (gnc_import_MatchInfo_get_split (selected_match) == NULL)
01025         {
01026             PERR("The split I am trying to reconcile is NULL, shouldn't happen!");
01027         }
01028         else
01029         {
01030             /* Reconcile the matching transaction */
01031             /*DEBUG("BeginEdit selected_match")*/
01032             xaccTransBeginEdit(selected_match->trans);
01033 
01034             if (xaccSplitGetReconcile
01035                     (selected_match->split) == NREC)
01036                 xaccSplitSetReconcile
01037                 (selected_match->split, CREC);
01038             /* Set reconcile date to today */
01039             xaccSplitSetDateReconciledSecs
01040             (selected_match->split, time(NULL));
01041 
01042             /* Copy the online id to the reconciled transaction, so
01043                          the match will be remembered */
01044             if (gnc_import_split_has_online_id(trans_info->first_split))
01045                 gnc_import_set_split_online_id
01046                 (selected_match->split,
01047                  gnc_import_get_split_online_id(trans_info->first_split));
01048 
01049             /* Done editing. */
01050             /*DEBUG("CommitEdit selected_match")*/
01051             xaccTransCommitEdit
01052             (selected_match->trans);
01053 
01054             /* Store the mapping to the other account in the MatchMap. */
01055             matchmap_store_destination (matchmap, trans_info, TRUE);
01056 
01057             /* Erase the downloaded transaction */
01058             xaccTransDestroy(trans_info->trans);
01059             /*DEBUG("CommitEdit trans")*/
01060             xaccTransCommitEdit(trans_info->trans);
01061             /* Very important: Make sure the freed transaction is not freed again! */
01062             trans_info->trans = NULL;
01063         }
01064     }
01065     return TRUE;
01066     default:
01067         DEBUG("Invalid GNCImportAction for this imported transaction.");
01068     }
01069     /*DEBUG("End");*/
01070     return FALSE;
01071 }
01072 
01073 /********************************************************************\
01074  * check_trans_online_id() Callback function used by
01075  * gnc_import_exists_online_id.  Takes pointers to transaction and split,
01076  * returns 0 if their online_id kvp_frames do NOT match, or if the split
01077  * belongs to the transaction
01078 \********************************************************************/
01079 static gint check_trans_online_id(Transaction *trans1, void *user_data)
01080 {
01081     Account *account;
01082     Split *split1;
01083     Split *split2 = user_data;
01084     const gchar *online_id1;
01085     const gchar *online_id2;
01086 
01087     account = xaccSplitGetAccount(split2);
01088     split1 = xaccTransFindSplitByAccount(trans1, account);
01089     if (split1 == split2)
01090         return 0;
01091 
01092     /* hack - we really want to iterate over the _splits_ of the account
01093        instead of the transactions */
01094     g_assert(split1 != NULL);
01095 
01096     if (gnc_import_split_has_online_id(split1))
01097         online_id1 = gnc_import_get_split_online_id(split1);
01098     else
01099         online_id1 = gnc_import_get_trans_online_id(trans1);
01100 
01101     online_id2 = gnc_import_get_split_online_id(split2);
01102 
01103     if ((online_id1 == NULL) ||
01104             (online_id2 == NULL) ||
01105             (strcmp(online_id1, online_id2) != 0))
01106     {
01107         return 0;
01108     }
01109     else
01110     {
01111         /*printf("test_trans_online_id(): Duplicate found\n");*/
01112         return 1;
01113     }
01114 }
01115 
01118 gboolean gnc_import_exists_online_id (Transaction *trans)
01119 {
01120     int i;
01121     gboolean online_id_exists = FALSE;
01122     Account *dest_acct;
01123     Split *source_split;
01124 
01125     /* Look for an online_id in the first split */
01126     source_split = xaccTransGetSplit(trans, 0);
01127     g_assert(source_split);
01128 
01129     /* DEBUG("%s%d%s","Checking split ",i," for duplicates"); */
01130     dest_acct = xaccSplitGetAccount(source_split);
01131     online_id_exists = xaccAccountForEachTransaction(dest_acct,
01132                        check_trans_online_id,
01133                        source_split);
01134 
01135     /* If it does, abort the process for this transaction, since it is
01136        already in the system. */
01137     if (online_id_exists == TRUE)
01138     {
01139         DEBUG("%s", "Transaction with same online ID exists, destroying current transaction");
01140         xaccTransDestroy(trans);
01141         xaccTransCommitEdit(trans);
01142     }
01143     return online_id_exists;
01144 }
01145 
01146 
01147 /* ******************************************************************
01148  */
01149 
01151 GNCImportTransInfo *
01152 gnc_import_TransInfo_new (Transaction *trans, GncImportMatchMap *matchmap)
01153 {
01154     GNCImportTransInfo *transaction_info;
01155     Split *split;
01156     g_assert (trans);
01157 
01158     transaction_info = g_new0(GNCImportTransInfo, 1);
01159 
01160     transaction_info->trans = trans;
01161     /* Only use first split, the source split */
01162     split = xaccTransGetSplit(trans, 0);
01163     g_assert(split);
01164     transaction_info->first_split = split;
01165 
01166     /* Try to find a previously selected destination account
01167        string match for the ADD action */
01168     gnc_import_TransInfo_set_destacc (transaction_info,
01169                                       matchmap_find_destination (matchmap, transaction_info),
01170                                       FALSE);
01171     return transaction_info;
01172 }
01173 
01174 
01176 static gint compare_probability (gconstpointer a,
01177                                  gconstpointer b)
01178 {
01179     return(((GNCImportMatchInfo *)b)->probability -
01180            ((GNCImportMatchInfo *)a)->probability);
01181 }
01182 
01187 void
01188 gnc_import_TransInfo_init_matches (GNCImportTransInfo *trans_info,
01189                                    GNCImportSettings *settings)
01190 {
01191     GNCImportMatchInfo * best_match = NULL;
01192     g_assert (trans_info);
01193 
01194 
01195     /* Find all split matches in originating account. */
01196     gnc_import_find_split_matches(trans_info,
01197                                   gnc_import_Settings_get_display_threshold (settings),
01198                                   gnc_import_Settings_get_fuzzy_amount (settings),
01199                                   gnc_import_Settings_get_match_date_hardlimit (settings));
01200 
01201     if (trans_info->match_list != NULL)
01202     {
01203         trans_info->match_list = g_list_sort(trans_info->match_list,
01204                                              compare_probability);
01205         best_match = g_list_nth_data(trans_info->match_list, 0);
01206         gnc_import_TransInfo_set_selected_match (trans_info,
01207                 best_match,
01208                 FALSE);
01209         if (best_match != NULL &&
01210                 best_match->probability >= gnc_import_Settings_get_clear_threshold(settings))
01211         {
01212             trans_info->action = GNCImport_CLEAR;
01213             trans_info->selected_match_info = best_match;
01214         }
01215         else if (best_match == NULL ||
01216                  best_match->probability <= gnc_import_Settings_get_add_threshold(settings))
01217         {
01218             trans_info->action = GNCImport_ADD;
01219         }
01220         else if (gnc_import_Settings_get_action_skip_enabled(settings))
01221         {
01222             trans_info->action = GNCImport_SKIP;
01223         }
01224         else if (gnc_import_Settings_get_action_update_enabled(settings))
01225         {
01226             trans_info->action = GNCImport_UPDATE;
01227         }
01228         else
01229         {
01230             trans_info->action = GNCImport_ADD;
01231         }
01232     }
01233     else
01234     {
01235         trans_info->action = GNCImport_ADD;
01236     }
01237     if (best_match &&
01238             trans_info->action == GNCImport_CLEAR &&
01239             gnc_import_Settings_get_action_update_enabled(settings))
01240     {
01241         if (best_match->update_proposed)
01242         {
01243             trans_info->action = GNCImport_UPDATE;
01244         }
01245     }
01246 
01247     trans_info->previous_action = trans_info->action;
01248 }
01249 
01250 
01251 /* Try to automatch a transaction to a destination account if the */
01252 /* transaction hasn't already been manually assigned to another account */
01253 gboolean
01254 gnc_import_TransInfo_refresh_destacc (GNCImportTransInfo *transaction_info,
01255                                       GncImportMatchMap *matchmap)
01256 {
01257     Account *orig_destacc;
01258     Account *new_destacc = NULL;
01259     g_assert(transaction_info);
01260 
01261     orig_destacc = gnc_import_TransInfo_get_destacc(transaction_info);
01262 
01263     /* if we haven't manually selected a destination account for this transaction */
01264     if (gnc_import_TransInfo_get_destacc_selected_manually(transaction_info) == FALSE)
01265     {
01266         /* Try to find the destination account for this transaction based on prior ones */
01267         new_destacc = matchmap_find_destination(matchmap, transaction_info);
01268         gnc_import_TransInfo_set_destacc(transaction_info, new_destacc, FALSE);
01269     }
01270     else
01271     {
01272         new_destacc = orig_destacc;
01273     }
01274 
01275     /* account has changed */
01276     if (new_destacc != orig_destacc)
01277     {
01278         return TRUE;
01279     }
01280     else   /* account is the same */
01281     {
01282         return FALSE;
01283     }
01284 }
01285 
01286 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines