00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00030 #include "config.h"
00031 #include <string.h>
00032 #include <glib.h>
00033 #include "import-match-map.h"
00034 #include "gnc-ui-util.h"
00035 #include "gnc-engine.h"
00036
00037
00038
00039
00040
00041 static QofLogModule log_module = GNC_MOD_IMPORT;
00042
00043
00044 struct _GncImportMatchMap
00045 {
00046 kvp_frame * frame;
00047 Account * acc;
00048 QofBook * book;
00049 };
00050
/* First path component of the two KVP sub-trees used by this file. */
#define IMAP_FRAME "import-map"             /* category/key lookups         */
#define IMAP_FRAME_BAYES "import-map-bayes" /* per-token Bayesian counters  */
00053
00054 static GncImportMatchMap *
00055 gnc_imap_create_from_frame (kvp_frame *frame, Account *acc, QofBook *book)
00056 {
00057 GncImportMatchMap *imap;
00058
00059 g_return_val_if_fail (frame != NULL, NULL);
00060 g_return_val_if_fail ((acc && !book) || (!acc && book), NULL);
00061
00062 imap = g_new0(GncImportMatchMap, 1);
00063 imap->frame = frame;
00064
00065
00066
00067
00068 if (acc)
00069 book = gnc_account_get_book (acc);
00070 imap->acc = acc;
00071 imap->book = book;
00072
00073 return imap;
00074 }
00075
00077 GncImportMatchMap * gnc_imap_create_from_account (Account *acc)
00078 {
00079 kvp_frame * frame;
00080
00081 if (!acc) return NULL;
00082 frame = xaccAccountGetSlots (acc);
00083 g_return_val_if_fail (frame != NULL, NULL);
00084
00085 return gnc_imap_create_from_frame (frame, acc, NULL);
00086 }
00087
00088 GncImportMatchMap * gnc_imap_create_from_book (QofBook *book)
00089 {
00090 kvp_frame * frame;
00091
00092 if (!book) return NULL;
00093 frame = qof_book_get_slots (book);
00094 g_return_val_if_fail (frame != NULL, NULL);
00095
00096 return gnc_imap_create_from_frame (frame, NULL, book);
00097 }
00098
00100 void gnc_imap_destroy (GncImportMatchMap *imap)
00101 {
00102 if (!imap) return;
00103 g_free (imap);
00104 }
00105
00107 void gnc_imap_clear (GncImportMatchMap *imap)
00108 {
00109 if (!imap) return;
00110
00111
00112 kvp_frame_set_slot_path (imap->frame, NULL, IMAP_FRAME);
00113
00114
00115 kvp_frame_set_slot_path (imap->frame, NULL, IMAP_FRAME_BAYES);
00116
00117
00118 }
00119
00121 Account * gnc_imap_find_account (GncImportMatchMap *imap, const char *category,
00122 const char *key)
00123 {
00124 kvp_value *value;
00125 GncGUID * guid;
00126
00127 if (!imap || !key) return NULL;
00128 if (!category)
00129 {
00130 category = key;
00131 key = NULL;
00132 }
00133
00134 value = kvp_frame_get_slot_path (imap->frame, IMAP_FRAME, category, key, NULL);
00135 if (!value) return NULL;
00136
00137 guid = kvp_value_get_guid (value);
00138 return xaccAccountLookup (guid, imap->book);
00139 }
00140
00142 void gnc_imap_add_account (GncImportMatchMap *imap, const char *category,
00143 const char *key, Account *acc)
00144 {
00145 kvp_value *value;
00146
00147 if (!imap || !key || !acc || (strlen (key) == 0)) return;
00148 if (!category)
00149 {
00150 category = key;
00151 key = NULL;
00152 }
00153
00154 value = kvp_value_new_guid (xaccAccountGetGUID (acc));
00155 g_return_if_fail (value != NULL);
00156
00157 kvp_frame_set_slot_path (imap->frame, value, IMAP_FRAME, category, key, NULL);
00158 kvp_value_delete (value);
00159
00160
00161 }
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171 struct account_token_count
00172 {
00173 char* account_name;
00174 gint64 token_count;
00175 };
00176
00180 struct token_accounts_info
00181 {
00182 GList *accounts;
00183 gint64 total_count;
00184 };
00185
00189 static void buildTokenInfo(const char *key, kvp_value *value, gpointer data)
00190 {
00191 struct token_accounts_info *tokenInfo = (struct token_accounts_info*)data;
00192 struct account_token_count* this_account;
00193
00194
00195
00196
00197
00198 tokenInfo->total_count += kvp_value_get_gint64(value);
00199
00200
00201 this_account = (struct account_token_count*)
00202 g_new0(struct account_token_count, 1);
00203
00204
00205 this_account->account_name = (char*)key;
00206 this_account->token_count = kvp_value_get_gint64(value);
00207
00208
00209 tokenInfo->accounts = g_list_prepend(tokenInfo->accounts, this_account);
00210 }
00211
/* Running per-account products accumulated over all tokens seen so far. */
struct account_probability
{
    double product;             /* product of (token_count / total_count)     */
    double product_difference;  /* product of (1 - token_count / total_count) */
};
00221
00226 #define PROBABILITY_FACTOR 100000
00227 static void buildProbabilities(gpointer key, gpointer value, gpointer data)
00228 {
00229 GHashTable *final_probabilities = (GHashTable*)data;
00230 struct account_probability *account_p = (struct account_probability*)value;
00231
00232
00233
00234
00235
00236 gint32 probability =
00237 (account_p->product /
00238 (account_p->product + account_p->product_difference))
00239 * PROBABILITY_FACTOR;
00240
00241 PINFO("P('%s') = '%d'\n", (char*)key, probability);
00242
00243 g_hash_table_insert(final_probabilities, key, GINT_TO_POINTER(probability));
00244 }
00245
00247 static void freeProbabilities(gpointer key, gpointer value, gpointer data)
00248 {
00249
00250
00251
00252 g_free(value);
00253 }
00254
00258 struct account_info
00259 {
00260 char* account_name;
00261 gint32 probability;
00262 };
00263
00270 static void highestProbability(gpointer key, gpointer value, gpointer data)
00271 {
00272 struct account_info *account_i = (struct account_info*)data;
00273
00274
00275 if (GPOINTER_TO_INT(value) > account_i->probability)
00276 {
00277
00278 account_i->probability = GPOINTER_TO_INT(value);
00279 account_i->account_name = key;
00280 }
00281 }
00282
00283
00284 #define threshold (.90 * PROBABILITY_FACTOR)
00285
00287 Account* gnc_imap_find_account_bayes(GncImportMatchMap *imap, GList *tokens)
00288 {
00289 struct token_accounts_info tokenInfo;
00291 GList *current_token;
00293 GList *current_account_token;
00295 struct account_token_count *account_c;
00298 struct account_probability *account_p;
00301 GHashTable *running_probabilities = g_hash_table_new(g_str_hash, g_str_equal);
00302 GHashTable *final_probabilities = g_hash_table_new(g_str_hash, g_str_equal);
00303 struct account_info account_i;
00304 kvp_value* value;
00305 kvp_frame* token_frame;
00306
00307 ENTER(" ");
00308
00309
00310 if (!imap)
00311 {
00312 PINFO("imap is null, returning null");
00313 LEAVE(" ");
00314 return NULL;
00315 }
00316
00317
00318
00319
00320 for (current_token = tokens; current_token; current_token = current_token->next)
00321 {
00322
00323 memset(&tokenInfo, 0, sizeof(struct token_accounts_info));
00324
00325 PINFO("token: '%s'", (char*)current_token->data);
00326
00327
00328
00329
00330
00331 value = kvp_frame_get_slot_path(imap->frame, IMAP_FRAME_BAYES,
00332 (char*)current_token->data, NULL);
00333
00334
00335 if (!value)
00336 continue;
00337
00338
00339
00340
00341 token_frame = kvp_value_get_frame(value);
00342
00343
00344 if (!token_frame)
00345 {
00346 PERR("token '%s' has no accounts", (char*)current_token->data);
00347 continue;
00348 }
00349
00350
00351
00352
00353
00354 kvp_frame_for_each_slot(token_frame, buildTokenInfo, &tokenInfo);
00355
00356
00357
00358
00359 for (current_account_token = tokenInfo.accounts; current_account_token;
00360 current_account_token = current_account_token->next)
00361 {
00362
00363 account_c = (struct account_token_count*)current_account_token->data;
00364
00365 PINFO("account_c->account_name('%s'), "
00366 "account_c->token_count('%ld')/total_count('%ld')",
00367 account_c->account_name, (long)account_c->token_count,
00368 (long)tokenInfo.total_count);
00369
00370 account_p = g_hash_table_lookup(running_probabilities,
00371 account_c->account_name);
00372
00373
00374
00375
00376 if (account_p)
00377 {
00378 account_p->product =
00379 ((double)account_c->token_count / (double)tokenInfo.total_count)
00380 * account_p->product;
00381 account_p->product_difference =
00382 ((double)1 - ((double)account_c->token_count /
00383 (double)tokenInfo.total_count))
00384 * account_p->product_difference;
00385 PINFO("product == %f, product_difference == %f",
00386 account_p->product, account_p->product_difference);
00387 }
00388 else
00389 {
00390
00391 PINFO("adding a new entry for this account");
00392 account_p = (struct account_probability*)
00393 g_new0(struct account_probability, 1);
00394
00395
00396 account_p->product = ((double)account_c->token_count /
00397 (double)tokenInfo.total_count);
00398 account_p->product_difference =
00399 (double)1 - ((double)account_c->token_count /
00400 (double)tokenInfo.total_count);
00401
00402 PINFO("product == %f, product_difference == %f",
00403 account_p->product, account_p->product_difference);
00404
00405
00406
00407 g_hash_table_insert(running_probabilities,
00408 account_c->account_name, account_p);
00409 }
00410 }
00411
00412
00413 for (current_account_token = tokenInfo.accounts; current_account_token;
00414 current_account_token = current_account_token->next)
00415 {
00416
00417 g_free((struct account_token_count*)current_account_token->data);
00418 }
00419
00420 g_list_free(tokenInfo.accounts);
00421 }
00422
00423
00424
00425
00426 g_hash_table_foreach(running_probabilities, buildProbabilities,
00427 final_probabilities);
00428
00429
00430 memset(&account_i, 0, sizeof(struct account_info));
00431 g_hash_table_foreach(final_probabilities, highestProbability, &account_i);
00432
00433
00434 g_hash_table_foreach(running_probabilities, freeProbabilities, NULL);
00435
00436
00437 g_hash_table_destroy(running_probabilities);
00438 g_hash_table_destroy(final_probabilities);
00439
00440 PINFO("highest P('%s') = '%d'",
00441 account_i.account_name ? account_i.account_name : "(null)",
00442 account_i.probability);
00443
00444
00445 if (account_i.probability >= threshold)
00446 {
00447 PINFO("found match");
00448 LEAVE(" ");
00449 return gnc_account_lookup_by_full_name(gnc_book_get_root_account(imap->book),
00450 account_i.account_name);
00451 }
00452
00453 PINFO("no match");
00454 LEAVE(" ");
00455
00456 return NULL;
00457 }
00458
00459
00461 void gnc_imap_add_account_bayes(GncImportMatchMap *imap, GList *tokens, Account *acc)
00462 {
00463 GList *current_token;
00464 kvp_value *value;
00465 gint64 token_count;
00466 char* account_fullname;
00467 kvp_value *new_value;
00468
00469 ENTER(" ");
00470
00471
00472 if (!imap)
00473 {
00474 LEAVE(" ");
00475 return;
00476 }
00477
00478 account_fullname = gnc_account_get_full_name(acc);
00479
00480 PINFO("account name: '%s'\n", account_fullname);
00481
00482
00483 for (current_token = g_list_first(tokens); current_token;
00484 current_token = current_token->next)
00485 {
00486
00487
00488
00489
00490 if (!current_token->data || (*((char*)current_token->data) == '\0'))
00491 continue;
00492
00493
00494 token_count = 0;
00495
00496 PINFO("adding token '%s'\n", (char*)current_token->data);
00497
00498
00499 value = kvp_frame_get_slot_path(imap->frame, IMAP_FRAME_BAYES,
00500 (char*)current_token->data, account_fullname,
00501 NULL);
00502
00503
00504
00505
00506
00507 if (value)
00508 {
00509 PINFO("found existing value of '%ld'\n",
00510 (long)kvp_value_get_gint64(value));
00511
00512
00513 token_count += kvp_value_get_gint64(value);
00514 }
00515
00516
00517 token_count++;
00518
00519
00520 new_value = kvp_value_new_gint64(token_count);
00521
00522
00523
00524
00525 kvp_frame_set_slot_path(imap->frame, new_value, IMAP_FRAME_BAYES,
00526 (char*)current_token->data, account_fullname, NULL);
00527
00528
00529
00530 kvp_value_delete(new_value);
00531 }
00532
00533
00534 g_free(account_fullname);
00535
00536 LEAVE(" ");
00537 }
00538