00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <string.h>
00015 #include <math.h>
00016 #include <assert.h>
00017 #include <new>
00018
00019 #include "bsapi.h"
00020 #include "getopt.h"
00021
00022 #ifdef WIN32
00023 #define DIRSEP "\\"
00024 #else
00025 #define DIRSEP "/"
00026 #endif
00027
00028
00029 #define LID_MIN_LEN_TO_PROCESS 2.0f // 2 seconds
00030 #define LID_BELOW_MIN_LEN_TEXT "(too_short)"
00031 #define LID_MAX_WEIGHTS 64
00032 #define LID_OUT_COLUMN_CHARS "snlr"
00033
00034
00035 #define LID_DEF_WAVE_FMT "lin16"
00036 #define LID_DEF_WAVE_EXT "raw"
00037 #define LID_DEF_NCHANNELS 1
00038
00039
00040
00041
00042 class ErrorHandler : public SErrorCallbackI
00043 {
00044 public:
00045 ErrorHandler() : mVerbose(false) {;}
00046 virtual void BSAPI_METHOD OnTextMessage(SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00047 {
00048 unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00049 switch(type)
00050 {
00051 case mtError:
00052 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00053 break;
00054 case mtWarning:
00055 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00056 break;
00057 case mtLog:
00058 LogMessage(pMessage);
00059 break;
00060 }
00061 }
00062
00063 void LogMessage(const char *pMessage, ...)
00064 {
00065 if (mVerbose)
00066 {
00067 va_list ap;
00068 va_start(ap, pMessage);
00069 vfprintf(stderr, pMessage, ap);
00070 fprintf(stderr, "\n");
00071 va_end(ap);
00072 }
00073 }
00074
00075 void SetVerbose(bool verbose) {mVerbose = verbose;}
00076
00077 protected:
00078 bool mVerbose;
00079 } gErrorHandler;
00080
00081 void help()
00082 {
00083 puts("\n Language identification ");
00084 printf(" %s\n", BSAPIVersion());
00085 puts(" ================================================================ ");
00086 puts(" ");
00087 puts(" USAGE: lid2 [options] ");
00088 puts(" ");
00089 puts(" system configuration: ");
00090 puts(" -c file configuration file ");
00091 puts(" -m dir model directory ");
00092 puts(" -a str1,str2... active languages ");
00093 puts(" -v verbose mode ");
00094 puts(" ");
00095 puts(" input: ");
00096 puts(" -i file input file ");
00097 puts(" -l file list of input files ");
00098 puts(" -d dir input directory ");
00099 puts(" -e str [raw] extension of audio files ");
00100 puts(" -w fmt [lin16] waveform format (lin16, lin8, alaw, mulaw) ");
00101 puts(" -n num [1] number of channels in audio files ");
00102 puts(" -p start,len active waveform part (in seconds) ");
00103 puts(" ");
00104 puts(" output: ");
00105 puts(" -s file output score file ");
00106 puts(" -o suppress the 'too short' output ");
00107 puts(" -r produce scores for all languages ");
00108 puts(" -u add even the UBM score ");
00109 puts(" -k columns enable column output format (see columns) ");
00110 puts(" ");
00111 puts(" columns: (columns to print are specified by string of ");
00112 puts(" the characters below, e.g. lsn) ");
00113 puts(" s raw score ");
00114 puts(" n score normalized to <0, 100> ");
00115 puts(" l speech length ");
00116 puts(" r record length ");
00117 puts(" ");
00118 puts(" training: ");
00119 puts(" -t enable training ");
00120 puts(" -g str name of language to be trained ");
00121 puts(" ");
00122 }
00123
00124
00125 void DumpColWise(FILE *pFileHandle, const char *pInputName, char **ppNames, float *pScores, int nModels,
00126 float recordLength, float speechLength, const char *pColumnFmt, bool ubmPresent, bool suppressTooShort)
00127 {
00128 for (int i = 0; i < nModels; i++)
00129 {
00130 fprintf(pFileHandle, "%s %s", pInputName, ppNames[i]);
00131 for (const char *pc = pColumnFmt; *pc != '\0'; pc++)
00132 {
00133 switch (*pc)
00134 {
00135 case 's':
00136 if (speechLength < LID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00137 fprintf(pFileHandle, " -inf");
00138 else
00139 fprintf(pFileHandle, " %.3f", pScores[i]);
00140 break;
00141
00142 case 'n':
00143 if (speechLength < LID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00144 fprintf(pFileHandle, " 0.000");
00145 else
00146 fprintf(pFileHandle, " %.3f", (i == 0 && ubmPresent) ? pScores[i] : expf(pScores[i]) * 100.0f);
00147 break;
00148
00149 case 'l':
00150 fprintf(pFileHandle, " %.3f", speechLength);
00151 break;
00152
00153 case 'r':
00154 fprintf(pFileHandle, " %.3f", recordLength);
00155 break;
00156 }
00157 }
00158
00159 if (speechLength < LID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00160 fprintf(pFileHandle, " %s", LID_BELOW_MIN_LEN_TEXT);
00161
00162 fprintf(pFileHandle, "\n");
00163 }
00164 }
00165
00166
00167
00168 void DumpRowWise(FILE *pFileHandle, const char *pInputName, char **ppNames, float *pScores, int nModels,
00169 float speechLength, bool suppressTooShort)
00170 {
00171 static bool first_time = true;
00172
00173 if (first_time)
00174 {
00175 if (nModels > 0)
00176 fprintf(pFileHandle, "%s", ppNames[0]);
00177 for (int i = 1; i < nModels; i++)
00178 fprintf(pFileHandle, " %s", ppNames[i]);
00179 fprintf(pFileHandle, "\n");
00180 first_time = false;
00181 }
00182
00183 fprintf(pFileHandle, "%s", pInputName);
00184 for (int i = 0; i < nModels; i++)
00185 {
00186 if (speechLength < LID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00187 fprintf(pFileHandle, " -inf");
00188 else
00189 fprintf(pFileHandle, " %.3f", pScores[i]);
00190 }
00191
00192 if (speechLength < LID_MIN_LEN_TO_PROCESS && !suppressTooShort)
00193 fprintf(pFileHandle, " %s", LID_BELOW_MIN_LEN_TEXT);
00194
00195 fprintf(pFileHandle, "\n");
00196 }
00197
00198
00199 bool DumpScore(FILE *pFileHandle, const char *pInputName, SScoresI *pScores, bool dumpAllScores,
00200 const char *pColumnFmt, bool addUBMScore, bool suppressTooShort)
00201 {
00202 float speech_length = pScores->GetTestLength();
00203 float record_length = 0.0f;
00204
00205 if (dumpAllScores)
00206 {
00207 int num = pScores->GetNScores();
00208 char **ppnames = addUBMScore ? pScores->GetExtendedNames() : pScores->GetNames();
00209 float *pscores = addUBMScore ? pScores->GetExtendedScores() : pScores->GetScores();
00210
00211
00212 if (!ppnames || !pscores)
00213 return false;
00214
00215 if (addUBMScore)
00216 num++;
00217
00218
00219 if (pColumnFmt)
00220 {
00221 DumpColWise(pFileHandle, pInputName, ppnames, pscores, num, record_length,
00222 speech_length, pColumnFmt, addUBMScore, suppressTooShort);
00223 }
00224 else
00225 {
00226 DumpRowWise(pFileHandle, pInputName, ppnames, pscores, num, speech_length, suppressTooShort);
00227 }
00228 }
00229 else
00230 {
00231 float score;
00232 char *pname = pScores->GetBestName(&score);
00233
00234 if (!pname)
00235 return false;
00236
00237 DumpColWise(pFileHandle, pInputName, &pname, &score, 1, record_length,
00238 speech_length, (pColumnFmt ? pColumnFmt : "s"), !strcmp(pname, "other"), suppressTooShort);
00239 }
00240
00241 return true;
00242 }
00243
00244 #ifdef PROCESS_WAVEFORM
/**
 * Reads a whole file into a newly allocated buffer.
 *
 * The buffer holds the file content followed by one terminating '\0' byte.
 * The caller owns the buffer and must free it with delete [].
 *
 * @param pFile  path of the file to read; a null path fails
 * @param pLen   optional; receives the number of content bytes (without the
 *               terminator). It is written only when the load succeeds.
 * @return the buffer, or 0 when the file cannot be opened, its size cannot
 *         be determined or does not fit into an int, memory cannot be
 *         allocated, or fewer bytes than expected could be read
 */
char *LoadFileToMem(const char *pFile, int *pLen)
{
    if (!pFile)
        return 0;

    FILE *pf = fopen(pFile, "rb");
    if (!pf)
        return 0;

    char *pbuffer = 0;

    // Determine the size; ftell() reports failure as -1, which must not be
    // converted to an unsigned size.
    long file_size = -1;
    if (fseek(pf, 0, SEEK_END) == 0)
        file_size = ftell(pf);

    // The length is handed back as int, so reject sizes that do not survive
    // the conversion.
    const bool size_ok = file_size >= 0 &&
                         static_cast<long>(static_cast<int>(file_size)) == file_size;

    if (size_ok && fseek(pf, 0, SEEK_SET) == 0)
    {
        const size_t size = static_cast<size_t>(file_size);

        pbuffer = new (std::nothrow) char [size + 1];
        if (pbuffer)
        {
            if (fread(pbuffer, 1, size, pf) == size)
            {
                pbuffer[size] = '\0';
                if (pLen)
                    *pLen = static_cast<int>(file_size);
            }
            else
            {
                delete [] pbuffer;
                pbuffer = 0;
            }
        }
    }

    fclose(pf);
    return pbuffer;
}
00279 #endif
00280
00281 int main(int argc, char *argv[])
00282 {
00283
00284 const char *pconfig_file = 0;
00285 const char *pmodel_dir = 0;
00286 const char *pinput_file = 0;
00287 const char *plist_file = 0;
00288 const char *pinput_dir = 0;
00289 const char *pwave_fmt = LID_DEF_WAVE_FMT;
00290 const char *pwave_ext = LID_DEF_WAVE_EXT;
00291 const char *poutput_file = 0;
00292 const char *pcolumn_fmt = 0;
00293 const char *plang_name = 0;
00294 const char *pactive_langs = 0;
00295 int nchannels = LID_DEF_NCHANNELS;
00296 bool training_mode = false;
00297 bool dump_all_scores = false;
00298 bool add_ubm_score = false;
00299 bool supp_too_short = false;
00300 float acwf_start = 0;
00301 float acwf_len = 0;
00302
00303
00304 if(argc == 1)
00305 {
00306 help();
00307 return 0;
00308 }
00309
00310 optind = 0;
00311 while (1)
00312 {
00313 int c = getopt(argc, argv, const_cast<char *>("-c:m:i:l:d:e:n:w:s:a:otrvg:uk:p:"));
00314 if(c == -1)
00315 break;
00316
00317 switch(c)
00318 {
00319 case 'c':
00320 pconfig_file = optarg;
00321 break;
00322 case 'm':
00323 pmodel_dir = optarg;
00324 break;
00325 case 'i':
00326 pinput_file = optarg;
00327 break;
00328 case 'l':
00329 plist_file = optarg;
00330 break;
00331 case 'd':
00332 pinput_dir = optarg;
00333 break;
00334 case 'w':
00335 pwave_fmt = optarg;
00336 break;
00337 case 'e':
00338 pwave_ext = optarg;
00339 break;
00340 case 'n':
00341 if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00342 {
00343 fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00344 return 1;
00345 }
00346 break;
00347 case 's':
00348 poutput_file = optarg;
00349 break;
00350 case 'o':
00351 supp_too_short = true;
00352 break;
00353 case 'v':
00354 gErrorHandler.SetVerbose(true);
00355 break;
00356 case 't':
00357 training_mode = true;
00358 break;
00359 case 'r':
00360 dump_all_scores = true;
00361 break;
00362 case 'u':
00363 add_ubm_score = true;
00364 break;
00365 case 'k':
00366 pcolumn_fmt = optarg;
00367 if (strspn(pcolumn_fmt, LID_OUT_COLUMN_CHARS) != strlen(pcolumn_fmt))
00368 {
00369 fprintf(stderr, "ERROR: Wrong format of output columns string. "
00370 "The set of allowed characters is '%s'.\n", LID_OUT_COLUMN_CHARS);
00371 return 1;
00372 }
00373 break;
00374 case 'g':
00375 plang_name = optarg;
00376 break;
00377 case 'a':
00378 pactive_langs = optarg;
00379 break;
00380 case 'p':
00381 if(sscanf(optarg, "%f,%f", &acwf_start, &acwf_len) != 2)
00382 {
00383 fprintf(stderr, "ERROR: Can not parse active waveform part '%s'\n", optarg);
00384 return 1;
00385 }
00386 break;
00387 case '?':
00388 fprintf(stderr, "ERROR: Command line parsing error.\n");
00389 return 1;
00390 default :
00391 fprintf(stderr, "ERROR: Command line parsing error. Unexpected argument '%s'.\n", optarg);
00392 return 1;
00393 }
00394 }
00395
00396
00397 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00398 if (plicman)
00399 {
00400 plicman->SetErrorHandler(&gErrorHandler);
00401 plicman->RegisterLicenseFile("license.dat");
00402 }
00403
00404
00405 SLID2I *plid = static_cast<SLID2I *>(BSAPICreateInstance(SIID_LID2));
00406 if(!plid)
00407 {
00408 return 1;
00409 }
00410
00411
00412 plid->SetErrorHandler(&gErrorHandler);
00413
00414
00415 char pdefault_cfg[1024];
00416 sprintf(pdefault_cfg, "settings%ssigmodelling", DIRSEP);
00417 if(!plid->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00418 {
00419 plid->Release();
00420 return 1;
00421 }
00422
00423
00424
00425 SWaveformFormatConvertorI *pwc = plid->GetWaveformFormatConvertor();
00426 if (pwc)
00427 {
00428 pwc->SetNChannels(nchannels);
00429 pwc->SetInputFormatStr(pwave_fmt);
00430 }
00431
00432
00433
00434
00435
00436 if(training_mode)
00437 {
00438
00439
00440 if(pmodel_dir && !plid->SetModelDirectory(pmodel_dir))
00441 {
00442 plid->Release();
00443 return 1;
00444 }
00445
00446
00447 if((pinput_file || pinput_dir) && !plang_name)
00448 {
00449 fprintf(stderr, "ERROR: Training using one file or directory without knowing language name. Please set -g language\n");
00450 plid->Release();
00451 return 1;
00452 }
00453
00454
00455
00456
00457 int nreq_iters = plid->GetNRequestedTrainingIters();
00458
00459 int i;
00460 for(i = 0; i < nreq_iters; i++)
00461 {
00462
00463
00464
00465 if(!plid->StartTrainingIteration())
00466 {
00467 plid->Release();
00468 return 1;
00469 }
00470
00471 #ifdef PROCESS_WAVEFORM
00472
00473
00474
00475 if(pinput_file)
00476 {
00477 int nbytes = 0;
00478 char *pwaveform = LoadFileToMem(pinput_file, &nbytes);
00479 if(!pwaveform)
00480 {
00481 plid->Release();
00482 return 1;
00483 }
00484
00485 bool result = plid->AddWaveform(plang_name, pwaveform, nbytes);
00486 delete [] pwaveform;
00487 if(!result)
00488 {
00489 plid->Release();
00490 return 1;
00491 }
00492 }
00493 #else
00494
00495 if(pinput_file && !plid->AddFile(plang_name, pinput_file))
00496 {
00497 plid->Release();
00498 return 1;
00499 }
00500 #endif
00501
00502
00503
00504 if(plist_file && !plid->AddFilesFromListFile(plist_file))
00505 {
00506 plid->Release();
00507 return 1;
00508 }
00509
00510
00511 if(pinput_dir && !plid->AddFilesFromDirectory(plang_name, pinput_dir, pwave_ext))
00512 {
00513 plid->Release();
00514 return 1;
00515 }
00516 }
00517
00518
00519
00520
00521 if(!plid->FinishTraining())
00522 {
00523 plid->Release();
00524 return 1;
00525 }
00526
00527
00528
00529
00530
00531
00532
00533 }
00534 else
00535 {
00536
00537
00538
00539
00540
00541 SScoreMemoryI *pscore_mem = static_cast<SScoreMemoryI *>(BSAPICreateInstance(SIID_SCOREMEMORY));
00542 if(!pscore_mem)
00543 {
00544 plid->Release();
00545 return 1;
00546 }
00547 pscore_mem->SetErrorHandler(&gErrorHandler);
00548 plid->SetTarget(pscore_mem);
00549
00550
00551
00552 if(pmodel_dir && !plid->SetModelDirectory(pmodel_dir))
00553 {
00554 plid->Release();
00555 pscore_mem->Release();
00556 return 1;
00557 }
00558
00559
00560 if(!plid->SetActiveWaveformPart(acwf_start, acwf_len))
00561 {
00562 plid->Release();
00563 pscore_mem->Release();
00564 return 1;
00565 }
00566
00567
00568 if(pactive_langs)
00569 {
00570 if (!plid->ActivateModels(pactive_langs))
00571 {
00572 plid->Release();
00573 pscore_mem->Release();
00574 return 1;
00575 }
00576 }
00577 else
00578 {
00579 if (!plid->ActivateAllModels())
00580 {
00581 plid->Release();
00582 pscore_mem->Release();
00583 return 1;
00584 }
00585 }
00586
00587
00588
00589
00590
00591 FILE *pf_out = stdout;
00592 if(poutput_file)
00593 {
00594 pf_out = fopen(poutput_file, "w");
00595 if(!pf_out)
00596 {
00597 fprintf(stderr, "ERROR: Can not open output score file '%s'.", poutput_file);
00598 plid->Release();
00599 pscore_mem->Release();
00600 return 1;
00601 }
00602 }
00603
00604
00605 if(pinput_file)
00606 {
00607 gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00608 #ifdef PROCESS_WAVEFORM
00609
00610
00611
00612 int nbytes = 0;
00613 char *pwaveform = LoadFileToMem(pinput_file, &nbytes);
00614 if(!pwaveform)
00615 {
00616 plid->Release();
00617 pscore_mem->Release();
00618 return 1;
00619 }
00620
00621 bool result = plid->TestWaveform(pwaveform, nbytes);
00622 delete [] pwaveform;
00623 if(!result)
00624 #else
00625 if(!plid->TestFile(pinput_file))
00626 #endif
00627 {
00628 plid->Release();
00629 pscore_mem->Release();
00630 return 1;
00631 }
00632 if(!pscore_mem->Empty())
00633 DumpScore(pf_out, pinput_file, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, add_ubm_score, supp_too_short);
00634 }
00635
00636
00637 if(plist_file)
00638 {
00639 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00640 if(!plist)
00641 {
00642 fprintf(stderr, "Memory allocation error.");
00643 plid->Release();
00644 pscore_mem->Release();
00645 return 1;
00646 }
00647 plist->SetErrorHandler(&gErrorHandler);
00648 if(!plist->AddList(plist_file))
00649 {
00650 plist->Release();
00651 plid->Release();
00652 pscore_mem->Release();
00653 return 1;
00654 }
00655
00656 plist->FirstLine();
00657 char ptarget[1024];
00658 char psource[1024];
00659 int start;
00660 int end;
00661 float prob;
00662 while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00663 {
00664 gErrorHandler.LogMessage("Processing file: %s", psource);
00665 if(!plid->TestFile(psource))
00666 {
00667 plist->Release();
00668 plid->Release();
00669 pscore_mem->Release();
00670 return 1;
00671 }
00672 if(!pscore_mem->Empty())
00673 DumpScore(pf_out, psource, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, add_ubm_score, supp_too_short);
00674 }
00675 plist->Release();
00676 }
00677
00678
00679
00680 if(pinput_dir)
00681 {
00682 SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00683 if(!psniffer)
00684 {
00685 fprintf(stderr, "Memory allocation error.");
00686 plid->Release();
00687 pscore_mem->Release();
00688 return 1;
00689 }
00690 psniffer->SetErrorHandler(&gErrorHandler);
00691 psniffer->AddDirectory(pinput_dir);
00692 psniffer->AddWantedSuffix(pwave_ext);
00693
00694 if(!psniffer->FirstFile())
00695 {
00696 psniffer->Release();
00697 plid->Release();
00698 pscore_mem->Release();
00699 return 1;
00700 }
00701
00702 char psource[1024];
00703 while(psniffer->GetFile(psource, sizeof(psource) - 1))
00704 {
00705 gErrorHandler.LogMessage("Processing file: %s", psource);
00706 if(!plid->TestFile(psource))
00707 {
00708 psniffer->Release();
00709 plid->Release();
00710 pscore_mem->Release();
00711 return 1;
00712 }
00713 if(!pscore_mem->Empty())
00714 DumpScore(pf_out, psource, pscore_mem->GetScores(), dump_all_scores, pcolumn_fmt, add_ubm_score, supp_too_short);
00715 }
00716 psniffer->Release();
00717 }
00718
00719
00720 if(pf_out != stdout)
00721 fclose(pf_out);
00722
00723 pscore_mem->Release();
00724 }
00725
00726
00727 plid->Release();
00728
00729 return 0;
00730 }