00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <dirent.h>
00015
00016 #include "bsapi.h"
00017 #include "getopt.h"
00018
00019 #ifdef WIN32
00020 #define DIRSEP "\\"
00021 #else
00022 #define DIRSEP "/"
00023 #endif
00024
00025 #define LID_MIN_LEN_TO_PROCESS 2.0f // 2 seconds
00026 #define LID_BELOW_MIN_LEN_TEXT "(too_short)"
00027
00028
00029
00030
00031 class ErrorHandler : public SErrorCallbackI
00032 {
00033 public:
00034 ErrorHandler() : mVerbose(false) {;}
00035 virtual void BSAPI_METHOD OnTextMessage(unsigned int iId, message_type type, unsigned int messageId, char *pMessage)
00036 {
00037 switch(type)
00038 {
00039 case mtError:
00040 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00041 break;
00042 case mtWarning:
00043 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00044 break;
00045 case mtLog:
00046 LogMessage(pMessage);
00047 break;
00048 }
00049 }
00050
00051 void LogMessage(const char *pMessage, ...)
00052 {
00053 if (mVerbose)
00054 {
00055 va_list ap;
00056 va_start(ap, pMessage);
00057 vfprintf(stderr, pMessage, ap);
00058 fprintf(stderr, "\n");
00059 va_end(ap);
00060 }
00061 }
00062
00063 void SetVerbose(bool verbose) {mVerbose = verbose;}
00064
00065 protected:
00066 bool mVerbose;
00067 } gErrorHandler;
00068
00069 void help()
00070 {
00071 puts("\n Language identification ");
00072 printf(" %s\n", BSAPIVersion());
00073 puts(" ================================================================ ");
00074 puts(" ");
00075 puts(" USAGE: lid [options] ");
00076 puts(" ");
00077 puts(" system configuration: ");
00078 puts(" -c file configuration file ");
00079 puts(" -m dir model directory ");
00080 puts(" -a str1,str2... active languages ");
00081 puts(" ");
00082 puts(" input: ");
00083 puts(" -i file input file ");
00084 puts(" -l file list of input files ");
00085 puts(" -d dir input directory ");
00086 puts(" -e str [raw] extension of audio files ");
00087 puts(" -w fmt [lin16] waveform format (lin16, lin8, alaw, mulaw) ");
00088 puts(" -n num [1] number of channels in audio files ");
00089 puts(" -p start,len active waveform part (in seconds) ");
00090 puts(" ");
00091 puts(" output: ");
00092 puts(" -s file output score file ");
00093 puts(" -r produce scores for all languages ");
00094 puts(" -u add even the UBM score ");
00095 puts(" -v verbose mode ");
00096 puts(" ");
00097 puts(" training: ");
00098 puts(" -t enable training ");
00099 puts(" -g str name of language to be trained ");
00100 puts(" ");
00101 }
00102
00103
00104 bool DumpScore(char *pInputName, SLIDI *plid, bool dumpAllScores, bool addUBMScore, FILE *pFileHandle)
00105 {
00106
00107
00108
00109 if(dumpAllScores)
00110 {
00111 static bool first_time = true;
00112 int i;
00113 int num;
00114 if(first_time)
00115 {
00116
00117 char **pplang_names = plid->GetModelNames(&num);
00118 if(!pplang_names)
00119 return false;
00120 if(addUBMScore)
00121 fprintf(pFileHandle, "UBM");
00122 for(i = 1; i < num; i++)
00123 {
00124 if(i == 1 && !addUBMScore)
00125 fprintf(pFileHandle, "%s", pplang_names[i]);
00126 else
00127 fprintf(pFileHandle, " %s", pplang_names[i]);
00128 }
00129 fprintf(pFileHandle, "\n");
00130 first_time = false;
00131 }
00132 float *pscores = plid->GetModelScores(&num);
00133 if(!pscores)
00134 return false;
00135 fprintf(pFileHandle, "%s", pInputName);
00136 if (plid->GetTestLength() < LID_MIN_LEN_TO_PROCESS)
00137 {
00138 if(addUBMScore)
00139 fprintf(pFileHandle, " -inf");
00140 for(i = 1; i < num; i++)
00141 fprintf(pFileHandle, " -inf");
00142 fprintf(pFileHandle, " %s", LID_BELOW_MIN_LEN_TEXT);
00143 }
00144 else
00145 {
00146 if(addUBMScore)
00147 fprintf(pFileHandle, " %f", pscores[0]);
00148 for(i = 1; i < num; i++)
00149 fprintf(pFileHandle, " %f", pscores[i]);
00150 }
00151 fprintf(pFileHandle, "\n");
00152 }
00153
00154
00155 else
00156 {
00157 float score;
00158 char *pwinning_lang = plid->GetBestModel(&score);
00159 if(!pwinning_lang)
00160 return false;
00161 if (plid->GetTestLength() < LID_MIN_LEN_TO_PROCESS)
00162 fprintf(pFileHandle, "%s %s -inf", pInputName, LID_BELOW_MIN_LEN_TEXT);
00163 else
00164 fprintf(pFileHandle, "%s %s %f", pInputName, pwinning_lang, score);
00165 fprintf(pFileHandle, "\n");
00166 }
00167 return true;
00168 }
00169
/**
 * Tells whether pPath can be opened as a directory.
 *
 * @param pPath  path to probe; a null pointer is treated as "does not exist"
 * @return true when opendir() succeeds on pPath, false otherwise
 */
bool DirectoryExists(const char *pPath)
{
    // opendir() must not be handed a null pointer.
    if (!pPath)
        return false;

    DIR *pdir = opendir(pPath);
    if (!pdir)
        return false;

    closedir(pdir);
    return true;
}
00179
00180 int main(int argc, char *argv[])
00181 {
00182
00183 char *pconfig_file = 0;
00184 char *pmodel_dir = 0;
00185 char *pinput_file = 0;
00186 char *plist_file = 0;
00187 char *pinput_dir = 0;
00188 char *pwave_fmt = "lin16";
00189 char *pwave_ext = "raw";
00190 char *poutput_file = 0;
00191 char *plang_name = 0;
00192 char *pactive_langs = 0;
00193 int nchannels = 1;
00194 bool training_mode = false;
00195 bool dump_all_scores = false;
00196 bool add_ubm_score = false;
00197 float acwf_start = 0;
00198 float acwf_len = 0;
00199
00200
00201 if(argc == 1)
00202 {
00203 help();
00204 return 0;
00205 }
00206
00207 optind = 0;
00208 while (1)
00209 {
00210 int c = getopt(argc, argv, const_cast<char *>("-c:m:i:l:d:e:n:w:s:a:o:trvg:up:"));
00211 if(c == -1)
00212 break;
00213
00214 switch(c)
00215 {
00216 case 'c':
00217 pconfig_file = optarg;
00218 break;
00219 case 'm':
00220 pmodel_dir = optarg;
00221 break;
00222 case 'i':
00223 pinput_file = optarg;
00224 break;
00225 case 'l':
00226 plist_file = optarg;
00227 break;
00228 case 'd':
00229 pinput_dir = optarg;
00230 break;
00231 case 'w':
00232 pwave_fmt = optarg;
00233 break;
00234 case 'e':
00235 pwave_ext = optarg;
00236 break;
00237 case 'n':
00238 if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00239 {
00240 fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00241 return 1;
00242 }
00243 break;
00244 case 's':
00245 poutput_file = optarg;
00246 break;
00247 case 'v':
00248 gErrorHandler.SetVerbose(true);
00249 break;
00250 case 't':
00251 training_mode = true;
00252 break;
00253 case 'r':
00254 dump_all_scores = true;
00255 break;
00256 case 'u':
00257 add_ubm_score = true;
00258 break;
00259 case 'g':
00260 plang_name = optarg;
00261 break;
00262 case 'a':
00263 pactive_langs = optarg;
00264 break;
00265 case 'p':
00266 if(sscanf(optarg, "%f,%f", &acwf_start, &acwf_len) != 2)
00267 {
00268 fprintf(stderr, "ERROR: Can not parse active waveform part '%s'\n", optarg);
00269 return 1;
00270 }
00271 break;
00272 case '?':
00273 fprintf(stderr, "ERROR: Command line parsing error.\n");
00274 return 1;
00275 }
00276 }
00277
00278
00279 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00280 if (plicman)
00281 {
00282 plicman->SetErrorHandler(&gErrorHandler);
00283 plicman->RegisterLicenseFile("license.dat");
00284 }
00285
00286
00287 SLIDI *plid = static_cast<SLIDI *>(BSAPICreateInstance(SIID_LID));
00288 if(!plid)
00289 {
00290 return 1;
00291 }
00292
00293
00294 plid->SetErrorHandler(&gErrorHandler);
00295
00296
00297 char pdefault_cfg[1024];
00298 sprintf(pdefault_cfg, "settings%ssigmodelling", DIRSEP);
00299 if(!plid->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00300 {
00301 plid->Release();
00302 return 1;
00303 }
00304
00305
00306
00307 SWaveformFormatConvertorI *pwc = plid->GetWaveformFormatConvertor();
00308 if (pwc)
00309 {
00310 pwc->SetNChannels(nchannels);
00311 pwc->SetInputFormatStr(pwave_fmt);
00312 }
00313
00314
00315
00316
00317
00318 if(training_mode)
00319 {
00320
00321
00322 if(pmodel_dir && !plid->SetModelDirectory(pmodel_dir))
00323 {
00324 plid->Release();
00325 return 1;
00326 }
00327
00328
00329 if((pinput_file || pinput_dir) && !plang_name)
00330 {
00331 fprintf(stderr, "ERROR: Training using one file or directory without knowing language name. Please set -g language\n");
00332 plid->Release();
00333 return 1;
00334 }
00335
00336
00337
00338
00339 int nreq_iters = plid->GetNRequestedTrainingIters();
00340
00341 int i;
00342 for(i = 0; i < nreq_iters; i++)
00343 {
00344
00345
00346
00347 if(!plid->StartTrainingIteration())
00348 {
00349 plid->Release();
00350 return 1;
00351 }
00352
00353
00354 if(pinput_file && !plid->AddFile(plang_name, pinput_file))
00355 {
00356 plid->Release();
00357 return 1;
00358 }
00359
00360
00361
00362 if(plist_file && !plid->AddFilesFromListFile(plist_file))
00363 {
00364 plid->Release();
00365 return 1;
00366 }
00367
00368
00369 if(pinput_dir && !plid->AddFilesFromDirectory(plang_name, pinput_dir, pwave_ext))
00370 {
00371 plid->Release();
00372 return 1;
00373 }
00374 }
00375
00376
00377
00378
00379 if(!plid->FinishTraining())
00380 {
00381 plid->Release();
00382 return 1;
00383 }
00384
00385 if (plang_name && (plid->GetTrainingLength(plang_name) < LID_MIN_LEN_TO_PROCESS))
00386 {
00387 fprintf(stderr, "WARNING: Training record(s) contain only %.3f seconds of speech. "
00388 "At least %f seconds are needed to obtain significant results.\n",
00389 plid->GetTrainingLength(plang_name), LID_MIN_LEN_TO_PROCESS);
00390 }
00391 }
00392 else
00393 {
00394
00395
00396 if (pmodel_dir && !DirectoryExists(pmodel_dir))
00397 {
00398 fprintf(stderr, "ERROR: Model directory '%s' does not exist.\n", pmodel_dir);
00399 plid->Release();
00400 return 1;
00401 }
00402
00403
00404
00405 if(pmodel_dir && !plid->SetModelDirectory(pmodel_dir))
00406 {
00407 plid->Release();
00408 return 1;
00409 }
00410
00411
00412 if(!plid->SetActiveWaveformPart(acwf_start, acwf_len))
00413 {
00414 plid->Release();
00415 return 1;
00416 }
00417
00418
00419 if(pactive_langs)
00420 plid->ActivateModels(pactive_langs);
00421 else
00422 plid->ActivateAllModels();
00423
00424
00425
00426
00427
00428 FILE *pf_out = stdout;
00429 if(poutput_file)
00430 {
00431 pf_out = fopen(poutput_file, "w");
00432 if(!pf_out)
00433 {
00434 fprintf(stderr, "ERROR: Can not open output score file '%s'.", poutput_file);
00435 plid->Release();
00436 return 1;
00437 }
00438 }
00439
00440
00441 if(pinput_file)
00442 {
00443 gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00444 if(!plid->TestFile(pinput_file))
00445 {
00446 plid->Release();
00447 return 1;
00448 }
00449 DumpScore(pinput_file, plid, dump_all_scores, add_ubm_score, pf_out);
00450 }
00451
00452
00453 if(plist_file)
00454 {
00455 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00456 if(!plist)
00457 {
00458 fprintf(stderr, "Memory allocation error.");
00459 plid->Release();
00460 return 1;
00461 }
00462 plist->SetErrorHandler(&gErrorHandler);
00463 if(!plist->AddList(plist_file))
00464 {
00465 plist->Release();
00466 plid->Release();
00467 return 1;
00468 }
00469
00470 plist->FirstLine();
00471 char ptarget[1024];
00472 char psource[1024];
00473 int start;
00474 int end;
00475 float prob;
00476 while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00477 {
00478 gErrorHandler.LogMessage("Processing file: %s", psource);
00479 if(!plid->TestFile(psource))
00480 {
00481 plist->Release();
00482 plid->Release();
00483 return 1;
00484 }
00485 DumpScore(psource, plid, dump_all_scores, add_ubm_score, pf_out);
00486 }
00487 plist->Release();
00488 }
00489
00490
00491
00492 if(pinput_dir)
00493 {
00494 SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00495 if(!psniffer)
00496 {
00497 fprintf(stderr, "Memory allocation error.");
00498 plid->Release();
00499 return 1;
00500 }
00501 psniffer->SetErrorHandler(&gErrorHandler);
00502 psniffer->AddDirectory(pinput_dir);
00503 psniffer->AddWantedSuffix(pwave_ext);
00504
00505 if(!psniffer->FirstFile())
00506 {
00507 psniffer->Release();
00508 plid->Release();
00509 return 1;
00510 }
00511
00512 char psource[1024];
00513 while(psniffer->GetFile(psource, sizeof(psource) - 1))
00514 {
00515 gErrorHandler.LogMessage("Processing file: %s", psource);
00516 if(!plid->TestFile(psource))
00517 {
00518 psniffer->Release();
00519 plid->Release();
00520 return 1;
00521 }
00522 DumpScore(psource, plid, dump_all_scores, add_ubm_score, pf_out);
00523 }
00524 psniffer->Release();
00525 }
00526
00527
00528 if(pf_out != stdout)
00529 fclose(pf_out);
00530 }
00531
00532
00533 plid->Release();
00534
00535 return 0;
00536 }