00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <cstring>
00015 #include <string>
00016 #include <cassert>
00017
00018 #include "bsapi.h"
00019 #include "getopt.h"
00020 #include "labtarget.h"
00021
00022 #ifdef WIN32
00023 #define DIRSEP "\\"
00024 #else
00025 #define DIRSEP "/"
00026 #endif
00027
00028 #define KWS_LIC_SECS_TO_EXPIRE_WARN (10 * 24 * 3600)
00029
00030 SLabelTarget gHypTarget;
00031 SLabelTarget gDetTarget;
00032
00033
00034
00035
00036 class ErrorHandler : public SErrorCallbackI
00037 {
00038 public:
00039 ErrorHandler() : mVerbose(false) {;}
00040 virtual void BSAPI_METHOD OnTextMessage(unsigned int iId, message_type type, unsigned int messageId, char *pMessage)
00041 {
00042 switch(type)
00043 {
00044 case mtError:
00045 fprintf(stdout, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00046 break;
00047 case mtWarning:
00048 fprintf(stdout, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00049 break;
00050 case mtLog:
00051 LogMessage(pMessage);
00052 break;
00053 }
00054 }
00055
00056 void LogMessage(const char *pMessage, ...)
00057 {
00058 if (mVerbose)
00059 {
00060 va_list ap;
00061 va_start(ap, pMessage);
00062 vfprintf(stdout, pMessage, ap);
00063 fprintf(stdout, "\n");
00064 va_end(ap);
00065 }
00066 }
00067
00068 void SetVerbose(bool verbose) {mVerbose = verbose;}
00069
00070 protected:
00071 bool mVerbose;
00072 } gErrorHandler;
00073
/// Returns a copy of @p pFilename with its extension replaced by @p pNewSuffix.
///
/// The extension is everything from the last '.' of the final path component
/// to the end of the string. Both '/' and '\\' are recognised as path
/// separators. When the final component contains no '.', @p pNewSuffix is
/// simply appended.
///
/// @param pFilename   file name or path (must not be NULL)
/// @param pNewSuffix  new suffix including its leading dot, e.g. ".hyp" (must not be NULL)
/// @return            the file name carrying the new suffix
std::string change_file_suffix(const char *pFilename, const char *pNewSuffix)
{
    assert(pFilename);
    assert(pNewSuffix);

    std::string res = pFilename;
    const std::string::size_type pos_dot = res.rfind('.');
    const std::string::size_type pos_slash = res.find_last_of("/\\");

    // A dot that sits before the last separator belongs to a directory name,
    // not to the file's extension.
    const bool has_extension =
        pos_dot != std::string::npos &&
        (pos_slash == std::string::npos || pos_dot > pos_slash);

    if (has_extension)
        // Replace through the end of the string so that an old extension
        // longer than the new suffix does not leave trailing characters behind.
        res.replace(pos_dot, std::string::npos, pNewSuffix);
    else
        res += pNewSuffix;

    return res;
}
00095
00096 void help()
00097 {
00098 puts("\n Keyword spotting ");
00099 printf(" %s\n", BSAPIVersion());
00100 puts(" ================================================================ ");
00101 puts(" ");
00102 puts(" USAGE: kws [options] ");
00103 puts(" ");
00104 puts(" system configuration: ");
00105 puts(" -c file configuration file ");
00106 puts(" ");
00107 puts(" processing one file: ");
00108 puts(" -i file input file ");
00109 puts(" -o file detection file ");
00110 puts(" -h file hypothesis file ");
00111 puts(" ");
00112 puts(" processing list of files: ");
00113 puts(" -l file list of input files ");
00114 puts(" -p file detection master file (for all input files) ");
00115 puts(" -q file hypothesis master file (for all input files) ");
00116 puts(" -o \"none\" will not produce detection file per each file ");
00117 puts(" -h \"none\" will not produce hypothesis file per each file");
00118 puts(" ");
00119 puts(" processing directory: ");
00120 puts(" -d dir input directory ");
00121 puts(" -e str [raw] extension of audio files ");
00122 puts(" -p file detection master file (for all input files) ");
00123 puts(" -q file hypothesis master file (for all input files) ");
00124 puts(" -o \"none\" will not produce detection file per each file ");
00125 puts(" -h \"none\" will not produce hypothesis file per each file");
00126 puts(" ");
00127 puts(" NOTE: in case of -l or -d the output will be written ");
00128 puts(" to *.hyp and *.det where * is the audio filename ");
00129 puts(" without extension. Can be switched off by -o none, -h none ");
00130 puts(" ");
00131 puts(" other options: ");
00132 puts(" -v verbose mode ");
00133 puts(" -s fmt [mlf] time format (seconds, mlf) ");
00134 puts(" -y file list of keywords and pronunciations in XML ");
00135 puts(" -t file XSLT to be linked with the list of keywords ");
00136 puts(" -w fmt [lin16] waveform format (lin16, lin8, alaw, mulaw) ");
00137 puts(" -n num [1] number of channels in audio files, takes into ");
00138 puts(" account for non header files (.raw) ");
00139 puts(" -x file list of keywords in XML format ");
00140 puts(" -k file list of keywords in plain text format ");
00141 puts(" ");
00142 }
00143
00144 int main(int argc, char **argv)
00145 {
00146
00147 char *pconfig_file = 0;
00148 char *pinput_file = 0;
00149 char *plist_file = 0;
00150 char *pinput_dir = 0;
00151 char *pwave_ext = "raw";
00152 char *pwave_fmt = "lin16";
00153 char *pkwlxml_file = 0;
00154 char *pkwltxt_file = 0;
00155 char *pkwlxsl_file = 0;
00156 char *poutdet_file = 0;
00157 char *pouthyp_file = 0;
00158 char *poutdetm_file = 0;
00159 char *pouthypm_file = 0;
00160 char *ptime_fmt = "mlf";
00161 char *poutkwl_file = 0;
00162 int nchannels = 1;
00163
00164 float acwf_start = 0;
00165 float acwf_len = 0;
00166
00167
00168 if(argc == 1)
00169 {
00170 help();
00171 return 0;
00172 }
00173
00174 optind = 0;
00175 while (1)
00176 {
00177 int c = getopt(argc, argv, const_cast<char *>("-c:i:l:d:e:w:n:x:k:o:h:p:q:s:y:t:v"));
00178 if(c == -1)
00179 break;
00180
00181 switch(c)
00182 {
00183 case 'c':
00184 pconfig_file = optarg;
00185 break;
00186 case 'i':
00187 pinput_file = optarg;
00188 break;
00189 case 'l':
00190 plist_file = optarg;
00191 break;
00192 case 'd':
00193 pinput_dir = optarg;
00194 break;
00195 case 'e':
00196 pwave_ext = optarg;
00197 break;
00198 case 'w':
00199 pwave_fmt = optarg;
00200 break;
00201 case 'n':
00202 if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00203 {
00204 fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00205 return 1;
00206 }
00207 break;
00208 case 'x':
00209 pkwlxml_file = optarg;
00210 break;
00211 case 'k':
00212 pkwltxt_file = optarg;
00213 break;
00214 case 'o':
00215 poutdet_file = optarg;
00216 break;
00217 case 'h':
00218 pouthyp_file = optarg;
00219 break;
00220 case 'p':
00221 poutdetm_file = optarg;
00222 break;
00223 case 'q':
00224 pouthypm_file = optarg;
00225 break;
00226 case 's':
00227 ptime_fmt = optarg;
00228 break;
00229 case 'y':
00230 poutkwl_file = optarg;
00231 break;
00232 case 't':
00233 pkwlxsl_file = optarg;
00234 break;
00235 case 'v':
00236 gErrorHandler.SetVerbose(true);
00237 break;
00238 case '?':
00239 fprintf(stderr, "ERROR: Command line parsing error.\n");
00240 return 1;
00241 }
00242 }
00243
00244 if (!((pkwlxml_file == 0) ^ (pkwltxt_file == 0)))
00245 {
00246 fprintf(stderr, "ERROR: List of keywords must be given either in XML ('x') or plain text format ('k').\n");
00247 return 1;
00248 }
00249
00250
00251 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00252 if (plicman)
00253 {
00254 plicman->SetErrorHandler(&gErrorHandler);
00255 plicman->RegisterLicenseFile("license.dat");
00256 }
00257
00258 gHypTarget.SetTimeFormatStr(ptime_fmt);
00259 gDetTarget.SetTimeFormatStr(ptime_fmt);
00260
00261 SKeywordSpottingI *pkws = static_cast<SKeywordSpottingI *>(BSAPICreateInstance(SIID_KWS));
00262 if(!pkws)
00263 {
00264 return 1;
00265 }
00266
00267 pkws->SetErrorHandler(&gErrorHandler);
00268
00269 if (plicman)
00270 {
00271 long_long secs_to_expire = plicman->GetSecsToExpire(SIID_KWS);
00272 if (secs_to_expire < KWS_LIC_SECS_TO_EXPIRE_WARN)
00273 fprintf(stderr, "WARNING: License will expire in %d day(s).\n", static_cast<int>(secs_to_expire / (24 * 3600)));
00274 }
00275
00276
00277 char pdefault_cfg[1024];
00278 sprintf(pdefault_cfg, "settings%smain_config", DIRSEP);
00279 if(!pkws->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00280 {
00281 pkws->Release();
00282 return 1;
00283 }
00284
00285
00286
00287 SBlockSetI * pbset = pkws->GetBlockSet();
00288 if (!pbset)
00289 {
00290 pkws->Release();
00291 return 1;
00292 }
00293
00294 SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("convertor"));
00295 if (pwc)
00296 {
00297 pwc->SetNChannels(nchannels);
00298 pwc->SetInputFormatStr(pwave_fmt);
00299 }
00300
00301
00302 SKeywordListI * mpKeywordList;
00303 mpKeywordList=pkws->GetKeywordList();
00304 if(!mpKeywordList)
00305 {
00306 pkws->Release();
00307 return 1;
00308 }
00309
00310 if(!mpKeywordList->Load(
00311 pkwlxml_file ? pkwlxml_file : pkwltxt_file,
00312 pkwlxml_file ? SKeywordListI::ffXML : SKeywordListI::ffPlain))
00313 {
00314 pkws->Release();
00315 return 1;
00316 }
00317
00318 if(!mpKeywordList->AddPronunciations(SKeywordListI::apmCondAdd))
00319 {
00320 pkws->Release();
00321 return 1;
00322 }
00323
00324 if(!mpKeywordList->CheckKeywords())
00325 {
00326 pkws->Release();
00327 return 1;
00328 }
00329
00330 if(poutkwl_file && !mpKeywordList->Save(poutkwl_file, pkwlxsl_file))
00331 {
00332 pkws->Release();
00333 return 1;
00334 }
00335
00336
00337 pkws->SetDetectionsTarget(&gDetTarget);
00338 pkws->SetHypothesesTarget(&gHypTarget);
00339
00340 gHypTarget.StartProcessing(pouthypm_file);
00341 gDetTarget.StartProcessing(poutdetm_file);
00342
00343
00344 if(pinput_file)
00345 {
00346 gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00347
00348 gHypTarget.StartProcessingFile(pinput_file, pouthyp_file);
00349 gDetTarget.StartProcessingFile(pinput_file, poutdet_file);
00350 if(!pkws->ProcessFile(pinput_file, 0, 0))
00351 {
00352 pkws->Release();
00353 return 1;
00354 }
00355 gHypTarget.FinishProcessingFile();
00356 gDetTarget.FinishProcessingFile();
00357 }
00358
00359
00360 if(plist_file)
00361 {
00362 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00363 if(!plist)
00364 {
00365 fprintf(stderr, "Memory allocation error.");
00366 pkws->Release();
00367 return 1;
00368 }
00369 plist->SetErrorHandler(&gErrorHandler);
00370 if(!plist->AddList(plist_file))
00371 {
00372 plist->Release();
00373 pkws->Release();
00374 return 1;
00375 }
00376
00377 plist->FirstLine();
00378 char ptarget[1024];
00379 char psource[1024];
00380 int start;
00381 int end;
00382 float prob;
00383 while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00384 {
00385 gErrorHandler.LogMessage("Processing file: %s", psource);
00386
00387 std::string outhyp_file = change_file_suffix(psource, ".hyp");
00388 std::string outdet_file = change_file_suffix(psource, ".det");
00389
00390 if( !pouthyp_file || (pouthyp_file && strcmp(pouthyp_file,"none") != 0 && strcmp(pouthyp_file,"") != 0 )){
00391 gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00392 }else{
00393 gHypTarget.StartProcessingFile(psource, 0);
00394 }
00395
00396 if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 && strcmp(poutdet_file,"") != 0 )){
00397 gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00398 }else{
00399 gDetTarget.StartProcessingFile(psource, 0);
00400 }
00401
00402 if(!pkws->ProcessFile(psource, 0, 0))
00403 {
00404 plist->Release();
00405 pkws->Release();
00406 return 1;
00407 }
00408 gHypTarget.FinishProcessingFile();
00409 gDetTarget.FinishProcessingFile();
00410 }
00411 plist->Release();
00412 }
00413
00414
00415
00416 if(pinput_dir)
00417 {
00418 SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00419 if(!psniffer)
00420 {
00421 fprintf(stderr, "Memory allocation error.");
00422 pkws->Release();
00423 return 1;
00424 }
00425 psniffer->SetErrorHandler(&gErrorHandler);
00426 psniffer->AddDirectory(pinput_dir);
00427 psniffer->AddWantedSuffix(pwave_ext);
00428
00429 if(!psniffer->FirstFile())
00430 {
00431 psniffer->Release();
00432 pkws->Release();
00433 return 1;
00434 }
00435
00436 char psource[1024];
00437 while(psniffer->GetFile(psource, sizeof(psource) - 1))
00438 {
00439 gErrorHandler.LogMessage("Processing file: %s", psource);
00440
00441 std::string outhyp_file = change_file_suffix(psource, ".hyp");
00442 std::string outdet_file = change_file_suffix(psource, ".det");
00443
00444 if( !pouthyp_file || (pouthyp_file && strcmp(pouthyp_file,"none") != 0 && strcmp(pouthyp_file,"") != 0 )){
00445 gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00446 }else{
00447 gHypTarget.StartProcessingFile(psource, 0);
00448 }
00449
00450 if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 && strcmp(poutdet_file,"") != 0 )){
00451 gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00452 }else{
00453 gDetTarget.StartProcessingFile(psource, 0);
00454 }
00455
00456 if(!pkws->ProcessFile(psource, 0, 0))
00457 {
00458 psniffer->Release();
00459 pkws->Release();
00460 return 1;
00461 }
00462 gHypTarget.FinishProcessingFile();
00463 gDetTarget.FinishProcessingFile();
00464 }
00465 psniffer->Release();
00466 }
00467
00468 gHypTarget.FinishProcessing();
00469 gDetTarget.FinishProcessing();
00470
00471 pkws->Release();
00472 return 0;
00473 }