keyword_spotting.cpp

This example shows how to use the SKeywordSpottingI interface.

00001 /*****************************************************************
00002  *  BSAPI Keyword Spotting Example                               *
00003  *                                                               *
00004  *  Copyright   : (C) 2006-2008 by Petr Schwarz & Pavel Matejka  *
00005  *                         & Tomas Cipr                          *
00006  *                                                               *
00007  *  Email       : {schwarz,matejka,cipr}@phonexia.com            *
00008  ****************************************************************/
00009 
00010 #include <stdio.h>
00011 #include <stdlib.h>
00012 #include <errno.h>
00013 #include <stdarg.h>
00014 #include <cstring>
00015 #include <string>
00016 #include <cassert>
00017 
00018 #include "bsapi.h"
00019 #include "getopt.h"
00020 #include "labtarget.h"
00021 
00022 #ifdef WIN32
00023   #define DIRSEP "\\"
00024 #else
00025   #define DIRSEP "/"
00026 #endif
00027 
00028 #define KWS_LIC_SECS_TO_EXPIRE_WARN  (10 * 24 * 3600)
00029 
00030 SLabelTarget gHypTarget;
00031 SLabelTarget gDetTarget;
00032 
00033 // This is an error handling object. If an error occur, a message is sent to this object at first. 
00034 // Then the running function will exit with the false or 0 return value. The object also accept 
00035 // warning and logging messages.
00036 class ErrorHandler : public SErrorCallbackI 
00037 {
00038   public:
00039     ErrorHandler() : mVerbose(false) {;}
00040     virtual void BSAPI_METHOD OnTextMessage(unsigned int iId, message_type type, unsigned int messageId, char *pMessage)
00041     {
00042       switch(type)
00043       {
00044         case mtError:
00045           fprintf(stdout, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00046           break;
00047         case mtWarning:
00048           fprintf(stdout, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00049           break;
00050         case mtLog:
00051           LogMessage(pMessage);
00052           break;
00053       }
00054     }
00055 
00056     void LogMessage(const char *pMessage, ...)
00057     {
00058       if (mVerbose)
00059       {
00060         va_list ap;
00061         va_start(ap, pMessage);
00062         vfprintf(stdout, pMessage, ap);
00063         fprintf(stdout, "\n");
00064         va_end(ap);
00065       }
00066     }
00067     
00068     void SetVerbose(bool verbose) {mVerbose = verbose;}
00069   
00070   protected:
00071     bool mVerbose;
00072 } gErrorHandler;
00073 
// Returns pFilename with its suffix replaced by pNewSuffix.
//
// The suffix is everything from the last '.' to the end of the string, but only
// when that dot belongs to the last path component (i.e. it is located after
// the last '/' or '\\'). If the last component has no dot, pNewSuffix is simply
// appended. pNewSuffix is used verbatim, so it should include the leading dot
// (e.g. ".hyp").
//
// Both arguments must be non-NULL (checked with assert).
std::string change_file_suffix(const char *pFilename, const char *pNewSuffix)
{
  assert(pFilename);
  assert(pNewSuffix);

  std::string res = pFilename;
  const std::string::size_type pos_dot   = res.rfind('.');
  const std::string::size_type pos_slash = res.find_last_of("/\\");  // either separator style

  const bool has_suffix =
    pos_dot != std::string::npos &&
    (pos_slash == std::string::npos || pos_dot > pos_slash);

  // Drop the whole old suffix regardless of its length; replacing only
  // strlen(pNewSuffix) characters would leave the tail of a longer suffix behind
  // ("audio.flac" -> "audio.hypc").
  if (has_suffix)
    res.erase(pos_dot);

  res += pNewSuffix;
  return res;
}
00095 
00096 void help()
00097 { 
00098   puts("\n Keyword spotting                                               ");
00099   printf(" %s\n", BSAPIVersion());
00100   puts(" ================================================================ ");
00101   puts("                                                                  ");
00102   puts(" USAGE: kws [options]                                             ");
00103   puts("                                                                  ");
00104   puts(" system configuration:                                            ");
00105   puts("   -c file         configuration file                             ");
00106   puts("                                                                  ");
00107   puts(" processing one file:                                             ");
00108   puts("   -i file         input file                                     ");
00109   puts("   -o file         detection file                                 ");
00110   puts("   -h file         hypothesis file                                ");
00111   puts("                                                                  ");
00112   puts(" processing list of files:                                        ");
00113   puts("   -l file         list of input files                            ");
00114   puts("   -p file         detection master file (for all input files)    ");
00115   puts("   -q file         hypothesis master file (for all input files)   ");
00116   puts("   -o \"none\"       will not produce detection file per each file ");
00117   puts("   -h \"none\"       will not produce hypothesis file per each file");
00118   puts("                                                                  ");
00119   puts(" processing directory:                                            ");
00120   puts("   -d dir          input directory                                ");
00121   puts("   -e str [raw]    extension of audio files                       ");
00122   puts("   -p file         detection master file (for all input files)    ");
00123   puts("   -q file         hypothesis master file (for all input files)   ");
00124   puts("   -o \"none\"       will not produce detection file per each file ");
00125   puts("   -h \"none\"       will not produce hypothesis file per each file");
00126   puts("                                                                  ");
00127   puts(" NOTE: in case of -l or -d the output will be written             ");
00128   puts("       to *.hyp and *.det where * is the audio filename           ");
00129   puts("       without extension. Can be switched off by -o none, -h none ");
00130   puts("                                                                  ");
00131   puts(" other options:                                                   ");
00132   puts("   -v              verbose mode                                   ");
00133   puts("   -s fmt [mlf]    time format (seconds, mlf)                     ");
00134   puts("   -y file         list of keywords and pronunciations in XML     ");
00135   puts("   -t file         XSLT to be linked with the list of keywords    ");
00136   puts("   -w fmt [lin16]  waveform format (lin16, lin8, alaw, mulaw)     ");
00137   puts("   -n num [1]      number of channels in audio files, takes into  ");
00138   puts("                   account for non header files (.raw)            ");
00139   puts("   -x file         list of keywords in XML format                 ");
00140   puts("   -k file         list of keywords in plain text format          ");
00141   puts("                                                                  ");
00142 }
00143 
00144 int main(int argc, char **argv)
00145 {
00146   // initial configuration variables
00147   char *pconfig_file   = 0;
00148   char *pinput_file    = 0;
00149   char *plist_file     = 0;
00150   char *pinput_dir     = 0;
00151   char *pwave_ext      = "raw";
00152   char *pwave_fmt      = "lin16";
00153   char *pkwlxml_file   = 0;
00154   char *pkwltxt_file   = 0;
00155   char *pkwlxsl_file   = 0;
00156   char *poutdet_file   = 0;
00157   char *pouthyp_file   = 0;
00158   char *poutdetm_file  = 0;
00159   char *pouthypm_file  = 0;
00160   char *ptime_fmt      = "mlf";
00161   char *poutkwl_file   = 0;
00162   int  nchannels       = 1;  
00163 
00164   float acwf_start     = 0;
00165   float acwf_len       = 0;
00166 
00167   // command line parsing
00168   if(argc == 1)
00169   {
00170     help();
00171     return 0;
00172   }
00173 
00174   optind = 0;
00175   while (1)
00176   {
00177     int c = getopt(argc, argv, const_cast<char *>("-c:i:l:d:e:w:n:x:k:o:h:p:q:s:y:t:v"));
00178     if(c == -1)
00179       break;
00180 
00181     switch(c)
00182     {
00183       case 'c':
00184         pconfig_file = optarg;
00185         break;
00186       case 'i':
00187         pinput_file = optarg;
00188         break;
00189       case 'l':
00190         plist_file = optarg;
00191         break;
00192       case 'd':
00193         pinput_dir = optarg;
00194         break;
00195       case 'e':
00196         pwave_ext = optarg;
00197         break;
00198       case 'w':
00199         pwave_fmt = optarg;
00200         break;
00201      case 'n':
00202        if(sscanf(optarg, "%d", &nchannels) != 1 || nchannels < 1)
00203        {
00204          fprintf(stderr, "ERROR: Invalid number of channels: %s.\n", optarg);
00205          return 1;
00206        }
00207        break;
00208       case 'x':
00209         pkwlxml_file = optarg;
00210         break;
00211       case 'k':
00212         pkwltxt_file = optarg;
00213         break;
00214       case 'o':
00215         poutdet_file = optarg;
00216         break;
00217       case 'h':
00218         pouthyp_file = optarg;
00219         break;
00220       case 'p':
00221         poutdetm_file = optarg;
00222         break;
00223       case 'q':
00224         pouthypm_file = optarg;
00225         break;
00226       case 's':
00227         ptime_fmt = optarg;
00228         break;
00229       case 'y':
00230         poutkwl_file = optarg;
00231         break;
00232       case 't':
00233         pkwlxsl_file = optarg;
00234         break;
00235       case 'v':
00236         gErrorHandler.SetVerbose(true);
00237         break;
00238       case '?':
00239         fprintf(stderr, "ERROR: Command line parsing error.\n");
00240         return 1;
00241     }
00242   }
00243 
00244   if (!((pkwlxml_file == 0) ^ (pkwltxt_file == 0)))
00245   {
00246     fprintf(stderr, "ERROR: List of keywords must be given either in XML ('x') or plain text format ('k').\n");
00247     return 1;
00248   }
00249 
00250   // register license file for SKeywordSpottingI
00251   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00252   if (plicman)
00253   {
00254     plicman->SetErrorHandler(&gErrorHandler);
00255     plicman->RegisterLicenseFile("license.dat");
00256   }
00257 
00258   gHypTarget.SetTimeFormatStr(ptime_fmt);
00259   gDetTarget.SetTimeFormatStr(ptime_fmt);
00260 
00261   SKeywordSpottingI *pkws = static_cast<SKeywordSpottingI *>(BSAPICreateInstance(SIID_KWS));
00262   if(!pkws)
00263   {
00264     return 1;
00265   }
00266   
00267   pkws->SetErrorHandler(&gErrorHandler);
00268 
00269   if (plicman)
00270   {
00271     long_long secs_to_expire = plicman->GetSecsToExpire(SIID_KWS);
00272     if (secs_to_expire < KWS_LIC_SECS_TO_EXPIRE_WARN)
00273       fprintf(stderr, "WARNING: License will expire in %d day(s).\n", static_cast<int>(secs_to_expire / (24 * 3600)));
00274   }
00275 
00276   // read configuration file and configure the instance
00277   char pdefault_cfg[1024];
00278   sprintf(pdefault_cfg, "settings%smain_config", DIRSEP);  // Visual C does not have snprintf
00279   if(!pkws->Init((pconfig_file ? pconfig_file : pdefault_cfg)))
00280   {
00281     pkws->Release();
00282     return 1;
00283   }
00284 
00285   // It is also possible to configure the waveform source.
00286   // If there are multiple channels, the channels are concatenated and just one decision made.
00287   SBlockSetI * pbset = pkws->GetBlockSet();
00288   if (!pbset)
00289   {
00290     pkws->Release();
00291     return 1;
00292   }
00293   
00294   SWaveformFormatConvertorI *pwc = static_cast<SWaveformFormatConvertorI *>(pbset->GetBlock("convertor"));
00295   if (pwc)
00296   {
00297     pwc->SetNChannels(nchannels);
00298     pwc->SetInputFormatStr(pwave_fmt);
00299   }
00300 
00301   // Prepare keyword list
00302   SKeywordListI * mpKeywordList;
00303   mpKeywordList=pkws->GetKeywordList();
00304   if(!mpKeywordList)
00305   {
00306     pkws->Release();
00307     return 1;
00308   }
00309   
00310   if(!mpKeywordList->Load(
00311     pkwlxml_file ? pkwlxml_file : pkwltxt_file,
00312     pkwlxml_file ? SKeywordListI::ffXML : SKeywordListI::ffPlain))
00313   {
00314     pkws->Release();
00315     return 1;
00316   }
00317   
00318   if(!mpKeywordList->AddPronunciations(SKeywordListI::apmCondAdd))
00319   {
00320     pkws->Release();
00321     return 1;
00322   }
00323   
00324   if(!mpKeywordList->CheckKeywords())
00325   {
00326     pkws->Release();
00327     return 1;
00328   }
00329   
00330   if(poutkwl_file && !mpKeywordList->Save(poutkwl_file, pkwlxsl_file))
00331   {
00332     pkws->Release();
00333     return 1;
00334   }
00335 
00336   // Set targets
00337   pkws->SetDetectionsTarget(&gDetTarget);
00338   pkws->SetHypothesesTarget(&gHypTarget);
00339 
00340   gHypTarget.StartProcessing(pouthypm_file);
00341   gDetTarget.StartProcessing(poutdetm_file);
00342 
00343   // the input is one file
00344   if(pinput_file)
00345   {
00346     gErrorHandler.LogMessage("Processing file: %s", pinput_file);
00347 
00348     gHypTarget.StartProcessingFile(pinput_file, pouthyp_file);
00349     gDetTarget.StartProcessingFile(pinput_file, poutdet_file);
00350     if(!pkws->ProcessFile(pinput_file, 0, 0))
00351     {
00352       pkws->Release();
00353       return 1;
00354     }
00355     gHypTarget.FinishProcessingFile();
00356     gDetTarget.FinishProcessingFile();
00357   }
00358 
00359   // the input is listfile
00360   if(plist_file)
00361   {
00362     SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00363     if(!plist)
00364     {
00365       fprintf(stderr, "Memory allocation error.");
00366       pkws->Release();
00367       return 1;
00368     }
00369     plist->SetErrorHandler(&gErrorHandler);
00370     if(!plist->AddList(plist_file))
00371     {
00372       plist->Release();
00373       pkws->Release();
00374       return 1;
00375     }
00376 
00377     plist->FirstLine();
00378     char ptarget[1024];
00379     char psource[1024];
00380     int start;
00381     int end;
00382     float prob;
00383     while(plist->GetLine(ptarget, psource, &start, &end, &prob))
00384     {
00385       gErrorHandler.LogMessage("Processing file: %s", psource);
00386 
00387       std::string outhyp_file = change_file_suffix(psource, ".hyp");
00388       std::string outdet_file = change_file_suffix(psource, ".det");
00389       
00390       if( !pouthyp_file || (pouthyp_file &&  strcmp(pouthyp_file,"none") != 0 &&  strcmp(pouthyp_file,"") != 0 )){
00391         gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00392       }else{
00393         gHypTarget.StartProcessingFile(psource, 0);
00394       }
00395 
00396       if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 &&  strcmp(poutdet_file,"") != 0 )){
00397         gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00398       }else{
00399         gDetTarget.StartProcessingFile(psource, 0);
00400       }
00401 
00402       if(!pkws->ProcessFile(psource, 0, 0))
00403       {
00404         plist->Release();
00405         pkws->Release();
00406         return 1;
00407       }
00408       gHypTarget.FinishProcessingFile();
00409       gDetTarget.FinishProcessingFile();
00410     }
00411     plist->Release();
00412   }
00413 
00414   // the input is a directory
00415   // The FileSniffer can monitor a directory for incoming files. Here, the dictionary is searched just once.
00416   if(pinput_dir)
00417   {
00418     SFileSnifferI *psniffer = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00419     if(!psniffer)
00420     {
00421       fprintf(stderr, "Memory allocation error.");
00422       pkws->Release();
00423       return 1;
00424     }
00425     psniffer->SetErrorHandler(&gErrorHandler);
00426     psniffer->AddDirectory(pinput_dir);
00427     psniffer->AddWantedSuffix(pwave_ext);
00428 
00429     if(!psniffer->FirstFile())
00430     {
00431       psniffer->Release();
00432       pkws->Release();
00433       return 1;
00434     }
00435 
00436     char psource[1024];
00437     while(psniffer->GetFile(psource, sizeof(psource) - 1))
00438     {
00439       gErrorHandler.LogMessage("Processing file: %s", psource);
00440 
00441       std::string outhyp_file = change_file_suffix(psource, ".hyp");
00442       std::string outdet_file = change_file_suffix(psource, ".det");
00443 
00444       if( !pouthyp_file || (pouthyp_file &&  strcmp(pouthyp_file,"none") != 0 &&  strcmp(pouthyp_file,"") != 0 )){
00445         gHypTarget.StartProcessingFile(psource, outhyp_file.c_str());
00446       }else{
00447         gHypTarget.StartProcessingFile(psource, 0);
00448       }
00449 
00450       if( !poutdet_file || (poutdet_file && strcmp(poutdet_file,"none") != 0 &&  strcmp(poutdet_file,"") != 0 )){
00451         gDetTarget.StartProcessingFile(psource, outdet_file.c_str());
00452       }else{
00453         gDetTarget.StartProcessingFile(psource, 0);
00454       }
00455 
00456       if(!pkws->ProcessFile(psource, 0, 0))
00457       {
00458         psniffer->Release();
00459         pkws->Release();
00460         return 1;
00461       }
00462       gHypTarget.FinishProcessingFile();
00463       gDetTarget.FinishProcessingFile();
00464     }
00465     psniffer->Release();
00466   }
00467 
00468   gHypTarget.FinishProcessing();
00469   gDetTarget.FinishProcessing();
00470 
00471   pkws->Release();
00472   return 0;
00473 }

Generated on Wed Jul 15 10:10:04 2009 for BSAPI by  doxygen 1.4.7