gid.cpp

This example shows how to use the SGenderID2I interface.

00001 /*****************************************************************
00002  *  BSAPI Gender Identification Example                          *
00003  *                                                               *
00004  *  Author      : Petr Schwarz, Pavel Matejka, Tomas Cipr        *
00005  *  Copyright   : (C) 2006-2011 by Phonexia s.r.o                *
00006  *                                                               *
00007  *  For more info, please contact us at support@phonexia.com     *
00008  *****************************************************************/
00009 
00010 #include <stdio.h>
00011 #include <errno.h>
00012 #include <stdarg.h>
00013 #include <stdlib.h>
00014 #include <string.h>
00015 #include <assert.h>
00016 #include <math.h>
00017 #include <float.h>
00018 #include <limits.h>
00019 #include "bsapi.h"
00020 
00021 #ifdef _WIN32
00022   #include "getopt.h"
00023   #define DIRSEP "\\"
00024 #else
00025   #include <unistd.h>
00026   #define DIRSEP "/"
00027 #endif
00028 
// --- log-domain arithmetic -------------------------------------------------
// Value returned by log_add() in place of log(0).
#define GID_LOG_0                 (-1.0e10)
// log_add() treats a larger operand below this limit as log(0).
#define GID_LOG_MIN               (0.5 * GID_LOG_0)
// log_add() ignores the smaller operand when the (negative) difference of the
// two operands falls below this limit.
#define GID_MIN_LOG_DIFF          (log(DBL_EPSILON))
// Floor applied to the scaled log scores in log_like2post_prob().
#define GID_LOWER_SCORE_THRESHOLD (-1e5)

// --- command line defaults (see help() and main()) --------------------------
#define GID_DEF_SCORE_SHARPNESS   (4.0f)    // -s
#define GID_DEF_GENDER_BALANCE    (0.0f)    // -b
#define GID_DEF_UNK_CLASS_THR     (80.0f)   // -u
#define GID_DEF_NCHANNELS         (1)       // -n

// --- labels printed when the winning score is below the -u threshold --------
#define GID_UNKNOWN_CLASS_LETTER  'U'
#define GID_UNKNOWN_CLASS_NAME    "UNKNOWN"
00039 
// Kind of input selected on the command line; main() dispatches on it.
enum InputModeType
{
  GID_IMT_NONE = 0,  // no input option given yet
  GID_IMT_FILE = 1,  // -i : a single input file
  GID_IMT_LIST = 2,  // -l : a list of input files
  GID_IMT_DIR  = 3   // -d : an input directory
};
00047 
// Gender classes. The values are used as indices into the score and name
// arrays in process_file(); GID_GNDR_NGENDERS is the expected number of
// scores and of activated models.
enum GenderType
{
  GID_GNDR_FEMALE   = 0,
  GID_GNDR_MALE     = 1,
  GID_GNDR_NGENDERS = 2
};
00054 
00055 void help(const char *pProgName)
00056 { 
00057   puts("\n Gender Identification                                          ");
00058 printf(" %s\n", BSAPIVersion());
00059   puts(" ================================================================ ");
00060   puts("                                                                  ");
00061 printf(" USAGE: %s [options]\n", pProgName);
00062   puts("                                                                  ");
00063   puts(" options:                                                         ");
00064   puts("   -c file           configuration file                           ");
00065   puts("   -i file           input file                                   ");
00066   puts("   -l file           list of input files                          ");
00067   puts("   -d dir            input directory                              ");
00068   puts("   -e str  [alw,wav] extensions of input files (comma separated)  ");     
00069   puts("   -w fmt  [alaw]    waveform format (lin16, lin8, alaw, mulaw)   ");
00070   puts("   -n num  [1]       number of channels in audio files            ");
00071   puts("   -o file [stdout]  output score file                            ");
00072   puts("   -r                produce scores for both genders              ");
00073   puts("   -s num  [4.0]     score sharpness (positive number)            ");
00074   puts("   -b num  [0.0]     gender balance (-1.0, 1.0)                   ");
00075   puts("                        -1.0 ... 100% males                       ");
00076   puts("                         0.0 ... equally balanced                 ");
00077   puts("                         1.0 ... 100% females                     ");
00078 printf("   -u num  [80.0]    unknown class (%c) score thr. (50.0, 100.0)\n", GID_UNKNOWN_CLASS_LETTER);
00079   puts("                        50.0 ... unknown class off                ");
00080   puts("                       >50.0 ... unknown class if less than thr.  ");
00081   puts("   -v                verbose mode                                 ");
00082   puts("                                                                  ");
00083 }
00084 
00085 // This is an error handling object. If an error occur, a message is sent to 
00086 // this object at first. Then the running function will exit with the false 
00087 // or 0 return value. The object also accept warning and logging messages.
00088 class ErrorHandler : public SErrorCallbackI 
00089 {
00090   public:
00091     ErrorHandler() : mVerbose(false) {;}
00092     
00093     virtual void BSAPI_METHOD OnTextMessage(
00094       SUnknownI *pSender, message_type type, unsigned int messageId, const char *pMessage)
00095     {
00096       unsigned int iid = pSender ? pSender->GetIID() : SIID_UNDEFINED;
00097       switch(type)
00098       {
00099         case mtError:
00100           fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00101           break;
00102         case mtWarning:
00103           fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iid), pMessage);
00104           break;
00105         case mtLog:
00106           LogMessage(pMessage);
00107           break;
00108       }
00109     }
00110     
00111     void LogMessage(const char *pMessage, ...)
00112     {
00113       if (mVerbose)
00114       {
00115         va_list ap;
00116         va_start(ap, pMessage);
00117         vfprintf(stderr, pMessage, ap);
00118         fprintf(stderr, "\n");
00119         va_end(ap);
00120       }
00121     }
00122     
00123     void SetVerbose(bool verbose = true) {mVerbose = verbose;}
00124   
00125   protected:
00126     bool mVerbose;
00127 } gErrorHandler;
00128 
// Converts a string to a float.
//
// The whole string must be a single finite number that fits into a float.
// The conversion fails for an empty string, for trailing characters, for
// values outside the float range and for "nan"/"inf".
//
//   pStr   - zero terminated string to convert
//   pValue - receives the converted value; written only on success
//
// Returns true on success. On failure an error message is printed to stderr
// and false is returned.
bool str2float(const char *pStr, float *pValue)
{
  bool ok = (pStr != 0) && (pValue != 0);
  double parsed = 0.0;

  if (ok)
  {
    char *pend_ptr = 0;

    errno = 0;
    parsed = strtod(pStr, &pend_ptr);

    // pend_ptr == pStr means that no characters were converted at all
    // (e.g. an empty string), which strtod() reports by returning 0.
    ok = (errno == 0) && (pend_ptr != pStr) && (*pend_ptr == '\0');

    // Range check before narrowing; the comparisons are false for NaN and
    // infinity, so those are rejected here as well.
    ok = ok && (parsed >= -FLT_MAX) && (parsed <= FLT_MAX);
  }

  if (!ok)
  {
    fprintf(stderr, 
      "ERROR: Can not convert string '%s' to floating point number.\n", 
      pStr ? pStr : "(null)");
    return false;
  }

  *pValue = static_cast<float>(parsed);
  return true;
}
00145 
00146 double log_add(double x, double y)
00147 {
00148   double diff;
00149   
00150   if (x < y)
00151   {
00152     diff = x - y;
00153     x = y;
00154   }
00155   else
00156   {
00157     diff = y - x;
00158   }
00159   
00160   if (x < GID_LOG_MIN)
00161     return GID_LOG_0;
00162   
00163   if (diff < GID_MIN_LOG_DIFF)
00164     return  x;
00165   
00166   return x + log(1.0 + exp(diff));
00167 } 
00168 
00169 void log_like2post_prob(
00170     float& scoreFemale, 
00171     float& scoreMale, 
00172     float sharpness = 1.0f, 
00173     float balance = 0.0f)
00174 {
00175   balance = (balance + 1.0f) / 2.0f;
00176   scoreMale   = sharpness * scoreMale   + logf(1.0f - balance);
00177   scoreFemale = sharpness * scoreFemale + logf(balance);
00178   
00179   if (scoreMale < GID_LOWER_SCORE_THRESHOLD)
00180     scoreMale = GID_LOWER_SCORE_THRESHOLD;
00181     
00182   if (scoreFemale < GID_LOWER_SCORE_THRESHOLD)
00183     scoreFemale = GID_LOWER_SCORE_THRESHOLD;
00184   
00185   float sum = log_add(scoreMale, scoreFemale);
00186   
00187   scoreMale   -= sum;
00188   scoreFemale -= sum;
00189 
00190   // renormalization - in the first pass, the registers could have been in 
00191   // an arrea of saturation with very high rounding
00192   sum = log_add(scoreMale, scoreFemale);
00193 
00194   scoreMale   = expf(scoreMale   - sum) * 100.0f;
00195   scoreFemale = expf(scoreFemale - sum) * 100.0f;
00196 }
00197 
00198 bool process_file(const char *pInputFile,
00199                   FILE *pOutput,
00200                   SGenderID2I *pGenderId,
00201                   SScoreMemoryI *pScoreMemory,
00202                   float sharpness,
00203                   float balance,
00204                   float unkThreshold,
00205                   bool  dumpAllScores)
00206 {
00207   static bool header_printed = false;
00208 
00209   gErrorHandler.LogMessage("Processing file: %s", pInputFile);
00210   if (!pGenderId->TestFile(pInputFile))
00211     return false;
00212 
00213   if (pScoreMemory->Empty())
00214     return false;
00215 
00216   SScoresI *pscores_container = pScoreMemory->GetScores();
00217   assert(pscores_container);
00218   int length = pscores_container->GetNScores();
00219   char **ppnames = pscores_container->GetNames();
00220   float *pscores = pscores_container->GetScores();
00221 
00222   assert(pscores && ppnames && (GID_GNDR_NGENDERS == length));
00223   
00224   log_like2post_prob(
00225     pscores[GID_GNDR_FEMALE], 
00226     pscores[GID_GNDR_MALE], 
00227     sharpness, balance);
00228   
00229   int winner = (pscores[GID_GNDR_FEMALE] > pscores[GID_GNDR_MALE]) ? 
00230     GID_GNDR_FEMALE : GID_GNDR_MALE;
00231   
00232   if (dumpAllScores)
00233   {
00234     if (!header_printed)
00235     {
00236       // print output score file header
00237       for (int i = 0; i < length; i++)
00238         fprintf(pOutput, "%s ", ppnames[i]);
00239       fprintf(pOutput, "\n");
00240 
00241       header_printed = true;
00242     }
00243 
00244     fprintf(pOutput, "%s %f %f %c\n", 
00245       pInputFile,
00246       pscores[GID_GNDR_FEMALE], 
00247       pscores[GID_GNDR_MALE],
00248       (pscores[winner] < unkThreshold) ? GID_UNKNOWN_CLASS_LETTER : ppnames[winner][0]);
00249   }
00250   else
00251   {
00252     if (pscores[winner] < unkThreshold)
00253       fprintf(pOutput, "%s %s %f %c\n", pInputFile, GID_UNKNOWN_CLASS_NAME, pscores[winner], ppnames[winner][0]);
00254     else
00255       fprintf(pOutput, "%s %s %f\n", pInputFile, ppnames[winner], pscores[winner]);
00256   }
00257 
00258   return true;
00259 }
00260 
00261 bool process_list(const char *pInputList,
00262                   FILE *pOutput,
00263                   SGenderID2I *pGenderId,
00264                   SScoreMemoryI *pScoreMemory,
00265                   float sharpness,
00266                   float balance,
00267                   float unkThreshold,
00268                   bool  dumpAllScores)
00269 {
00270   gErrorHandler.LogMessage("Processing list of files: %s", pInputList);
00271   SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00272   if (!plist)
00273   {
00274     fprintf(stderr, "ERROR: Memory allocation error.\n");
00275     return false;
00276   }
00277 
00278   plist->SetErrorHandler(&gErrorHandler);
00279   if (!plist->AddList(pInputList))
00280   {
00281     plist->Release();
00282     return false;
00283   }
00284   
00285   char ptarget[1024], psource[1024];
00286   int start, end;
00287   float prob;
00288   
00289   plist->FirstLine();      
00290   while (plist->GetLine(ptarget, psource, &start, &end, &prob))
00291   {
00292     process_file(psource, pOutput, pGenderId, pScoreMemory, sharpness, 
00293       balance, unkThreshold, dumpAllScores);
00294   }
00295   
00296   plist->Release();
00297   return true;
00298 }
00299 
00300 bool process_dir(const char *pInputDir,
00301                  const char *pSuffixes,
00302                  FILE *pOutput,
00303                  SGenderID2I *pGenderId,
00304                  SScoreMemoryI *pScoreMemory,
00305                  float sharpness,
00306                  float balance,
00307                  float unkThreshold,
00308                  bool  dumpAllScores)
00309 {
00310   gErrorHandler.LogMessage("Processing directory: %s", pInputDir);
00311   SFileSnifferI *pdir = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00312   if (!pdir)
00313   {
00314     fprintf(stderr, "ERROR: Memory allocation error.\n");
00315     return false;
00316   }
00317   
00318   pdir->SetErrorHandler(&gErrorHandler);
00319   if (!pdir->AddDirectory(pInputDir))
00320   {
00321     pdir->Release();
00322     return false;
00323   }
00324   
00325   char psuffixes[1024];
00326   psuffixes[sizeof(psuffixes) - 1] = '\0';
00327   strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00328   char *psuffix = strtok(psuffixes, ",; ");
00329   while (psuffix)
00330   {
00331     pdir->AddWantedSuffix(psuffix);
00332     psuffix = strtok(0, ",; ");
00333   }
00334 
00335   if (!pdir->FirstFile())
00336   {
00337     pdir->Release();
00338     return false;
00339   }
00340       
00341   char psource[1024];
00342   while(pdir->GetFile(psource, sizeof(psource) - 1))
00343   {
00344     process_file(psource, pOutput, pGenderId, pScoreMemory, sharpness, 
00345       balance, unkThreshold, dumpAllScores);
00346   }
00347   
00348   pdir->Release();
00349   return true; 
00350 }
00351 
00352 int main(int argc, char *argv[])
00353 {
00354   // init configuration variables
00355   InputModeType mode        = GID_IMT_NONE;
00356   const char *pconfig_file  = 0;
00357   const char *pinput_format = "alaw";
00358   const char *pinput_ext    = "alw,wav";
00359   const char *pinput        = 0;
00360   const char *poutput_file  = 0;
00361   float sharpness           = GID_DEF_SCORE_SHARPNESS;
00362   float balance             = GID_DEF_GENDER_BALANCE;
00363   float unk_thr             = GID_DEF_UNK_CLASS_THR;
00364   int   nchannels           = GID_DEF_NCHANNELS;
00365   bool  dump_all_scores     = false;
00366 
00367   // no input parameters given
00368   if(argc == 1)
00369   {
00370     help(argv[0]);
00371     return 0;
00372   }
00373 
00374   // command line parsing
00375   int c;
00376   optind = 0;
00377   while ((c = getopt(argc, argv, const_cast<char *>("c:i:l:d:e:w:n:o:rs:b:u:v"))) != -1)
00378   {
00379     switch(c)
00380     {
00381       case 'c': pconfig_file  = optarg; break;
00382       case 'i': pinput = optarg; mode = GID_IMT_FILE; break;
00383       case 'l': pinput = optarg; mode = GID_IMT_LIST; break;
00384       case 'd': pinput = optarg; mode = GID_IMT_DIR;  break;
00385       case 'e': pinput_ext    = optarg; break;
00386       case 'w': pinput_format = optarg; break;
00387       case 'o': poutput_file  = optarg; break;
00388       case 'r': dump_all_scores = true; break;
00389       case 's': if (!str2float(optarg, &sharpness)) return 1; break;
00390       case 'b': if (!str2float(optarg, &balance))   return 1; break;
00391       case 'u': if (!str2float(optarg, &unk_thr))   return 1; break;
00392       case 'n': nchannels = atoi(optarg);   break;
00393       case 'v': gErrorHandler.SetVerbose(); break;
00394       case '?':
00395         fprintf(stderr, "ERROR: Command line parsing error.\n");
00396         return 1;
00397     }
00398   }
00399   
00400   // check if the config file was set
00401   if (!pconfig_file)
00402   {
00403     fprintf(stderr, "ERROR: Configuration file was not set (-c).\n");
00404     return 1;
00405   }
00406 
00407   // check if either input file or list file was set
00408   if (GID_IMT_NONE == mode)
00409   {
00410     fprintf(stderr, "ERROR: Input file, list or directory must be set (-i | -l | -d).\n");
00411     return 1;
00412   }
00413 
00414   // check the number of channels
00415   if (nchannels < 1)
00416   {
00417     fprintf(stderr, "ERROR: Invalid number of channels (-n).\n");
00418     return 1;
00419   }
00420 
00421   // check if sharpness is in allowed interval
00422   if (sharpness < 0.0f)
00423   {
00424     fprintf(stderr, "ERROR: Wrong value of sharpness '%f'. "
00425       "Must be positive number. (-s)\n", sharpness);
00426     return 1;
00427   }
00428 
00429   // check if balance is in allowed interval
00430   if (balance < -1.0f || balance > 1.0f)
00431   {
00432     fprintf(stderr, "ERROR: Wrong value of balance '%f'. "
00433       "Must be in (-1.0, 1.0). (-b)\n", balance);
00434     return 1;
00435   }
00436 
00437   // check if unknown class threshold is in allowed interval
00438   if (unk_thr < 50.0f || unk_thr > 100.0f)
00439   {
00440     fprintf(stderr, "ERROR: Wrong value of unknown class score threshold '%f'. "
00441       "Must be in (50.0, 100.0). (-u)\n", unk_thr);
00442     return 1;
00443   }
00444 
00445   // register license file for SGenderID2I
00446   SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00447   if (plicman)
00448   {
00449     plicman->SetErrorHandler(&gErrorHandler);
00450     plicman->RegisterLicenseFile("license.dat");
00451   }
00452   
00453   // init gender ID
00454   SGenderID2I *pgender_id = static_cast<SGenderID2I *>(BSAPICreateInstance(SIID_GENDERID2));
00455   if (!pgender_id)
00456   {
00457     return 1;
00458   }
00459 
00460   pgender_id->SetErrorHandler(&gErrorHandler);
00461 
00462   if (!pgender_id->Init(pconfig_file))
00463   {
00464     pgender_id->Release();
00465     return 1;
00466   }
00467 
00468   // It is also possible to configure the waveform source.
00469   // If there are multiple channels, the channels are concatenated and just one decision made.
00470   SWaveformFormatConvertorI *pwc = pgender_id->GetWaveformFormatConvertor();
00471   if (pwc)
00472   {
00473     pwc->SetNChannels(nchannels);
00474     pwc->SetInputFormatStr(pinput_format);
00475   }
00476 
00477   // By default, the scores are sent from the gender identification system using a callback function.
00478   // It is possible to attach a score memory that remembers the last scores and enables to access them any time
00479   // by the GetScores function.
00480   SScoreMemoryI *pscore_mem = static_cast<SScoreMemoryI *>(BSAPICreateInstance(SIID_SCOREMEMORY));
00481   if(!pscore_mem)
00482   {
00483     fprintf(stderr, "ERROR: Memory allocation error.\n");
00484     pgender_id->Release();
00485     return 1;
00486   }
00487 
00488   pscore_mem->SetErrorHandler(&gErrorHandler);
00489   pgender_id->SetTarget(pscore_mem);
00490 
00491   // activate gender models before scoring
00492   if (!pgender_id->ActivateAllModels())
00493   {
00494     pgender_id->Release();
00495     pscore_mem->Release();
00496     return 1;
00497   }
00498 
00499   // check that both models are activated
00500   SModelListI *pmodel_list = pgender_id->GetModelList();
00501   if (!pmodel_list)
00502   {
00503     pgender_id->Release();
00504     pscore_mem->Release();
00505     return 1;
00506   }
00507 
00508   if (GID_GNDR_NGENDERS != pmodel_list->GetNModels())
00509   {
00510     fprintf(stderr, "ERROR: Some of gender models are missing.\n");
00511     pgender_id->Release();
00512     pscore_mem->Release();
00513     return 1;
00514   }
00515 
00516   // open output file
00517   FILE *poutput = poutput_file ? fopen(poutput_file, "w") : stdout;
00518   if (!poutput)
00519   {
00520     fprintf(stderr, "ERROR: Could not open output file '%s'.\n", poutput_file);
00521     pgender_id->Release();
00522     pscore_mem->Release();
00523     return 1;
00524   }
00525 
00526   // process file(s)
00527   bool processed = false;
00528   switch (mode)
00529   {
00530     case GID_IMT_FILE:
00531       processed = process_file(
00532         pinput, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00533       break;
00534       
00535     case GID_IMT_LIST:
00536       processed = process_list(
00537         pinput, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00538       break;
00539       
00540     case GID_IMT_DIR:
00541       processed = process_dir(
00542         pinput, pinput_ext, poutput, pgender_id, pscore_mem, sharpness, balance, unk_thr, dump_all_scores);
00543       break;
00544       
00545     case GID_IMT_NONE:
00546       break;
00547   }
00548 
00549   // cleanup
00550   if (poutput_file)
00551     fclose(poutput);
00552 
00553   pgender_id->Release();
00554   pscore_mem->Release();
00555   return (processed ? 0 : 1);
00556 }

Generated on Wed Apr 11 10:00:17 2012 for BSAPI by  doxygen 1.4.7