00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <stdio.h>
00011 #include <errno.h>
00012 #include <stdarg.h>
00013 #include <stdlib.h>
00014 #include <string.h>
00015 #include <assert.h>
00016 #include <math.h>
00017 #include <float.h>
00018 #include <limits.h>
00019 #include "bsapi.h"
00020
00021 #ifdef WIN32
00022 #include "getopt.h"
00023 #define DIRSEP "\\"
00024 #else
00025 #include <unistd.h>
00026 #define DIRSEP "/"
00027 #endif
00028
// Log-domain constants used by log_add().
#define GID_LOG_0 (-1.0e10)                  // result returned when the larger operand is below GID_LOG_MIN
#define GID_LOG_MIN (0.5 * GID_LOG_0)        // operands whose maximum is below this yield GID_LOG_0
#define GID_MIN_LOG_DIFF (log(DBL_EPSILON))  // below this difference the smaller operand is ignored

// Lower clamp applied to the scaled scores in log_like2post_prob().
#define GID_LOWER_SCORE_THRESHOLD (-1e5)

// Default values of the command-line options -s, -b, -u and -n.
#define GID_DEF_SCORE_SHARPNESS 4.0f
#define GID_DEF_GENDER_BALANCE 0.0f
#define GID_DEF_UNK_CLASS_THR 80.0f
#define GID_DEF_NCHANNELS 1

// Label used in the output when the winning score is below the threshold.
#define GID_UNKNOWN_CLASS_LETTER 'U'
#define GID_UNKNOWN_CLASS_NAME "UNKNOWN"
00039
// Kind of input selected on the command line (-i, -l or -d).
enum InputModeType
{
  GID_IMT_NONE = 0,  // no input option given yet
  GID_IMT_FILE = 1,  // -i: single input file
  GID_IMT_LIST = 2,  // -l: list of input files
  GID_IMT_DIR  = 3,  // -d: input directory
};
00047
// Indices into the model name/score arrays returned by the gender
// identifier (see process_file()). Index 0 is not used by this program.
enum GenderType
{
  GID_GNDR_FEMALE   = 1,
  GID_GNDR_MALE     = 2,
  GID_GNDR_NGENDERS = 3   // expected number of entries in the model arrays
};
00054
00055 void help(const char *pProgName)
00056 {
00057 puts("\n Gender Identification ");
00058 printf(" %s\n", BSAPIVersion());
00059 puts(" ================================================================ ");
00060 puts(" ");
00061 printf(" USAGE: %s [options]\n", pProgName);
00062 puts(" ");
00063 puts(" options: ");
00064 puts(" -c file configuration file ");
00065 puts(" -i file input file ");
00066 puts(" -l file list of input files ");
00067 puts(" -d dir input directory ");
00068 puts(" -e str [alw,wav] extensions of input files (comma separated) ");
00069 puts(" -w fmt [alaw] waveform format (lin16, lin8, alaw, mulaw) ");
00070 puts(" -n num [1] number of channels in audio files ");
00071 puts(" -o file [stdout] output score file ");
00072 puts(" -r produce scores for both genders ");
00073 puts(" -s num [4.0] score sharpness (positive number) ");
00074 puts(" -b num [0.0] gender balance (-1.0, 1.0) ");
00075 puts(" -1.0 ... 100% males ");
00076 puts(" 0.0 ... equally balanced ");
00077 puts(" 1.0 ... 100% females ");
00078 printf(" -u num [80.0] unknown class (%c) score thr. (50.0, 100.0)\n", GID_UNKNOWN_CLASS_LETTER);
00079 puts(" 50.0 ... unknown class off ");
00080 puts(" >50.0 ... unknown class if less than thr. ");
00081 puts(" -v verbose mode ");
00082 puts(" ");
00083 }
00084
00085
00086
00087
00088 class ErrorHandler : public SErrorCallbackI
00089 {
00090 public:
00091 ErrorHandler() : mVerbose(false) {;}
00092
00093 virtual void BSAPI_METHOD OnTextMessage(
00094 unsigned int iId, message_type type, unsigned int messageId, char *pMessage)
00095 {
00096 switch(type)
00097 {
00098 case mtError:
00099 fprintf(stderr, "ERROR: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00100 break;
00101 case mtWarning:
00102 fprintf(stderr, "WARNING: %s - %s\n", BSAPIInterfaceId2Text(iId), pMessage);
00103 break;
00104 case mtLog:
00105 LogMessage(pMessage);
00106 break;
00107 }
00108 }
00109
00110 void LogMessage(const char *pMessage, ...)
00111 {
00112 if (mVerbose)
00113 {
00114 va_list ap;
00115 va_start(ap, pMessage);
00116 vfprintf(stderr, pMessage, ap);
00117 fprintf(stderr, "\n");
00118 va_end(ap);
00119 }
00120 }
00121
00122 void SetVerbose(bool verbose = true) {mVerbose = verbose;}
00123
00124 protected:
00125 bool mVerbose;
00126 } gErrorHandler;
00127
// Converts the whole string pStr to a float.
//
// pStr   - NUL-terminated text to convert.
// pValue - receives the result of strtof() (written even on failure).
//
// Returns true when the complete string was a valid number. Returns false
// and prints an error to stderr when strtof() reports an error through
// errno, when no characters could be converted (e.g. an empty string), or
// when unconverted characters remain after the number.
bool str2float(const char *pStr, float *pValue)
{
  char *pend_ptr = NULL;

  errno = 0;
  *pValue = strtof(pStr, &pend_ptr);

  // pend_ptr == pStr means strtof() consumed nothing; without this check an
  // empty string would be silently accepted as 0.0.
  if (errno != 0 || pend_ptr == pStr || *pend_ptr != '\0')
  {
    fprintf(stderr,
      "ERROR: Can not convert string '%s' to floating point number.\n", pStr);
    return false;
  }

  return true;
}
00144
00145 void print_output_header(FILE *pOutput, SGenderIDI *pGenderId)
00146 {
00147 int length = 0;
00148 char **ppnames = pGenderId->GetModelNames(&length);
00149 for (int i = 1; i < length; i++)
00150 fprintf(pOutput, "%s ", ppnames[i]);
00151 fprintf(pOutput, "\n");
00152 }
00153
00154 double log_add(double x, double y)
00155 {
00156 double diff;
00157
00158 if (x < y)
00159 {
00160 diff = x - y;
00161 x = y;
00162 }
00163 else
00164 {
00165 diff = y - x;
00166 }
00167
00168 if (x < GID_LOG_MIN)
00169 return GID_LOG_0;
00170
00171 if (diff < GID_MIN_LOG_DIFF)
00172 return x;
00173
00174 return x + log(1.0 + exp(diff));
00175 }
00176
00177 void log_like2post_prob(
00178 float& scoreFemale,
00179 float& scoreMale,
00180 float sharpness = 1.0f,
00181 float balance = 0.0f)
00182 {
00183 balance = (balance + 1.0f) / 2.0f;
00184 scoreMale = sharpness * scoreMale + logf(1.0f - balance);
00185 scoreFemale = sharpness * scoreFemale + logf(balance);
00186
00187 if (scoreMale < GID_LOWER_SCORE_THRESHOLD)
00188 scoreMale = GID_LOWER_SCORE_THRESHOLD;
00189
00190 if (scoreFemale < GID_LOWER_SCORE_THRESHOLD)
00191 scoreFemale = GID_LOWER_SCORE_THRESHOLD;
00192
00193 float sum = log_add(scoreMale, scoreFemale);
00194
00195 scoreMale -= sum;
00196 scoreFemale -= sum;
00197
00198
00199
00200 sum = log_add(scoreMale, scoreFemale);
00201
00202 scoreMale = expf(scoreMale - sum) * 100.0f;
00203 scoreFemale = expf(scoreFemale - sum) * 100.0f;
00204 }
00205
00206 bool process_file(char *pInputFile,
00207 FILE *pOutput,
00208 SGenderIDI *pGenderId,
00209 float sharpness,
00210 float balance,
00211 float unkThreshold,
00212 bool dumpAllScores)
00213 {
00214 gErrorHandler.LogMessage("Processing file: %s", pInputFile);
00215 if (!pGenderId->TestFile(pInputFile))
00216 return false;
00217
00218 fprintf(pOutput, "%s", pInputFile);
00219
00220 int length = 0;
00221 char **ppnames = pGenderId->GetModelNames(&length);
00222 float *pscores = pGenderId->GetModelScores(&length);
00223 assert(pscores && ppnames && (GID_GNDR_NGENDERS == length));
00224
00225 log_like2post_prob(
00226 pscores[GID_GNDR_FEMALE],
00227 pscores[GID_GNDR_MALE],
00228 sharpness, balance);
00229
00230 int winner = (pscores[GID_GNDR_FEMALE] > pscores[GID_GNDR_MALE]) ?
00231 GID_GNDR_FEMALE : GID_GNDR_MALE;
00232
00233 if (dumpAllScores)
00234 {
00235 fprintf(pOutput, " %f %f %c\n",
00236 pscores[GID_GNDR_FEMALE],
00237 pscores[GID_GNDR_MALE],
00238 (pscores[winner] < unkThreshold) ? GID_UNKNOWN_CLASS_LETTER : ppnames[winner][0]);
00239 }
00240 else
00241 {
00242 if (pscores[winner] < unkThreshold)
00243 fprintf(pOutput, " %s %f %c\n", GID_UNKNOWN_CLASS_NAME, pscores[winner], ppnames[winner][0]);
00244 else
00245 fprintf(pOutput, " %s %f\n", ppnames[winner], pscores[winner]);
00246 }
00247
00248 return true;
00249 }
00250
00251 bool process_list(char *pInputList,
00252 FILE *pOutput,
00253 SGenderIDI *pGenderId,
00254 float sharpness,
00255 float balance,
00256 float unkThreshold,
00257 bool dumpAllScores)
00258 {
00259 gErrorHandler.LogMessage("Processing list of files: %s", pInputList);
00260 SFileListI *plist = static_cast<SFileListI *>(BSAPICreateInstance(SIID_FILELIST));
00261 if (!plist)
00262 {
00263 fprintf(stderr, "ERROR: Memory allocation error.\n");
00264 return false;
00265 }
00266
00267 plist->SetErrorHandler(&gErrorHandler);
00268 if (!plist->AddList(pInputList))
00269 {
00270 plist->Release();
00271 return false;
00272 }
00273
00274 char ptarget[1024], psource[1024];
00275 int start, end;
00276 float prob;
00277
00278 plist->FirstLine();
00279 while (plist->GetLine(ptarget, psource, &start, &end, &prob))
00280 {
00281 process_file(psource, pOutput, pGenderId, sharpness,
00282 balance, unkThreshold, dumpAllScores);
00283 }
00284
00285 plist->Release();
00286 return true;
00287 }
00288
00289 bool process_dir(char *pInputDir,
00290 const char *pSuffixes,
00291 FILE *pOutput,
00292 SGenderIDI *pGenderId,
00293 float sharpness,
00294 float balance,
00295 float unkThreshold,
00296 bool dumpAllScores)
00297 {
00298 gErrorHandler.LogMessage("Processing directory: %s", pInputDir);
00299 SFileSnifferI *pdir = static_cast<SFileSnifferI *>(BSAPICreateInstance(SIID_FILESNIFFER));
00300 if (!pdir)
00301 {
00302 fprintf(stderr, "ERROR: Memory allocation error.\n");
00303 return false;
00304 }
00305
00306 pdir->SetErrorHandler(&gErrorHandler);
00307 if (!pdir->AddDirectory(pInputDir))
00308 {
00309 pdir->Release();
00310 return false;
00311 }
00312
00313 char psuffixes[1024];
00314 psuffixes[sizeof(psuffixes) - 1] = '\0';
00315 strncpy(psuffixes, pSuffixes, sizeof(psuffixes) - 1);
00316 char *psuffix = strtok(psuffixes, ",; ");
00317 while (psuffix)
00318 {
00319 pdir->AddWantedSuffix(psuffix);
00320 psuffix = strtok(0, ",; ");
00321 }
00322
00323 if (!pdir->FirstFile())
00324 {
00325 pdir->Release();
00326 return false;
00327 }
00328
00329 char psource[1024];
00330 while(pdir->GetFile(psource, sizeof(psource) - 1))
00331 {
00332 process_file(psource, pOutput, pGenderId, sharpness,
00333 balance, unkThreshold, dumpAllScores);
00334 }
00335
00336 pdir->Release();
00337 }
00338
00339 int main(int argc, char *argv[])
00340 {
00341
00342 InputModeType mode = GID_IMT_NONE;
00343 char *pconfig_file = 0;
00344 char *pinput_format = "alaw";
00345 char *pinput_ext = "alw,wav";
00346 char *pinput = 0;
00347 char *poutput_file = 0;
00348 float sharpness = GID_DEF_SCORE_SHARPNESS;
00349 float balance = GID_DEF_GENDER_BALANCE;
00350 float unk_thr = GID_DEF_UNK_CLASS_THR;
00351 int nchannels = GID_DEF_NCHANNELS;
00352 bool dump_all_scores = false;
00353
00354
00355 if(argc == 1)
00356 {
00357 help(argv[0]);
00358 return 0;
00359 }
00360
00361
00362 int c;
00363 optind = 0;
00364 while ((c = getopt(argc, argv, const_cast<char *>("c:i:l:d:e:w:n:o:rs:b:u:v"))) != -1)
00365 {
00366 switch(c)
00367 {
00368 case 'c': pconfig_file = optarg; break;
00369 case 'i': pinput = optarg; mode = GID_IMT_FILE; break;
00370 case 'l': pinput = optarg; mode = GID_IMT_LIST; break;
00371 case 'd': pinput = optarg; mode = GID_IMT_DIR; break;
00372 case 'e': pinput_ext = optarg; break;
00373 case 'w': pinput_format = optarg; break;
00374 case 'o': poutput_file = optarg; break;
00375 case 'r': dump_all_scores = true; break;
00376 case 's': if (!str2float(optarg, &sharpness)) return 1; break;
00377 case 'b': if (!str2float(optarg, &balance)) return 1; break;
00378 case 'u': if (!str2float(optarg, &unk_thr)) return 1; break;
00379 case 'n': nchannels = atoi(optarg); break;
00380 case 'v': gErrorHandler.SetVerbose(); break;
00381 case '?':
00382 fprintf(stderr, "ERROR: Command line parsing error.\n");
00383 return 1;
00384 }
00385 }
00386
00387
00388 if (!pconfig_file)
00389 {
00390 fprintf(stderr, "ERROR: Configuration file was not set (-c).\n");
00391 return 1;
00392 }
00393
00394
00395 if (GID_IMT_NONE == mode)
00396 {
00397 fprintf(stderr, "ERROR: Input file, list or directory must be set (-i | -l | -d).\n");
00398 return 1;
00399 }
00400
00401
00402 if (nchannels < 1)
00403 {
00404 fprintf(stderr, "ERROR: Invalid number of channels (-n).\n");
00405 return 1;
00406 }
00407
00408
00409 if (sharpness < 0.0f)
00410 {
00411 fprintf(stderr, "ERROR: Wrong value of sharpness '%f'. "
00412 "Must be positive number. (-s)\n", sharpness);
00413 return 1;
00414 }
00415
00416
00417 if (balance < -1.0f || balance > 1.0f)
00418 {
00419 fprintf(stderr, "ERROR: Wrong value of balance '%f'. "
00420 "Must be in (-1.0, 1.0). (-b)\n", balance);
00421 return 1;
00422 }
00423
00424
00425 if (unk_thr < 50.0f || unk_thr > 100.0f)
00426 {
00427 fprintf(stderr, "ERROR: Wrong value of unknown class score threshold '%f'. "
00428 "Must be in (50.0, 100.0). (-u)\n", unk_thr);
00429 return 1;
00430 }
00431
00432
00433 SLicenseManagerI *plicman = BSAPIGetLicenseManager();
00434 if (plicman)
00435 {
00436 plicman->SetErrorHandler(&gErrorHandler);
00437 plicman->RegisterLicenseFile("license.dat");
00438 }
00439
00440
00441 SGenderIDI *pgender_id = static_cast<SGenderIDI *>(BSAPICreateInstance(SIID_GENDERID));
00442 if (!pgender_id)
00443 {
00444 return 1;
00445 }
00446
00447 pgender_id->SetErrorHandler(&gErrorHandler);
00448
00449 if (!pgender_id->Init(pconfig_file))
00450 {
00451 pgender_id->Release();
00452 return 1;
00453 }
00454
00455
00456
00457 SWaveformFormatConvertorI *pwc = pgender_id->GetWaveformFormatConvertor();
00458 if (pwc)
00459 {
00460 pwc->SetNChannels(nchannels);
00461 pwc->SetInputFormatStr(pinput_format);
00462 }
00463
00464
00465 if (!pgender_id->ActivateAllModels())
00466 {
00467 pgender_id->Release();
00468 return 1;
00469 }
00470
00471
00472 int nmodels = 0;
00473 pgender_id->GetModelNames(&nmodels);
00474 if (GID_GNDR_NGENDERS != nmodels)
00475 {
00476 fprintf(stderr, "ERROR: Some of gender models are missing.\n");
00477 pgender_id->Release();
00478 return 1;
00479 }
00480
00481
00482 FILE *poutput = poutput_file ? fopen(poutput_file, "w") : stdout;
00483 if (!poutput)
00484 {
00485 fprintf(stderr, "ERROR: Could not open output file '%s'.\n", poutput_file);
00486 pgender_id->Release();
00487 return 1;
00488 }
00489
00490
00491 if (dump_all_scores)
00492 print_output_header(poutput, pgender_id);
00493
00494
00495 bool processed = false;
00496 switch (mode)
00497 {
00498 case GID_IMT_FILE:
00499 processed = process_file(
00500 pinput, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00501 break;
00502
00503 case GID_IMT_LIST:
00504 processed = process_list(
00505 pinput, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00506 break;
00507
00508 case GID_IMT_DIR:
00509 processed = process_dir(
00510 pinput, pinput_ext, poutput, pgender_id, sharpness, balance, unk_thr, dump_all_scores);
00511 break;
00512
00513 case GID_IMT_NONE:
00514 break;
00515 }
00516
00517
00518 if (poutput_file)
00519 fclose(poutput);
00520
00521 pgender_id->Release();
00522 return (processed ? 0 : 1);
00523 }