/* * ptts.cxx * * Text To Speech classes * * Portable Windows Library * * Copyright (c) 2002 Equivalence Pty. Ltd. * * The contents of this file are subject to the Mozilla Public License * Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * The Original Code is Portable Windows Library. * * The Initial Developer of the Original Code is Equivalence Pty. Ltd. * * Contributor(s): ______________________________________. * * $Log: ptts.cxx,v $ * Revision 1.18 2005/11/30 12:47:41 csoutheren * Removed tabs, reformatted some code, and changed tags for Doxygen * * Revision 1.17 2005/01/04 08:09:42 csoutheren * Fixed Linux configure problems * * Revision 1.16 2005/01/04 07:44:03 csoutheren * More changes to implement the new configuration methodology, and also to * attack the global static problem * * Revision 1.15 2004/10/23 10:56:15 ykiryanov * Added ifdef _WIN32_WCE for PocketPC 2003 SDK port * * Revision 1.14 2004/07/12 09:17:20 csoutheren * Fixed warnings and errors under Linux * * Revision 1.13 2004/07/06 10:12:54 csoutheren * Added static integer o factory template to assist in ensuring factories are instantiated * * Revision 1.12 2004/06/30 12:17:05 rjongbloed * Rewrite of plug in system to use single global variable for all factories to avoid all sorts * of issues with startup orders and Windows DLL multiple instances. * * Revision 1.11 2004/06/19 09:02:32 csoutheren * Added rate and volume to keep Festival happy * * Revision 1.10 2004/06/19 07:18:59 csoutheren * Change TTS engine registration to use abstract factory code * * Revision 1.9 2004/04/09 06:52:17 rjongbloed * Removed #pargma linker command for /delayload of DLL as documentations sais that * you cannot do this. * * Revision 1.8 2004/02/23 23:52:20 csoutheren * Added pragmas to avoid every Windows application needing to include libs explicitly * * Revision 1.7 2003/10/30 11:28:25 rjongbloed * Added dircetory name for Speech API under Windows. * * Revision 1.6 2003/09/17 06:02:13 csoutheren * Fixed windows header file problem caused by removing recursive headers * * Revision 1.5 2003/04/17 01:21:33 craigs * Added import of pybuildopts to correctly detect if P_SAPI is set * * Revision 1.4 2003/04/16 08:00:19 robertj * Windoes psuedo autoconf support * * Revision 1.3 2002/11/06 22:47:25 robertj * Fixed header comment (copyright etc) * * Revision 1.2 2002/08/14 15:18:25 craigs * Fixed Festval implementation * * Revision 1.1 2002/08/06 04:45:58 craigs * Initial version * */ #ifdef __GNUC__ #pragma implementation "ptts.h" #endif #include "ptbuildopts.h" //////////////////////////////////////////////////////////// #include PINSTANTIATE_FACTORY(PTextToSpeech, PString) // WIN32 COM stuff must be first in file to compile properly #if P_SAPI #if defined(P_SAPI_LIBRARY) #pragma comment(lib, P_SAPI_LIBRARY) #endif #ifndef _WIN32_DCOM #define _WIN32_DCOM 1 #endif #include #include #include #include #include #include #endif //////////////////////////////////////////////////////////// // this disables the winsock2 stuff in the Windows contain.h, to avoid header file problems #define P_KNOCKOUT_WINSOCK2 #include #include #include //////////////////////////////////////////////////////////// // // Text to speech using Microsoft's Speech API (SAPI) // Can be downloaded from http://www.microsoft.com/speech/download/sdk51 // #if P_SAPI #define MAX_FN_SIZE 1024 class PTextToSpeech_SAPI : public PTextToSpeech { PCLASSINFO(PTextToSpeech_SAPI, PTextToSpeech); public: PTextToSpeech_SAPI(); ~PTextToSpeech_SAPI(); // overrides PStringArray GetVoiceList(); BOOL SetVoice(const PString & voice); BOOL SetRate(unsigned rate); unsigned GetRate(); BOOL SetVolume(unsigned volume); unsigned GetVolume(); BOOL OpenFile (const PFilePath & fn); BOOL OpenChannel(PChannel * channel); BOOL IsOpen() { return opened; } BOOL Close (); BOOL Speak (const PString & str, TextType hint); protected: BOOL OpenVoice(); static PMutex refMutex; static int * refCount; PMutex mutex; CComPtr m_cpVoice; CComPtr cpWavStream; BOOL opened; BOOL usingFile; unsigned rate, volume; PString voice; }; PFactory::Worker sapiTTSFactory("Microsoft SAPI", false); int * PTextToSpeech_SAPI::refCount; PMutex PTextToSpeech_SAPI::refMutex; PTextToSpeech_SAPI::PTextToSpeech_SAPI() { PWaitAndSignal m(refMutex); if (refCount == NULL) { refCount = new int; *refCount = 1; ::CoInitializeEx(NULL, COINIT_MULTITHREADED); } else { (*refCount)++; } usingFile = opened = FALSE; } PTextToSpeech_SAPI::~PTextToSpeech_SAPI() { PWaitAndSignal m(refMutex); if ((--(*refCount)) == 0) { ::CoUninitialize(); delete refCount; refCount = NULL; } } BOOL PTextToSpeech_SAPI::OpenVoice() { PWaitAndSignal m(mutex); HRESULT hr = m_cpVoice.CoCreateInstance(CLSID_SpVoice); return (opened = SUCCEEDED(hr)); } BOOL PTextToSpeech_SAPI::OpenChannel(PChannel *) { PWaitAndSignal m(mutex); Close(); usingFile = FALSE; return (opened = FALSE); } BOOL PTextToSpeech_SAPI::OpenFile(const PFilePath & fn) { PWaitAndSignal m(mutex); Close(); usingFile = TRUE; if (!OpenVoice()) return FALSE; CSpStreamFormat wavFormat; wavFormat.AssignFormat(SPSF_8kHz16BitMono); WCHAR szwWavFileName[MAX_FN_SIZE] = L"";; USES_CONVERSION; wcscpy(szwWavFileName, T2W((const char *)fn)); HRESULT hr = SPBindToFile(szwWavFileName, SPFM_CREATE_ALWAYS, &cpWavStream, &wavFormat.FormatId(), wavFormat.WaveFormatExPtr()); if (!SUCCEEDED(hr)) { cpWavStream.Release(); return FALSE; } hr = m_cpVoice->SetOutput(cpWavStream, TRUE); return (opened = SUCCEEDED(hr)); } BOOL PTextToSpeech_SAPI::Close() { PWaitAndSignal m(mutex); if (!opened) return TRUE; if (usingFile) { if (opened) m_cpVoice->WaitUntilDone(INFINITE); cpWavStream.Release(); } if (opened) m_cpVoice.Release(); opened = FALSE; return TRUE; } BOOL PTextToSpeech_SAPI::Speak(const PString & otext, TextType hint) { PWaitAndSignal m(mutex); if (!IsOpen()) return FALSE; PString text = otext; // do various things to the string, depending upon the hint switch (hint) { case Digits: { } break; default: ; }; // quick hack to calculate length of Unicode string unsigned short * uStr = new unsigned short[text.GetLength()+1]; USES_CONVERSION; wcscpy(uStr, T2W((const char *)text)); HRESULT hr = m_cpVoice->Speak(uStr, SPF_DEFAULT, NULL); delete[] uStr; return SUCCEEDED(hr); } PStringArray PTextToSpeech_SAPI::GetVoiceList() { PWaitAndSignal m(mutex); PStringArray voiceList; CComPtr cpVoiceToken; CComPtr cpEnum; ULONG ulCount = 0; //Enumerate the available voices HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum); // Get the number of voices if (SUCCEEDED(hr)) hr = cpEnum->GetCount(&ulCount); // Obtain a list of available voice tokens, set the voice to the token, and call Speak while (SUCCEEDED(hr) && ulCount--) { cpVoiceToken.Release(); if (SUCCEEDED(hr)) hr = cpEnum->Next(1, &cpVoiceToken, NULL ); if (SUCCEEDED(hr)) { voiceList.AppendString("voice"); } } return voiceList; } BOOL PTextToSpeech_SAPI::SetVoice(const PString & v) { PWaitAndSignal m(mutex); voice = v; return TRUE; } BOOL PTextToSpeech_SAPI::SetRate(unsigned v) { rate = v; return TRUE; } unsigned PTextToSpeech_SAPI::GetRate() { return rate; } BOOL PTextToSpeech_SAPI::SetVolume(unsigned v) { volume = v; return TRUE; } unsigned PTextToSpeech_SAPI::GetVolume() { return volume; } #endif // P_SAPI //////////////////////////////////////////////////////////// // // Generic text to speech using Festival // class PTextToSpeech_Festival : public PTextToSpeech { PCLASSINFO(PTextToSpeech_Festival, PTextToSpeech); public: PTextToSpeech_Festival(); ~PTextToSpeech_Festival(); // overrides PStringArray GetVoiceList(); BOOL SetVoice(const PString & voice); BOOL SetRate(unsigned rate); unsigned GetRate(); BOOL SetVolume(unsigned volume); unsigned GetVolume(); BOOL OpenFile (const PFilePath & fn); BOOL OpenChannel(PChannel * channel); BOOL IsOpen() { return opened; } BOOL Close (); BOOL Speak (const PString & str, TextType hint); protected: BOOL Invoke(const PString & str, const PFilePath & fn); PMutex mutex; BOOL opened; BOOL usingFile; PString text; PFilePath path; unsigned volume, rate; PString voice; }; PFactory::Worker festivalTTSFactory("Festival", false); PTextToSpeech_Festival::PTextToSpeech_Festival() { PWaitAndSignal m(mutex); usingFile = opened = FALSE; rate = 8000; volume = 100; } PTextToSpeech_Festival::~PTextToSpeech_Festival() { PWaitAndSignal m(mutex); } BOOL PTextToSpeech_Festival::OpenChannel(PChannel *) { PWaitAndSignal m(mutex); Close(); usingFile = FALSE; opened = FALSE; return TRUE; } BOOL PTextToSpeech_Festival::OpenFile(const PFilePath & fn) { PWaitAndSignal m(mutex); Close(); usingFile = TRUE; path = fn; opened = TRUE; PTRACE(3, "TTS\tWriting speech to " << fn); return TRUE; } BOOL PTextToSpeech_Festival::Close() { PWaitAndSignal m(mutex); if (!opened) return TRUE; BOOL stat = FALSE; if (usingFile) stat = Invoke(text, path); text = PString(); opened = FALSE; return stat; } BOOL PTextToSpeech_Festival::Speak(const PString & ostr, TextType hint) { PWaitAndSignal m(mutex); if (!IsOpen()) { PTRACE(3, "TTS\tAttempt to speak whilst engine not open"); return FALSE; } PString str = ostr; // do various things to the string, depending upon the hint switch (hint) { case Digits: default: ; }; if (usingFile) { PTRACE(3, "TTS\tSpeaking " << ostr); text = text & str; return TRUE; } PTRACE(3, "TTS\tStream mode not supported for Festival"); return FALSE; } PStringArray PTextToSpeech_Festival::GetVoiceList() { PWaitAndSignal m(mutex); PStringArray voiceList; voiceList.AppendString("default"); return voiceList; } BOOL PTextToSpeech_Festival::SetVoice(const PString & v) { PWaitAndSignal m(mutex); voice = v; return TRUE; } BOOL PTextToSpeech_Festival::SetRate(unsigned v) { rate = v; return TRUE; } unsigned PTextToSpeech_Festival::GetRate() { return rate; } BOOL PTextToSpeech_Festival::SetVolume(unsigned v) { volume = v; return TRUE; } unsigned PTextToSpeech_Festival::GetVolume() { return volume; } BOOL PTextToSpeech_Festival::Invoke(const PString & otext, const PFilePath & fname) { PString text = otext; text.Replace('\n', ' ', TRUE); text.Replace('\"', '\'', TRUE); text.Replace('\\', ' ', TRUE); text = "\"" + text + "\""; PString cmdLine = "echo " + text + " | ./text2wave -F " + PString(PString::Unsigned, rate) + " -otype riff > " + fname; #if 1 #ifndef _WIN32_WCE system(cmdLine); #endif return TRUE; #else PPipeChannel cmd; int code = -1; if (!cmd.Open(cmdLine, PPipeChannel::ReadWriteStd)) { PTRACE(2, "TTS\tCannot execute command " << cmd); } else { PTRACE(2, "TTS\tCreating " << fname << " using " << cmdLine); cmd.Execute(); code = cmd.WaitForTermination(); if (code >= 0) { PTRACE(2, "TTS\tdata generated"); } else { PTRACE(2, "TTS\tgeneration failed"); } } return code == 0; #endif } // End Of File ///////////////////////////////////////////////////////////////