; = Dc@s dZdZdZdZdZddddgZd Zd eZd Zd gZ d Z d k Z d k Z d k Z d kZd kZd kZd kZd kZd kZydklZWndklZnXy d kZWn eZnXy d kZWn eZnXyd kZeidWn=ej o1d kZeedoeidqbnXd k Z d k!Z!eZ"e oydk#l$Z"WqqXny0d k%Z&e&i'i(e dk)l*Z+dZ,Wnd Z,dZ+nXyd k-Z-d k.Z.WneZ-Z.nXy d k/Z0WnnXy d k1Z1WnnXde2fdYZ3de2fdYZ4de2fdYZ5e i6de _7e i6de _8e i6de _9hdd<d d!<d"d#<d$d%<d&d'<d(d)<d*d+<d,d-<d.d/<d0d1<d2d3<d4d5<d6d7<d8d9<d:d;<d<d=k<l<Z<d?Z;nXd@e<fdAYa>dBZ?ea@dCZAdDfdEYZBe,o&dFeBe&i'iCiDfdGYZEndHe iFfdIYZGdJeBeGfdKYZHdLeGfdMYZIdNZJdOeGfdPYZKdQZLdRe!iMe!iNfdSYZOdTZPgZQdUZRdVdWdXdYdZd[d\d]d^d_d`dadbdgZSgiTZUeSD]dZVeUeViWd[dciWdddeiWdfdgiWdhdiiWdjdkiWdbdldmdndoq[UZX[VgiTZUeXD]ZYeUe i6eYiZqV[UZ[[YdpZ\eRe\dqZ]drZ^dsZ_dtZ`duZae i6dve]e^e_fZbe i6dwe`eafZcdxZdeReddyZeeReee i6dzZfd{ZgeReghd|d}<d~d<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd<dd s3.3sPythonsCopyright 2002-4, Mark Pilgrims'Mark Pilgrim s%Jason Diamond s'John Beimler s1Fazal Majid s!Aaron Swartz is.UniversalFeedParser/%s +http://feedparser.org/sapplication/atom+xml,application/rdf+xml,application/rss+xml,application/x-netcdf,application/xml;q=0.9,text/xml;q=0.2,*/*;q=0.1s drv_libxml2N(sStringIOissetdefaulttimeout(sTidy(sescapeicCs>|idd}|idd}|idd}|SdS(Ns&s&s>s>sd?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddf}dk}|iditttdditt|an|i tSdS(Niiiiii iiiiii i i iiiiiiiiiiiiiiiiiiiiiiii iiiiiiiiiiiiiiiiiiiiiiiiiii iiiiiiiiii[i.i<i(i+i!i&iiiiiiiiii]i$i*i)i;i^i-i/iiiiiiiii|i,i%i_i>i?iiiiiiiiii`i:i#i@i'i=i"iiaibicidieifigihiiiiiiiiiijikiliminioipiqiriiiiiiii~isitiuiviwixiyiziiiiiiiiiiiiiiiiiiiiiii{iAiBiCiDiEiFiGiHiIiiiiiii}iJiKiLiMiNiOiPiQiRiiiiiii\iiSiTiUiViWiXiYiZiiiiiii0i1i2i3i4i5i6i7i8i9iiiiiisi( s_ebcdic_to_ascii_mapsemapsstrings maketranssjoinsmapschrsrangesss translate(sssemapsstring((s./Sourceforge/rssparser.pys_ebcdic_to_asciis  @s_FeedParserMixincBstZhdd<dd<dd<dd<dd<dd<dd<dd<d d<d d<d d<d d<d d<dd<dd<dd<dd<dd<dd<dd<dd<dd <d!d"<d#d$<d%d&<d'd(<d)d*<d+d,<d-d.<d/d0<d1d2<d3d4<d5d6<d7d8<d9d:<d;d<<d=d><d?d@<dAdB<dCdD<dEdF<dGdH<dIdJ<dKdL<dMdN<dOdP<dQdR<dSdT<dUdV<dWdX<dYdZ<d[d\dZ?dZ@dZAdZBddZCdZDeDZEdZFeFZGdZHeHZIdZJeJZKdZLeLZMeLZNdZOeOZPdZQeQZRdZSeSZTdZUeUZVdZWeWZXdZYeYZZdZ[e[Z\dZ]e]Z^dZ_e_Z`dZaeaZbeaZceaZddZeeeZfeeZgeeZhdZidZjdZkdZldZmdZnenZoenZpdZqeqZreqZsdZtdZueuZvdZwewZxdZydZzdZ{dZ|dZ}e}Z~dZeZdkdZdZdZeZdZdZdZdZdZdZdZdZdZdZdZdZdZdZeZdZeZdZeZeZeZeZeZRS(Nsshttp://backend.userland.com/rsss%http://blogs.law.harvard.edu/tech/rssshttp://purl.org/rss/1.0/s&http://my.netscape.com/rdf/simple/0.9/shttp://example.com/newformat#shttp://example.com/nechoshttp://purl.org/echo/suri/of/echo/namespace#shttp://purl.org/pie/shttp://purl.org/atom/ns#s'http://purl.org/rss/1.0/modules/rss091#shttp://webns.net/mvcb/sadmins,http://purl.org/rss/1.0/modules/aggregation/sags)http://purl.org/rss/1.0/modules/annotate/sannotates!http://media.tangent.org/rss/1.0/saudios-http://backend.userland.com/blogChannelModules blogChannelshttp://web.resource.org/cc/sccs4http://backend.userland.com/creativeCommonsRssModulescreativeCommonss'http://purl.org/rss/1.0/modules/companyscos(http://purl.org/rss/1.0/modules/content/scontents&http://my.theinfo.org/changed/1.0/rss/scps http://purl.org/dc/elements/1.1/sdcshttp://purl.org/dc/terms/sdctermss&http://purl.org/rss/1.0/modules/email/semails&http://purl.org/rss/1.0/modules/event/sevshttp://postneo.com/icbm/sicbms&http://purl.org/rss/1.0/modules/image/simageshttp://xmlns.com/foaf/0.1/sfoafshttp://freshmeat.net/rss/fm/sfms%http://purl.org/rss/1.0/modules/link/sls4http://madskills.com/public/xml/rss/module/pingback/spingbacks.http://prismstandard.org/namespaces/1.2/basic/sprisms+http://www.w3.org/1999/02/22-rdf-syntax-ns#srdfs%http://www.w3.org/2000/01/rdf-schema#srdfss*http://purl.org/rss/1.0/modules/reference/srefs*http://purl.org/rss/1.0/modules/richequiv/sreqvs'http://purl.org/rss/1.0/modules/search/ssearchs&http://purl.org/rss/1.0/modules/slash/sslashs.http://purl.org/rss/1.0/modules/servicestatus/ssss-http://hacks.benhammersley.com/rss/streaming/sstrs-http://purl.org/rss/1.0/modules/subscription/ssubs,http://purl.org/rss/1.0/modules/syndication/ssys)http://purl.org/rss/1.0/modules/taxonomy/staxos*http://purl.org/rss/1.0/modules/threading/sthrs*http://purl.org/rss/1.0/modules/textinput/stis5http://madskills.com/public/xml/rss/module/trackback/s trackbacks$http://wellformedweb.org/CommentAPI/swfws%http://purl.org/rss/1.0/modules/wiki/swikis)http://schemas.xmlsoap.org/soap/envelope/ssoapshttp://www.w3.org/1999/xhtmlsxhtmls$http://www.w3.org/XML/1998/namespacesxmlslinksids wfw_commentswfw_commentrsssdocssurlscommentsslicensestitlessummarysinfostaglines copyrights descriptions text/htmlsapplication/xhtml+xmlsutf-8cCstotiidnt|_||_g|_d|_ d|_ d|_ d|_ d|_ d|_d|_d|_t|_h|_g|_g|_g|_|pd|_|pt|_|o||ids %s="%s"sescapeiis_stitleslinks descriptionsnamesurlswidthsheights_start_()s_debugssyssstderrswritestagsattrssappends_[1]sksvslowersdictsattrsDsgetsselfsbaseurislangsNonesfeeddatas basestacks langstacksprefixsuris startswithstrackNamespaces incontents contentparamsssplits handle_datasjoinstsfindssuffixs namespacemaps intextinputsinimages methodnamesgetattrsmethodsAttributeErrorspush(sselfstagsattrsslangs_[1]s methodnamessuffixsbaseurisprefixsurismethodsattrsDsvskst((s./Sourceforge/rssparser.pysunknown_starttag8sZ9S %        ## O! ' cCstotiid|n|iddjo|idd\}}nd|f\}}|i i ||}|o|d}nd||}yt ||}|Wn$tj o|i||nX|io|ii dd jod |idsescapei(s_debugssyssstderrswritestagsfindssplitsprefixssuffixsselfs namespacemapsgets methodnamesgetattrsmethodsAttributeErrorspops incontents contentparamss handle_datas basestacksbaseuris langstackslang(sselfstags methodnamessuffixsprefixsmethod((s./Sourceforge/rssparser.pysunknown_endtag~s6 ##     c Cs|i odSn|i}|ddddddddd d f jod |}nJ|d d jot|dd}n t|}t|id}|iddi |dS(Ns34s38s39s60s62sx22sx26sx27sx3csx3es&#%s;isxiisutf-8ii( sselfs elementstacksrefslowerstextsintscsunichrsencodesappend(sselfsrefscstext((s./Sourceforge/rssparser.pyshandle_charrefs  + cCs|i odSntotiid|n|dddddfjod|}nSd}y||Wnt j od|}nXt ||i d }|id d i |dS( Ns"entering handle_entityref with %s sltsgtsquotsampsaposs&%s;cCsxdk}t|do|i|Sn|i|}|ido |idot|dd!Snt|SdS(Nsname2codepoints&#s;ii( shtmlentitydefsshasattrsname2codepointsks entitydefss startswithsendswithsintsord(skshtmlentitydefs((s./Sourceforge/rssparser.pysname2cps   sutf-8ii( sselfs elementstacks_debugssyssstderrswritesrefstextsname2cpsKeyErrorsunichrsencodesappend(sselfsrefsname2cpstext((s./Sourceforge/rssparser.pyshandle_entityrefs  icCs_|i odSn|o|iiddjot|}n|iddi|dS(Nsmodesxmlii(sselfs elementstacksescapes contentparamssgets _xmlescapestextsappend(sselfstextsescape((s./Sourceforge/rssparser.pys handle_datas   cCsdS(N((sselfstext((s./Sourceforge/rssparser.pyshandle_commentscCsdS(N((sselfstext((s./Sourceforge/rssparser.pys handle_piscCsdS(N((sselfstext((s./Sourceforge/rssparser.pys handle_declscCstotiidn|i||d!djoe|iid|}|djot |i}n|i t |i|d|!d|dSn|iid|}|d SdS( Nsentering parse_declaration i s iiis>i( s_debugssyssstderrswritesselfsrawdatasisfindskslens handle_datas _xmlescape(sselfsisk((s./Sourceforge/rssparser.pysparse_declarations $ cCs||ftdfjo|i o d|_n|djo|i o d|_n| odSn|iddjo d}n|ii|o|i||i|s <\1>s's's"s"(srescompiles IGNORECASEssubsdatasreplacesselfsencodingstypestypess UnicodeTypesencodessgmllibs SGMLParsersfeed(sselfsdata((s./Sourceforge/rssparser.pysfeeds! cCsgi}|D]"\}}||i|fq~}gi}|D]<\}}|||ddfjo |ip|fqG~}|SdS(Nsrelstype(sappends_[1]sattrssksvslower(sselfsattrss_[1]sksv((s./Sourceforge/rssparser.pysnormalize_attrss9ScCstotiid|ndigi}|D] \}}|d||fq3~}||i jo|iidtn|iidtdS(Ns-_BaseHTMLProcessor, unknown_starttag, tag=%s ss %s="%s"s<%(tag)s%(strattrs)s />s<%(tag)s%(strattrs)s>(s_debugssyssstderrswritestagsjoinsappends_[1]sattrsskeysvaluesstrattrssselfselements_no_end_tagspiecesslocals(sselfstagsattrss_[1]sstrattrssvalueskey((s./Sourceforge/rssparser.pysunknown_starttags @cCs/||ijo|iidtndS(Ns (stagsselfselements_no_end_tagspiecessappendslocals(sselfstag((s./Sourceforge/rssparser.pysunknown_endtagscCs|iidtdS(Ns &#%(ref)s;(sselfspiecessappendslocals(sselfsref((s./Sourceforge/rssparser.pyshandle_charrefscCs|iidtdS(Ns &%(ref)s;(sselfspiecessappendslocals(sselfsref((s./Sourceforge/rssparser.pyshandle_entityref scCs3totiid|n|ii|dS(Ns)_BaseHTMLProcessor, handle_text, text=%s (s_debugssyssstderrswritestextsselfspiecessappend(sselfstext((s./Sourceforge/rssparser.pys handle_datascCs|iidtdS(Ns(sselfspiecessappendslocals(sselfstext((s./Sourceforge/rssparser.pyshandle_commentscCs|iidtdS(Ns (sselfspiecessappendslocals(sselfstext((s./Sourceforge/rssparser.pys handle_piscCs|iidtdS(Ns (sselfspiecessappendslocals(sselfstext((s./Sourceforge/rssparser.pys handle_decl ss-zA-Z][-_.a-zA-Z0-9:]*\s*cCs|i}t|}||jotdfSn|i||}|oW|i}|i }|t||jotdfSn|i |i fSn|i|tdfSdS(Ni(sselfsrawdataslensnsisNones_new_declname_matchsmsgroupsssstripsnameslowersends handle_data(sselfsis declstartpossnamesmsnsssrawdata((s./Sourceforge/rssparser.pys _scan_name(s      cCs;digi}|iD]}|t|q~SdS(s(Return processed HTML as a single stringsN(sjoinsappends_[1]sselfspiecesspsstr(sselfs_[1]sp((s./Sourceforge/rssparser.pysoutput9s(s__name__s __module__selements_no_end_tags__init__sresetsfeedsnormalize_attrssunknown_starttagsunknown_endtagshandle_charrefshandle_entityrefs handle_datashandle_comments handle_pis handle_declsrescompilesmatchs_new_declname_matchs _scan_namesoutput(((s./Sourceforge/rssparser.pys_BaseHTMLProcessors -          s_LooseFeedParsercBstZdZdZRS(NcCs*tii|ti||||dS(N(ssgmllibs SGMLParsers__init__sselfs_FeedParserMixinsbaseurisbaselangsencoding(sselfsbaseurisbaselangsencoding((s./Sourceforge/rssparser.pys__init__>scCs3|idd}|idd}|idd}|idd}|idd}|id d}|id d }|id d }|id d}|idd}|iiddjo^|idd}|idd}|idd}|id d}|idd}n|SdS(Ns<s<s<s>s>s>s&s&s&s"s"s"s's's'smodesescapedss&s"s'(sdatasreplacesselfs contentparamssget(sselfselementsdata((s./Sourceforge/rssparser.pysdecodeEntitiesBs"(s__name__s __module__s__init__sdecodeEntities(((s./Sourceforge/rssparser.pys_LooseFeedParser=s s_RelativeURIResolvercBs tZddfddfddfddfdd fd dfd d fd dfd dfddfddfddfddfddfddfddfddfddfddfddfddfddfddfddfddfgZdZdZdZRS( Nsashrefsappletscodebasesareas blockquotescitesbodys backgroundsdelsformsactionsframeslongdescssrcsiframesheadsprofilesimgsusemapsinputsinsslinksobjectsclassidsdatasqsscriptcCsti||||_dS(N(s_BaseHTMLProcessors__init__sselfsencodingsbaseuri(sselfsbaseurisencoding((s./Sourceforge/rssparser.pys__init__pscCsti|i|SdS(N(surlparsesurljoinsselfsbaseurisuri(sselfsuri((s./Sourceforge/rssparser.pys resolveURItscCs|i|}gi}|D]B\}}||||f|ijo |i |p|fq~}t i |||dS(N( sselfsnormalize_attrssattrssappends_[1]skeysvaluestags relative_uriss resolveURIs_BaseHTMLProcessorsunknown_starttag(sselfstagsattrssvalues_[1]skey((s./Sourceforge/rssparser.pysunknown_starttagwsY(s__name__s __module__s relative_uriss__init__s resolveURIsunknown_starttag(((s./Sourceforge/rssparser.pys_RelativeURIResolverUs  cCsEtotiidnt||}|i||i SdS(Nsentering _resolveRelativeURIs ( s_debugssyssstderrswrites_RelativeURIResolversbaseURIsencodingspsfeeds htmlSourcesoutput(s htmlSourcesbaseURIsencodingsp((s./Sourceforge/rssparser.pys_resolveRelativeURIs|s  s_HTMLSanitizercGBstZddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGgGZddHdIdJdKdLdMdNdOdPdQdRdSdTdUd dVdWdXdYdZd[d\d]dd^d_d`dadbdcdddedfdgdhd(didjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d7d~dddddddddddgGZddgZdZdZdZdZdZ dZ RS(Nsasabbrsacronymsaddresssareasbsbigs blockquotesbrsbuttonscaptionscenterscitescodescolscolgroupsddsdelsdfnsdirsdivsdlsdtsemsfieldsetsfontsformsh1sh2sh3sh4sh5sh6shrsisimgsinputsinsskbdslabelslegendslismapsmenusolsoptgroupsoptionspspresqssssampsselectssmallsspansstrikesstrongssubssupstablestbodystdstextareastfootsthstheadstrsttsusulsvarsacceptsaccept-charsets accesskeysactionsalignsaltsaxissborders cellpaddings cellspacingscharscharoffscharsetscheckedsclasssclearscolsscolspanscolorscompactscoordssdatetimesdisabledsenctypesforsframesheaderssheightshrefshreflangshspacesidsismapslangslongdescs maxlengthsmediasmethodsmultiplesnamesnohrefsnoshadesnowrapspromptsreadonlysrelsrevsrowssrowspansrulessscopesselectedsshapessizessrcsstartssummarystabindexstargetstitlestypesusemapsvalignsvaluesvspaceswidthsscriptsappletcCsti|d|_dS(Ni(s_BaseHTMLProcessorsresetsselfsunacceptablestack(sself((s./Sourceforge/rssparser.pysresets cCs||ij o+||ijo|id7_ndSn|i|}gi}|D]0\}}||i jo|||fqYqY~}t i |||dS(Ni(stagsselfsacceptable_elementss"unacceptable_elements_with_end_tagsunacceptablestacksnormalize_attrssattrssappends_[1]skeysvaluesacceptable_attributess_BaseHTMLProcessorsunknown_starttag(sselfstagsattrssvalues_[1]skey((s./Sourceforge/rssparser.pysunknown_starttagsGcCsP||ij o+||ijo|id8_ndSnti||dS(Ni(stagsselfsacceptable_elementss"unacceptable_elements_with_end_tagsunacceptablestacks_BaseHTMLProcessorsunknown_endtag(sselfstag((s./Sourceforge/rssparser.pysunknown_endtags cCsdS(N((sselfstext((s./Sourceforge/rssparser.pys handle_piscCsdS(N((sselfstext((s./Sourceforge/rssparser.pys handle_declscCs#|i oti||ndS(N(sselfsunacceptablestacks_BaseHTMLProcessors handle_datastext(sselfstext((s./Sourceforge/rssparser.pys handle_datas ( s__name__s __module__sacceptable_elementssacceptable_attributess"unacceptable_elements_with_end_tagsresetsunknown_starttagsunknown_endtags handle_pis handle_decls handle_data(((s./Sourceforge/rssparser.pys_HTMLSanitizers     cCst|}|i||i}tototi |dddddd\}}}}|i doD|iddd}|i do|iddd}qn|i do|iddd}qn|iid d }|SdS( Ns output_xhtmlisnumeric_entitiesswrapiss stream This function lets you define parsers that take any input source (URL, pathname to local or network file, or actual data as a string) and deal with it in a uniform manner. Returned object is guaranteed to have all the basic stdio read methods (read, readline, readlines). Just .close() the object when you're done with it. If the etag argument is supplied, it will be used as the value of an If-None-Match request header. If the modified argument is supplied, it must be a tuple of 9 integers as returned by gmtime() in the standard Python time module. This MUST be in GMT (Greenwich Mean Time). The formatted date/time will be used as the value of an If-Modified-Since request header. If the agent argument is supplied, it will be used as the value of a User-Agent request header. If the referrer argument is supplied, it will be used as the value of a Referer[sic] request header. If handlers is supplied, it is a list of handlers used to build a urllib2 opener. sreads-ishttpshttpssftps %s://%s%ss User-Agents If-None-MatchsMonsTuesWedsThusFrisSatsSunsJansFebsMarsAprsMaysJunsJulsAugsSepsOctsNovsDecsIf-Modified-Sinces#%s, %02d %s %04d %02d:%02d:%02d GMTiiiiiisReferersAccept-encodings gzip, deflatesgzipsdeflatess AuthorizationsBasic %ssAcceptN(+shasattrsurl_file_stream_or_stringssyssstdinsurlparsesagents USER_AGENTsNonesauthsbase64surllibs splittypesurltypesrests splithostsrealhosts splitusers user_passwds encodestringsstripsurllib2sRequestsrequests add_headersetagsmodifiedsshort_weekdayssmonthssreferrersgzipszlibs ACCEPT_HEADERsapplys build_openerstuples_FeedURLHandlershandlerssopeners addheaderssopenscloses _StringIOsstr(surl_file_stream_or_stringsetagsmodifiedsagentsreferrershandlerssurltypesshort_weekdayssauthsrealhostsopenersrestsmonthssrequests user_passwd((s./Sourceforge/rssparser.pys_open_resourcesb  # !*U" cCstid|dS(sLRegister a date handler function (takes string, returns 9-tuple date in GMT)iN(s_date_handlerssinsertsfunc(sfunc((s./Sourceforge/rssparser.pysregisterDateHandler>ss YYYY-?MM-?DDsYYYY-MMs YYYY-?OOOs YY-?MM-?DDsYY-?OOOsYYYYs-YY-?MMs-OOOs-YYs--MM-?DDs--MMs---DDsCCs(?P\d{4})sYYs(?P\d\d)sMMs(?P[01]\d)sDDs(?P[0123]\d)sOOOs(?P[0123]\d\d)s(?P\d\d$)s$(T?(?P\d{2}):(?P\d{2})s(:(?P\d{2}))?s6(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?c Cst}x&tD]}||}|oPq q W| odSn|iddfjodSn|i} | idd}|ot |}nd}| idd} | p | djot i d} nLt| djo,dt t i ddt | } n t | } | idd } | p | d jo%|o d } qmt i d } nt | } | id d}| oh|o |}q| id dp#| iddp| iddo d }qt i d}n t |}d | ijo t | d d dd } nx?d ddddgD](}| i|t od| |(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})cCsti|}| odSndhd|id<d|id<d|id<d |id <d |id <d |id<dd<}totii d|nt |SdS(s8Parse a string according to the OnBlog 8-bit date formatNsE%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)ssyearismonthisdayishourisminuteissecondiszonediffs+09:00sOnBlog date parsed as: %s ( s_korean_onblog_date_resmatchs dateStringsmsgroups w3dtfdates_debugssyssstderrswrites_parse_date_w3dtf(s dateStrings w3dtfdatesm((s./Sourceforge/rssparser.pys_parse_date_onblogscCs.ti|}| odSnt|id}|id}|tjo|d7}nt |}t |djod|}ndhd|id<d |id <d |id <d |<d|id<d|id<dd<}t ot iid|nt|SdS(s6Parse a string according to the Nate 8-bit date formatNiii is0sE%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)ssyearsmonthisdayishoursminuteissecondiszonediffs+09:00sNate date parsed as: %s (s_korean_nate_date_resmatchs dateStringsmsintsgroupshoursampms _korean_pmsstrslens w3dtfdates_debugssyssstderrswrites_parse_date_w3dtf(s dateStrings w3dtfdateshoursampmsm((s./Sourceforge/rssparser.pys_parse_date_nates  vs6(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})\.\d+cCsti|}| odSndhd|id<d|id<d|id<d |id <d |id <d |id<dd<}totii d|nt |SdS(s2Parse a string according to the MS SQL date formatNsE%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)ssyearismonthisdayishourisminuteissecondiszonediffs+09:00sMS SQL date parsed as: %s ( s_mssql_date_resmatchs dateStringsmsgroups w3dtfdates_debugssyssstderrswrites_parse_date_w3dtf(s dateStrings w3dtfdatesm((s./Sourceforge/rssparser.pys_parse_date_mssqlsuΙανuJanuΦεβuFebuΜάώuMaruΜαώuΑπρuApruΜάιuMayuΜαϊuΜαιuΙούνuJunuΙονuΙούλuJuluΙολuΑύγuAuguΑυγuΣεπuSepuΟκτuOctuΝοέuNovuΝοεuΔεκuDecuΚυρuSunuΔευuMonuΤριuTueuΤετuWeduΠεμuThuuΠαρuFriuΣαβuSatuL([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)cCs ti|}| odSny*t|id}t|id}Wn dSnXdhd|<d|id<d|<d |id <d |id <d |id<d|id<d|id<}t ot i i d|nt|SdS(s6Parse a string according to a Greek 8-bit date format.NiisP%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)sswdaysdayismonthsyearishourisminuteissecondiszonediffisGreek date parsed as: %s (s_greek_date_format_resmatchs dateStringsms _greek_wdayssgroupswdays _greek_monthssmonths rfc822dates_debugssyssstderrswrites_parse_date_rfc822(s dateStringsmsmonths rfc822dateswday((s./Sourceforge/rssparser.pys_parse_date_greeksujanuáru01u februáriu02umárciusu03uáprilisu04umáujusu05ujúniusu06ujúliusu07u augusztusu08u szeptemberu09uoktóberu10unovemberu11udecemberu12u?(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))cCs,ti|}| odSnywt|id}|id}t|djod|}n|id}t|djod|}nWn dSnXdhd|id<d |<d |<d |<d |id <d|id<}t ot i id|nt|SdS(s:Parse a string according to a Hungarian 8-bit date format.Niiis0is:%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s%(zonediff)ssyearsmonthsdayshoursminuteiszonediffisHungarian date parsed as: %s (s_hungarian_date_format_resmatchs dateStringsms_hungarian_monthssgroupsmonthsdayslenshours w3dtfdates_debugssyssstderrswrites_parse_date_w3dtf(s dateStringshours w3dtfdatesmsmonthsday((s./Sourceforge/rssparser.pys_parse_date_hungarian5s$[c Csd}d}d} d}d}ti|} d|}d||f}ti|}|i |}|tjp|i|jodSn||||dddf} | ddjodSntiti| | |tiSdS( Nc Cst|id}|djo,dttiddt|}n|djodddfSn|id}|ot|}|dd}|dd}t }x||joti |||ddddddf }ti|d}t ||}||jo/||jo||}q|d}d }q||jo-||d jo||}q|d}qqW|||fSn|id }d}|t jo d}n9t|}|id }|ot|}nd}|||fSdS( Nsyearidiisjulianiiiiismonthsday(sintsmsgroupsyearstimesgmtimesjuliansmonthsdaysNonesjdaysmktimestsabssdiff(smsjuliansyearsmonthsjdaystsdiffsday((s./Sourceforge/rssparser.pys__extract_datePsH ,   *        cCs| odddfSn|id}| odddfSnt|}t|id}|id}|ot|}nd}|||fSdS(Nishourssminutessseconds(smsgroupshourssintsminutessseconds(smshourssminutessseconds((s./Sourceforge/rssparser.pys__extract_timeys cCs| odSn|id}| odSn|djodSnt|id}|id}|ot|}nd}|d|d}|ddjo | Sn|SdS( sAReturn the Time Zone Designator as an offset in seconds from UTC.istzdsZstzdhourss tzdminutesi<s+N(smsgroupstzdsintshourssminutessoffset(smshoursstzdsoffsetsminutes((s./Sourceforge/rssparser.pys __extract_tzds"  sd(?P\d\d\d\d)(?:(?P-|)(?:(?P\d\d\d)|(?P\d\d)(?:(?P=dsep)(?P\d\d))?))?s;(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)sW(?P\d\d)(?P:|)(?P\d\d)(?:(?P=tsep)(?P\d\d(?:[.,]\d+)?))?s %s(?:T%s)?i(s__extract_dates__extract_times __extract_tzds __date_res__tzd_resrescompiles__tzd_rxs __time_res __datetime_res __datetime_rxsmatchs dateStringsmsNonesgroupsgmtstimesgmtimesmktimestimezone( s dateStrings__tzd_res__extract_times __datetime_resms __date_res__extract_dates __datetime_rxs __time_resgmts__tzd_rxs __extract_tzd((s./Sourceforge/rssparser.pys_parse_date_w3dtfOs  )    #cCs4ti|}|otiti|SndS(s8Parse an RFC822, RFC1123, RFC2822, or asctime-style dateN(srfc822s parsedate_tzs dateStringstmstimesgmtimes mktime_tz(s dateStringstm((s./Sourceforge/rssparser.pys_parse_date_rfc822ssATipsETi sCTisMTiDsPTicCsxtD]}yh||}| ownt|djo%totiidnt nt t ||SWqt j o7}to'tiid|it|fqqXqWtSdS(s6Parses a variety of date formats into a 9-tuple in GMTi s*date handler function must return 9-tuple s %s raised %s N(s_date_handlersshandlers dateStrings date9tupleslens_debugssyssstderrswrites ValueErrorsmapsints Exceptionses__name__sreprsNone(s dateStringseshandlers date9tuple((s./Sourceforge/rssparser.pys _parse_dates$   ' c Cs1d}d}d}d}||id\} }yj|d djot |}n.|d djo"d}t |di d}nt |djo"|d d jo|d d!d jo&d}t |d di d}n|d d jo"d }t |d i d}njt |djo"|d djo|d d!d jo&d }t |d d i d}n |d djo"d}t |di d}n|d djo"d}t |di d}n|d djo&d}t |ddi d}no|d djo&d}t |ddi d}n8|d djo&d}t |ddi d}nt idi|} Wn t} nX| o\| idi}|o.|dddddddd d!d"d#d$f jo |}qnd} d%d&d'f}d(d)f}| |jp| id*o | id+od,} |p |pd}n| |jp| id-o | id+od,} |pd.}nX| id-o|pd.}n7|o|id o|pd/}n|pd}||||| fSd0S(1s Get the character encoding of the XML document http_headers is a dictionary xml_data is a raw string (not Unicode) This is so much trickier than it sounds, it's not even funny. According to RFC 3023 ("XML Media Types"), if the HTTP Content-Type is application/xml, application/*+xml, application/xml-external-parsed-entity, or application/xml-dtd, the encoding given in the charset parameter of the HTTP Content-Type takes precedence over the encoding given in the XML prefix within the document, and defaults to "utf-8" if neither are specified. But, if the HTTP Content-Type is text/xml, text/*+xml, or text/xml-external-parsed-entity, the encoding given in the XML prefix within the document is ALWAYS IGNORED and only the encoding given in the charset parameter of the HTTP Content-Type header should be respected, and it defaults to "us-ascii" if not specified. Furthermore, discussion on the atom-syntax mailing list with the author of RFC 3023 leads me to the conclusion that any document served with a Content-Type of text/* and no charset parameter must be treated as us-ascii. (We now do this.) And also that it must always be flagged as non-well-formed. (We now do this too.) If Content-Type is unspecified (input was local file or non-HTTP source) or unrecognized (server just got it totally wrong), then go by the encoding given in the XML prefix of the document and default to "iso-8859-1" as per the HTTP specification (RFC 2616). Then, assuming we didn't find a character encoding in the HTTP headers (and the HTTP Content-type allowed us to look in the body), we need to sniff the first few bytes of the XML data and try to determine whether the encoding is ASCII-compatible. Section F of the XML specification shows the way here: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info If the sniffed encoding is not ASCII-compatible, we need to make it ASCII compatible so that we can sniff further into the XML declaration to find the encoding attribute, which will tell us the true encoding. Of course, none of this guarantees that we will be able to parse the feed in the declared character encoding (assuming it was declared correctly, which many are not). CJKCodecs and iconv_codec help a lot; you should definitely install them if you can. http://cjkpython.i18n.org/ cCsH|pd}ti|\}}||iddiddfSdS(s takes HTTP Content-Type header and returns (content type, charset) If no charset is specified, returns (content type, '') If no content type is specified, returns ('', '') Both return parameters are guaranteed to be lowercase strings sscharsets'N(s content_typescgis parse_headersparamssgetsreplace(s content_typesparams((s./Sourceforge/rssparser.pys_parseHTTPContentTypes ss content-typeisLos<?sutf-16besutf-8isss<?sutf-16lessisiso-10646-ucs-2sucs-2s csunicodesiso-10646-ucs-4sucs-4scsucs4sutf-16sutf-32sutf_16sutf_32sutf16su16sapplication/xmlsapplication/xml-dtds&application/xml-external-parsed-entitystext/xmlstext/xml-external-parsed-entitys application/s+xmlistext/sus-asciis iso-8859-1N(s_parseHTTPContentTypessniffed_xml_encodings xml_encodings true_encodings http_headerssgetshttp_content_types http_encodingsxml_datas_ebcdic_to_asciisunicodesencodeslensrescompilesmatchsxml_encoding_matchsNonesgroupsslowersacceptable_content_typesapplication_content_typesstext_content_typess startswithsendswithshas_key( s http_headerssxml_datas http_encodings_parseHTTPContentTypestext_content_typess true_encodings xml_encodingsapplication_content_typesssniffed_xml_encodingsacceptable_content_typeshttp_content_typesxml_encoding_match((s./Sourceforge/rssparser.pys_getCharacterEncodingsv. 8 8     8 -- cCstotiid|nt|djo"|d djo|dd!djoPto5tiid|djotiidqnd}|d}nt|djo"|d d jo|dd!djoPto5tiid|d jotiid qnd }|d}n$|d d joPto5tiid|djotiidq|nd}|d }n|d djoPto5tiid|djotiidqnd}|d}nb|d djoPto5tiid|djotiidq>nd}|d}nt||}totiid|nt i d}d}|i |o|i||}n|d|}|idSdS(sChanges an XML data stream on the fly to specify a new encoding data is a raw sequence of bytes (not Unicode) that is presumed to be in %encoding already encoding is a string recognized by encodings.aliases s%entering _toUTF8, trying encoding %s iisssstripping BOM sutf-16bestrying utf-16be instead ssutf-16lestrying utf-16le instead issutf-8strying utf-8 instead ssutf-32bestrying utf-32be instead ssutf-32lestrying utf-32le instead s*successfully converted %s data to unicode s^<\?xml[^>]*?>s&u N(s_debugssyssstderrswritesencodingslensdatasunicodesnewdatasrescompiles declmatchsnewdeclssearchssubsencode(sdatasencodingsnewdeclsnewdatas declmatch((s./Sourceforge/rssparser.pys_toUTF8Qs^8 8    cCstidti}|id|}tidti}|i|}|o|dpd}|i i do d}nt }|id|}||fSdS(sStrips DOCTYPE from XML document, returns (rss_version, stripped_data) rss_version may be "rss091n" or None stripped_data is the same XML document, minus the DOCTYPE s]*?)>ss]*?)>isnetscapesrss091nN(srescompiles MULTILINEsentity_patternssubsdatasdoctype_patternsfindallsdoctype_resultssdoctypeslowerscountsversionsNone(sdatasdoctype_patternsdoctypesversionsentity_patternsdoctype_results((s./Sourceforge/rssparser.pys _stripDoctypes cCst} t| dh' 5  Y 0{ 3 N     E   r   ]  3   5