You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

sax.js 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662
  1. var NAMESPACE = require("./conventions").NAMESPACE;
  // XML 1.0 name productions (https://www.w3.org/TR/xml/#NT-Name):
  //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
  //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
  //[5] Name ::= NameStartChar (NameChar)*
  // The character class below leaves out ":" (the colon is handled by
  // tagNamePattern) and the supplementary range \u10000-\uEFFFF.
  var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
  // NameChar reuses the NameStartChar class body (brackets stripped) and adds
  // "-", ".", digits and the combining/extender ranges.
  var nameChar = new RegExp(
    [
      "[\\-\\.0-9",
      nameStartChar.source.slice(1, -1),
      "\\u00B7\\u0300-\\u036F\\u203F-\\u2040]"
    ].join("")
  );
  // A qualified name: one name, optionally followed by ":" and a second name.
  var tagNamePattern = new RegExp(
    [
      '^',
      nameStartChar.source,
      nameChar.source,
      '*(?:\:',
      nameStartChar.source,
      nameChar.source,
      '*)?$'
    ].join('')
  );
  8. //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
  9. //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
  10. //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
  11. //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
  // States of the start-tag scanner (see parseElementStartPart).
  var S_TAG = 0; // reading the tag name
  var S_ATTR = 1; // reading an attribute name
  var S_ATTR_SPACE = 2; // attribute name finished, whitespace seen after it
  var S_EQ = 3; // "=" seen, whitespace may follow
  var S_ATTR_NOQUOT_VALUE = 4; // reading an unquoted attribute value
  var S_ATTR_END = 5; // quoted attribute value just ended, no whitespace yet
  var S_TAG_SPACE = 6; // whitespace after the tag name or after an attribute value
  var S_TAG_CLOSE = 7; // "/" of a self-closing element seen, e.g. <el />
  /**
   * Error type that XMLReader (the SAX parser) re-throws instead of
   * reporting it through the error handler.
   *
   * @param {string} message Human readable description of the failure.
   * @param {any?} locator Optional details about the location in the source.
   * @constructor
   */
  function ParseError(message, locator) {
    this.message = message;
    this.locator = locator;
    // Only some engines provide captureStackTrace; when present it records a
    // stack that starts at the caller of this constructor.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, ParseError);
    }
  }
  ParseError.prototype = new Error();
  ParseError.prototype.name = ParseError.name;
  /**
   * SAX-style reader. The caller is expected to assign `domBuilder` and
   * `errorHandler` on the instance before calling `parse`.
   *
   * @constructor
   */
  function XMLReader() {}

  XMLReader.prototype = {
    /**
     * Parses `source`, bracketing the run with startDocument/endDocument
     * calls on the DOM builder.
     *
     * @param {string} source Markup to parse.
     * @param {Object} defaultNSMap Initial prefix -> namespace URI map; it is
     *   copied so the caller's object is never modified.
     * @param {Object} entityMap Entity name -> replacement text.
     */
    parse: function (source, defaultNSMap, entityMap) {
      var builder = this.domBuilder;
      var nsMapCopy = {};
      builder.startDocument();
      _copy(defaultNSMap, nsMapCopy);
      parse(source, nsMapCopy, entityMap, builder, this.errorHandler);
      builder.endDocument();
    }
  };
  /**
   * Main SAX loop: scans `source` for "<", emits the text in between as
   * character data and dispatches end tags, processing instructions,
   * "<!" constructs and start tags to their dedicated parsers.
   *
   * Errors raised while handling a construct are reported through
   * `errorHandler.error` and the offending "<" is then emitted as text;
   * only ParseError instances are re-thrown to the caller.
   *
   * @param {string} source Markup to parse.
   * @param {Object} defaultNSMapCopy Prefix -> namespace URI map used for the root scope.
   * @param {Object} entityMap Entity name -> replacement text.
   * @param {Object} domBuilder Receiver of the SAX events; its `locator`
   *   (if any) is updated with line/column information.
   * @param {Object} errorHandler Object with `warning`, `error` and `fatalError`.
   */
  function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
    // String.fromCharCode cannot produce code points above 0xFFFF directly,
    // so those are emitted as a UTF-16 surrogate pair.
    function fixedFromCharCode(code) {
      if (code > 0xffff) {
        code -= 0x10000;
        var surrogate1 = 0xd800 + (code >> 10);
        var surrogate2 = 0xdc00 + (code & 0x3ff);
        return String.fromCharCode(surrogate1, surrogate2);
      }
      return String.fromCharCode(code);
    }

    // Replaces one "&name;" / "&#n;" / "&#xh;" reference. Unknown named
    // entities are reported and left untouched.
    function entityReplacer(a) {
      var k = a.slice(1, -1);
      if (Object.hasOwnProperty.call(entityMap, k)) {
        return entityMap[k];
      } else if (k.charAt(0) === '#') {
        return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
      } else {
        errorHandler.error('entity not found:' + a);
        return a;
      }
    }

    // Emits source[start, textEnd) as character data and advances `start`.
    function appendText(textEnd) {
      if (textEnd > start) {
        var xt = source.substring(start, textEnd).replace(/&#?\w+;/g, entityReplacer);
        if (locator) {
          position(start);
        }
        // The length must describe the expanded text, not the raw source
        // span: an entity may expand to more characters than its reference.
        domBuilder.characters(xt, 0, xt.length);
        start = textEnd;
      }
    }

    // Advances the line bookkeeping up to offset `p` and stores the
    // resulting line/column in the locator.
    function position(p, m) {
      while (p >= lineEnd && (m = linePattern.exec(source))) {
        lineStart = m.index;
        lineEnd = lineStart + m[0].length;
        locator.lineNumber++;
      }
      locator.columnNumber = p - lineStart + 1;
    }

    var lineStart = 0;
    var lineEnd = 0;
    var linePattern = /.*(?:\r\n?|\n)|.*$/g;
    var locator = domBuilder.locator;
    var parseStack = [{ currentNSMap: defaultNSMapCopy }];
    var closeMap = {};
    var start = 0;
    while (true) {
      try {
        var tagStart = source.indexOf('<', start);
        if (tagStart < 0) {
          // No more markup: any non-whitespace remainder becomes a text
          // node appended directly to the document.
          if (!source.substr(start).match(/^\s*$/)) {
            var doc = domBuilder.doc;
            var text = doc.createTextNode(source.substr(start));
            doc.appendChild(text);
            domBuilder.currentElement = text;
          }
          return;
        }
        if (tagStart > start) {
          appendText(tagStart);
        }
        switch (source.charAt(tagStart + 1)) {
          case '/': // end tag
            var end = source.indexOf('>', tagStart + 3);
            var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, '');
            var config = parseStack.pop();
            if (end < 0) {
              tagName = source.substring(tagStart + 2).replace(/[\s<].*/, '');
              errorHandler.error("end tag name: " + tagName + ' is not complete:' + config.tagName);
              end = tagStart + 1 + tagName.length;
            } else if (tagName.match(/\s</)) {
              tagName = tagName.replace(/[\s<].*/, '');
              errorHandler.error("end tag name: " + tagName + ' maybe not complete');
              end = tagStart + 1 + tagName.length;
            }
            var localNSMap = config.localNSMap;
            var endMatch = config.tagName == tagName;
            var endIgnoreCaseMatch = endMatch || config.tagName && config.tagName.toLowerCase() == tagName.toLowerCase();
            if (endIgnoreCaseMatch) {
              domBuilder.endElement(config.uri, config.localName, tagName);
              if (localNSMap) {
                for (var prefix in localNSMap) {
                  if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
                    domBuilder.endPrefixMapping(prefix);
                  }
                }
              }
              if (!endMatch) {
                errorHandler.fatalError("end tag name: " + tagName + ' is not match the current start tagName:' + config.tagName); // No known test case
              }
            } else {
              // Not the end tag of the current element: keep the element open.
              parseStack.push(config);
            }
            end++;
            break;
          case '?': // <?...?>
            if (locator) {
              position(tagStart);
            }
            end = parseInstruction(source, tagStart, domBuilder);
            break;
          case '!': // <!doctype, <![CDATA, <!--
            if (locator) {
              position(tagStart);
            }
            end = parseDCC(source, tagStart, domBuilder, errorHandler);
            break;
          default: // start tag
            if (locator) {
              position(tagStart);
            }
            var el = new ElementAttributes();
            var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
            var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler);
            var len = el.length;
            if (!el.closed && fixSelfClosed(source, end, el.tagName, closeMap)) {
              el.closed = true;
              if (!entityMap.nbsp) {
                errorHandler.warning('unclosed xml attribute');
              }
            }
            if (locator && len) {
              // Give every attribute its own locator, then hand the builder
              // a snapshot of the element position while the element is
              // appended.
              var locator2 = copyLocator(locator, {});
              for (var i = 0; i < len; i++) {
                var a = el[i];
                position(a.offset);
                a.locator = copyLocator(locator, {});
              }
              domBuilder.locator = locator2;
              if (appendElement(el, domBuilder, currentNSMap)) {
                parseStack.push(el);
              }
              domBuilder.locator = locator;
            } else {
              if (appendElement(el, domBuilder, currentNSMap)) {
                parseStack.push(el);
              }
            }
            if (NAMESPACE.isHTML(el.uri) && !el.closed) {
              end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
            } else {
              end++;
            }
        }
      } catch (e) {
        if (e instanceof ParseError) {
          throw e;
        }
        errorHandler.error('element parse error: ' + e);
        end = -1;
      }
      if (end > start) {
        start = end;
      } else {
        // TODO: the scanner may step backwards here, so reported positions
        // can be wrong.
        appendText(Math.max(tagStart, start) + 1);
      }
    }
  }
  /**
   * Copies the line/column pair from one locator object onto another.
   *
   * @param {Object} f Locator to read `lineNumber` and `columnNumber` from.
   * @param {Object} t Object that receives both values.
   * @return {Object} `t`, to allow inline use.
   */
  function copyLocator(f, t) {
    t.lineNumber = f.lineNumber;
    t.columnNumber = f.columnNumber;
    return t;
  }
  /**
   * Scans a start tag beginning at the "<" found at `start`, storing the tag
   * name and attributes on `el`. The scanner is a character-driven state
   * machine over the S_* states; it tolerates unquoted values, value-less
   * attributes and missing whitespace, reporting them as warnings.
   *
   * @param {string} source Markup being parsed.
   * @param {number} start Offset of the "<" that opens the tag.
   * @param {Object} el Receiver with `setTagName`, `addValue`, `attributeNames`
   *   and a `closed` flag (set to true for self-closing tags).
   * @param {Object} currentNSMap Prefix -> namespace URI map in scope; only the
   *   default namespace is consulted, to relax warnings for HTML boolean attributes.
   * @param {function(string): string} entityReplacer Replacer for entity references.
   * @param {Object} errorHandler Object with `warning`, `error` and `fatalError`.
   * @return {number} Offset of the ">" ending the tag, or of the end of input
   *   when the tag is not terminated.
   * @throws {Error} On malformed attribute syntax (misplaced "=", quote or "/",
   *   missing value, unterminated quoted value).
   */
  function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler) {
    /**
     * @param {string} qname
     * @param {string} value
     * @param {number} startIndex
     */
    function addAttribute(qname, value, startIndex) {
      // attributeNames is a plain object keyed by attribute names, so an
      // attribute called "hasOwnProperty" would shadow the method; always go
      // through Object.prototype.
      if (Object.prototype.hasOwnProperty.call(el.attributeNames, qname)) {
        errorHandler.fatalError('Attribute ' + qname + ' redefined');
      }
      el.addValue(
        qname,
        // @see https://www.w3.org/TR/xml/#AVNormalize
        // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
        // - recursive replacement of (DTD) entity references
        // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
        value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
        startIndex
      );
    }

    var attrName;
    var value;
    var p = ++start;
    var s = S_TAG; // current scanner state
    while (true) {
      var c = source.charAt(p);
      switch (c) {
        case '=':
          if (s === S_ATTR) {
            attrName = source.slice(start, p);
            s = S_EQ;
          } else if (s === S_ATTR_SPACE) {
            s = S_EQ;
          } else {
            // "=" is only valid right after an attribute name (and optional space)
            throw new Error('attribute equal must after attrName'); // No known test case
          }
          break;
        case '\'':
        case '"':
          if (s === S_EQ || s === S_ATTR) {
            if (s === S_ATTR) {
              errorHandler.warning('attribute value must after "="');
              attrName = source.slice(start, p);
            }
            start = p + 1;
            p = source.indexOf(c, start);
            if (p > 0) {
              value = source.slice(start, p);
              addAttribute(attrName, value, start - 1);
              s = S_ATTR_END;
            } else {
              // no matching closing quote
              throw new Error('attribute value no end \'' + c + '\' match');
            }
          } else if (s == S_ATTR_NOQUOT_VALUE) {
            // value started without a quote but ends with one
            value = source.slice(start, p);
            addAttribute(attrName, value, start);
            errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
            start = p + 1;
            s = S_ATTR_END;
          } else {
            // quote without a preceding "="
            throw new Error('attribute value must after "="'); // No known test case
          }
          break;
        case '/':
          switch (s) {
            case S_TAG:
              el.setTagName(source.slice(start, p));
            // falls through
            case S_ATTR_END:
            case S_TAG_SPACE:
            case S_TAG_CLOSE:
              s = S_TAG_CLOSE;
              el.closed = true;
            // falls through
            case S_ATTR_NOQUOT_VALUE:
            case S_ATTR:
              // inside a name or unquoted value the "/" is ordinary content
              break;
            case S_ATTR_SPACE:
              el.closed = true;
              break;
            //case S_EQ:
            default:
              throw new Error("attribute invalid close char('/')"); // No known test case
          }
          break;
        case '': // end of input (charAt past the end yields '')
          errorHandler.error('unexpected end of input');
          if (s == S_TAG) {
            el.setTagName(source.slice(start, p));
          }
          return p;
        case '>':
          switch (s) {
            case S_TAG:
              el.setTagName(source.slice(start, p));
            // falls through
            case S_ATTR_END:
            case S_TAG_SPACE:
            case S_TAG_CLOSE:
              break; // normal end of tag
            case S_ATTR_NOQUOT_VALUE: // compatibility: unquoted value runs up to ">"
            case S_ATTR:
              value = source.slice(start, p);
              if (value.slice(-1) === '/') {
                el.closed = true;
                value = value.slice(0, -1);
              }
            // falls through
            case S_ATTR_SPACE:
              if (s === S_ATTR_SPACE) {
                value = attrName;
              }
              if (s == S_ATTR_NOQUOT_VALUE) {
                errorHandler.warning('attribute "' + value + '" missed quot(")!');
                addAttribute(attrName, value, start);
              } else {
                // value-less attribute: its own name is used as the value
                if (!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)) {
                  errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
                }
                addAttribute(value, value, start);
              }
              break;
            case S_EQ:
              throw new Error('attribute value missed!!');
          }
          return p;
        /* xml space '\x20' | #x9 | #xD | #xA; */
        case '\u0080':
          c = ' ';
        // falls through
        default:
          if (c <= ' ') { // whitespace
            switch (s) {
              case S_TAG:
                el.setTagName(source.slice(start, p));
                s = S_TAG_SPACE;
                break;
              case S_ATTR:
                attrName = source.slice(start, p);
                s = S_ATTR_SPACE;
                break;
              case S_ATTR_NOQUOT_VALUE:
                value = source.slice(start, p);
                errorHandler.warning('attribute "' + value + '" missed quot(")!!');
                addAttribute(attrName, value, start);
              // falls through
              case S_ATTR_END:
                s = S_TAG_SPACE;
                break;
              // S_TAG_SPACE, S_EQ, S_ATTR_SPACE, S_TAG_CLOSE: nothing to do
            }
          } else { // any other character
            switch (s) {
              // S_TAG, S_ATTR, S_ATTR_NOQUOT_VALUE: keep consuming
              case S_ATTR_SPACE:
                // a new name starts, so the previous attribute had no value
                if (!NAMESPACE.isHTML(currentNSMap['']) || !attrName.match(/^(?:disabled|checked|selected)$/i)) {
                  errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
                }
                addAttribute(attrName, attrName, start);
                start = p;
                s = S_ATTR;
                break;
              case S_ATTR_END:
                errorHandler.warning('attribute space is required"' + attrName + '"!!');
              // falls through
              case S_TAG_SPACE:
                s = S_ATTR;
                start = p;
                break;
              case S_EQ:
                s = S_ATTR_NOQUOT_VALUE;
                start = p;
                break;
              case S_TAG_CLOSE:
                throw new Error("elements closed character '/' and '>' must be connected to");
            }
          }
      } // end outer switch
      p++;
    }
  }
  /**
   * Resolves namespaces for an element and its attributes and reports the
   * element to the DOM builder.
   *
   * Attributes named `xmlns` / `xmlns:*` extend a private copy of the
   * in-scope namespace map and are announced via `startPrefixMapping`.
   * For an element already marked `closed`, `endElement` and the matching
   * `endPrefixMapping` calls are emitted right away.
   *
   * @param {Object} el Array-like attribute list carrying `tagName` and `closed`.
   * @param {Object} domBuilder Receiver of the SAX events.
   * @param {Object} currentNSMap Prefix -> namespace URI map in scope ('' is the default namespace).
   * @return {boolean|undefined} true when the element stays open (its
   *   `currentNSMap`/`localNSMap` are stored on `el` for the caller's stack);
   *   undefined when it was closed here.
   */
  function appendElement(el, domBuilder, currentNSMap) {
    var tagName = el.tagName;
    var localNSMap = null;
    var i = el.length;
    // First pass: split qualified names and collect namespace declarations.
    while (i--) {
      var a = el[i];
      var qName = a.qName;
      var value = a.value;
      var nsp = qName.indexOf(':');
      if (nsp > 0) {
        var prefix = a.prefix = qName.slice(0, nsp);
        var localName = qName.slice(nsp + 1);
        var nsPrefix = prefix === 'xmlns' && localName;
      } else {
        localName = qName;
        prefix = null;
        nsPrefix = qName === 'xmlns' && '';
      }
      // a.prefix is deliberately left unset for unprefixed attributes
      a.localName = localName;
      // nsPrefix is `false` unless this attribute declares a namespace;
      // '' (default namespace) is a valid declared prefix, hence !== false.
      if (nsPrefix !== false) {
        if (localNSMap == null) {
          localNSMap = {};
          // copy-on-first-declaration so the parent's map stays untouched
          var inheritedNSMap = currentNSMap;
          currentNSMap = {};
          _copy(inheritedNSMap, currentNSMap);
        }
        currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
        a.uri = NAMESPACE.XMLNS;
        domBuilder.startPrefixMapping(nsPrefix, value);
      }
    }
    // Second pass: now that every declaration is known, resolve attribute URIs.
    i = el.length;
    while (i--) {
      a = el[i];
      prefix = a.prefix;
      if (prefix) { // an unprefixed attribute has no namespace
        if (prefix === 'xml') {
          // The "xml" prefix is bound to the XML namespace by definition,
          // so it must not be looked up in (or overwritten from) the map.
          a.uri = NAMESPACE.XML;
        } else if (prefix !== 'xmlns') {
          a.uri = currentNSMap[prefix || ''];
        }
      }
    }
    nsp = tagName.indexOf(':');
    if (nsp > 0) {
      prefix = el.prefix = tagName.slice(0, nsp);
      localName = el.localName = tagName.slice(nsp + 1);
    } else {
      prefix = null; // must be reset: it still holds the last attribute's prefix
      localName = el.localName = tagName;
    }
    // an unprefixed element takes the default namespace
    var ns = el.uri = currentNSMap[prefix || ''];
    domBuilder.startElement(ns, localName, tagName, el);
    if (el.closed) {
      domBuilder.endElement(ns, localName, tagName);
      if (localNSMap) {
        for (prefix in localNSMap) {
          if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
            domBuilder.endPrefixMapping(prefix);
          }
        }
      }
    } else {
      el.currentNSMap = currentNSMap;
      el.localNSMap = localNSMap;
      return true;
    }
  }
  /**
   * Handles the raw-text content of HTML `script` and `textarea` elements:
   * when the content contains "&" or "<" it is emitted in one `characters`
   * call instead of being scanned as markup. Entity references are replaced
   * for `textarea` but not for `script`.
   *
   * @param {string} source Markup being parsed.
   * @param {number} elStartEnd Offset of the ">" that ends the start tag.
   * @param {string} tagName Tag name as written in the source.
   * @param {function(string): string} entityReplacer Replacer for entity references.
   * @param {Object} domBuilder Receiver of the `characters` event.
   * @return {number} Offset at which scanning resumes: the start of the
   *   closing tag when the content was consumed here, else `elStartEnd + 1`.
   */
  function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
    if (/^(?:script|textarea)$/i.test(tagName)) {
      var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
      if (elEndStart < 0) {
        // No exact "</tagName>" ahead. Without this guard substring() would
        // swap its arguments and emit the text *before* the element.
        return elStartEnd + 1;
      }
      var text = source.substring(elStartEnd + 1, elEndStart);
      if (/[&<]/.test(text)) {
        if (!/^script$/i.test(tagName)) {
          // textarea: entity references are still expanded
          text = text.replace(/&#?\w+;/g, entityReplacer);
        }
        domBuilder.characters(text, 0, text.length);
        return elEndStart;
      }
    }
    return elStartEnd + 1;
  }
  /**
   * Decides whether an element whose start tag ends at `elStartEnd` has no
   * closing tag later in the source and should be treated as closed.
   * The offset of the last closing tag per tag name is cached in `closeMap`.
   *
   * @param {string} source Markup being parsed.
   * @param {number} elStartEnd Offset where the start tag ends.
   * @param {string} tagName Tag name as written in the source.
   * @param {Object} closeMap Cache: tag name -> offset of its last closing tag (-1 if none).
   * @return {boolean} true when the last closing tag lies before `elStartEnd`.
   */
  function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
    var cached = closeMap[tagName];
    if (cached != null) {
      return cached < elStartEnd;
    }
    var closeTagStart = '</' + tagName;
    var lastClose = source.lastIndexOf(closeTagStart + '>');
    if (lastClose < elStartEnd) {
      // also accept a closing tag that is not terminated right after the name
      lastClose = source.lastIndexOf(closeTagStart);
    }
    closeMap[tagName] = lastClose;
    return lastClose < elStartEnd;
  }
  /**
   * Shallow-copies the own enumerable properties of `source` onto `target`.
   * Inherited properties are skipped.
   *
   * @param {Object} source Object to read from.
   * @param {Object} target Object to write to.
   */
  function _copy(source, target) {
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var key in source) {
      if (!hasOwn.call(source, key)) {
        continue;
      }
      target[key] = source[key];
    }
  }
  /**
   * Parses a construct starting with "<!": a comment, a CDATA section or a
   * DOCTYPE declaration.
   *
   * @param {string} source Markup being parsed.
   * @param {number} start Offset of the "<" of "<!".
   * @param {Object} domBuilder Receiver of `comment`, `startCDATA`/`characters`/
   *   `endCDATA` and `startDTD`/`endDTD`.
   * @param {Object} errorHandler Object with an `error` method.
   * @return {number} Offset just past the construct, or -1 when it is not
   *   recognised or not terminated.
   */
  function parseDCC(source, start, domBuilder, errorHandler) { // source[start..] is known to begin with '<!'
    var next = source.charAt(start + 2);
    var end;
    if (next === '-') {
      if (source.charAt(start + 3) !== '-') {
        // "<!-" that is not followed by a second "-"
        return -1;
      }
      end = source.indexOf('-->', start + 4);
      if (end > start) {
        // the comment body lies between "<!--" and "-->"
        domBuilder.comment(source, start + 4, end - start - 4);
        return end + 3;
      }
      errorHandler.error("Unclosed comment");
      return -1;
    }
    if (source.substr(start + 3, 6) == 'CDATA[') {
      end = source.indexOf(']]>', start + 9);
      if (end < 0) {
        // Without a terminator there is no section to report; emitting it
        // would pass a negative length to characters().
        errorHandler.error("Unclosed CDATA");
        return -1;
      }
      domBuilder.startCDATA();
      domBuilder.characters(source, start + 9, end - start - 9);
      domBuilder.endCDATA();
      return end + 3;
    }
    // <!DOCTYPE name [PUBLIC pubid [sysid] | SYSTEM sysid] ...>
    // NOTE(review): split() yields undefined when it reaches the end of input
    // without a terminating token, so `.length` below throws in that case.
    var matchs = split(source, start);
    var len = matchs.length;
    if (len > 1 && /!doctype/i.test(matchs[0][0])) {
      var name = matchs[1][0];
      var pubid = false;
      var sysid = false;
      if (len > 3) {
        if (/^public$/i.test(matchs[2][0])) {
          pubid = matchs[3][0];
          sysid = len > 4 && matchs[4][0];
        } else if (/^system$/i.test(matchs[2][0])) {
          sysid = matchs[3][0];
        }
      }
      var lastMatch = matchs[len - 1];
      domBuilder.startDTD(name, pubid, sysid);
      domBuilder.endDTD();
      return lastMatch.index + lastMatch[0].length;
    }
    return -1;
  }
  /**
   * Parses a processing instruction ("<?target data?>") that begins at `start`
   * and reports it to the DOM builder.
   *
   * @param {string} source Markup being parsed.
   * @param {number} start Offset of the "<" of "<?".
   * @param {Object} domBuilder Receiver of the `processingInstruction` event.
   * @return {number} Offset just past "?>", or -1 when the instruction is not
   *   terminated or does not start with "<?".
   */
  function parseInstruction(source, start, domBuilder) {
    var end = source.indexOf('?>', start);
    if (end < 0) {
      // indexOf signals "not found" with -1, which is truthy; it has to be
      // tested explicitly or substring() would hand back earlier source text.
      return -1;
    }
    var match = source.substring(start, end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
    if (!match) {
      return -1;
    }
    domBuilder.processingInstruction(match[1], match[2]);
    return end + 2;
  }
  /**
   * Array-like collection of the attributes of one start tag. Entries are
   * stored under numeric indexes as `{qName, value, offset}` records;
   * `attributeNames` maps each qualified name to its index.
   *
   * @constructor
   */
  function ElementAttributes() {
    this.attributeNames = {};
  }

  ElementAttributes.prototype = {
    /**
     * Stores the element's tag name.
     * @throws {Error} When the name does not match tagNamePattern.
     */
    setTagName: function (tagName) {
      if (!tagNamePattern.test(tagName)) {
        throw new Error('invalid tagName:' + tagName);
      }
      this.tagName = tagName;
    },
    /**
     * Appends an attribute record.
     * @throws {Error} When the attribute name does not match tagNamePattern.
     */
    addValue: function (qName, value, offset) {
      if (!tagNamePattern.test(qName)) {
        throw new Error('invalid attribute:' + qName);
      }
      var index = this.length;
      this.attributeNames[qName] = index;
      this[index] = { qName: qName, value: value, offset: offset };
      this.length = index + 1;
    },
    // Shared initial value; the first addValue call creates an own `length`.
    length: 0,
    getLocalName: function (i) {
      return this[i].localName;
    },
    getLocator: function (i) {
      return this[i].locator;
    },
    getQName: function (i) {
      return this[i].qName;
    },
    getURI: function (i) {
      return this[i].uri;
    },
    getValue: function (i) {
      return this[i].value;
    }
  };
  /**
   * Tokenizes a "<!...>" declaration starting at `start` into quoted strings,
   * bare words (optionally ending in "=") and tag delimiters.
   *
   * @param {string} source Markup being parsed.
   * @param {number} start Offset of the opening "<".
   * @return {Array|undefined} The regex match objects up to and including the
   *   first delimiter (">" / "/>" / "<") after the opening "<"; undefined when
   *   the input ends before such a delimiter is found.
   */
  function split(source, start) {
    var tokens = [];
    var tokenPattern = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
    tokenPattern.lastIndex = start;
    tokenPattern.exec(source); // skip the opening "<"
    for (var token = tokenPattern.exec(source); token; token = tokenPattern.exec(source)) {
      tokens.push(token);
      if (token[1]) {
        // a delimiter ends the declaration
        return tokens;
      }
    }
  }
  633. exports.XMLReader = XMLReader;
  634. exports.ParseError = ParseError;