You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

internal.js 6.2KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. "use strict";
  2. var Buffer = require("safer-buffer").Buffer;
  3. // Export Node.js internal encodings.
  4. module.exports = {
  5. // Encodings
  6. utf8: { type: "_internal", bomAware: true},
  7. cesu8: { type: "_internal", bomAware: true},
  8. unicode11utf8: "utf8",
  9. ucs2: { type: "_internal", bomAware: true},
  10. utf16le: "ucs2",
  11. binary: { type: "_internal" },
  12. base64: { type: "_internal" },
  13. hex: { type: "_internal" },
  14. // Codec.
  15. _internal: InternalCodec,
  16. };
  //------------------------------------------------------------------------------

  // Codec constructor used by every "_internal" encoding.
  //   codecOptions.encodingName - stored as this.enc ("cesu8" is rewritten to "utf8").
  //   codecOptions.bomAware     - copied to this.bomAware.
  //   iconv                     - only iconv.defaultCharUnicode is read, and only
  //                               when the hand-written CESU-8 decoder is selected.
  // Instance-level `encoder` / `decoder` properties are set only for the
  // special cases below.
  function InternalCodec(codecOptions, iconv) {
      this.enc = codecOptions.encodingName;
      this.bomAware = codecOptions.bomAware;

      if (this.enc === "base64") {
          this.encoder = InternalEncoderBase64;
      } else if (this.enc === "cesu8") {
          this.enc = "utf8"; // Use utf8 for decoding.
          this.encoder = InternalEncoderCesu8;

          // Add decoder for versions of Node not supporting CESU-8
          // (feature test: decode a CESU-8 surrogate pair and compare).
          if (Buffer.from('eda0bdedb2a9', 'hex').toString() !== '💩') {
              this.decoder = InternalDecoderCesu8;
              this.defaultCharUnicode = iconv.defaultCharUnicode;
          }
      }
  }
  // Defaults used when the constructor does not assign an instance-level
  // encoder/decoder.
  InternalCodec.prototype.encoder = InternalEncoder;
  InternalCodec.prototype.decoder = InternalDecoder;
  //------------------------------------------------------------------------------

  // We use node.js internal decoder. Its signature is the same as ours.
  var StringDecoder = require('string_decoder').StringDecoder;

  // Node v0.8 doesn't have this method; install a no-op so callers can
  // always invoke end().
  if (!StringDecoder.prototype.end) {
      StringDecoder.prototype.end = function() {};
  }
  // Decoder that delegates to a StringDecoder created for codec.enc.
  // `options` is accepted but not read.
  function InternalDecoder(options, codec) {
      this.decoder = new StringDecoder(codec.enc);
  }

  // Decodes one chunk and returns the resulting string. Input that is not
  // already a Buffer is converted with Buffer.from() first.
  InternalDecoder.prototype.write = function(buf) {
      if (!Buffer.isBuffer(buf)) {
          buf = Buffer.from(buf);
      }

      return this.decoder.write(buf);
  };

  // Finishes decoding; returns whatever the underlying decoder's end() returns.
  InternalDecoder.prototype.end = function() {
      return this.decoder.end();
  };
  //------------------------------------------------------------------------------

  // Encoder is mostly trivial: each chunk is converted independently with
  // Buffer.from(str, enc), so no state is kept between writes.
  // `options` is accepted but not read.
  function InternalEncoder(options, codec) {
      this.enc = codec.enc;
  }

  // Returns a Buffer holding `str` interpreted in this.enc.
  InternalEncoder.prototype.write = function(str) {
      return Buffer.from(str, this.enc);
  };

  // Nothing is buffered, so there is nothing to flush (returns undefined).
  InternalEncoder.prototype.end = function() {
  };
  //------------------------------------------------------------------------------

  // Except base64 encoder, which must keep its state: input is only converted
  // in whole groups of 4 characters, and the remainder is carried over to the
  // next write. Neither constructor argument is read.
  function InternalEncoderBase64(options, codec) {
      this.prevStr = '';
  }

  // Converts as many complete 4-character groups as are available (carry-over
  // from the previous call included) and keeps the tail for later.
  InternalEncoderBase64.prototype.write = function(str) {
      str = this.prevStr + str;
      var completeQuads = str.length - (str.length % 4);
      this.prevStr = str.slice(completeQuads);
      str = str.slice(0, completeQuads);

      return Buffer.from(str, "base64");
  };

  // Converts whatever incomplete group is still pending.
  InternalEncoderBase64.prototype.end = function() {
      return Buffer.from(this.prevStr, "base64");
  };
  //------------------------------------------------------------------------------

  // CESU-8 encoder is also special. Neither constructor argument is read.
  function InternalEncoderCesu8(options, codec) {
  }

  // Encodes every UTF-16 code unit of `str` on its own as 1, 2 or 3 bytes and
  // returns the bytes as a Buffer.
  InternalEncoderCesu8.prototype.write = function(str) {
      // Worst case is 3 bytes per code unit; the result is trimmed at the end.
      var buf = Buffer.alloc(str.length * 3), bufIdx = 0;

      for (var i = 0; i < str.length; i++) {
          var charCode = str.charCodeAt(i);
          // Naive implementation, but it works because CESU-8 is especially easy
          // to convert from UTF-16 (which all JS strings are encoded in).
          if (charCode < 0x80) {
              buf[bufIdx++] = charCode;
          } else if (charCode < 0x800) {
              buf[bufIdx++] = 0xC0 + (charCode >>> 6);
              buf[bufIdx++] = 0x80 + (charCode & 0x3f);
          } else { // charCode will always be < 0x10000 in javascript.
              buf[bufIdx++] = 0xE0 + (charCode >>> 12);
              buf[bufIdx++] = 0x80 + ((charCode >>> 6) & 0x3f);
              buf[bufIdx++] = 0x80 + (charCode & 0x3f);
          }
      }

      return buf.slice(0, bufIdx);
  };

  // No state is kept between writes, so there is nothing to flush.
  InternalEncoderCesu8.prototype.end = function() {
  };
  //------------------------------------------------------------------------------

  // CESU-8 decoder is not implemented in Node v4.0+
  // State carried between write() calls:
  //   acc       - bits of the code unit collected so far
  //   contBytes - continuation bytes still expected
  //   accBytes  - bytes consumed for the current code unit
  // `options` is not read; codec.defaultCharUnicode is emitted for invalid input.
  function InternalDecoderCesu8(options, codec) {
      this.acc = 0;
      this.contBytes = 0;
      this.accBytes = 0;
      this.defaultCharUnicode = codec.defaultCharUnicode;
  }

  // Decodes one chunk of bytes and returns the decoded string. A sequence cut
  // off at the end of the chunk is remembered and completed by the next call.
  InternalDecoderCesu8.prototype.write = function(buf) {
      var acc = this.acc, contBytes = this.contBytes, accBytes = this.accBytes,
          res = '';

      for (var i = 0; i < buf.length; i++) {
          var curByte = buf[i];
          if ((curByte & 0xC0) !== 0x80) { // Leading byte
              if (contBytes > 0) { // Previous code is invalid
                  res += this.defaultCharUnicode;
                  contBytes = 0;
              }

              if (curByte < 0x80) { // Single-byte code
                  res += String.fromCharCode(curByte);
              } else if (curByte < 0xE0) { // Two-byte code
                  acc = curByte & 0x1F;
                  contBytes = 1; accBytes = 1;
              } else if (curByte < 0xF0) { // Three-byte code
                  acc = curByte & 0x0F;
                  contBytes = 2; accBytes = 1;
              } else { // Four or more are not supported for CESU-8.
                  res += this.defaultCharUnicode;
              }
          } else { // Continuation byte
              if (contBytes > 0) { // We're waiting for it.
                  acc = (acc << 6) | (curByte & 0x3f);
                  contBytes--; accBytes++;
                  if (contBytes === 0) {
                      // Check for overlong encoding, but support Modified UTF-8 (encoding NULL as C0 80)
                      if (accBytes === 2 && acc < 0x80 && acc > 0) {
                          res += this.defaultCharUnicode;
                      } else if (accBytes === 3 && acc < 0x800) {
                          res += this.defaultCharUnicode;
                      } else {
                          // Actually add character.
                          res += String.fromCharCode(acc);
                      }
                  }
              } else { // Unexpected continuation byte
                  res += this.defaultCharUnicode;
              }
          }
      }

      this.acc = acc; this.contBytes = contBytes; this.accBytes = accBytes;
      return res;
  };

  // Flushes the decoder: returns the replacement character if a multi-byte
  // sequence was left unfinished, otherwise an empty string.
  InternalDecoderCesu8.prototype.end = function() {
      // Must start from a string: starting from the number 0 made this return
      // 0 or "0" + replacement character instead of text.
      var res = '';
      if (this.contBytes > 0) {
          res += this.defaultCharUnicode;
      }
      return res;
  };
  159. }