Skip to content

Commit 3128a5b

Browse files
committed
Merge branch 'develop'
# Conflicts: # package.json
2 parents 17126ba + d3a8b46 commit 3128a5b

File tree

5 files changed

+288
-407
lines changed

5 files changed

+288
-407
lines changed

Changes.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# 1.8.3 - 2022-05-07
2+
3+
- Updated email parser body
4+
- Fix for Redis/KeyDB crash when plugin is started as delivery only
5+
16
# 1.8.2 - 2022-04-17
27

38
- Added a new option "restart" to restart Haraka if there is an error on adding the email to the collection

email_body_utility.js

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
const EmailBodyUtility = function() {
22
const stream = require('stream');
33

4+
// const ced = require('ced');
45
const async = require('async');
5-
// const IsBase64 = require('is-base64');
66
const linkify = require('linkify-it')();
7-
// const ced = require('ced');
87
const Splitter = require('mailsplit').Splitter;
98
const detectCharacterEncoding = require('detect-character-encoding');
109

1110
const quotedPrintable = require('quoted-printable');
1211

13-
const _default_html_field_order = 'bodytext_html mailparser_html mailparser_text_as_html'.split(' ');
12+
const _default_html_field_order = 'mailparser_html bodytext_html mailparser_text_as_html'.split(' ');
1413
const _default_text_field_order = 'bodytext_plain mailparser_text'.split(' ');
1514

1615
const _haraka_bodytext_variations = 'haraka_bodytext haraka_body_text_encoded'.split(' ');
@@ -20,7 +19,7 @@ const EmailBodyUtility = function() {
2019

2120
const _iso_8859_charset_regex = /text\/html; charset=iso-8859-\d/img;
2221
const _windows_charset_regex = /text\/html;\s*charset=Windows-125(2|7)/img;
23-
const _is_base64_encoded_regex = /^(?:[A-Za-z\d+/]{4})*(?:[A-Za-z\d+/]{3}=|[A-Za-z\d+/]{2}==)?$/mg;
22+
const _is_base64_encoded_regex = /w\+?\+\s*$/;
2423

2524
const _uses_windows_1257_charset = /charset=Windows-1257/im;
2625
const _contains_html_invalid_unicode = /\x82/;
@@ -69,13 +68,13 @@ const EmailBodyUtility = function() {
6968
var text_field_order = prefer_mailparser ? 'mailparser_text bodytext_plain'.split(' ') : _default_text_field_order;
7069

7170
_log_module && console.log(`\ngetHtmlAndTextBody(), extracting 'html'...`);
72-
var html_info = !options.ignore_html_result ? _extractBody(email_obj, body, html_field_order, options) : { result: '' };
73-
_log_module && !has_rfc_822_message && console.log(`\ngetHtmlAndTextBody(), html result came from '${html_info.source}' and has a length of '${html_info.result.length}'`);
71+
var html_info = ! options.ignore_html_result ? _extractBody(email_obj, body, html_field_order, options) : { result: '' };
72+
_log_module && ! has_rfc_822_message && console.log(`\ngetHtmlAndTextBody(), html result came from '${html_info.source}' and has a length of '${html_info.result.length}'`);
7473

7574
_log_module && console.log(`\ngetHtmlAndTextBody(), extracting 'text'...`);
76-
var text_info = !options.ignore_text_result ? _extractBody(email_obj, body, text_field_order, options) : { result: '' };
75+
var text_info = ! options.ignore_text_result ? _extractBody(email_obj, body, text_field_order, options) : { result: '' };
7776

78-
_log_module && !has_rfc_822_message && console.log(`\ngetHtmlAndTextBody(), text result came from '${text_info.source}' and has a length of '${text_info.result.length}'`);
77+
_log_module && ! has_rfc_822_message && console.log(`\ngetHtmlAndTextBody(), text result came from '${text_info.source}' and has a length of '${text_info.result.length}'`);
7978
return waterfall_callback(null, html_info, text_info);
8079
},
8180
/* extract and append rfc822 info if present
@@ -98,14 +97,17 @@ const EmailBodyUtility = function() {
9897
return waterfall_callback(null, html_info, text_info);
9998
});
10099
},
101-
/* analyse results and overwrite html if text is better parsed */
100+
/* analyze results and overwrite html if text is better parsed */
102101
function(html_info, text_info, waterfall_callback) {
103102

104-
var use_text_for_html = !html_info.result // if we have no html result
103+
var use_text_for_html = ! html_info.result // if we have no html result
105104
||
106-
(text_info.result && html_info.source.includes('mailparser') && !text_info.source.includes('mailparser')) // if we have a text result, and the html result was from mailparser
105+
(text_info.result && html_info.source.includes('mailparser') && ! text_info.source.includes('mailparser') // if we have a text result, and the html result was from mailparser
107106
||
108-
(!html_info.has_valid_encoding && text_info.has_valid_encoding); // or we could not properly decode the content for the html but we could for the text
107+
(! html_info.has_valid_encoding && text_info.has_valid_encoding) // or we could not properly decode the content for the html but we could for the text
108+
);
109+
110+
var use_text_for_html = ! html_info.result;
109111

110112
// override any html mailparser result we have if there's a valid text result
111113
if (use_text_for_html) {
@@ -223,7 +225,11 @@ const EmailBodyUtility = function() {
223225
var field = field_order[i++];
224226
var result = getBodyByField(email_obj, body, field);
225227

226-
_log_module && console.log(`checking field '${field.toUpperCase()}', string: ${(result.body || '').substring(0,50)}...\n\n`);
228+
_log_module && console.log(`checking field '${field.toUpperCase()}', string: "${(result.body || '').substring(0,150)}..."\n`);
229+
// if result is unicode, then set the result.body to null
230+
var is_base64_encoded = _is_base64_encoded_regex.test(result.body);
231+
_log_module && console.log('is_base64_encoded:', is_base64_encoded)
232+
if (is_base64_encoded) { result.body = null; }
227233

228234
var is_base64_encoded = false;
229235
if (result.body && typeof result.body === 'string' && result.body.length) {
@@ -245,7 +251,7 @@ const EmailBodyUtility = function() {
245251
result.body = null;
246252
}
247253

248-
! is_base64_encoded && _log_module && console.log(`\n\nbase64 NOT FOUND for field'${field.toUpperCase()}', string: ${string_body.substring(0,50)}...\n\n`);
254+
! is_base64_encoded && _log_module && console.log(`\n\nbase64 NOT FOUND for field '${field.toUpperCase()}', string: ${string_body.substring(0,50)}...\n\n`);
249255
}
250256
}
251257

@@ -326,7 +332,7 @@ const EmailBodyUtility = function() {
326332
};
327333

328334
default:
329-
console.log(`unknown field type requested for body field: '${field}'`);
335+
_log_module && console.log(`unknown field type requested for body field: '${field}'`);
330336
return {
331337
'body': '',
332338
'source': 'none'
@@ -343,6 +349,7 @@ const EmailBodyUtility = function() {
343349
var is_matching_node = is_requested_type && (haraka_obj.bodytext || haraka_obj.body_text_encoded)
344350
_log_module && !is_matching_node && console.log(`${'\t'.repeat(depth)} [${index}] not a matching node for type '${type}'`);
345351

352+
346353
if (is_matching_node) {
347354
_log_module && console.log(`${'\t'.repeat(depth)} [${index}] found a matching bodytype of length '${haraka_obj.bodytext.length || haraka_obj.body_text_encoded.length}' for type '${type}'`);
348355

@@ -362,6 +369,12 @@ const EmailBodyUtility = function() {
362369
if (_body_text) {
363370
try {
364371
bodytext_encoding = detectCharacterEncoding(Buffer.from(_body_text));
372+
// _log_module && console.log('!'.repeat(100))
373+
// _log_module && console.log(bodytext_encoding)
374+
// bodytext_encoding = ced(Buffer.from(_body_text));
375+
_log_module && console.log('!'.repeat(100))
376+
_log_module && console.log(bodytext_encoding)
377+
_log_module && console.log('!'.repeat(100))
365378
} catch(e) {}
366379
}
367380
// var bodytext_encoding = _body_text ? detectCharacterEncoding(Buffer.from(_body_text)) : {};

index.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ exports.register = function () {
3434
// Load on startup
3535
plugin.register_hook('init_master', 'initialize_mongodb');
3636
plugin.register_hook('init_child', 'initialize_mongodb');
37-
plugin.register_hook('init_master', 'initialize_redis');
38-
plugin.register_hook('init_child', 'initialize_redis');
3937

4038
// Enable for queue
4139
if (plugin.cfg.enable.queue === 'yes') {
40+
plugin.register_hook('init_master', 'initialize_redis');
41+
plugin.register_hook('init_child', 'initialize_redis');
4242
plugin.register_hook('data', 'enable_transaction_body_parse');
4343
plugin.register_hook('queue', 'queue_to_mongodb');
4444
// Define mime type

package.json

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "haraka-plugin-mongodb",
3-
"version": "1.8.2",
3+
"version": "1.8.3",
44
"description": "Haraka plugin that stores emails in MongoDb. Additionally, stores reports on delivery.",
55
"main": "index.js",
66
"scripts": {
@@ -31,27 +31,28 @@
3131
"homepage": "https://github.com/haraka/haraka-plugin-mongodb#readme",
3232
"dependencies": {
3333
"async": "^3.2.3",
34+
"ced": "^2.0.0",
3435
"decode-html": "^2.0.0",
3536
"detect-character-encoding": "^0.8.0",
36-
"fs-extra": "10",
37+
"fs-extra": "^10.1.0",
3738
"iconv": "^3.0.1",
3839
"ioredis": "^5.0.4",
3940
"linkify-it": "^3.0.3",
40-
"mailparser": "^3.4.0",
41-
"mailsplit": "5",
42-
"mime": "3",
41+
"mailparser": "^3.5.0",
42+
"mailsplit": "^5.3.2",
43+
"mime": "^3.0.0",
4344
"moment": "^2.29.3",
44-
"mongodb": "^3.7.3",
45-
"node-gyp": "9",
46-
"nodemailer": "^6.7.3",
45+
"mongodb": "^4.5.0",
46+
"node-gyp": "^9.0.0",
47+
"nodemailer": "^6.7.5",
4748
"quoted-printable": "^1.0.1",
4849
"string": "^3.3.3",
4950
"tlds": "^1.231.0",
5051
"uuid": "^8.3.2",
5152
"watch": "^1.0.2"
5253
},
5354
"devDependencies": {
54-
"eslint": "^7.32.0",
55+
"eslint": "^8.15.0",
5556
"eslint-plugin-haraka": "^1.0.14",
5657
"haraka-test-fixtures": "^1.0.33",
5758
"mocha": "*"

0 commit comments

Comments
 (0)