diff --git a/core/spreadsheetbulkexport.class.inc.php b/core/spreadsheetbulkexport.class.inc.php
index 1e87e967f..03958fd52 100644
--- a/core/spreadsheetbulkexport.class.inc.php
+++ b/core/spreadsheetbulkexport.class.inc.php
@@ -31,6 +31,7 @@ class SpreadsheetBulkExport extends TabularBulkExport
$oP->p(" *\tfields: (mandatory) the comma separated list of field codes to export (e.g: name,org_id,service_name...).");
$oP->p(" *\tno_localize: (optional) pass 1 to retrieve the raw (untranslated) values for enumerated fields. Default: 0.");
$oP->p(" *\tdate_format: the format to use when exporting date and time fields (default = the SQL format). e.g. 'Y-m-d H:i:s'");
+ $oP->p(" *\tformatted_text: set to 1 to formatted text fields with their HTML markup, 0 to remove formatting. Default is 1 (= formatted text)");
}
public function EnumFormParts()
@@ -51,7 +52,14 @@ class SpreadsheetBulkExport extends TabularBulkExport
$oP->add('
'.Dict::S('Core:BulkExport:SpreadsheetOptions').' ');
$oP->add('');
$oP->add('');
- $oP->add(' '.Dict::S('Core:BulkExport:OptionNoLocalize').' ');
+
+ $oP->add('');
+ $sChecked = (utils::ReadParam('formatted_text', 1) == 1) ? ' checked ' : '';
+ $oP->add(''.Dict::S('Core:BulkExport:TextFormat').' ');
+ $oP->add(' '); // Trick to pass the zero value if the checkbox below is unchecked, since we want the default value to be "1"
+ $oP->add(' '.Dict::S('Core:BulkExport:OptionFormattedText').' ');
+ $oP->add(' '.Dict::S('Core:BulkExport:OptionNoLocalize').' ');
+ $oP->add(' ');
$sDateTimeFormat = utils::ReadParam('date_format', (string)AttributeDateTime::GetFormat(), true, 'raw_data');
$sDefaultChecked = ($sDateTimeFormat == (string)AttributeDateTime::GetFormat()) ? ' checked' : '';
@@ -90,7 +98,8 @@ EOF
public function ReadParameters()
{
parent::ReadParameters();
-
+ $this->aStatusInfo['formatted_text'] = (bool)utils::ReadParam('formatted_text', 1, true);
+
$sDateFormatRadio = utils::ReadParam('spreadsheet_date_format_radio', '');
switch($sDateFormatRadio)
{
@@ -126,6 +135,7 @@ EOF
protected function GetValue($oObj, $sAttCode)
{
+ $bFormattedText = (array_key_exists('formatted_text', $this->aStatusInfo) ? $this->aStatusInfo['formatted_text'] : false);
switch($sAttCode)
{
case 'id':
@@ -147,6 +157,18 @@ EOF
{
$sRet = '';
}
+ elseif ($oAttDef instanceof AttributeText)
+ {
+ if ($bFormattedText)
+ {
+ // Replace paragraphs (<p>...</p>, etc.) by line breaks (<br/>) since Excel (pre-2016) splits the cells when there is a paragraph
+ $sRet = static::HtmlToSpreadsheet($oObj->GetAsHTML($sAttCode));
+ }
+ else
+ {
+ $sRet = utils::HtmlToText($oObj->GetAsHTML($sAttCode));
+ }
+ }
elseif ($oAttDef instanceof AttributeString)
{
$sRet = $oObj->GetAsHTML($sAttCode);
@@ -232,6 +254,7 @@ EOF
$this->OptimizeColumnLoad($oSet);
$sExportDateTimeFormat = $this->aStatusInfo['date_format'];
+ $bFormattedText = (array_key_exists('formatted_text', $this->aStatusInfo) ? $this->aStatusInfo['formatted_text'] : false);
// Date & time formats
$oDateTimeFormat = new DateTimeFormat($sExportDateTimeFormat);
$oDateFormat = new DateTimeFormat($oDateTimeFormat->ToDateFormat());
@@ -289,6 +312,20 @@ EOF
// Trick for Excel: treat the content as text even if it begins with an equal sign
$sData .= "$sField ";
}
+ elseif ($oAttDef instanceof AttributeText)
+ {
+ if ($bFormattedText)
+ {
+ // Replace paragraphs (<p>...</p>, etc.) by line breaks (<br/>) since Excel (pre-2016) splits the cells when there is a paragraph
+ $sField = static::HtmlToSpreadsheet($oObj->GetAsHTML($sAttCode));
+ }
+ else
+ {
+ // Convert to plain text
+ $sField = utils::HtmlToText($oObj->GetAsHTML($sAttCode));
+ }
+ $sData .= "$sField ";
+ }
else if($oAttDef instanceof AttributeString)
{
$sField = $oObj->GetAsHTML($sAttCode, $this->bLocalizeOutput);
@@ -354,4 +391,39 @@ EOF
{
return 'html';
}
+
+ /**
+  * Cleanup all markup displayed as line breaks (except <br> tags) since this
+  * causes Excel (pre-2016) to generate extra lines in the table, thus breaking
+  * the tabular disposition of the export.
+  * Note: Excel 2016 also refuses line breaks, so the only solution for this case is alas plain text
+  *
+  * Block-level containers are turned into a <span> (attributes are kept) followed by a <br/>,
+  * <hr> becomes a plain <br/>, and table/list wrapper tags are dropped while their content is kept.
+  * Any other markup is left untouched.
+  *
+  * @param string $sHtml The HTML to cleanup
+  * @return string The cleaned HTML (the input, unchanged, if the regular expression engine fails)
+  */
+ public static function HtmlToSpreadsheet($sHtml)
+ {
+     // The tags listed here are a subset of the whitelist defined in HTMLDOMSanitizer
+     // Tags causing a visual "line break" in the displayed page (i.e. display: block) => to be replaced by a <span> followed by a <br/>
+     // ('hr' is handled separately below: it is a void element, so it has no closing tag to turn into a <br/>)
+     $aTagsToReplace = array(
+         'pre', 'div', 'p', 'center', 'h1', 'h2', 'h3', 'h4', 'li', 'fieldset', 'legend', 'nav', 'section', 'tr', 'caption',
+     );
+     // Tags to completely remove from the markup
+     $aTagsToRemove = array(
+         'table', 'thead', 'tbody', 'ul', 'ol', 'td', 'th',
+     );
+
+     $sReplaceNames = implode('|', $aTagsToReplace);
+     $sRemoveNames = implode('|', $aTagsToRemove);
+
+     // The \b after the tag name prevents a short name from matching a longer one
+     // (e.g. 'p' vs. <pre>, 'th' vs. <thead>), so the result no longer depends on list order.
+     // Closing tags are explicitly anchored on '</' so that no dangling '</' is left behind.
+     $aPatterns = array(
+         "~<hr\\b[^>]*>~i",
+         "~<(?:{$sReplaceNames})\\b([^>]*)>~i",
+         "~</(?:{$sReplaceNames})\\s*>~i",
+         "~</?(?:{$sRemoveNames})\\b[^>]*>~i",
+     );
+     $aReplacements = array(
+         '<br/>',
+         '<span$1>',
+         '</span><br/>',
+         '',
+     );
+
+     $sSource = (string)$sHtml;
+     $sResult = preg_replace($aPatterns, $aReplacements, $sSource);
+
+     // preg_replace() returns null on failure (e.g. invalid UTF-8 / backtrack limit): keep the original text rather than exporting an empty cell
+     return ($sResult === null) ? $sSource : $sResult;
+ }
}